Skip to content

Commit a4cb08c

Browse files
authored Feb 12, 2025
Add document segmentation feedback to UI (#23)
* Add document segmentation feedback to UI
* style: fix formatting and trailing whitespace
1 parent 92a0219 commit a4cb08c

File tree

1 file changed

+52
-20
lines changed

1 file changed

+52
-20
lines changed
 

‎demo/app.py

+52 -20
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def load_model():
1919

2020
@st.cache_resource
2121
def convert_to_sections(uploaded_file, output_dir):
22-
document_to_sections_dir(
22+
return document_to_sections_dir(
2323
pymupdf.open("type", BytesIO(uploaded_file.read())),
2424
output_dir,
2525
)
@@ -39,23 +39,55 @@ def convert_to_sections(uploaded_file, output_dir):
3939
st.markdown("[Docs for this Step]()")
4040
st.divider()
4141

42-
convert_to_sections(uploaded_file, f"example_outputs/{uploaded_file.name}")
43-
44-
sections = [f.stem for f in Path(f"example_outputs/{uploaded_file.name}").iterdir()]
45-
st.json(sections)
46-
47-
model = load_model()
48-
question = st.text_input("Enter a question:")
49-
if question:
50-
with st.spinner("Answering..."):
51-
answer, sections_checked = find_retrieve_answer(
52-
model=model,
53-
sections_dir=f"example_outputs/{uploaded_file.name}",
54-
question=question,
55-
find_prompt=FIND_PROMPT,
56-
answer_prompt=ANSWER_PROMPT,
42+
try:
43+
with st.spinner("Converting document to sections..."):
44+
section_names = convert_to_sections(
45+
uploaded_file, f"example_outputs/{uploaded_file.name}"
5746
)
58-
st.text("Sections checked:")
59-
st.json(sections_checked)
60-
st.text("Answer:")
61-
st.text(answer)
47+
sections = [
48+
f.stem for f in Path(f"example_outputs/{uploaded_file.name}").iterdir()
49+
]
50+
51+
# Provide feedback about segmentation
52+
st.success(
53+
f"Successfully extracted {len(sections)} sections from the document."
54+
)
55+
56+
# Check for potential segmentation issues
57+
if len(sections) == 1:
58+
st.warning(
59+
"⚠️ Only one section was found. This might indicate that the document structure wasn't properly detected."
60+
)
61+
elif len(sections) == 0:
62+
st.error(
63+
"❌ No sections were found in the document. The document might not have a clear structure or might be in an unsupported format."
64+
)
65+
elif "INTRO" in sections and len(sections) < 3:
66+
st.warning(
67+
"⚠️ Only found default sections. The document structure might not have been properly detected."
68+
)
69+
70+
# Show sections
71+
st.text("Detected Sections:")
72+
st.json(sections)
73+
74+
model = load_model()
75+
question = st.text_input("Enter a question:")
76+
if question:
77+
with st.spinner("Answering..."):
78+
answer, sections_checked = find_retrieve_answer(
79+
model=model,
80+
sections_dir=f"example_outputs/{uploaded_file.name}",
81+
question=question,
82+
find_prompt=FIND_PROMPT,
83+
answer_prompt=ANSWER_PROMPT,
84+
)
85+
st.text("Sections checked:")
86+
st.json(sections_checked)
87+
st.text("Answer:")
88+
st.text(answer)
89+
except Exception as e:
90+
st.error(f"❌ Error processing document: {str(e)}")
91+
st.info(
92+
"💡 Try uploading a different document or check if the file is corrupted."
93+
)

0 commit comments

Comments
 (0)