Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add document segmentation feedback to UI #23

Merged
merged 2 commits into from
Feb 12, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 52 additions & 20 deletions demo/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def load_model():

@st.cache_resource
def convert_to_sections(uploaded_file, output_dir):
    """Split an uploaded document into section files under ``output_dir``.

    The upload is read fully into memory, opened with PyMuPDF from that
    in-memory buffer, and handed to ``document_to_sections_dir``.

    Because of ``st.cache_resource``, Streamlit is expected to reuse the
    previous result when the function is called again with the same
    arguments instead of re-running the conversion.

    Args:
        uploaded_file: File-like object whose ``read()`` returns the raw
            document bytes (e.g. the value returned by ``st.file_uploader``).
        output_dir: Destination directory, passed through unchanged to
            ``document_to_sections_dir``.

    Returns:
        Whatever ``document_to_sections_dir`` returns.
    """
    # NOTE(review): the first positional argument of ``pymupdf.open`` is the
    # filename / type hint; "type" carries no extension, so PyMuPDF presumably
    # falls back to content sniffing -- confirm this is intended.
    return document_to_sections_dir(
        pymupdf.open("type", BytesIO(uploaded_file.read())),
        output_dir,
    )
Expand All @@ -39,23 +39,55 @@ def convert_to_sections(uploaded_file, output_dir):
# NOTE(review): the statement that encloses this flow (if any) lies outside
# the visible hunk; re-indent to match it when splicing into demo/app.py.
st.markdown("[Docs for this Step]()")
st.divider()

# Single location for this upload's section files, shared by every step below.
sections_dir = f"example_outputs/{uploaded_file.name}"

# UI boundary: any failure while converting or answering is reported to the
# user in the page instead of surfacing as a raw traceback.
try:
    with st.spinner("Converting document to sections..."):
        # The return value is not needed here: the section list is rebuilt
        # from the files actually present in the output directory.
        convert_to_sections(uploaded_file, sections_dir)
    sections = [f.stem for f in Path(sections_dir).iterdir()]

    # Segmentation feedback. The empty case is checked first so that a
    # "Successfully extracted 0 sections" banner is never shown next to the
    # "no sections" error.
    if not sections:
        st.error(
            "❌ No sections were found in the document. The document might not have a clear structure or might be in an unsupported format."
        )
    else:
        st.success(
            f"Successfully extracted {len(sections)} sections from the document."
        )
        if len(sections) == 1:
            st.warning(
                "⚠️ Only one section was found. This might indicate that the document structure wasn't properly detected."
            )
        elif "INTRO" in sections and len(sections) < 3:
            # Reached only with exactly two sections, one of them "INTRO".
            st.warning(
                "⚠️ Only found default sections. The document structure might not have been properly detected."
            )

    # Show sections
    st.text("Detected Sections:")
    st.json(sections)

    model = load_model()
    question = st.text_input("Enter a question:")
    if question:
        with st.spinner("Answering..."):
            answer, sections_checked = find_retrieve_answer(
                model=model,
                sections_dir=sections_dir,
                question=question,
                find_prompt=FIND_PROMPT,
                answer_prompt=ANSWER_PROMPT,
            )
        st.text("Sections checked:")
        st.json(sections_checked)
        st.text("Answer:")
        st.text(answer)
except Exception as e:
    st.error(f"❌ Error processing document: {e}")
    st.info(
        "💡 Try uploading a different document or check if the file is corrupted."
    )
Loading