Spaces:

ignaciaginting
/

answer_question_from_doc

Sleeping

File size: 1,945 Bytes

fe0246c
8bc3f30
fe0246c
 
 
8bc3f30
fe0246c
 
 
 
 
 
 
58d0a5e
e7ce40a
fe0246c
e7ce40a
 
fe0246c
 
 
 
e7ce40a
 
 
 
 
 
fe0246c
e7ce40a
 
 
 
 
fe0246c
e7ce40a
 
 
 
fe0246c
 
 
e7ce40a
 
 
 
d75400e
e7ce40a
 
 
 
 
 
d75400e
e7ce40a

import streamlit as st
from transformers import pipeline
from PIL import Image
import tempfile
import fitz  # PyMuPDF

# Load the model
@st.cache_resource
def load_model():
    """Build and return the document question-answering pipeline.

    Creates a Hugging Face ``document-question-answering`` pipeline backed
    by the ``impira/layoutlm-document-qa`` checkpoint. The function is
    wrapped in ``st.cache_resource`` so the pipeline object can be reused
    instead of being rebuilt on every call.
    """
    doc_qa = pipeline(
        task="document-question-answering",
        model="impira/layoutlm-document-qa",
    )
    return doc_qa

# Main Streamlit flow: build the page, collect a document and a question,
# rasterize the document to an image, and display the model's answers.
qa_pipeline = load_model()

st.title("📄 Document Question Answering App")
st.write("Upload a PDF or Image file, enter a question, and get answers from the document.")

# Upload PDF or image
uploaded_file = st.file_uploader("Upload PDF or Image", type=["pdf", "png", "jpg", "jpeg"])

# Ask a question
question = st.text_input("Ask a question about the document:")

# Ignore questions that consist only of whitespace.
question_text = question.strip() if question else ""

if uploaded_file and question_text:
    img = None

    # Handle PDF file
    if uploaded_file.type == "application/pdf":
        # Open the PDF directly from the uploaded bytes: no temporary file is
        # left behind on disk, and the context manager closes the document.
        with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
            if doc.page_count > 0:
                page = doc.load_page(0)  # just first page for now
                pix = page.get_pixmap(dpi=150)
                # Image.frombytes copies the pixel data, so the image stays
                # valid after the document is closed.
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

        if img is None:
            st.error("The uploaded PDF has no pages.")
        else:
            st.image(img, caption="Page 1 of PDF")

    # Handle image file
    else:
        img = Image.open(uploaded_file)
        st.image(img, caption="Uploaded Image")

    if img is not None:
        # Run the pipeline; only the inference itself sits under the spinner.
        with st.spinner("Searching for the answer..."):
            # top_k defaults to 1, so request three candidates explicitly;
            # otherwise the "other possible answers" list is always empty.
            results = qa_pipeline(img, question_text, top_k=3)

        if results:
            top_answer = results[0]  # get the highest-scoring answer
            st.success(f"**Answer:** {top_answer['answer']} (score: {top_answer['score']:.2f})")

            # Show the remaining candidates (up to two more) if available
            if len(results) > 1:
                st.markdown("\n**Other possible answers:**")
                for idx, ans in enumerate(results[1:3], start=2):
                    st.markdown(f"- Option {idx}: {ans['answer']} (score: {ans['score']:.2f})")
        else:
            st.warning("No answer found.")