Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
from tempfile import NamedTemporaryFile
|
4 |
+
from langchain.document_loaders import PyPDFLoader
|
5 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
6 |
+
from langchain.vectorstores import Chroma
|
7 |
+
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
8 |
+
|
9 |
+
# Function to save the uploaded PDF to a temporary file
|
10 |
+
def save_uploaded_file(uploaded_file):
    """Write an uploaded PDF to a temporary file and return its path.

    Args:
        uploaded_file: Binary file-like object exposing ``read()``.

    Returns:
        Path of the newly created ``.pdf`` temporary file. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it when it is no longer needed.
    """
    # delete=False keeps the file on disk after the handle is closed, so it
    # can be reopened by path afterwards.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.read())
    # Return only once the handle is closed, so all bytes are flushed to disk.
    return temp_file.name
|
14 |
+
|
15 |
+
# Function to get answers from the PDF
|
16 |
+
def get_answer(question, db, model, tokenizer):
    """Answer a question using the most similar chunks stored in ``db``.

    Args:
        question: The question text.
        db: Vector store exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        model: Question-answering model to run. When ``None`` (or when
            ``tokenizer`` is ``None``) the default checkpoint is loaded.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The extracted answer string, or a short notice when the store
        returns no chunks for the question.
    """
    docs = db.similarity_search(question, k=4)
    if not docs:
        # Nothing retrieved: skip the model entirely instead of failing on
        # an empty context.
        return "No relevant content was found in the document."

    # The store may return fewer than four chunks, so join whatever came
    # back; the newline keeps words from being fused across chunk boundaries.
    context = "\n".join(doc.page_content for doc in docs)

    # Reuse the model/tokenizer supplied by the caller; fall back to the
    # default checkpoint only when they were not provided.
    if model is None or tokenizer is None:
        model_name = "deepset/roberta-base-squad2"
        model = AutoModelForQuestionAnswering.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Create a question-answering pipeline
    nlp = pipeline("question-answering", model=model, tokenizer=tokenizer)

    # Get the answer
    result = nlp(question=question, context=context)
    return result["answer"]
|
38 |
+
|
39 |
+
# Streamlit UI
|
40 |
+
st.title("PDF Question Answering App")
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # Streamlit re-executes this script from the top on every widget
    # interaction, so anything that must survive between interactions is
    # kept in st.session_state rather than in local variables.
    state = st.session_state

    # (Re)build the vector index only when a different file is uploaded.
    file_key = (uploaded_file.name, len(uploaded_file.getvalue()))
    if state.get("file_key") != file_key:
        uploaded_file.seek(0)
        # Save the uploaded file to a temporary location
        temp_file_path = save_uploaded_file(uploaded_file)
        try:
            # Load the PDF document using PyPDFLoader
            pages = PyPDFLoader(temp_file_path).load_and_split()
        finally:
            # Cleanup: the pages are in memory now, so the temporary file is
            # removed right away, even if loading failed.
            os.remove(temp_file_path)

        # Initialize embeddings and Chroma
        embed = HuggingFaceEmbeddings()
        state["db"] = Chroma.from_documents(pages, embed)
        state["conversation"] = []
        state["file_key"] = file_key

    # Load the model & tokenizer for question-answering once per session.
    if "qa_model" not in state or "qa_tokenizer" not in state:
        model_name = "deepset/roberta-base-squad2"
        state["qa_model"] = AutoModelForQuestionAnswering.from_pretrained(model_name)
        state["qa_tokenizer"] = AutoTokenizer.from_pretrained(model_name)

    db = state["db"]
    model = state["qa_model"]
    tokenizer = state["qa_tokenizer"]
    conversation = state["conversation"]

    st.write("Ask your questions, and I'll provide answers:")

    # No explicit loop: each button click triggers a rerun, which acts as
    # the next turn. Creating the same widgets repeatedly inside a
    # `while True` would register duplicate widgets within a single run.
    question = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        if question.strip():
            answer = get_answer(question, db, model, tokenizer)
            st.write("Answer:")
            st.write(answer)
            conversation.append({"question": question, "answer": answer})
        else:
            st.write("Please enter a question first.")

    # Add an option to end the conversation
    if st.button("End Conversation"):
        # Display the conversation history
        st.write("Conversation History:")
        for entry in conversation:
            st.write(f"Q: {entry['question']}")
            st.write(f"A: {entry['answer']}")
        # Start the next conversation from a clean slate.
        state["conversation"] = []
|