ivyblossom committed on
Commit
9988861
1 Parent(s): 663bca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -24
app.py CHANGED
@@ -1,15 +1,8 @@
1
  import os
2
  import streamlit as st
3
  from transformers import pipeline
4
- import re
5
  from PyPDF2 import PdfReader
6
 
7
- # Function to truncate text to the nearest word boundary
8
- def truncate_to_word_boundary(text, max_words=100):
9
- words = re.findall(r'\w+', text)
10
- truncated_text = ' '.join(words[:max_words])
11
- return truncated_text
12
-
13
  # Function to perform question-answering
14
  def question_answering(question, pdf_path):
15
  pdf_reader = PdfReader(pdf_path)
@@ -25,27 +18,20 @@ def question_answering(question, pdf_path):
25
  question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
26
  answer = question_answerer(question=question, context=pdf_text)
27
 
28
- return answer, pdf_text_with_pages
29
-
30
- def get_context_text(pdf_text_with_pages, context_page_num, context_window=3):
31
- context_start = max(0, context_page_num - context_window - 1)
32
- context_end = min(len(pdf_text_with_pages), context_page_num + context_window)
33
- context_lines = [text for _, text in pdf_text_with_pages[context_start:context_end]]
34
- context_text = "\n".join(context_lines)
35
- return context_text
36
 
37
  def main():
38
  st.title("Question Answering on a PDF File")
39
 
40
  uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
41
  question = st.text_input("Ask your question:")
42
-
43
  if st.button("Answer") and uploaded_file is not None:
44
  pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
45
  with open(pdf_path, "wb") as f:
46
  f.write(uploaded_file.read())
47
 
48
- answer, pdf_text_with_pages = question_answering(question, pdf_path)
49
 
50
  # Delete the uploaded file after processing
51
  os.remove(pdf_path)
@@ -54,11 +40,5 @@ def main():
54
  st.write("Answer:", answer['answer'])
55
  st.write("Score:", answer['score'])
56
 
57
- # Display context where the answer came from
58
- context_page_num = answer['start']
59
- context_text = get_context_text(pdf_text_with_pages, context_page_num)
60
- st.write("Context:")
61
- st.write(context_text)
62
-
63
  if __name__ == "__main__":
64
- main()
 
1
  import os
2
  import streamlit as st
3
  from transformers import pipeline
 
4
  from PyPDF2 import PdfReader
5
 
 
 
 
 
 
 
6
  # Function to perform question-answering
7
  def question_answering(question, pdf_path):
8
  pdf_reader = PdfReader(pdf_path)
 
18
  question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
19
  answer = question_answerer(question=question, context=pdf_text)
20
 
21
+ return answer
 
 
 
 
 
 
 
22
 
23
  def main():
24
  st.title("Question Answering on a PDF File")
25
 
26
  uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
27
  question = st.text_input("Ask your question:")
28
+
29
  if st.button("Answer") and uploaded_file is not None:
30
  pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
31
  with open(pdf_path, "wb") as f:
32
  f.write(uploaded_file.read())
33
 
34
+ answer = question_answering(question, pdf_path)
35
 
36
  # Delete the uploaded file after processing
37
  os.remove(pdf_path)
 
40
  st.write("Answer:", answer['answer'])
41
  st.write("Score:", answer['score'])
42
 
 
 
 
 
 
 
43
  if __name__ == "__main__":
44
+ main()