ivyblossom committed on
Commit
6b1590b
·
1 Parent(s): 4bd7bfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import streamlit as st
3
  from transformers import pipeline
4
  import re
 
5
 
6
  # Function to truncate text to the nearest word boundary
7
  def truncate_to_word_boundary(text, max_words=100):
@@ -20,7 +21,7 @@ def question_answering(question, text):
20
  def main():
21
  st.title("Question Answering on an Uploaded File")
22
 
23
- uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt"]) # , "docx", "csv", "json", "txt"
24
  question = st.text_input("Ask your question:")
25
 
26
  if st.button("Answer") and uploaded_file is not None:
@@ -28,13 +29,12 @@ def main():
28
  file_contents = uploaded_file.read()
29
 
30
  if file_extension == ".pdf":
31
- # Handle PDF files
32
- import fitz # PyMuPDF for parsing PDF
33
-
34
  pdf_text = ""
35
- with fitz.open(stream=uploaded_file, filetype="pdf") as pdf_document:
36
- for page in pdf_document:
37
- pdf_text += page.get_text()
38
 
39
  # Perform question-answering
40
  answer = question_answering(question, pdf_text)
 
2
  import streamlit as st
3
  from transformers import pipeline
4
  import re
5
+ from PyPDF2 import PdfFileReader
6
 
7
  # Function to truncate text to the nearest word boundary
8
  def truncate_to_word_boundary(text, max_words=100):
 
21
  def main():
22
  st.title("Question Answering on an Uploaded File")
23
 
24
+ uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt", "docx", "csv", "json", "txt"])
25
  question = st.text_input("Ask your question:")
26
 
27
  if st.button("Answer") and uploaded_file is not None:
 
29
  file_contents = uploaded_file.read()
30
 
31
  if file_extension == ".pdf":
32
+ # Handle PDF files using PyPDF2
33
+ pdf_reader = PdfFileReader(uploaded_file)
 
34
  pdf_text = ""
35
+ for page_num in range(pdf_reader.getNumPages()):
36
+ pdf_page = pdf_reader.getPage(page_num)
37
+ pdf_text += pdf_page.extractText()
38
 
39
  # Perform question-answering
40
  answer = question_answering(question, pdf_text)