Spaces:
Running
Running
ivyblossom
committed on
Commit
·
6b1590b
1
Parent(s):
4bd7bfe
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import streamlit as st
|
3 |
from transformers import pipeline
|
4 |
import re
|
|
|
5 |
|
6 |
# Function to truncate text to the nearest word boundary
|
7 |
def truncate_to_word_boundary(text, max_words=100):
|
@@ -20,7 +21,7 @@ def question_answering(question, text):
|
|
20 |
def main():
|
21 |
st.title("Question Answering on an Uploaded File")
|
22 |
|
23 |
-
uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt"
|
24 |
question = st.text_input("Ask your question:")
|
25 |
|
26 |
if st.button("Answer") and uploaded_file is not None:
|
@@ -28,13 +29,12 @@ def main():
|
|
28 |
file_contents = uploaded_file.read()
|
29 |
|
30 |
if file_extension == ".pdf":
|
31 |
-
# Handle PDF files
|
32 |
-
|
33 |
-
|
34 |
pdf_text = ""
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
|
39 |
# Perform question-answering
|
40 |
answer = question_answering(question, pdf_text)
|
|
|
2 |
import streamlit as st
|
3 |
from transformers import pipeline
|
4 |
import re
|
5 |
+
from PyPDF2 import PdfFileReader
|
6 |
|
7 |
# Function to truncate text to the nearest word boundary
|
8 |
def truncate_to_word_boundary(text, max_words=100):
|
|
|
21 |
def main():
|
22 |
st.title("Question Answering on an Uploaded File")
|
23 |
|
24 |
+
uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt", "docx", "csv", "json", "txt"])
|
25 |
question = st.text_input("Ask your question:")
|
26 |
|
27 |
if st.button("Answer") and uploaded_file is not None:
|
|
|
29 |
file_contents = uploaded_file.read()
|
30 |
|
31 |
if file_extension == ".pdf":
|
32 |
+
# Handle PDF files using PyPDF2
|
33 |
+
pdf_reader = PdfFileReader(uploaded_file)
|
|
|
34 |
pdf_text = ""
|
35 |
+
for page_num in range(pdf_reader.getNumPages()):
|
36 |
+
pdf_page = pdf_reader.getPage(page_num)
|
37 |
+
pdf_text += pdf_page.extractText()
|
38 |
|
39 |
# Perform question-answering
|
40 |
answer = question_answering(question, pdf_text)
|