Spaces:
Running
Running
PanigrahiNirma
committed on
Commit
•
4efabce
1
Parent(s):
c6ecfc8
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
|
3 |
from pdfminer.high_level import extract_text
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
from sklearn.metrics.pairwise import cosine_similarity
|
@@ -74,17 +74,18 @@ def answer_question(pdf, question, num_words):
|
|
74 |
elif relevant_text and len(answer.split()) < num_words:
|
75 |
remaining_words = num_words - len(answer.split())
|
76 |
added_words = 0
|
77 |
-
|
78 |
-
for sentence in
|
79 |
sentence_words = sentence.split()
|
80 |
words_to_add = min(remaining_words - added_words, len(sentence_words))
|
81 |
if words_to_add > 0:
|
82 |
-
|
83 |
-
answer_words.extend(sentence_words[:words_to_add])
|
84 |
-
answer = " ".join(answer_words)
|
85 |
added_words += words_to_add
|
86 |
if added_words >= remaining_words:
|
87 |
break
|
|
|
|
|
|
|
88 |
if len(answer.split()) > num_words:
|
89 |
answer = " ".join(answer.split()[:num_words])
|
90 |
|
@@ -96,10 +97,10 @@ def answer_question(pdf, question, num_words):
|
|
96 |
with gr.Blocks() as iface:
|
97 |
gr.Markdown("PDF Q&A with RoBERTa")
|
98 |
with gr.Row():
|
99 |
-
with gr.Column(scale=2):
|
100 |
question_input = gr.Textbox(lines=2, placeholder="Ask a question", label="Question")
|
101 |
btn = gr.Button("Submit")
|
102 |
-
with gr.Column(scale=1):
|
103 |
pdf_input = gr.File(type="filepath", label="Upload PDF")
|
104 |
num_words_slider = gr.Slider(minimum=1, maximum=500, value=100, step=1, label="Number of Words")
|
105 |
answer_output = gr.Textbox(label="Answer", lines=5)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
|
3 |
from pdfminer.high_level import extract_text
|
4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
74 |
elif relevant_text and len(answer.split()) < num_words:
|
75 |
remaining_words = num_words - len(answer.split())
|
76 |
added_words = 0
|
77 |
+
added_sentences = []
|
78 |
+
for sentence in sent_tokenize(relevant_text):
|
79 |
sentence_words = sentence.split()
|
80 |
words_to_add = min(remaining_words - added_words, len(sentence_words))
|
81 |
if words_to_add > 0:
|
82 |
+
added_sentences.append(" ".join(sentence_words[:words_to_add]))
|
|
|
|
|
83 |
added_words += words_to_add
|
84 |
if added_words >= remaining_words:
|
85 |
break
|
86 |
+
answer += " " + " ".join(added_sentences)
|
87 |
+
answer = answer.strip()
|
88 |
+
answer = " ".join(answer.split())
|
89 |
if len(answer.split()) > num_words:
|
90 |
answer = " ".join(answer.split()[:num_words])
|
91 |
|
|
|
97 |
with gr.Blocks() as iface:
|
98 |
gr.Markdown("PDF Q&A with RoBERTa")
|
99 |
with gr.Row():
|
100 |
+
with gr.Column(scale=2):
|
101 |
question_input = gr.Textbox(lines=2, placeholder="Ask a question", label="Question")
|
102 |
btn = gr.Button("Submit")
|
103 |
+
with gr.Column(scale=1):
|
104 |
pdf_input = gr.File(type="filepath", label="Upload PDF")
|
105 |
num_words_slider = gr.Slider(minimum=1, maximum=500, value=100, step=1, label="Number of Words")
|
106 |
answer_output = gr.Textbox(label="Answer", lines=5)
|