PanigrahiNirma committed on
Commit
4efabce
1 Parent(s): c6ecfc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
3
  from pdfminer.high_level import extract_text
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
@@ -74,17 +74,18 @@ def answer_question(pdf, question, num_words):
74
  elif relevant_text and len(answer.split()) < num_words:
75
  remaining_words = num_words - len(answer.split())
76
  added_words = 0
77
- remaining_sentences = sent_tokenize(relevant_text)
78
- for sentence in remaining_sentences:
79
  sentence_words = sentence.split()
80
  words_to_add = min(remaining_words - added_words, len(sentence_words))
81
  if words_to_add > 0:
82
- answer_words = answer.split()
83
- answer_words.extend(sentence_words[:words_to_add])
84
- answer = " ".join(answer_words)
85
  added_words += words_to_add
86
  if added_words >= remaining_words:
87
  break
 
 
 
88
  if len(answer.split()) > num_words:
89
  answer = " ".join(answer.split()[:num_words])
90
 
@@ -96,10 +97,10 @@ def answer_question(pdf, question, num_words):
96
  with gr.Blocks() as iface:
97
  gr.Markdown("PDF Q&A with RoBERTa")
98
  with gr.Row():
99
- with gr.Column(scale=2): # Question area takes up 2/3 of the row
100
  question_input = gr.Textbox(lines=2, placeholder="Ask a question", label="Question")
101
  btn = gr.Button("Submit")
102
- with gr.Column(scale=1): # Other inputs take the remaining 1/3
103
  pdf_input = gr.File(type="filepath", label="Upload PDF")
104
  num_words_slider = gr.Slider(minimum=1, maximum=500, value=100, step=1, label="Number of Words")
105
  answer_output = gr.Textbox(label="Answer", lines=5)
 
1
  import gradio as gr
2
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
3
  from pdfminer.high_level import extract_text
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
 
74
  elif relevant_text and len(answer.split()) < num_words:
75
  remaining_words = num_words - len(answer.split())
76
  added_words = 0
77
+ added_sentences = []
78
+ for sentence in sent_tokenize(relevant_text):
79
  sentence_words = sentence.split()
80
  words_to_add = min(remaining_words - added_words, len(sentence_words))
81
  if words_to_add > 0:
82
+ added_sentences.append(" ".join(sentence_words[:words_to_add]))
 
 
83
  added_words += words_to_add
84
  if added_words >= remaining_words:
85
  break
86
+ answer += " " + " ".join(added_sentences)
87
+ answer = answer.strip()
88
+ answer = " ".join(answer.split())
89
  if len(answer.split()) > num_words:
90
  answer = " ".join(answer.split()[:num_words])
91
 
 
97
  with gr.Blocks() as iface:
98
  gr.Markdown("PDF Q&A with RoBERTa")
99
  with gr.Row():
100
+ with gr.Column(scale=2):
101
  question_input = gr.Textbox(lines=2, placeholder="Ask a question", label="Question")
102
  btn = gr.Button("Submit")
103
+ with gr.Column(scale=1):
104
  pdf_input = gr.File(type="filepath", label="Upload PDF")
105
  num_words_slider = gr.Slider(minimum=1, maximum=500, value=100, step=1, label="Number of Words")
106
  answer_output = gr.Textbox(label="Answer", lines=5)