Pranjal12345 committed on
Commit
bfca8e7
1 Parent(s): 83f6d3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
 
2
  import re
3
  import os
4
  import fitz
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
- #dsad
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
9
  model = AutoModelForSeq2SeqLM.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
@@ -20,6 +20,9 @@ def generate_question_answer_pairs(pdf_file):
20
  if pdf_file is None:
21
  return "Please upload a PDF file"
22
 
 
 
 
23
  pdf_text = extract_text_from_pdf(pdf_file.name)
24
 
25
  sentences = re.split(r'(?<=[.!?])', pdf_text)
@@ -38,18 +41,22 @@ def generate_question_answer_pairs(pdf_file):
38
  if len(qa_parts) >= 2:
39
  question_part = qa_parts[0] + "?"
40
  answer_part = qa_parts[1].strip()
 
 
41
  result += f"Question: {question_part}\nAnswer: {answer_part}\n\n"
42
-
43
- return result
 
44
 
45
  title = "Question-Answer Pairs Generation"
46
  input_file = gr.File(label="Upload a PDF file")
 
47
  output_text = gr.Textbox()
48
 
49
  interface = gr.Interface(
50
  fn=generate_question_answer_pairs,
51
  inputs=input_file,
52
- outputs=output_text,
53
  title=title,
54
  )
55
  interface.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
  import re
4
  import os
5
  import fitz
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
9
  model = AutoModelForSeq2SeqLM.from_pretrained("potsawee/t5-large-generation-squad-QuestionAnswer")
 
20
  if pdf_file is None:
21
  return "Please upload a PDF file"
22
 
23
+ d = {'Question':[],'Answer':[]}
24
+ df = pd.DataFrame(data=d)
25
+
26
  pdf_text = extract_text_from_pdf(pdf_file.name)
27
 
28
  sentences = re.split(r'(?<=[.!?])', pdf_text)
 
41
  if len(qa_parts) >= 2:
42
  question_part = qa_parts[0] + "?"
43
  answer_part = qa_parts[1].strip()
44
+ new_data = {'Question': [question_part], 'Answer': [answer_part]}
45
+ df = pd.concat([df, pd.DataFrame(new_data)], ignore_index=True)
46
  result += f"Question: {question_part}\nAnswer: {answer_part}\n\n"
47
+
48
+ df.to_csv("QAPairs.csv")
49
+ return result, "QAPairs.csv"
50
 
51
  title = "Question-Answer Pairs Generation"
52
  input_file = gr.File(label="Upload a PDF file")
53
+ output_file = gr.File(label="Download as csv")
54
  output_text = gr.Textbox()
55
 
56
  interface = gr.Interface(
57
  fn=generate_question_answer_pairs,
58
  inputs=input_file,
59
+ outputs=[output_text, output_file],
60
  title=title,
61
  )
62
  interface.launch()