thangved committed on
Commit
e0a45a8
·
1 Parent(s): ef45c41

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +3 -9
  2. __pycache__/main.cpython-310.pyc +0 -0
  3. flagged/log.csv +2 -0
  4. main.py +115 -0
  5. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: T5 Generate Questions
3
- emoji: 🐨
4
- colorFrom: blue
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.13.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: t5-generate-questions
3
+ app_file: main.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.12.0
 
 
6
  ---
 
 
__pycache__/main.cpython-310.pyc ADDED
Binary file (2.68 kB). View file
 
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ file,question,output,flag,username,timestamp
2
+ ,,,,,2024-01-04 13:46:23.480392
main.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from transformers import pipeline
4
+ from PyPDF2 import PdfReader
5
+
6
# Text-to-text pipeline whose output is parsed for 'Question:' markers
# by generate_questions_request.
generate_question_pipe = pipeline(
    task="text2text-generation",
    model="thangved/t5-generate-question",
)

# Question-answering pipeline used by generate_answers; it is called with a
# {'question', 'context'} dict and its 'answer' field is read back.
qa_pipe = pipeline(
    task="question-answering",
    model="SharKRippeR/QA_T5_small_seq2seq",
)
8
+
9
# Number of question/answer slots shown in the UI.
MAX_OUTPUT = 3
# Maximum number of whitespace-separated words kept from the input text.
MAX_INPUT_TOKENS = 256


def split_texts(text: str) -> list[str]:
    """Truncate *text* to ``MAX_INPUT_TOKENS`` words and return it as chunks.

    "Tokens" here are whitespace-separated words, not model tokens. Only the
    first ``MAX_INPUT_TOKENS`` words are kept; they are re-joined with single
    spaces into chunks of at most ``MAX_INPUT_TOKENS`` words each.

    Args:
        text: Raw text, e.g. the concatenated text of a PDF.

    Returns:
        A list of chunk strings. Blank or empty input yields an empty list.
    """
    # split() with no argument splits on any run of whitespace, so words
    # separated by newlines or tabs are counted individually and blank input
    # produces no tokens at all.
    tokens = text.split()[:MAX_INPUT_TOKENS]

    # The slice width must equal the stride; a narrower slice would silently
    # drop the words between the end of one chunk and the start of the next.
    return [
        ' '.join(tokens[start:start + MAX_INPUT_TOKENS])
        for start in range(0, len(tokens), MAX_INPUT_TOKENS)
    ]
27
+
28
def generate_questions_request(text: str) -> list[str]:  # type: ignore
    """Run the question-generation pipeline on *text* and collect questions.

    Each pipeline result's ``'generated_text'`` is split on the literal
    ``'Question:'`` marker; whatever precedes the first marker is discarded
    and every following fragment is stripped of surrounding whitespace.

    Args:
        text: Passage to generate questions from.

    Returns:
        The stripped question fragments in pipeline order, or an empty list
        when the pipeline returns ``None``.
    """
    outputs = generate_question_pipe(text)
    if outputs is None:
        return []

    collected = []
    for output in outputs:
        # str.split always yields at least one element, so the leading
        # (pre-marker) piece can be unpacked away unconditionally.
        _, *fragments = output['generated_text'].split('Question:')  # type: ignore
        collected.extend(fragment.strip() for fragment in fragments)

    return collected
45
+
46
def generate_questions(file):
    """Extract the text of an uploaded PDF and generate questions from it.

    Args:
        file: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string, or ``None`` when nothing has
            been uploaded.

    Returns:
        A list of exactly ``MAX_OUTPUT + 1`` strings: the extracted PDF text
        followed by ``MAX_OUTPUT`` questions, padded with ``''`` when fewer
        questions were generated.
    """
    if file is None:
        return [''] * (MAX_OUTPUT + 1)

    # Accept both a file-like object with a .name path and a bare path.
    reader = PdfReader(getattr(file, 'name', file))

    # `or ''` guards against a page whose text extraction yields nothing.
    content = ''.join(page.extract_text() or '' for page in reader.pages)

    questions = []
    for chunk in split_texts(content):
        questions += generate_questions_request(chunk)

    # The click handler is wired to a fixed number of output components
    # (context + MAX_OUTPUT question boxes), so surplus questions are cut
    # and missing ones are padded with empty strings.
    questions = questions[:MAX_OUTPUT]
    questions += [''] * (MAX_OUTPUT - len(questions))

    return [content, *questions]
71
+
72
def generate_answers(context='', q1='', q2='', q3=''):
    """Answer up to three questions against *context*.

    Args:
        context: Text in which the answers are looked up.
        q1: First question; blank or ``None`` means "no question".
        q2: Second question; blank or ``None`` means "no question".
        q3: Third question; blank or ``None`` means "no question".

    Returns:
        A list of three answer strings aligned with ``q1``..``q3``. A slot is
        ``''`` when its question is blank, and every slot is ``''`` when
        *context* is blank.
    """
    questions = [q1, q2, q3]

    # The question-answering pipeline rejects an empty context, so with no
    # text to search there is nothing to answer.
    if not context or not context.strip():
        return [''] * len(questions)

    answers = []
    for q in questions:
        # Skip empty, whitespace-only, or missing questions.
        if not q or not q.strip():
            answers.append('')
            continue

        answer = qa_pipe({
            'question': q,
            'context': context,
        })
        answers.append(answer['answer'])  # type: ignore

    return answers
89
+
90
# Gradio UI: upload a PDF, show its text and generated questions, then answer
# the (possibly edited) questions against that text.
# NOTE(review): the Row/Column nesting below was reconstructed from a
# flattened diff view; confirm which components belong to each container.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")

    with gr.Row():
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)

    with gr.Row():
        # Questions in one column, their answers in the other.
        with gr.Column():
            q1 = gr.Textbox(label='Question 1')
            q2 = gr.Textbox(label='Question 2')
            q3 = gr.Textbox(label='Question 3')

        with gr.Column():
            a1 = gr.Textbox(label='Answer 1')
            a2 = gr.Textbox(label='Answer 2')
            a3 = gr.Textbox(label='Answer 3')

    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')

    question_boxes = [q1, q2, q3]
    answer_boxes = [a1, a2, a3]

    # The PDF fills the context box and the three question boxes.
    generate_question_btn.click(
        fn=generate_questions,
        inputs=inp,
        outputs=[context, *question_boxes],
    )
    # Context plus the three questions fill the three answer boxes.
    generate_answer_btn.click(
        fn=generate_answers,
        inputs=[context, *question_boxes],
        outputs=answer_boxes,
    )

if __name__ == '__main__':
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.13.0
2
+ PyPDF2==3.0.1
3
+ transformers==4.36.2
4
+ torch==2.1.2