Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- README.md +3 -9
- __pycache__/main.cpython-310.pyc +0 -0
- flagged/log.csv +2 -0
- main.py +115 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: blue
|
5 |
-
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: t5-generate-questions
|
3 |
+
app_file: main.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
+
sdk_version: 4.13.0
|
|
|
|
|
6 |
---
|
|
|
|
__pycache__/main.cpython-310.pyc
ADDED
Binary file (2.68 kB). View file
|
|
flagged/log.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
file,question,output,flag,username,timestamp
|
2 |
+
,,,,,2024-01-04 13:46:23.480392
|
main.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr

from transformers import pipeline
from PyPDF2 import PdfReader

# Seq2seq model whose output embeds several questions, each introduced by
# the literal marker 'Question:' (parsed in generate_questions_request).
generate_question_pipe = pipeline("text2text-generation", model="thangved/t5-generate-question")
# Extractive QA model used to answer the generated questions against the PDF text.
qa_pipe = pipeline("question-answering", model="SharKRippeR/QA_T5_small_seq2seq")

# Number of question/answer slots shown in the UI.
MAX_OUTPUT = 3
# Whitespace-token budget fed to the question-generation model per request.
MAX_INPUT_TOKENS = 256
12 |
+
# Truncate text to 256 tokens
|
13 |
+
def split_texts(text:str) -> list[str]:
|
14 |
+
tokens = text.split(' ') # Split text into tokens
|
15 |
+
|
16 |
+
# If the number of tokens is greater than 256, truncate it
|
17 |
+
if len(tokens) > MAX_INPUT_TOKENS:
|
18 |
+
tokens = tokens[:MAX_INPUT_TOKENS]
|
19 |
+
|
20 |
+
texts = []
|
21 |
+
|
22 |
+
for i in range(0, len(tokens), MAX_INPUT_TOKENS):
|
23 |
+
texts.append(' '.join(tokens[i:i+64]))
|
24 |
+
|
25 |
+
# Join tokens back into text
|
26 |
+
return texts
|
def generate_questions_request(text: str) -> list[str]:  # type: ignore
    """Run the question-generation pipeline on *text* and parse its output.

    The model emits one string containing several questions, each
    introduced by the literal marker 'Question:'.

    :param text: passage to generate questions from.
    :return: list of stripped question strings (empty if the pipeline
        returned ``None``).
    """
    response = generate_question_pipe(text)

    if response is None:
        return []

    result = []
    # FIX: the original reused the name `question` for both the outer
    # generation dict and the inner question string (shadowing).
    for generation in response:
        parts = generation['generated_text'].split('Question:')[1:]  # type: ignore
        result.extend(part.strip() for part in parts)

    return result
def generate_questions(file):
    """Gradio handler: extract a PDF's text and generate questions from it.

    :param file: Gradio file object (``.name`` is the path) or ``None``.
    :return: list of exactly ``MAX_OUTPUT + 1`` strings — the extracted
        PDF text (context) followed by ``MAX_OUTPUT`` question strings,
        padded with ``''`` when fewer questions were generated.
    """
    if file is None:
        # Nothing selected: clear the context box and every question slot.
        return [''] * (MAX_OUTPUT + 1)

    reader = PdfReader(file.name)

    text = ''
    for page in reader.pages:
        text += page.extract_text()

    questions = [text]  # first output feeds the context textbox

    for chunk in split_texts(text):
        questions += generate_questions_request(chunk)

    # BUG FIX: the original only padded short results; when the model
    # produced more than MAX_OUTPUT questions the handler returned more
    # values than the declared Gradio outputs. Clamp AND pad so the
    # result always matches [context, q1, q2, q3].
    questions = questions[:MAX_OUTPUT + 1]
    questions += [''] * (MAX_OUTPUT + 1 - len(questions))

    return questions
def generate_answers(context='', q1='', q2='', q3=''):
    """Answer each of up to three questions against *context*.

    Empty question slots produce empty answer strings, so the returned
    list always has three entries aligned with (q1, q2, q3).
    """
    results = []

    for question in (q1, q2, q3):
        # Keep blank slots blank instead of querying the model.
        if question == '':
            results.append('')
            continue

        prediction = qa_pipe({
            'question': question,
            'context': context
        })
        results.append(prediction['answer'])  # type: ignore

    return results
# Gradio UI: pick a PDF, generate up to MAX_OUTPUT questions from its
# text, then answer them with the extractive QA pipeline.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")

    # Input file plus a textbox echoing the extracted PDF text (context).
    with gr.Row():
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)

    # Question slots on the left, their answers on the right.
    with gr.Row():
        with gr.Column():
            q1 = gr.Textbox(label='Question 1')
            q2 = gr.Textbox(label='Question 2')
            q3 = gr.Textbox(label='Question 3')

        with gr.Column():
            a1 = gr.Textbox(label='Answer 1')
            a2 = gr.Textbox(label='Answer 2')
            a3 = gr.Textbox(label='Answer 3')

    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')

    # generate_questions returns [context, q1, q2, q3];
    # generate_answers maps (context, q1..q3) -> (a1..a3).
    generate_question_btn.click(fn=generate_questions, inputs=inp, outputs=[context, q1, q2, q3])
    generate_answer_btn.click(fn=generate_answers, inputs=[context, q1, q2, q3], outputs=[a1, a2, a3])

if __name__ == '__main__':
    demo.launch()
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.13.0
|
2 |
+
PyPDF2==3.0.1
|
3 |
+
transformers==4.36.2
|
4 |
+
torch==2.1.2
|