Spaces:
Runtime error
Runtime error
File size: 2,919 Bytes
e0a45a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import gradio as gr
from transformers import pipeline
from PyPDF2 import PdfReader
# Limits shared by the request handlers below:
#   MAX_INPUT_TOKENS - cap on the space-separated words taken from the PDF text.
#   MAX_OUTPUT       - number of question slots returned to the UI.
MAX_INPUT_TOKENS = 256
MAX_OUTPUT = 3

# Hugging Face pipelines are built once at import time and reused by every call.
generate_question_pipe = pipeline(
    "text2text-generation",
    model="thangved/t5-generate-question",
)
qa_pipe = pipeline(
    "question-answering",
    model="SharKRippeR/QA_T5_small_seq2seq",
)
def split_texts(text: str, max_tokens: "int | None" = None,
                chunk_size: "int | None" = None) -> list[str]:
    """Truncate *text* to a word budget and cut it into chunks.

    Words are obtained by splitting on single spaces. Only the first
    ``max_tokens`` words are kept; they are then grouped into consecutive
    chunks of ``chunk_size`` words and each chunk is joined back into a string.

    The stride and the slice width are the same value, so no word inside the
    budget is dropped. (Previously the loop advanced by 256 words but only
    kept 64 of them, silently discarding the rest.)

    Args:
        text: Raw text to prepare.
        max_tokens: Maximum number of words to keep. Defaults to the
            module-level ``MAX_INPUT_TOKENS``.
        chunk_size: Number of words per chunk. Defaults to ``max_tokens``,
            i.e. a single chunk holding the whole truncated text.

    Returns:
        The list of chunk strings; empty when *text* is empty or blank.

    Raises:
        ValueError: If ``max_tokens`` or ``chunk_size`` is smaller than 1.
    """
    limit = MAX_INPUT_TOKENS if max_tokens is None else max_tokens
    width = limit if chunk_size is None else chunk_size
    if limit < 1 or width < 1:
        raise ValueError("max_tokens and chunk_size must be positive integers")

    # Nothing to split: avoid handing an empty chunk to the model.
    if not text or not text.strip():
        return []

    tokens = text.split(' ')[:limit]
    return [
        ' '.join(tokens[start:start + width])
        for start in range(0, len(tokens), width)
    ]
def generate_questions_request(text: str) -> list[str]:  # type: ignore
    """Run the question-generation pipeline on *text* and parse its output.

    Each item returned by ``generate_question_pipe`` is read through its
    ``'generated_text'`` entry, which is split on the literal marker
    ``'Question:'``. Whatever precedes the first marker is discarded, and the
    remaining fragments are stripped of surrounding whitespace.

    NOTE(review): this assumes the model prefixes every question with
    ``'Question:'`` - confirm against the model's actual output format.

    Args:
        text: Passage to generate questions from.

    Returns:
        The non-empty questions in the order they were generated; an empty
        list when the pipeline returns ``None``.
    """
    response = generate_question_pipe(text)
    if response is None:
        return []

    result = []
    for item in response:
        fragments = item['generated_text'].split('Question:')[1:]  # type: ignore
        for fragment in fragments:
            question = fragment.strip()
            # Back-to-back markers leave empty fragments; skip them so the
            # caller never receives a blank question.
            if question:
                result.append(question)
    return result
def generate_questions(file):
    """Extract the text of an uploaded PDF and generate questions about it.

    Args:
        file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded. Either an object exposing the file path as
            ``.name`` or the path itself is accepted.

    Returns:
        A list of exactly ``MAX_OUTPUT + 1`` strings: the full extracted text
        followed by ``MAX_OUTPUT`` questions, padded with ``''`` when fewer
        questions were generated. The length is fixed so the result always
        matches the number of output components it is wired to.
    """
    if file is None:
        return [''] * (MAX_OUTPUT + 1)

    # Depending on the Gradio version/configuration the upload may arrive as a
    # tempfile-like wrapper (with `.name`) or as a plain path string.
    path = getattr(file, 'name', file)
    reader = PdfReader(path)

    # Guard against pages that yield no text so concatenation cannot fail.
    text = ''.join(page.extract_text() or '' for page in reader.pages)

    questions = []
    for chunk in split_texts(text):
        questions += generate_questions_request(chunk)
        if len(questions) >= MAX_OUTPUT:
            # Enough questions collected; skip further model calls.
            break

    # Cap and pad to exactly MAX_OUTPUT entries.
    questions = questions[:MAX_OUTPUT]
    questions += [''] * (MAX_OUTPUT - len(questions))
    return [text] + questions
def generate_answers(context: str = '', q1: str = '', q2: str = '', q3: str = '') -> list[str]:
    """Answer up to three questions against *context* with the QA pipeline.

    Args:
        context: Text in which the answers are searched.
        q1: First question; blank means "no question".
        q2: Second question; blank means "no question".
        q3: Third question; blank means "no question".

    Returns:
        A list of three answer strings, positionally matching ``q1``-``q3``.
        A slot is ``''`` when its question is blank or when *context* is
        blank, in which case the pipeline is not invoked at all.
    """
    questions = [q1, q2, q3]

    # The question-answering pipeline rejects an empty context, so there is
    # nothing useful to ask it; return blank answers instead of failing.
    if not context or not context.strip():
        return [''] * len(questions)

    answers = []
    for q in questions:
        if not q or not q.strip():
            answers.append('')
            continue
        answer = qa_pipe({
            'question': q,
            'context': context
        })
        answers.append(answer['answer'])  # type: ignore
    return answers
# Gradio UI: a PDF upload next to its extracted text, three question boxes
# beside three answer boxes, and one button per processing step.
# NOTE(review): the nesting of the Row/Column contexts below was reconstructed
# from a source whose indentation was lost - confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")

    with gr.Row():
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)

    with gr.Row():
        with gr.Column():
            q1, q2, q3 = [
                gr.Textbox(label=caption)
                for caption in ('Question 1', 'Question 2', 'Question 3')
            ]
        with gr.Column():
            a1, a2, a3 = [
                gr.Textbox(label=caption)
                for caption in ('Answer 1', 'Answer 2', 'Answer 3')
            ]

    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')

    # Step 1: PDF -> extracted text + questions.
    generate_question_btn.click(
        fn=generate_questions,
        inputs=inp,
        outputs=[context, q1, q2, q3],
    )
    # Step 2: text + questions -> answers.
    generate_answer_btn.click(
        fn=generate_answers,
        inputs=[context, q1, q2, q3],
        outputs=[a1, a2, a3],
    )

# Start the web server only when executed as a script.
if __name__ == '__main__':
    demo.launch()
|