Spaces:

thangved
/

t5-generate-questions

Runtime error

File size: 2,919 Bytes

e0a45a8

import gradio as gr

from transformers import pipeline
from PyPDF2 import PdfReader

# Text-to-text generation pipeline used by generate_questions_request to
# produce questions from a passage of text. Loaded once at import time.
generate_question_pipe = pipeline("text2text-generation", model="thangved/t5-generate-question")
# Question-answering pipeline used by generate_answers to answer each question
# against the context text. Loaded once at import time.
qa_pipe = pipeline("question-answering", model="SharKRippeR/QA_T5_small_seq2seq")

# Number of question slots; generate_questions pads its result so that it holds
# at least MAX_OUTPUT + 1 entries (the context followed by the questions).
MAX_OUTPUT = 3
# Cap on the number of tokens split_texts keeps from the input text.
MAX_INPUT_TOKENS = 256

# Truncate text to a token budget and split it into chunks for the model
def split_texts(text: str, max_tokens: "int | None" = None, chunk_size: "int | None" = None) -> list[str]:
    """Truncate *text* to a token budget and return it as a list of chunks.

    Tokens are whitespace-separated words, so newlines coming from PDF
    extraction count as separators too. Each chunk is re-joined with single
    spaces.

    Args:
        text: Raw text to prepare for the question-generation model.
        max_tokens: Maximum number of tokens kept from the start of *text*.
            Defaults to the module-level ``MAX_INPUT_TOKENS``.
        chunk_size: Number of tokens per returned chunk. Defaults to
            *max_tokens*, i.e. the truncated text is returned as one chunk.

    Returns:
        The chunks in reading order. Empty when *text* has no tokens.

    Raises:
        ValueError: If *max_tokens* or *chunk_size* is not positive.
    """
    if max_tokens is None:
        max_tokens = MAX_INPUT_TOKENS
    if chunk_size is None:
        chunk_size = max_tokens

    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # split() without arguments drops empty tokens, so blank input yields
    # no chunks instead of a single empty string.
    tokens = text.split()[:max_tokens]

    # The stride and the slice width must be the same value; otherwise the
    # tokens between the end of one slice and the next stride are lost.
    return [
        ' '.join(tokens[start:start + chunk_size])
        for start in range(0, len(tokens), chunk_size)
    ]

def generate_questions_request(text: str) -> list[str]:
    """Generate questions for *text* with the question-generation pipeline.

    The generated text is split on the literal marker ``Question:``; whatever
    precedes the first marker is discarded and every following fragment is
    treated as one question.

    Args:
        text: Passage to generate questions from.

    Returns:
        The non-empty questions, stripped of surrounding whitespace, in the
        order they were generated. Empty when the pipeline returns nothing.
    """
    response = generate_question_pipe(text)

    if not response:
        return []

    questions = []

    for candidate in response:
        fragments = candidate['generated_text'].split('Question:')[1:]  # type: ignore

        for fragment in fragments:
            cleaned = fragment.strip()
            # Consecutive or trailing markers leave blank fragments; these
            # are not questions and must not take up an output slot.
            if cleaned:
                questions.append(cleaned)

    return questions

def generate_questions(file):
    """Extract the text of an uploaded PDF and generate questions from it.

    Args:
        file: The value supplied by the file input. It is either ``None``
            (nothing uploaded), an object exposing the path as ``.name``, or
            the path itself.

    Returns:
        A list of exactly ``MAX_OUTPUT + 1`` strings: the extracted PDF text
        followed by up to ``MAX_OUTPUT`` questions, padded with empty strings.
        The fixed length matches the number of output components bound to
        this callback.
    """
    if file is None:
        return [''] * (MAX_OUTPUT + 1)

    # Accept both a temp-file wrapper (path in .name) and a plain path.
    reader = PdfReader(getattr(file, 'name', file))

    # Guard against a page yielding no text (None or empty) and avoid
    # repeated string concatenation.
    content = ''.join(page.extract_text() or '' for page in reader.pages)

    questions = []

    for chunk in split_texts(content):
        # Stop calling the model once every question slot is filled.
        if len(questions) >= MAX_OUTPUT:
            break
        # Do not send blank chunks to the model.
        if not chunk.strip():
            continue
        questions.extend(generate_questions_request(chunk))

    # Returning more values than there are output components is an error,
    # so cap the questions before padding to the fixed length.
    questions = questions[:MAX_OUTPUT]
    padding = [''] * (MAX_OUTPUT - len(questions))

    return [content] + questions + padding

def generate_answers(context='', q1='', q2='', q3=''):
    """Answer up to three questions against *context* with the QA pipeline.

    Args:
        context: Text the answers are extracted from.
        q1: First question; blank to skip.
        q2: Second question; blank to skip.
        q3: Third question; blank to skip.

    Returns:
        A list of three strings, one answer per question in order. A blank
        question yields an empty answer, and a blank context yields three
        empty answers.
    """
    questions = [q1, q2, q3]

    # The QA pipeline cannot answer against an empty context, so return
    # empty answers instead of letting the call fail.
    if not context or not context.strip():
        return [''] * len(questions)

    answers = []

    for question in questions:
        # Blank (empty, whitespace-only or missing) questions are skipped.
        if not question or not question.strip():
            answers.append('')
            continue

        prediction = qa_pipe({
            'question': question,
            'context': context
        })

        answers.append(prediction['answer'])  # type: ignore

    return answers

# Build the UI. Components are laid out in the order they are created inside
# the nested Blocks/Row/Column contexts, so statement order matters here.
with gr.Blocks() as demo:
    gr.Markdown("# PDF to Questions")

    # First row: the PDF upload next to a textbox that receives the PDF text.
    with gr.Row():
        inp = gr.File(label='Select file', file_types=['.pdf'])
        context = gr.Textbox(label='Pdf content', lines=10)

    # Second row: questions in the left column, their answers in the right.
    with gr.Row():
        with gr.Column():
            q1 = gr.Textbox(label='Question 1')
            q2 = gr.Textbox(label='Question 2')
            q3 = gr.Textbox(label='Question 3')

        with gr.Column():
            a1 = gr.Textbox(label='Answer 1')
            a2 = gr.Textbox(label='Answer 2')
            a3 = gr.Textbox(label='Answer 3')

    generate_question_btn = gr.Button('Generate questions')
    generate_answer_btn = gr.Button('Generate answers', variant='primary')

    # generate_questions' return values are assigned, in order, to the context
    # textbox and the three question textboxes.
    generate_question_btn.click(fn=generate_questions, inputs=inp, outputs=[context, q1, q2, q3])
    # generate_answers reads the context and the three questions and fills the
    # three answer textboxes, in order.
    generate_answer_btn.click(fn=generate_answers, inputs=[context, q1, q2, q3], outputs=[a1, a2, a3])

# Start the app only when this file is run as a script, not when imported.
if __name__ == '__main__':
    demo.launch()