|
import gradio as gr |
|
from pprint import pprint |
|
from lmqg import TransformersQG |
|
|
|
|
|
|
|
# Joint QA-generation pipeline loaded once at import time: the answer-extraction
# model (model_ae) proposes answer spans, the QG model turns each span into a
# question. Both are T5-base checkpoints fine-tuned on SQuAD.
model = TransformersQG(model='lmqg/t5-base-squad-qg', model_ae='lmqg/t5-base-squad-ae')
|
|
|
|
|
|
|
def chunk_text(text, chunk_size=450):
    """Split *text* into consecutive token-id chunks of at most *chunk_size*.

    Tokenization uses the QG model's own tokenizer so that chunk boundaries
    correspond to the model's notion of tokens (not characters or words).

    Args:
        text: Raw input string to split.
        chunk_size: Maximum number of token ids per chunk.

    Returns:
        list[list[int]]: Token-id slices covering the whole input in order.
    """
    token_ids = model.tokenizer.encode(text)
    return [
        token_ids[start:start + chunk_size]
        for start in range(0, len(token_ids), chunk_size)
    ]
|
|
|
|
|
def generate_qa_for_chunks(text):
    """Generate question-answer pairs for arbitrarily long text.

    The input is split into token chunks (see ``chunk_text``) so each piece
    fits the model's input limit; QA generation runs per chunk and all pairs
    are joined into one display string.

    Args:
        text: Raw input text (e.g. an SAP note or paragraph).

    Returns:
        str: Blocks of "Q: ...\\nA: ..." separated by blank lines.
    """
    chunks = chunk_text(text)
    qa_pairs = []
    for chunk in chunks:
        # BUG FIX: the original assigned this to a local named ``chunk_text``,
        # which shadowed the module-level function and made the call above
        # raise UnboundLocalError on every invocation. Use a distinct name.
        decoded = model.tokenizer.decode(chunk, skip_special_tokens=True)
        qa = model.generate_qa(decoded)
        qa_pairs.extend(qa)
    lines = []
    for item in qa_pairs:
        if isinstance(item, dict):
            lines.append(f"Q: {item['question']}\nA: {item['answer']}")
        else:
            # NOTE(review): some lmqg versions document generate_qa as
            # returning (question, answer) tuples rather than dicts; handle
            # both shapes — verify against the installed lmqg version.
            question, answer = item
            lines.append(f"Q: {question}\nA: {answer}")
    return "\n\n".join(lines)
|
|
|
|
|
# Minimal Gradio UI: one large textbox in, one large textbox out.
input_box = gr.Textbox(lines=20, label="Input Text (SAP Note or Paragraph)")
output_box = gr.Textbox(lines=30, label="Generated QA Pairs")

demo = gr.Interface(
    fn=generate_qa_for_chunks,
    inputs=input_box,
    outputs=output_box,
    title="Question Generator (LMQG - T5)",
    description="Paste text to generate question-answer pairs using lmqg/t5-base-squad-qg",
)
|
|
|
if __name__ == "__main__":
    # Start the Gradio web server; blocks until the process is interrupted.
    demo.launch()