File size: 1,250 Bytes
8ea8c0d 3b218c4 0e96d93 6b51d0b aef6bf4 6b51d0b 8ea8c0d 3b218c4 d0cfe06 3b218c4 8ea8c0d 3b218c4 8ea8c0d 3b218c4 8ea8c0d 233e142 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import gradio as gr
from pprint import pprint
from lmqg import TransformersQG
# Avoid importing evaluation utilities
# Load the QG + answer-extraction pipeline once at module import.
# NOTE(review): downloads/loads both checkpoints up front — presumably slow
# on first run; confirm this is acceptable for the deployment target.
model = TransformersQG(model='lmqg/t5-base-squad-qg', model_ae='lmqg/t5-base-squad-ae')
# Function to chunk text into smaller parts
def chunk_text(text, chunk_size=450):
    """Split *text* into lists of token ids, each at most *chunk_size* long.

    The text is tokenized with the model's own tokenizer so chunk lengths
    line up with the model's input limit; an empty input yields [].
    """
    token_ids = model.tokenizer.encode(text)
    return [
        token_ids[start:start + chunk_size]
        for start in range(0, len(token_ids), chunk_size)
    ]
# Function to process each chunk and generate QA pairs
def generate_qa_for_chunks(text):
    """Generate question-answer pairs for *text*, one token chunk at a time.

    Splits the input via chunk_text(), decodes each chunk back to a string,
    runs the QG model on it, and returns all pairs joined as a single
    "Q: ...\nA: ..." display string (empty string if no pairs were produced).
    """
    qa_pairs = []
    for token_chunk in chunk_text(text):
        # BUGFIX: the original bound this decoded string to the name
        # `chunk_text`, shadowing the chunking helper defined above —
        # renamed so the helper stays reachable inside the loop.
        decoded = model.tokenizer.decode(token_chunk, skip_special_tokens=True)
        # generate_qa returns a list of {'question': ..., 'answer': ...} dicts
        # (inferred from the formatting below — TODO confirm against lmqg docs).
        qa_pairs.extend(model.generate_qa(decoded))
    return "\n\n".join(
        f"Q: {item['question']}\nA: {item['answer']}" for item in qa_pairs
    )
# Gradio UI: one large textbox in, the generated QA pairs out.
input_box = gr.Textbox(lines=20, label="Input Text (SAP Note or Paragraph)")
output_box = gr.Textbox(lines=30, label="Generated QA Pairs")
demo = gr.Interface(
    fn=generate_qa_for_chunks,
    inputs=input_box,
    outputs=output_box,
    title="Question Generator (LMQG - T5)",
    description="Paste text to generate question-answer pairs using lmqg/t5-base-squad-qg",
)
# Script entry point: start the Gradio server only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    demo.launch()