"""Gradio app: generate question-answer pairs from pasted text via LMQG (T5, SQuAD)."""

import gradio as gr
from pprint import pprint
from lmqg import TransformersQG  # Avoid importing evaluation utilities

# Question-generation model plus a separate answer-extraction model
# (both T5-base fine-tuned on SQuAD).
model = TransformersQG(model='lmqg/t5-base-squad-qg', model_ae='lmqg/t5-base-squad-ae')


def chunk_text(text, chunk_size=450):
    """Split *text* into lists of at most *chunk_size* token ids.

    The model's input length is limited, so long inputs are tokenized once
    and cut into fixed-size token windows before generation.

    Returns a list of token-id lists (the last chunk may be shorter).
    """
    tokenizer = model.tokenizer
    tokens = tokenizer.encode(text)
    return [tokens[i:i + chunk_size] for i in range(0, len(tokens), chunk_size)]


def generate_qa_for_chunks(text):
    """Generate QA pairs for every chunk of *text*, formatted as one string.

    Each chunk is decoded back to text and fed to ``model.generate_qa``;
    results from all chunks are concatenated.
    """
    qa_pairs = []
    for chunk in chunk_text(text):
        # BUGFIX: this local used to be named ``chunk_text``, which made the
        # name function-local everywhere in this body and turned the call in
        # the loop header into an UnboundLocalError.
        decoded = model.tokenizer.decode(chunk, skip_special_tokens=True)
        qa_pairs.extend(model.generate_qa(decoded))
    # BUGFIX: lmqg's generate_qa yields (question, answer) tuples, not dicts,
    # so the previous ``item['question']`` indexing raised TypeError.
    return "\n\n".join(f"Q: {question}\nA: {answer}" for question, answer in qa_pairs)


# Gradio UI
demo = gr.Interface(
    fn=generate_qa_for_chunks,
    inputs=gr.Textbox(lines=20, label="Input Text (SAP Note or Paragraph)"),
    outputs=gr.Textbox(lines=30, label="Generated QA Pairs"),
    title="Question Generator (LMQG - T5)",
    description="Paste text to generate question-answer pairs using lmqg/t5-base-squad-qg",
)

if __name__ == "__main__":
    demo.launch()