File size: 1,250 Bytes
8ea8c0d
3b218c4
 
0e96d93
6b51d0b
 
aef6bf4
6b51d0b
8ea8c0d
3b218c4
 
 
 
 
 
d0cfe06
3b218c4
 
 
 
 
 
 
 
 
8ea8c0d
3b218c4
8ea8c0d
3b218c4
 
 
 
 
8ea8c0d
 
233e142
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import gradio as gr
from pprint import pprint
from lmqg import TransformersQG


# Avoid importing evaluation utilities
model = TransformersQG(model='lmqg/t5-base-squad-qg', model_ae='lmqg/t5-base-squad-ae')


# Function to chunk text into smaller parts
def chunk_text(text, chunk_size=450):
    """Tokenize *text* and split the token ids into fixed-size pieces.

    Keeps each piece at most ``chunk_size`` ids long so it fits within the
    model's input limit (the last piece may be shorter).

    Args:
        text: Raw input string to tokenize.
        chunk_size: Maximum number of token ids per chunk.

    Returns:
        A list of token-id lists.
    """
    token_ids = model.tokenizer.encode(text)
    return [
        token_ids[start:start + chunk_size]
        for start in range(0, len(token_ids), chunk_size)
    ]

# Function to process each chunk and generate QA pairs
def generate_qa_for_chunks(text):
    """Generate question-answer pairs for arbitrarily long input text.

    The text is split into token-id chunks (see ``chunk_text``) so that each
    piece fits the model's input limit; QA pairs from all chunks are merged.

    Args:
        text: Raw input text (e.g. an SAP note or a paragraph).

    Returns:
        A single string with one ``Q: ...\\nA: ...`` entry per generated
        pair, entries separated by blank lines.
    """
    chunks = chunk_text(text)
    qa_pairs = []
    for chunk in chunks:
        # BUG FIX: the original bound a local named `chunk_text` here, which
        # shadowed the module-level chunk_text() function for the entire
        # function body and made the call above raise UnboundLocalError.
        # The local now has a distinct name.
        chunk_str = model.tokenizer.decode(chunk, skip_special_tokens=True)
        qa = model.generate_qa(chunk_str)
        qa_pairs.extend(qa)
    return "\n\n".join(f"Q: {item['question']}\nA: {item['answer']}" for item in qa_pairs)

# Gradio UI
# Single-textbox interface: paste raw text, receive the formatted QA pairs
# produced by generate_qa_for_chunks.
demo = gr.Interface(
    fn=generate_qa_for_chunks,
    inputs=gr.Textbox(lines=20, label="Input Text (SAP Note or Paragraph)"),
    outputs=gr.Textbox(lines=30, label="Generated QA Pairs"),
    title="Question Generator (LMQG - T5)",
    description="Paste text to generate question-answer pairs using lmqg/t5-base-squad-qg"
)

# Start the local web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()