"""Gradio app that generates question-answer pairs from an uploaded PDF.

The PDF text is extracted with PyMuPDF (``fitz``), split into sentence-like
chunks, and each chunk is sent to a Hugging Face Inference API model that
returns a generated "question? answer" string.

Configuration:
    HF_API_TOKEN: environment variable holding the Hugging Face API token.
        If unset, requests are sent without an Authorization header.
"""
import logging
import os
import re

import fitz
import gradio as gr
import requests

logger = logging.getLogger(__name__)

API_URL = "https://api-inference.huggingface.co/models/potsawee/t5-large-generation-squad-QuestionAnswer"

# SECURITY: the API token must not be committed to source control. It is read
# from the environment instead; any token that was previously hard-coded in
# this file should be treated as leaked and revoked.
_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
headers = {"Authorization": f"Bearer {_API_TOKEN}"} if _API_TOKEN else {}

# Upper bound (seconds) for a single inference request so that a stalled
# connection cannot hang the UI forever.
REQUEST_TIMEOUT_SECONDS = 60

# Compiled once at import time; the patterns are applied once per sentence.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])')
_QUESTION_PATTERN = re.compile(r'^(.*?\?)')
_ANSWER_PATTERN = re.compile(r'\?(.*)')


def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every page in the PDF.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined together, in page order.
    """
    # The context manager closes the document even if text extraction fails.
    with fitz.open(pdf_file_path) as doc:
        return "".join(page.get_text() for page in doc)


def query(payload):
    """POST ``payload`` as JSON to the inference API and return the decoded reply.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": "..."}``.

    Returns:
        The decoded JSON response. If the request fails or the response is
        not valid JSON, a dict of the form ``{"error": "<message>"}`` is
        returned instead, which callers in this file treat as "skip".
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decoding failures on older requests versions.
        logger.warning("Inference request failed: %s", exc)
        return {"error": str(exc)}


def _generated_text(output):
    """Return the generated text from an API reply, or None if unusable."""
    if isinstance(output, dict):
        # In this file a dict reply means a failure (see ``query``).
        logger.warning("Skipping inference error response: %s", output)
        return None
    try:
        text = output[0]['generated_text']
    except (IndexError, KeyError, TypeError):
        logger.warning("Skipping unexpected inference response: %r", output)
        return None
    return text if isinstance(text, str) else None


def generate_question_answer_pairs(input_file):
    """Build a text report of question-answer pairs for an uploaded PDF.

    Args:
        input_file: Value delivered by the ``gr.File`` input: ``None`` when
            nothing was uploaded, otherwise a file path or an object whose
            ``name`` attribute holds the path.

    Returns:
        ``"Please upload a file"`` when no file was given; otherwise a string
        with one ``Question: ...\\nAnswer: ...`` entry per successfully
        generated pair (empty string when none could be produced).
    """
    if input_file is None:
        return "Please upload a file"

    # Some Gradio versions hand over a temp-file wrapper rather than a path.
    pdf_path = getattr(input_file, "name", input_file)
    pdf_text = extract_text_from_pdf(pdf_path)

    entries = []
    for sentence in _SENTENCE_BOUNDARY.split(pdf_text):
        if not sentence.strip():
            continue
        pair = _generated_text(query({"inputs": sentence}))
        if pair is None:
            continue
        # The generated text is split at its first '?': everything up to and
        # including it is the question, the rest of that line is the answer.
        question = _QUESTION_PATTERN.search(pair)
        answer = _ANSWER_PATTERN.search(pair)
        if question and answer:
            question_part = question.group(1).strip()
            answer_part = answer.group(1).strip()
            entries.append(
                f"Question: {question_part}\nAnswer: {answer_part}\n\n"
            )
    return "".join(entries)


title = "Question-Answer Pairs Generation"
input_file = gr.File(label="Upload a PDF file")
output_text = gr.Textbox()

interface = gr.Interface(
    fn=generate_question_answer_pairs,
    inputs=input_file,
    outputs=output_text,
    title=title,
)

# Launched at module level, as in the original script, so that running or
# importing this file starts the app exactly as before.
interface.launch()