import io
import os
import re

import fitz  # PyMuPDF
import gradio as gr
import requests
from PIL import Image
from transformers import pipeline

# Local pipelines: BART for summarization, BERT (SQuAD2) for extractive QA.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_model = pipeline(
    "question-answering",
    model="deepset/bert-large-uncased-whole-word-masking-squad2",
)

# The Inference API call below needs a valid Hugging Face access token.
# Read it from the environment rather than hard-coding a placeholder in source.
my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]


def extract_text_from_pdf(pdf_file):
    """Read every page of the uploaded PDF and return its text as one string."""
    with fitz.open(pdf_file) as pdf:
        text = ""
        for page in pdf:
            text += page.get_text("text")
    # Collapse the whitespace runs left over from the PDF layout.
    return re.sub(r"\s+", " ", text).strip()


def summarize(text):
    """Summarize the text, chunking long documents to stay within BART's input limit."""
    if len(text) > 1000:
        chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
        summary = ""
        for chunk in chunks:
            summary += summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]["summary_text"] + " "
    else:
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
    return summary


def answer_question(text, question):
    """Run extractive question answering over the full PDF text."""
    response = qa_model(question=question, context=text)
    return response["answer"]


def query(payload):
    """Call the Hugging Face Inference API to generate an image from a text prompt."""
    API_URL = "https://api-inference.huggingface.co/models/Shakker-Labs/FLUX.1-dev-LoRA-AntiBlur"
    headers = {"Authorization": f"Bearer {my_key}"}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.content


def summarize_and_qa(pdf_file, question):
    text = extract_text_from_pdf(pdf_file)
    summary = summarize(text)
    answer = answer_question(text, question)

    # The Inference API returns raw image bytes on success and a JSON error
    # body otherwise, so fall back to no image if the bytes cannot be decoded.
    image = None
    image_bytes = query({"inputs": summary})
    if image_bytes:
        try:
            image = Image.open(io.BytesIO(image_bytes))
        except Exception:
            image = None

    return summary, answer, image


gr.Interface(
    fn=summarize_and_qa,
    inputs=["file", "text"],
    outputs=["textbox", "textbox", "image"],
    title="Understand your PDF Better",
    description="Upload a PDF to get a summary. You can ask any question regarding the content of the PDF. It will also generate a picture related to your PDF.",
).launch(debug=True, share=True)
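
# A minimal sketch of how the functions above could be exercised without the
# Gradio UI (for example, in a test script). The file path and question are
# hypothetical; this assumes HUGGINGFACE_HUB_TOKEN is set in the environment.
#
#     text = extract_text_from_pdf("example.pdf")
#     print(summarize(text))
#     print(answer_question(text, "What is the main topic of this document?"))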