Spaces:
Sleeping
Sleeping
import fitz | |
import requests | |
from openai import OpenAI | |
from fpdf import FPDF | |
import cloudinary | |
import cloudinary.uploader | |
def download_pdf(url, save_path):
    """Download a PDF from a given URL and save it locally.

    Args:
        url: Address to fetch with an HTTP GET.
        save_path: Local file path the response body is written to.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    # A timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an HTML error page under a .pdf name.
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Args:
        pdf_path: Path of the PDF to open.

    Returns:
        The text of every page, concatenated in page order with no
        separator between pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
def generate_structured_summary(text):
    """Ask the chat model to turn raw text into a structured report.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    so that no credential is stored in the source file.

    Args:
        text: Source text that is appended to the report-format prompt.

    Returns:
        The message content of the first completion choice. This is whatever
        the API returned for that field, so it may be ``None``.

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is not set.
    """
    import os

    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set"
        )

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )
    prompt = (
        "Generate a professional structured report from the following text. "
        "The output should follow this format:\n\n"
        "**Report Title**\n"
        "**1. Introduction**\n"
        "**2. Key Findings** (Use bullet points)\n"
        "**3. Analysis** (Include structured paragraphs)\n"
        "**4. Recommendations** (Bullet points of suggestions)\n"
        "**5. Appendix (if applicable)**\n\n"
        "Do NOT include additional instructions on formatting.\n"
        "Dont add conclusions or any ending text!"
        f"\n\n{text}"
    )
    completion = client.chat.completions.create(
        extra_body={},
        model="deepseek/deepseek-r1:free",
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    # Both branches of the former if/else returned this same expression.
    return completion.choices[0].message.content
def save_structured_pdf(structured_text, output_pdf):
    """Save the structured text into a PDF file using FPDF.

    Each line of ``structured_text`` is written as its own wrapped cell in
    12pt Arial, with automatic page breaks and a 15-unit bottom margin.

    Args:
        structured_text: Text to render; split on newline characters.
        output_pdf: Path the PDF is written to.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in structured_text.split("\n"):
        # The built-in Arial font only covers Latin-1. Characters outside it
        # (curly quotes, dashes, bullets) are replaced with "?" rather than
        # letting the encoder raise.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # NOTE(review): some newer fpdf2 releases appear to leave the cursor
        # at the right edge after multi_cell; resetting x is a no-op on
        # versions that already return to the left margin -- confirm against
        # the installed version.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)
    pdf.output(output_pdf)
def upload_to_cloudinary(file_path):
    """Upload a file to Cloudinary and return its HTTPS URL.

    Credentials are read from the ``CLOUDINARY_CLOUD_NAME``,
    ``CLOUDINARY_API_KEY`` and ``CLOUDINARY_API_SECRET`` environment
    variables so that no secret is stored in the source file.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``secure_url`` entry of the upload response, or ``None`` if the
        response has no such key.

    Raises:
        RuntimeError: If any of the credential variables is not set.
    """
    import os

    required = (
        "CLOUDINARY_CLOUD_NAME",
        "CLOUDINARY_API_KEY",
        "CLOUDINARY_API_SECRET",
    )
    missing = [name for name in required if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            "Missing Cloudinary environment variables: " + ", ".join(missing)
        )

    cloudinary.config(
        cloud_name=os.environ["CLOUDINARY_CLOUD_NAME"],
        api_key=os.environ["CLOUDINARY_API_KEY"],
        api_secret=os.environ["CLOUDINARY_API_SECRET"],
    )
    # Upload the file the caller asked for; the path used to be hard-coded
    # to "output.pdf", silently ignoring the argument.
    upload_result = cloudinary.uploader.upload(
        file_path,
        resource_type="image",
        type="upload",
        access_mode="public",
    )
    return upload_result.get("secure_url")
def main(pdf1_url, pdf2_url, output_pdf):
    """Download, extract, summarize, and save summaries in a structured format to a PDF, then upload it.

    Args:
        pdf1_url: URL of the first PDF; saved as "Final_Insurance_Report.pdf".
        pdf2_url: URL of the second PDF; saved as
            "Final_Service_Centre_Report.pdf".
        output_pdf: Path the combined summary PDF is written to.

    Returns:
        Whatever ``upload_to_cloudinary`` returns for the generated PDF.
    """
    pdf1_path = "Final_Insurance_Report.pdf"
    pdf2_path = "Final_Service_Centre_Report.pdf"

    download_pdf(pdf1_url, pdf1_path)
    download_pdf(pdf2_url, pdf2_path)

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    structured_summary1 = generate_structured_summary(text1)
    structured_summary2 = generate_structured_summary(text2)

    # Separate the reports with a real newline (this used to be a literal
    # "n"), and skip an empty or None summary instead of rendering "None".
    full_summary = "\n".join(
        summary
        for summary in (structured_summary1, structured_summary2)
        if summary
    )

    save_structured_pdf(full_summary, output_pdf)
    print(f"Structured summaries saved to {output_pdf}")

    pdf_url = upload_to_cloudinary(output_pdf)
    print(f"PDF uploaded to: {pdf_url}")
    return pdf_url