Spaces:
Sleeping
Sleeping
File size: 3,239 Bytes
59721ad 75c33bc c34b507 75c33bc 6b8a55c 59721ad 75c33bc 59721ad 39239b8 75c33bc 59721ad 75c33bc 39239b8 75c33bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import fitz
import requests
from openai import OpenAI
from fpdf import FPDF
import cloudinary
import cloudinary.uploader
def download_pdf(url, save_path, timeout=30):
    """Download a PDF from a given URL and save it locally.

    Args:
        url: Address of the PDF to fetch.
        save_path: Local path the response body is written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # Without a timeout, requests waits forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of saving an error page as a ".pdf".
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        The text of every page, concatenated in page order with no separator.
    """
    # The context manager closes the document even if extraction raises;
    # the original left it open.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
def generate_structured_summary(text):
    """Ask the OpenRouter-hosted model for a structured report built from ``text``.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable so
    that no credential is stored in source control.

    Args:
        text: Raw text to be turned into a structured report.

    Returns:
        The message content of the first completion choice (whatever the API
        returned for it; it is passed through unchanged).

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is not set.
    """
    import os

    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set; "
            "it is required to call the OpenRouter API."
        )

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )
    prompt = (
        "Generate a professional structured report from the following text. "
        "The output should follow this format:\n\n"
        "**Report Title**\n"
        "**1. Introduction**\n"
        "**2. Key Findings** (Use bullet points)\n"
        "**3. Analysis** (Include structured paragraphs)\n"
        "**4. Recommendations** (Bullet points of suggestions)\n"
        "**5. Appendix (if applicable)**\n\n"
        "Do NOT include additional instructions on formatting.\n"
        "Dont add conclusions or any ending text!"
        f"\n\n{text}"
    )
    completion = client.chat.completions.create(
        extra_body={},
        model="deepseek/deepseek-r1:free",
        messages=[
            {"role": "user", "content": prompt}
        ],
    )
    # The original tested the content and then returned the same expression on
    # both paths, so a single return is equivalent.
    return completion.choices[0].message.content
def save_structured_pdf(structured_text, output_pdf):
    """Save the structured text into a well-formatted PDF file using FPDF.

    Each newline-separated line of ``structured_text`` is echoed to stdout and
    written as its own multi-line cell in 12pt Arial.

    Args:
        structured_text: Report text to render.
        output_pdf: Path of the PDF file to write.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in structured_text.split("\n"):
        print(line)
        # The built-in Arial font only covers Latin-1. Model output often
        # contains bullets, dashes or curly quotes outside that range, which
        # would abort PDF generation, so they are replaced with "?".
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every cell at the left margin. Recent fpdf2 releases leave the
        # cursor at the right edge after multi_cell(), leaving no width for the
        # next full-width cell; on older FPDF this reset is a no-op.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)
    pdf.output(output_pdf)
def upload_to_cloudinary(file_path):
    """Upload ``file_path`` to Cloudinary and return its HTTPS URL.

    Credentials are read from the ``CLOUDINARY_CLOUD_NAME``,
    ``CLOUDINARY_API_KEY`` and ``CLOUDINARY_API_SECRET`` environment variables
    so that no secret is stored in source control.

    Args:
        file_path: Local path of the file to upload.

    Returns:
        The ``secure_url`` entry of the upload response, or ``None`` if the
        response does not contain one.

    Raises:
        RuntimeError: If any of the required environment variables is missing.
    """
    import os

    required = (
        "CLOUDINARY_CLOUD_NAME",
        "CLOUDINARY_API_KEY",
        "CLOUDINARY_API_SECRET",
    )
    missing = [name for name in required if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            "Missing Cloudinary configuration in environment: "
            + ", ".join(missing)
        )

    cloudinary.config(
        cloud_name=os.environ["CLOUDINARY_CLOUD_NAME"],
        api_key=os.environ["CLOUDINARY_API_KEY"],
        api_secret=os.environ["CLOUDINARY_API_SECRET"],
    )
    # Upload the file the caller asked for; the original ignored ``file_path``
    # and always sent a hard-coded "output.pdf".
    upload_result = cloudinary.uploader.upload(
        file_path,
        resource_type="image",
        type="upload",
        access_mode="public",
    )
    print(upload_result)
    return upload_result.get("secure_url")
def main(pdf1_url, pdf2_url, output_pdf):
    """Download, extract, summarize, and save summaries in a structured format to a PDF, then upload it.

    Args:
        pdf1_url: URL of the first PDF; saved locally as
            "Final_Insurance_Report.pdf".
        pdf2_url: URL of the second PDF; saved locally as
            "Final_Service_Centre_Report.pdf".
        output_pdf: Path where the combined summary PDF is written.

    Returns:
        The URL returned by ``upload_to_cloudinary`` for the generated PDF.
    """
    pdf1_path = "Final_Insurance_Report.pdf"
    pdf2_path = "Final_Service_Centre_Report.pdf"
    download_pdf(pdf1_url, pdf1_path)
    download_pdf(pdf2_url, pdf2_path)

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    structured_summary1 = generate_structured_summary(text1)
    structured_summary2 = generate_structured_summary(text2)

    # Separate the two reports with a real newline; the original joined them
    # with a literal "n", fusing the end of one report to the start of the next.
    full_summary = f"{structured_summary1}\n{structured_summary2}"

    save_structured_pdf(full_summary, output_pdf)
    print(f"Structured summaries saved to {output_pdf}")

    pdf_url = upload_to_cloudinary(output_pdf)
    print(f"PDF uploaded to: {pdf_url}")
    return pdf_url
|