File size: 3,239 Bytes
59721ad
75c33bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c34b507
 
 
 
 
 
 
 
 
 
 
75c33bc
 
 
 
 
 
 
 
 
 
6b8a55c
59721ad
75c33bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59721ad
 
39239b8
75c33bc
 
 
 
 
 
59721ad
 
75c33bc
 
 
 
 
 
 
 
 
 
39239b8
75c33bc
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import fitz
import requests
from openai import OpenAI
from fpdf import FPDF
import cloudinary
import cloudinary.uploader


def download_pdf(url, save_path, timeout=30):
    """Download a PDF from a given URL and save it locally.

    Args:
        url: Address to fetch with an HTTP GET.
        save_path: Local file path the response body is written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of saving the error page as if
    # it were the PDF.
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)


def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Args:
        pdf_path: Path of the PDF to open with ``fitz``.

    Returns:
        The text of every page, concatenated in page order with no separator.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def generate_structured_summary(text):
    """Ask the OpenRouter-hosted model to rewrite ``text`` as a structured report.

    Args:
        text: Source text; it is appended verbatim to the end of the prompt.

    Returns:
        The content of the first completion choice, or an empty string when
        the reply carries no content.
    """
    import os

    # NOTE(review): a key committed to source control should be treated as
    # leaked -- rotate it and remove this fallback. The environment variable
    # takes precedence so a deployment can already supply its own key.
    api_key = os.environ.get("OPENROUTER_API_KEY") or (
        "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"
    )
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )
    prompt = (
        "Generate a professional structured report from the following text. "
        "The output should follow this format:\n\n"
        "**Report Title**\n"
        "**1. Introduction**\n"
        "**2. Key Findings** (Use bullet points)\n"
        "**3. Analysis** (Include structured paragraphs)\n"
        "**4. Recommendations** (Bullet points of suggestions)\n"
        "**5. Appendix (if applicable)**\n\n"
        "Do NOT include additional instructions on formatting.\n"
        "Dont add conclusions or any ending text!"
        f"\n\n{text}"
    )

    completion = client.chat.completions.create(
        extra_body={},
        model="deepseek/deepseek-r1:free",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    # Normalise a missing reply to "" so callers always receive a string.
    return completion.choices[0].message.content or ""


def save_structured_pdf(structured_text, output_pdf):
    """Save the structured text into a PDF file using FPDF.

    Args:
        structured_text: Text to render; each newline-separated line is
            written as its own full-width cell.
        output_pdf: Path of the PDF file to write.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in structured_text.split("\n"):
        print(line)
        # The built-in Arial font is a core (non-Unicode) font; characters
        # outside Latin-1 (smart quotes, bullets, em dashes) would abort PDF
        # generation, so they are replaced with "?" instead.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every line at the left margin so the full-width cell never
        # depends on where the previous cell left the cursor.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    pdf.output(output_pdf)


def upload_to_cloudinary(file_path):
    """Upload a local file to Cloudinary and return its HTTPS URL.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``secure_url`` field of the upload response, or ``None`` when the
        response does not contain one.
    """
    import os

    # NOTE(review): credentials committed to source control should be treated
    # as leaked -- rotate them and remove these fallbacks. Environment
    # variables take precedence so a deployment can supply its own values.
    cloudinary.config(
        cloud_name=os.environ.get("CLOUDINARY_CLOUD_NAME") or "dfdu3nobj",
        api_key=os.environ.get("CLOUDINARY_API_KEY") or "521777423999182",
        api_secret=(
            os.environ.get("CLOUDINARY_API_SECRET")
            or "cYnNmTOJahnLxTz80wrzzXuvZ88"
        ),
    )

    # Upload the file the caller asked for; previously the argument was
    # ignored in favour of a hard-coded "output.pdf".
    upload_result = cloudinary.uploader.upload(
        file_path,
        resource_type="image",
        type="upload",
        access_mode="public",
    )
    print(upload_result)
    return upload_result.get("secure_url")


def main(pdf1_url, pdf2_url, output_pdf):
    """Download two PDFs, summarize each, save both summaries to one PDF, and upload it.

    Args:
        pdf1_url: URL of the first PDF; saved as "Final_Insurance_Report.pdf".
        pdf2_url: URL of the second PDF; saved as
            "Final_Service_Centre_Report.pdf".
        output_pdf: Path of the combined summary PDF to write and upload.

    Returns:
        The URL returned by ``upload_to_cloudinary`` for the uploaded PDF.
    """
    pdf1_path = "Final_Insurance_Report.pdf"
    pdf2_path = "Final_Service_Centre_Report.pdf"

    download_pdf(pdf1_url, pdf1_path)
    download_pdf(pdf2_url, pdf2_path)

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    structured_summary1 = generate_structured_summary(text1)
    structured_summary2 = generate_structured_summary(text2)

    # Separate the two reports with a real newline; the original joined them
    # with a literal "n", gluing the last line of the first report onto the
    # title of the second.
    full_summary = f"{structured_summary1}\n{structured_summary2}"
    save_structured_pdf(full_summary, output_pdf)
    print(f"Structured summaries saved to {output_pdf}")

    pdf_url = upload_to_cloudinary(output_pdf)
    print(f"PDF uploaded to: {pdf_url}")
    return pdf_url