File size: 3,880 Bytes
5d14cc6
f78e807
7a534ac
4ba3023
7a534ac
ec1c0d9
 
c52e882
ec1c0d9
6993c74
 
ec1c0d9
d553fab
612c0bf
fae7389
ec1c0d9
 
 
03b4c19
7a534ac
a8e9d4c
0e1f166
9ed280b
0e1f166
 
6993c74
f2fb591
185bc0f
f2fb591
 
04f5bb7
2afa0ec
299c4e4
 
 
fae7389
 
7a534ac
2afa0ec
5d14cc6
d553fab
7a534ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8028338
7a534ac
 
b8dc120
83880b7
 
 
 
 
 
 
 
 
 
 
5d14cc6
 
72393c5
5d14cc6
ec1c0d9
86551a1
ec1c0d9
 
4f86a6f
7a534ac
a91875b
 
 
7a534ac
a91875b
 
7a534ac
0042245
 
 
612c0bf
7a534ac
a91875b
 
 
 
a5f868b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import asyncio
import re
from pydantic_ai.result import RunResult
import streamlit as st
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
from pydantic_ai.messages import ModelMessage
import pdfplumber
from transformers import pipeline
import torch
import os
import presentation as customClass
from streamlit_pdf_viewer import pdf_viewer
from dataclasses import dataclass

# Groq API key read from the API_KEY environment variable (None when unset).
api_key = os.getenv("API_KEY")
# Page texts collected by extract_data(); read back by return_data().
data = []
# NOTE(review): not referenced anywhere in the visible part of this file.
last_message = ''
# NOTE(review): not referenced anywhere in the visible part of this file.
result_data: list[customClass.PPT] = []

# Model handed to the pydantic_ai Agent that generates the presentation content.
model = GroqModel("llama-3.2-90b-vision-preview", api_key=api_key)

# Summarization pipeline, constructed at import time.
# NOTE(review): no visible code calls it — confirm it is still needed.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """Split *text* into chunks of at most *max_tokens* whitespace-separated words.

    "Tokens" here are the pieces produced by ``str.split()``, not model
    tokens. Runs of whitespace (including newlines) are collapsed, because
    every chunk is rebuilt by joining its words with single spaces.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in order. Empty or whitespace-only text yields ``[]``.

    Raises:
        ValueError: If *max_tokens* is smaller than 1.
    """
    # A step of 0 makes range() fail with an unrelated-looking message and a
    # negative step silently produces no chunks at all, so reject both early.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    words = text.split()
    return [
        ' '.join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]

def return_data() -> str:
    # Concatenates every entry of the module-level ``data`` list, with no
    # separator, into one string.
    # Documented with comments rather than a docstring on purpose: this
    # function is registered as an agent tool, and a docstring may be picked
    # up as the tool description sent to the model.
    combined_text = "".join(data)
    return combined_text

@dataclass
class SupportDependencies:
    """Dependency container holding a single string field, ``db``.

    NOTE(review): nothing in the visible part of this file instantiates or
    references this class; presumably it was meant to be the agent's
    dependency type — confirm or remove.
    """

    # NOTE(review): what the string represents is not shown here — confirm.
    db: str

async def ppt_content(data, max_chunks: int = 3):
    """Generate presentation content from extracted PDF text.

    The page texts are joined, split into word chunks with
    ``split_into_token_chunks`` and sent to the agent one chunk per run.
    Every run after the first receives the previous run's messages as its
    history, so the final run has seen all earlier chunks.

    Args:
        data: Page texts (a list of strings, or a single string). ``None``
            and empty entries are skipped.
        max_chunks: Upper bound on how many chunks are sent to the model.
            The default of 3 matches the previously hard-coded limit.

    Returns:
        The ``data`` of the last agent run (the agent is configured with
        ``result_type=customClass.PPT``), or ``None`` when there is no text
        to process. The value is also printed, as before.
    """
    # NOTE(review): the prompt announces "4 parts" per slide but lists only
    # three; left untouched because the schema of customClass.PPT is not
    # visible here — confirm which one is right.
    agent = Agent(model,
                  result_type=customClass.PPT,
                  tools=[return_data],
                  system_prompt=(
                      "You are an expert in making power-point presentations",
                      "Create 5 slides",
                      "Title Slide: short intro about the presentation",
                      "Methodology Slide: Summarize the methodology in detail",
                      "Results Slide: Present key findings in detail in simple text and bullet points.",
                      "Discussion Slide: Summarize the implications and limitations.",
                      "Conclusion Slide: State the overall conclusion.",
                      "Each slide should be separate",
                      "Each slide should have 4 parts:",
                      "1. Title: title of the slide",
                      "2. Text: the precise and short description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
                      "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be short, precise, and highlight a specific aspect of the slide's topic. Ideally, limit to 3-5 points.",
                  ))

    # A bare string must stay one unit; iterating it would yield characters.
    pages = [data] if isinstance(data, str) else data
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next, and skip pages that produced no text.
    full_text = "\n".join(page for page in pages if page)
    chunks = split_into_token_chunks(full_text)[:max(max_chunks, 0)]

    message_history: list[ModelMessage] = []
    result = None
    for chunk in chunks:
        # Await the async API: this function is already a coroutine, and the
        # blocking run_sync only works inside a running loop when asyncio has
        # been patched for re-entrancy.
        result = await agent.run(
            user_prompt=f"Create me a powerpoint presentation from {chunk}",
            message_history=message_history,
        )
        message_history = result.all_messages()

    if result is None:
        # No text at all: nothing to generate (previously an UnboundLocalError).
        return None

    print(result.data)
    return result.data

def ai_ppt(data):
    """Run the ``ppt_content`` coroutine for *data* to completion.

    Synchronous entry point for callers that are not async themselves; the
    coroutine is driven by ``asyncio.run`` and nothing is returned.
    """
    generation = ppt_content(data=data)
    asyncio.run(generation)

def extract_data(feed):
    """Extract the text of every page of a PDF into the module-level ``data`` list.

    Args:
        feed: Anything ``pdfplumber.open`` accepts; ``main`` passes the
            Streamlit uploaded-file object.

    Returns:
        None. One string per page is appended to ``data``, in page order.
    """
    with pdfplumber.open(feed) as pdf:
        # extract_text() can yield None for a page without extractable text
        # (e.g. a scanned image); store "" instead so the later
        # "".join(data) calls cannot fail with a TypeError.
        data.extend(page.extract_text() or "" for page in pdf.pages)
    return None

def main():
    """Render the Streamlit page: upload a PDF, optionally build a PPT, preview the PDF.

    Once a file is uploaded its text is extracted into ``data``. Pressing
    the "Make PPT" button runs the generation, and the uploaded PDF is then
    shown in the embedded viewer.
    """
    pdf_upload = st.file_uploader('Choose your .pdf file', type="pdf")

    # Nothing to do until the user has picked a file.
    if pdf_upload is None:
        return

    extract_data(pdf_upload)

    make_ppt_clicked = st.button("Make PPT")
    if make_ppt_clicked:
        ai_ppt(data)

    pdf_viewer(input=pdf_upload.getvalue(), width=700)

if __name__ == '__main__':
    # Patch asyncio to allow re-entrant event loops before any agent code
    # runs; needed when synchronous agent helpers are invoked while a loop is
    # already running.
    nest_asyncio.apply()
    main()