File size: 5,638 Bytes
82d1315
 
 
 
 
 
 
3a612b3
82d1315
3a612b3
82d1315
44db9a1
3a612b3
 
82d1315
 
 
3a612b3
6c99a95
3a612b3
82d1315
 
3a612b3
 
 
 
 
 
 
 
 
f140005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82d1315
 
 
 
 
 
 
 
3a612b3
 
82d1315
3a612b3
 
 
 
 
 
 
82d1315
3a612b3
 
 
 
 
 
 
 
 
 
 
 
 
82d1315
 
 
 
 
 
3a612b3
44db9a1
 
7d59a7c
44db9a1
 
 
7d59a7c
44db9a1
 
 
 
 
 
 
 
 
 
82d1315
44db9a1
 
 
 
 
 
 
 
 
 
 
 
 
 
82d1315
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import streamlit as st
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core import DocumentSummaryIndex
from llama_index.embeddings.fastembed import FastEmbedEmbedding
import google.generativeai as genai
import os
import PyPDF2
#import streamlit_analytics2 as streamlit_analytics


# Model configuration. The Google API key is read from the GOOGLE_API_KEY
# environment variable; os.getenv returns None when the variable is unset.

# Configure the global llama_index Settings used by the rest of this module.
# NOTE(review): the Gemini embedding assigned here is overwritten by the very
# next line, so FastEmbed is the embedding model actually left in Settings.
Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
# Gemini LLM (temperature 0.8) stored on Settings; presumably the default LLM
# picked up by query engines built later - confirm against llama_index docs.
Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
# Second, low-temperature (0.1) Gemini client.
# NOTE(review): `llm` is not referenced anywhere else in the visible code.
llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")

# Load and index the input data
def load_data(document_text):
    """Build a vector index over a single document.

    Args:
        document_text: Full text of the source document.

    Returns:
        The ``VectorStoreIndex`` created from one ``Document`` wrapping the
        given text.
    """
    # The text is wrapped in a single Document and passed as a one-element list.
    return VectorStoreIndex.from_documents([Document(text=document_text)])

# Default report outline: section titles, each followed by an indented
# one-line description. generate_report() substitutes this text into its
# prompt when the caller supplies a blank report format.
DEFAULT_REPORT_FORMAT = """
    Title Page
        Includes the report title, author's name, and date.
    Abstract
        A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
    Table of Contents
        Lists sections and subsections with corresponding page numbers for easy navigation.
    Introduction
        Provides background information, defines the scope of the report, and states the objectives.
    Literature Review
        Reviews relevant literature and previous research related to the report topic.
    Methodology/Approach
        Details the methods used to gather data or conduct experiments, including design and analytical techniques.
    Results and Discussion
        Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
    Conclusions
        Summarizes the main findings and their implications, often linking back to the report's objectives.
    Recommendations
        Suggests actions based on the findings, highlighting potential future work or improvements.
    References
        Lists all sources cited in the report, adhering to a specific referencing style.
    Appendices
        Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
"""

# Generate report
def generate_report(index, report_format, additional_info):
    """Query the index with a report-writing prompt and return the answer text.

    Args:
        index: Object exposing ``as_query_engine()``; the resulting engine's
            ``query()`` is called once with the assembled prompt.
        report_format: Desired report outline. When it is blank (empty or
            whitespace only), ``DEFAULT_REPORT_FORMAT`` is used instead and
            an informational Streamlit message is shown.
        additional_info: Extra context inserted verbatim into the prompt.

    Returns:
        The ``response`` attribute of the query result.
    """
    engine = index.as_query_engine()

    # Fall back to the built-in outline when the user left the field blank.
    if report_format.strip() == "":
        layout = DEFAULT_REPORT_FORMAT
        st.info("Using default report format.")
    else:
        layout = report_format

    prompt = f"""
    You are a professional report writer. Your task is to create a comprehensive report based on the entire document provided.
    
    First, thoroughly analyze and summarize the entire document. Then, use the input text to create a well-structured report following the format below:
    
    Report Format:
    {layout}
    
    Additional Information:
    {additional_info}
    
    Even if the input is shallow, generate a report
    Guidelines:
    1. Ensure you comprehend and summarize the entire document before starting the report.
    2. The report should be comprehensive, covering all major points from the document.
    3. Adapt the provided format as necessary to best fit the content and context of the document.
    4. Incorporate any additional information provided into the relevant sections of the report.
    5. Use clear, professional language throughout the report.
    6. Provide specific examples or data from the document to support your analysis and conclusions.
    7. If the document contains technical information, explain it in a way that's accessible to a general audience.
    
    Generate a thorough, well-structured report that captures the essence of the entire document.
    """

    answer = engine.query(prompt)
    return answer.response

# Streamlit app
def _read_uploaded_text(uploaded_file):
    """Return the text content of an uploaded PDF or plain-text file.

    Args:
        uploaded_file: File object returned by ``st.file_uploader``.

    Returns:
        The extracted text as a single string (possibly empty).

    Raises:
        UnicodeDecodeError: If a non-PDF upload is not valid UTF-8.
    """
    if uploaded_file.type == "application/pdf":
        reader = PyPDF2.PdfReader(uploaded_file)
        # A page may yield no text (e.g. image-only pages); treat that as
        # empty. Pages are joined with a newline so the last word of one page
        # does not fuse with the first word of the next.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    return uploaded_file.getvalue().decode("utf-8")


def main():
    """Render the Streamlit UI and generate a report for the uploaded file.

    The page shows a file uploader (PDF or TXT), an optional report-format
    field and an additional-information field. Once a file is uploaded and
    the "Generate Report" button is pressed, the file's text is indexed with
    ``load_data`` and the report from ``generate_report`` is displayed.
    """
    st.title("AI Report Writer")
    st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")

    # File uploader
    uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])

    # Report format input
    report_format = st.text_area("Enter the desired report format (optional)", height=150,
                                 help="Leave blank to use a default template")

    # Additional information input
    additional_info = st.text_area("Enter any additional information or context for the report", height=100)

    if uploaded_file is None:
        st.warning("Please upload a file to generate a report.")
        return

    if not st.button("Generate Report"):
        return

    # The file is parsed only after the button is pressed, so editing the text
    # areas (which re-runs the script) does not re-parse the upload each time.
    try:
        document_text = _read_uploaded_text(uploaded_file)
    except UnicodeDecodeError:
        st.error("The uploaded text file is not valid UTF-8. Please re-save it as UTF-8 and try again.")
        return

    # Scanned or empty documents give nothing to index; stop before calling
    # the embedding model and the LLM on an empty document.
    if not document_text.strip():
        st.error("No text could be extracted from the uploaded file.")
        return

    st.write("Analyzing document and generating report...")

    # Load data and generate report
    index = load_data(document_text)
    report = generate_report(index, report_format, additional_info)

    st.write("## Generated Report")
    st.write(report)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()