Prat0 committed on
Commit
3a612b3
·
verified ·
1 Parent(s): f140005

Update pages/Report_Writer.py

Browse files
Files changed (1) hide show
  1. pages/Report_Writer.py +42 -64
pages/Report_Writer.py CHANGED
@@ -5,76 +5,55 @@ from llama_index.core import VectorStoreIndex, Document
5
  from llama_index.embeddings.gemini import GeminiEmbedding
6
  from llama_index.llms.gemini import Gemini
7
  from llama_index.core import DocumentSummaryIndex
 
8
  import google.generativeai as genai
 
9
  import PyPDF2
10
  import streamlit_analytics2 as streamlit_analytics
11
- from llama_index.embeddings.fastembed import FastEmbedEmbedding
12
- from llama_index.core.node_parser import TokenTextSplitter
13
  # Set up Google API key
14
 
15
  # Configure Google Gemini
 
16
  Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
17
- Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
18
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
19
 
 
 
 
 
 
 
 
 
 
20
  DEFAULT_REPORT_FORMAT = """
21
  Title Page
22
-
23
  Includes the report title, author's name, and date.
24
-
25
  Abstract
26
-
27
  A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
28
-
29
  Table of Contents
30
-
31
  Lists sections and subsections with corresponding page numbers for easy navigation.
32
-
33
  Introduction
34
-
35
  Provides background information, defines the scope of the report, and states the objectives.
36
-
37
  Literature Review
38
-
39
  Reviews relevant literature and previous research related to the report topic.
40
-
41
  Methodology/Approach
42
-
43
  Details the methods used to gather data or conduct experiments, including design and analytical techniques.
44
-
45
  Results and Discussion
46
-
47
  Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
48
-
49
  Conclusions
50
-
51
  Summarizes the main findings and their implications, often linking back to the report's objectives.
52
-
53
  Recommendations
54
-
55
  Suggests actions based on the findings, highlighting potential future work or improvements.
56
-
57
  References
58
-
59
  Lists all sources cited in the report, adhering to a specific referencing style.
60
-
61
  Appendices
62
-
63
  Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
64
-
65
  """
66
 
67
- # Load and index the input data
68
- def load_data(document_text):
69
- # Use a text splitter to break the document into smaller chunks
70
- text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
71
- texts = text_splitter.split_text(document_text)
72
-
73
- documents = [Document(text=t) for t in texts]
74
-
75
- index = DocumentSummaryIndex.from_documents(documents)
76
- return index
77
-
78
  # Generate report
79
  def generate_report(index, report_format, additional_info):
80
  query_engine = index.as_query_engine()
@@ -83,30 +62,30 @@ def generate_report(index, report_format, additional_info):
83
  report_format = DEFAULT_REPORT_FORMAT
84
  st.info("Using default report format.")
85
 
86
- # Break down the report generation into smaller queries
87
- sections = [
88
- "Title and Abstract",
89
- "Introduction and Literature Review",
90
- "Methodology and Results",
91
- "Discussion and Conclusion",
92
- "Recommendations and References"
93
- ]
94
 
95
- full_report = ""
96
- for section in sections:
97
- response = query_engine.query(f"""
98
- Generate the {section} section of the report based on the provided document.
99
- Use the following format guidelines:
100
- {report_format}
101
-
102
- Additional Information:
103
- {additional_info}
104
-
105
- Focus on creating a comprehensive and well-structured section.
106
- """)
107
- full_report += response.response + "\n\n"
108
 
109
- return full_report
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  # Streamlit app
112
  def main():
@@ -114,7 +93,8 @@ def main():
114
  st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
115
 
116
  with streamlit_analytics.track():
117
- # File uploader
 
118
  uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
119
 
120
  # Report format input
@@ -138,11 +118,9 @@ def main():
138
  st.write("Analyzing document and generating report...")
139
 
140
  # Load data and generate report
141
- with st.spinner("Indexing document..."):
142
- index = load_data(document_text)
143
-
144
- with st.spinner("Generating report..."):
145
- report = generate_report(index, report_format, additional_info)
146
 
147
  st.write("## Generated Report")
148
  st.write(report)
 
5
  from llama_index.embeddings.gemini import GeminiEmbedding
6
  from llama_index.llms.gemini import Gemini
7
  from llama_index.core import DocumentSummaryIndex
8
+ from llama_index.embeddings.fastembed import FastEmbedEmbedding
9
  import google.generativeai as genai
10
+ import os
11
  import PyPDF2
12
  import streamlit_analytics2 as streamlit_analytics
13
+
14
+
15
  # Set up Google API key
16
 
17
  # Configure Google Gemini
18
+ Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
19
  Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
20
+ Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
21
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
22
 
23
+ # Load and index the input data
24
+ def load_data(document_text):
25
+ document = [Document(text=document_text)]
26
+
27
+ #index = VectorStoreIndex.from_documents([document])
28
+ index = VectorStoreIndex.from_documents(document)
29
+ return index
30
+
31
+ # Default report format template
32
  DEFAULT_REPORT_FORMAT = """
33
  Title Page
 
34
  Includes the report title, author's name, and date.
 
35
  Abstract
 
36
  A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
 
37
  Table of Contents
 
38
  Lists sections and subsections with corresponding page numbers for easy navigation.
 
39
  Introduction
 
40
  Provides background information, defines the scope of the report, and states the objectives.
 
41
  Literature Review
 
42
  Reviews relevant literature and previous research related to the report topic.
 
43
  Methodology/Approach
 
44
  Details the methods used to gather data or conduct experiments, including design and analytical techniques.
 
45
  Results and Discussion
 
46
  Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
 
47
  Conclusions
 
48
  Summarizes the main findings and their implications, often linking back to the report's objectives.
 
49
  Recommendations
 
50
  Suggests actions based on the findings, highlighting potential future work or improvements.
 
51
  References
 
52
  Lists all sources cited in the report, adhering to a specific referencing style.
 
53
  Appendices
 
54
  Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
 
55
  """
56
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Generate report
58
  def generate_report(index, report_format, additional_info):
59
  query_engine = index.as_query_engine()
 
62
  report_format = DEFAULT_REPORT_FORMAT
63
  st.info("Using default report format.")
64
 
65
+ response = query_engine.query(f"""
66
+ You are a professional report writer. Your task is to create a comprehensive report based on the entire document provided.
 
 
 
 
 
 
67
 
68
+ First, thoroughly analyze and summarize the entire document. Then, use the input text to create a well-structured report following the format below:
69
+
70
+ Report Format:
71
+ {report_format}
72
+
73
+ Additional Information:
74
+ {additional_info}
 
 
 
 
 
 
75
 
76
+ Even if the input is shallow, generate a report
77
+ Guidelines:
78
+ 1. Ensure you comprehend and summarize the entire document before starting the report.
79
+ 2. The report should be comprehensive, covering all major points from the document.
80
+ 3. Adapt the provided format as necessary to best fit the content and context of the document.
81
+ 4. Incorporate any additional information provided into the relevant sections of the report.
82
+ 5. Use clear, professional language throughout the report.
83
+ 6. Provide specific examples or data from the document to support your analysis and conclusions.
84
+ 7. If the document contains technical information, explain it in a way that's accessible to a general audience.
85
+
86
+ Generate a thorough, well-structured report that captures the essence of the entire document.
87
+ """)
88
+ return response.response
89
 
90
  # Streamlit app
91
  def main():
 
93
  st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
94
 
95
  with streamlit_analytics.track():
96
+
97
+ # File uploader
98
  uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
99
 
100
  # Report format input
 
118
  st.write("Analyzing document and generating report...")
119
 
120
  # Load data and generate report
121
+ doc_list = document_text.split(".")
122
+ index = load_data(document_text)
123
+ report = generate_report(index, report_format, additional_info)
 
 
124
 
125
  st.write("## Generated Report")
126
  st.write(report)