Mjlehtim committed
Commit a93a5d7 · verified · 1 Parent(s): 1468f65

Updated with credentials

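The login gate added in this commit loads Assets/credentials.json and accepts a user when credentials[username] == password, so the file is expected to be a flat JSON object mapping usernames to plain-text passwords. A minimal sketch of creating such a file (the username and password below are made-up placeholders, not part of the commit):

    import json
    import os

    # Hypothetical example only: one key per allowed user, value is that user's password.
    os.makedirs("Assets", exist_ok=True)
    with open("Assets/credentials.json", "w") as f:
        json.dump({"analyst": "change-me"}, f, indent=2)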
Files changed (1)
  1. app.py +261 -378
app.py CHANGED
@@ -32,6 +32,7 @@ from langchain.memory import ConversationBufferMemory
32
  from langchain.prompts import PromptTemplate
33
  import joblib
34
  import nltk
 
35
 
36
  import nest_asyncio # noqa: E402
37
  nest_asyncio.apply()
@@ -49,8 +50,6 @@ groq_api_key=os.getenv('GROQ_API_KEY')
49
 
50
  st.set_page_config(layout="wide")
51
 
52
- nltk.download('averaged_perceptron_tagger')
53
-
54
  css = """
55
  <style>
56
  [data-testid="stAppViewContainer"] {
@@ -106,10 +105,55 @@ css = """
106
  """
107
 
108
  st.write(css, unsafe_allow_html=True)
109
- st.sidebar.image('lt.png', width=250)
110
  #-------------
111
  llm=ChatGroq(groq_api_key=groq_api_key,
112
- model_name="Llama-3.1-70b-Versatile", temperature = 0.0, streaming=True)
113
  #--------------
114
  doc_retriever_ESG = None
115
  doc_retriever_financials = None
@@ -120,7 +164,10 @@ def load_or_parse_data_ESG():
120
  data_file = "./data/parsed_data_ESG.pkl"
121
 
122
  parsingInstructionUber10k = """The provided document contain detailed information about the company's environmental, social and governance matters.
123
- It contains several tables, figures and statistical information. You must be precise while answering the questions and never provide false numeric or statistical data."""
 
 
 
124
 
125
  parser = LlamaParse(api_key=LLAMA_PARSE_API_KEY,
126
  result_type="markdown",
@@ -167,31 +214,6 @@ def load_or_parse_data_financials():
167
 
168
  return parsed_data_financials
169
 
170
- #@st.cache_data
171
- def load_or_parse_data_portfolio():
172
- data_file = "./data/parsed_data_portfolio.pkl"
173
-
174
- parsingInstructionUber10k = """The provided document is the ESG and sustainability report of LocalTapiola (Lähitapiola) group including the funds it manages.
175
- It contains several tabless, figures and statistical information. You must be precise while answering the questions and never provide false numeric or statistical data."""
176
-
177
- parser = LlamaParse(api_key=LLAMA_PARSE_API_KEY,
178
- result_type="markdown",
179
- parsing_instruction=parsingInstructionUber10k,
180
- max_timeout=5000,
181
- gpt4o_mode=True,
182
- )
183
-
184
- file_extractor = {".pdf": parser}
185
- reader = SimpleDirectoryReader("./ESG_Documents_Portfolio", file_extractor=file_extractor)
186
- documents = reader.load_data()
187
-
188
- print("Saving the parse results in .pkl format ..........")
189
- joblib.dump(documents, data_file)
190
-
191
- # Set the parsed data to the variable
192
- parsed_data_portfolio = documents
193
-
194
- return parsed_data_portfolio
195
  #--------------
196
  # Create vector database
197
 
@@ -206,42 +228,32 @@ def create_vector_database_ESG():
206
 
207
  markdown_path = "data/output_ESG.md"
208
  loader = UnstructuredMarkdownLoader(markdown_path)
209
-
210
- #loader = DirectoryLoader('data/', glob="**/*.md", show_progress=True)
211
  documents = loader.load()
212
  # Split loaded documents into chunks
213
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15)
214
  docs = text_splitter.split_documents(documents)
215
 
216
  #len(docs)
217
  print(f"length of documents loaded: {len(documents)}")
218
  print(f"total number of document chunks generated :{len(docs)}")
219
  embed_model = HuggingFaceEmbeddings()
220
- #embed_model = OpenAIEmbeddings()
221
- # Create and persist a Chroma vector database from the chunked documents
222
- # Set up the Chroma client in local mode
223
- print('Vector DB not yet created !')
224
- persist_directory = os.path.join(os.getcwd(), "chroma_db_LT")
225
- if not os.path.exists(persist_directory):
226
- os.makedirs(persist_directory)
227
 
228
  vs = Chroma.from_documents(
229
  documents=docs,
230
  embedding=embed_model,
231
- persist_directory=persist_directory, # Local mode with in-memory storage only
232
  collection_name="rag",
233
  )
234
-
235
  doc_retriever_ESG = vs.as_retriever()
236
-
237
- print('Vector DB created successfully !')
238
- return doc_retriever_ESG
 
 
239
 
240
  @st.cache_resource
241
  def create_vector_database_financials():
242
  # Call the function to either load or parse the data
243
  llama_parse_documents = load_or_parse_data_financials()
244
- print(llama_parse_documents[0].text[:300])
245
 
246
  with open('data/output_financials.md', 'a') as f: # Open the file in append mode ('a')
247
  for doc in llama_parse_documents:
@@ -249,69 +261,25 @@ def create_vector_database_financials():
249
 
250
  markdown_path = "data/output_financials.md"
251
  loader = UnstructuredMarkdownLoader(markdown_path)
252
-
253
- #loader = DirectoryLoader('data/', glob="**/*.md", show_progress=True)
254
  documents = loader.load()
255
- # Split loaded documents into chunks
256
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15)
257
  docs = text_splitter.split_documents(documents)
258
 
259
- #len(docs)
260
- print(f"length of documents loaded: {len(documents)}")
261
- print(f"total number of document chunks generated :{len(docs)}")
262
  embed_model = HuggingFaceEmbeddings()
263
- #embed_model = OpenAIEmbeddings()
264
- # Create and persist a Chroma vector database from the chunked documents
265
- persist_directory = os.path.join(os.getcwd(), "chroma_db_fin")
266
- if not os.path.exists(persist_directory):
267
- os.makedirs(persist_directory)
268
 
269
  vs = Chroma.from_documents(
270
  documents=docs,
271
  embedding=embed_model,
272
- persist_directory=persist_directory, # Local mode with in-memory storage only
273
  collection_name="rag"
274
  )
275
  doc_retriever_financials = vs.as_retriever()
276
 
277
- print('Vector DB created successfully !')
278
- return doc_retriever_financials
279
-
280
- @st.cache_resource
281
- def create_vector_database_portfolio():
282
- # Call the function to either load or parse the data
283
- llama_parse_documents = load_or_parse_data_portfolio()
284
- print(llama_parse_documents[0].text[:300])
285
-
286
- with open('data/output_portfolio.md', 'a') as f: # Open the file in append mode ('a')
287
- for doc in llama_parse_documents:
288
- f.write(doc.text + '\n')
289
-
290
- markdown_path = "data/output_portfolio.md"
291
- loader = UnstructuredMarkdownLoader(markdown_path)
292
-
293
- documents = loader.load()
294
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15)
295
- docs = text_splitter.split_documents(documents)
296
-
297
- print(f"length of documents loaded: {len(documents)}")
298
- print(f"total number of document chunks generated :{len(docs)}")
299
- embed_model = HuggingFaceEmbeddings()
300
-
301
- persist_directory = os.path.join(os.getcwd(), "chroma_db_portfolio")
302
- if not os.path.exists(persist_directory):
303
- os.makedirs(persist_directory)
304
-
305
- vs = Chroma.from_documents(
306
- documents=docs,
307
- embedding=embed_model,
308
- persist_directory=persist_directory, # Local mode with in-memory storage only
309
- collection_name="rag"
310
- )
311
- doc_retriever_portfolio = vs.as_retriever()
312
 
313
  print('Vector DB created successfully !')
314
- return doc_retriever_portfolio
 
315
  #--------------
316
  ESG_analysis_button_key = "ESG_strategy_button"
317
  portfolio_analysis_button_key = "portfolio_strategy_button"
@@ -333,7 +301,6 @@ def delete_files_and_folders(folder_path):
333
 
334
  uploaded_files_ESG = st.sidebar.file_uploader("Choose a Sustainability Report", accept_multiple_files=True, key="ESG_files")
335
  for uploaded_file in uploaded_files_ESG:
336
- #bytes_data = uploaded_file.read()
337
  st.write("filename:", uploaded_file.name)
338
  def save_uploadedfile(uploadedfile):
339
  with open(os.path.join("ESG_Documents",uploadedfile.name),"wb") as f:
@@ -343,7 +310,6 @@ for uploaded_file in uploaded_files_ESG:
343
 
344
  uploaded_files_financials = st.sidebar.file_uploader("Choose an Annual Report", accept_multiple_files=True, key="financial_files")
345
  for uploaded_file in uploaded_files_financials:
346
- #bytes_data = uploaded_file.read()
347
  st.write("filename:", uploaded_file.name)
348
  def save_uploadedfile(uploadedfile):
349
  with open(os.path.join("Financial_Documents",uploadedfile.name),"wb") as f:
@@ -353,7 +319,7 @@ for uploaded_file in uploaded_files_financials:
353
 
354
  #---------------
355
  def ESG_strategy():
356
- doc_retriever_ESG = create_vector_database_ESG()
357
  prompt_template = """<|system|>
358
  You are a seasoned specialist in environmental, social and governance matters. You write expert analyses for institutional investors. Always use figures, numerical and statistical data when possible. Output must have sub-headings in bold font and be fluent.<|end|>
359
  <|user|>
@@ -372,18 +338,19 @@ def ESG_strategy():
372
  | StrOutputParser()
373
  )
374
 
375
- ESG_answer_1 = qa.invoke("Give a summary what ESG measures the company has taken and compare these to the best practices. Has the company issues green bonds or green loans.")
376
- ESG_answer_2 = qa.invoke("Do the company's main business fall under the European Union's taxonomy regulation? Is the company taxonomy compliant under European Union Taxonomy Regulation? Does the company follow the Paris Treaty's obligation to limit globabl warming to 1.5 celcius degrees? What are the measures to achieve this goal")
377
- ESG_answer_3 = qa.invoke("Explain what items of ESG information the company publishes. Describe what ESG transparency commitments the company has given?")
378
- ESG_answer_4 = qa.invoke("Does the company have carbon emissions reduction plan? Set out in a table the company's carbon footprint by location and its development over time. Set out carbon dioxide emissions in relation to turnover and whether the company has reached its carbod dioxide reduction objectives")
379
- ESG_answer_5 = qa.invoke("Describe and give a time series table of the company's carbon dioxide emissions (Scope 1), carbon dioxide emissions from purchased energy (Scope 2) and other indirect carbon dioxide emissions (Scope 3). Set out the company's objectives and material developments relating to these figures")
380
- ESG_answer_6 = qa.invoke("Set out in a table the company's energy and renewable energy usage for each activity coverning the last two or three years. Explain the energy efficiency measures taken by the company. Does the company have a plan to make its use of energy greemer?.")
381
  ESG_answer_7 = qa.invoke("Does the company follow UN Guiding Principles on Business and Human Rights, ILO Declaration on Fundamental Principles and Rights at Work or OECD Guidelines for Multinational Enterprises that involve affected communities? Set out the measures taken to have the gender balance on the upper management of the company.")
382
  ESG_answer_8 = qa.invoke("List the environmental permits and certifications held by the company. Set out and explain any environmental procedures and investigations and decisions taken against the company. Answer whether the company's locations or operations are connected to areas sensitive in relation to biodiversity.")
383
  ESG_answer_9 = qa.invoke("Set out waste produced by the company and possible waste into the soil by real estate. Describe if the company's real estates have hazardous waste.")
384
- ESG_answer_10 = qa.invoke("What policies has the company implemented to counter money laundering and corruption? What percentage of women are represented in the board, executive directors and upper management?")
 
385
 
386
- ESG_output = f"**__Summary of ESG reporting and obligations:__** {ESG_answer_1} \n\n **__Compliance with taxonomy:__** \n\n {ESG_answer_2} \n\n **__Disclosure transparency:__** \n\n {ESG_answer_3} \n\n **__Carbon footprint:__** \n\n {ESG_answer_4} \n\n **__Carbon dioxide emissions:__** \n\n {ESG_answer_5} \n\n **__Renewable energy:__** \n\n {ESG_answer_6} \n\n **__Human rights compliance:__** \n\n {ESG_answer_7} \n\n **__Management and gender balance:__** \n\n {ESG_answer_8} \n\n **__Waste and other emissions:__** {ESG_answer_9} \n\n **__Money laundering and corruption:__** {ESG_answer_10}"
387
  financial_output = ESG_output
388
 
389
  with open("ESG_analysis.txt", 'w') as file:
@@ -391,68 +358,6 @@ def ESG_strategy():
391
 
392
  return financial_output
393
 
394
- def portfolio_strategy():
395
- persist_directory_ESG = "chroma_db_LT"
396
- embeddings = HuggingFaceEmbeddings()
397
- doc_retriever_ESG = Chroma(persist_directory=persist_directory_ESG, embedding_function=embeddings).as_retriever()
398
-
399
- doc_retriever_portfolio = create_vector_database_portfolio()
400
- prompt_portfolio = PromptTemplate.from_template(
401
- template="""<|system|>
402
- You are a seasoned finance specialist and a specialist in environmental, social and governance matters. You write expert portofolion analyses fund management. Always use figures, numerical and statistical data when possible. Output must have sub-headings in bold font and be fluent.<|end|>
403
- <|user|> Based on the {context}, write a summary of LähiTapiola's investment policy. Set out also the most important ESG and sustainability aspects of the policy.<|end|>"\
404
- <|assistant|>""")
405
-
406
- prompt_strategy = PromptTemplate.from_template(
407
- template="""<|system|>
408
- You are a seasoned specialist in environmental, social and governance matters. You analyse companies' ESG matters. Always use figures, numerical and statistical data when possible. Output must have sub-headings in bold font and be fluent.<|end|>
409
- <|user|> Based on the {context}, give a summary of the target company's ESG policy. Set out also the most important ESG and sustainability aspects of the policy.<|end|>"\
410
- <|assistant|>""")
411
-
412
- prompt_analysis = PromptTemplate.from_template(
413
- template="""<|system|>
414
- You are a seasoned finance specialist and a specialist in environmental, social and governance matters. You write expert portofolio analyses fund management. Always use figures, numerical and statistical data when possible. Output must have sub-headings in bold font and be fluent.<|end|>
415
- <|user|> Answer the {question} based on {company_ESG} and {fund_policy}.<|end|>"\
416
- <|assistant|>""")
417
-
418
- portfolio_chain = (
419
- {
420
- "context": doc_retriever_portfolio,
421
- #"question": RunnablePassthrough(),
422
- }
423
- | prompt_portfolio
424
- | llm
425
- | StrOutputParser()
426
- )
427
- strategy_chain = (
428
- {
429
- "context": doc_retriever_ESG,
430
- #"question": RunnablePassthrough(),
431
- }
432
- | prompt_strategy
433
- | llm
434
- | StrOutputParser()
435
- )
436
-
437
- analysis_chain = (
438
- {
439
- "company_ESG": strategy_chain,
440
- "fund_policy": portfolio_chain,
441
- "question": RunnablePassthrough(),
442
- }
443
- | prompt_analysis
444
- | llm
445
- | StrOutputParser()
446
- )
447
-
448
- portfolio_answer = analysis_chain.invoke("is the company's ESG such that it fits within LähiTapiola's investment policy of: {fund_policy}? Give a policy rating")
449
- portfolio_output = f"**__Summary of fit with LähiTapiola's sustainability policy:__** {portfolio_answer} \n"
450
-
451
- with open("portfolio_analysis.txt", 'w') as file:
452
- file.write(portfolio_output)
453
-
454
- return portfolio_output
455
-
456
  #-------------
457
  @st.cache_data
458
  def generate_ESG_strategy() -> str:
@@ -460,11 +365,6 @@ def generate_ESG_strategy() -> str:
460
  st.session_state.results["ESG_analysis_button_key"] = ESG_output
461
  return ESG_output
462
 
463
- @st.cache_data
464
- def generate_portfolio_analysis() -> str:
465
- portfolio_output = portfolio_strategy()
466
- st.session_state.results["portfolio_analysis_button_key"] = portfolio_output
467
- return portfolio_output
468
  #---------------
469
  #@st.cache_data
470
  def create_pdf():
@@ -473,8 +373,8 @@ def create_pdf():
473
  pdf.add_page()
474
  pdf.set_margins(10, 10, 10)
475
  pdf.set_font("Arial", size=15)
476
- image = "lt.png"
477
- pdf.image(image, w = 40)
478
  # Add introductory lines
479
  #pdf.cell(0, 10, txt="Company name", ln=1, align='C')
480
  pdf.cell(0, 10, txt="Structured ESG Analysis", ln=2, align='C')
@@ -517,7 +417,6 @@ def create_directory_loader(file_type, directory_path):
517
  loader_cls=loaders[file_type],
518
  )
519
 
520
-
521
  strategies_container = st.container()
522
  with strategies_container:
523
  mrow1_col1, mrow1_col2 = st.columns(2)
@@ -575,6 +474,19 @@ with strategies_container:
575
  else:
576
  pass
577
  # st.warning("No 'data' subfolder found.")
578
 
579
  folders_to_clean = ["data", "chroma_db_portfolio", "chroma_db_LT", "chroma_db_fin"]
580
 
@@ -615,209 +527,180 @@ with strategies_container:
615
  st.divider()
616
 
617
  with mrow1_col2:
618
- st.subheader("Analyze the ESG summary and LähiTapiola's investment policy")
619
- st.info("This tool enables analysing the company's ESG policy with respect to the portfolio and investment policy.")
620
- uploaded_files_portfolio = st.file_uploader("Choose a pdf file", accept_multiple_files=True, key="portfolio_files")
621
- for uploaded_file in uploaded_files_portfolio:
622
- st.write("filename:", uploaded_file.name)
623
- def save_uploadedfile(uploadedfile):
624
- with open(os.path.join("ESG_Documents_Portfolio",uploadedfile.name),"wb") as f:
625
- f.write(uploadedfile.getbuffer())
626
- return st.success("Saved File:{} to ESG_Documents_Portfolio".format(uploadedfile.name))
627
- save_uploadedfile(uploaded_file)
628
- button_container3 = st.container()
629
- #st.button("Portfolio Analysis")
630
- if "button_states" not in st.session_state:
631
- st.session_state.button_states = {
632
- "portfolio_analysis_button_key": False,
633
- }
634
- if button_container3.button("Portfolio Analysis", key=portfolio_analysis_button_key):
635
- st.session_state.button_states[portfolio_analysis_button_key] = True
636
- portfolio_result_generator = generate_portfolio_analysis()
637
- st.session_state.results["portfolio_analysis_output"] = portfolio_result_generator
638
- st.write(portfolio_result_generator)
 
 
 
639
 
640
- if "portfolio_analysis_output" in st.session_state.results:
641
- st.write(st.session_state.results["portfolio_analysis_output"])
 
 
642
 
643
- st.divider()
644
-
645
- with mrow1_col2:
646
- if "ESG_analysis_button_key" in st.session_state.results and st.session_state.results["ESG_analysis_button_key"]:
647
- doc_retriever_ESG = create_vector_database_ESG()
648
- doc_retriever_financials = create_vector_database_financials()
649
-
650
- persist_directory = os.path.join(os.getcwd(), "chroma_db_portfolio")
651
- if not os.path.exists(persist_directory):
652
- os.makedirs(persist_directory)
653
 
654
- # Load the Chroma retriever from the persisted directory
655
- embeddings = HuggingFaceEmbeddings()
656
- doc_retriever_portfolio = Chroma(persist_directory=persist_directory, embedding_function=embeddings).as_retriever()
657
-
658
- memory = ConversationBufferMemory(memory_key="chat_history", k=3, return_messages=True)
659
- search = SerpAPIWrapper()
660
-
661
- # Updated prompt templates to include chat history
662
- def format_chat_history(chat_history):
663
- """Format chat history as a single string for input to the chain."""
664
- formatted_history = "\n".join([f"User: {entry['input']}\nAI: {entry['output']}" for entry in chat_history])
665
- return formatted_history
666
-
667
- prompt_portfolio = PromptTemplate.from_template(
668
- template="""
669
- You are a seasoned finance specialist and a specialist in environmental, social, and governance matters.
670
- Use figures, numerical, and statistical data when possible.
671
-
672
- Conversation history:
673
- {chat_history}
674
-
675
- Based on the context: {context}, write a summary of LähiTapiola's investment policy. Set out also the most important ESG and sustainability aspects of the policy.
676
- """
677
- )
678
-
679
- prompt_financials = PromptTemplate.from_template(
680
- template="""
681
- You are a seasoned corporate finance specialist.
682
- Use figures, numerical, and statistical data when possible.
683
-
684
- Conversation history:
685
- {chat_history}
686
-
687
- Based on the context: {context}, answer the following question: {question}.
688
- """
689
- )
690
-
691
- prompt_ESG = PromptTemplate.from_template(
692
- template="""
693
- You are a seasoned finance specialist and a specialist in environmental, social, and governance matters.
694
- Use figures, numerical, and statistical data when possible.
695
-
696
- Conversation history:
697
- {chat_history}
698
-
699
- Based on the context: {context}, write a summary of LähiTapiola's ESG policy. Set out also the most important sustainability aspects of the policy.
700
- """
701
- )
702
-
703
- # LCEL Chains with memory integration
704
- financials_chain = (
705
- {
706
- "context": doc_retriever_financials,
707
- # Lambda function now accepts one argument (even if unused)
708
- "chat_history": lambda _: format_chat_history(memory.load_memory_variables({})["chat_history"]),
709
- "question": RunnablePassthrough(),
710
- }
711
- | prompt_financials
712
- | llm
713
- | StrOutputParser()
714
- )
715
-
716
- portfolio_chain = (
717
- {
718
- "context": doc_retriever_portfolio,
719
- "chat_history": lambda _: format_chat_history(memory.load_memory_variables({})["chat_history"]),
720
- "question": RunnablePassthrough(),
721
- }
722
- | prompt_portfolio
723
- | llm
724
- | StrOutputParser()
725
- )
726
-
727
- ESG_chain = (
728
- {
729
- "context": doc_retriever_ESG,
730
- "chat_history": lambda _: format_chat_history(memory.load_memory_variables({})["chat_history"]),
731
- "question": RunnablePassthrough(),
732
- }
733
- | prompt_ESG
734
- | llm
735
- | StrOutputParser()
736
- )
737
-
738
- # Define the tools with LCEL expressions
739
- tools = [
740
- Tool(
741
- name="ESG QA System",
742
- func=ESG_chain.invoke,
743
- description="Useful for answering questions about environmental, social, and governance (ESG) matters related to the target company, but not LähiTapiola.",
744
- ),
745
- Tool(
746
- name="Financials QA System",
747
- func=financials_chain.invoke,
748
- description="Useful for answering questions about financial or operational information concerning the target company, but not LähiTapiola.",
749
- ),
750
- Tool(
751
- name="Policy QA System",
752
- func=portfolio_chain.invoke,
753
- description="Useful for answering questions about LähiTapiola's ESG policy and sustainability measures.",
754
- ),
755
- Tool(
756
- name="Search Tool",
757
- func=search.run,
758
- description="Useful when other tools do not provide the answer.",
759
- ),
760
- ]
761
-
762
- # Initialize the agent with LCEL tools and memory
763
- agent = initialize_agent(
764
- tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, memory=memory, handle_parsing_errors=True)
765
- def conversational_chat(query):
766
- # Get the result from the agent
767
- result = agent.invoke({"input": query, "chat_history": st.session_state['history']})
768
-
769
- # Handle different response types
770
- if isinstance(result, dict):
771
- # Extract the main content if the result is a dictionary
772
- result = result.get("output", "") # Adjust the key as needed based on your agent's output
773
- elif isinstance(result, list):
774
- # If the result is a list, join it into a single string
775
- result = "\n".join(result)
776
- elif not isinstance(result, str):
777
- # Convert the result to a string if it is not already one
778
- result = str(result)
779
-
780
- # Add the query and the result to the session state
781
- st.session_state['history'].append((query, result))
782
-
783
- # Update memory with the conversation
784
- memory.save_context({"input": query}, {"output": result})
785
-
786
- # Return the result
787
- return result
788
-
789
- # Ensure session states are initialized
790
- if 'history' not in st.session_state:
791
- st.session_state['history'] = []
792
-
793
- if 'generated' not in st.session_state:
794
- st.session_state['generated'] = ["Let's discuss the ESG matters and financial matters 🤗"]
795
-
796
- if 'past' not in st.session_state:
797
- st.session_state['past'] = ["Hey ! 👋"]
798
-
799
- if 'input' not in st.session_state:
800
- st.session_state['input'] = ""
801
-
802
- # Streamlit layout
803
- st.subheader("Discuss the ESG and financial matters")
804
- st.info("This tool is designed to enable discussion about the ESG and financial matters concerning the company and also LocalTapiola's own comprehensive sustainability policy and guidance.")
805
- response_container = st.container()
806
- container = st.container()
807
-
808
- with container:
809
- with st.form(key='my_form'):
810
- user_input = st.text_input("Query:", placeholder="What would you like to know about ESG and financial matters", key='input')
811
- submit_button = st.form_submit_button(label='Send')
812
- if submit_button and user_input:
813
- output = conversational_chat(user_input)
814
- st.session_state['past'].append(user_input)
815
- st.session_state['generated'].append(output)
816
- user_input = "Query:"
817
- #st.session_state['input'] = ""
818
- # Display generated responses
819
- if st.session_state['generated']:
820
- with response_container:
821
- for i in range(len(st.session_state['generated'])):
822
- message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="shapes")
823
- message(st.session_state["generated"][i], key=str(i), avatar_style="icons")
 
32
  from langchain.prompts import PromptTemplate
33
  import joblib
34
  import nltk
35
+ import json
36
 
37
  import nest_asyncio # noqa: E402
38
  nest_asyncio.apply()
 
50
 
51
  st.set_page_config(layout="wide")
52
 
 
 
53
  css = """
54
  <style>
55
  [data-testid="stAppViewContainer"] {
 
105
  """
106
 
107
  st.write(css, unsafe_allow_html=True)
108
+ #--------------
109
+ def load_credentials(filepath):
110
+ with open(filepath, 'r') as file:
111
+ return json.load(file)
112
+
113
+ # Load credentials from 'credentials.json'
114
+ credentials = load_credentials('Assets/credentials.json')
115
+
116
+ # Initialize session state if not already done
117
+ if 'logged_in' not in st.session_state:
118
+ st.session_state.logged_in = False
119
+ st.session_state.username = ''
120
+
121
+ # Function to handle login
122
+ def login(username, password):
123
+ if username in credentials and credentials[username] == password:
124
+ st.session_state.logged_in = True
125
+ st.session_state.username = username
126
+ st.rerun() # Rerun to reflect login state
127
+ else:
128
+ st.session_state.logged_in = False
129
+ st.session_state.username = ''
130
+ st.error("Invalid username or password.")
131
+
132
+ # Function to handle logout
133
+ def logout():
134
+ st.session_state.logged_in = False
135
+ st.session_state.username = ''
136
+ st.rerun() # Rerun to reflect logout state
137
+
138
+ # If not logged in, show login form
139
+ if not st.session_state.logged_in:
140
+ st.sidebar.write("Login")
141
+ username = st.sidebar.text_input('Username')
142
+ password = st.sidebar.text_input('Password', type='password')
143
+ if st.sidebar.button('Login'):
144
+ login(username, password)
145
+ # Stop the script here if the user is not logged in
146
+ st.stop()
147
+
148
+ # If logged in, show logout button and main content
149
+ if st.session_state.logged_in:
150
+ st.sidebar.write(f"Welcome, {st.session_state.username}!")
151
+ if st.sidebar.button('Logout'):
152
+ logout()
153
+
154
  #-------------
155
  llm=ChatGroq(groq_api_key=groq_api_key,
156
+ model_name="llama-3.2-90b-text-preview", temperature = 0.0, streaming=True)
157
  #--------------
158
  doc_retriever_ESG = None
159
  doc_retriever_financials = None
 
164
  data_file = "./data/parsed_data_ESG.pkl"
165
 
166
  parsingInstructionUber10k = """The provided document contain detailed information about the company's environmental, social and governance matters.
167
+ It contains several tables, figures and statistical information about CO2 emissions and energy consumption.
168
+ Give precise CO2 and energy consumption levels only from the context documents.
169
+ You must never provide false numeric or statistical data that is not included in the context document.
170
+ Include tables and numeric data always when possible. Only refer to other sources if the context document refers to them or if necessary to provide additional understanding of the company's own data."""
171
 
172
  parser = LlamaParse(api_key=LLAMA_PARSE_API_KEY,
173
  result_type="markdown",
 
214
 
215
  return parsed_data_financials
216

217
  #--------------
218
  # Create vector database
219
 
 
228
 
229
  markdown_path = "data/output_ESG.md"
230
  loader = UnstructuredMarkdownLoader(markdown_path)
 
 
231
  documents = loader.load()
232
  # Split loaded documents into chunks
233
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=30)
234
  docs = text_splitter.split_documents(documents)
235
 
236
  #len(docs)
237
  print(f"length of documents loaded: {len(documents)}")
238
  print(f"total number of document chunks generated :{len(docs)}")
239
  embed_model = HuggingFaceEmbeddings()
240
 
241
  vs = Chroma.from_documents(
242
  documents=docs,
243
  embedding=embed_model,
 
244
  collection_name="rag",
245
  )
 
246
  doc_retriever_ESG = vs.as_retriever()
247
+
248
+ index = VectorStoreIndex.from_documents(llama_parse_documents)
249
+ query_engine = index.as_query_engine()
250
+
251
+ return doc_retriever_ESG, query_engine
252
 
253
  @st.cache_resource
254
  def create_vector_database_financials():
255
  # Call the function to either load or parse the data
256
  llama_parse_documents = load_or_parse_data_financials()
 
257
 
258
  with open('data/output_financials.md', 'a') as f: # Open the file in append mode ('a')
259
  for doc in llama_parse_documents:
 
261
 
262
  markdown_path = "data/output_financials.md"
263
  loader = UnstructuredMarkdownLoader(markdown_path)
 
 
264
  documents = loader.load()
 
265
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15)
266
  docs = text_splitter.split_documents(documents)
267

268
  embed_model = HuggingFaceEmbeddings()
269
 
270
  vs = Chroma.from_documents(
271
  documents=docs,
272
  embedding=embed_model,
 
273
  collection_name="rag"
274
  )
275
  doc_retriever_financials = vs.as_retriever()
276
 
277
+ index = VectorStoreIndex.from_documents(llama_parse_documents)
278
+ query_engine_financials = index.as_query_engine()
279
 
280
  print('Vector DB created successfully !')
281
+ return doc_retriever_financials, query_engine_financials
282
+
283
  #--------------
284
  ESG_analysis_button_key = "ESG_strategy_button"
285
  portfolio_analysis_button_key = "portfolio_strategy_button"
 
301
 
302
  uploaded_files_ESG = st.sidebar.file_uploader("Choose a Sustainability Report", accept_multiple_files=True, key="ESG_files")
303
  for uploaded_file in uploaded_files_ESG:
 
304
  st.write("filename:", uploaded_file.name)
305
  def save_uploadedfile(uploadedfile):
306
  with open(os.path.join("ESG_Documents",uploadedfile.name),"wb") as f:
 
310
 
311
  uploaded_files_financials = st.sidebar.file_uploader("Choose an Annual Report", accept_multiple_files=True, key="financial_files")
312
  for uploaded_file in uploaded_files_financials:
 
313
  st.write("filename:", uploaded_file.name)
314
  def save_uploadedfile(uploadedfile):
315
  with open(os.path.join("Financial_Documents",uploadedfile.name),"wb") as f:
 
319
 
320
  #---------------
321
  def ESG_strategy():
322
+ doc_retriever_ESG, _ = create_vector_database_ESG()
323
  prompt_template = """<|system|>
324
  You are a seasoned specialist in environmental, social and governance matters. You write expert analyses for institutional investors. Always use figures, numerical and statistical data when possible. Output must have sub-headings in bold font and be fluent.<|end|>
325
  <|user|>
 
338
  | StrOutputParser()
339
  )
340
 
341
+ ESG_answer_1 = qa.invoke("Give a summary of what specific ESG measures the company has taken recently and compare these to best practices.")
342
+ ESG_answer_2 = qa.invoke("Does the company's main business fall under the European Union's taxonomy regulation? Is the company taxonomy compliant under European Union Taxonomy Regulation?")
343
+ ESG_answer_3 = qa.invoke("Explain what items of ESG information the company publishes. Describe what ESG transparency commitments the company has given. Does the company follow the Paris Treaty's obligation to limit global warming to 1.5 degrees Celsius?")
344
+ ESG_answer_4 = qa.invoke("Does the company have a carbon emissions reduction plan and has the company reached its carbon dioxide reduction objectives? Set out in a table the company's carbon footprint by location and its development from the context. Set out carbon dioxide emissions in relation to turnover.")
345
+ ESG_answer_5 = qa.invoke("Describe and set out in a table the following carbon emissions figures: (i) Scope 1 CO2 emissions, (ii) Scope 2 CO2, and (iii) Scope 3 CO2 emissions. Set out the material changes relating to these figures.")
346
+ ESG_answer_6 = qa.invoke("Set out in a table the company's energy and renewable energy usage for each material activity covering the available years. Explain the energy efficiency measures taken by the company.")
347
  ESG_answer_7 = qa.invoke("Does the company follow UN Guiding Principles on Business and Human Rights, ILO Declaration on Fundamental Principles and Rights at Work or OECD Guidelines for Multinational Enterprises that involve affected communities? Set out the measures taken to have the gender balance on the upper management of the company.")
348
  ESG_answer_8 = qa.invoke("List the environmental permits and certifications held by the company. Set out and explain any environmental procedures and investigations and decisions taken against the company. Answer whether the company's locations or operations are connected to areas sensitive in relation to biodiversity.")
349
  ESG_answer_9 = qa.invoke("Set out waste produces by the company and possible waste into the soil by real estate. Describe if the company's real estates have hazardous waste.")
350
+ ESG_answer_10 = qa.invoke("What percentage of women are represented in the (i) board, (ii) executive directors and (iii) upper management?")
351
+ ESG_answer_11 = qa.invoke("What policies has the company implemented to counter money laundering and corruption?")
352
 
353
+ ESG_output = f"**__Summary of ESG reporting and obligations:__** {ESG_answer_1} \n\n **__Compliance with taxonomy:__** \n\n {ESG_answer_2} \n\n **__Disclosure transparency:__** \n\n {ESG_answer_3} \n\n **__Carbon footprint:__** \n\n {ESG_answer_4} \n\n **__Carbon dioxide emissions:__** \n\n {ESG_answer_5} \n\n **__Renewable energy:__** \n\n {ESG_answer_6} \n\n **__Human rights compliance:__** \n\n {ESG_answer_7} \n\n **__Management and gender balance:__** \n\n {ESG_answer_8} \n\n **__Waste and other emissions:__** {ESG_answer_9} \n\n **__Gender equality:__** {ESG_answer_10} \n\n **__Anti-money laundering:__** {ESG_answer_11}"
354
  financial_output = ESG_output
355
 
356
  with open("ESG_analysis.txt", 'w') as file:
 
358
 
359
  return financial_output
360

361
  #-------------
362
  @st.cache_data
363
  def generate_ESG_strategy() -> str:
 
365
  st.session_state.results["ESG_analysis_button_key"] = ESG_output
366
  return ESG_output
367

368
  #---------------
369
  #@st.cache_data
370
  def create_pdf():
 
373
  pdf.add_page()
374
  pdf.set_margins(10, 10, 10)
375
  pdf.set_font("Arial", size=15)
376
+ #image = "lt.png"
377
+ #pdf.image(image, w = 40)
378
  # Add introductory lines
379
  #pdf.cell(0, 10, txt="Company name", ln=1, align='C')
380
  pdf.cell(0, 10, txt="Structured ESG Analysis", ln=2, align='C')
 
417
  loader_cls=loaders[file_type],
418
  )
419
 
 
420
  strategies_container = st.container()
421
  with strategies_container:
422
  mrow1_col1, mrow1_col2 = st.columns(2)
 
474
  else:
475
  pass
476
  # st.warning("No 'data' subfolder found.")
477
+
478
+ if os.path.exists("ESG_Documents_Portfolio"):
479
+ # Iterate through files in the subfolder and delete them
480
+ for filename in os.listdir("ESG_Documents_Portfolio"):
481
+ file_path = os.path.join("ESG_Documents_Portfolio", filename)
482
+ try:
483
+ if os.path.isfile(file_path):
484
+ os.unlink(file_path)
485
+ except Exception as e:
486
+ st.error(f"Error deleting {file_path}: {e}")
487
+ else:
488
+ pass
489
+ # st.warning("No 'data' subfolder found.")
490
 
491
  folders_to_clean = ["data", "chroma_db_portfolio", "chroma_db_LT", "chroma_db_fin"]
492
 
 
527
  st.divider()
528
 
529
  with mrow1_col2:
530
+ if "ESG_analysis_button_key" in st.session_state.results and st.session_state.results["ESG_analysis_button_key"]:
531
+
532
+ doc_retriever_ESG, query_engine = create_vector_database_ESG()
533
+ doc_retriever_financials, query_engine_financials = create_vector_database_financials()
534
+ memory = ConversationBufferMemory(memory_key="chat_history", k=3, return_messages=True)
535
+ search = SerpAPIWrapper()
536
+
537
+ # Updated prompt templates to include chat history
538
+ def format_chat_history(chat_history):
539
+ """Format chat history as a single string for input to the chain."""
540
+ formatted_history = "\n".join([f"User: {entry['input']}\nAI: {entry['output']}" for entry in chat_history])
541
+ return formatted_history
542
+
543
+ prompt_financials = PromptTemplate.from_template(
544
+ template="""
545
+ You are a seasoned corporate finance specialist.
546
+ Use figures, numerical, and statistical data when possible. Never give false information, numbers or data.
547
+
548
+ Conversation history:
549
+ {chat_history}
550
+
551
+ Based on the context: {context}, answer the following question: {question}.
552
+ """
553
+ )
554
 
555
+ prompt_ESG = PromptTemplate.from_template(
556
+ template="""
557
+ You are a seasoned finance specialist and a specialist in environmental, social, and governance matters.
558
+ Use figures, numerical, and statistical data when possible. Never give false information, numbers or data.
559
 
560
+ Conversation history:
561
+ {chat_history}
562
+
563
+ Based on the context: {context}, answer the following question: {question}.
564
+ """
565
+ )
566
+
567
+ # LCEL Chains with memory integration
568
+ financials_chain = (
569
+ {
570
+ "context": doc_retriever_financials,
571
+ # Lambda function now accepts one argument (even if unused)
572
+ "chat_history": lambda _: format_chat_history(memory.load_memory_variables({})["chat_history"]),
573
+ "question": RunnablePassthrough(),
574
+ }
575
+ | prompt_financials
576
+ | llm
577
+ | StrOutputParser()
578
+ )
579
+
580
+ ESG_chain = (
581
+ {
582
+ "context": doc_retriever_ESG,
583
+ "chat_history": lambda _: format_chat_history(memory.load_memory_variables({})["chat_history"]),
584
+ "question": RunnablePassthrough(),
585
+ }
586
+ | prompt_ESG
587
+ | llm
588
+ | StrOutputParser()
589
+ )
590
+
591
+ # Define the tools with LCEL expressions
592
+ # Define the vector query engine tool
593
+ vector_query_tool_ESG = Tool(
594
+ name="Vector Query Engine ESG",
595
+ func=lambda query: query_engine.query(query), # Use query_engine to query the vector database
596
+ description="Useful for answering questions that require ESG figures, data and statistics.",
597
+ )
598
+
599
+ vector_query_tool_financials = Tool(
600
+ name="Vector Query Engine Financials",
601
+ func=lambda query: query_engine_financials.query(query), # Use query_engine to query the vector database
602
+ description="Useful for answering questions that require financial figures, data and statistics.",
603
+ )
604
+
605
+ # Create a function to validate responses
606
+ def validate_esg_response(query):
607
+ esg_response = vector_query_tool_ESG.func(query)
608
+ esg_validation = ESG_chain.invoke({
609
+ "context": doc_retriever_ESG,
610
+ "chat_history": format_chat_history(memory.load_memory_variables({})["chat_history"]),
611
+ "question": esg_response
612
+ })
613
+ return esg_validation
614
+
615
+ def validate_financials_response(query):
616
+ financials_response = vector_query_tool_financials.func(query)
617
+ financials_validation = financials_chain.invoke({
618
+ "context": doc_retriever_financials,
619
+ "chat_history": format_chat_history(memory.load_memory_variables({})["chat_history"]),
620
+ "question": financials_response
621
+ })
622
+ return financials_validation
623
+
624
+ # Update the tools list to include validation
625
+ tools = [
626
+ Tool(
627
+ name="Search Tool",
628
+ func=search.run,
629
+ description="Useful when other tools do not provide the answer.",
630
+ ),
631
+ Tool(
632
+ name="Validate ESG Response",
633
+ func=validate_esg_response,
634
+ description="Validates the response of the Vector Query Engine ESG tool.",
635
+ ),
636
+ Tool(
637
+ name="Validate Financials Response",
638
+ func=validate_financials_response,
639
+ description="Validates the response of the Vector Query Engine Financials tool.",
640
+ ),
641
+ vector_query_tool_ESG,
642
+ vector_query_tool_financials,
643
+ ]
644
+
645
+ # Initialize the agent with LCEL tools and memory
646
+ agent = initialize_agent(
647
+ tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, memory=memory, handle_parsing_errors=True)
648
+ def conversational_chat(query):
649
+ # Get the result from the agent
650
+ result = agent.invoke({"input": query, "chat_history": st.session_state['history']})
651
+
652
+ # Handle different response types
653
+ if isinstance(result, dict):
654
+ # Extract the main content if the result is a dictionary
655
+ result = result.get("output", "") # Adjust the key as needed based on your agent's output
656
+ elif isinstance(result, list):
657
+ # If the result is a list, join it into a single string
658
+ result = "\n".join(result)
659
+ elif not isinstance(result, str):
660
+ # Convert the result to a string if it is not already one
661
+ result = str(result)
662
+
663
+ # Add the query and the result to the session state
664
+ st.session_state['history'].append((query, result))
665
+
666
+ # Update memory with the conversation
667
+ memory.save_context({"input": query}, {"output": result})
668
 
669
+ # Return the result
670
+ return result
671
+
672
+ # Ensure session states are initialized
673
+ if 'history' not in st.session_state:
674
+ st.session_state['history'] = []
675
+
676
+ if 'generated' not in st.session_state:
677
+ st.session_state['generated'] = ["Let's discuss the ESG matters and financial matters 🤗"]
678
+
679
+ if 'past' not in st.session_state:
680
+ st.session_state['past'] = ["Hey ! 👋"]
681
+
682
+ if 'input' not in st.session_state:
683
+ st.session_state['input'] = ""
684
+
685
+ # Streamlit layout
686
+ st.subheader("Discuss the ESG and financial matters")
687
+ st.info("This tool is designed to enable discussion about the ESG and financial matters concerning the company.")
688
+ response_container = st.container()
689
+ container = st.container()
690
+
691
+ with container:
692
+ with st.form(key='my_form'):
693
+ user_input = st.text_input("Query:", placeholder="What would you like to know about ESG and financial matters", key='input')
694
+ submit_button = st.form_submit_button(label='Send')
695
+ if submit_button and user_input:
696
+ output = conversational_chat(user_input)
697
+ st.session_state['past'].append(user_input)
698
+ st.session_state['generated'].append(output)
699
+ user_input = "Query:"
700
+ #st.session_state['input'] = ""
701
+ # Display generated responses
702
+ if st.session_state['generated']:
703
+ with response_container:
704
+ for i in range(len(st.session_state['generated'])):
705
+ message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="shapes")
706
+ message(st.session_state["generated"][i], key=str(i), avatar_style="icons")