"""Streamlit chat app for asking questions about BD sector annual reports.

The user picks a sector in the sidebar; the sector name is used as the
Pinecone namespace that the LlamaIndex retriever queries. Questions typed
into the chat box are answered by a ``RetrieverQueryEngine`` and the
conversation is kept in ``st.session_state``.

Required environment variables: ``OPENAI_API_KEY``, ``PINECONE_API_KEY``
and ``INDEX_NAME``.
"""
import os

import streamlit as st
from openai import OpenAI
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

# Environment variables that must be set before the app can do anything.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "PINECONE_API_KEY", "INDEX_NAME")

# Sector names offered in the sidebar; each one is passed verbatim as the
# Pinecone namespace.
NAMESPACES = [
    "cement",
    "engineering",
    "food",
    "Fuel",
    "IT",
    "ceramics",
    "Estate",
    "paper",
    "Tannery",
    "Telecommunication",
    "miscellaneous",
    "Textile",
    "Travel",
]

# Read configuration from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("INDEX_NAME")

# Fail early with a readable message instead of letting a ``None`` key or
# index name surface later as an opaque client error.
_missing_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_vars:
    st.error(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )
    st.stop()


@st.cache_resource
def _get_pinecone_index(api_key, name):
    """Return a handle to the Pinecone index *name*, created once per process.

    Streamlit re-executes this script on every interaction; caching avoids
    opening a new gRPC client each time.
    """
    index = PineconeGRPC(api_key=api_key).Index(name)
    # The returned stats are discarded. The call is retained from the
    # original flow, where its only possible effect was to raise if the
    # client could not complete the request.
    index.describe_index_stats()
    return index


@st.cache_resource
def _build_query_engine(namespace, _pinecone_index):
    """Return a query engine over *namespace*, built once per namespace.

    ``_pinecone_index`` carries a leading underscore so that Streamlit does
    not try to hash the client object when computing the cache key; only
    ``namespace`` identifies the cached entry.
    """
    vector_store = PineconeVectorStore(
        pinecone_index=_pinecone_index, namespace=namespace
    )
    vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    return RetrieverQueryEngine(retriever=retriever)


# Initialize OpenAI client.
# NOTE(review): not referenced by the chat flow in this file; kept so the
# module-level name remains available.
client = OpenAI(api_key=openai_api_key)

# Initialize Pinecone index (cached across reruns)
pinecone_index = _get_pinecone_index(pinecone_api_key, index_name)

# Dropdown for selecting namespace
st.sidebar.title("Sector Selection")
namespace = st.sidebar.selectbox("Select a Namespace", NAMESPACES)

# Query engine bound to the selected namespace (cached per namespace)
query_engine = _build_query_engine(namespace, pinecone_index)

# Set up LlamaIndex embedding model and pipeline.
# NOTE(review): the pipeline is constructed but never run in this file.
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)


def query_annual_report(query):
    """Run *query* against the current namespace's query engine.

    Args:
        query: The user's question as plain text.

    Returns:
        The ``response`` attribute of the object returned by
        ``query_engine.query``.
    """
    response = query_engine.query(query)
    return response.response


# Page header and a short description of the app
st.header("BD Annual Reports")

with st.expander("About this App"):
    st.write(
        """ This ChatApp allows users to communicate with custom Annual Reports from BD Sectors. """
    )

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Clear chat button
if st.sidebar.button("Clear Chat"):
    st.session_state.messages.clear()

# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("Type your question based on the BD-Sector report selected?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        response = query_annual_report(prompt)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})