# Streamlit chat app: query BD sector annual reports stored in a Pinecone index
# through a LlamaIndex retriever backed by OpenAI embeddings.
# Standard library
import os

# Third-party: Streamlit UI, OpenAI client, LlamaIndex RAG stack, Pinecone vector DB
import streamlit as st
from openai import OpenAI
from llama_index import VectorStoreIndex
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever
from llama_index.vector_stores import PineconeVectorStore
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC
# --- Configuration ------------------------------------------------------------
# Credentials and the index name come from environment variables; if any is
# unset, the value is None and the corresponding client call fails later.
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("INDEX_NAME")

# OpenAI client. NOTE(review): not called directly in this file — embeddings go
# through OpenAIEmbedding below; confirm whether `client` is used elsewhere.
client = OpenAI(api_key=openai_api_key)

# Connect to Pinecone over gRPC and open the target index.
pc = PineconeGRPC(api_key=pinecone_api_key)
pinecone_index = pc.Index(index_name)

# --- Sidebar: sector (namespace) selection ------------------------------------
st.sidebar.title("Sector Selection")
namespace = st.sidebar.selectbox(
    "Select a Namespace",
    ["cement", "engineering", "food", "Fuel", "IT", "ceramics", "Estate", "paper", "Tannery", "Telecommunication", "miscellaneous", "Textile", "Travel"]
)

# --- Retrieval stack ----------------------------------------------------------
# Scope the vector store to the selected namespace so queries only search the
# chosen sector's documents.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, namespace=namespace)
# Return value is discarded — TODO(review): confirm this call is needed (it may
# serve only as a connectivity check).
pinecone_index.describe_index_stats()

# Index -> top-5 similarity retriever -> query engine used by the chat handler.
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

# --- Ingestion pipeline (semantic chunking + embedding) ------------------------
# NOTE(review): `pipeline` is defined but never invoked in this file —
# presumably kept for offline ingestion; verify before removing.
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(buffer_size=1, breakpoint_percentile_threshold=95, embed_model=embed_model),
        embed_model,
    ],
)
def query_annual_report(query):
    """Answer *query* using the namespace-scoped retriever query engine.

    Args:
        query: Natural-language question about the selected sector's reports.

    Returns:
        The engine's synthesized answer as a plain string.
    """
    response = query_engine.query(query)
    return response.response
# --- Chat UI -------------------------------------------------------------------
st.header("BD Annual Reports")
with st.expander("About this App"):
    # Plain triple-quoted string: the original `f` prefix had no placeholders.
    st.write("""
    This ChatApp allows users to communicate with custom Annual Reports from BD Sectors.
    """)

# Persist chat history across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Sidebar control to wipe the conversation.
if st.sidebar.button("Clear Chat"):
    st.session_state.messages.clear()

# Replay prior turns so the transcript survives each rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a new user turn: echo it, answer it, and record both sides in history.
if prompt := st.chat_input("Type your question based on the BD-Sector report selected?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        response = query_annual_report(prompt)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})