import os
import streamlit as st
from openai import OpenAI
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
# Read configuration from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("INDEX_NAME")
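
# Fail fast on missing configuration (an added guard, not part of the
# original snippet; drop it if you prefer the app to fail lazily)
for _name, _value in [("OPENAI_API_KEY", openai_api_key),
                      ("PINECONE_API_KEY", pinecone_api_key),
                      ("INDEX_NAME", index_name)]:
    if not _value:
        raise RuntimeError(f"Missing required environment variable: {_name}")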
# Initialize OpenAI client (not used by the retrieval path below,
# but kept available for direct chat-completion calls)
client = OpenAI(api_key=openai_api_key)
# Initialize connection to Pinecone (gRPC client)
pc = PineconeGRPC(api_key=pinecone_api_key)
# Create the index if it doesn't exist yet, then connect to it.
# list_indexes() returns an IndexList, so compare against .names().
if index_name not in pc.list_indexes().names():
    # ServerlessSpec takes a cloud/region (replicas and pod_type belong to
    # pod-based PodSpec); adjust cloud and region to your project.
    # dimension=1536 matches OpenAI's text-embedding-ada-002.
    spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(name=index_name, dimension=1536, metric="cosine", spec=spec)
pinecone_index = pc.Index(index_name)
# Initialize VectorStore on top of the Pinecone index
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
pinecone_index.describe_index_stats()  # quick connectivity/stats sanity check
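# describe_index_stats() returns per-namespace vector counts, roughly
# (shape only; values illustrative):
#   {'dimension': 1536, 'index_fullness': 0.0,
#    'namespaces': {'': {'vector_count': 0}}, 'total_vector_count': 0}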
# Initialize vector index, retriever, and query engine
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)
# Set up the LlamaIndex embedding model and ingestion pipeline.
# The pipeline is defined here but not invoked on app startup; see the
# ingestion sketch below for how it would be run.
embed_model = OpenAIEmbedding(api_key=openai_api_key)
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)
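
# A minimal ingestion sketch (commented out; the "./data" path is an
# assumption, not part of the original app). Running it once splits and
# embeds the documents and upserts the resulting nodes into Pinecone:
#
#   from llama_index import SimpleDirectoryReader
#   documents = SimpleDirectoryReader("./data").load_data()
#   nodes = pipeline.run(documents=documents)
#   vector_store.add(nodes)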
def query_annual_report(query):
    """Run the user's question through the retrieval-augmented query engine."""
    response = query_engine.query(query)
    return response.response
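
# Example usage (the question is illustrative; any free-text query works):
#   query_annual_report("What was the company's total revenue last year?")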
# Streamlit app setup
st.title("ChatGPT-like Clone with Pinecone Integration")
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Accept user input
if prompt := st.chat_input("What is up?"):
    # Record and render the user's message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    # Answer via the Pinecone-backed query engine and persist the reply
    with st.chat_message("assistant"):
        response = query_annual_report(prompt)
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
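
# Optional hardening (a sketch, not in the original app): Streamlit reruns
# the whole script on every interaction, so the Pinecone/LlamaIndex setup
# above is rebuilt each time. Caching the heavyweight objects avoids that:
#
#   @st.cache_resource
#   def get_query_engine():
#       ...  # build pc, vector_store, and retriever as above
#       return RetrieverQueryEngine(retriever=retriever)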