# Streamlit application
import os
from getpass import getpass

import streamlit as st
from transformers import Conversation, pipeline
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine


# Function to initialize the Pinecone and LlamaIndex setup
@st.cache_resource  # build the engine once per session instead of on every Streamlit rerun
def initialize_pipeline():
    # Read API keys from the environment, falling back to an interactive prompt if unset
    pinecone_api_key = os.getenv("PINECONE_API_KEY") or getpass("Pinecone API key: ")
    openai_api_key = os.getenv("OPENAI_API_KEY") or getpass("OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = openai_api_key

    embed_model = OpenAIEmbedding(api_key=openai_api_key)

    # Ingestion pipeline: semantic chunking followed by embedding
    # (used when loading documents into the index)
    ingestion_pipeline = IngestionPipeline(
        transformations=[
            SemanticSplitterNodeParser(
                buffer_size=1,
                breakpoint_percentile_threshold=95,
                embed_model=embed_model,
            ),
            embed_model,
        ],
    )

    # Connect to the existing Pinecone index that holds the annual-report embeddings
    pc = PineconeGRPC(api_key=pinecone_api_key)
    index_name = "anualreport"
    pinecone_index = pc.Index(index_name)
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    pinecone_index.describe_index_stats()  # sanity check: confirm the index contains vectors

    # Wrap the vector store in a LlamaIndex index and build a retrieval query engine
    vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    query_engine = RetrieverQueryEngine(retriever=retriever)

    return query_engine


# Streamlit UI
st.title("Chat with Annual Reports")

# Initialize the query engine
query_engine = initialize_pipeline()

# Conversation model using Hugging Face transformers
conversation_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")

# User input
user_input = st.text_input("You: ", "")

if user_input:
    # Query the vector DB for context from the annual reports
    llm_query = query_engine.query(user_input)
    response = llm_query.response

    # Generate a reply with the Hugging Face conversational pipeline.
    # The pipeline expects a Conversation object (not a plain list of strings),
    # so the user's question and the retrieved context are combined into one turn.
    conversation = Conversation(f"{user_input}\n{response}")
    conversation = conversation_pipeline(conversation)
    bot_response = conversation.generated_responses[-1]

    # Display response
    st.text_area("Bot: ", bot_response, height=200)
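
# Usage sketch: how to launch the app locally. This assumes the dependencies
# imported above are installed and the "anualreport" Pinecone index has already
# been populated (e.g. by running the ingestion pipeline over the report PDFs);
# the filename app.py is a placeholder for wherever this script is saved.
#
#   export PINECONE_API_KEY=...   # your Pinecone key
#   export OPENAI_API_KEY=...     # your OpenAI key
#   streamlit run app.py
#
# If either environment variable is missing, initialize_pipeline() will prompt
# for the key on the terminal running the Streamlit server.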