# Streamlit application: chat with annual reports through a Pinecone-backed
# LlamaIndex query engine, with a Hugging Face conversational model on top.
import streamlit as st
import os
from transformers import pipeline, Conversation
from llama_index import VectorStoreIndex
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from llama_index.vector_stores import PineconeVectorStore
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from pinecone.grpc import PineconeGRPC
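
# These import paths follow the legacy (pre-0.10) llama_index layout; newer
# releases moved them under llama_index.core and per-integration packages.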
# Initialize the Pinecone connection and the LlamaIndex query engine.
# Cached so the setup runs once per session, not on every Streamlit rerun.
@st.cache_resource
def initialize_pipeline():
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY")
    embed_model = OpenAIEmbedding(api_key=openai_api_key)

    # Ingestion pipeline: split documents at semantic breakpoints, then embed
    # the resulting chunks. Defined here for indexing new reports; it is not
    # invoked on the query path below.
    ingestion_pipeline = IngestionPipeline(
        transformations=[
            SemanticSplitterNodeParser(
                buffer_size=1,
                breakpoint_percentile_threshold=95,
                embed_model=embed_model,
            ),
            embed_model,
        ],
    )

    # Connect to the existing Pinecone index over gRPC.
    pc = PineconeGRPC(api_key=pinecone_api_key)
    index_name = "anualreport"
    pinecone_index = pc.Index(index_name)
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    pinecone_index.describe_index_stats()  # connectivity sanity check

    # Wrap the existing vectors in an index and expose a top-5 similarity
    # retriever through a query engine.
    vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
    retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
    query_engine = RetrieverQueryEngine(retriever=retriever)
    return query_engine
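
# Note: the ingestion pipeline above is only defined, never run, so the
# "anualreport" index must already contain vectors. A minimal sketch of the
# ingestion step, assuming legacy llama_index readers and a hypothetical
# "./reports" directory of source documents:
#
#     from llama_index import SimpleDirectoryReader
#     documents = SimpleDirectoryReader("./reports").load_data()
#     nodes = ingestion_pipeline.run(documents=documents)
#     vector_store.add(nodes)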
# Streamlit UI
st.title("Chat with Annual Reports")

# Initialize the query engine
query_engine = initialize_pipeline()

# Conversational model using Hugging Face transformers
conversation_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
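
# DialoGPT-medium is a lightweight chat checkpoint; any conversational model
# on the Hugging Face Hub could be substituted here.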
# User input
user_input = st.text_input("You: ", "")
if user_input:
    # Query the vector DB for an answer grounded in the indexed reports
    llm_query = query_engine.query(user_input)
    response = llm_query.response

    # Generate a reply with the conversational model; the pipeline expects a
    # Conversation object rather than a list of strings.
    conversation = conversation_pipeline(Conversation(f"{user_input}\n{response}"))
    bot_response = conversation.generated_responses[-1]

    # Display response
    st.text_area("Bot: ", bot_response, height=200)
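
# To run locally (assuming a transformers release that still ships the
# "conversational" pipeline, which was removed in v4.42):
#   export PINECONE_API_KEY=...
#   export OPENAI_API_KEY=...
#   streamlit run app.py   # file name assumed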