Cloudflare-demo / app.py
BroBro87's picture
Update app.py
b38c398
raw
history blame
2.48 kB
import logging
import sys
import streamlit as st
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings import SentenceTransformerEmbeddings
# Set up logging.
# basicConfig() attaches a single stdout StreamHandler to the root logger and
# does nothing when the root logger already has handlers. Attaching a second
# StreamHandler by hand would emit every record twice, and would add one more
# copy each time the script body is executed again in the same process, so no
# extra handler is registered here.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
def configure_llama_model():
    """Create the llama.cpp-backed LLM used by the app.

    Returns:
        A ``LlamaCPP`` instance pointed at the Mistral-7B-Instruct v0.1
        (Q4_K_M, GGUF) weights URL, with low-temperature sampling and the
        prompt-formatting helpers from ``llama_index.llms.llama_utils``.
    """
    llama_settings = {
        "model_url": 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
        "temperature": 0.1,
        "max_new_tokens": 256,
        "context_window": 3900,
        # NOTE(review): -1 presumably asks llama.cpp to offload every layer
        # to the GPU -- confirm against the llama-cpp-python docs.
        "model_kwargs": {"n_gpu_layers": -1},
        "messages_to_prompt": messages_to_prompt,
        "completion_to_prompt": completion_to_prompt,
        "verbose": True,
    }
    return LlamaCPP(**llama_settings)
def configure_embeddings():
    """Return a ``HuggingFaceEmbeddings`` model for "ggrn/e5-small-v2"."""
    return HuggingFaceEmbeddings(model_name="ggrn/e5-small-v2")
def configure_service_context(llm, embed_model):
    """Bundle the LLM and embedding model into a ``ServiceContext``.

    Args:
        llm: Language model to register with the context.
        embed_model: Embedding model to register with the context.

    Returns:
        The ``ServiceContext`` built by ``ServiceContext.from_defaults`` with
        a chunk size of 256.
    """
    context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        chunk_size=256,
    )
    return context
def initialize_vector_store_index(data_path, service_context):
    """Build a FAISS vector store over the documents found in *data_path*.

    Args:
        data_path: Directory handed to ``SimpleDirectoryReader``.
        service_context: Accepted so existing callers keep working; the FAISS
            store built here does not consult it.

    Returns:
        A LangChain ``FAISS`` vector store embedding the loaded documents
        with the "all-MiniLM-L6-v2" sentence-transformer model.
    """
    # Imported locally because the module-level imports do not provide FAISS.
    from langchain.vectorstores import FAISS

    documents = SimpleDirectoryReader(data_path).load_data()

    # SimpleDirectoryReader yields LlamaIndex documents, while
    # FAISS.from_documents expects LangChain ones, so convert each document.
    pages = [document.to_langchain_format() for document in documents]

    embeddings_2 = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_documents(pages, embeddings_2)
def main():
    """Run the Streamlit app: retrieve matching passages and answer with the LLM.

    Builds the LLM, embedding model, service context and vector store, then
    waits for a user message. When one is entered, the most similar passages
    are retrieved from the vector store, combined with the message into a
    prompt, and the LLM's completion is shown in a text area.
    """
    st.title("Cloudflare RAG")

    # Configure and initialize components
    llm = configure_llama_model()
    embed_model = configure_embeddings()
    service_context = configure_service_context(llm, embed_model)
    index = initialize_vector_store_index("./", service_context)

    # User input
    user_input = st.text_input("Enter your message:")
    if not user_input:
        # Nothing to answer yet; leave only the input box on the page.
        return

    # Retrieve the passages most similar to the user's message.
    docs = index.similarity_search(user_input)
    context = "\n\n".join(doc.page_content for doc in docs)

    # Generate response from the retrieved context plus the question.
    prompt = (
        "Use the following context to answer the question.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {user_input}\n"
        "Answer:"
    )
    response = llm.complete(prompt).text

    # Display response
    st.text_area("ChatGPT Response:", response, height=100)
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()