import logging
import sys

import streamlit as st
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Set up logging to stdout
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


def configure_llama_model():
    # Download and run a quantized Mistral-7B-Instruct model via llama.cpp
    model_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
    llm = LlamaCPP(
        model_url=model_url,
        temperature=0.1,
        max_new_tokens=256,
        context_window=3900,
        model_kwargs={"n_gpu_layers": -1},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm


def configure_embeddings():
    # Wrap the LangChain HuggingFace embeddings so llama_index can use them
    embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name="ggrn/e5-small-v2"))
    return embed_model


def configure_service_context(llm, embed_model):
    return ServiceContext.from_defaults(chunk_size=256, llm=llm, embed_model=embed_model)


def initialize_vector_store_index(data_path, service_context):
    # Load documents from the data directory and build the vector store index
    documents = SimpleDirectoryReader(data_path).load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    return index


def main():
    st.title("Cloudflare RAG")

    # Configure and initialize components
    llm = configure_llama_model()
    embed_model = configure_embeddings()
    service_context = configure_service_context(llm, embed_model)
    index = initialize_vector_store_index("./", service_context)
    query_engine = index.as_query_engine()

    # User input
    user_input = st.text_input("Enter your message:")

    if user_input:
        # Retrieve relevant chunks and generate a response with the LLM
        response = query_engine.query(user_input)

        # Display response
        st.text_area("Response:", str(response), height=100)


if __name__ == "__main__":
    main()
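
# Launch the app with: streamlit run <this_file>.py
#
# Optional: persist the index so it is not rebuilt on every Streamlit rerun.
# This is only a sketch using llama_index's StorageContext / load_index_from_storage
# API; the "./storage" directory and the build_or_load_index helper are assumptions,
# not part of the original app.
#
# from llama_index import StorageContext, load_index_from_storage
#
# def build_or_load_index(data_path, service_context, persist_dir="./storage"):
#     try:
#         # Reuse a previously persisted index if one exists on disk
#         storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
#         return load_index_from_storage(storage_context, service_context=service_context)
#     except FileNotFoundError:
#         # Otherwise build the index from the documents and persist it for next time
#         documents = SimpleDirectoryReader(data_path).load_data()
#         index = VectorStoreIndex.from_documents(documents, service_context=service_context)
#         index.storage_context.persist(persist_dir=persist_dir)
#         return index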