"""Cloudflare RAG: a Streamlit app that answers questions over local documents
using a locally run Mistral-7B (llama.cpp) model and a FAISS vector index."""

import logging
import sys

import streamlit as st
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex
from llama_index.embeddings import LangchainEmbedding
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt


# Set up logging
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

def configure_llama_model():
    # Download a 4-bit quantized Mistral-7B-Instruct GGUF and run it locally via llama.cpp.
    model_url = 'https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf'
    llm = LlamaCPP(
        model_url=model_url,
        temperature=0.1,
        max_new_tokens=256,
        context_window=3900,
        model_kwargs={"n_gpu_layers": -1},  # offload all layers to the GPU when available
        messages_to_prompt=messages_to_prompt,  # format chat messages for Mistral-Instruct
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm
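
# Minimal smoke test (hypothetical, left commented out so it does not re-run on every
# Streamlit rerun): LlamaCPP implements the llama_index LLM interface, so
# `.complete(prompt)` returns a CompletionResponse whose `.text` holds the output.
#
#   llm = configure_llama_model()
#   print(llm.complete("Briefly describe Cloudflare.").text)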

def configure_embeddings():
    # Wrap the LangChain HuggingFace embeddings so llama_index can consume them.
    embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name="ggrn/e5-small-v2"))
    return embed_model
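
# Note: e5-family models are trained with "query: " / "passage: " prefixes, so adding
# them usually improves retrieval. A sketch of embedding a query through the wrapper
# (get_query_embedding is the llama_index BaseEmbedding API; the query is illustrative):
#
#   embed_model = configure_embeddings()
#   vec = embed_model.get_query_embedding("query: What does Cloudflare's CDN cache?")
#   print(len(vec))  # e5-small-v2 produces 384-dimensional vectors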




def configure_service_context(llm, embed_model):
    return ServiceContext.from_defaults(chunk_size=256, llm=llm, embed_model=embed_model)
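
# chunk_size=256 trades context for precision: smaller chunks match queries more
# exactly but carry less surrounding text per hit. The same ServiceContext API also
# accepts a chunk overlap (values here are illustrative):
#
#   service_context = ServiceContext.from_defaults(
#       chunk_size=256, chunk_overlap=32, llm=llm, embed_model=embed_model
#   )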

def initialize_vector_store_index(data_path, service_context):
    documents = SimpleDirectoryReader(data_path).load_data()

    # Alternative paths kept for reference: build a llama_index index directly, or
    # restore a previously pickled one instead of re-embedding on every start.
    #   index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    #   import pickle
    #   with open('./index_file.pkl', 'rb') as f:
    #       index = pickle.load(f)

    # Build a LangChain FAISS index over the documents, converting the llama_index
    # documents to LangChain format first so FAISS.from_documents accepts them.
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    langchain_docs = [doc.to_langchain_format() for doc in documents]
    index = FAISS.from_documents(langchain_docs, embeddings)
    return index
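
# Retrieval sketch (hypothetical query): FAISS.similarity_search returns the k most
# similar chunks as LangChain Documents (k defaults to 4), and
# similarity_search_with_score additionally exposes the L2 distance (lower is
# closer), which is handy for dropping weak matches:
#
#   index = initialize_vector_store_index("./", service_context)
#   hits = index.similarity_search_with_score("What is a Cloudflare Worker?", k=4)
#   strong = [doc for doc, score in hits if score < 1.0]  # threshold is illustrative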

def main():
    st.title("Cloudflare RAG")

    # Configure and initialize components
    llm = configure_llama_model()
    embed_model = configure_embeddings()
    service_context = configure_service_context(llm, embed_model)
    index = initialize_vector_store_index("./", service_context)

    # User input
    user_input = st.text_input("Enter your message:")

    if user_input:
        # Retrieve the most relevant chunks, then have the local LLM answer from them
        docs = index.similarity_search(user_input)
        context = "\n\n".join(doc.page_content for doc in docs)
        response = llm.complete(
            "Answer the question using only the context below.\n\n"
            f"Context:\n{context}\n\nQuestion: {user_input}\nAnswer:"
        )

        # Display response
        st.text_area("Response:", response.text, height=100)

if __name__ == "__main__":
    main()
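
# To launch the UI (assuming this script is saved as app.py):
#   streamlit run app.py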