File size: 1,428 Bytes
e67fa80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e64423
e67fa80
 
 
1e64423
 
e67fa80
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers

# from langchain.chains import RetrievalQA
# from langchain import PromptTemplate

DB_FAISS_PATH = "mylib/vector_db"

# Initialize embeddings and database outside of functions
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
)
db = FAISS.load_local(DB_FAISS_PATH, embeddings)

# Initialize the LLM model once
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    # model= "meta-llama/Llama-2-7b-chat-hf".
    model_type="llama",
    max_new_tokens=512,
    temperature=0.5,
)

# Initialize the QA chain once
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=db.as_retriever(search_kwargs={'k': 10}), return_source_documents=True)
# retriever = db.as_retriever(search_kwargs={"k": 10})


def final_result(query):
    docs_and_scores = db.similarity_search_with_score(query, k=10)
    # response = retriever.get_relevant_documents(query)
    return docs_and_scores


if __name__ == "__main__":
    while True:
        user_query = input(
            "Please enter Retailer, Brand, or Category (type 'exit' to quit): "
        )
        if user_query == "exit":
            break
        llm_response = final_result(user_query)

        print(llm_response)