import os
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
import gradio as gr

# Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive/')
# !ls /content/drive/My\ Drive/stores/enron_cosine/

# The LLM that generates answers from the content retrieved from the vector store.
local_llm = "TheBloke/zephyr-7B-beta-GGUF"

config = {
    # ctransformers settings: context_length is the prompt window,
    # max_new_tokens caps the length of the generated answer.
    "context_length": 4096,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.1,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "threads": int(os.cpu_count() / 2),
}

llm_init = CTransformers(model=local_llm, model_type="mistral", lib="avx2", config=config)
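
# The GGUF repo above contains several quantisation variants. If the default file
# selection does not work, a specific file can be pinned via model_file (the file
# name below is an assumption about the repo contents, not taken from this script):
# llm_init = CTransformers(model=local_llm, model_file="zephyr-7b-beta.Q4_K_M.gguf",
#                          model_type="mistral", lib="avx2", config=config)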

prompt_template = """Use the following piece of information to answers the question asked by the user.
Don't try to make up the answer if you don't know the answer, simply say I don't know.

Context: {context}
Question: {question}

Only helpful answer below.
Helpful answer:
"""

# The embedding model used to embed user queries; it must match the model used to build the vector store.
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
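# Note: the "enron_cosine" directory name suggests the index was built with cosine
# similarity; cosine similarity is scale-invariant, so leaving normalization off
# does not change the retrieval ranking.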

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

load_vector_store = Chroma(
    persist_directory="./stores/enron_cosine", embedding_function=embeddings
)
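
# This script assumes a Chroma index already exists at ./stores/enron_cosine.
# A minimal sketch of how such a store could be built (the loader path, glob and
# chunk sizes below are assumptions for illustration, not taken from this repository):
#
# from langchain.document_loaders import DirectoryLoader, TextLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
#
# loader = DirectoryLoader("./data/enron", glob="**/*.txt", loader_cls=TextLoader)
# chunks = RecursiveCharacterTextSplitter(
#     chunk_size=1000, chunk_overlap=100
# ).split_documents(loader.load())
# store = Chroma.from_documents(
#     chunks,
#     embeddings,
#     collection_metadata={"hnsw:space": "cosine"},
#     persist_directory="./stores/enron_cosine",
# )
# store.persist()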

retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
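# k=1 retrieves only the single most similar chunk; increasing k gives the LLM more
# context per question at the cost of a longer prompt.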

print("retrieval", retriever)

#query = "In what context is mentioned Natural Gas Storage Overview?"
#semantic_search = retriever.get_relevant_documents(query)
#print(semantic_search)

# chain_type_kwargs = {"prompt": prompt}

# qa = RetrievalQA.from_chain_type(
#     llm=llm_init,
#     chain_type="stuff",
#     retriever=retriever,
#     verbose=True,
#     chain_type_kwargs=chain_type_kwargs,
#     return_source_documents=True,
# )


sample_query = []  # example queries shown in the Gradio UI (none provided here)

def get_response(user_query):
    print("Query:", user_query)
    # The chain is rebuilt on every request here; it could equally be built once at module level.
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(
        llm=llm_init,
        chain_type="stuff",
        retriever=retriever,
        verbose=True,
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )
    response = qa(user_query)
    print("Response:", response)
    # With return_source_documents=True the chain returns a dict; only the generated
    # answer is passed back to the text output.
    return response["result"]

query_input = gr.Text(
    label="Query",
    show_label=True,
    max_lines=2,
    container=False,
    placeholder="Enter your question",
)

gIface = gr.Interface(
    fn=get_response,
    inputs=query_input,
    outputs="text",
    title="Enron Emails RAG AI",
    description="RAG demo using Zephyr 7B Beta and LangChain",
    examples=sample_query,
    allow_flagging="never",
)

gIface.launch()
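
# When running inside a notebook or Colab, a public share link can be requested instead:
# gIface.launch(share=True)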