isayahc committed on
Commit
488b112
1 Parent(s): 3e5c1f6

Create app.py

Files changed (1)
  1. app.py +111 -0
app.py ADDED
import os

from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
import gradio as gr

# Quantized Zephyr 7B Beta model in GGUF format, loaded from the working directory.
local_llm = "zephyr-7b-beta.Q4_K_S.gguf"
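
# The GGUF weights are assumed to already be on disk next to app.py. If they
# are not, one way to fetch them (a sketch; the repo id below is an assumption,
# not something this commit pins down) is via huggingface_hub:
#
#   from huggingface_hub import hf_hub_download
#
#   local_llm = hf_hub_download(
#       repo_id="TheBloke/zephyr-7B-beta-GGUF",   # assumed source repo
#       filename="zephyr-7b-beta.Q4_K_S.gguf",
#   )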

config = {
    "max_new_tokens": 1024,
    "repetition_penalty": 1.1,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "threads": int(os.cpu_count() / 2),  # use half the available CPU cores
}

llm_init = CTransformers(model=local_llm, model_type="mistral", lib="avx2", **config)

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below.
Helpful answer:
"""

# BGE embeddings on CPU; these settings must match the ones used when the
# vector store was built.
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Load the pre-built Chroma store and expose it as a top-1 retriever.
load_vector_store = Chroma(
    persist_directory="stores/dino_cosine", embedding_function=embeddings
)

retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
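
# The commit ships only the loader for "stores/dino_cosine"; a sketch of how
# such a store could have been built (the source path and splitter settings
# below are assumptions, not part of this commit):
#
#   from langchain.document_loaders import TextLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#
#   docs = TextLoader("data/dinosaur_diversity.txt").load()  # hypothetical path
#   chunks = RecursiveCharacterTextSplitter(
#       chunk_size=500, chunk_overlap=50                     # assumed sizes
#   ).split_documents(docs)
#   Chroma.from_documents(
#       chunks,
#       embeddings,
#       persist_directory="stores/dino_cosine",
#       collection_metadata={"hnsw:space": "cosine"},  # cosine, per the store name
#   ).persist()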

sample_query = [
    "How many genera of dinosaurs are currently known?",
    "What methods are used to account for the incompleteness of the fossil record?",
    "Were dinosaurs in decline before the Cretaceous-Tertiary boundary?",
]


def get_response(query):
    # Build the RetrievalQA chain per request; "stuff" inserts the retrieved
    # context directly into the prompt above.
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(
        llm=llm_init,
        chain_type="stuff",
        retriever=retriever,
        verbose=True,
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )
    response = qa(query)
    # The chain returns a dict; the text answer lives under "result".
    return response["result"]
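
# Quick smoke test, assuming the model file and the persisted vector store
# are both in place:
#
#   print(get_response(sample_query[0]))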

query_input = gr.Text(
    label="Query",
    show_label=True,
    max_lines=2,
    container=False,
    placeholder="Enter your question",
)

gIface = gr.Interface(
    fn=get_response,
    inputs=query_input,
    outputs="text",
    title="Dinosaurs Diversity RAG AI",
    description="RAG demo using Zephyr 7B Beta and LangChain",
    examples=sample_query,
    allow_flagging="never",
)

gIface.launch()