djangomango committed
Commit
657e6f4
1 Parent(s): 26224b0

Create app.py

Files changed (1)
  1. app.py +106 -0
app.py ADDED
@@ -0,0 +1,106 @@
+ import os
+
+ import gradio as gr
+ from langchain import PromptTemplate
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.llms import CTransformers
+ from langchain.vectorstores import Chroma
+
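+ # Quantized Zephyr 7B Beta weights in GGUF format, expected next to app.py.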
+ local_llm = "zephyr-7b-beta.Q5_K_S.gguf"
+
+ config = {
+     'max_new_tokens': 1024,
+     'repetition_penalty': 1.1,
+     'temperature': 0.1,
+     'top_k': 50,
+     'top_p': 0.9,
+     'stream': True,
+     'threads': int(os.cpu_count() / 2)
+ }
+
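+ # ctransformers runs the GGUF model on CPU; the sampling settings above are
+ # passed through the config dict.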
+ llm = CTransformers(
+     model=local_llm,
+     model_type="mistral",
+     lib="avx2",  # AVX2 build for CPU-only inference
+     config=config
+ )
+
+ print("LLM Initialized...")
+
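+ # Prompt that keeps answers grounded in the retrieved context.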
+ prompt_template = """Use the following pieces of information to answer the user's question.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+ Question: {question}
+
+ Only return the helpful answer below and nothing else.
+ Helpful answer:
+ """
+
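+ # The embedding model must match the one used when the Chroma store was built.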
+ model_name = "BAAI/bge-large-en"
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ embeddings = HuggingFaceBgeEmbeddings(
+     model_name=model_name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs
+ )
+
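+ # Load the persisted Chroma index and retrieve the single closest chunk per query.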
+ prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+ load_vector_store = Chroma(persist_directory="stores/res_cos", embedding_function=embeddings)
+ retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
+
+ # Quick retriever check:
+ # query = "what is the fastest speed for a greyhound dog?"
+ # semantic_search = retriever.get_relevant_documents(query)
+ # print(semantic_search)
+
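+ # Pass the custom prompt through to the "stuff" QA chain.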
+ chain_type_kwargs = {"prompt": prompt}
+
+ sample_prompts = [
+     "what is the fastest speed for a greyhound dog?",
+     "Why should we not feed chocolates to the dogs?",
+     "Name two factors which might contribute to why some dogs might get scared?"
+ ]
+
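+ # Build the RetrievalQA chain per request and return only the answer text.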
+ def get_response(user_input):
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         return_source_documents=True,
+         chain_type_kwargs=chain_type_kwargs,
+         verbose=True
+     )
+     response = qa(user_input)
+     return response['result']
+
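+ # Single-line textbox for the user's question.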
+ input_text = gr.Text(
+     label="Prompt",
+     show_label=False,
+     max_lines=1,
+     placeholder="Enter your prompt",
+     container=False,
+ )
+
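+ # Wire the UI to get_response; sample_prompts appear as one-click examples.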
+ iface = gr.Interface(
+     fn=get_response,
+     inputs=input_text,
+     outputs="text",
+     title="My Dog PetCare Bot",
+     description="This is a RAG implementation based on Zephyr 7B Beta LLM.",
+     examples=sample_prompts,
+     allow_flagging="never"
+ )
+
+ iface.launch()