mintaeng committed on
Commit
673d163
•
1 Parent(s): 3e9acda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -49
app.py CHANGED
@@ -1,63 +1,106 @@
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            Empty or falsy entries are left out of the request.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named ``chunk`` so it no longer shadows the
    # ``message`` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        # A streamed chunk may carry no choices or a ``None`` delta content;
        # appending that to a str would raise TypeError.
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response
41
 
 
 
42
  """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
  )
60
 
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import PromptTemplate, LLMChain
2
+ from langchain.llms import CTransformers
3
+ import os
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
8
+ from io import BytesIO
9
+ from langchain.document_loaders import PyPDFLoader
10
  import gradio as gr
 
11
 
 
 
 
 
12
 
13
# File name of the local GGUF model passed to CTransformers.
local_llm = "final_model_maybe_gguf-unsloth.Q5_K_M.gguf"

# Generation settings for the local model.
config = {
    'max_new_tokens': 2048,
    'repetition_penalty': 1.1,
    'temperature': 0.6,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    # Use half of the available cores, but never fewer than one:
    # os.cpu_count() may return None, and halving a single core gives 0.
    'threads': max(1, (os.cpu_count() or 1) // 2),
}
24
 
25
# Load the local model through LangChain's CTransformers wrapper.
# The generation settings are passed through the wrapper's ``config``
# parameter: that is the field the wrapper hands to the ctransformers model
# loader, whereas settings splatted as top-level keyword arguments are not
# declared fields of the wrapper.
llm = CTransformers(
    model=local_llm,
    model_type="mistral",
    lib="avx2",  # for CPU use
    config=config,
)

print("LLM Initialized...")
33
 
 
34
 
35
+ prompt_template = """Use the following pieces of information to answer the user's question.
36
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
 
 
 
 
 
 
37
 
38
+ Context: {context}
39
+ Question: {question}
40
 
41
+ Only return the helpful answer below and nothing else.
42
+ Helpful answer:
43
  """
44
+
45
# Embedding model settings: run on CPU and leave the vectors un-normalized.
model_name = "ko-sroberta-multitask"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)

# Embedding function later handed to the Chroma vector store.
embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)
53
 
54
 
55
# Prompt with the two placeholders that prompt_template uses.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

# Open the persisted Chroma collection and expose it as a retriever that
# returns a single document per query (k=1).
load_vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="stores/pet_cosine",
)
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
58
+ # query = "what is the fastest speed for a greyhound dog?"
59
+ # semantic_search = retriever.get_relevant_documents(query)
60
+ # print(semantic_search)
61
+
62
+ print("######################################################################")
63
+
64
+ chain_type_kwargs = {"prompt": prompt}
65
+
66
+ # qa = RetrievalQA.from_chain_type(
67
+ # llm=llm,
68
+ # chain_type="stuff",
69
+ # retriever=retriever,
70
+ # return_source_documents = True,
71
+ # chain_type_kwargs= chain_type_kwargs,
72
+ # verbose=True
73
+ # )
74
+
75
+ # response = qa(query)
76
+
77
+ # print(response)
78
+
79
+ sample_prompts = ["what is the fastest speed for a greyhound dog?", "Why should we not feed chocolates to the dogs?", "Name two factors which might contribute to why some dogs might get scared?"]
80
+
81
def get_response(input):
    """Run the retrieval QA chain on ``input`` and return the chain's result.

    A "stuff" RetrievalQA chain is built on every call from the module-level
    ``llm``, ``retriever`` and ``prompt``, with source documents requested
    and verbose output enabled.

    NOTE(review): the chain's raw return value is passed back unchanged, so
    the Gradio "text" output shows that whole object rather than only the
    answer string. Confirm this is intended.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
        verbose=True,
    )
    return qa_chain(input)
87
+
88
# Single-line prompt box without a visible label or container.
input = gr.Text(
    placeholder="Enter your prompt",
    label="Prompt",
    show_label=False,
    container=False,
    max_lines=1,
)

# Wire the prompt box to get_response and show the result as text.
iface = gr.Interface(
    title="My Dog PetCare Bot",
    description="This is a RAG implementation based on Zephyr 7B Beta LLM.",
    fn=get_response,
    inputs=input,
    outputs="text",
    examples=sample_prompts,
    allow_screenshot=False,
    allow_flagging=False,
)

iface.launch()