SergeyHakim committed
Commit e8d9a2a · verified · 1 Parent(s): 2db6d6a

Upload 4 files
Files changed (4)
  1. README.md +12 -0
  2. app.py +179 -0
  3. gitattributes +35 -0
  4. requirements.txt +11 -0
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Open Source LLM Chatbot
+ emoji: 🤖
+ colorFrom: indigo
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 4.14.0
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,179 @@
+ from huggingface_hub import hf_hub_download
+ import gradio as gr
+ from llama_index.llms import LlamaCPP
+ from llama_index.llms.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
+ # Local cache directory for the downloaded GGUF model files.
+ MODELS_PATH = "./models"
+
+ # Hub repository and file name for each supported model.
+ model_info = {
+     "Llama-2-13B-chat": {
+         "repo_id": "TheBloke/Llama-2-13B-chat-GGUF",
+         "filename": "llama-2-13b-chat.Q4_K_M.gguf",
+     },
+     "Mistral-7B-Instruct-v0.2": {
+         "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+         "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+     },
+     "zephyr-7B-beta": {
+         "repo_id": "TheBloke/zephyr-7B-beta-GGUF",
+         "filename": "zephyr-7b-beta.Q4_K_M.gguf",
+     },
+     "vicuna-7B-v1.5": {
+         "repo_id": "TheBloke/vicuna-7B-v1.5-GGUF",
+         "filename": "vicuna-7b-v1.5.Q4_K_M.gguf",
+     },
+     "CodeLlama-7B": {
+         "repo_id": "TheBloke/CodeLlama-7B-GGUF",
+         "filename": "codellama-7b.Q4_K_M.gguf",
+     },
+     # "Falcon-7B-Instruct": {
+     #     "repo_id": "TheBloke/Falcon-7B-Instruct-GGML",
+     #     "filename": "falcon-7b-instruct.ggccv1.q4_1.bin",
+     # },
+ }
+ models = list(model_info.keys())
+
+
+ def download(model):
+     """Download a model's GGUF file from the Hub and return its local path."""
+     repo_id = model_info[model]["repo_id"]
+     filename = model_info[model]["filename"]
+
+     model_path = hf_hub_download(
+         repo_id=repo_id,
+         filename=filename,
+         resume_download=True,
+         cache_dir=MODELS_PATH,
+     )
+
+     return model_path
+
+
+ # Pre-download every supported model at startup so switching models is fast.
+ for model_name in models:
+     download(model_name)
+
+ # Default model: Mistral-7B-Instruct-v0.2.
+ mistral_model_path = hf_hub_download(
+     repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+     filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+     resume_download=True,
+     cache_dir=MODELS_PATH,
+ )
+
+ # If you run on a GPU, set n_gpu_layers to at least 1 (-1 offloads all layers);
+ # increase or decrease it based on your GPU's memory and performance.
+ llm = LlamaCPP(
+     # Path to a pre-downloaded model (a model_url can be passed instead).
+     model_path=mistral_model_path,
+     temperature=0.1,
+     max_new_tokens=256,
+     # Llama 2 has a 4096-token context window; keep some headroom.
+     context_window=3900,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__(); -1 offloads all layers to the GPU.
+     model_kwargs={"n_gpu_layers": -1},
+     # Transform inputs into the Llama 2 chat format.
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+
+ def model_initialization(model):
+     """Download the selected model and rebuild the global LlamaCPP instance."""
+     global llm
+
+     if model == "" or model not in model_info:
+         gr.Warning("Please select a model")
+         return "No model selected; the previous model is still active"
+
+     gr.Info("Model download and configuration started, please wait...")
+     model_path = hf_hub_download(
+         repo_id=model_info[model]["repo_id"],
+         filename=model_info[model]["filename"],
+         resume_download=True,
+         cache_dir=MODELS_PATH,
+     )
+
+     llm = LlamaCPP(
+         model_path=model_path,
+         temperature=0.1,
+         max_new_tokens=256,
+         context_window=3900,
+         generate_kwargs={},
+         # Set n_gpu_layers to at least 1 to use the GPU; -1 offloads all layers.
+         model_kwargs={"n_gpu_layers": -1},
+         messages_to_prompt=messages_to_prompt,
+         completion_to_prompt=completion_to_prompt,
+         verbose=True,
+     )
+     gr.Info("The model is configured and ready to chat")
+     return "The model is configured and ready to chat; your current model is " + model
+
+
+ def predict(message, history):
+     """Stream the model's completion back to the chat UI token by token."""
+     answer = []
+     response = llm.stream_complete(message)
+     for bot_response in response:
+         token = bot_response.delta
+         answer.append(token)
+         final_answer = "".join(answer)
+         yield final_answer
+
+
+ with gr.Blocks() as UI:
+     model_dropdown = gr.Dropdown(
+         ["CodeLlama-7B", "Llama-2-13B-chat", "Mistral-7B-Instruct-v0.2",
+          "zephyr-7B-beta", "vicuna-7B-v1.5"],
+         value="Mistral-7B-Instruct-v0.2",
+         label="Select a model",
+         info="The default model is Mistral-7B-Instruct-v0.2",
+     )
+     textInfo = gr.Textbox(value="Current model is Mistral-7B-Instruct-v0.2", label="Model Status")
+     # Chatbot interface
+     chatUI = gr.ChatInterface(
+         predict,
+         title="Open Source LLM ChatBot",
+         description="Ask any question",
+         theme="soft",
+         examples=[
+             "Hello",
+             "Are you an LLM?",
+             "How can I fine-tune a pre-trained LLM?",
+             "How can I build a chatbot using a local open-source LLM?",
+         ],
+         cache_examples=False,
+         submit_btn="Send Message",
+         retry_btn=None,
+         undo_btn="Delete Previous",
+         clear_btn="Clear",
+     )
+
+     # Re-initialize the LLM whenever a different model is selected.
+     model_dropdown.change(fn=model_initialization, inputs=[model_dropdown], outputs=[textInfo])
+
+ if __name__ == "__main__":
+     UI.launch(debug=True)
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ # Gradio UI
+ gradio
+
+ # Python bindings for llama.cpp (runs the local GGUF models)
+ llama-cpp-python
+
+ # Hugging Face Hub client, used to download the models
+ huggingface_hub
+
+ # LlamaIndex (provides the LlamaCPP wrapper and prompt utilities)
+ llama-index