md-vasim committed
Commit 81cf53b
1 Parent(s): c541d66

first commit

.gitignore ADDED
@@ -0,0 +1,3 @@
+ models
+ .venv
+ .env
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ WORKDIR /code
+
+ ENV REPO=TheBloke/Llama-2-7B-Chat-GGUF
+ ENV MODEL_NAME=llama-2-7b-chat.Q5_K_M.gguf
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ COPY . .
+
+ RUN huggingface-cli download \
+     ${REPO} \
+     ${MODEL_NAME} \
+     --local-dir . \
+     --local-dir-use-symlinks False
+
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,56 @@
+ from langchain.callbacks.manager import CallbackManager
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain_community.llms import LlamaCpp
+ import gradio as gr
+
+ MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+
+ TEMPLATE = """
+
+ You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.
+
+ Question: {question}
+
+ Answer: helpful answer"""
+
+ prompt = PromptTemplate.from_template(TEMPLATE)
+
+ # Callbacks support token-wise streaming
+ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+ # Make sure the model path is correct for your system!
+ llm = LlamaCpp(
+     model_path=MODEL_PATH,
+     temperature=0.75,
+     max_tokens=2000,
+     top_p=1,
+     callback_manager=callback_manager,
+     verbose=True,  # Verbose is required to pass to the callback manager
+ )
+
+ llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+ # question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+ # llm_chain.run(question)
+
+ title = "Welcome to Open Source LLM"
+
+ description = "This is a Llama-2-GGUF"
+
+ def answer_query(message, history):
+     message = llm_chain.run(message)
+     return message
+
+ # Gradio chat interface
+ gr.ChatInterface(
+     fn=answer_query,
+     title=title,
+     description=description,
+     examples=[
+         ["What is a Large Language Model?"],
+         ["What's 9+2-1?"],
+         ["Write Python code to print the Fibonacci sequence"]
+     ]
+ ).queue().launch(server_name="0.0.0.0")
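The TEMPLATE string drives every chat turn: answer_query drops the user message into the {question} slot and the chain sends the filled prompt to the model. A standalone sketch of what the model actually receives, reusing the same template text:

from langchain.prompts import PromptTemplate

TEMPLATE = """
You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.

Question: {question}

Answer: helpful answer"""

prompt = PromptTemplate.from_template(TEMPLATE)
# format() fills the {question} placeholder; the resulting string is what the
# LLMChain passes to the Llama-2 GGUF model for each chat message.
print(prompt.format(question="What is a Large Language Model?"))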
notebooks/Llama2_langchain_llama_cpp.ipynb ADDED
@@ -0,0 +1,419 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "id": "EGTI9yHm74B0"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "%%capture\n",
12
+ "!pip install huggingface-hub hf-transfer langchain llama-cpp-python"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "metadata": {
19
+ "id": "ao6p6SSd5VvW"
20
+ },
21
+ "outputs": [],
22
+ "source": [
23
+ "%%capture\n",
24
+ "# !CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 3,
30
+ "metadata": {
31
+ "id": "AOmrozm5GoZZ"
32
+ },
33
+ "outputs": [],
34
+ "source": [
35
+ "# !wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_M.gguf"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 4,
41
+ "metadata": {
42
+ "colab": {
43
+ "base_uri": "https://localhost:8080/"
44
+ },
45
+ "id": "FoxgM851hI5F",
46
+ "outputId": "fcc7276e-3d87-4e8a-cd10-ff533074d12b"
47
+ },
48
+ "outputs": [
49
+ {
50
+ "name": "stdout",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "downloading https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf to /root/.cache/huggingface/hub/tmp02ipqll0\n",
54
+ "llama-2-7b-chat.Q2_K.gguf: 100% 2.83G/2.83G [00:26<00:00, 107MB/s]\n",
55
+ "./llama-2-7b-chat.Q2_K.gguf\n"
56
+ ]
57
+ }
58
+ ],
59
+ "source": [
60
+ "import os\n",
61
+ "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n",
62
+ "\n",
63
+ "# !huggingface-cli download \\\n",
64
+ "# Deci/DeciLM-7B-instruct-GGUF \\\n",
65
+ "# decilm-7b-uniform-gqa-q8_0.gguf \\\n",
66
+ "# --local-dir . \\\n",
67
+ "# --local-dir-use-symlinks False\n",
68
+ "\n",
69
+ "!huggingface-cli download \\\n",
70
+ " TheBloke/Llama-2-7B-Chat-GGUF \\\n",
71
+ " llama-2-7b-chat.Q2_K.gguf \\\n",
72
+ " --local-dir . \\\n",
73
+ " --local-dir-use-symlinks False"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 5,
79
+ "metadata": {
80
+ "id": "176a5LS68sBI"
81
+ },
82
+ "outputs": [],
83
+ "source": [
84
+ "from langchain.callbacks.manager import CallbackManager\n",
85
+ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
86
+ "from langchain.chains import LLMChain\n",
87
+ "from langchain.prompts import PromptTemplate\n",
88
+ "from langchain_community.llms import LlamaCpp"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 6,
94
+ "metadata": {
95
+ "id": "E0nySsfAHmu_"
96
+ },
97
+ "outputs": [],
98
+ "source": [
99
+ "MODEL_PATH = \"llama-2-7b-chat.Q2_K.gguf\""
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 7,
105
+ "metadata": {
106
+ "id": "r_rEfQFfBYOb"
107
+ },
108
+ "outputs": [],
109
+ "source": [
110
+ "template = \"\"\"Question: {question}\n",
111
+ "\n",
112
+ "Answer: Let's work this out in a step by step way to be sure we have the right answer.\"\"\"\n",
113
+ "\n",
114
+ "prompt = PromptTemplate.from_template(template)"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 8,
120
+ "metadata": {
121
+ "id": "VR2kLDqLBY1A"
122
+ },
123
+ "outputs": [],
124
+ "source": [
125
+ "# Callbacks support token-wise streaming\n",
126
+ "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": 9,
132
+ "metadata": {
133
+ "colab": {
134
+ "base_uri": "https://localhost:8080/"
135
+ },
136
+ "id": "L_KBhPNmBbCV",
137
+ "outputId": "ed5292d0-67e6-4b91-b8e0-418dd92d2572"
138
+ },
139
+ "outputs": [
140
+ {
141
+ "name": "stderr",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q2_K.gguf (version GGUF V2)\n",
145
+ "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
146
+ "llama_model_loader: - kv 0: general.architecture str = llama\n",
147
+ "llama_model_loader: - kv 1: general.name str = LLaMA v2\n",
148
+ "llama_model_loader: - kv 2: llama.context_length u32 = 4096\n",
149
+ "llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n",
150
+ "llama_model_loader: - kv 4: llama.block_count u32 = 32\n",
151
+ "llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n",
152
+ "llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n",
153
+ "llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n",
154
+ "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n",
155
+ "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000001\n",
156
+ "llama_model_loader: - kv 10: general.file_type u32 = 10\n",
157
+ "llama_model_loader: - kv 11: tokenizer.ggml.model str = llama\n",
158
+ "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr[str,32000] = [\"<unk>\", \"<s>\", \"</s>\", \"<0x00>\", \"<...\n",
159
+ "llama_model_loader: - kv 13: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n",
160
+ "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
161
+ "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 = 1\n",
162
+ "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 = 2\n",
163
+ "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 = 0\n",
164
+ "llama_model_loader: - kv 18: general.quantization_version u32 = 2\n",
165
+ "llama_model_loader: - type f32: 65 tensors\n",
166
+ "llama_model_loader: - type q2_K: 65 tensors\n",
167
+ "llama_model_loader: - type q3_K: 160 tensors\n",
168
+ "llama_model_loader: - type q6_K: 1 tensors\n",
169
+ "llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n",
170
+ "llm_load_print_meta: format = GGUF V2\n",
171
+ "llm_load_print_meta: arch = llama\n",
172
+ "llm_load_print_meta: vocab type = SPM\n",
173
+ "llm_load_print_meta: n_vocab = 32000\n",
174
+ "llm_load_print_meta: n_merges = 0\n",
175
+ "llm_load_print_meta: n_ctx_train = 4096\n",
176
+ "llm_load_print_meta: n_embd = 4096\n",
177
+ "llm_load_print_meta: n_head = 32\n",
178
+ "llm_load_print_meta: n_head_kv = 32\n",
179
+ "llm_load_print_meta: n_layer = 32\n",
180
+ "llm_load_print_meta: n_rot = 128\n",
181
+ "llm_load_print_meta: n_embd_head_k = 128\n",
182
+ "llm_load_print_meta: n_embd_head_v = 128\n",
183
+ "llm_load_print_meta: n_gqa = 1\n",
184
+ "llm_load_print_meta: n_embd_k_gqa = 4096\n",
185
+ "llm_load_print_meta: n_embd_v_gqa = 4096\n",
186
+ "llm_load_print_meta: f_norm_eps = 0.0e+00\n",
187
+ "llm_load_print_meta: f_norm_rms_eps = 1.0e-06\n",
188
+ "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n",
189
+ "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
190
+ "llm_load_print_meta: n_ff = 11008\n",
191
+ "llm_load_print_meta: n_expert = 0\n",
192
+ "llm_load_print_meta: n_expert_used = 0\n",
193
+ "llm_load_print_meta: rope scaling = linear\n",
194
+ "llm_load_print_meta: freq_base_train = 10000.0\n",
195
+ "llm_load_print_meta: freq_scale_train = 1\n",
196
+ "llm_load_print_meta: n_yarn_orig_ctx = 4096\n",
197
+ "llm_load_print_meta: rope_finetuned = unknown\n",
198
+ "llm_load_print_meta: model type = 7B\n",
199
+ "llm_load_print_meta: model ftype = Q2_K - Medium\n",
200
+ "llm_load_print_meta: model params = 6.74 B\n",
201
+ "llm_load_print_meta: model size = 2.63 GiB (3.35 BPW) \n",
202
+ "llm_load_print_meta: general.name = LLaMA v2\n",
203
+ "llm_load_print_meta: BOS token = 1 '<s>'\n",
204
+ "llm_load_print_meta: EOS token = 2 '</s>'\n",
205
+ "llm_load_print_meta: UNK token = 0 '<unk>'\n",
206
+ "llm_load_print_meta: LF token = 13 '<0x0A>'\n",
207
+ "llm_load_tensors: ggml ctx size = 0.11 MiB\n",
208
+ "llm_load_tensors: CPU buffer size = 2694.32 MiB\n",
209
+ ".................................................................................................\n",
210
+ "llama_new_context_with_model: n_ctx = 512\n",
211
+ "llama_new_context_with_model: freq_base = 10000.0\n",
212
+ "llama_new_context_with_model: freq_scale = 1\n",
213
+ "llama_kv_cache_init: CPU KV buffer size = 256.00 MiB\n",
214
+ "llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n",
215
+ "llama_new_context_with_model: CPU input buffer size = 0.14 MiB\n",
216
+ "llama_new_context_with_model: CPU compute buffer size = 1.10 MiB\n",
217
+ "llama_new_context_with_model: graph splits (measure): 1\n",
218
+ "AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | \n",
219
+ "Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000001', 'llama.rope.dimension_count': '128', 'llama.attention.head_count': '32', 'tokenizer.ggml.bos_token_id': '1', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '10'}\n"
220
+ ]
221
+ }
222
+ ],
223
+ "source": [
224
+ "# Make sure the model path is correct for your system!\n",
225
+ "llm = LlamaCpp(\n",
226
+ " model_path=MODEL_PATH,\n",
227
+ " temperature=0.75,\n",
228
+ " max_tokens=2000,\n",
229
+ " top_p=1,\n",
230
+ " callback_manager=callback_manager,\n",
231
+ " verbose=True, # Verbose is required to pass to the callback manager\n",
232
+ ")"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 10,
238
+ "metadata": {
239
+ "colab": {
240
+ "base_uri": "https://localhost:8080/",
241
+ "height": 1000
242
+ },
243
+ "id": "crv_Wu52Bdz_",
244
+ "outputId": "4b45a176-4503-4bf7-8fb7-0bc949eed169"
245
+ },
246
+ "outputs": [
247
+ {
248
+ "name": "stdout",
249
+ "output_type": "stream",
250
+ "text": [
251
+ "\n",
252
+ "Stephen Colbert:"
253
+ ]
254
+ },
255
+ {
256
+ "name": "stderr",
257
+ "output_type": "stream",
258
+ "text": [
259
+ "ERROR:root:Internal Python error in the inspect module.\n",
260
+ "Below is the traceback from this internal error.\n",
261
+ "\n"
262
+ ]
263
+ },
264
+ {
265
+ "name": "stdout",
266
+ "output_type": "stream",
267
+ "text": [
268
+ "Traceback (most recent call last):\n",
269
+ " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n",
270
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n",
271
+ " File \"<ipython-input-10-a402e682f208>\", line 4, in <cell line: 4>\n",
272
+ " llm.invoke(prompt)\n",
273
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 273, in invoke\n",
274
+ " self.generate_prompt(\n",
275
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 568, in generate_prompt\n",
276
+ " return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)\n",
277
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 741, in generate\n",
278
+ " output = self._generate_helper(\n",
279
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 605, in _generate_helper\n",
280
+ " raise e\n",
281
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 592, in _generate_helper\n",
282
+ " self._generate(\n",
283
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 1177, in _generate\n",
284
+ " self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)\n",
285
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\", line 288, in _call\n",
286
+ " for chunk in self._stream(\n",
287
+ " File \"/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\", line 341, in _stream\n",
288
+ " for part in result:\n",
289
+ " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 978, in _create_completion\n",
290
+ " for token in self.generate(\n",
291
+ " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 663, in generate\n",
292
+ " self.eval(tokens)\n",
293
+ " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 503, in eval\n",
294
+ " self._ctx.decode(self._batch)\n",
295
+ " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/_internals.py\", line 305, in decode\n",
296
+ " return_code = llama_cpp.llama_decode(\n",
297
+ " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama_cpp.py\", line 1636, in llama_decode\n",
298
+ " return _lib.llama_decode(ctx, batch)\n",
299
+ "KeyboardInterrupt\n",
300
+ "\n",
301
+ "During handling of the above exception, another exception occurred:\n",
302
+ "\n",
303
+ "Traceback (most recent call last):\n",
304
+ " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n",
305
+ " stb = value._render_traceback_()\n",
306
+ "AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'\n",
307
+ "\n",
308
+ "During handling of the above exception, another exception occurred:\n",
309
+ "\n",
310
+ "Traceback (most recent call last):\n",
311
+ " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n",
312
+ " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n",
313
+ " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n",
314
+ " return f(*args, **kwargs)\n",
315
+ " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n",
316
+ " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n",
317
+ " File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n",
318
+ " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n",
319
+ " File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n",
320
+ " filename = getsourcefile(frame) or getfile(frame)\n",
321
+ " File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n",
322
+ " module = getmodule(object, filename)\n",
323
+ " File \"/usr/lib/python3.10/inspect.py\", line 878, in getmodule\n",
324
+ " os.path.realpath(f)] = module.__name__\n",
325
+ " File \"/usr/lib/python3.10/posixpath.py\", line 396, in realpath\n",
326
+ " path, ok = _joinrealpath(filename[:0], filename, strict, {})\n",
327
+ " File \"/usr/lib/python3.10/posixpath.py\", line 429, in _joinrealpath\n",
328
+ " newpath = join(path, name)\n",
329
+ " File \"/usr/lib/python3.10/posixpath.py\", line 71, in join\n",
330
+ " def join(a, *p):\n",
331
+ "KeyboardInterrupt\n"
332
+ ]
333
+ },
334
+ {
335
+ "ename": "TypeError",
336
+ "evalue": "object of type 'NoneType' has no len()",
337
+ "output_type": "error",
338
+ "traceback": [
339
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
340
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
341
+ " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n",
342
+ "\u001b[0;32m<ipython-input-10-a402e682f208>\u001b[0m in \u001b[0;36m<cell line: 4>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \"\"\"\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minvoke\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
343
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, stop, **kwargs)\u001b[0m\n\u001b[1;32m 272\u001b[0m return (\n\u001b[0;32m--> 273\u001b[0;31m self.generate_prompt(\n\u001b[0m\u001b[1;32m 274\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
344
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36mgenerate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0mprompt_strings\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 568\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt_strings\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 569\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
345
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, prompts, stop, callbacks, tags, metadata, run_name, **kwargs)\u001b[0m\n\u001b[1;32m 740\u001b[0m ]\n\u001b[0;32m--> 741\u001b[0;31m output = self._generate_helper(\n\u001b[0m\u001b[1;32m 742\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_managers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_arg_supported\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
346
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_llm_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mLLMResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenerations\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 605\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 606\u001b[0m \u001b[0mflattened_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
347
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 591\u001b[0m output = (\n\u001b[0;32m--> 592\u001b[0;31m self._generate(\n\u001b[0m\u001b[1;32m 593\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
348
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate\u001b[0;34m(self, prompts, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 1176\u001b[0m text = (\n\u001b[0;32m-> 1177\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrun_manager\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_arg_supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
349
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, prompt, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0mcombined_text_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 288\u001b[0;31m for chunk in self._stream(\n\u001b[0m\u001b[1;32m 289\u001b[0m \u001b[0mprompt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
350
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\u001b[0m in \u001b[0;36m_stream\u001b[0;34m(self, prompt, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 341\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mpart\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 342\u001b[0m \u001b[0mlogprobs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"choices\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"logprobs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
351
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36m_create_completion\u001b[0;34m(self, prompt, suffix, max_tokens, temperature, top_p, min_p, typical_p, logprobs, echo, stop, frequency_penalty, presence_penalty, repeat_penalty, top_k, stream, seed, tfs_z, mirostat_mode, mirostat_tau, mirostat_eta, model, stopping_criteria, logits_processor, grammar, logit_bias)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0mmultibyte_fix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 978\u001b[0;31m for token in self.generate(\n\u001b[0m\u001b[1;32m 979\u001b[0m \u001b[0mprompt_tokens\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
352
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, tokens, top_k, top_p, min_p, typical_p, temp, repeat_penalty, reset, frequency_penalty, presence_penalty, tfs_z, mirostat_mode, mirostat_tau, mirostat_eta, penalize_nl, logits_processor, stopping_criteria, grammar)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtokens\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0msample_idx\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_tokens\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
353
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36meval\u001b[0;34m(self, tokens)\u001b[0m\n\u001b[1;32m 502\u001b[0m )\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m \u001b[0;31m# Save tokens\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
354
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/_internals.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 305\u001b[0;31m return_code = llama_cpp.llama_decode(\n\u001b[0m\u001b[1;32m 306\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
355
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama_cpp.py\u001b[0m in \u001b[0;36mllama_decode\u001b[0;34m(ctx, batch)\u001b[0m\n\u001b[1;32m 1635\u001b[0m < 0 - error\"\"\"\n\u001b[0;32m-> 1636\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllama_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1637\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
356
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
357
+ "\nDuring handling of the above exception, another exception occurred:\n",
358
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
359
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mshowtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n\u001b[1;32m 2098\u001b[0m \u001b[0;31m# in the engines. This should return a list of strings.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2099\u001b[0;31m \u001b[0mstb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_render_traceback_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2100\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
360
+ "\u001b[0;31mAttributeError\u001b[0m: 'KeyboardInterrupt' object has no attribute '_render_traceback_'",
361
+ "\nDuring handling of the above exception, another exception occurred:\n",
362
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
363
+ " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n",
364
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mshowtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n\u001b[1;32m 2099\u001b[0m \u001b[0mstb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_render_traceback_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2100\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2101\u001b[0;31m stb = self.InteractiveTB.structured_traceback(etype,\n\u001b[0m\u001b[1;32m 2102\u001b[0m value, tb, tb_offset=tb_offset)\n\u001b[1;32m 2103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
365
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, value, tb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1365\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1366\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1367\u001b[0;31m return FormattedTB.structured_traceback(\n\u001b[0m\u001b[1;32m 1368\u001b[0m self, etype, value, tb, tb_offset, number_of_lines_of_context)\n\u001b[1;32m 1369\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
366
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, value, tb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1265\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose_modes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1266\u001b[0m \u001b[0;31m# Verbose modes need a full traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1267\u001b[0;31m return VerboseTB.structured_traceback(\n\u001b[0m\u001b[1;32m 1268\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0metype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb_offset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumber_of_lines_of_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1269\u001b[0m )\n",
367
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, evalue, etb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1122\u001b[0m \u001b[0;34m\"\"\"Return a nice text document describing the traceback.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1123\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1124\u001b[0;31m formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n\u001b[0m\u001b[1;32m 1125\u001b[0m tb_offset)\n\u001b[1;32m 1126\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
368
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mformat_exception_as_a_whole\u001b[0;34m(self, etype, evalue, etb, number_of_lines_of_context, tb_offset)\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1081\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1082\u001b[0;31m \u001b[0mlast_unique\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecursion_repeat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfind_recursion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0morig_etype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1084\u001b[0m \u001b[0mframes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat_records\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlast_unique\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecursion_repeat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
369
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mfind_recursion\u001b[0;34m(etype, value, records)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;31m# first frame (from in to out) that looks different.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_recursion_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0metype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 382\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 383\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;31m# Select filename, lineno, func_name to track frames with\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
370
+ "\u001b[0;31mTypeError\u001b[0m: object of type 'NoneType' has no len()"
371
+ ]
372
+ }
373
+ ],
374
+ "source": [
375
+ "prompt = \"\"\"\n",
376
+ "Question: A rap battle between Stephen Colbert and John Oliver\n",
377
+ "\"\"\"\n",
378
+ "llm.invoke(prompt)"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "metadata": {
385
+ "id": "Bdpj6esPBs4q"
386
+ },
387
+ "outputs": [],
388
+ "source": [
389
+ "llm_chain = LLMChain(prompt=prompt, llm=llm)"
390
+ ]
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "execution_count": null,
395
+ "metadata": {
396
+ "id": "Ex8ZzlTKBtlm"
397
+ },
398
+ "outputs": [],
399
+ "source": [
400
+ "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
401
+ "llm_chain.run(question)"
402
+ ]
403
+ }
404
+ ],
405
+ "metadata": {
406
+ "colab": {
407
+ "provenance": []
408
+ },
409
+ "kernelspec": {
410
+ "display_name": "Python 3",
411
+ "name": "python3"
412
+ },
413
+ "language_info": {
414
+ "name": "python"
415
+ }
416
+ },
417
+ "nbformat": 4,
418
+ "nbformat_minor": 0
419
+ }
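The final two cells of this notebook build an LLMChain and call .run() on a question string. A drop-in sketch of the same call using the newer dict-based invoke API (assuming prompt still refers to the PromptTemplate defined earlier in the notebook, not the later rap-battle string):

# invoke() returns a dict; the generated completion is under the "text" key.
llm_chain = LLMChain(prompt=prompt, llm=llm)
result = llm_chain.invoke(
    {"question": "What NFL team won the Super Bowl in the year Justin Bieber was born?"}
)
print(result["text"])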
notebooks/gradio-testing.ipynb ADDED
@@ -0,0 +1,232 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# %%capture\n",
10
+ "# !pip install huggingface-hub hf-transfer langchain llama-cpp-python langchain-community"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "from langchain.callbacks.manager import CallbackManager\n",
20
+ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
21
+ "from langchain.chains import LLMChain\n",
22
+ "from langchain.prompts import PromptTemplate\n",
23
+ "from langchain_community.llms import LlamaCpp\n",
24
+ "\n",
25
+ "import gradio as gr "
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 8,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "def build_llm_chain():\n",
35
+ "\n",
36
+ " MODEL_PATH = \"../models/llama-2-7b-chat.Q5_K_M.gguf\"\n",
37
+ "\n",
38
+ " template = \"\"\"\n",
39
+ "\n",
40
+ " You are a helpful AI Assistant created by Mohammed Vasim. He is an AI Engineer and Specialist.\n",
41
+ " \n",
42
+ " Question: {question}\n",
43
+ "\n",
44
+ " Answer: helpful answer\"\"\"\n",
45
+ "\n",
46
+ " prompt = PromptTemplate.from_template(template)\n",
47
+ "\n",
48
+ " # Callbacks support token-wise streaming\n",
49
+ " callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
50
+ "\n",
51
+ " # Make sure the model path is correct for your system!\n",
52
+ " llm = LlamaCpp(\n",
53
+ " model_path=MODEL_PATH,\n",
54
+ " temperature=0.75,\n",
55
+ " max_tokens=2000,\n",
56
+ " top_p=1,\n",
57
+ " callback_manager=callback_manager,\n",
58
+ " verbose=True, # Verbose is required to pass to the callback manager\n",
59
+ " )\n",
60
+ "\n",
61
+ " llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
62
+ "\n",
63
+ " # question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
64
+ " # llm_chain.run(question)\n",
65
+ "\n",
66
+ " return llm_chain"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 9,
72
+ "metadata": {},
73
+ "outputs": [
74
+ {
75
+ "name": "stderr",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../models/llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2)\n",
79
+ "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
80
+ "llama_model_loader: - kv 0: general.architecture str = llama\n",
81
+ "llama_model_loader: - kv 1: general.name str = LLaMA v2\n",
82
+ "llama_model_loader: - kv 2: llama.context_length u32 = 4096\n",
83
+ "llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n",
84
+ "llama_model_loader: - kv 4: llama.block_count u32 = 32\n",
85
+ "llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n",
86
+ "llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n",
87
+ "llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n",
88
+ "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n",
89
+ "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000001\n",
90
+ "llama_model_loader: - kv 10: general.file_type u32 = 17\n",
91
+ "llama_model_loader: - kv 11: tokenizer.ggml.model str = llama\n",
92
+ "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr[str,32000] = [\"<unk>\", \"<s>\", \"</s>\", \"<0x00>\", \"<...\n",
93
+ "llama_model_loader: - kv 13: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n",
94
+ "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
95
+ "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 = 1\n",
96
+ "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 = 2\n",
97
+ "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 = 0\n",
98
+ "llama_model_loader: - kv 18: general.quantization_version u32 = 2\n",
99
+ "llama_model_loader: - type f32: 65 tensors\n",
100
+ "llama_model_loader: - type q5_K: 193 tensors\n",
101
+ "llama_model_loader: - type q6_K: 33 tensors\n"
102
+ ]
103
+ },
104
+ {
105
+ "name": "stderr",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n",
109
+ "llm_load_print_meta: format = GGUF V2\n",
110
+ "llm_load_print_meta: arch = llama\n",
111
+ "llm_load_print_meta: vocab type = SPM\n",
112
+ "llm_load_print_meta: n_vocab = 32000\n",
113
+ "llm_load_print_meta: n_merges = 0\n",
114
+ "llm_load_print_meta: n_ctx_train = 4096\n",
115
+ "llm_load_print_meta: n_embd = 4096\n",
116
+ "llm_load_print_meta: n_head = 32\n",
117
+ "llm_load_print_meta: n_head_kv = 32\n",
118
+ "llm_load_print_meta: n_layer = 32\n",
119
+ "llm_load_print_meta: n_rot = 128\n",
120
+ "llm_load_print_meta: n_embd_head_k = 128\n",
121
+ "llm_load_print_meta: n_embd_head_v = 128\n",
122
+ "llm_load_print_meta: n_gqa = 1\n",
123
+ "llm_load_print_meta: n_embd_k_gqa = 4096\n",
124
+ "llm_load_print_meta: n_embd_v_gqa = 4096\n",
125
+ "llm_load_print_meta: f_norm_eps = 0.0e+00\n",
126
+ "llm_load_print_meta: f_norm_rms_eps = 1.0e-06\n",
127
+ "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n",
128
+ "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
129
+ "llm_load_print_meta: n_ff = 11008\n",
130
+ "llm_load_print_meta: n_expert = 0\n",
131
+ "llm_load_print_meta: n_expert_used = 0\n",
132
+ "llm_load_print_meta: rope scaling = linear\n",
133
+ "llm_load_print_meta: freq_base_train = 10000.0\n",
134
+ "llm_load_print_meta: freq_scale_train = 1\n",
135
+ "llm_load_print_meta: n_yarn_orig_ctx = 4096\n",
136
+ "llm_load_print_meta: rope_finetuned = unknown\n",
137
+ "llm_load_print_meta: model type = 7B\n",
138
+ "llm_load_print_meta: model ftype = Q5_K - Medium\n",
139
+ "llm_load_print_meta: model params = 6.74 B\n",
140
+ "llm_load_print_meta: model size = 4.45 GiB (5.68 BPW) \n",
141
+ "llm_load_print_meta: general.name = LLaMA v2\n",
142
+ "llm_load_print_meta: BOS token = 1 '<s>'\n",
143
+ "llm_load_print_meta: EOS token = 2 '</s>'\n",
144
+ "llm_load_print_meta: UNK token = 0 '<unk>'\n",
145
+ "llm_load_print_meta: LF token = 13 '<0x0A>'\n",
146
+ "llm_load_tensors: ggml ctx size = 0.11 MiB\n",
147
+ "llm_load_tensors: CPU buffer size = 4560.87 MiB\n",
148
+ "...................................................................................................\n",
149
+ "llama_new_context_with_model: n_ctx = 512\n",
150
+ "llama_new_context_with_model: freq_base = 10000.0\n",
151
+ "llama_new_context_with_model: freq_scale = 1\n",
152
+ "llama_kv_cache_init: CPU KV buffer size = 256.00 MiB\n",
153
+ "llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n",
154
+ "llama_new_context_with_model: CPU input buffer size = 0.14 MiB\n",
155
+ "llama_new_context_with_model: CPU compute buffer size = 1.10 MiB\n",
156
+ "llama_new_context_with_model: graph splits (measure): 1\n",
157
+ "AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | \n",
158
+ "Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000001', 'llama.rope.dimension_count': '128', 'llama.attention.head_count': '32', 'tokenizer.ggml.bos_token_id': '1', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '17'}\n"
159
+ ]
160
+ },
161
+ {
162
+ "ename": "ValidationError",
163
+ "evalue": "1 validation error for LlamaCpp\ncallback_manager\n instance of BaseCallbackManager expected (type=type_error.arbitrary_type; expected_arbitrary_type=BaseCallbackManager)",
164
+ "output_type": "error",
165
+ "traceback": [
166
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
167
+ "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
168
+ "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m title \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWelcome Open Source LLM\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3\u001b[0m description \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a Llama-2-GGUF\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 5\u001b[0m chain \u001b[38;5;241m=\u001b[39m \u001b[43mbuild_llm_chain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manswer_query\u001b[39m(message, history):\n\u001b[1;32m 8\u001b[0m message \u001b[38;5;241m=\u001b[39m chain\u001b[38;5;241m.\u001b[39mrun(message)\n",
169
+ "Cell \u001b[0;32mIn[8], line 19\u001b[0m, in \u001b[0;36mbuild_llm_chain\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m callback_manager \u001b[38;5;241m=\u001b[39m CallbackManager([StreamingStdOutCallbackHandler()])\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# Make sure the model path is correct for your system!\u001b[39;00m\n\u001b[0;32m---> 19\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mLlamaCpp\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mMODEL_PATH\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.75\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback_manager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Verbose is required to pass to the callback manager\u001b[39;49;00m\n\u001b[1;32m 26\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m llm_chain \u001b[38;5;241m=\u001b[39m LLMChain(prompt\u001b[38;5;241m=\u001b[39mprompt, llm\u001b[38;5;241m=\u001b[39mllm)\n\u001b[1;32m 30\u001b[0m \u001b[38;5;66;03m# question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# llm_chain.run(question)\u001b[39;00m\n",
170
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/langchain_core/load/serializable.py:107\u001b[0m, in \u001b[0;36mSerializable.__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lc_kwargs \u001b[38;5;241m=\u001b[39m kwargs\n",
171
+ "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pydantic/v1/main.py:341\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(__pydantic_self__, **data)\u001b[0m\n\u001b[1;32m 339\u001b[0m values, fields_set, validation_error \u001b[38;5;241m=\u001b[39m validate_model(__pydantic_self__\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m, data)\n\u001b[1;32m 340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validation_error:\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m validation_error\n\u001b[1;32m 342\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 343\u001b[0m object_setattr(__pydantic_self__, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__dict__\u001b[39m\u001b[38;5;124m'\u001b[39m, values)\n",
172
+ "\u001b[0;31mValidationError\u001b[0m: 1 validation error for LlamaCpp\ncallback_manager\n instance of BaseCallbackManager expected (type=type_error.arbitrary_type; expected_arbitrary_type=BaseCallbackManager)"
173
+ ]
174
+ }
175
+ ],
176
+ "source": [
177
+ "\n",
178
+ "title = \"Welcome Open Source LLM\"\n",
179
+ "\n",
180
+ "description = \"This is a Llama-2-GGUF\"\n",
181
+ "\n",
182
+ "chain = build_llm_chain()\n",
183
+ "\n",
184
+ "def answer_query(message, history):\n",
185
+ " message = chain.run(message)\n",
186
+ " return message \n",
187
+ "\n",
188
+ "# Gradio chat interface\n",
189
+ "gr.ChatInterface(\n",
190
+ " fn=answer_query,\n",
191
+ " title=title,\n",
192
+ " description=description,\n",
193
+ " additional_inputs=[gr.Textbox(\"You are helpful assistant.\")],\n",
194
+ " additional_inputs_accordion=\"📝 System prompt\",\n",
195
+ " examples=[\n",
196
+ " [\"What is a Large Language Model?\"],\n",
197
+ " [\"What's 9+2-1?\"],\n",
198
+ " [\"Write Python code to print the Fibonacci sequence\"]\n",
199
+ " ]\n",
200
+ ").queue().launch(server_name=\"0.0.0.0\")"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": []
209
+ }
210
+ ],
211
+ "metadata": {
212
+ "kernelspec": {
213
+ "display_name": "Python 3",
214
+ "language": "python",
215
+ "name": "python3"
216
+ },
217
+ "language_info": {
218
+ "codemirror_mode": {
219
+ "name": "ipython",
220
+ "version": 3
221
+ },
222
+ "file_extension": ".py",
223
+ "mimetype": "text/x-python",
224
+ "name": "python",
225
+ "nbconvert_exporter": "python",
226
+ "pygments_lexer": "ipython3",
227
+ "version": "3.0.0"
228
+ }
229
+ },
230
+ "nbformat": 4,
231
+ "nbformat_minor": 2
232
+ }
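The ValidationError in the last executed cell is the CallbackManager instance failing pydantic's BaseCallbackManager check, which typically points to mismatched langchain / langchain-core installations in the local environment (the traceback mixes ~/.local and system site-packages). A workaround sketch that skips CallbackManager entirely and hands the handler list straight to the model via the callbacks argument, with the same parameters and model path as the notebook:

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="../models/llama-2-7b-chat.Q5_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callbacks=[StreamingStdOutCallbackHandler()],  # no CallbackManager wrapper needed
    verbose=True,
)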