first commit
- .gitignore +3 -0
- Dockerfile +23 -0
- app.py +56 -0
- notebooks/Llama2_langchain_llama_cpp.ipynb +419 -0
- notebooks/gradio-testing.ipynb +232 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+models
+.venv
+.env
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+WORKDIR /code
+
+ENV REPO=TheBloke/Llama-2-7B-Chat-GGUF
+ENV MODEL_NAME=llama-2-7b-chat.Q5_K_M.gguf
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+# download the GGUF weights into the image at build time
+RUN huggingface-cli download \
+    ${REPO} \
+    ${MODEL_NAME} \
+    --local-dir . \
+    --local-dir-use-symlinks False
+
+# app.py starts the Gradio server itself on port 7860
+CMD ["python", "app.py"]
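The Dockerfile installs dependencies from a requirements.txt, but that file is not part of this commit. A minimal sketch of what it would need to contain, judging only from the imports in app.py and the huggingface-cli download step (the package list is an assumption, versions left unpinned):

requirements.txt (sketch, not in this commit):
huggingface-hub
hf-transfer
langchain
langchain-community
llama-cpp-python
gradio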
app.py
ADDED
@@ -0,0 +1,56 @@
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain_community.llms import LlamaCpp
+import gradio as gr
+
+MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+
+TEMPLATE = """
+You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.
+
+Question: {question}
+
+Answer: helpful answer"""
+
+prompt = PromptTemplate.from_template(TEMPLATE)
+
+# Callbacks support token-wise streaming
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+# Make sure the model path is correct for your system!
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.75,
+    max_tokens=2000,
+    top_p=1,
+    callback_manager=callback_manager,
+    verbose=True,  # Verbose is required to pass to the callback manager
+)
+
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+# llm_chain.run(question)
+
+title = "Welcome to Open Source LLM"
+
+description = "This is a Llama-2-GGUF"
+
+def answer_query(message, history):
+    message = llm_chain.run(message)
+    return message
+
+# Gradio chat interface
+gr.ChatInterface(
+    fn=answer_query,
+    title=title,
+    description=description,
+    examples=[
+        ["What is a Large Language Model?"],
+        ["What's 9+2-1?"],
+        ["Write Python code to print the Fibonacci sequence"]
+    ]
+).queue().launch(server_name="0.0.0.0")
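Once the container (or a local `python app.py`) is running, the chat can also be exercised from code. A minimal sketch using gradio_client, assuming the default `/chat` endpoint that gr.ChatInterface registers; the endpoint name and return shape are assumptions, not something recorded in this commit:

from gradio_client import Client

# point at the local server or at the public Space URL
client = Client("http://localhost:7860")

# ChatInterface is assumed to expose its submit handler under api_name="/chat"
reply = client.predict("What is a Large Language Model?", api_name="/chat")
print(reply)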
notebooks/Llama2_langchain_llama_cpp.ipynb
ADDED
@@ -0,0 +1,419 @@
(Jupyter notebook; code cells reproduced below, long saved outputs summarised in square brackets.)

Cell 1
%%capture
!pip install huggingface-hub hf-transfer langchain llama-cpp-python

Cell 2
%%capture
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

Cell 3
# !wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_M.gguf

Cell 4
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# !huggingface-cli download \
#     Deci/DeciLM-7B-instruct-GGUF \
#     decilm-7b-uniform-gqa-q8_0.gguf \
#     --local-dir . \
#     --local-dir-use-symlinks False

!huggingface-cli download \
    TheBloke/Llama-2-7B-Chat-GGUF \
    llama-2-7b-chat.Q2_K.gguf \
    --local-dir . \
    --local-dir-use-symlinks False
[output: downloads llama-2-7b-chat.Q2_K.gguf (2.83 GB, ~107 MB/s) to ./llama-2-7b-chat.Q2_K.gguf]

Cell 5
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp

Cell 6
MODEL_PATH = "llama-2-7b-chat.Q2_K.gguf"

Cell 7
template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate.from_template(template)

Cell 8
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

Cell 9
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)
[output: llama.cpp load log: LLaMA v2 7B, Q2_K ftype, 2.63 GiB, n_ctx = 512, CPU buffers only]

Cell 10
prompt = """
Question: A rap battle between Stephen Colbert and John Oliver
"""
llm.invoke(prompt)
[output: generation starts with "Stephen Colbert:" and is interrupted; the saved output is a KeyboardInterrupt traceback plus an IPython TypeError raised while rendering it]

Cell 11
llm_chain = LLMChain(prompt=prompt, llm=llm)

Cell 12
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
llm_chain.run(question)

[notebook metadata: Colab provenance, Python 3 kernel]
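Cell 2 hints at a cuBLAS build of llama-cpp-python but is commented out, so every saved run here is CPU-only (hence the interrupted rap-battle generation). If the CUDA wheel is installed, offloading is just a constructor argument on LlamaCpp; a minimal sketch, where the n_gpu_layers and n_batch values are assumptions rather than settings from this commit:

from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q2_K.gguf",
    n_gpu_layers=-1,   # offload every layer to the GPU; lower this if VRAM is tight
    n_batch=512,       # prompt-processing batch size
    temperature=0.75,
    max_tokens=2000,
    verbose=True,
)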
notebooks/gradio-testing.ipynb
ADDED
@@ -0,0 +1,232 @@
(Jupyter notebook; code cells reproduced below, long saved outputs summarised in square brackets.)

Cell 1
# %%capture
# !pip install huggingface-hub hf-transfer langchain llama-cpp-python langchain-community

Cell 2
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp

import gradio as gr

Cell 3
def build_llm_chain():

    MODEL_PATH = "../models/llama-2-7b-chat.Q5_K_M.gguf"

    template = """

    You are a helpful AI Assistant created by Mohammed Vasim. He is an AI Engineer and Specialist.

    Question: {question}

    Answer: helpful answer"""

    prompt = PromptTemplate.from_template(template)

    # Callbacks support token-wise streaming
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

    # Make sure the model path is correct for your system!
    llm = LlamaCpp(
        model_path=MODEL_PATH,
        temperature=0.75,
        max_tokens=2000,
        top_p=1,
        callback_manager=callback_manager,
        verbose=True,  # Verbose is required to pass to the callback manager
    )

    llm_chain = LLMChain(prompt=prompt, llm=llm)

    # question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
    # llm_chain.run(question)

    return llm_chain

Cell 4
title = "Welcome Open Source LLM"

description = "This is a Llama-2-GGUF"

chain = build_llm_chain()

def answer_query(message, history):
    message = chain.run(message)
    return message

# Gradio chat interface
gr.ChatInterface(
    fn=answer_query,
    title=title,
    description=description,
    additional_inputs=[gr.Textbox("You are helpful assistant.")],
    additional_inputs_accordion="📝 System prompt",
    examples=[
        ["What is a Large Language Model?"],
        ["What's 9+2-1?"],
        ["Write Python code to print the Fibonacci sequence"]
    ]
).queue().launch(server_name="0.0.0.0")
[output: llama.cpp load log for ../models/llama-2-7b-chat.Q5_K_M.gguf (LLaMA v2 7B, Q5_K - Medium, 4.45 GiB), then the cell fails at chain = build_llm_chain() with
 ValidationError: 1 validation error for LlamaCpp
 callback_manager
   instance of BaseCallbackManager expected (type=type_error.arbitrary_type; expected_arbitrary_type=BaseCallbackManager)]

Cell 5
(empty)

[notebook metadata: Python 3 kernel]
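The ValidationError above is a type check inside LlamaCpp's pydantic model: the CallbackManager being passed is not recognised as a BaseCallbackManager. That pattern usually comes from two LangChain installations (here, system site-packages plus ~/.local) supplying incompatible langchain / langchain-core versions; this is an assumption about the environment, not something stated in the notebook. A minimal sketch of the usual workaround, handing the handler over via callbacks so no CallbackManager has to be constructed by hand:

from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

llm = LlamaCpp(
    model_path="../models/llama-2-7b-chat.Q5_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callbacks=[StreamingStdOutCallbackHandler()],  # sidesteps the BaseCallbackManager type check
    verbose=True,
)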