{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "EGTI9yHm74B0" }, "outputs": [], "source": [ "%%capture\n", "!pip install huggingface-hub hf-transfer langchain llama-cpp-python" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "ao6p6SSd5VvW" }, "outputs": [], "source": [ "%%capture\n", "# !CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "AOmrozm5GoZZ" }, "outputs": [], "source": [ "# !wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_M.gguf" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FoxgM851hI5F", "outputId": "fcc7276e-3d87-4e8a-cd10-ff533074d12b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "downloading https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf to /root/.cache/huggingface/hub/tmp02ipqll0\n", "llama-2-7b-chat.Q2_K.gguf: 100% 2.83G/2.83G [00:26<00:00, 107MB/s]\n", "./llama-2-7b-chat.Q2_K.gguf\n" ] } ], "source": [ "import os\n", "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n", "\n", "# !huggingface-cli download \\\n", "# Deci/DeciLM-7B-instruct-GGUF \\\n", "# decilm-7b-uniform-gqa-q8_0.gguf \\\n", "# --local-dir . \\\n", "# --local-dir-use-symlinks False\n", "\n", "!huggingface-cli download \\\n", " TheBloke/Llama-2-7B-Chat-GGUF \\\n", " llama-2-7b-chat.Q2_K.gguf \\\n", " --local-dir . \\\n", " --local-dir-use-symlinks False" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "176a5LS68sBI" }, "outputs": [], "source": [ "from langchain.callbacks.manager import CallbackManager\n", "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "from langchain.chains import LLMChain\n", "from langchain.prompts import PromptTemplate\n", "from langchain_community.llms import LlamaCpp" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "E0nySsfAHmu_" }, "outputs": [], "source": [ "MODEL_PATH = \"llama-2-7b-chat.Q2_K.gguf\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "r_rEfQFfBYOb" }, "outputs": [], "source": [ "template = \"\"\"Question: {question}\n", "\n", "Answer: Let's work this out in a step by step way to be sure we have the right answer.\"\"\"\n", "\n", "prompt = PromptTemplate.from_template(template)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "VR2kLDqLBY1A" }, "outputs": [], "source": [ "# Callbacks support token-wise streaming\n", "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "L_KBhPNmBbCV", "outputId": "ed5292d0-67e6-4b91-b8e0-418dd92d2572" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q2_K.gguf (version GGUF V2)\n", "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = llama\n", "llama_model_loader: - kv 1: general.name str = LLaMA v2\n", "llama_model_loader: - kv 2: llama.context_length u32 = 4096\n", "llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n", "llama_model_loader: - kv 4: llama.block_count u32 = 32\n", "llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n", "llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n", "llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n", "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n", "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000001\n", "llama_model_loader: - kv 10: general.file_type u32 = 10\n", "llama_model_loader: - kv 11: tokenizer.ggml.model str = llama\n", "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", "llama_model_loader: - kv 13: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n", "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", "llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 = 1\n", "llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 = 2\n", "llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 = 0\n", "llama_model_loader: - kv 18: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 65 tensors\n", "llama_model_loader: - type q2_K: 65 tensors\n", "llama_model_loader: - type q3_K: 160 tensors\n", "llama_model_loader: - type q6_K: 1 tensors\n", "llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n", "llm_load_print_meta: format = GGUF V2\n", "llm_load_print_meta: arch = llama\n", "llm_load_print_meta: vocab type = SPM\n", "llm_load_print_meta: n_vocab = 32000\n", "llm_load_print_meta: n_merges = 0\n", "llm_load_print_meta: n_ctx_train = 4096\n", "llm_load_print_meta: n_embd = 4096\n", "llm_load_print_meta: n_head = 32\n", "llm_load_print_meta: n_head_kv = 32\n", "llm_load_print_meta: n_layer = 32\n", "llm_load_print_meta: n_rot = 128\n", "llm_load_print_meta: n_embd_head_k = 128\n", "llm_load_print_meta: n_embd_head_v = 128\n", "llm_load_print_meta: n_gqa = 1\n", "llm_load_print_meta: n_embd_k_gqa = 4096\n", "llm_load_print_meta: n_embd_v_gqa = 4096\n", "llm_load_print_meta: f_norm_eps = 0.0e+00\n", "llm_load_print_meta: f_norm_rms_eps = 1.0e-06\n", "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n", "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n", "llm_load_print_meta: n_ff = 11008\n", "llm_load_print_meta: n_expert = 0\n", "llm_load_print_meta: n_expert_used = 0\n", "llm_load_print_meta: rope scaling = linear\n", "llm_load_print_meta: freq_base_train = 10000.0\n", "llm_load_print_meta: freq_scale_train = 1\n", "llm_load_print_meta: n_yarn_orig_ctx = 4096\n", "llm_load_print_meta: rope_finetuned = unknown\n", "llm_load_print_meta: model type = 7B\n", "llm_load_print_meta: model ftype = Q2_K - Medium\n", "llm_load_print_meta: model params = 6.74 B\n", "llm_load_print_meta: model size = 2.63 GiB (3.35 BPW) \n", "llm_load_print_meta: general.name = LLaMA v2\n", "llm_load_print_meta: BOS token = 1 ''\n", "llm_load_print_meta: EOS token = 2 ''\n", "llm_load_print_meta: UNK token = 0 ''\n", "llm_load_print_meta: LF token = 13 '<0x0A>'\n", "llm_load_tensors: ggml ctx size = 0.11 MiB\n", "llm_load_tensors: CPU buffer size = 2694.32 MiB\n", ".................................................................................................\n", "llama_new_context_with_model: n_ctx = 512\n", "llama_new_context_with_model: freq_base = 10000.0\n", "llama_new_context_with_model: freq_scale = 1\n", "llama_kv_cache_init: CPU KV buffer size = 256.00 MiB\n", "llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n", "llama_new_context_with_model: CPU input buffer size = 0.14 MiB\n", "llama_new_context_with_model: CPU compute buffer size = 1.10 MiB\n", "llama_new_context_with_model: graph splits (measure): 1\n", "AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | \n", "Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000001', 'llama.rope.dimension_count': '128', 'llama.attention.head_count': '32', 'tokenizer.ggml.bos_token_id': '1', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '10'}\n" ] } ], "source": [ "# Make sure the model path is correct for your system!\n", "llm = LlamaCpp(\n", " model_path=MODEL_PATH,\n", " temperature=0.75,\n", " max_tokens=2000,\n", " top_p=1,\n", " callback_manager=callback_manager,\n", " verbose=True, # Verbose is required to pass to the callback manager\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "crv_Wu52Bdz_", "outputId": "4b45a176-4503-4bf7-8fb7-0bc949eed169" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Stephen Colbert:" ] }, { "name": "stderr", "output_type": "stream", "text": [ "ERROR:root:Internal Python error in the inspect module.\n", "Below is the traceback from this internal error.\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 3553, in run_code\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n", " File \"\", line 4, in \n", " llm.invoke(prompt)\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 273, in invoke\n", " self.generate_prompt(\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 568, in generate_prompt\n", " return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 741, in generate\n", " output = self._generate_helper(\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 605, in _generate_helper\n", " raise e\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 592, in _generate_helper\n", " self._generate(\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\", line 1177, in _generate\n", " self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\", line 288, in _call\n", " for chunk in self._stream(\n", " File \"/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\", line 341, in _stream\n", " for part in result:\n", " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 978, in _create_completion\n", " for token in self.generate(\n", " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 663, in generate\n", " self.eval(tokens)\n", " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\", line 503, in eval\n", " self._ctx.decode(self._batch)\n", " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/_internals.py\", line 305, in decode\n", " return_code = llama_cpp.llama_decode(\n", " File \"/usr/local/lib/python3.10/dist-packages/llama_cpp/llama_cpp.py\", line 1636, in llama_decode\n", " return _lib.llama_decode(ctx, batch)\n", "KeyboardInterrupt\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\", line 2099, in showtraceback\n", " stb = value._render_traceback_()\n", "AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 1101, in get_records\n", " return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)\n", " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 248, in wrapped\n", " return f(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\", line 281, in _fixed_getinnerframes\n", " records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))\n", " File \"/usr/lib/python3.10/inspect.py\", line 1662, in getinnerframes\n", " frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)\n", " File \"/usr/lib/python3.10/inspect.py\", line 1620, in getframeinfo\n", " filename = getsourcefile(frame) or getfile(frame)\n", " File \"/usr/lib/python3.10/inspect.py\", line 829, in getsourcefile\n", " module = getmodule(object, filename)\n", " File \"/usr/lib/python3.10/inspect.py\", line 878, in getmodule\n", " os.path.realpath(f)] = module.__name__\n", " File \"/usr/lib/python3.10/posixpath.py\", line 396, in realpath\n", " path, ok = _joinrealpath(filename[:0], filename, strict, {})\n", " File \"/usr/lib/python3.10/posixpath.py\", line 429, in _joinrealpath\n", " newpath = join(path, name)\n", " File \"/usr/lib/python3.10/posixpath.py\", line 71, in join\n", " def join(a, *p):\n", "KeyboardInterrupt\n" ] }, { "ename": "TypeError", "evalue": "object of type 'NoneType' has no len()", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \"\"\"\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minvoke\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, stop, **kwargs)\u001b[0m\n\u001b[1;32m 272\u001b[0m return (\n\u001b[0;32m--> 273\u001b[0;31m self.generate_prompt(\n\u001b[0m\u001b[1;32m 274\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36mgenerate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0mprompt_strings\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 568\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt_strings\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 569\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, prompts, stop, callbacks, tags, metadata, run_name, **kwargs)\u001b[0m\n\u001b[1;32m 740\u001b[0m ]\n\u001b[0;32m--> 741\u001b[0;31m output = self._generate_helper(\n\u001b[0m\u001b[1;32m 742\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_managers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_arg_supported\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_llm_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mLLMResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenerations\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 605\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 606\u001b[0m \u001b[0mflattened_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflatten\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 591\u001b[0m output = (\n\u001b[0;32m--> 592\u001b[0;31m self._generate(\n\u001b[0m\u001b[1;32m 593\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/language_models/llms.py\u001b[0m in \u001b[0;36m_generate\u001b[0;34m(self, prompts, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 1176\u001b[0m text = (\n\u001b[0;32m-> 1177\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrun_manager\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1178\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnew_arg_supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, prompt, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0mcombined_text_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 288\u001b[0;31m for chunk in self._stream(\n\u001b[0m\u001b[1;32m 289\u001b[0m \u001b[0mprompt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_community/llms/llamacpp.py\u001b[0m in \u001b[0;36m_stream\u001b[0;34m(self, prompt, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 341\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mpart\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 342\u001b[0m \u001b[0mlogprobs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpart\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"choices\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"logprobs\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36m_create_completion\u001b[0;34m(self, prompt, suffix, max_tokens, temperature, top_p, min_p, typical_p, logprobs, echo, stop, frequency_penalty, presence_penalty, repeat_penalty, top_k, stream, seed, tfs_z, mirostat_mode, mirostat_tau, mirostat_eta, model, stopping_criteria, logits_processor, grammar, logit_bias)\u001b[0m\n\u001b[1;32m 977\u001b[0m \u001b[0mmultibyte_fix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 978\u001b[0;31m for token in self.generate(\n\u001b[0m\u001b[1;32m 979\u001b[0m \u001b[0mprompt_tokens\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m(self, tokens, top_k, top_p, min_p, typical_p, temp, repeat_penalty, reset, frequency_penalty, presence_penalty, tfs_z, mirostat_mode, mirostat_tau, mirostat_eta, penalize_nl, logits_processor, stopping_criteria, grammar)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtokens\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0msample_idx\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_tokens\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama.py\u001b[0m in \u001b[0;36meval\u001b[0;34m(self, tokens)\u001b[0m\n\u001b[1;32m 502\u001b[0m )\n\u001b[0;32m--> 503\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 504\u001b[0m \u001b[0;31m# Save tokens\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/_internals.py\u001b[0m in \u001b[0;36mdecode\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 305\u001b[0;31m return_code = llama_cpp.llama_decode(\n\u001b[0m\u001b[1;32m 306\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/llama_cpp/llama_cpp.py\u001b[0m in \u001b[0;36mllama_decode\u001b[0;34m(ctx, batch)\u001b[0m\n\u001b[1;32m 1635\u001b[0m < 0 - error\"\"\"\n\u001b[0;32m-> 1636\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllama_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1637\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: ", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mshowtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n\u001b[1;32m 2098\u001b[0m \u001b[0;31m# in the engines. This should return a list of strings.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2099\u001b[0;31m \u001b[0mstb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_render_traceback_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2100\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'KeyboardInterrupt' object has no attribute '_render_traceback_'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mshowtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n\u001b[1;32m 2099\u001b[0m \u001b[0mstb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_render_traceback_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2100\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2101\u001b[0;31m stb = self.InteractiveTB.structured_traceback(etype,\n\u001b[0m\u001b[1;32m 2102\u001b[0m value, tb, tb_offset=tb_offset)\n\u001b[1;32m 2103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, value, tb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1365\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1366\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1367\u001b[0;31m return FormattedTB.structured_traceback(\n\u001b[0m\u001b[1;32m 1368\u001b[0m self, etype, value, tb, tb_offset, number_of_lines_of_context)\n\u001b[1;32m 1369\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, value, tb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1265\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose_modes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1266\u001b[0m \u001b[0;31m# Verbose modes need a full traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1267\u001b[0;31m return VerboseTB.structured_traceback(\n\u001b[0m\u001b[1;32m 1268\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0metype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb_offset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumber_of_lines_of_context\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1269\u001b[0m )\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mstructured_traceback\u001b[0;34m(self, etype, evalue, etb, tb_offset, number_of_lines_of_context)\u001b[0m\n\u001b[1;32m 1122\u001b[0m \u001b[0;34m\"\"\"Return a nice text document describing the traceback.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1123\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1124\u001b[0;31m formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n\u001b[0m\u001b[1;32m 1125\u001b[0m tb_offset)\n\u001b[1;32m 1126\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mformat_exception_as_a_whole\u001b[0;34m(self, etype, evalue, etb, number_of_lines_of_context, tb_offset)\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1081\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1082\u001b[0;31m \u001b[0mlast_unique\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecursion_repeat\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfind_recursion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0morig_etype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mevalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1084\u001b[0m \u001b[0mframes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat_records\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlast_unique\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecursion_repeat\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py\u001b[0m in \u001b[0;36mfind_recursion\u001b[0;34m(etype, value, records)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;31m# first frame (from in to out) that looks different.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_recursion_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0metype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 382\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 383\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;31m# Select filename, lineno, func_name to track frames with\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: object of type 'NoneType' has no len()" ] } ], "source": [ "prompt = \"\"\"\n", "Question: A rap battle between Stephen Colbert and John Oliver\n", "\"\"\"\n", "llm.invoke(prompt)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Bdpj6esPBs4q" }, "outputs": [], "source": [ "llm_chain = LLMChain(prompt=prompt, llm=llm)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Ex8ZzlTKBtlm" }, "outputs": [], "source": [ "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n", "llm_chain.run(question)" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }