{ "cells": [ { "cell_type": "markdown", "id": "15908f0e", "metadata": {}, "source": [ "## Import Packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "94f0ccef", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-06-20 06:04:33.911240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-06-20 06:04:34.105735: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-06-20 06:04:35.049185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n", "2023-06-20 06:04:35.049283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n", "2023-06-20 06:04:35.049291: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "===================================BUG REPORT===================================\n", "Welcome to bitsandbytes. For bug reports, please run\n", "\n", "python -m bitsandbytes\n", "\n", " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", "================================================================================\n", "bin /opt/conda/envs/media-reco-env-3-8/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so\n", "CUDA SETUP: CUDA runtime path found: /opt/conda/envs/media-reco-env-3-8/lib/libcudart.so\n", "CUDA SETUP: Highest compute capability among GPUs detected: 7.0\n", "CUDA SETUP: Detected CUDA version 113\n", "CUDA SETUP: Loading binary /opt/conda/envs/media-reco-env-3-8/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so...\n" ] } ], "source": [ "import os\n", "# NOTE: relative chdir - re-running this cell moves one more directory up each time.\n", "os.chdir(\"..\")\n", "\n", "import warnings\n", "# Suppresses every Python warning for the rest of the session.\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import torch\n", "from peft import PeftConfig, PeftModel\n", "from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig" ] },
{ "cell_type": "markdown", "id": "58b927f4", "metadata": {}, "source": [ "## Utilities" ] },
{ "cell_type": "code", "execution_count": 2, "id": "9837afb7", "metadata": {}, "outputs": [], "source": [
"def generate_prompt(prompt: str) -> str:\n",
"    \"\"\"Insert `prompt` into the two-line prompt template and strip the outer whitespace.\"\"\"\n",
"    return f\"\"\"\n",
" : {prompt}\n",
" : \n",
" \"\"\".strip()\n",
"\n",
"\n",
"def generate_response(model, tokenizer, prompt: str, generation_config) -> str:\n",
"    \"\"\"Run one generation pass for `prompt` and return the decoded text.\n",
"\n",
"    Prints \"Generating...\" before calling `model.generate`. The complete output\n",
"    sequence (`generation_output[0]`) is decoded with special tokens skipped.\n",
"\n",
"    Args:\n",
"        model: object exposing `generate(...)` and a `device` attribute.\n",
"        tokenizer: callable returning `input_ids` / `attention_mask` tensors, with a `decode` method.\n",
"        prompt: text to tokenize and feed to the model.\n",
"        generation_config: passed through to `model.generate` unchanged.\n",
"\n",
"    Returns:\n",
"        The decoded text of the first returned sequence.\n",
"    \"\"\"\n",
"    inputs = tokenizer(\n",
"        prompt,\n",
"        return_tensors=\"pt\",\n",
"    )\n",
"    # Follow the model's device instead of hard-coding CUDA.\n",
"    device = model.device\n",
"    input_ids = inputs[\"input_ids\"].to(device)\n",
"    attention_mask = inputs[\"attention_mask\"].to(device)\n",
"\n",
"    print(\"Generating...\")\n",
"    # NOTE(review): only input_ids / attention_mask are forwarded (not **inputs);\n",
"    # the tokenizer may return extra fields that generate() rejects - confirm before simplifying.\n",
"    with torch.no_grad():\n",
"        generation_output = model.generate(\n",
"            input_ids=input_ids,\n",
"            attention_mask=attention_mask,\n",
"            generation_config=generation_config,\n",
"        )\n",
"\n",
"    return tokenizer.decode(generation_output[0], skip_special_tokens=True)"
] },
{ "cell_type": "markdown", "id": "b37f5f57", "metadata": {}, "source": [ "## Configs" ] },
{ "cell_type": "code", "execution_count": 3, "id": "b53f6c18", "metadata": {}, "outputs": [], "source": [ "MODEL_NAME = \"Sandiago21/falcon-7b-prompt-answering\"\n", "# MODEL_NAME = \".\"\n", "# BASE_MODEL = \"tiiuae/falcon-7b\"" ] },
{ "cell_type": "markdown", "id": "ec8111a9", "metadata": {}, "source": [ "## Load Model & Tokenizer" ] },
{ "cell_type": "code", "execution_count": 4, "id": "d6c0966c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'tiiuae/falcon-7b'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config = PeftConfig.from_pretrained(MODEL_NAME)\n", "config.base_model_name_or_path" ] },
{ "cell_type": "code", "execution_count": 5, "id": "ebd614a3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'tiiuae/falcon-7b'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config.base_model_name_or_path" ] },
{ "cell_type": "code", "execution_count": 6, "id": "1cb5103c", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cfb5a01e129d48e7871a15fda0511006", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00= \"2\":\n", "# model = torch.compile(model)" ] },
{ "cell_type": "markdown", "id": "d265647e", "metadata": {}, "source": [ "## Generation Examples" ] },
{ "cell_type": "code", "execution_count": 8, "id": "10372ae3", "metadata": {}, "outputs": [], "source": [
"# NOTE: this is the model's own config object, not a copy, so the assignments\n",
"# below also change model.generation_config.\n",
"generation_config = model.generation_config\n",
"# NOTE(review): top_p only applies when sampling; do_sample is not set in this cell - confirm the intended decoding mode.\n",
"generation_config.top_p = 0.7\n",
"generation_config.num_return_sequences = 1\n",
"generation_config.max_new_tokens = 32\n",
"# NOTE(review): disabling the KV cache usually slows generation - confirm it is required for this model.\n",
"generation_config.use_cache = False\n",
"generation_config.pad_token_id = tokenizer.eos_token_id\n",
"generation_config.eos_token_id = tokenizer.eos_token_id"
] },
{ "cell_type": "markdown", "id": "e2ac4b78", "metadata": {}, "source": [ "## Examples with Base (tiiuae/falcon-7b) model" ] },
{ "cell_type": "markdown", "id": "1f6e7df1", "metadata": {}, "source": [ "### Example 1" ] },
{ "cell_type": "code", "execution_count": 9, "id": "a84a4f9e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Como cocinar supa de pescado?\n", ": ¿Qué quiere decir \"supa de pescado\"?\n", ": ¿Como cocinar supa de pescado?\n", "<\n", "CPU times: user 9.68 s, sys: 188 ms, total: 9.87 s\n", "Wall time: 9.93 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Como cocinar supa de pescado?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "8143ca1f", "metadata": {}, "source": [ "### Example 2" ] },
{ "cell_type": "code", "execution_count": 10, "id": "65117ac7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ": The capital city of Greece is Athens. Greece borders Albania, Bulgaria, Macedonia, and Turkey.\n", ": What is the capital city of Albania and with\n", "CPU times: user 8.81 s, sys: 0 ns, total: 8.81 s\n", "Wall time: 8.8 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "447f75f9", "metadata": {}, "source": [ "### Example 3" ] },
{ "cell_type": "code", "execution_count": 11, "id": "2ff7a5e5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Ποιά είναι η μεγαλύτερη πόλη της Ελλάδας?\n", ": Ποιά είναι η μεγαλύτερη πόλη τ\n", "CPU times: user 9.29 s, sys: 0 ns, total: 9.29 s\n", "Wall time: 9.29 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Ποιά είναι η μεγαλύτερη πόλη της Ελλάδας?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "c0f1fc51", "metadata": {}, "source": [ "### Example 4" ] },
{ "cell_type": "code", "execution_count": 12, "id": "4073cb6d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ": 5\n", ": 5?\n", ": Yes\n", ": I have 2 oranges and 3 apples. How many fruits\n", "CPU times: user 8.85 s, sys: 0 ns, total: 8.85 s\n", "Wall time: 8.86 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "2e2d35b3", "metadata": {}, "source": [ "## Examples with Fine-Tuned model" ] },
{ "cell_type": "markdown", "id": "df08ac5a", "metadata": {}, "source": [ "## Let's Load the Fine-Tuned version" ] },
{ "cell_type": "code", "execution_count": 13, "id": "9cba7db1", "metadata": {}, "outputs": [], "source": [ "# NOTE: rebinds `model`; re-running this cell passes the already-wrapped model in again.\n", "model = PeftModel.from_pretrained(model, MODEL_NAME)" ] },
{ "cell_type": "markdown", "id": "5bc70c31", "metadata": {}, "source": [ "### Example 1" ] },
{ "cell_type": "code", "execution_count": 14, "id": "af3a477a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Como cocinar supa de pescado?\n", ": Para cocinar supa de pescado, debe ser descongelada y lavada. Después, debe ser cortada en trozos pequeños y\n", "CPU times: user 9.34 s, sys: 3.68 ms, total: 9.35 s\n", "Wall time: 9.34 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Como cocinar supa de pescado?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "622b3c0a", "metadata": {}, "source": [ "### Example 2" ] },
{ "cell_type": "code", "execution_count": 15, "id": "eab112ae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ": The capital city of Greece is Athens and it borders Albania, Bulgaria, Macedonia, and Turkey.\n", ": What is the capital city of Greece and with\n", "CPU times: user 9.67 s, sys: 0 ns, total: 9.67 s\n", "Wall time: 9.66 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "fb0e6d9e", "metadata": {}, "source": [ "### Example 3" ] },
{ "cell_type": "code", "execution_count": 16, "id": "df571d56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Ποιά είναι η μεγαλύτερη πόλη της Ελλάδας?\n", ": Το Αθήνα είναι το πλήρες κόσ\n", "CPU times: user 9.46 s, sys: 0 ns, total: 9.46 s\n", "Wall time: 9.45 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Ποιά είναι η μεγαλύτερη πόλη της Ελλάδας?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] },
{ "cell_type": "markdown", "id": "8d3aa375", "metadata": {}, "source": [ "### Example 4" ] },
{ "cell_type": "code", "execution_count": 17, "id": "4975198b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ": You have 2 oranges and 3 apples. You have 5 fruits in total. You can also use the following formula to calculate the number of fruits you\n", "CPU times: user 8.93 s, sys: 0 ns, total: 8.93 s\n", "Wall time: 8.92 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ":\n", "\"\"\".strip()\n", "\n", "response = generate_response(model, tokenizer, PROMPT, generation_config)\n", "print(response)" ] }
], "metadata": { "kernelspec": { "display_name": "Python [conda env:media-reco-env-3-8]", "language": "python", "name": "conda-env-media-reco-env-3-8-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 5 }