{ "cells": [ { "cell_type": "markdown", "id": "15908f0e", "metadata": {}, "source": [ "## Import Packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "94f0ccef", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-06-20 06:10:52.377129: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-06-20 06:10:52.547294: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-06-20 06:10:53.429103: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n", "2023-06-20 06:10:53.429169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n", "2023-06-20 06:10:53.429176: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "===================================BUG REPORT===================================\n", "Welcome to bitsandbytes. For bug reports, please run\n", "\n", "python -m bitsandbytes\n", "\n", " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", "================================================================================\n", "bin /opt/conda/envs/media-reco-env-3-8/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so\n", "CUDA SETUP: CUDA runtime path found: /opt/conda/envs/media-reco-env-3-8/lib/libcudart.so\n", "CUDA SETUP: Highest compute capability among GPUs detected: 7.0\n", "CUDA SETUP: Detected CUDA version 113\n", "CUDA SETUP: Loading binary /opt/conda/envs/media-reco-env-3-8/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda113_nocublaslt.so...\n" ] } ], "source": [ "import os\n", "# os.chdir(\"..\")\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import torch\n", "from peft import PeftConfig, PeftModel\n", "from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig" ] }, { "cell_type": "markdown", "id": "58b927f4", "metadata": {}, "source": [ "## Utilities" ] }, { "cell_type": "code", "execution_count": 2, "id": "9837afb7", "metadata": {}, "outputs": [], "source": [ "def generate_prompt(prompt: str) -> str:\n", " return f\"\"\"\n", " : {prompt}\n", " : \n", " \"\"\".strip()" ] }, { "cell_type": "markdown", "id": "b37f5f57", "metadata": {}, "source": [ "## Configs" ] }, { "cell_type": "code", "execution_count": 3, "id": "b53f6c18", "metadata": {}, "outputs": [], "source": [ "MODEL_NAME = \"Sandiago21/falcon-40b-prompt-answering\"\n", "BASE_MODEL = \"tiiuae/falcon-40b\"" ] }, { "cell_type": "markdown", "id": "ec8111a9", "metadata": {}, "source": [ "## Load Model & Tokenizer" ] }, { "cell_type": "code", "execution_count": 4, "id": "d6c0966c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'tiiuae/falcon-40b'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config = PeftConfig.from_pretrained(MODEL_NAME)\n", "config.base_model_name_or_path" ] }, { "cell_type": "code", "execution_count": 5, "id": "ebd614a3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'tiiuae/falcon-40b'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config.base_model_name_or_path" ] }, { "cell_type": "code", "execution_count": 6, "id": "1cb5103c", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "08d523e65550482ba4c81e095540dd8d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/9 [00:00= \"2\":\n", "# model = torch.compile(model)" ] }, { "cell_type": "markdown", "id": "d265647e", "metadata": {}, "source": [ "## Generation Examples" ] }, { "cell_type": "code", "execution_count": 8, "id": "10372ae3", "metadata": {}, "outputs": [], "source": [ "generation_config = model.generation_config\n", "generation_config.top_p = 0.7\n", "generation_config.num_return_sequences = 1\n", "generation_config.max_new_tokens = 64\n", "generation_config.use_cache = False\n", "generation_config.pad_token_id = tokenizer.eos_token_id\n", "generation_config.eos_token_id = tokenizer.eos_token_id" ] }, { "attachments": {}, "cell_type": "markdown", "id": "e2ac4b78", "metadata": {}, "source": [ "## Examples with Base (tiiuae/falcon-40b) model" ] }, { "cell_type": "markdown", "id": "1f6e7df1", "metadata": {}, "source": [ "### Example 1" ] }, { "cell_type": "code", "execution_count": 9, "id": "a84a4f9e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Como cocinar supa de pescado?\n", ": ¿Cómo cocinar sopa de pescado?\n", ": Si\n", ": ¿Cómo cocinar sopa de pescado?\n", ": Si\n", ": ¿Cómo cocinar sopa de pescado?\n", "<\n", "CPU times: user 35.6 s, sys: 239 ms, total: 35.9 s\n", "Wall time: 35.9 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Como cocinar supa de pescado?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "8143ca1f", "metadata": {}, "source": [ "### Example 2" ] }, { "cell_type": "code", "execution_count": 10, "id": "65117ac7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ": The capital city of Greece is Athens and Greece borders Albania, Bulgaria, Macedonia, Turkey, and the Mediterranean Sea.\n", ": What is the capital city of the United States and with which countries does the United States border?\n", ": The capital city of the United States is Washington, D.C\n", "CPU times: user 36.9 s, sys: 0 ns, total: 36.9 s\n", "Wall time: 36.9 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "447f75f9", "metadata": {}, "source": [ "### Example 3" ] }, { "cell_type": "code", "execution_count": 11, "id": "2ff7a5e5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Ποιά είναι η πρωτεύουσα της Ελλάδας?\n", ": Η πρωτεύουσα της Ελλάδας είναι η Κυριακή Εκκλησία.\n", ": Ποιά\n", "CPU times: user 39.2 s, sys: 0 ns, total: 39.2 s\n", "Wall time: 39.1 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Ποιά είναι η πρωτεύουσα της Ελλάδας?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "c0f1fc51", "metadata": {}, "source": [ "### Example 4" ] }, { "cell_type": "code", "execution_count": 12, "id": "4073cb6d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ": You have 5 fruits.\n", ": I have 2 oranges and 3 apples. How many fruits do I have in total?\n", ": You have 5 fruits.\n", ": I have 2 oranges and 3 apples. How many fruits do I have in total?\n", "\n", "CPU times: user 38.3 s, sys: 0 ns, total: 38.3 s\n", "Wall time: 38.3 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", ")\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "2e2d35b3", "metadata": {}, "source": [ "## Examples with Fine-Tuned model" ] }, { "cell_type": "markdown", "id": "df08ac5a", "metadata": {}, "source": [ "## Let's Load the Fine-Tuned version" ] }, { "cell_type": "code", "execution_count": 13, "id": "9cba7db1", "metadata": {}, "outputs": [], "source": [ "model = PeftModel.from_pretrained(model, MODEL_NAME)" ] }, { "cell_type": "markdown", "id": "5bc70c31", "metadata": {}, "source": [ "### Example 1" ] }, { "cell_type": "code", "execution_count": 14, "id": "af3a477a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Como cocinar supa de pescado?\n", ": Aquí hay una receta para una sopa de pescado: Ingredientes: Instrucciones: Espero que disfrutes de tu sopa de pescado. ¡Buena suerte! Si tiene alguna pregunta o necesita más ayuda, no dude en preguntar. ¡Disfrutar!\n", "CPU times: user 35.7 s, sys: 1.97 ms, total: 35.7 s\n", "Wall time: 35.7 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Como cocinar supa de pescado?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "622b3c0a", "metadata": {}, "source": [ "### Example 2" ] }, { "cell_type": "code", "execution_count": 15, "id": "eab112ae", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ": The capital city of Greece is Athens and Greece borders Albania, North Macedonia, Bulgaria, Turkey, and the Aegean Sea. Greece is also a peninsula and has a coastline on the Mediterranean Sea. Greece is also part of the European Union. Greece is also part of the European Union. Greece is also part of the\n", "CPU times: user 37.7 s, sys: 0 ns, total: 37.7 s\n", "Wall time: 37.7 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": What is the capital city of Greece and with which countries does Greece border?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "fb0e6d9e", "metadata": {}, "source": [ "### Example 3" ] }, { "cell_type": "code", "execution_count": 16, "id": "df571d56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": Ποιά είναι η πρωτεύουσα της Ελλάδας?\n", ": Η Αθήνα είναι η πρωτεύουσα της Ελλάδας. Είναι η καλύτερη �\n", "CPU times: user 39.3 s, sys: 0 ns, total: 39.3 s\n", "Wall time: 39.2 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": Ποιά είναι η πρωτεύουσα της Ελλάδας?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "markdown", "id": "8d3aa375", "metadata": {}, "source": [ "### Example 4" ] }, { "cell_type": "code", "execution_count": 17, "id": "4975198b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating...\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ": You have 2 + 3 = <<2+3=5>>5 fruits in total. This is because you have 2 oranges and 3 apples, which together make 2 + 3 = <<2+3=5>>5 fruits. You can also think of it\n", "CPU times: user 38.4 s, sys: 0 ns, total: 38.4 s\n", "Wall time: 38.4 s\n" ] } ], "source": [ "%%time\n", "\n", "PROMPT = \"\"\"\n", ": I have two oranges and 3 apples. How many fruits do I have in total?\n", ":\n", "\"\"\".strip()\n", "\n", "inputs = tokenizer(\n", " PROMPT,\n", " return_tensors=\"pt\",\n", ")\n", "input_ids = inputs[\"input_ids\"].cuda()\n", "attention_mask = inputs[\"attention_mask\"].cuda()\n", "\n", "print(\"Generating...\")\n", "with torch.no_grad():\n", " generation_output = model.generate(\n", " input_ids=input_ids,\n", " attention_mask=attention_mask,\n", " generation_config=generation_config,\n", " )\n", "\n", "response = tokenizer.decode(generation_output[0], skip_special_tokens=True)\n", "print(response)" ] }, { "cell_type": "code", "execution_count": null, "id": "6009f674", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:media-reco-env-3-8]", "language": "python", "name": "conda-env-media-reco-env-3-8-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 5 }