{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "119805f4-8589-4379-ad87-a7bad4c0e658", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/gscratch/raivn/ethans/miniconda3/envs/llms_12.1/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", ":241: RuntimeWarning: pyarrow.lib.IpcWriteOptions size changed, may indicate binary incompatibility. Expected 72 from C header, got 88 from PyObject\n", ":241: RuntimeWarning: pyarrow.lib.IpcReadOptions size changed, may indicate binary incompatibility. Expected 96 from C header, got 104 from PyObject\n", ":241: RuntimeWarning: pyarrow._fs.FileInfo size changed, may indicate binary incompatibility. Expected 64 from C header, got 88 from PyObject\n", ":241: RuntimeWarning: pyarrow._fs.FileSelector size changed, may indicate binary incompatibility. Expected 48 from C header, got 72 from PyObject\n", "2024-05-30 03:09:58.230601: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-05-30 03:09:58.280835: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-05-30 03:10:03.250651: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import json\n", "import os\n", "import pickle\n", "from datetime import datetime\n", "\n", "import evaluate\n", "import torch\n", "from tqdm import tqdm\n", "\n", "from eval import *\n", "from superposed.llama.metrics import *\n", "from superposed.llama.generation import Llama\n", "from superposed.llama.superposed_generation import SuperposedLlama\n", "from superposed.llama.tokenizer import Tokenizer\n", "from superposed.ngrams.ngram_models import make_models" ] }, { "cell_type": "code", "execution_count": 4, "id": "51c15900-c8b8-46d9-a884-6842a391ef48", "metadata": {}, "outputs": [], "source": [ "sup_device = torch.device(\"cuda:0\")\n", "tokenizer = Tokenizer('../../7B/tokenizer.model')" ] }, { "cell_type": "code", "execution_count": 5, "id": "9817d9a4-ad64-41c6-b87b-b1e422b836a9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parameters: {'alpha': 0.54, 'temp': 0.06, 'n_drafts': 3, 'prompt_len': 15, 'n_token_sample': 9, 'n_token_consider': 32000, 'mixing_method': 'sample_new_weights_with_score', 'smoothing': 'geom', 'sample_tokens': 0, 'sample_beams': 0, 'i_weights': [0.01, 0.04, 0.15, 0.18, 0.12], 'i_length': [1, 2, 3, 4, 5]}\n" ] } ], "source": [ "# Params\n", "param_file = \"../../params/p15_d3_mixed.json\"\n", "with open(param_file, \"r\") as f:\n", " params = json.load(f)\n", " print(f\"Parameters: {params}\")\n", "alpha = params[\"alpha\"]\n", "temp = params[\"temp\"]\n", "n_drafts = params[\"n_drafts\"]\n", "prompt_len = params[\"prompt_len\"]\n", "n_token_sample = params[\"n_token_sample\"]\n", "i_weights = params[\"i_weights\"]\n", "i_length = params[\"i_length\"]" ] }, { "cell_type": "code", "execution_count": 6, "id": "9c99098e-a38b-4c78-a0e9-8c80309830bb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Making bigram...\n", "1310800\n", "Making trigram...\n", "671088728\n", "Making fourgram...\n", "2684354648\n", "Making fivegram...\n", "5368709200\n", "Making sixgram...\n", "5368709200\n" ] } ], "source": [ "# Create ngram models\n", "ngrams = make_models(\"../../ckpts-200k\", bigram=True, trigram=True, fourgram=True, fivegram=True, sixgram=True, sevengram=False)" ] }, { "cell_type": "code", "execution_count": 7, "id": "c3331332-242c-4e98-9f11-58c6dc0ef581", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "> initializing model parallel with size 1\n", "> initializing ddp with size 1\n", "> initializing pipeline with size 1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/gscratch/raivn/ethans/miniconda3/envs/llms_12.1/lib/python3.11/site-packages/torch/__init__.py:614: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n", " _C._set_default_tensor_type(t)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Loaded in 25.15 seconds\n", "cuda:0\n" ] } ], "source": [ "weight_path = \"../../7B/\"\n", "model = SuperposedLlama.build(ckpt_dir=weight_path, \n", " tokenizer_path=f'{weight_path}/tokenizer.model', \n", " max_seq_len=100, \n", " max_batch_size=32,\n", " device=sup_device,\n", " model_parallel_size=1)" ] }, { "cell_type": "markdown", "id": "e2b48c23-d6a3-43b1-ad4c-54524aacfda6", "metadata": {}, "source": [ "# Inference" ] }, { "cell_type": "code", "execution_count": 11, "id": "5093373b-bf76-47e3-8f99-1045b60f29c3", "metadata": {}, "outputs": [], "source": [ "def decode(tokenizer, encoding):\n", " \"\"\"\n", " Args:\n", " tokenizer (Any): Tokenizer\n", " encoding (torch.Tensor): Encoding\n", " Returns:\n", " decoding (str)\n", " \"\"\"\n", " eos_locs = (encoding == tokenizer.eos_id).nonzero()\n", " if len(eos_locs > 0):\n", " encoding = encoding[:eos_locs[0]]\n", " return tokenizer.decode(encoding.to(torch.int32).tolist())" ] }, { "cell_type": "code", "execution_count": 22, "id": "18703b19-f3e9-46e4-ab1c-c6d3b403c6d2", "metadata": {}, "outputs": [], "source": [ "prompts = [\n", " \"Hi my name is\",\n", " \"The Seattle Seahawks were Super Bowl\",\n", " \"Penguins are birds native to\"\n", "]\n", "tokenized_prompts = tokenizer.encode(prompts, True, False)" ] }, { "cell_type": "code", "execution_count": 23, "id": "d39cd735-9480-4979-ac92-bbd470f75570", "metadata": {}, "outputs": [], "source": [ "alive_gens, _ = model.sup_generate(prompt_tokens=tokenized_prompts, \n", " smoothing=\"geom\",\n", " max_gen_len=10, \n", " n_token_sample=n_token_sample,\n", " alpha=alpha, \n", " temp=temp,\n", " n_drafts=n_drafts,\n", " i_weights=i_weights,\n", " i_length=i_length,\n", " ngrams=ngrams,\n", " get_time=False,\n", " penalty=200)" ] }, { "cell_type": "code", "execution_count": 24, "id": "cfefa793-e49e-483a-a504-5cc9e23f619d", "metadata": {}, "outputs": [], "source": [ "gens = alive_gens[0].reshape(len(prompts) * n_drafts, -1)" ] }, { "cell_type": "code", "execution_count": 25, "id": "5abf87ab-2ee0-4204-868b-1215abf0c8aa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hi\n", "my name\n", "is L\n", "inda,\n", "I am\n", "a \n", "40\n", "year old\n", "woman who\n" ] } ], "source": [ "for i in gens:\n", " print(decode(tokenizer, i))" ] }, { "cell_type": "code", "execution_count": null, "id": "e73dc3cc-baa5-468d-bdd1-827465bdeb62", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }