{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "119805f4-8589-4379-ad87-a7bad4c0e658",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/gscratch/raivn/ethans/miniconda3/envs/llms_12.1/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "<frozen importlib._bootstrap>:241: RuntimeWarning: pyarrow.lib.IpcWriteOptions size changed, may indicate binary incompatibility. Expected 72 from C header, got 88 from PyObject\n",
      "<frozen importlib._bootstrap>:241: RuntimeWarning: pyarrow.lib.IpcReadOptions size changed, may indicate binary incompatibility. Expected 96 from C header, got 104 from PyObject\n",
      "<frozen importlib._bootstrap>:241: RuntimeWarning: pyarrow._fs.FileInfo size changed, may indicate binary incompatibility. Expected 64 from C header, got 88 from PyObject\n",
      "<frozen importlib._bootstrap>:241: RuntimeWarning: pyarrow._fs.FileSelector size changed, may indicate binary incompatibility. Expected 48 from C header, got 72 from PyObject\n",
      "2024-05-30 03:09:58.230601: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2024-05-30 03:09:58.280835: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-05-30 03:10:03.250651: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import json\n",
    "import os\n",
    "import pickle\n",
    "from datetime import datetime\n",
    "\n",
    "import evaluate\n",
    "import torch\n",
    "from tqdm import tqdm\n",
    "\n",
    "from eval import *\n",
    "from superposed.llama.metrics import *\n",
    "from superposed.llama.generation import Llama\n",
    "from superposed.llama.superposed_generation import SuperposedLlama\n",
    "from superposed.llama.tokenizer import Tokenizer\n",
    "from superposed.ngrams.ngram_models import make_models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "51c15900-c8b8-46d9-a884-6842a391ef48",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standalone tokenizer used for the prompts/drafts, plus the CUDA device passed to the model build\n",
    "tokenizer = Tokenizer(\"../../7B/tokenizer.model\")\n",
    "sup_device = torch.device(\"cuda:0\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9817d9a4-ad64-41c6-b87b-b1e422b836a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters: {'alpha': 0.54, 'temp': 0.06, 'n_drafts': 3, 'prompt_len': 15, 'n_token_sample': 9, 'n_token_consider': 32000, 'mixing_method': 'sample_new_weights_with_score', 'smoothing': 'geom', 'sample_tokens': 0, 'sample_beams': 0, 'i_weights': [0.01, 0.04, 0.15, 0.18, 0.12], 'i_length': [1, 2, 3, 4, 5]}\n"
     ]
    }
   ],
   "source": [
    "# Read the decoding hyperparameters from the JSON params file and echo them\n",
    "param_file = \"../../params/p15_d3_mixed.json\"\n",
    "with open(param_file, \"r\") as f:\n",
    "    params = json.load(f)\n",
    "print(f\"Parameters: {params}\")\n",
    "\n",
    "# Expose the values that later cells pass to sup_generate (prompt_len is kept for reference)\n",
    "alpha, temp = params[\"alpha\"], params[\"temp\"]\n",
    "n_drafts, prompt_len = params[\"n_drafts\"], params[\"prompt_len\"]\n",
    "n_token_sample = params[\"n_token_sample\"]\n",
    "i_weights, i_length = params[\"i_weights\"], params[\"i_length\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9c99098e-a38b-4c78-a0e9-8c80309830bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Making bigram...\n",
      "1310800\n",
      "Making trigram...\n",
      "671088728\n",
      "Making fourgram...\n",
      "2684354648\n",
      "Making fivegram...\n",
      "5368709200\n",
      "Making sixgram...\n",
      "5368709200\n"
     ]
    }
   ],
   "source": [
    "# Build the n-gram models from ../../ckpts-200k: bigram through sixgram enabled, sevengram disabled\n",
    "ngrams = make_models(\n",
    "    \"../../ckpts-200k\",\n",
    "    bigram=True,\n",
    "    trigram=True,\n",
    "    fourgram=True,\n",
    "    fivegram=True,\n",
    "    sixgram=True,\n",
    "    sevengram=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c3331332-242c-4e98-9f11-58c6dc0ef581",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> initializing model parallel with size 1\n",
      "> initializing ddp with size 1\n",
      "> initializing pipeline with size 1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/gscratch/raivn/ethans/miniconda3/envs/llms_12.1/lib/python3.11/site-packages/torch/__init__.py:614: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n",
      "  _C._set_default_tensor_type(t)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded in 25.15 seconds\n",
      "cuda:0\n"
     ]
    }
   ],
   "source": [
    "# Build the SuperposedLlama model from the checkpoint directory on sup_device\n",
    "weight_path = \"../../7B/\"\n",
    "model = SuperposedLlama.build(\n",
    "    ckpt_dir=weight_path,\n",
    "    # weight_path already ends with a separator; os.path.join avoids the doubled\n",
    "    # slash that f'{weight_path}/tokenizer.model' would produce\n",
    "    tokenizer_path=os.path.join(weight_path, \"tokenizer.model\"),\n",
    "    max_seq_len=100,\n",
    "    max_batch_size=32,\n",
    "    device=sup_device,\n",
    "    model_parallel_size=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2b48c23-d6a3-43b1-ad4c-54524aacfda6",
   "metadata": {},
   "source": [
    "# Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5093373b-bf76-47e3-8f99-1045b60f29c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def decode(tokenizer, encoding):\n",
    "    \"\"\"\n",
    "    Decode token ids to text, dropping everything from the first EOS token onward.\n",
    "\n",
    "    Args:\n",
    "        tokenizer (Any): Tokenizer; must provide `eos_id` and `decode(list_of_ids)`\n",
    "        encoding (torch.Tensor): Token ids to decode (assumed to be a single 1-D sequence)\n",
    "    Returns:\n",
    "        decoding (str): Text for the ids before the first EOS, or for all ids\n",
    "            when no EOS is present\n",
    "    \"\"\"\n",
    "    # nonzero() returns one row per position that holds the EOS id\n",
    "    eos_locs = (encoding == tokenizer.eos_id).nonzero()\n",
    "    # Compare the number of EOS hits to zero; `len(eos_locs > 0)` would instead take the\n",
    "    # length of an elementwise comparison and only gives the right answer by coincidence\n",
    "    if len(eos_locs) > 0:\n",
    "        encoding = encoding[:int(eos_locs[0])]\n",
    "    return tokenizer.decode(encoding.to(torch.int32).tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "18703b19-f3e9-46e4-ab1c-c6d3b403c6d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example prompts to continue; edit this list to try other inputs\n",
    "prompts = [\n",
    "    \"Hi my name is\",\n",
    "    \"The Seattle Seahawks were Super Bowl\",\n",
    "    \"Penguins are birds native to\"\n",
    "]\n",
    "# NOTE(review): the positional True/False are presumably the BOS/EOS flags - confirm against Tokenizer.encode\n",
    "tokenized_prompts = tokenizer.encode(prompts, True, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "d39cd735-9480-4979-ac92-bbd470f75570",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run superposed generation on the tokenized prompts with the hyperparameters from the params file\n",
    "alive_gens, _ = model.sup_generate(\n",
    "    prompt_tokens=tokenized_prompts,\n",
    "    # Take the smoothing mode from the params file instead of hard-coding it;\n",
    "    # \"geom\" remains the fallback when the key is absent\n",
    "    smoothing=params.get(\"smoothing\", \"geom\"),\n",
    "    max_gen_len=10,\n",
    "    n_token_sample=n_token_sample,\n",
    "    alpha=alpha,\n",
    "    temp=temp,\n",
    "    n_drafts=n_drafts,\n",
    "    i_weights=i_weights,\n",
    "    i_length=i_length,\n",
    "    ngrams=ngrams,\n",
    "    get_time=False,\n",
    "    penalty=200,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "cfefa793-e49e-483a-a504-5cc9e23f619d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten the first element of alive_gens into len(prompts) * n_drafts rows; the row length is inferred.\n",
    "# NOTE(review): the saved output of the printing cell shows a single continuation split across\n",
    "# nine short fragments, so alive_gens[0] may not hold one full sequence per (prompt, draft) -\n",
    "# confirm the shape returned by sup_generate.\n",
    "gens = alive_gens[0].reshape(len(prompts) * n_drafts, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "5abf87ab-2ee0-4204-868b-1215abf0c8aa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hi\n",
      "my name\n",
      "is L\n",
      "inda,\n",
      "I am\n",
      "a \n",
      "40\n",
      "year old\n",
      "woman who\n"
     ]
    }
   ],
   "source": [
    "# Print the decoded text of every row in gens, one row per line\n",
    "for draft_tokens in gens:\n",
    "    print(decode(tokenizer, draft_tokens))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e73dc3cc-baa5-468d-bdd1-827465bdeb62",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}