{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "zsh:1: command not found: pip\n"
     ]
    }
   ],
   "source": [
    "!pip install -U --user transformers torch torchvision"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load from Local"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/deanmartin/Source/gpt-neo-1.3B-fiction-novel-generation'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "root_dir = '/'.join(os.getcwd().split('/')[:-1])\n",
    "\n",
    "root_dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = AutoModelForCausalLM.from_pretrained(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Inference Example"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "My name is John Doe, and I'm part of the New York State Attorney General's office.\n",
      "\n",
      "I'm not your \"typical\" defendant. I have no jury, no judge, no jury pool, no prosecutor-counsel, no courtroom.\n",
      "\n",
      "I don't even have my own cell phone with my inmate phone number. I live in the Bronx, Queens, Manhattan and New Jersey.\n",
      "\n",
      "And, I can't sleep because there's been a bomb threat,\n"
     ]
    }
   ],
   "source": [
    "prompt = 'My name is John Doe'\n",
    "input_ids = tokenizer(prompt, return_tensors='pt').input_ids\n",
    "generated_tokens = model.generate(\n",
    "    input_ids,\n",
    "    do_sample=True,\n",
    "    temperature=0.9,\n",
    "    max_length=100\n",
    ")\n",
    "generated_text = tokenizer.batch_decode(generated_tokens)[0]\n",
    "\n",
    "print(generated_text)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.13 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "0203f9377e450cf3e5fd498dcfe93bad69687b6515d650e7d79a42aa53323e2d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}