{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (not !pip) runs pip for the interpreter backing this kernel, so it does\n",
    "# not depend on a `pip` executable being on the shell PATH.\n",
    "%pip install -U transformers torch torchvision"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load from Local"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/deanmartin/Source/gpt-neo-1.3B-fiction-novel-generation'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The tokenizer and model are loaded from the parent of the kernel's working\n",
    "# directory. os.path.dirname copes with any path separator and with a\n",
    "# top-level working directory, where splitting on '/' would yield ''.\n",
    "root_dir = os.path.dirname(os.getcwd())\n",
    "\n",
    "root_dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = AutoModelForCausalLM.from_pretrained(root_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Inference Example"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED = 42  # fixed so the sampled continuation is repeatable when the cell is re-run\n",
    "prompt = 'My name is John Doe'\n",
    "\n",
    "set_seed(SEED)\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs.input_ids\n",
    "generated_tokens = model.generate(\n",
    "    input_ids,\n",
    "    # Passed explicitly: without them generate() warns and sets pad_token_id\n",
    "    # to eos_token_id on its own.\n",
    "    attention_mask=inputs.attention_mask,\n",
    "    pad_token_id=tokenizer.eos_token_id,\n",
    "    do_sample=True,\n",
    "    temperature=0.9,\n",
    "    max_length=100,  # total length: prompt tokens plus generated tokens\n",
    ")\n",
    "# Keep special tokens (e.g. the end-of-text marker) out of the printed text.\n",
    "generated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]\n",
    "\n",
    "print(generated_text)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.13 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "0203f9377e450cf3e5fd498dcfe93bad69687b6515d650e7d79a42aa53323e2d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}