{ "cells": [ { "cell_type": "markdown", "id": "53a990e3-0d47-4e66-b928-f40d67f06584", "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "markdown", "id": "51fb0d43-c12b-4892-95d2-074bf5de0ce2", "metadata": {}, "source": [ "## Install additional packages" ] }, { "cell_type": "code", "execution_count": 1, "id": "9cf48779-454b-4b1d-b78f-531a1b207276", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os\n", "\n", "# The Google Cloud Notebook product has specific requirements\n", "IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists(\"/opt/deeplearning/metadata/env_version\")\n", "\n", "# Google Cloud Notebook requires dependencies to be installed with '--user'\n", "USER_FLAG = \"\"\n", "if IS_GOOGLE_CLOUD_NOTEBOOK:\n", " USER_FLAG = \"--user\"" ] }, { "cell_type": "code", "execution_count": 2, "id": "d2a3556a-ebf1-49c7-9d2c-63e30ca45f73", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip -q install {USER_FLAG} --upgrade transformers\n", "!pip -q install {USER_FLAG} --upgrade datasets\n", "!pip -q install {USER_FLAG} --upgrade tqdm\n", "!pip -q install {USER_FLAG} --upgrade cloudml-hypertune" ] }, { "cell_type": "code", "execution_count": 3, "id": "fcc3f1f6-36d3-4056-ad29-b69c57bb0bac", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip -q install {USER_FLAG} --upgrade google-cloud-aiplatform" ] }, { "cell_type": "code", "execution_count": 4, "id": "2214d165-356d-47f1-a4ee-4f6c50027e96", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Automatically restart kernel after installs\n", "import os\n", "\n", "if not os.getenv(\"IS_TESTING\"):\n", " # Automatically restart kernel after installs\n", " import IPython\n", "\n", " app = IPython.Application.instance()\n", " app.kernel.do_shutdown(True)" ] }, { "cell_type": "code", "execution_count": 1, "id": "e8817443-c80e-475b-b54e-dd834c040b12", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "!pip install 
git+https://github.com/huggingface/transformers.git datasets pandas torch\n", "!pip install transformers[torch]\n", "!pip install accelerate -U" ] }, { "cell_type": "markdown", "id": "21cc7690-95bf-4452-abef-46cd318ccfb5", "metadata": {}, "source": [ "## Set Project ID" ] }, { "cell_type": "code", "execution_count": 2, "id": "30b78533-ff39-4c92-a365-f2e05ddb642f", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Project ID: ikame-gem-ai-research\n" ] } ], "source": [ "PROJECT_ID = \"iKame-gem-ai-research\" # <---CHANGE THIS TO YOUR PROJECT\n", "\n", "import os\n", "\n", "# Get your Google Cloud project ID using google.auth\n", "if not os.getenv(\"IS_TESTING\"):\n", " import google.auth\n", "\n", " _, PROJECT_ID = google.auth.default()\n", " print(\"Project ID: \", PROJECT_ID)\n", "\n", "# validate PROJECT_ID\n", "if PROJECT_ID == \"\" or PROJECT_ID is None or PROJECT_ID == \"iKame-gem-ai-research\":\n", " print(\n", " f\"Please set your project id before proceeding to next step. 
Currently it's set as {PROJECT_ID}\"\n", " )" ] }, { "cell_type": "code", "execution_count": 3, "id": "5c4631f5-c8ba-43e9-a623-08cb2cb3a51a", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TIMESTAMP = 20240108040502\n" ] } ], "source": [ "from datetime import datetime\n", "\n", "\n", "def get_timestamp():\n", " return datetime.now().strftime(\"%Y%m%d%H%M%S\")\n", "\n", "\n", "TIMESTAMP = get_timestamp()\n", "print(f\"TIMESTAMP = {TIMESTAMP}\")" ] }, { "cell_type": "markdown", "id": "494d8009-7f9a-45d8-ba7c-3e3205d1c96b", "metadata": {}, "source": [ "## Create Cloud Storage bucket" ] }, { "cell_type": "code", "execution_count": 4, "id": "303136a0-6334-4889-b43b-9f171a934311", "metadata": { "tags": [] }, "outputs": [], "source": [ "BUCKET_NAME = \"gs://iKame-gem-ai-research\" # <---CHANGE THIS TO YOUR BUCKET\n", "REGION = \"us-central1\" # @param {type:\"string\"}" ] }, { "cell_type": "code", "execution_count": 5, "id": "014c6208-0b1a-4da8-888b-19c02a112474", "metadata": { "tags": [] }, "outputs": [], "source": [ "if BUCKET_NAME == \"\" or BUCKET_NAME is None or BUCKET_NAME == \"gs://iKame-gem-ai-research\":\n", " BUCKET_NAME = f\"gs://{PROJECT_ID}-bucket-review\"" ] }, { "cell_type": "code", "execution_count": 6, "id": "a52a28fa-591e-487c-bd53-8f770441ba63", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PROJECT_ID = ikame-gem-ai-research\n", "BUCKET_NAME = gs://ikame-gem-ai-research-bucket-review\n", "REGION = us-central1\n" ] } ], "source": [ "print(f\"PROJECT_ID = {PROJECT_ID}\")\n", "print(f\"BUCKET_NAME = {BUCKET_NAME}\")\n", "print(f\"REGION = {REGION}\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "24c35eb2-7619-4958-a04a-79b62788f257", "metadata": { "tags": [] }, "outputs": [], "source": [ "# ! 
gsutil mb -l $REGION $BUCKET_NAME" ] }, { "cell_type": "code", "execution_count": 8, "id": "6f2ee0a0-3cff-47cb-9379-6f6e75fef9d5", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 3078 2024-01-05T01:42:25Z gs://ikame-gem-ai-research-bucket-review/batch_examples.csv#1704418945853255 metageneration=1\n", " gs://ikame-gem-ai-research-bucket-review/pipeline_root/\n", "TOTAL: 1 objects, 3078 bytes (3.01 KiB)\n" ] } ], "source": [ "! gsutil ls -al $BUCKET_NAME #validate access to your Cloud Storage bucket" ] }, { "cell_type": "markdown", "id": "da865a4c-5e29-465e-abf2-e443dae1b573", "metadata": {}, "source": [ "## Install libraries" ] }, { "cell_type": "code", "execution_count": 9, "id": "fedbebaf-516e-4f7d-8a70-c7dc31de02df", "metadata": { "tags": [] }, "outputs": [], "source": [ "import base64\n", "import json\n", "import os\n", "import random\n", "import sys\n", "\n", "import google.auth\n", "from google.cloud import aiplatform\n", "from google.cloud.aiplatform import gapic as aip\n", "from google.cloud.aiplatform import hyperparameter_tuning as hpt\n", "from google.protobuf.json_format import MessageToDict" ] }, { "cell_type": "code", "execution_count": 10, "id": "0cc75279-b7a9-47cc-81a4-f8729c7d57f8", "metadata": { "tags": [] }, "outputs": [], "source": [ "from IPython.display import HTML, display" ] }, { "cell_type": "code", "execution_count": 11, "id": "8856c9f3-270f-4dca-8a10-6bdee1af8bc0", "metadata": { "tags": [] }, "outputs": [], "source": [ "import datasets\n", "from datasets import Dataset, DatasetDict\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "import transformers\n", "from datasets import ClassLabel, Sequence, load_dataset\n", "from transformers import (AutoModelForSequenceClassification, AutoTokenizer,BertForSequenceClassification,\n", " EvalPrediction, Trainer, TrainingArguments,PreTrainedModel,BertModel,\n", " default_data_collator)" ] }, { "cell_type": "code", 
"execution_count": 12, "id": "bbecdaa8-3cd3-4e7b-939d-f959da9301d6", "metadata": { "tags": [] }, "outputs": [], "source": [ "from google.cloud import bigquery\n", "from google.cloud import storage\n", "\n", "client = bigquery.Client()\n", "storage_client = storage.Client()" ] }, { "cell_type": "code", "execution_count": 13, "id": "f693060f-c0ed-4ec3-bc66-17898f8ef854", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Notebook runtime: GPU\n", "PyTorch version : 2.0.0+cu118\n", "Transformers version : 2.16.1\n", "Datasets version : 4.37.0.dev0\n" ] } ], "source": [ "# Report the runtime and the library versions in use.\n", "# Each label interpolates the __version__ of the module it names.\n", "print(f\"Notebook runtime: {'GPU' if torch.cuda.is_available() else 'CPU'}\")\n", "print(f\"PyTorch version : {torch.__version__}\")\n", "print(f\"Transformers version : {transformers.__version__}\")\n", "print(f\"Datasets version : {datasets.__version__}\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "5637d9f0-d290-4107-974a-bfbda3b316b2", "metadata": { "tags": [] }, "outputs": [], "source": [ "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" ] }, { "cell_type": "code", "execution_count": 14, "id": "3d114e96-31c2-4ed9-82d1-f2fab38f0944", "metadata": { "tags": [] }, "outputs": [], "source": [ "APP_NAME = \"aift-review-classificatio-multiple-label\"" ] }, { "cell_type": "code", "execution_count": null, "id": "173dcb77-9908-4af1-86bb-7811c9f580e9", "metadata": {}, "outputs": [], "source": [ "!cd aift-model-review-multiple-label-classification" ] }, { "cell_type": "markdown", "id": "3f383051-501f-4f8c-8017-c989c5740041", "metadata": {}, "source": [ "# Training" ] }, { "cell_type": "markdown", "id": "db9715cc-0779-47a4-a0ed-82714b6668f6", "metadata": {}, "source": [ "## Preprocess data" ] }, { "cell_type": "code", "execution_count": 16, "id": "052ecc7b-c015-49a0-a359-85afbac10bbf", "metadata": { "tags": [] }, "outputs": [], "source": [ "model_ckpt = \"distilbert-base-uncased\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_ckpt)\n", "\n", 
"def tokenize_and_encode(examples):\n", " return tokenizer(examples[\"review\"], truncation=True)" ] }, { "cell_type": "code", "execution_count": 17, "id": "6f5faf02-ede8-4d48-b94a-1d4619c8e610", "metadata": { "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7a2415bdfd4a40fe80afe71e70d97976", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/556 [00:00thresh)==y_true.bool()).float().mean().item()" ] }, { "cell_type": "code", "execution_count": 23, "id": "db202a97-61e1-4e43-bb93-20179c2c0aa2", "metadata": { "tags": [] }, "outputs": [], "source": [ "def compute_metrics(eval_pred):\n", " predictions, labels = eval_pred\n", " return {'accuracy_thresh': accuracy_thresh(predictions, labels)}" ] }, { "cell_type": "code", "execution_count": 24, "id": "e0ab370a-fc4d-460b-9dab-dbde755dc3f4", "metadata": {}, "outputs": [], "source": [ "class MultilabelTrainer(Trainer):\n", " def compute_loss(self, model, inputs, return_outputs=False):\n", " labels = inputs.pop(\"labels\")\n", " outputs = model(**inputs)\n", " logits = outputs.logits\n", " loss_fct = torch.nn.BCEWithLogitsLoss()\n", " loss = loss_fct(logits.view(-1, self.model.config.num_labels),\n", " labels.float().view(-1, self.model.config.num_labels))\n", " return (loss, outputs) if return_outputs else loss" ] }, { "cell_type": "code", "execution_count": 32, "id": "340ade6d-1eb1-47ec-b8e6-56371083e361", "metadata": {}, "outputs": [], "source": [ "batch_size = 8\n", "\n", "args = TrainingArguments(\n", " output_dir=\"aift-model-review-multiple-label-classification\",\n", " evaluation_strategy = \"epoch\",\n", " learning_rate=2e-5,\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size,\n", " num_train_epochs=10,\n", " weight_decay=0.01,\n", " use_cpu = False\n", ")" ] }, { "cell_type": "code", "execution_count": 33, "id": "39d8e955-9ca8-463c-899a-bd3b1d5f2c0e", "metadata": {}, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels).to('cuda')" ] }, { "cell_type": "code", "execution_count": 34, "id": "3cb96e02-f0f7-4a0a-9fe6-f88fe89826f8", "metadata": {}, "outputs": [], "source": [ "trainer = MultilabelTrainer(\n", " model,\n", " args,\n", " train_dataset=dataset[\"train\"],\n", " eval_dataset=dataset[\"test\"],\n", " compute_metrics=compute_metrics,\n", " tokenizer=tokenizer)" ] }, { "cell_type": "code", "execution_count": 35, "id": "da79a882-f1f1-41a5-b4dd-98b070012c4c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [18/18 00:06]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'eval_loss': 0.7062913179397583,\n", " 'eval_accuracy_thresh': 0.4561224579811096,\n", " 'eval_runtime': 0.2818,\n", " 'eval_samples_per_second': 496.847,\n", " 'eval_steps_per_second': 63.88}" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 36, "id": "eeefe348-a66f-4e14-9844-da6f3f3ebd80", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [700/700 00:47, Epoch 10/10]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossAccuracy Thresh
1No log0.4151910.868367
2No log0.3026310.901020
3No log0.2406270.928571
4No log0.2176010.931633
5No log0.2038450.924490
6No log0.1924440.929592
7No log0.1900310.926531
80.2652000.1867600.928571
90.2652000.1804360.936735
100.2652000.1798210.934694

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Checkpoint destination directory aift-model-review-multiple-label-classification/checkpoint-500 already exists and is non-empty.Saving will proceed but saved results may be invalid.\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=700, training_loss=0.22303315843854632, metrics={'train_runtime': 47.1667, 'train_samples_per_second': 117.88, 'train_steps_per_second': 14.841, 'total_flos': 55632988457664.0, 'train_loss': 0.22303315843854632, 'epoch': 10.0})" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "code", "execution_count": 104, "id": "d9c2e1e1-c20e-48e5-8f6b-e4e3222899a5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mkdir: cannot create directory ‘./models’: File exists\n" ] } ], "source": [ "saved_model_local_path = \"./models\"\n", "# !mkdir ./aift-model-review-multiple-label-classification/models" ] }, { "cell_type": "code", "execution_count": 39, "id": "c6632c17-49e2-4823-abae-a286fa06f8c5", "metadata": {}, "outputs": [], "source": [ "trainer.save_model(saved_model_local_path)" ] }, { "cell_type": "code", "execution_count": 69, "id": "4af413bf-9c9d-46aa-b75b-f729c8aae546", "metadata": {}, "outputs": [], "source": [ "history = trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 70, "id": "6ee5c718-6b27-4ed8-993b-dd41468cf16a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'eval_loss': 0.1798214465379715,\n", " 'eval_accuracy_thresh': 0.9346938729286194,\n", " 'eval_runtime': 0.2965,\n", " 'eval_samples_per_second': 472.249,\n", " 'eval_steps_per_second': 60.718,\n", " 'epoch': 10.0}" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "history" ] }, { "cell_type": "code", "execution_count": 110, "id": 
"948a6110-48c3-42f5-8950-d4dc3cfc21a5", "metadata": { "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c835ed1d2ac74d3995f59f351a5933bd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='

= threshold)] = 1\n", " # turn predicted id's into actual label names\n", " predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]\n", " print(predicted_labels)" ] }, { "cell_type": "code", "execution_count": 57, "id": "136f3624-d752-4e62-ae67-c52c8c7413b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0. 0. 0. 0. 0. 0. 0.]\n", "tensor([0.9740, 0.0251, 0.1409, 0.7609, 0.0359, 0.0374, 0.0321],\n", " grad_fn=)\n", "['ads', 'negative']\n" ] } ], "source": [ "text = \"a lot of ads\"\n", "predict(text,0.4)" ] }, { "cell_type": "code", "execution_count": 60, "id": "4bdd8052-5c6f-4148-a5cd-bbd5e42aa640", "metadata": {}, "outputs": [], "source": [ "label_text = id2label\n", "model_name_or_path=model_ckpt\n", "saved_model_path = saved_model_local_path\n", "\n", "\n", "def predict_(input_text, saved_model_path,threshold):\n", "    \"\"\"Print the labels whose sigmoid probability is at least threshold.\n", "\n", "    Args:\n", "        input_text: Text to classify.\n", "        saved_model_path: Path handed to AutoModelForSequenceClassification.from_pretrained.\n", "        threshold: Minimum probability for a label to be reported.\n", "\n", "    Prints the zero-initialised prediction vector, the per-label probabilities and\n", "    the selected label names (looked up in the notebook-level id2label). Returns None.\n", "    \"\"\"\n", "    # initialize tokenizer\n", "    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)\n", "\n", "    # preprocess and encode input text\n", "    predict_input = tokenizer(\n", "        input_text,\n", "        padding=\"max_length\",\n", "        max_length=128,\n", "        truncation=True,\n", "        return_tensors=\"pt\",\n", "    )\n", "\n", "    # load trained model\n", "    loaded_model = AutoModelForSequenceClassification.from_pretrained(saved_model_path)\n", "\n", "    # get predictions\n", "    # The text is padded to max_length, so the attention mask has to be passed with\n", "    # input_ids; without it the padding positions are attended to like real tokens.\n", "    # no_grad: this is inference only, so no autograd graph is needed.\n", "    with torch.no_grad():\n", "        output = loaded_model(\n", "            input_ids=predict_input[\"input_ids\"],\n", "            attention_mask=predict_input[\"attention_mask\"],\n", "        )\n", "\n", "    logits = output.logits\n", "    # apply sigmoid + threshold\n", "    sigmoid = torch.nn.Sigmoid()\n", "    probs = sigmoid(logits.squeeze().cpu())\n", "    predictions = np.zeros(probs.shape)\n", "    print(predictions)\n", "    print(probs)\n", "    predictions[np.where(probs >= threshold)] = 1\n", "    # turn predicted id's into actual label names\n", "    predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]\n", "    print(predicted_labels)" ] }, { 
"cell_type": "code", "execution_count": 62, "id": "48e96b48-db19-4c25-89f1-eb640c955614", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0. 0. 0. 0. 0. 0. 0.]\n", "tensor([0.5107, 0.1010, 0.5961, 0.2481, 0.2118, 0.1907, 0.1010],\n", " grad_fn=)\n", "['ads', 'positive']\n" ] } ], "source": [ "text='ew a lot of ads'\n", "predict_(text, saved_model_path,0.4)" ] }, { "cell_type": "markdown", "id": "2b8505cd-bc32-46e9-9387-a102830e62ef", "metadata": { "tags": [] }, "source": [ "# Custom training" ] }, { "cell_type": "code", "execution_count": 99, "id": "bba84d7d-5971-4e44-a977-268bc2b97e77", "metadata": { "tags": [] }, "outputs": [], "source": [ "PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI = (\n", " \"us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-7:latest\"\n", ")\n", "\n", "PYTHON_PACKAGE_APPLICATION_DIR = \"python_package\"\n", "\n", "source_package_file_name = f\"pipeline/aift-model-review-multiple-label-classification/{PYTHON_PACKAGE_APPLICATION_DIR}/dist/trainer-0.1.tar.gz\"\n", "python_package_gcs_uri = (\n", " f\"{BUCKET_NAME}/pytorch-on-gcp/{APP_NAME}/train/python_package/trainer-0.1.tar.gz\"\n", ")\n", "python_module_name = \"trainer.task\"" ] }, { "cell_type": "code", "execution_count": 100, "id": "3610d07c-909a-470a-b3f7-2e68f3b8292e", "metadata": { "tags": [] }, "outputs": [], "source": [ "# !mkdir ./python_package" ] }, { "cell_type": "code", "execution_count": 108, "id": "ecdc6201-d714-4cbe-9c1f-415857730700", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./aift-model-review-multiple-label-classification/python_package/setup.py\n" ] } ], "source": [ "%%writefile ./aift-model-review-multiple-label-classification/{PYTHON_PACKAGE_APPLICATION_DIR}/setup.py\n", "\n", "from setuptools import find_packages\n", "from setuptools import setup\n", "import setuptools\n", "\n", "from distutils.command.build import build as _build\n", "import subprocess\n", 
"\n", "\n", "REQUIRED_PACKAGES = [\n", " 'transformers',\n", " 'datasets',\n", " 'tqdm',\n", " 'cloudml-hypertune'\n", "]\n", "\n", "setup(\n", " name='trainer',\n", " version='0.1',\n", " install_requires=REQUIRED_PACKAGES,\n", " packages=find_packages(),\n", " include_package_data=True,\n", " description='Vertex AI | Training | PyTorch | Text Classification | Python Package'\n", ")" ] }, { "cell_type": "code", "execution_count": 109, "id": "d001cdca-a207-4f23-b6e5-33106c252004", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "running sdist\n", "running egg_info\n", "creating trainer.egg-info\n", "writing trainer.egg-info/PKG-INFO\n", "writing dependency_links to trainer.egg-info/dependency_links.txt\n", "writing requirements to trainer.egg-info/requires.txt\n", "writing top-level names to trainer.egg-info/top_level.txt\n", "writing manifest file 'trainer.egg-info/SOURCES.txt'\n", "reading manifest file 'trainer.egg-info/SOURCES.txt'\n", "writing manifest file 'trainer.egg-info/SOURCES.txt'\n", "running check\n", "creating trainer-0.1\n", "creating trainer-0.1/trainer\n", "creating trainer-0.1/trainer.egg-info\n", "copying files to trainer-0.1...\n", "copying README.md -> trainer-0.1\n", "copying setup.py -> trainer-0.1\n", "copying trainer/__init__.py -> trainer-0.1/trainer\n", "copying trainer/experiment.py -> trainer-0.1/trainer\n", "copying trainer/metadata.py -> trainer-0.1/trainer\n", "copying trainer/model.py -> trainer-0.1/trainer\n", "copying trainer/task.py -> trainer-0.1/trainer\n", "copying trainer/utils.py -> trainer-0.1/trainer\n", "copying trainer.egg-info/PKG-INFO -> trainer-0.1/trainer.egg-info\n", "copying trainer.egg-info/SOURCES.txt -> trainer-0.1/trainer.egg-info\n", "copying trainer.egg-info/dependency_links.txt -> trainer-0.1/trainer.egg-info\n", "copying trainer.egg-info/requires.txt -> trainer-0.1/trainer.egg-info\n", "copying trainer.egg-info/top_level.txt -> 
trainer-0.1/trainer.egg-info\n", "Writing trainer-0.1/setup.cfg\n", "creating dist\n", "Creating tar archive\n", "removing 'trainer-0.1' (and everything under it)\n" ] } ], "source": [ "!cd aift-model-review-multiple-label-classification/{PYTHON_PACKAGE_APPLICATION_DIR} && python3 setup.py sdist --formats=gztar" ] }, { "cell_type": "code", "execution_count": 82, "id": "7a296aa0-ead6-456f-a93a-657fed393bd2", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Copying file://python_package/dist/trainer-0.1.tar.gz [Content-Type=application/x-tar]...\n", "/ [1 files][ 916.0 B/ 916.0 B] \n", "Operation completed over 1 objects/916.0 B. \n" ] } ], "source": [ "!gsutil cp {source_package_file_name} {python_package_gcs_uri}" ] }, { "cell_type": "code", "execution_count": 83, "id": "087fcdaa-0d99-4104-8e61-74455d4bf734", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 916 2024-01-08T07:48:19Z gs://ikame-gem-ai-research-bucket-review/pytorch-on-gcp/aift-review-classificatio-multiple-label/train/python_package/trainer-0.1.tar.gz\n", "TOTAL: 1 objects, 916 bytes (916 B)\n" ] } ], "source": [ "!gsutil ls -l {python_package_gcs_uri}" ] }, { "cell_type": "code", "execution_count": 85, "id": "4dce414a-063a-4952-8197-75586909e098", "metadata": { "tags": [] }, "outputs": [], "source": [ "# !cd {PYTHON_PACKAGE_APPLICATION_DIR} && python -m trainer.task" ] }, { "cell_type": "code", "execution_count": 86, "id": "a7698349-f5f4-4032-a9b2-1fc659f4b022", "metadata": { "tags": [] }, "outputs": [], "source": [ "aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)" ] }, { "cell_type": "code", "execution_count": 87, "id": "112e1b67-5bb0-444a-94c6-a2f010e24fe9", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "APP_NAME=aift-review-classificatio-multiple-label\n", 
"PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI=us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-7:latest\n", "python_package_gcs_uri=gs://ikame-gem-ai-research-bucket-review/pytorch-on-gcp/aift-review-classificatio-multiple-label/train/python_package/trainer-0.1.tar.gz\n", "python_module_name=trainer.task\n" ] } ], "source": [ "print(f\"APP_NAME={APP_NAME}\")\n", "print(\n", " f\"PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI={PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI}\"\n", ")\n", "print(f\"python_package_gcs_uri={python_package_gcs_uri}\")\n", "print(f\"python_module_name={python_module_name}\")" ] }, { "cell_type": "code", "execution_count": 88, "id": "c0fa20a0-0831-49ab-9fce-423016e98db6", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "JOB_NAME=aift-review-classificatio-multiple-label-pytorch-pkg-ar-20240108075109\n" ] } ], "source": [ "JOB_NAME = f\"{APP_NAME}-pytorch-pkg-ar-{get_timestamp()}\"\n", "print(f\"JOB_NAME={JOB_NAME}\")" ] }, { "cell_type": "code", "execution_count": 89, "id": "86922169-8509-48ff-acc9-c06bc9a4ecd1", "metadata": { "tags": [] }, "outputs": [], "source": [ "job = aiplatform.CustomPythonPackageTrainingJob(\n", " display_name=f\"{JOB_NAME}\",\n", " python_package_gcs_uri=python_package_gcs_uri,\n", " python_module_name=python_module_name,\n", " container_uri=PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI,\n", ")" ] }, { "cell_type": "code", "execution_count": 90, "id": "a7909b64-fedb-4da8-bc61-80b4806117d3", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training Output directory:\n", "gs://ikame-gem-ai-research-bucket-review/aiplatform-custom-training-2024-01-08-07:51:20.301 \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:Training Output directory:\n", "gs://ikame-gem-ai-research-bucket-review/aiplatform-custom-training-2024-01-08-07:51:20.301 \n" ] }, { "name": "stdout", "output_type": "stream", 
"text": [ "View Training:\n", "https://console.cloud.google.com/ai/platform/locations/us-central1/training/2282426366479564800?project=763889829809\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:View Training:\n", "https://console.cloud.google.com/ai/platform/locations/us-central1/training/2282426366479564800?project=763889829809\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "View backing custom job:\n", "https://console.cloud.google.com/ai/platform/locations/us-central1/training/7832101356516147200?project=763889829809\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:View backing custom job:\n", "https://console.cloud.google.com/ai/platform/locations/us-central1/training/7832101356516147200?project=763889829809\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CustomPythonPackageTrainingJob 
projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:google.cloud.aiplatform.training_jobs:CustomPythonPackageTrainingJob projects/763889829809/locations/us-central1/trainingPipelines/2282426366479564800 current state:\n", "PipelineState.PIPELINE_STATE_RUNNING\n" ] } ], "source": [ "training_args = [\"--num-epochs\", \"2\", \"--model-name\", \"finetuned-bert-classifier\"]\n", "\n", "model = job.run(\n", " replica_count=1,\n", " machine_type=\"n1-standard-8\",\n", " accelerator_type=\"NVIDIA_TESLA_V100\",\n", " accelerator_count=1,\n", " args=training_args,\n", " sync=False,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "1e681913-680e-4664-9c6a-083f350915bc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "environment": { "kernel": "python3", "name": ".m114", "type": 
"gcloud", "uri": "gcr.io/deeplearning-platform-release/:m114" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }