{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "56f79218-026b-403d-8caa-d5aae41bb3e0", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:37.162054Z", "start_time": "2024-03-02T07:57:31.733202900Z" } }, "outputs": [], "source": [ "import torch\n", "from torch import nn\n", "from torch import optim\n", "from torchvision import transforms\n", "from torch.optim import lr_scheduler" ] }, { "cell_type": "code", "execution_count": 3, "id": "a64dd1a6-0197-424b-b109-f88787b18164", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.404732Z", "start_time": "2024-03-02T07:57:37.165358400Z" } }, "outputs": [], "source": [ "from generate_c3d_model import generate_model\n", "from train import train_epoch" ] }, { "cell_type": "code", "execution_count": 4, "id": "33b89569-a272-4d8a-8ece-e0fc3054e9bb", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.431727100Z", "start_time": "2024-03-02T07:57:38.406924200Z" } }, "outputs": [], "source": [ "from datasets.nv import NV" ] }, { "cell_type": "code", "execution_count": 5, "id": "41220539-449a-478f-954e-ecf9982388e5", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.446055400Z", "start_time": "2024-03-02T07:57:38.426055300Z" } }, "outputs": [], "source": [ "from utils import *\n", "from target_transforms import *" ] }, { "cell_type": "code", "execution_count": 6, "id": "1e969200-a07e-445f-b638-a5d84b6892d8", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.459855800Z", "start_time": "2024-03-02T07:57:38.440573600Z" } }, "outputs": [], "source": [ "from logger.logger import get_logger\n", "logger = get_logger(__name__)" ] }, { "cell_type": "code", "execution_count": 7, "id": "b7d5fa6a-adae-47cc-a5c2-605f3773ed1e", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.491833Z", "start_time": "2024-03-02T07:57:38.454971500Z" } }, "outputs": [ { "data": { "text/plain": "" }, "execution_count": 
7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.manual_seed(1)" ] }, { "cell_type": "code", "execution_count": 8, "id": "bedb9441-e776-4f4f-b14e-60e99e78118b", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:57:38.492929700Z", "start_time": "2024-03-02T07:57:38.473197300Z" } }, "outputs": [], "source": [ "arch = '{}'.format('c3d')\n", "n_epochs = 35\n", "n_classes = 26\n", "sample_size = 112\n", "sample_duration = 10\n", "ft_portion = \"last_layer\"\n", "downsample = 2\n", "scale_step = 0.84089641525\n", "scales = [1.0]\n", "for i in range(1, 5):\n", " scales.append(scales[-1] * scale_step)" ] }, { "cell_type": "code", "execution_count": 10, "id": "fc3d13b8-6f90-42bf-aebc-ebbbf3a2e7e8", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:58:00.830367500Z", "start_time": "2024-03-02T07:58:00.069619100Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "generate_c3d_model 2024-03-02 10:58:00,066 INFO Torch version: 2.2.1\n", "generate_c3d_model 2024-03-02 10:58:00,068 INFO Is CUDA enabled? True\n", "generate_c3d_model 2024-03-02 10:58:00,565 INFO Total number of trainable parameters: 31913114\n", "generate_c3d_model 2024-03-02 10:58:00,567 INFO Converting the pretrained model to RGB+D init model\n", "generate_c3d_model 2024-03-02 10:58:00,810 INFO Done. 
RGB-D model ready.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "last_layer\n" ] } ], "source": [ "model, parameters = generate_model(n_classes, sample_size, ft_portion)" ] }, { "cell_type": "code", "execution_count": 11, "id": "f547dfcb-bded-41a1-b7c5-314c51cee32c", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:58:04.335008400Z", "start_time": "2024-03-02T07:58:04.312769200Z" } }, "outputs": [], "source": [ "criterion = nn.CrossEntropyLoss()\n", "criterion = criterion.cuda()\n", "spatial_transform = transforms.Compose([\n", " transforms.ToTensor(),\n", " transforms.Normalize([0, 0, 0], [1, 1, 1])\n", "])\n", "temporal_transform = transforms.Compose([\n", " transforms.ToTensor(),\n", " transforms.Normalize([0, 0, 0], [1, 1, 1])])\n", "target_transform = ClassLabel()\n", "optimizer = optim.SGD(\n", " parameters,\n", " lr=0.1,\n", " momentum=0.9,\n", " dampening=0.9,\n", " weight_decay=1e-3,\n", " nesterov=False)\n", "\n", "scheduler = lr_scheduler.ReduceLROnPlateau(\n", " optimizer, 'min', patience=10)" ] }, { "cell_type": "code", "execution_count": 12, "id": "f024f129-7d3f-42b3-af89-36612b5f2c43", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:58:09.870821600Z", "start_time": "2024-03-02T07:58:09.730071200Z" } }, "outputs": [ { "ename": "FileNotFoundError", "evalue": "[Errno 2] No such file or directory: './annotation_nvGesture_v1/nvall_but_None.json'", "output_type": "error", "traceback": [ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[1;31mFileNotFoundError\u001B[0m Traceback (most recent call last)", "Cell \u001B[1;32mIn[12], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m training_data \u001B[38;5;241m=\u001B[39m \u001B[43mNV\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 2\u001B[0m \u001B[43m 
\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m./nvGesture_v1.1/nvGesture_v1\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 3\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43m./annotation_nvGesture_v1/nvall_but_None.json\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 4\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mtraining\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 5\u001B[0m \u001B[43m \u001B[49m\u001B[43mspatial_transform\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mspatial_transform\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 6\u001B[0m \u001B[43m \u001B[49m\u001B[43mtemporal_transform\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtemporal_transform\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 7\u001B[0m \u001B[43m \u001B[49m\u001B[43mtarget_transform\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtarget_transform\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 8\u001B[0m \u001B[43m \u001B[49m\u001B[43msample_duration\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msample_duration\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 9\u001B[0m \u001B[43m \u001B[49m\u001B[43mmodality\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mRGB-D\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n", "File \u001B[1;32mD:\\current\\gesture\\new\\datasets\\nv.py:192\u001B[0m, in \u001B[0;36mNV.__init__\u001B[1;34m(self, root_path, annotation_path, subset, n_samples_for_each_video, spatial_transform, temporal_transform, target_transform, sample_duration, modality, get_loader)\u001B[0m\n\u001B[0;32m 181\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__init__\u001B[39m(\u001B[38;5;28mself\u001B[39m,\n\u001B[0;32m 182\u001B[0m root_path,\n\u001B[0;32m 183\u001B[0m annotation_path,\n\u001B[1;32m 
(...)\u001B[0m\n\u001B[0;32m 190\u001B[0m modality\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mRGB\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[0;32m 191\u001B[0m get_loader\u001B[38;5;241m=\u001B[39mget_default_video_loader):\n\u001B[1;32m--> 192\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdata, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mclass_names \u001B[38;5;241m=\u001B[39m \u001B[43mmake_dataset\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 193\u001B[0m \u001B[43m \u001B[49m\u001B[43mroot_path\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mannotation_path\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msubset\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mn_samples_for_each_video\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 194\u001B[0m \u001B[43m \u001B[49m\u001B[43msample_duration\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 196\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mspatial_transform \u001B[38;5;241m=\u001B[39m spatial_transform\n\u001B[0;32m 197\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtemporal_transform \u001B[38;5;241m=\u001B[39m temporal_transform\n", "File \u001B[1;32mD:\\current\\gesture\\new\\datasets\\nv.py:116\u001B[0m, in \u001B[0;36mmake_dataset\u001B[1;34m(root_path, annotation_path, subset, n_samples_for_each_video, sample_duration)\u001B[0m\n\u001B[0;32m 115\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mmake_dataset\u001B[39m(root_path, annotation_path, subset, n_samples_for_each_video, sample_duration):\n\u001B[1;32m--> 116\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[43mload_annotation_data\u001B[49m\u001B[43m(\u001B[49m\u001B[43mannotation_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 117\u001B[0m video_names, annotations \u001B[38;5;241m=\u001B[39m get_video_names_and_annotations(data, subset)\n\u001B[0;32m 118\u001B[0m class_to_idx \u001B[38;5;241m=\u001B[39m 
get_class_labels(data)\n", "File \u001B[1;32mD:\\current\\gesture\\new\\datasets\\nv.py:88\u001B[0m, in \u001B[0;36mload_annotation_data\u001B[1;34m(data_file_path)\u001B[0m\n\u001B[0;32m 87\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mload_annotation_data\u001B[39m(data_file_path):\n\u001B[1;32m---> 88\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28;43mopen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mdata_file_path\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mr\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mas\u001B[39;00m data_file:\n\u001B[0;32m 89\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m json\u001B[38;5;241m.\u001B[39mload(data_file)\n", "\u001B[1;31mFileNotFoundError\u001B[0m: [Errno 2] No such file or directory: './annotation_nvGesture_v1/nvall_but_None.json'" ] } ], "source": [ "training_data = NV(\n", " './nvGesture_v1.1/nvGesture_v1',\n", " './annotation_nvGesture_v1/nvall_but_None.json',\n", " 'training',\n", " spatial_transform=spatial_transform,\n", " temporal_transform=temporal_transform,\n", " target_transform=target_transform,\n", " sample_duration=sample_duration,\n", " modality=\"RGB-D\")" ] }, { "cell_type": "code", "execution_count": null, "id": "5ed0e8a9-5fae-4eda-acd9-f1f27d442826", "metadata": { "tags": [], "ExecuteTime": { "end_time": "2024-03-02T07:46:53.578865700Z", "start_time": "2024-03-02T07:46:53.568462300Z" } }, "outputs": [], "source": [ "train_loader = torch.utils.data.DataLoader(\n", " training_data,\n", " batch_size=80,\n", " shuffle=True,\n", " num_workers=12,\n", " pin_memory=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "d8e9ff8c-d19b-4b0a-aac4-ff49feb4440c", "metadata": { "tags": [], "ExecuteTime": { "start_time": "2024-03-02T07:46:53.572952800Z" } }, "outputs": [], "source": [ "# logger.info(f\"run\")\n", "# best_prec1 = 0\n", "# for i in range(1, n_epochs + 1):\n", "# # for i in 
range(opt.begin_epoch, opt.begin_epoch + 10):\n", "# torch.cuda.empty_cache()\n", "# adjust_learning_rate(optimizer, i)\n", "# train_epoch(i, train_loader, model, criterion, optimizer)\n", "# state = {\n", "# 'epoch': i,\n", "# 'arch': arch,\n", "# 'state_dict': model.state_dict(),\n", "# 'optimizer': optimizer.state_dict(),\n", "# 'best_prec1': best_prec1\n", "# }\n", "# save_checkpoint(state, False) \n", "# " ] }, { "cell_type": "code", "execution_count": null, "id": "0364f529-f663-417b-ad0e-db46d443d147", "metadata": { "ExecuteTime": { "start_time": "2024-03-02T07:46:53.577765700Z" } }, "outputs": [], "source": [ "if __name__ == '__main__':\n", " logger.info(f\"run\")\n", " best_prec1 = 0\n", " for i in range(1, n_epochs + 1):\n", " # for i in range(opt.begin_epoch, opt.begin_epoch + 10):\n", " torch.cuda.empty_cache()\n", " adjust_learning_rate(optimizer, i)\n", " train_epoch(i, train_loader, model, criterion, optimizer)\n", " state = {\n", " 'epoch': i,\n", " 'arch': arch,\n", " 'state_dict': model.state_dict(),\n", " 'optimizer': optimizer.state_dict(),\n", " 'best_prec1': best_prec1\n", " }\n", " save_checkpoint(state, False) \n", " " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" } }, "nbformat": 4, "nbformat_minor": 5 }