{ "cells": [ { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "import sys\n", "sys.path.insert(0, '..')\n", "\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from utils.plot_metrics import plot_metrics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## To train the model, follow these steps:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Extract the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# !7z x -y ../data/raw/DocLayNet_core.zip -o./DocLayNet_core" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Run the script below" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# output_dir = \"./output\"\n", "# device = \"cuda\"\n", "\n", "# !python ./layout-model-training/tools/train_net.py \\\n", "# \"--dataset_name\" DocLayNet \\\n", "# \"--json_annotation_train\" \"./DocLayNet_core/COCO/train.json\" \\\n", "# \"--image_path_train\" \"./DocLayNet_core/PNG\" \\\n", "# \"--json_annotation_val\" \"./DocLayNet_core/COCO/test.json\" \\\n", "# \"--image_path_val\" \"./DocLayNet_core/PNG\" \\\n", "# \"--config-file\" \"./layout-model-training/configs/prima/fast_rcnn_R_50_FPN_3x.yaml\" \\\n", "# \"--resume\" \\\n", "# \"OUTPUT_DIR\" \"{output_dir}\" \\\n", "# \"SOLVER.IMS_PER_BATCH\" 4 \\\n", "# \"MODEL.DEVICE\" \"{device}\" \\\n", "# \"SOLVER.BASE_LR\" 0.01 \\\n", "# \"SOLVER.MAX_ITER\" 80000 \\\n", "# \"SOLVER.CHECKPOINT_PERIOD\" 300" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NB: the file at `./layout-model-training/tools/train_net.py` was included by `git subtree add --prefix model/layout-model-training https://github.com/Layout-Parser/layout-model-training.git master`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Results" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "9 | \n", "
---|---|---|---|---|---|---|---|---|---|---|
data_time | \n", "0.008384 | \n", "0.008775 | \n", "0.008960 | \n", "0.009312 | \n", "0.009425 | \n", "0.008924 | \n", "0.008733 | \n", "0.008826 | \n", "0.009223 | \n", "0.008738 | \n", "
eta_seconds | \n", "30040.116253 | \n", "30032.604346 | \n", "30199.984691 | \n", "30397.155697 | \n", "30450.676296 | \n", "30443.054099 | \n", "30435.431902 | \n", "30306.135462 | \n", "30336.655440 | \n", "30290.952027 | \n", "
fast_rcnn/cls_accuracy | \n", "0.971680 | \n", "0.962891 | \n", "0.930664 | \n", "0.922119 | \n", "0.927246 | \n", "0.922607 | \n", "0.918701 | \n", "0.878906 | \n", "0.879150 | \n", "0.889648 | \n", "
fast_rcnn/false_negative | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.958960 | \n", "0.995067 | \n", "0.961984 | \n", "
fast_rcnn/fg_cls_accuracy | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.024907 | \n", "0.004933 | \n", "0.031249 | \n", "
iteration | \n", "19.000000 | \n", "39.000000 | \n", "59.000000 | \n", "79.000000 | \n", "99.000000 | \n", "119.000000 | \n", "139.000000 | \n", "159.000000 | \n", "179.000000 | \n", "199.000000 | \n", "
loss_box_reg | \n", "0.005330 | \n", "0.045536 | \n", "0.182375 | \n", "0.244092 | \n", "0.200095 | \n", "0.219698 | \n", "0.240223 | \n", "0.371495 | \n", "0.394405 | \n", "0.402448 | \n", "
loss_cls | \n", "0.242587 | \n", "0.356886 | \n", "0.420098 | \n", "0.368185 | \n", "0.321174 | \n", "0.327423 | \n", "0.343623 | \n", "0.426194 | \n", "0.434159 | \n", "0.423370 | \n", "
loss_rpn_cls | \n", "0.700573 | \n", "0.584717 | \n", "0.443300 | \n", "0.333261 | \n", "0.271206 | \n", "0.204834 | \n", "0.148327 | \n", "0.124927 | \n", "0.118580 | \n", "0.105196 | \n", "
loss_rpn_loc | \n", "0.336152 | \n", "0.313502 | \n", "0.241430 | \n", "0.246864 | \n", "0.228371 | \n", "0.217888 | \n", "0.214341 | \n", "0.206750 | \n", "0.202433 | \n", "0.200037 | \n", "
lr | \n", "0.000200 | \n", "0.000400 | \n", "0.000599 | \n", "0.000799 | \n", "0.000999 | \n", "0.001199 | \n", "0.001399 | \n", "0.001598 | \n", "0.001798 | \n", "0.001998 | \n", "
rank_data_time | \n", "0.008384 | \n", "0.008775 | \n", "0.008960 | \n", "0.009312 | \n", "0.009425 | \n", "0.008924 | \n", "0.008733 | \n", "0.008826 | \n", "0.009223 | \n", "0.008738 | \n", "
roi_head/num_bg_samples | \n", "497.875000 | \n", "493.000000 | \n", "476.500000 | \n", "472.125000 | \n", "474.750000 | \n", "471.500000 | \n", "470.375000 | \n", "452.250000 | \n", "451.125000 | \n", "452.375000 | \n", "
roi_head/num_fg_samples | \n", "14.125000 | \n", "19.000000 | \n", "35.500000 | \n", "39.875000 | \n", "37.250000 | \n", "40.500000 | \n", "41.625000 | \n", "59.750000 | \n", "60.875000 | \n", "59.625000 | \n", "
rpn/num_neg_anchors | \n", "180.125000 | \n", "186.250000 | \n", "184.375000 | \n", "190.125000 | \n", "186.750000 | \n", "185.000000 | \n", "183.625000 | \n", "185.250000 | \n", "187.000000 | \n", "192.125000 | \n", "
rpn/num_pos_anchors | \n", "75.875000 | \n", "69.750000 | \n", "71.625000 | \n", "65.875000 | \n", "69.250000 | \n", "71.000000 | \n", "72.375000 | \n", "70.750000 | \n", "69.000000 | \n", "63.875000 | \n", "
time | \n", "0.375595 | \n", "0.377317 | \n", "0.381449 | \n", "0.387139 | \n", "0.385061 | \n", "0.380078 | \n", "0.376928 | \n", "0.347513 | \n", "0.387798 | \n", "0.360441 | \n", "
total_loss | \n", "1.385000 | \n", "1.235328 | \n", "1.374799 | \n", "1.180483 | \n", "1.041189 | \n", "1.030759 | \n", "0.980173 | \n", "1.161567 | \n", "1.168232 | \n", "1.147360 | \n", "
bbox/AP | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Caption | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Footnote | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Formula | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-List-item | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Page-footer | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Page-header | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Picture | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Section-header | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Table | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Text | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP-Title | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP50 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/AP75 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/APl | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/APm | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
bbox/APs | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "