File size: 35,608 Bytes
002bd9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sqlite3\n",
"import json\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Open the annotation database read-only through a SQLite URI: a wrong path then\n",
"# raises sqlite3.OperationalError instead of silently creating an empty database\n",
"# file, and the analysis cannot modify the annotations by accident.\n",
"db_path = \"../../exp/annotations.db\"\n",
"\n",
"conn = sqlite3.connect(f\"file:{db_path}?mode=ro\", uri=True)\n",
"# One cursor, reused by the query cells of this notebook.\n",
"cursor = conn.cursor()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Table Names:\n",
"visual_genome_densecap_local_train\n",
"visual_genome_densecap_local_eval_visual_genome_densecap_local_densecap_test\n",
"\n",
"Schema for table visual_genome_densecap_local_train\n",
"region_id INTEGER\n",
"image_id INTEGER\n",
"width INTEGER\n",
"height INTEGER\n",
"file_name TEXT\n",
"coco_url TEXT\n",
"task_type TEXT\n",
"phrases TEXT\n",
"tokenized_phrases TEXT\n",
"x REAL\n",
"y REAL\n",
"region_width REAL\n",
"region_height REAL\n",
"\n",
"Schema for table visual_genome_densecap_local_eval_visual_genome_densecap_local_densecap_test\n",
"region_id INTEGER\n",
"image_id INTEGER\n",
"width INTEGER\n",
"height INTEGER\n",
"file_name TEXT\n",
"coco_url TEXT\n",
"task_type TEXT\n",
"phrases TEXT\n",
"tokenized_phrases TEXT\n",
"x REAL\n",
"y REAL\n",
"region_width REAL\n",
"region_height REAL\n"
]
}
],
"source": [
"# Collect the name of every table registered in sqlite_master.\n",
"table_names = [\n",
"    row[0]\n",
"    for row in cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n",
"]\n",
"print(\"Table Names:\")\n",
"for name in table_names:\n",
"    print(name)\n",
"\n",
"# Print each table's schema as `column_name column_type` lines.\n",
"# PRAGMA table_info yields (cid, name, type, notnull, dflt_value, pk) per column,\n",
"# so indices 1 and 2 are the column name and its declared type.\n",
"# The table name is interpolated unquoted; it comes from sqlite_master above.\n",
"for name in table_names:\n",
"    print(\"\\nSchema for table\", name)\n",
"    schema = cursor.execute(\"PRAGMA table_info({})\".format(name)).fetchall()\n",
"    for column in schema:\n",
"        print(column[1], column[2])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Rows for table visual_genome_densecap_local_train\n"
]
},
{
"data": {
"text/plain": [
"(929, 3684063, 0.00025216724035392445)"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Phrase-length statistics for one table (index into `table_names`).\n",
"table_id = 0\n",
"threshold = 20  # phrases with strictly more tokens than this are counted as long\n",
"\n",
"fetch = cursor.execute(\"SELECT tokenized_phrases FROM {}\".format(table_names[table_id]))\n",
"print(\"\\nRows for table\", table_names[table_id])\n",
"\n",
"# Each row holds one JSON-encoded list of phrases; record len() of every phrase\n",
"# (its token count, assuming each phrase is stored as a list of tokens).\n",
"num_tokens = []\n",
"for (phrases_json,) in fetch:\n",
"    num_tokens.extend(len(phrase) for phrase in json.loads(phrases_json))\n",
"\n",
"num_tokens_array = np.array(num_tokens)\n",
"num_over_threshold = np.sum(num_tokens_array > threshold)\n",
"# Displayed as: (phrases above threshold, total phrases, fraction above threshold)\n",
"num_over_threshold, len(num_tokens_array), num_over_threshold / len(num_tokens_array)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGsCAYAAAAPJKchAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjc0lEQVR4nO3de1DVdeL/8RegHDQDLwQIoaibl1LRUAnN2bVY0VjT7UbWhlna5mJjss1XqITcLlipy2yRlOslpzHNJm0LR1NS25JyA52sNfKOpQc1R1BMMM7n90c/T3vi+iHxzcHnY+bMrB/en895f/bNmfPscy74WJZlCQAAwBBf0xMAAACXN2IEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAY5VUx8tFHH2ncuHEKDw+Xj4+P1q5da/sYlmVp3rx56t27txwOhyIiIvTss89e/MkCAIBGaWN6AnZUVFQoOjpaDzzwgG677bYmHWPGjBn64IMPNG/ePA0YMEAnT57UyZMnL/JMAQBAY/l46x/K8/Hx0Zo1azRhwgT3tsrKSj3xxBN68803derUKfXv31/PP/+8fve730mSdu/erYEDB+rLL79Unz59zEwcAAB48KqXaRoyffp0FRQUaOXKlfriiy905513asyYMdqzZ48k6b333lPPnj31/vvvq0ePHoqKitKUKVO4MgIAgEGtJkZKSkq0dOlSrV69WiNHjlSvXr302GOP6cYbb9TSpUslSfv379ehQ4e0evVqLV++XMuWLVNhYaHuuOMOw7MHAODy5VXvGanPrl27VF1drd69e3tsr6ysVJcuXSRJLpdLlZWVWr58uXvc4sWLFRMTo+LiYl66AQDAgFYTI2fOnJGfn58KCwvl5+fn8bMOHTpIkrp27ao2bdp4BEu/fv0k/XRlhRgBAODSazUxMnjwYFVXV+vYsWMaOXJkrWNGjBihH3/8Ufv27VOvXr0kSd98840kqXv37pdsrgAA4Gde9WmaM2fOaO/evZJ+io8FCxZo1KhR6ty5s7p166Y//elP+uSTTzR//nwNHjxYx48fV35+vgYOHKjExES5XC4NHTpUHTp0UHZ2tlwul1JSUhQYGKgPPvjA8NkBAHB58qoY2bJli0aNGlVj+6RJk7Rs2TKdP39ezzzzjJYvX67vvvtOwcHBuuGGGzRnzhwNGDBAknTkyBE98sgj+uCDD3TFFVdo7Nixmj9/vjp37nypTwcAAMjLYgQAALQ+reajvQAAwDsRIwAAwCiv+DSNy+XSkSNHdOWVV8rHx8f0dAAAQCNYlqXTp08rPDxcvr51X//wihg5cuSIIiMjTU8DAAA0weHDh3X11VfX+XOviJErr7xS0k8nExgYaHg2AACgMcrLyxUZGel+Hq+LV8TIhZdmAgMDiREAALxMQ2+x4A2sAADAKGIEAAAYRYwAAACjbMfIRx99pHHjxik8PFw+Pj5au3Ztg/ts2bJF119/vRwOh37zm99o2bJlTZgqAABojWzHSEVFhaKjo5WTk9Oo8QcOHFBiYqJGjRqlnTt36tFHH9WUKVO0YcMG25MFAACtj+1P04wdO1Zjx45t9Pjc3Fz16NFD8+fPlyT169dPH3/8sf7+978rISHB7t0DAIBWptnfM1JQUKD4+HiPbQkJCSooKKhzn8rKSpWXl3vcAABA69TsMeJ0OhUaGuqxLTQ0VOXl5frhhx9q3ScrK0tBQUHuG9++CgBA69UiP02Tnp6usrIy9+3w4cOmpwQAAJpJs38Da1hYmEpLSz22lZaWKjAwUO3atat1H4fDIYfD0dxTAwAALUCzXxmJi4tTfn6+x7aNGzcqLi6uue8aAAB4AdsxcubMGe3cuVM7d+6U9NNHd3fu3KmSkhJJP73Ekpyc7B7/8MMPa//+/fq///s/ff3113rllVf01ltvaebMmRfnDAAAgFezHSOff/65Bg8erMGDB0
uSUlNTNXjwYGVkZEiSjh496g4TSerRo4fy8vK0ceNGRUdHa/78+frnP//Jx3oBAIAkyceyLMv0JBpSXl6uoKAglZWV8Vd7AQDwEo19/m72N7ACzS0qLa/Zjn1wbmKzHRsA8JMW+dFeAABw+SBGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGNSlGcnJyFBUVpYCAAMXGxmr79u31js/OzlafPn3Url07RUZGaubMmTp37lyTJgwAAFoX2zGyatUqpaamKjMzU0VFRYqOjlZCQoKOHTtW6/gVK1YoLS1NmZmZ2r17txYvXqxVq1bp8ccf/9WTBwAA3s92jCxYsEBTp07V5MmTde211yo3N1ft27fXkiVLah2/bds2jRgxQvfcc4+ioqI0evRoTZw4scGrKQAA4PJgK0aqqqpUWFio+Pj4nw/g66v4+HgVFBTUus/w4cNVWFjojo/9+/dr3bp1uuWWW+q8n8rKSpWXl3vcAABA69TGzuATJ06ourpaoaGhHttDQ0P19ddf17rPPffcoxMnTujGG2+UZVn68ccf9fDDD9f7Mk1WVpbmzJljZ2oAAMBLNfunabZs2aLnnntOr7zyioqKivTOO+8oLy9PTz/9dJ37pKenq6yszH07fPhwc08TAAAYYuvKSHBwsPz8/FRaWuqxvbS0VGFhYbXuM3v2bN13332aMmWKJGnAgAGqqKjQQw89pCeeeEK+vjV7yOFwyOFw2JkaAADwUrZixN/fXzExMcrPz9eECRMkSS6XS/n5+Zo+fXqt+5w9e7ZGcPj5+UmSLMtqwpSBSycqLa9ZjntwbmKzHBcAvJGtGJGk1NRUTZo0SUOGDNGwYcOUnZ2tiooKTZ48WZKUnJysiIgIZWVlSZLGjRunBQsWaPDgwYqNjdXevXs1e/ZsjRs3zh0lAADg8mU7RpKSknT8+HFlZGTI6XRq0KBBWr9+vftNrSUlJR5XQp588kn5+PjoySef1HfffaerrrpK48aN07PPPnvxzgIAAHgtH8sLXispLy9XUFCQysrKFBgYaHo6aGGa66WU5sTLNAAuB419/uZv0wAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKPamJ4ALg9RaXmmpwAAaKG4MgIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMKpJMZKTk6OoqCgFBAQoNjZW27dvr3f8qVOnlJKSoq5du8rhcKh3795at2
5dkyYMAABalzZ2d1i1apVSU1OVm5ur2NhYZWdnKyEhQcXFxQoJCakxvqqqSr///e8VEhKit99+WxERETp06JA6dux4MeYPAAC8nO0YWbBggaZOnarJkydLknJzc5WXl6clS5YoLS2txvglS5bo5MmT2rZtm9q2bStJioqK+nWzBgAArYatl2mqqqpUWFio+Pj4nw/g66v4+HgVFBTUus+//vUvxcXFKSUlRaGhoerfv7+ee+45VVdX13k/lZWVKi8v97gBAIDWyVaMnDhxQtXV1QoNDfXYHhoaKqfTWes++/fv19tvv63q6mqtW7dOs2fP1vz58/XMM8/UeT9ZWVkKCgpy3yIjI+1MEwAAeJFm/zSNy+VSSEiIXnvtNcXExCgpKUlPPPGEcnNz69wnPT1dZWVl7tvhw4ebe5oAAMAQW+8ZCQ4Olp+fn0pLSz22l5aWKiwsrNZ9unbtqrZt28rPz8+9rV+/fnI6naqqqpK/v3+NfRwOhxwOh52pAQAAL2Xryoi/v79iYmKUn5/v3uZyuZSfn6+4uLha9xkxYoT27t0rl8vl3vbNN9+oa9eutYYIAAC4vNh+mSY1NVWLFi3S66+/rt27d2vatGmqqKhwf7omOTlZ6enp7vHTpk3TyZMnNWPGDH3zzTfKy8vTc889p5SUlIt3FgAAwGvZ/mhvUlKSjh8/royMDDmdTg0aNEjr1693v6m1pKREvr4/N05kZKQ2bNigmTNnauDAgYqIiNCMGTM0a9asi3cWAADAa/lYlmWZnkRDysvLFRQUpLKyMgUGBpqeDpogKi3P9BRalINzE01PAQCaXWOfv/nbNAAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEa1MT0B4HIUlZbXbMc+ODex2Y4NAM2BKyMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYFSTYiQnJ0dRUVEKCAhQbGystm/f3qj9Vq5cKR8fH02YMKEpdwsAAFoh2zGyatUqpaamKjMzU0VFRYqOjlZCQoKOHTtW734HDx7UY489ppEjRzZ5sgAAoPWxHSMLFizQ1KlTNXnyZF177bXKzc1V+/bttWTJkjr3qa6u1r333qs5c+aoZ8+ev2rCAACgdbEVI1VVVSosLFR8fPzPB/D1VXx8vAoKCurc729/+5tCQkL04IMPNup+KisrVV5e7nEDAACtk60YOXHihKqrqxUaGuqxPTQ0VE6ns9Z9Pv74Yy1evFiLFi1q9P1kZWUpKCjIfYuMjLQzTQAA4EWa9dM0p0+f1n333adFixYpODi40fulp6errKzMfTt8+HAzzhIAAJjUxs7g4OBg+fn5qbS01GN7aWmpwsLCaozft2+fDh48qHHjxrm3uVyun+64TRsVFxerV69eNfZzOBxyOBx2pgYAALyUrSsj/v7+iomJUX5+vnuby+VSfn6+4uLiaozv27evdu3apZ07d7pvt956q0aNGqWdO3fy8gsAALB3ZUSSUlNTNWnSJA0ZMkTDhg1Tdna2KioqNHnyZElScnKyIiIilJWVpYCAAPXv399j/44dO0pSje0AAODyZDtGkpKSdPz4cWVkZMjpdGrQoEFav369+02tJSUl8vXli10BAEDj+FiWZZmeRE
PKy8sVFBSksrIyBQYGmp4OmiAqLc/0FC4bB+cmmp4CAEhq/PM3lzAAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgVBvTEwBwcUWl5TXLcQ/OTWyW4wIAV0YAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwKgmxUhOTo6ioqIUEBCg2NhYbd++vc6xixYt0siRI9WpUyd16tRJ8fHx9Y4HAACXF9sxsmrVKqWmpiozM1NFRUWKjo5WQkKCjh07Vuv4LVu2aOLEidq8ebMKCgoUGRmp0aNH67vvvvvVkwcAAN7Px7Isy84OsbGxGjp0qF5++WVJksvlUmRkpB555BGlpaU1uH91dbU6deqkl19+WcnJyY26z/LycgUFBamsrEyBgYF2posWIiotz/QU8CsdnJtoegoAvExjn79tXRmpqqpSYWGh4uPjfz6Ar6/i4+NVUFDQqGOcPXtW58+fV+fOnescU1lZqfLyco8bAABonWzFyIkTJ1RdXa3Q0FCP7aGhoXI6nY06xqxZsxQeHu4RNL+UlZWloKAg9y0yMtLONAEAgBe5pJ+mmTt3rlauXKk1a9YoICCgznHp6ekqKytz3w4fPnwJZwkAAC6lNnYGBwcHy8/PT6WlpR7bS0tLFRYWVu++8+bN09y5c7Vp0yYNHDiw3rEOh0MOh8PO1AAAgJeydWXE399fMTExys/Pd29zuVzKz89XXFxcnfu98MILevrpp7V+/XoNGTKk6bMFAACtjq0rI5KUmpqqSZMmaciQIRo2bJiys7NVUVGhyZMnS5KSk5MVERGhrKwsSdLzzz+vjIwMrVixQlFRUe73lnTo0EEdOnS4iKcCAAC8ke0YSUpK0vHjx5WRkSGn06lBgwZp/fr17je1lpSUyNf35wsuCxcuVFVVle644w6P42RmZuqpp576dbMHAABez/b3jJjA94x4P75nxPvxPSMA7GqW7xkBAAC42IgRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwKg2picAwDtEpeU127EPzk1stmMDaPm4MgIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIziG1jhoTm/ZRMAgNpwZQQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwqo3pCQBAVFpesxz34NzEZjkugIuLKyMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBR/KA9Aq9Vcf4BP4o/wARcTV0YAAIBRxAgAADCKl2m8UHNeegYA4FLjyggAADCKGAEAAEbxMg0ANEFzvVzKp3RwOeLKCAAAMKpJMZKTk6OoqCgFBAQoNjZW27dvr3f86tWr1bdvXwUEBGjAgAFat25dkyYLAABaH9sv06xatUqpqanKzc1VbGyssrOzlZCQoOLiYoWEhNQYv23bNk2cOFFZWVn6wx/+oB
UrVmjChAkqKipS//79L8pJAEBrwRe14XLkY1mWZWeH2NhYDR06VC+//LIkyeVyKTIyUo888ojS0tJqjE9KSlJFRYXef/9997YbbrhBgwYNUm5ubqPus7y8XEFBQSorK1NgYKCd6bZKfLQXQFMQI7jUGvv8bevKSFVVlQoLC5Wenu7e5uvrq/j4eBUUFNS6T0FBgVJTUz22JSQkaO3atXXeT2VlpSorK93/Lisrk/TTSUFyVZ41PQUAXqjbzNWmp2Dbl3MSTE8Bv8KF5+2GrnvYipETJ06ourpaoaGhHttDQ0P19ddf17qP0+msdbzT6azzfrKysjRnzpwa2yMjI+1MFwDg5YKyTc8AF8Pp06cVFBRU589b5Ed709PTPa6muFwunTx5Ul26dJGPj89Fu5/y8nJFRkbq8OHDrfbln9Z+jpyf92vt58j5eb/Wfo7NeX6WZen06dMKDw+vd5ytGAkODpafn59KS0s9tpeWliosLKzWfcLCwmyNlySHwyGHw+GxrWPHjnamaktgYGCr/AX7X639HDk/79faz5Hz836t/Ryb6/zquyJyga2P9vr7+ysmJkb5+fnubS6XS/n5+YqLi6t1n7i4OI/xkrRx48Y6xwMAgMuL7ZdpUlNTNWnSJA0ZMkTDhg1Tdna2KioqNHnyZElScnKyIiIilJWVJUmaMWOGfvvb32r+/PlKTEzUypUr9fnnn+u11167uGcCAAC8ku0YSUpK0vHjx5WRkSGn06lBgwZp/fr17jeplpSUyNf35wsuw4cP14oVK/Tkk0/q8ccf1zXXXKO1a9e2iO8YcTgcyszMrPGSUGvS2s+R8/N+rf0cOT/v19rPsSWcn+3vGQEAALiY+Ns0AADAKGIEAAAYRYwAAACjiBEAAGBUq4+RnJwcRUVFKSAgQLGxsdq+fXu941evXq2+ffsqICBAAwYM0Lp16y7RTO3LysrS0KFDdeWVVyokJEQTJkxQcXFxvfssW7ZMPj4+HreAgIBLNGN7nnrqqRpz7du3b737eNP6SVJUVFSNc/Tx8VFKSkqt41v6+n300UcaN26cwsPD5ePjU+NvUFmWpYyMDHXt2lXt2rVTfHy89uzZ0+Bx7T6Om0t953f+/HnNmjVLAwYM0BVXXKHw8HAlJyfryJEj9R6zKb/nzamhNbz//vtrzHfMmDENHtcb1lBSrY9HHx8fvfjii3UesyWtYWOeF86dO6eUlBR16dJFHTp00O23317jy0l/qamP3cZq1TGyatUqpaamKjMzU0VFRYqOjlZCQoKOHTtW6/ht27Zp4sSJevDBB7Vjxw5NmDBBEyZM0JdffnmJZ944W7duVUpKij799FNt3LhR58+f1+jRo1VRUVHvfoGBgTp69Kj7dujQoUs0Y/uuu+46j7l+/PHHdY71tvWTpP/85z8e57dx40ZJ0p133lnnPi15/SoqKhQdHa2cnJxaf/7CCy/oH//4h3Jzc/XZZ5/piiuuUEJCgs6dO1fnMe0+jptTfed39uxZFRUVafbs2SoqKtI777yj4uJi3XrrrQ0e187veXNraA0lacyYMR7zffPNN+s9presoSSP8zp69KiWLFkiHx8f3X777fUet6WsYWOeF2bOnKn33ntPq1ev1tatW3XkyBHddttt9R63KY9dW6xWbNiwYVZKSor739XV1VZ4eLiVlZVV6/i77rrLSkxM9NgWGxtr/fnPf27WeV4sx44dsyRZW7durXPM0qVLraCgoEs3qV8hMzPTio6ObvR4b18/y7KsGTNmWL169bJcLletP/em9ZNkrVmzxv1vl8tlhYWFWS+++KJ726lTpyyHw2G9+eabdR7H7uP4Uvnl+dVm+/btliTr0KFDdY6x+3t+KdV2jpMmTbLGjx9v6zjevIbjx4+3brrppnrHtOQ1/OXzwqlTp6y2bdtaq1evdo/ZvXu3JckqKCio9RhNfeza0WqvjFRVVamwsFDx8fHubb6+voqPj1dBQUGt+xQUFHiMl6SEhI
Q6x7c0ZWVlkqTOnTvXO+7MmTPq3r27IiMjNX78eH311VeXYnpNsmfPHoWHh6tnz5669957VVJSUudYb1+/qqoqvfHGG3rggQfq/YOQ3rR+/+vAgQNyOp0eaxQUFKTY2Ng616gpj+OWpKysTD4+Pg3+bS07v+ctwZYtWxQSEqI+ffpo2rRp+v777+sc681rWFpaqry8PD344IMNjm2pa/jL54XCwkKdP3/eYz369u2rbt261bkeTXns2tVqY+TEiROqrq52fzPsBaGhoXI6nbXu43Q6bY1vSVwulx599FGNGDGi3m+37dOnj5YsWaJ3331Xb7zxhlwul4YPH65vv/32Es62cWJjY7Vs2TKtX79eCxcu1IEDBzRy5EidPn261vHevH6StHbtWp06dUr3339/nWO8af1+6cI62FmjpjyOW4pz585p1qxZmjhxYr1/fMzu77lpY8aM0fLly5Wfn6/nn39eW7du1dixY1VdXV3reG9ew9dff11XXnllgy9htNQ1rO15wel0yt/fv0YgN/TceGFMY/exy/bXwaNlSklJ0Zdfftng65RxcXEef6Rw+PDh6tevn1599VU9/fTTzT1NW8aOHev+3wMHDlRsbKy6d++ut956q1H/peJtFi9erLFjx9b7p7a9af0uZ+fPn9ddd90ly7K0cOHCesd62+/53Xff7f7fAwYM0MCBA9WrVy9t2bJFN998s8GZXXxLlizRvffe2+CbxFvqGjb2eaElaLVXRoKDg+Xn51fjHcKlpaUKCwurdZ+wsDBb41uK6dOn6/3339fmzZt19dVX29q3bdu2Gjx4sPbu3dtMs7t4OnbsqN69e9c5V29dP0k6dOiQNm3apClTptjaz5vW78I62FmjpjyOTbsQIocOHdLGjRtt/0n2hn7PW5qePXsqODi4zvl64xpK0r///W8VFxfbfkxKLWMN63peCAsLU1VVlU6dOuUxvqHnxgtjGruPXa02Rvz9/RUTE6P8/Hz3NpfLpfz8fI//svxfcXFxHuMlaePGjXWON82yLE2fPl1r1qzRhx9+qB49etg+RnV1tXbt2qWuXbs2wwwvrjNnzmjfvn11ztXb1u9/LV26VCEhIUpMTLS1nzetX48ePRQWFuaxRuXl5frss8/qXKOmPI5NuhAie/bs0aZNm9SlSxfbx2jo97yl+fbbb/X999/XOV9vW8MLFi9erJiYGEVHR9ve1+QaNvS8EBMTo7Zt23qsR3FxsUpKSupcj6Y8dpsy8VZr5cqVlsPhsJYtW2b997//tR566CGrY8eOltPptCzLsu677z4rLS3NPf6TTz6x2rRpY82bN8/avXu3lZmZabVt29batWuXqVOo17Rp06ygoCBry5Yt1tGjR923s2fPusf88hznzJljbdiwwdq3b59VWFho3X333VZAQID11VdfmTiFev31r3+1tmzZYh04cMD65JNPrPj4eCs4ONg6duyYZVnev34XVFdXW926dbNmzZpV42fetn6nT5+2duzYYe3YscOSZC1YsMDasWOH+9Mkc+fOtTp27Gi9++671hdffGGNHz/e6tGjh/XDDz+4j3HTTTdZL730kvvfDT2OW8r5VVVVWbfeeqt19dVXWzt37vR4TFZWVtZ5fg39nl9q9Z3j6dOnrccee8wqKCiwDhw4YG3atMm6/vrrrWuuucY6d+6c+xjeuoYXlJWVWe3bt7cWLlxY6zFa8ho25nnh4Ycftrp162Z9+OGH1ueff27FxcVZcXFxHsfp06eP9c4777j/3ZjH7q/RqmPEsizrpZdesrp162b5+/tbw4YNsz799FP3z377299akyZN8hj/1ltvWb1797b8/f2t6667zsrLy7vEM248SbXeli5d6h7zy3N89NFH3f9/hIaGWrfccotVVFR06SffCElJSVbXrl0tf39/KyIiwkpKSrL27t3r/rm3r98FGzZssCRZxcXFNX7mbeu3efPmWn8nL5yDy+WyZs+ebYWGhloOh8O6+e
aba5x39+7drczMTI9t9T2OL6X6zu/AgQN1PiY3b97sPsYvz6+h3/NLrb5zPHv2rDV69Gjrqquustq2bWt1797dmjp1ao2o8NY1vODVV1+12rVrZ506darWY7TkNWzM88IPP/xg/eUvf7E6depktW/f3vrjH/9oHT16tMZx/nefxjx2fw2f/3+nAAAARrTa94wAAADvQIwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIz6f2SX7jEL10+bAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of per-phrase token counts: 20 unit-width bins over [0, 20].\n",
"# Values outside `range` are ignored by plt.hist, so longer phrases are not drawn.\n",
"out = plt.hist(num_tokens, bins=20, range=(0, 20))\n",
"plt.xlabel(\"Number of tokens\")\n",
"plt.ylabel(\"Number of phrases\");"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Rows for table visual_genome_densecap_local_eval_visual_genome_densecap_local_densecap_test\n"
]
},
{
"data": {
"text/plain": [
"(60, 238069, 0.0002520277734606354)"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Phrase-length statistics for one table (index into `table_names`).\n",
"table_id = 1\n",
"threshold = 20  # phrases with strictly more tokens than this are counted as long\n",
"\n",
"fetch = cursor.execute(\"SELECT tokenized_phrases FROM {}\".format(table_names[table_id]))\n",
"print(\"\\nRows for table\", table_names[table_id])\n",
"\n",
"# Each row holds one JSON-encoded list of phrases; record len() of every phrase\n",
"# (its token count, assuming each phrase is stored as a list of tokens).\n",
"num_tokens = []\n",
"for (phrases_json,) in fetch:\n",
"    num_tokens.extend(len(phrase) for phrase in json.loads(phrases_json))\n",
"\n",
"num_tokens_array = np.array(num_tokens)\n",
"num_over_threshold = np.sum(num_tokens_array > threshold)\n",
"# Displayed as: (phrases above threshold, total phrases, fraction above threshold)\n",
"num_over_threshold, len(num_tokens_array), num_over_threshold / len(num_tokens_array)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'matplotlib.pyplot' has no attribute 'set_xlabel'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/t-yutonglin/xiaoke/segment-caption-anything-v2/scripts/notebooks/dataset_statstics_db.ipynb Cell 7\u001b[0m line \u001b[0;36m2\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bazure_tunnel/home/t-yutonglin/xiaoke/segment-caption-anything-v2/scripts/notebooks/dataset_statstics_db.ipynb#X21sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m plt\u001b[39m.\u001b[39mhist(num_tokens, bins\u001b[39m=\u001b[39m\u001b[39m20\u001b[39m, \u001b[39mrange\u001b[39m\u001b[39m=\u001b[39m(\u001b[39m0\u001b[39m, \u001b[39m20\u001b[39m))\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bazure_tunnel/home/t-yutonglin/xiaoke/segment-caption-anything-v2/scripts/notebooks/dataset_statstics_db.ipynb#X21sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m plt\u001b[39m.\u001b[39;49mset_xlabel(\u001b[39m\"\u001b[39m\u001b[39mNumber of tokens\u001b[39m\u001b[39m\"\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'matplotlib.pyplot' has no attribute 'set_xlabel'"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAGdCAYAAADwjmIIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuy0lEQVR4nO3de3BUZZ7/8U8S6IRbd+SShCzh4qhAlIsECO1ldtUsrcYZGeMMMKxGjLqwgZXES2CHCehag4vrCgw3HUfD1gwjUDWgEAkbg4QZiYABVkDJohMNbuiAYtIQIYH0+f0xv5ylJYE0EJJ+eL+qThU5z/c8/Tz9dFd/OOlzEmZZliUAAADDhLf1AAAAAFoDIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYKQObT2AtuT3+1VZWalu3bopLCysrYcDAABawLIsHT9+XPHx8QoPb/58zVUdciorK5WQkNDWwwAAABfh0KFD6tOnT7PtV3XI6datm6S/PklOp7ONRwMAAFrC5/MpISHB/hxvzlUdchp/ReV0Ogk5AACEmAt91YQvHgMAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYqUNbDwBor/rPzG+Vfr94MbVV+gUABOJMDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMFHTI+d///V/9wz/8g3r06KFOnTppyJAh+uijj+x2y7KUm5ur3r17q1OnTkpJSdHBgwcD+jh27JgmTZokp9Op6OhoZWRk6MSJEwE1H3/8sW6//XZFRUUpISFB8+fPP2csa9as0aBBgxQVFaUhQ4bo3XffDXY6AADAUEGFnG+//Va33nqrOnbsqI0bN+qTTz7Ryy+/rGuuucaumT9/vhYtWqTly5dr+/bt6tKlizwej06dOmXXTJo0Sfv371dhYaE2bNigrVu36oknnrDbfT6fxo4dq379+qm0tFQvvfSS5s6dq9dee82u2bZtmyZOnKiMjAzt3r1b48aN07hx47Rv375LeT4AAIAhwizLslpaPHPmTH3wwQf605/+1GS7ZVmKj4/XU089paefflqSVFNTo9jYWOXl5WnChAn69NNPlZiYqJ07d2rkyJGSpIKCAt1777366quvFB8fr2XLlukXv/iFvF6vHA6H/djr1q3TgQMHJEnjx49XbW2tNmzYYD/+mDFjNHz4cC1fvrxF8/H5fHK5XKqpqZHT6Wzp04CrBH+FHADap5Z+fgd1Juedd97RyJEj9dOf/lQxMTG6+eab9Zvf/MZuLy8vl9frVUpKir3P5XIpOTlZJSUlkqSSkhJFR0fbAUeSUlJSFB4eru3bt9s1P/zhD+2AI0kej0dlZWX69ttv7ZqzH6expvFxAADA1S2okPOXv/xFy5Yt0/XXX69NmzZp6tSp+ud//metWLFCkuT1eiVJsbGxAcfFxsbabV6vVzExMQHtHTp0UPfu3QNqmurj7MdorqaxvSl1dXXy+XwBGwAAMFOHYIr9fr9GjhypX/3qV5Kkm2++Wfv27dPy5cuVnp7eKgO8nObNm6fnnnuurYcBAACugKDO5PTu3VuJiYkB+wYPHqyKigpJUlxcnCSpqqoqoKaqqspui4uL05EjRwLaz5w5o2PHjgXUNNXH2Y/RXE1je1NmzZqlmpoaezt06NCFJw0AAEJSUCHn1ltvVVlZWcC+//mf/1G/fv0kSQMGDFBcXJyKiorsdp/Pp+3bt8vtdkuS3G63qqurVVpaatds3rxZfr9fycnJds3WrVt1+vRpu6awsFADBw60r+Ryu90Bj9NY0/g4TYmMjJTT6QzYAACAmYIKOVlZWfrwww/1q1/9Sp999plWrlyp1157TZmZmZ
KksLAwzZgxQy+88ILeeecd7d27Vw8//LDi4+M1btw4SX8983P33Xfr8ccf144dO/TBBx9o2rRpmjBhguLj4yVJP//5z+VwOJSRkaH9+/dr1apVWrhwobKzs+2xPPnkkyooKNDLL7+sAwcOaO7cufroo480bdq0y/TUAACAUBbUd3JGjRqltWvXatasWXr++ec1YMAALViwQJMmTbJrnn32WdXW1uqJJ55QdXW1brvtNhUUFCgqKsqu+f3vf69p06bprrvuUnh4uNLS0rRo0SK73eVy6b/+67+UmZmppKQk9ezZU7m5uQH30rnlllu0cuVKzZ49W//yL/+i66+/XuvWrdNNN910Kc8HAAAwRFD3yTEN98nB+XCfHABon1rlPjkAAAChgpADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjNShrQcAXG36z8xvtb6/eDG11foGgFDDmRwAAGAkQg4AADBSUCFn7ty5CgsLC9gGDRpkt586dUqZmZnq0aOHunbtqrS0NFVVVQX0UVFRodTUVHXu3FkxMTF65plndObMmYCaLVu2aMSIEYqMjNR1112nvLy8c8ayZMkS9e/fX1FRUUpOTtaOHTuCmQoAADBc0GdybrzxRh0+fNje/vznP9ttWVlZWr9+vdasWaPi4mJVVlbqgQcesNsbGhqUmpqq+vp6bdu2TStWrFBeXp5yc3PtmvLycqWmpuqOO+7Qnj17NGPGDD322GPatGmTXbNq1SplZ2drzpw52rVrl4YNGyaPx6MjR45c7PMAAAAME2ZZltXS4rlz52rdunXas2fPOW01NTXq1auXVq5cqQcffFCSdODAAQ0ePFglJSUaM2aMNm7cqPvuu0+VlZWKjY2VJC1fvlw5OTk6evSoHA6HcnJylJ+fr3379tl9T5gwQdXV1SooKJAkJScna9SoUVq8eLEkye/3KyEhQdOnT9fMmTNbPHmfzyeXy6Wamho5nc4WH4erQ2t+Qbi18MVjAFeDln5+B30m5+DBg4qPj9e1116rSZMmqaKiQpJUWlqq06dPKyUlxa4dNGiQ+vbtq5KSEklSSUmJhgwZYgccSfJ4PPL5fNq/f79dc3YfjTWNfdTX16u0tDSgJjw8XCkpKXZNc+rq6uTz+QI2AABgpqBCTnJysvLy8lRQUKBly5apvLxct99+u44fPy6v1yuHw6Ho6OiAY2JjY+X1eiVJXq83IOA0tje2na/G5/Pp5MmT+vrrr9XQ0NBkTWMfzZk3b55cLpe9JSQkBDN9AAAQQoK6T84999xj/3vo0KFKTk5Wv379tHr1anXq1OmyD+5ymzVrlrKzs+2ffT4fQQcAAENd0iXk0dHRuuGGG/TZZ58pLi5O9fX1qq6uDqipqqpSXFycJCkuLu6cq60af75QjdPpVKdOndSzZ09FREQ0WdPYR3MiIyPldDoDNgAAYKZLCjknTpzQ559/rt69eyspKUkdO3ZUUVGR3V5WVqaKigq53W5Jktvt1t69ewOugiosLJTT6VRiYqJdc3YfjTWNfTgcDiUlJQXU+P1+FRUV2TUAAABBhZynn35axcXF+uKLL7Rt2zb95Cc/UUREhCZOnCiXy6WMjAxlZ2fr/fffV2lpqSZPniy3260xY8ZIksaOHavExEQ99NBD+u///m9t2rRJs2fPVmZmpiIjIyVJU6ZM0V/+8hc9++yzOnDggJYuXarVq1crKyvLHk
d2drZ+85vfaMWKFfr00081depU1dbWavLkyZfxqQEAAKEsqO/kfPXVV5o4caK++eYb9erVS7fddps+/PBD9erVS5L0yiuvKDw8XGlpaaqrq5PH49HSpUvt4yMiIrRhwwZNnTpVbrdbXbp0UXp6up5//nm7ZsCAAcrPz1dWVpYWLlyoPn366PXXX5fH47Frxo8fr6NHjyo3N1der1fDhw9XQUHBOV9GBgAAV6+g7pNjGu6Tg/PhPjkA0D612n1yAAAAQgEhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgpKDueAy0N6F4wz4AwJXBmRwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjHRJIefFF19UWFiYZsyYYe87deqUMjMz1aNHD3Xt2lVpaWmqqqoKOK6iokKpqanq3LmzYmJi9Mwzz+jMmTMBNVu2bNGIESMUGRmp6667Tnl5eec8/pIlS9S/f39FRUUpOTlZO3bsuJTpAAAAg1x0yNm5c6deffVVDR06NGB/VlaW1q9frzVr1qi4uFiVlZV64IEH7PaGhgalpqaqvr5e27Zt04oVK5SXl6fc3Fy7pry8XKmpqbrjjju0Z88ezZgxQ4899pg2bdpk16xatUrZ2dmaM2eOdu3apWHDhsnj8ejIkSMXOyUAAGCQMMuyrGAPOnHihEaMGKGlS5fqhRde0PDhw7VgwQLV1NSoV69eWrlypR588EFJ0oEDBzR48GCVlJRozJgx2rhxo+677z5VVlYqNjZWkrR8+XLl5OTo6NGjcjgcysnJUX5+vvbt22c/5oQJE1RdXa2CggJJUnJyskaNGqXFixdLkvx+vxISEjR9+nTNnDmzRfPw+XxyuVyqqamR0+kM9mlAO9B/Zn5bD6Fd+eLF1LYeAgC0upZ+fl/UmZzMzEylpqYqJSUlYH9paalOnz4dsH/QoEHq27evSkpKJEklJSUaMmSIHXAkyePxyOfzaf/+/XbN9/v2eDx2H/X19SotLQ2oCQ8PV0pKil3TlLq6Ovl8voANAACYqUOwB7z11lvatWuXdu7ceU6b1+uVw+FQdHR0wP7Y2Fh5vV675uyA09je2Ha+Gp/Pp5MnT+rbb79VQ0NDkzUHDhxoduzz5s3Tc88917KJAgCAkBbUmZxDhw7pySef1O9//3tFRUW11phazaxZs1RTU2Nvhw4daushAQCAVhJUyCktLdWRI0c0YsQIdejQQR06dFBxcbEWLVqkDh06KDY2VvX19aqurg44rqqqSnFxcZKkuLi4c662avz5QjVOp1OdOnVSz549FRER0WRNYx9NiYyMlNPpDNgAAICZggo5d911l/bu3as9e/bY28iRIzVp0iT73x07dlRRUZF9TFlZmSoqKuR2uyVJbrdbe/fuDbgKqrCwUE6nU4mJiXbN2X001jT24XA4lJSUFFDj9/tVVFRk1wAAgKtbUN/J6datm2666aaAfV26dFGPHj3s/RkZGcrOzlb37t3ldDo1ffp0ud1ujRkzRpI0duxYJSYm6qGHHtL8+fPl9Xo1e/ZsZWZmKjIyUpI0ZcoULV68WM8++6weffRRbd68WatXr1Z+/v9dSZOdna309HSNHDlSo0eP1oIFC1RbW6vJkydf0hMCAADMEPQXjy/klVdeUXh4uNLS0lRXVyePx6OlS5fa7REREdqwYYOmTp0qt9utLl26KD09Xc8//7xdM2DAAOXn5ysrK0sLFy5Unz599Prrr8vj8d
g148eP19GjR5Wbmyuv16vhw4eroKDgnC8jAwCAq9NF3SfHFNwnJ/Rxn5xA3CcHwNWgVe+TAwAA0N4RcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjdWjrAQC4fPrPzG+1vr94MbXV+gaA1sCZHAAAYCRCDgAAMBIhBwAAGCmokLNs2TINHTpUTqdTTqdTbrdbGzdutNtPnTqlzMxM9ejRQ127dlVaWpqqqqoC+qioqFBqaqo6d+6smJgYPfPMMzpz5kxAzZYtWzRixAhFRkbquuuuU15e3jljWbJkifr376+oqCglJydrx44dwUwFAAAYLqiQ06dPH7344osqLS3VRx99pDvvvFP333+/9u/fL0nKysrS+vXrtWbNGhUXF6uyslIPPPCAfXxDQ4NSU1NVX1+vbdu2acWKFcrLy1Nubq5dU15ertTUVN1xxx3as2ePZsyYoccee0ybNm2ya1atWqXs7GzNmTNHu3bt0rBhw+TxeHTkyJFLfT4AAIAhwizLsi6lg+7du+ull17Sgw8+qF69emnlypV68MEHJUkHDhzQ4MGDVVJSojFjxmjjxo267777VFlZqdjYWEnS8uXLlZOTo6NHj8rhcCgnJ0f5+fnat2+f/RgTJkxQdXW1CgoKJEnJyckaNWqUFi9eLEny+/1KSEjQ9OnTNXPmzBaP3efzyeVyqaamRk6n81KeBrSR1ryaCIG4ugpAe9HSz++L/k5OQ0OD3nrrLdXW1srtdqu0tFSnT59WSkqKXTNo0CD17dtXJSUlkqSSkhINGTLEDjiS5PF45PP57LNBJSUlAX001jT2UV9fr9LS0oCa8PBwpaSk2DXNqaurk8/nC9gAAICZgg45e/fuVdeuXRUZGakpU6Zo7dq1SkxMlNfrlcPhUHR0dEB9bGysvF6vJMnr9QYEnMb2xrbz1fh8Pp08eVJff/21Ghoamqxp7KM58+bNk8vlsreEhIRgpw8AAEJE0CFn4MCB2rNnj7Zv366pU6cqPT1dn3zySWuM7bKbNWuWampq7O3QoUNtPSQAANBKgr7jscPh0HXXXSdJSkpK0s6dO7Vw4UKNHz9e9fX1qq6uDjibU1VVpbi4OElSXFzcOVdBNV59dXbN96/IqqqqktPpVKdOnRQREaGIiIgmaxr7aE5kZKQiIyODnTIAAAhBl3yfHL/fr7q6OiUlJaljx44qKiqy28rKylRRUSG32y1Jcrvd2rt3b8BVUIWFhXI6nUpMTLRrzu6jsaaxD4fDoaSkpIAav9+voqIiuwYAACCoMzmzZs3SPffco759++r48eNauXKltmzZok2bNsnlcikjI0PZ2dnq3r27nE6npk+fLrfbrTFjxkiSxo4dq8TERD300EOaP3++vF6vZs+erczMTPsMy5QpU7R48WI9++yzevTRR7V582atXr1a+fn/dxVNdna20tPTNXLkSI0ePVoLFixQbW2tJk+efBmfGgAAEMqCCjlHjhzRww8/rMOHD8vlcmno0KHatGmT/v7v/16S9Morryg8PFxpaWmqq6uTx+PR0qVL7eMjIiK0YcMGTZ06VW63W126dFF6erqef/55u2bAgAHKz89XVlaWFi5cqD59+uj111+Xx+Oxa8aPH6+jR48qNzdXXq9Xw4cPV0FBwTlfRgYAAFevS75PTijjPjmhj/vkXDncJwdAe9Hq98kBAABozwg5AADASIQcAABgJEIOAAAwEiEHAA
AYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGCmokDNv3jyNGjVK3bp1U0xMjMaNG6eysrKAmlOnTikzM1M9evRQ165dlZaWpqqqqoCaiooKpaamqnPnzoqJidEzzzyjM2fOBNRs2bJFI0aMUGRkpK677jrl5eWdM54lS5aof//+ioqKUnJysnbs2BHMdAAAgMGCCjnFxcXKzMzUhx9+qMLCQp0+fVpjx45VbW2tXZOVlaX169drzZo1Ki4uVmVlpR544AG7vaGhQampqaqvr9e2bdu0YsUK5eXlKTc3164pLy9Xamqq7rjjDu3Zs0czZszQY489pk2bNtk1q1atUnZ2tubMmaNdu3Zp2LBh8ng8OnLkyKU8HwAAwBBhlmVZF3vw0aNHFRMTo+LiYv3whz9UTU2NevXqpZUrV+rBBx+UJB04cECDBw9WSUmJxowZo40bN+q+++5TZWWlYmNjJUnLly9XTk6Ojh49KofDoZycHOXn52vfvn32Y02YMEHV1dUqKCiQJCUnJ2vUqFFavHixJMnv9yshIUHTp0/XzJkzWzR+n88nl8ulmpoaOZ3Oi30a0Ib6z8xv6yFcNb54MbWthwAAklr++X1J38mpqamRJHXv3l2SVFpaqtOnTyslJcWuGTRokPr27auSkhJJUklJiYYMGWIHHEnyeDzy+Xzav3+/XXN2H401jX3U19ertLQ0oCY8PFwpKSl2TVPq6urk8/kCNgAAYKaLDjl+v18zZszQrbfeqptuukmS5PV65XA4FB0dHVAbGxsrr9dr15wdcBrbG9vOV+Pz+XTy5El9/fXXamhoaLKmsY+mzJs3Ty6Xy94SEhKCnzgAAAgJFx1yMjMztW/fPr311luXczytatasWaqpqbG3Q4cOtfWQAABAK+lwMQdNmzZNGzZs0NatW9WnTx97f1xcnOrr61VdXR1wNqeqqkpxcXF2zfevgmq8+ursmu9fkVVVVSWn06lOnTopIiJCERERTdY09tGUyMhIRUZGBj9hAAAQcoI6k2NZlqZNm6a1a9dq8+bNGjBgQEB7UlKSOnbsqKKiIntfWVmZKioq5Ha7JUlut1t79+4NuAqqsLBQTqdTiYmJds3ZfTTWNPbhcDiUlJQUUOP3+1VUVGTXAACAq1tQZ3IyMzO1cuVKvf322+rWrZv9/ReXy6VOnTrJ5XIpIyND2dnZ6t69u5xOp6ZPny63260xY8ZIksaOHavExEQ99NBDmj9/vrxer2bPnq3MzEz7LMuUKVO0ePFiPfvss3r00Ue1efNmrV69Wvn5/3clTXZ2ttLT0zVy5EiNHj1aCxYsUG1trSZPnny5nhsAABDCggo5y5YtkyT93d/9XcD+N998U4888ogk6ZVXXlF4eLjS0tJUV1cnj8ejpUuX2rURERHasGGDpk6dKrfbrS5duig9PV3PP/+8XTNgwADl5+crKytLCxcuVJ8+ffT666/L4/HYNePHj9fRo0eVm5srr9er4cOHq6Cg4JwvIwMAgKvTJd0nJ9Rxn5zQx31yrhzukwOgvbgi98kBAABorwg5AADASIQcAABgJEIOAAAwEiEHAAAYiZADAACMRMgBAABGuqi/XQXg6tNa9yTi/jsAWgtncgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYiZ
ADAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYKeiQs3XrVv3oRz9SfHy8wsLCtG7duoB2y7KUm5ur3r17q1OnTkpJSdHBgwcDao4dO6ZJkybJ6XQqOjpaGRkZOnHiREDNxx9/rNtvv11RUVFKSEjQ/PnzzxnLmjVrNGjQIEVFRWnIkCF69913g50OAAAwVNAhp7a2VsOGDdOSJUuabJ8/f74WLVqk5cuXa/v27erSpYs8Ho9OnTpl10yaNEn79+9XYWGhNmzYoK1bt+qJJ56w230+n8aOHat+/fqptLRUL730kubOnavXXnvNrtm2bZsmTpyojIwM7d69W+PGjdO4ceO0b9++YKcEAAAMFGZZlnXRB4eFae3atRo3bpykv57FiY+P11NPPaWnn35aklRTU6PY2Fjl5eVpwoQJ+vTTT5WYmKidO3dq5MiRkqSCggLde++9+uqrrxQfH69ly5bpF7/4hbxerxwOhyRp5syZWrdunQ4cOCBJGj9+vGpra7VhwwZ7PGPGjNHw4cO1fPnyFo3f5/PJ5XKppqZGTqfzYp8GtKH+M/Pbegi4RF+8mNrWQwAQYlr6+X1Zv5NTXl4ur9erlJQUe5/L5VJycrJKSkokSSUlJYqOjrYDjiSlpKQoPDxc27dvt2t++MMf2gFHkjwej8rKyvTtt9/aNWc/TmNN4+M0pa6uTj6fL2ADAABmuqwhx+v1SpJiY2MD9sfGxtptXq9XMTExAe0dOnRQ9+7dA2qa6uPsx2iuprG9KfPmzZPL5bK3hISEYKcIAABCxFV1ddWsWbNUU1Njb4cOHWrrIQEAgFZyWUNOXFycJKmqqipgf1VVld0WFxenI0eOBLSfOXNGx44dC6hpqo+zH6O5msb2pkRGRsrpdAZsAADATJc15AwYMEBxcXEqKiqy9/l8Pm3fvl1ut1uS5Ha7VV1drdLSUrtm8+bN8vv9Sk5Otmu2bt2q06dP2zWFhYUaOHCgrrnmGrvm7MdprGl8HAAAcHULOuScOHFCe/bs0Z49eyT99cvGe/bsUUVFhcLCwjRjxgy98MILeuedd7R37149/PDDio+Pt6/AGjx4sO6++249/vjj2rFjhz744ANNmzZNEyZMUHx8vCTp5z//uRwOhzIyMrR//36tWrVKCxcuVHZ2tj2OJ598UgUFBXr55Zd14MABzZ07Vx999JGmTZt26c8KAAAIeUFfQr5lyxbdcccd5+xPT09XXl6eLMvSnDlz9Nprr6m6ulq33Xabli5dqhtuuMGuPXbsmKZNm6b169crPDxcaWlpWrRokbp27WrXfPzxx8rMzNTOnTvVs2dPTZ8+XTk5OQGPuWbNGs2ePVtffPGFrr/+es2fP1/33ntvi+fCJeShj0vIcT5cng6YqaWf35d0n5xQR8gJfYQcnA8hBzBTm9wnBwAAoL0g5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARurQ1gPA1YE7EwMArjTO5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJEIOAAAwEiEHAAAYqUNbDwAAWkv/mfmt0u8XL6a2Sr8ALi/O5AAAACMRcgAAgJEIOQAAwEiEHAAAYCRCDgAAMBIhBwAAGImQAwAAjETIAQAARiLkAAAAIxFyAACAkQg5AADASIQcAABgJP5AJwAEqbX+8KfEH/8ELifO5AAAACMRcgAAgJH4dRVsrXkKHgCAK40zOQAAwEiEHAAAYCR+XQUA7Uhr/dqYq7ZwNeJMDgAAMFLIh5wlS5aof/
/+ioqKUnJysnbs2NHWQwIAAO1ASP+6atWqVcrOztby5cuVnJysBQsWyOPxqKysTDExMW09PABoN7iBIa5GYZZlWW09iIuVnJysUaNGafHixZIkv9+vhIQETZ8+XTNnzrzg8T6fTy6XSzU1NXI6na093HaPS8gBtCeEJzSnpZ/fIXsmp76+XqWlpZo1a5a9Lzw8XCkpKSopKWnymLq6OtXV1dk/19TUSPrrkwXJX/ddWw8BAGx9s9a0Wt/7nvO0Wt9ofY2f2xc6TxOyIefrr79WQ0ODYmNjA/bHxsbqwIEDTR4zb948Pffcc+fsT0hIaJUxAgDaJ9eCth4BLofjx4/L5XI12x6yIedizJo1S9nZ2fbPfr9fx44dU48ePRQWFnbZHsfn8ykhIUGHDh0y9tdgps+R+YU+0+fI/EKf6XNszflZlqXjx48rPj7+vHUhG3J69uypiIgIVVVVBeyvqqpSXFxck8dERkYqMjIyYF90dHRrDVFOp9PIF+7ZTJ8j8wt9ps+R+YU+0+fYWvM73xmcRiF7CbnD4VBSUpKKiorsfX6/X0VFRXK73W04MgAA0B6E7JkcScrOzlZ6erpGjhyp0aNHa8GCBaqtrdXkyZPbemgAAKCNhXTIGT9+vI4eParc3Fx5vV4NHz5cBQUF53wZ+UqLjIzUnDlzzvnVmElMnyPzC32mz5H5hT7T59ge5hfS98kBAABoTsh+JwcAAOB8CDkAAMBIhBwAAGAkQg4AADASIeciLVmyRP3791dUVJSSk5O1Y8eO89avWbNGgwYNUlRUlIYMGaJ33333Co00ePPmzdOoUaPUrVs3xcTEaNy4cSorKzvvMXl5eQoLCwvYoqKirtCIgzN37txzxjpo0KDzHhNK69e/f/9z5hcWFqbMzMwm60Nh7bZu3aof/ehHio+PV1hYmNatWxfQblmWcnNz1bt3b3Xq1EkpKSk6ePDgBfsN9n3cWs43v9OnTysnJ0dDhgxRly5dFB8fr4cffliVlZXn7fNiXuet6UJr+Mgjj5wz3rvvvvuC/YbCGkpq8j0ZFhaml156qdk+29MatuRz4dSpU8rMzFSPHj3UtWtXpaWlnXPD3u+72PduSxFyLsKqVauUnZ2tOXPmaNeuXRo2bJg8Ho+OHDnSZP22bds0ceJEZWRkaPfu3Ro3bpzGjRunffv2XeGRt0xxcbEyMzP14YcfqrCwUKdPn9bYsWNVW1t73uOcTqcOHz5sb19++eUVGnHwbrzxxoCx/vnPf262NtTWb+fOnQFzKywslCT99Kc/bfaY9r52tbW1GjZsmJYsWdJk+/z587Vo0SItX75c27dvV5cuXeTxeHTq1Klm+wz2fdyazje/7777Trt27dIvf/lL7dq1S3/84x9VVlamH//4xxfsN5jXeWu70BpK0t133x0w3j/84Q/n7TNU1lBSwLwOHz6sN954Q2FhYUpLSztvv+1lDVvyuZCVlaX169drzZo1Ki4uVmVlpR544IHz9nsx792gWAja6NGjrczMTPvnhoYGKz4+3po3b16T9T/72c+s1NTUgH3JycnWP/7jP7bqOC+XI0eOWJKs4uLiZmvefPNNy+VyXblBXYI5c+ZYw4YNa3F9qK/fk08+af3gBz+w/H5/k+2htHaWZVmSrLVr19o/+/1+Ky4uznrppZfsfdXV1VZkZKT1hz/8odl+gn0fXynfn19TduzYYUmyvvzyy2Zrgn2dX0lNzTE9Pd26//77g+onlNfw/vvvt+68887z1rTnNfz+50J1dbXVsWNHa82aNXbNp59+akmySkpKmuzjYt+7weBMTpDq6+tVWlqqlJQUe194eLhSUlJUUlLS5DElJSUB9ZLk8XiarW9vampqJEndu3c/b92JEyfUr18/JSQk6P7779f+/fuvxPAuysGDBxUfH69rr71WkyZNUkVFRbO1obx+9fX1+t3vfqdHH330vH+ENpTW7vvKy8vl9XoD1sjlcik5ObnZNbqY93F7UlNTo7
CwsAv+7b1gXuftwZYtWxQTE6OBAwdq6tSp+uabb5qtDeU1rKqqUn5+vjIyMi5Y217X8PufC6WlpTp9+nTAegwaNEh9+/Ztdj0u5r0bLEJOkL7++ms1NDScc1fl2NhYeb3eJo/xer1B1bcnfr9fM2bM0K233qqbbrqp2bqBAwfqjTfe0Ntvv63f/e538vv9uuWWW/TVV19dwdG2THJysvLy8lRQUKBly5apvLxct99+u44fP95kfSiv37p161RdXa1HHnmk2ZpQWrumNK5DMGt0Me/j9uLUqVPKycnRxIkTz/tHD4N9nbe1u+++W//5n/+poqIi/du//ZuKi4t1zz33qKGhocn6UF7DFStWqFu3bhf8VU57XcOmPhe8Xq8cDsc5wftCn42NNS09Jlgh/Wcd0PoyMzO1b9++C/4e2O12B/xh1FtuuUWDBw/Wq6++qn/9139t7WEG5Z577rH/PXToUCUnJ6tfv35avXp1i/5nFUp++9vf6p577lF8fHyzNaG0dle706dP62c/+5ksy9KyZcvOWxtqr/MJEybY/x4yZIiGDh2qH/zgB9qyZYvuuuuuNhzZ5ffGG29o0qRJF/yCf3tdw5Z+LrQHnMkJUs+ePRUREXHON8arqqoUFxfX5DFxcXFB1bcX06ZN04YNG/T++++rT58+QR3bsWNH3Xzzzfrss89aaXSXT3R0tG644YZmxxqq6/fll1/qvffe02OPPRbUcaG0dpLsdQhmjS7mfdzWGgPOl19+qcLCwvOexWnKhV7n7c21116rnj17NjveUFxDSfrTn/6ksrKyoN+XUvtYw+Y+F+Li4lRfX6/q6uqA+gt9NjbWtPSYYBFyguRwOJSUlKSioiJ7n9/vV1FRUcD/hs/mdrsD6iWpsLCw2fq2ZlmWpk2bprVr12rz5s0aMGBA0H00NDRo79696t27dyuM8PI6ceKEPv/882bHGmrr1+jNN99UTEyMUlNTgzoulNZOkgYMGKC4uLiANfL5fNq+fXuza3Qx7+O21BhwDh48qPfee089evQIuo8Lvc7bm6+++krffPNNs+MNtTVs9Nvf/lZJSUkaNmxY0Me25Rpe6HMhKSlJHTt2DFiPsrIyVVRUNLseF/PevZiBI0hvvfWWFRkZaeXl5VmffPKJ9cQTT1jR0dGW1+u1LMuyHnroIWvmzJl2/QcffGB16NDB+vd//3fr008/tebMmWN17NjR2rt3b1tN4bymTp1quVwua8uWLdbhw4ft7bvvvrNrvj/H5557ztq0aZP1+eefW6WlpdaECROsqKgoa//+/W0xhfN66qmnrC1btljl5eXWBx98YKWkpFg9e/a0jhw5YllW6K+fZf31KpO+fftaOTk557SF4todP37c2r17t7V7925LkvUf//Ef1u7du+2ri1588UUrOjraevvtt62PP/7Yuv/++60BAwZYJ0+etPu48847rV//+tf2zxd6H7eX+dXX11s//vGPrT59+lh79uwJeE/W1dU1O78Lvc6vtPPN8fjx49bTTz9tlZSUWOXl5dZ7771njRgxwrr++uutU6dO2X2E6ho2qqmpsTp37mwtW7asyT7a8xq25HNhypQpVt++fa3NmzdbH330keV2uy232x3Qz8CBA60//vGP9s8tee9eCkLORfr1r39t9e3b13I4HNbo0aOtDz/80G7727/9Wys9PT2gfvXq1dYNN9xgORwO68Ybb7Ty8/Ov8IhbTlKT25tvvmnXfH+OM2bMsJ+P2NhY695777V27dp15QffAuPHj7d69+5tORwO62/+5m+s8ePHW5999pndHurrZ1mWtWnTJkuSVVZWdk5bKK7d+++/3+RrsnEefr/f+uUvf2nFxsZakZGR1l133XXO3Pv162fNmTMnYN/53sdX0vnmV15e3ux78v3337f7+P78LvQ6v9LON8fvvvvOGjt2rNWrVy+rY8eOVr9+/azHH3/8nLASqmvY6NVXX7U6depkVVdXN9lHe17DlnwunD
x50vqnf/on65prrrE6d+5s/eQnP7EOHz58Tj9nH9OS9+6lCPv/DwoAAGAUvpMDAACMRMgBAABGIuQAAAAjEXIAAICRCDkAAMBIhBwAAGAkQg4AADASIQcAABiJkAMAAIxEyAEAAEYi5AAAACMRcgAAgJH+H5rlTpDjVIrgAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Histogram of per-phrase token counts: 20 unit-width bins over [0, 20].\n",
"# Values outside `range` are ignored by plt.hist, so longer phrases are not drawn.\n",
"plt.hist(num_tokens, bins=20, range=(0, 20))\n",
"# The pyplot module exposes xlabel()/ylabel(); set_xlabel() exists only on Axes objects.\n",
"plt.xlabel(\"Number of tokens\")\n",
"plt.ylabel(\"Number of phrases\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "sca-v2",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|