{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T14:36:31.336129Z", "start_time": "2024-05-13T14:36:31.323847Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
runnameseedstepsagg_scorecommonsense_qa/acccommonsense_qa/acc_normhellaswag/acchellaswag/acc_normopenbookqa/accopenbookqa/acc_norm...siqa/accsiqa/acc_normwinogrande/accwinogrande/acc_normsciq/accsciq/acc_normarc/accarc/acc_normmmlu/accmmlu/acc_norm
0filtering-baseline-2019-18-40gt500.3309530.1860.2330.2720.2580.1660.286...0.3670.3620.5160.4970.2100.2020.21900.25150.2302850.250127
1filtering-baseline-2019-18-40gt510000.3574740.2390.2710.2970.2870.1460.260...0.3650.3960.5030.4860.5680.5020.26650.28550.2425260.253291
2filtering-baseline-2019-18-40gt520000.3774360.2800.2840.3210.3320.1340.268...0.3680.3990.5190.5020.6860.5900.30300.32150.2457450.260988
3filtering-baseline-2019-18-40gt530000.3879940.2770.2910.3390.3590.1320.280...0.3940.4040.5200.5030.7210.6220.32100.33850.2504270.264451
4filtering-baseline-2019-18-40gt540000.3961100.2990.3150.3400.3660.1580.286...0.3760.3990.5150.5000.7390.6200.33200.34450.2561340.270382
..................................................................
250sm-baseline-c46100000.4304430.3350.3260.3790.4740.1760.340...0.3850.4060.5250.5230.7670.6750.37650.37500.2691390.280545
251sm-baseline-c46110000.4307760.3410.3230.3910.4810.1920.346...0.3900.4050.5310.5150.7660.6760.37750.37700.2668950.281210
252sm-baseline-c46120000.4303520.3400.3190.3920.4750.1920.342...0.3770.3950.5280.5180.7850.6880.37550.38400.2671590.279819
253sm-baseline-c46130000.4321360.3390.3260.3950.4770.1980.348...0.3900.4050.5290.5180.7850.6820.37800.38250.2697190.281585
254sm-baseline-c46135000.4338660.3440.3280.3940.4840.1980.334...0.3880.4060.5310.5230.7780.6820.37950.38450.2696010.284425
\n", "

255 rows × 22 columns

\n", "
" ], "text/plain": [ " runname seed steps agg_score \\\n", "0 filtering-baseline-2019-18-40gt 5 0 0.330953 \n", "1 filtering-baseline-2019-18-40gt 5 1000 0.357474 \n", "2 filtering-baseline-2019-18-40gt 5 2000 0.377436 \n", "3 filtering-baseline-2019-18-40gt 5 3000 0.387994 \n", "4 filtering-baseline-2019-18-40gt 5 4000 0.396110 \n", ".. ... ... ... ... \n", "250 sm-baseline-c4 6 10000 0.430443 \n", "251 sm-baseline-c4 6 11000 0.430776 \n", "252 sm-baseline-c4 6 12000 0.430352 \n", "253 sm-baseline-c4 6 13000 0.432136 \n", "254 sm-baseline-c4 6 13500 0.433866 \n", "\n", " commonsense_qa/acc commonsense_qa/acc_norm hellaswag/acc \\\n", "0 0.186 0.233 0.272 \n", "1 0.239 0.271 0.297 \n", "2 0.280 0.284 0.321 \n", "3 0.277 0.291 0.339 \n", "4 0.299 0.315 0.340 \n", ".. ... ... ... \n", "250 0.335 0.326 0.379 \n", "251 0.341 0.323 0.391 \n", "252 0.340 0.319 0.392 \n", "253 0.339 0.326 0.395 \n", "254 0.344 0.328 0.394 \n", "\n", " hellaswag/acc_norm openbookqa/acc openbookqa/acc_norm ... siqa/acc \\\n", "0 0.258 0.166 0.286 ... 0.367 \n", "1 0.287 0.146 0.260 ... 0.365 \n", "2 0.332 0.134 0.268 ... 0.368 \n", "3 0.359 0.132 0.280 ... 0.394 \n", "4 0.366 0.158 0.286 ... 0.376 \n", ".. ... ... ... ... ... \n", "250 0.474 0.176 0.340 ... 0.385 \n", "251 0.481 0.192 0.346 ... 0.390 \n", "252 0.475 0.192 0.342 ... 0.377 \n", "253 0.477 0.198 0.348 ... 0.390 \n", "254 0.484 0.198 0.334 ... 0.388 \n", "\n", " siqa/acc_norm winogrande/acc winogrande/acc_norm sciq/acc \\\n", "0 0.362 0.516 0.497 0.210 \n", "1 0.396 0.503 0.486 0.568 \n", "2 0.399 0.519 0.502 0.686 \n", "3 0.404 0.520 0.503 0.721 \n", "4 0.399 0.515 0.500 0.739 \n", ".. ... ... ... ... \n", "250 0.406 0.525 0.523 0.767 \n", "251 0.405 0.531 0.515 0.766 \n", "252 0.395 0.528 0.518 0.785 \n", "253 0.405 0.529 0.518 0.785 \n", "254 0.406 0.531 0.523 0.778 \n", "\n", " sciq/acc_norm arc/acc arc/acc_norm mmlu/acc mmlu/acc_norm \n", "0 0.202 0.2190 0.2515 0.230285 0.250127 \n", "1 0.502 0.2665 0.2855 0.242526 0.253291 \n", "2 0.590 0.3030 0.3215 0.245745 0.260988 \n", "3 0.622 0.3210 0.3385 0.250427 0.264451 \n", "4 0.620 0.3320 0.3445 0.256134 0.270382 \n", ".. ... ... ... ... ... \n", "250 0.675 0.3765 0.3750 0.269139 0.280545 \n", "251 0.676 0.3775 0.3770 0.266895 0.281210 \n", "252 0.688 0.3755 0.3840 0.267159 0.279819 \n", "253 0.682 0.3780 0.3825 0.269719 0.281585 \n", "254 0.682 0.3795 0.3845 0.269601 0.284425 \n", "\n", "[255 rows x 22 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from matplotlib.figure import Figure\n", "\n", "df = pd.read_csv(\"../src_data/c4-filters.csv\")\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "id": "839a06a71d9183e5", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T14:36:32.338012Z", "start_time": "2024-05-13T14:36:32.335209Z" } }, "outputs": [ { "data": { "text/plain": [ "['filtering-baseline-2019-18-40gt',\n", " 'filtering-baseline-2019-18-60gt',\n", " 'filtering-c4-all-except-terminal_punct',\n", " 'filtering-c4-all',\n", " 'filtering-c4-curly_bracket',\n", " 'filtering-c4-terminal_punct',\n", " 'filtering-c4-word_lengths',\n", " 'sm-baseline-c4']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.unique(df[\"runname\"]).tolist()" ] }, { "cell_type": "code", "execution_count": 4, "id": "b610f43caefdf01", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T16:06:36.968532Z", "start_time": "2024-05-13T16:06:36.966172Z" }, "collapsed": false }, "outputs": [], "source": [ "runs_mapping = {\n", " # 'filtering-baseline-2019-18-40gt': \"baseline\",\n", " 'filtering-baseline-2019-18-60gt': \"baseline\",\n", " 'filtering-c4-curly_bracket': \"curly_bracket filter\",\n", " 'filtering-c4-terminal_punct': \"terminal_punct filter\",\n", " 'filtering-c4-word_lengths': \"word_lengths filter\",\n", " 'filtering-c4-all': \"All filters\",\n", " 'filtering-c4-all-except-terminal_punct': \"All filters except terminal_punct\",\n", " 'sm-baseline-c4': \"C4\"\n", "}" ] }, { "cell_type": "code", "execution_count": 6, "id": "initial_id", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T16:06:37.459935Z", "start_time": "2024-05-13T16:06:37.181024Z" }, "collapsed": true }, "outputs": [], "source": [ "from matplotlib import pyplot as plt\n", "\n", "\n", "import json\n", "import os\n", "from matplotlib import pyplot as plt\n", "metrics = ['agg_score', 'commonsense_qa/acc_norm', 'hellaswag/acc_norm', 'openbookqa/acc_norm', 'piqa/acc_norm',\n", " 'siqa/acc_norm', 'winogrande/acc_norm', 'arc/acc_norm', 'mmlu/acc_norm']\n", "\n", "def normalize_runname(runname):\n", " return runname.replace(\"/\", \"_\")\n", "\n", "grouped = (\n", " df.groupby([\"runname\", \"steps\"])\n", " .agg(\n", " {\n", " key: \"mean\" for key in metrics\n", " }\n", " )\n", " .reset_index()\n", ")\n", "\n", "file_id=\"../assets/data/plots/c4_filters_hellaswag\"\n", "files = {}\n", "for metric in metrics:\n", " datas = {}\n", " for name, group in grouped.groupby(\"runname\"):\n", " if name not in runs_mapping:\n", " continue\n", " group = group[[\"steps\", metric]].sort_values(by=\"steps\")\n", " group = group.set_index(\"steps\")\n", " rolling_avg = group\n", " datas[name] = {\n", " \"x\": (rolling_avg.index * 2048 * 1024 * 1e-9).tolist(),\n", " \"y\": rolling_avg[metric].tolist(),\n", " \"label\": runs_mapping[name],\n", " }\n", " # Sort the datata based on the steps\n", " datas = {k: v for k, v in sorted(datas.items(), key=lambda x: -x[1][\"y\"][-1])}\n", " # Create a folder\n", " os.makedirs(f\"{file_id}\", exist_ok=True)\n", " with open(f\"{file_id}/{normalize_runname(metric)}.json\", \"w\") as f:\n", " json.dump({\n", " \"data\": datas,\n", " \"layout\": {\n", " \"title\": {\n", " \"text\": \"C4 filtering effect on HellaSwag\"\n", " },\n", " }\n", " }, f)\n", " files[metric] = {\"file\": f\"{normalize_runname(metric)}.json\"}\n", "# Create index\n", "with open(f\"{file_id}/index.json\", \"w\") as f:\n", " json.dump({\n", " \"files\": files,\n", " \"settings\": {\n", " \"defaultMetric\": \"hellaswag/acc_norm\",\n", " \"slider\":{\"min\":0,\"max\":10,\"default\":3}\n", " }\n", " }, f)" ] }, { "cell_type": "code", "execution_count": 3, "id": "af28ebbd054cdc33", "metadata": { "ExecuteTime": { "end_time": "2024-04-30T12:52:05.836260Z", "start_time": "2024-04-30T12:52:05.834381Z" }, "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }