{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-05-13T15:30:52.864251Z", "start_time": "2024-05-13T15:30:52.316016Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
runnameseedstepsagg_scorecommonsense_qa/acccommonsense_qa/acc_normhellaswag/acchellaswag/acc_normopenbookqa/accopenbookqa/acc_norm...siqa/accsiqa/acc_normwinogrande/accwinogrande/acc_normsciq/accsciq/acc_normarc/accarc/acc_normmmlu/accmmlu/acc_norm
0filtering-baseline-2019-18-40gt500.3309530.1860.2330.2720.2580.1660.286...0.3670.3620.5160.4970.2100.2020.21900.25150.2302850.250127
1filtering-baseline-2019-18-40gt510000.3574740.2390.2710.2970.2870.1460.260...0.3650.3960.5030.4860.5680.5020.26650.28550.2425260.253291
2filtering-baseline-2019-18-40gt520000.3774360.2800.2840.3210.3320.1340.268...0.3680.3990.5190.5020.6860.5900.30300.32150.2457450.260988
3filtering-baseline-2019-18-40gt530000.3879940.2770.2910.3390.3590.1320.280...0.3940.4040.5200.5030.7210.6220.32100.33850.2504270.264451
4filtering-baseline-2019-18-40gt540000.3961100.2990.3150.3400.3660.1580.286...0.3760.3990.5150.5000.7390.6200.33200.34450.2561340.270382
..................................................................
115wet-extraction-2019-186100000.4089770.3260.3120.3620.4120.1660.312...0.3790.3960.5250.5170.7670.6540.34800.35600.2623570.276813
116wet-extraction-2019-186110000.4087710.3250.3150.3630.4090.1620.312...0.3880.3990.5290.5200.7770.6640.34650.35550.2615990.276664
117wet-extraction-2019-186120000.4082390.3290.3080.3640.4160.1780.308...0.3820.3980.5210.5100.7700.6560.35550.35950.2609280.278411
118wet-extraction-2019-186130000.4132630.3250.3080.3670.4250.1740.312...0.3870.4110.5230.5240.7740.6620.35700.36000.2630670.281104
119wet-extraction-2019-186135000.4107540.3350.3100.3660.4240.1640.300...0.3920.4070.5150.5190.7790.6680.35900.35650.2616810.279534
\n", "

120 rows × 22 columns

\n", "
" ], "text/plain": [ " runname seed steps agg_score \\\n", "0 filtering-baseline-2019-18-40gt 5 0 0.330953 \n", "1 filtering-baseline-2019-18-40gt 5 1000 0.357474 \n", "2 filtering-baseline-2019-18-40gt 5 2000 0.377436 \n", "3 filtering-baseline-2019-18-40gt 5 3000 0.387994 \n", "4 filtering-baseline-2019-18-40gt 5 4000 0.396110 \n", ".. ... ... ... ... \n", "115 wet-extraction-2019-18 6 10000 0.408977 \n", "116 wet-extraction-2019-18 6 11000 0.408771 \n", "117 wet-extraction-2019-18 6 12000 0.408239 \n", "118 wet-extraction-2019-18 6 13000 0.413263 \n", "119 wet-extraction-2019-18 6 13500 0.410754 \n", "\n", " commonsense_qa/acc commonsense_qa/acc_norm hellaswag/acc \\\n", "0 0.186 0.233 0.272 \n", "1 0.239 0.271 0.297 \n", "2 0.280 0.284 0.321 \n", "3 0.277 0.291 0.339 \n", "4 0.299 0.315 0.340 \n", ".. ... ... ... \n", "115 0.326 0.312 0.362 \n", "116 0.325 0.315 0.363 \n", "117 0.329 0.308 0.364 \n", "118 0.325 0.308 0.367 \n", "119 0.335 0.310 0.366 \n", "\n", " hellaswag/acc_norm openbookqa/acc openbookqa/acc_norm ... siqa/acc \\\n", "0 0.258 0.166 0.286 ... 0.367 \n", "1 0.287 0.146 0.260 ... 0.365 \n", "2 0.332 0.134 0.268 ... 0.368 \n", "3 0.359 0.132 0.280 ... 0.394 \n", "4 0.366 0.158 0.286 ... 0.376 \n", ".. ... ... ... ... ... \n", "115 0.412 0.166 0.312 ... 0.379 \n", "116 0.409 0.162 0.312 ... 0.388 \n", "117 0.416 0.178 0.308 ... 0.382 \n", "118 0.425 0.174 0.312 ... 0.387 \n", "119 0.424 0.164 0.300 ... 0.392 \n", "\n", " siqa/acc_norm winogrande/acc winogrande/acc_norm sciq/acc \\\n", "0 0.362 0.516 0.497 0.210 \n", "1 0.396 0.503 0.486 0.568 \n", "2 0.399 0.519 0.502 0.686 \n", "3 0.404 0.520 0.503 0.721 \n", "4 0.399 0.515 0.500 0.739 \n", ".. ... ... ... ... 
# %% Load the evaluation results
import json
import os

import pandas as pd
from matplotlib import pyplot as plt  # NOTE(review): not referenced by any cell visible here
from matplotlib.figure import Figure  # NOTE(review): not referenced by any cell visible here

df = pd.read_csv("../src_data/wet_comparison.csv")
# Show a preview only: displaying the whole frame bloats the saved notebook.
df.head()

# %% Runs to export
# Maps the raw `runname` values found in the CSV to the label stored with each
# exported trace. Runs that are not listed here are left out of the export.
runs_mapping = {
    "wet-extraction-2019-18": "WET data",
    "ind_minhash-CC-MAIN-2019-18": "Extracted from WARC",
}

# %% Export one JSON file per metric, plus an index file
metrics = ['agg_score', 'commonsense_qa/acc_norm', 'hellaswag/acc_norm', 'openbookqa/acc_norm', 'piqa/acc_norm',
           'siqa/acc_norm', 'winogrande/acc_norm', 'arc/acc_norm', 'mmlu/acc_norm']

# The x axis is `steps * STEP_MULTIPLIER * X_SCALE`.
# NOTE(review): presumably 2048 * 1024 is tokens per step (sequence length x
# batch size) and 1e-9 rescales to billions of tokens -- TODO confirm.
# The integer product is applied before the float scale on purpose, so the
# exported numbers stay bit-identical to `steps * 2048 * 1024 * 1e-9`.
STEP_MULTIPLIER = 2048 * 1024
X_SCALE = 1e-9


def normalize_runname(runname):
    """Return `runname` with every "/" replaced by "_".

    Despite its name, this is applied to metric names below (for example
    "arc/acc_norm" becomes "arc_acc_norm") so they can be used as file names.
    """
    return runname.replace("/", "_")


def build_metric_traces(grouped, metric, runs_mapping):
    """Build the per-run traces for a single metric.

    Args:
        grouped: frame with "runname", "steps" and `metric` columns.
        metric: name of the column to export as the y values.
        runs_mapping: dict of raw run name -> label; other runs are skipped.

    Returns:
        A dict of raw run name -> {"x": [...], "y": [...], "label": str}.
        Points within a run are ordered by ascending steps. Runs are ordered
        by their final y value, highest first (not by steps).
    """
    traces = {}
    for name, group in grouped.groupby("runname"):
        if name not in runs_mapping:
            continue
        series = group[["steps", metric]].sort_values(by="steps").set_index("steps")
        traces[name] = {
            "x": (series.index * STEP_MULTIPLIER * X_SCALE).tolist(),
            "y": series[metric].tolist(),
            "label": runs_mapping[name],
        }
    # Order the runs by the metric value at their last step, best run first.
    return dict(sorted(traces.items(), key=lambda item: -item[1]["y"][-1]))


# Average every metric over all rows sharing the same (runname, steps) pair.
# NOTE(review): judging from the displayed frame this averages over `seed`.
grouped = (
    df.groupby(["runname", "steps"])
    .agg({key: "mean" for key in metrics})
    .reset_index()
)

# `file_id` is the output directory. Create it once, before any file is
# written, so index.json below never depends on the loop having run.
file_id = "../assets/data/plots/wet_comparison"
os.makedirs(file_id, exist_ok=True)

files = {}
for metric in metrics:
    traces = build_metric_traces(grouped, metric, runs_mapping)
    filename = f"{normalize_runname(metric)}.json"
    with open(f"{file_id}/{filename}", "w") as f:
        json.dump({
            "data": traces,
            "layout": {
                "title": {
                    "text": "WET data is worse than data extracted from WARC"
                },
            }
        }, f)
    files[metric] = {"file": filename}

# Index listing the per-metric files together with the viewer settings.
with open(f"{file_id}/index.json", "w") as f:
    json.dump({
        "files": files,
        "settings": {
            "defaultMetric": "agg_score",
            "slider": {"min": 0, "max": 10, "default": 0}
        }
    }, f)
"text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }