diff --git "a/PrepareData.ipynb" "b/PrepareData.ipynb" new file mode 100644--- /dev/null +++ "b/PrepareData.ipynb" @@ -0,0 +1,4665 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ноутбук с подготовкой данных " + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "metadata": {}, + "outputs": [], + "source": [ + "import ast\n", + "import json\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [], + "source": [ + "# Context manager closes the file handle once the JSON is parsed.\n", + "with open('arxivData.json') as f:\n", + "    df = pd.read_json(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [], + "source": [ + "#df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Данные загрузились нормально, теперь выберем нужные нам колонки." + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "metadata": {}, + "outputs": [], + "source": [ + "data = df.drop(['author', 'day', 'id', 'link', 'month', 'year'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 220, + "metadata": {}, + "outputs": [], + "source": [ + "#data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Теперь данные нужно привести к удобному виду:" + ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": {}, + "outputs": [], + "source": [ + "# Vectorised lower-casing; unlike apply(lambda ...), missing values stay NaN instead of raising.\n", + "data['summary'] = data['summary'].str.lower()\n", + "data['title'] = data['title'].str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [], + "source": [ + "def extract(term_line):\n", + "    \"\"\"Return the top-level arXiv category of the first usable tag.\n", + "\n", + "    term_line is expected to hold the text of a Python list of tag dicts,\n", + "    e.g. [{'term': 'cs.AI'}] gives 'cs'. Entries without a usable 'term'\n", + "    are skipped; None is returned when no entry qualifies.\n", + "    \"\"\"\n", + "    # literal_eval parses Python literals only, so a crafted tag string\n", + "    # cannot execute code the way eval() would.\n", + "    tags = ast.literal_eval(term_line)\n", + "    for elem in tags:\n", + "        try:\n", + "            return elem['term'].split('.')[0]\n", + "        except (KeyError, TypeError, AttributeError):\n", + "            # Best effort: this entry has no usable 'term', try the next.\n", + "            continue\n", + "    return None" + ] + }, + { + "cell_type": "code", + "execution_count": 223, + "metadata": {}, + "outputs": [], + "source": [ + "data['tag'] = data['tag'].apply(extract)" + ] + }, + { + "cell_type": "code", + "execution_count": 224, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarytagtitle
0we propose an architecture for vqa which utili...csdual recurrent attention units for visual ques...
1recent approaches based on artificial neural n...cssequential short-text classification with recu...
2we introduce the multiresolution recurrent neu...csmultiresolution recurrent neural networks: an ...
3multi-task learning is motivated by the observ...statlearning what to share between loosely related...
4we present milabot: a deep reinforcement learn...csa deep reinforcement learning chatbot
\n", + "
" + ], + "text/plain": [ + " summary tag \\\n", + "0 we propose an architecture for vqa which utili... cs \n", + "1 recent approaches based on artificial neural n... cs \n", + "2 we introduce the multiresolution recurrent neu... cs \n", + "3 multi-task learning is motivated by the observ... stat \n", + "4 we present milabot: a deep reinforcement learn... cs \n", + "\n", + " title \n", + "0 dual recurrent attention units for visual ques... \n", + "1 sequential short-text classification with recu... \n", + "2 multiresolution recurrent neural networks: an ... \n", + "3 learning what to share between loosely related... \n", + "4 a deep reinforcement learning chatbot " + ] + }, + "execution_count": 224, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 225, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": 226, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'cs': 34597,\n", + " 'stat': 4782,\n", + " 'astro-ph': 59,\n", + " 'q-bio': 320,\n", + " 'eess': 75,\n", + " 'cond-mat': 65,\n", + " 'math': 612,\n", + " 'physics': 216,\n", + " 'quant-ph': 66,\n", + " 'q-fin': 30,\n", + " 'gr-qc': 4,\n", + " 'nlin': 47,\n", + " 'cmp-lg': 110,\n", + " 'econ': 5,\n", + " 'hep-ex': 4,\n", + " 'hep-th': 1,\n", + " 'nucl-th': 1,\n", + " 'hep-ph': 2,\n", + " 'hep-lat': 2,\n", + " 'adap-org': 2})" + ] + }, + "execution_count": 226, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(data['tag'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Мы столкнулись с проблемой: классы сильно несбалансированы, на computer science приходится около 84% всех статей (34597 из 41000), а примерно три четверти оставшихся — на statistics. Так что с большой долей вероятности обученная модель будет возвращать один из этих классов. 
По-хорошему стоило бы уравновесить классы, но мы попробуем добавить статьи из архива в малочисленные категории, а из cs часть данных удалим, потому что перевес слишком большой." + ] + }, + { + "cell_type": "code", + "execution_count": 227, + "metadata": {}, + "outputs": [], + "source": [ + "import arxiv" + ] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cs.CV\n", + "cs.LG\n", + "cs.RO\n", + "cs.LG\n", + "eess.SY\n", + "hep-th\n", + "math.DG\n", + "cs.LG\n", + "astro-ph.GA\n", + "cs.NI\n", + "cond-mat.soft\n", + "cs.LO\n", + "cs.LG\n", + "gr-qc\n", + "stat.ME\n", + "cs.CV\n", + "math.RT\n", + "cs.LG\n", + "cs.CV\n", + "hep-ex\n", + "cs.CV\n", + "stat.ML\n", + "astro-ph.CO\n", + "physics.optics\n", + "quant-ph\n", + "cs.CV\n", + "cs.LG\n", + "math.PR\n", + "cs.CV\n", + "astro-ph.SR\n", + "math.AG\n", + "math.MG\n", + "math.LO\n", + "hep-ph\n", + "math.KT\n", + "q-bio.QM\n", + "stat.ME\n", + "math.AP\n", + "cs.LG\n", + "cs.MA\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "cs.AR\n", + "math.CA\n", + "cs.CV\n", + "eess.IV\n", + "physics.atom-ph\n", + "math.AP\n", + "cs.IR\n", + "cond-mat.mtrl-sci\n", + "math.AP\n", + "cond-mat.mes-hall\n", + "math.DG\n", + "eess.AS\n", + "math.NA\n", + "cs.CR\n", + "math.PR\n", + "q-bio.QM\n", + "cs.CR\n", + "cs.IT\n", + "math.RT\n", + "math.RT\n", + "math.RT\n", + "cs.CV\n", + "math.RT\n", + "astro-ph.HE\n", + "math.DG\n", + "math.OC\n", + "math.RT\n", + "astro-ph.GA\n", + "physics.optics\n", + "stat.AP\n", + "cond-mat.supr-con\n", + "cs.CV\n", + "math.NA\n", + "cond-mat.mes-hall\n", + "quant-ph\n", + "cs.LG\n", + "cs.LG\n", + "hep-ph\n", + "astro-ph.HE\n", + "math.GR\n", + "math.SG\n", + "econ.EM\n", + "cs.LG\n", + "cs.CL\n", + "cs.LG\n", + "math.NT\n", + "cs.CV\n", + "cs.SE\n", + "cs.AI\n", + "math.AG\n", + "physics.comp-ph\n", + "astro-ph.HE\n", + "cond-mat.str-el\n", + "cs.LG\n", + "physics.ao-ph\n", + "quant-ph\n", + 
"astro-ph.SR\n", + "cs.CV\n", + "cs.CV\n", + "cs.NI\n", + "cs.CV\n", + "cs.SI\n", + "stat.ML\n", + "cs.DS\n", + "cs.CV\n", + "eess.IV\n", + "cs.CV\n", + "astro-ph.GA\n", + "cond-mat.mtrl-sci\n", + "cs.CV\n", + "stat.ML\n", + "cs.DS\n", + "cs.LG\n", + "physics.optics\n", + "hep-ex\n", + "math.FA\n", + "cs.CL\n", + "stat.ML\n", + "cs.CL\n", + "cs.CG\n", + "cs.DC\n", + "cs.CV\n", + "eess.AS\n", + "math.GR\n", + "quant-ph\n", + "math.CO\n", + "math.PR\n", + "cs.AI\n", + "cs.DB\n", + "cs.DM\n", + "cs.AI\n", + "hep-th\n", + "cs.DL\n", + "math.AG\n", + "cond-mat.mes-hall\n", + "eess.IV\n", + "math.OC\n", + "cs.LG\n", + "hep-lat\n", + "cs.FL\n", + "astro-ph.SR\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "math.OC\n", + "cs.DB\n", + "cs.CL\n", + "math.CO\n", + "eess.SY\n", + "cs.CV\n", + "astro-ph.GA\n", + "nlin.CG\n", + "math.GR\n", + "cs.LG\n", + "physics.med-ph\n", + "q-bio.GN\n", + "cs.LG\n", + "astro-ph.GA\n", + "eess.IV\n", + "math.OC\n", + "math.CO\n", + "cs.IR\n", + "cs.SD\n", + "math.AP\n", + "cs.CV\n", + "math.CA\n", + "cs.AI\n", + "physics.comp-ph\n", + "cs.MM\n", + "physics.chem-ph\n", + "cs.CR\n", + "cond-mat.mtrl-sci\n", + "physics.app-ph\n", + "cond-mat.mtrl-sci\n", + "physics.optics\n", + "hep-ph\n", + "cs.SE\n", + "eess.SY\n", + "stat.ME\n", + "math.CO\n", + "math.CO\n", + "math.CO\n", + "cs.LG\n", + "cs.SE\n", + "cs.CV\n", + "cs.LG\n", + "cs.AI\n", + "cs.CV\n", + "cs.LG\n", + "gr-qc\n", + "cs.FL\n", + "hep-ex\n", + "cs.DC\n", + "cs.GT\n", + "math.OC\n", + "cond-mat.stat-mech\n", + "nucl-th\n", + "hep-th\n", + "astro-ph.GA\n", + "astro-ph.CO\n", + "math.LO\n", + "cs.SE\n", + "hep-th\n", + "cs.CL\n", + "hep-ph\n", + "math.LO\n", + "cond-mat.stat-mech\n", + "cs.LG\n", + "cs.CV\n", + "math.GT\n", + "cond-mat.mes-hall\n", + "math.NT\n", + "math.CO\n", + "cs.CL\n", + "cs.CV\n", + "astro-ph.CO\n", + "hep-ph\n", + "math.CA\n", + "astro-ph.SR\n", + "hep-ph\n", + "nlin.PS\n", + "hep-ph\n", + "cs.AI\n", + "cond-mat.str-el\n", + "cs.LG\n", + "quant-ph\n", + "cs.CY\n", 
+ "cs.LG\n", + "hep-ph\n", + "math.AG\n", + "physics.ao-ph\n", + "nlin.PS\n", + "stat.ME\n", + "stat.ME\n", + "cs.DB\n", + "eess.SP\n", + "math.NT\n", + "eess.SP\n", + "astro-ph.CO\n", + "cs.DB\n", + "physics.soc-ph\n", + "cs.FL\n", + "astro-ph.GA\n", + "physics.soc-ph\n", + "hep-th\n", + "physics.optics\n", + "cs.CV\n", + "math.GR\n", + "hep-ph\n", + "hep-ex\n", + "hep-ph\n", + "cs.LG\n", + "eess.IV\n", + "physics.soc-ph\n", + "physics.ao-ph\n", + "cs.NI\n", + "astro-ph.SR\n", + "cs.CV\n", + "quant-ph\n", + "math.DG\n", + "eess.IV\n", + "physics.bio-ph\n", + "cs.CL\n", + "cs.IT\n", + "cs.FL\n", + "cs.LG\n", + "math.PR\n", + "eess.IV\n", + "cs.LG\n", + "quant-ph\n", + "cs.RO\n", + "physics.optics\n", + "math.FA\n", + "math.GT\n", + "math.CO\n", + "gr-qc\n", + "cs.AI\n", + "quant-ph\n", + "stat.ML\n", + "cs.AI\n", + "stat.CO\n", + "physics.flu-dyn\n", + "cs.CV\n", + "cs.LG\n", + "nlin.SI\n", + "math.CO\n", + "astro-ph.SR\n", + "astro-ph.HE\n", + "cs.CV\n", + "cs.IT\n", + "math.RA\n", + "cs.SI\n", + "math-ph\n", + "math.GR\n", + "cs.CC\n", + "cs.LG\n", + "astro-ph.EP\n", + "astro-ph.GA\n", + "quant-ph\n", + "math.DG\n", + "astro-ph.HE\n", + "hep-th\n", + "astro-ph.CO\n", + "quant-ph\n", + "gr-qc\n", + "quant-ph\n", + "cs.LG\n", + "hep-th\n", + "math.CA\n", + "astro-ph.CO\n", + "astro-ph.CO\n", + "econ.EM\n", + "cs.DS\n", + "math.NT\n", + "cs.CV\n", + "quant-ph\n", + "cs.CV\n", + "quant-ph\n", + "gr-qc\n", + "q-bio.PE\n", + "astro-ph.SR\n", + "cond-mat.str-el\n", + "cs.CV\n", + "astro-ph.SR\n", + "eess.IV\n", + "quant-ph\n", + "astro-ph.HE\n", + "cs.DC\n", + "nucl-ex\n", + "cs.AI\n", + "astro-ph.SR\n", + "cs.AI\n", + "math.GR\n", + "astro-ph.GA\n", + "math.NA\n", + "math.CO\n", + "q-bio.MN\n", + "math.OC\n", + "astro-ph.SR\n", + "cs.CV\n", + "econ.EM\n", + "astro-ph.EP\n", + "cond-mat.mtrl-sci\n", + "hep-ex\n", + "cs.SE\n", + "cs.DM\n", + "math.CA\n", + "astro-ph.SR\n", + "astro-ph.SR\n", + "eess.SY\n", + "math.NA\n", + "astro-ph.HE\n", + "cs.DS\n", + "cs.LO\n", + 
"hep-ph\n", + "econ.EM\n", + "gr-qc\n", + "math-ph\n", + "gr-qc\n", + "cs.GT\n", + "hep-ex\n", + "astro-ph.SR\n", + "math.PR\n", + "eess.SY\n", + "cond-mat.dis-nn\n", + "nucl-th\n", + "cs.IR\n", + "cs.CL\n", + "cs.CV\n", + "cs.CV\n", + "cs.PL\n", + "cs.LG\n", + "hep-th\n", + "cs.IT\n", + "physics.soc-ph\n", + "eess.SP\n", + "quant-ph\n", + "cs.CV\n", + "math.RA\n", + "cs.AI\n", + "cs.LG\n", + "cs.LG\n", + "quant-ph\n", + "cond-mat.mes-hall\n", + "cs.SD\n", + "hep-ph\n", + "cs.LG\n", + "astro-ph.HE\n", + "cond-mat.mtrl-sci\n", + "cs.LG\n", + "cs.CV\n", + "cs.LG\n", + "math.ST\n", + "math.CO\n", + "cs.LG\n", + "cs.DL\n", + "math.CO\n", + "math.CO\n", + "stat.ME\n", + "math.CO\n", + "eess.SY\n", + "astro-ph.SR\n", + "cs.NE\n", + "math.NT\n", + "math.OC\n", + "cs.SI\n", + "cs.LG\n", + "astro-ph.CO\n", + "cs.CV\n", + "astro-ph.GA\n", + "math.RT\n", + "eess.SY\n", + "physics.optics\n", + "cs.IR\n", + "cond-mat.str-el\n", + "cs.LG\n", + "hep-th\n", + "cs.LG\n", + "math.CO\n", + "cs.LG\n", + "physics.chem-ph\n", + "cs.NI\n", + "cs.CL\n", + "astro-ph.SR\n", + "astro-ph.IM\n", + "math-ph\n", + "cs.DS\n", + "cs.LG\n", + "physics.ins-det\n", + "math.AP\n", + "math-ph\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "cs.LG\n", + "quant-ph\n", + "math.GR\n", + "cs.CL\n", + "cs.LG\n", + "math.AG\n", + "gr-qc\n", + "cs.SE\n", + "physics.ins-det\n", + "eess.IV\n", + "math.GN\n", + "cs.IT\n", + "math.DS\n", + "math.GM\n", + "cs.DC\n", + "cs.LG\n", + "cs.CR\n", + "physics.optics\n", + "eess.SP\n", + "q-fin.MF\n", + "cs.SD\n", + "stat.ML\n", + "cond-mat.mtrl-sci\n", + "stat.ML\n", + "cs.CV\n", + "cs.AI\n", + "math.CO\n", + "cond-mat.supr-con\n", + "cs.MA\n", + "astro-ph.GA\n", + "math.NT\n", + "math-ph\n", + "cs.AI\n", + "astro-ph.HE\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "cs.LG\n", + "quant-ph\n", + "cs.LG\n", + "cs.IT\n", + "quant-ph\n", + "quant-ph\n", + "physics.chem-ph\n", + "math.OC\n", + "cs.CV\n", + "stat.ME\n", + "physics.optics\n", + "gr-qc\n", + "stat.ML\n", + "nucl-ex\n", 
+ "hep-th\n", + "cs.LG\n", + "cs.DS\n", + "cond-mat.mes-hall\n", + "math.CO\n", + "cs.CV\n", + "cs.CL\n", + "cs.CV\n", + "stat.ME\n", + "cs.IT\n", + "math.ST\n", + "cs.CL\n", + "eess.SP\n", + "math.CO\n", + "cs.LO\n", + "q-bio.NC\n", + "cond-mat.quant-gas\n", + "astro-ph.CO\n", + "math.DG\n", + "cs.CV\n", + "cs.DC\n", + "cs.IR\n", + "astro-ph.GA\n", + "cs.DB\n", + "cond-mat.stat-mech\n", + "cs.SD\n", + "gr-qc\n", + "quant-ph\n", + "cs.FL\n", + "eess.IV\n", + "cs.IT\n", + "cs.NI\n", + "cs.NE\n", + "cs.CV\n", + "cs.CR\n", + "q-bio.NC\n", + "cs.LG\n", + "cond-mat.dis-nn\n", + "econ.EM\n", + "cs.CV\n", + "cs.LG\n", + "cs.DM\n", + "cs.LG\n", + "cs.RO\n", + "cs.LG\n", + "math.NT\n", + "stat.ML\n", + "cs.LG\n", + "cs.CV\n", + "physics.soc-ph\n", + "hep-th\n", + "cs.CV\n", + "cs.IR\n", + "cs.LG\n", + "math.GR\n", + "cs.DM\n", + "astro-ph.HE\n", + "math.CV\n", + "math.CO\n", + "cs.DB\n", + "cs.DB\n", + "stat.ME\n", + "astro-ph.HE\n", + "physics.plasm-ph\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "hep-th\n", + "astro-ph.CO\n", + "cs.CV\n", + "cs.CV\n", + "cond-mat.mtrl-sci\n", + "math.OC\n", + "stat.ML\n", + "math.PR\n", + "quant-ph\n", + "math.AP\n", + "hep-ph\n", + "cs.CL\n", + "hep-th\n", + "gr-qc\n", + "astro-ph.GA\n", + "cs.SI\n", + "math.GR\n", + "astro-ph.EP\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "physics.geo-ph\n", + "hep-ph\n", + "eess.IV\n", + "math.RT\n", + "cs.CR\n", + "hep-ph\n", + "hep-ph\n", + "physics.chem-ph\n", + "cs.LG\n", + "cs.CV\n", + "math.PR\n", + "hep-ph\n", + "astro-ph.HE\n", + "cs.AI\n", + "nucl-th\n", + "cs.RO\n", + "eess.IV\n", + "physics.med-ph\n", + "math.CO\n", + "cs.CV\n", + "cs.RO\n", + "cs.DS\n", + "astro-ph.EP\n", + "hep-th\n", + "cs.CG\n", + "cs.CV\n", + "cs.MM\n", + "cs.LG\n", + "math.GR\n", + "cs.CL\n", + "cs.CL\n", + "cs.MM\n", + "hep-ph\n", + "math.NT\n", + "eess.IV\n", + "eess.SP\n", + "cs.CV\n", + "cs.AI\n", + "astro-ph.EP\n", + "physics.class-ph\n", + "math-ph\n", + "math.CO\n", + "cs.IT\n", + "cs.CV\n", + 
"cs.LG\n", + "cs.LG\n", + "q-bio.QM\n", + "cs.CL\n", + "cs.AR\n", + "q-bio.PE\n", + "gr-qc\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "hep-th\n", + "cs.CL\n", + "hep-ph\n", + "math.CO\n", + "math.DS\n", + "physics.med-ph\n", + "cs.CL\n", + "cs.CV\n", + "math.CO\n", + "cs.CL\n", + "math.GR\n", + "astro-ph.GA\n", + "math.HO\n", + "cs.RO\n", + "math-ph\n", + "math.RA\n", + "math.GM\n", + "cs.LG\n", + "math.OC\n", + "cs.CL\n", + "astro-ph.HE\n", + "stat.ML\n", + "astro-ph.GA\n", + "math.CO\n", + "gr-qc\n", + "gr-qc\n", + "astro-ph.GA\n", + "astro-ph.HE\n", + "hep-th\n", + "hep-th\n", + "cond-mat.str-el\n", + "cs.CL\n", + "cs.CV\n", + "cs.CY\n", + "astro-ph.HE\n", + "physics.chem-ph\n", + "cs.RO\n", + "gr-qc\n", + "physics.flu-dyn\n", + "cs.CV\n", + "cs.LG\n", + "physics.soc-ph\n", + "q-bio.QM\n", + "cs.NE\n", + "eess.SP\n", + "eess.IV\n", + "stat.AP\n", + "astro-ph.HE\n", + "math.CO\n", + "math.CO\n", + "physics.acc-ph\n", + "astro-ph.HE\n", + "cond-mat.mtrl-sci\n", + "gr-qc\n", + "cs.LG\n", + "hep-ph\n", + "math.NT\n", + "physics.bio-ph\n", + "cs.IT\n", + "math.PR\n", + "econ.GN\n", + "quant-ph\n", + "eess.SP\n", + "math-ph\n", + "cs.LG\n", + "cs.AI\n", + "cs.LG\n", + "stat.ML\n", + "cs.LG\n", + "eess.SP\n", + "cond-mat.mtrl-sci\n", + "cs.CV\n", + "cs.CV\n", + "cond-mat.mes-hall\n", + "physics.app-ph\n", + "quant-ph\n", + "astro-ph.GA\n", + "math.OC\n", + "cs.CR\n", + "math.GR\n", + "cs.LG\n", + "astro-ph.IM\n", + "cs.LG\n", + "cs.RO\n", + "math.NT\n", + "cs.CV\n", + "quant-ph\n", + "cs.LG\n", + "cs.LG\n", + "math.CO\n", + "math.CO\n", + "physics.chem-ph\n", + "eess.SP\n", + "cs.DC\n", + "cs.DM\n", + "math.AG\n", + "eess.SY\n", + "math.NT\n", + "cs.DC\n", + "hep-lat\n", + "cs.CL\n", + "hep-ex\n", + "gr-qc\n", + "astro-ph.SR\n", + "hep-th\n", + "astro-ph.GA\n", + "hep-th\n", + "astro-ph.GA\n", + "cs.CV\n", + "cs.CV\n", + "cs.IT\n", + "cs.LG\n", + "astro-ph.GA\n", + "math.CO\n", + "math.GR\n", + "cs.LG\n", + "hep-th\n", + "q-bio.NC\n", + "physics.optics\n", + 
"cs.DC\n", + "math.NA\n", + "cs.LG\n", + "cs.SD\n", + "cs.IT\n", + "math.CO\n", + "gr-qc\n", + "cond-mat.supr-con\n", + "math.DS\n", + "cs.LG\n", + "astro-ph.HE\n", + "astro-ph.GA\n", + "math.CT\n", + "cs.LG\n", + "cs.IR\n", + "math.LO\n", + "cs.LG\n", + "cs.SE\n", + "cs.IR\n", + "cs.RO\n", + "cs.RO\n", + "cs.CV\n", + "quant-ph\n", + "cond-mat.soft\n", + "math.OC\n", + "cs.LG\n", + "cs.LG\n", + "astro-ph.SR\n", + "math.LO\n", + "cs.LG\n", + "math.GR\n", + "stat.ML\n", + "astro-ph.SR\n", + "cs.AI\n", + "quant-ph\n", + "cond-mat.mes-hall\n", + "math.CO\n", + "math.OC\n", + "quant-ph\n", + "cs.LG\n", + "cs.CR\n", + "cond-mat.mes-hall\n", + "cond-mat.stat-mech\n", + "physics.class-ph\n", + "quant-ph\n", + "cs.AI\n", + "stat.ML\n", + "cs.MA\n", + "math.MG\n", + "cs.CV\n", + "cs.LG\n", + "cs.CV\n", + "cs.CV\n", + "eess.SP\n", + "math.OC\n", + "q-bio.MN\n", + "stat.ML\n", + "astro-ph.IM\n", + "physics.acc-ph\n", + "astro-ph.GA\n", + "eess.IV\n", + "cs.AI\n", + "math.OC\n", + "hep-lat\n", + "cond-mat.str-el\n", + "quant-ph\n", + "stat.ML\n", + "cs.CV\n", + "cs.DC\n", + "cs.CV\n", + "astro-ph.GA\n", + "eess.SP\n", + "cs.CL\n", + "cond-mat.mes-hall\n", + "cond-mat.mtrl-sci\n", + "nlin.CD\n", + "cs.AR\n", + "cs.GT\n", + "math.AC\n", + "cs.CV\n", + "astro-ph.GA\n", + "physics.soc-ph\n", + "cond-mat.stat-mech\n", + "astro-ph.CO\n", + "astro-ph.CO\n", + "cs.CV\n", + "gr-qc\n", + "astro-ph.GA\n", + "quant-ph\n", + "astro-ph.CO\n", + "cs.LG\n", + "math.CO\n", + "quant-ph\n", + "math.CT\n", + "cs.HC\n", + "quant-ph\n", + "astro-ph.HE\n", + "econ.GN\n", + "math.AG\n", + "cs.CV\n", + "econ.GN\n", + "cs.CV\n", + "cs.GT\n", + "quant-ph\n", + "math.OC\n", + "math.AC\n", + "cs.CG\n", + "cs.CL\n", + "cond-mat.stat-mech\n", + "hep-ex\n", + "cs.LG\n", + "math.AP\n", + "physics.ed-ph\n", + "physics.geo-ph\n", + "cond-mat.stat-mech\n", + "astro-ph.HE\n", + "math.CO\n", + "eess.AS\n", + "cs.LG\n", + "cs.LG\n", + "astro-ph.GA\n", + "cs.LG\n", + "cs.CR\n", + "cs.CR\n", + "cs.LG\n", + "cs.MA\n", 
+ "hep-ph\n", + "quant-ph\n", + "q-bio.PE\n", + "quant-ph\n", + "quant-ph\n", + "astro-ph.SR\n", + "cond-mat.supr-con\n", + "physics.optics\n", + "cs.IT\n", + "econ.TH\n", + "physics.data-an\n", + "astro-ph.HE\n", + "astro-ph.GA\n", + "stat.CO\n", + "cs.IT\n", + "eess.SP\n", + "eess.IV\n", + "cs.AI\n", + "eess.SP\n", + "physics.class-ph\n", + "nucl-th\n", + "cond-mat.str-el\n", + "physics.ins-det\n", + "physics.geo-ph\n", + "math.GR\n", + "cs.CL\n", + "cs.CV\n", + "cs.LG\n", + "quant-ph\n", + "cs.CV\n", + "econ.EM\n", + "eess.SP\n", + "cs.CV\n", + "cs.SE\n", + "math.PR\n", + "cs.LG\n", + "cs.CR\n", + "math.DS\n", + "cs.SI\n", + "cs.CY\n", + "physics.optics\n", + "math.LO\n", + "econ.GN\n", + "eess.IV\n", + "cs.CV\n", + "stat.ML\n", + "q-bio.QM\n", + "gr-qc\n", + "math.NT\n", + "math.NT\n", + "cs.CV\n", + "cs.CG\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "cs.CR\n", + "cs.CV\n", + "cs.CV\n", + "gr-qc\n", + "math.MG\n", + "math.FA\n", + "cs.LG\n", + "math.AP\n", + "cs.LG\n", + "cs.NI\n", + "cs.CV\n", + "cs.LG\n", + "cs.LG\n", + "cs.LG\n", + "cs.CV\n", + "cs.LG\n", + "cs.CV\n", + "math.ST\n", + "cs.CV\n", + "cond-mat.stat-mech\n", + "stat.ML\n", + "cs.LG\n", + "cond-mat.str-el\n", + "cs.CV\n", + "gr-qc\n", + "cs.IT\n", + "hep-ex\n", + "math.SP\n", + "math.CO\n", + "math.AG\n", + "math.OC\n", + "cs.LG\n", + "cs.LG\n", + "cs.SI\n", + "math.RT\n", + "cs.CV\n", + "cs.CL\n", + "math.DS\n", + "cs.FL\n", + "eess.SY\n", + "cs.CV\n", + "cs.LG\n", + "cs.CV\n", + "cs.AI\n", + "cs.LG\n", + "math.NA\n", + "math.NA\n", + "cs.GT\n", + "cs.CY\n", + "cond-mat.soft\n", + "math.AP\n", + "astro-ph.SR\n", + "astro-ph.EP\n", + "stat.ME\n", + "cs.NE\n", + "math.NT\n", + "physics.chem-ph\n", + "physics.comp-ph\n", + "math.RA\n", + "astro-ph.EP\n", + "cs.DC\n", + "astro-ph.IM\n", + "quant-ph\n", + "hep-th\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "quant-ph\n", + "astro-ph.GA\n", + "math.CO\n", + "cs.CV\n", + "quant-ph\n", + "cs.CV\n", + "cs.DS\n", + "cs.LG\n", + "physics.optics\n", + 
"math.GN\n", + "math.PR\n", + "eess.SP\n", + "cs.DC\n", + "cs.DB\n", + "physics.flu-dyn\n", + "astro-ph.CO\n", + "hep-th\n", + "astro-ph.SR\n", + "cs.LG\n", + "math.NT\n", + "astro-ph.GA\n", + "cs.DS\n", + "cs.LG\n", + "math.AT\n", + "math.CO\n", + "astro-ph.IM\n", + "cond-mat.soft\n", + "cs.LG\n", + "cs.CY\n", + "cs.LO\n", + "hep-th\n", + "math.OC\n", + "eess.SP\n", + "cs.CV\n", + "math.GR\n", + "hep-ph\n", + "astro-ph.CO\n", + "math.DG\n", + "astro-ph.GA\n", + "astro-ph.IM\n", + "math.AP\n", + "eess.SY\n", + "math.DG\n", + "cs.LG\n", + "math.NA\n", + "cond-mat.mtrl-sci\n", + "cs.HC\n", + "eess.SP\n", + "cs.CV\n", + "physics.ao-ph\n", + "cs.FL\n", + "math.NT\n", + "gr-qc\n", + "math.AG\n", + "cs.LG\n", + "math.AP\n", + "eess.IV\n", + "math.GT\n", + "cs.CV\n", + "cs.LG\n", + "cs.CR\n", + "stat.AP\n", + "math.GM\n", + "stat.ME\n", + "cs.LG\n", + "cond-mat.supr-con\n", + "eess.IV\n", + "hep-ph\n", + "cond-mat.mtrl-sci\n", + "cs.CR\n", + "cs.LG\n", + "cs.LG\n", + "cs.CV\n", + "astro-ph.IM\n", + "cs.CC\n", + "math-ph\n", + "physics.optics\n", + "physics.optics\n", + "cs.LG\n", + "math.MG\n", + "stat.ME\n", + "eess.IV\n", + "astro-ph.SR\n", + "math.NA\n", + "math.DG\n", + "astro-ph.SR\n", + "math.AG\n", + "cond-mat.str-el\n", + "astro-ph.GA\n", + "math.CO\n", + "hep-th\n", + "physics.comp-ph\n", + "cond-mat.stat-mech\n", + "math.DG\n", + "math.AP\n", + "math.AT\n", + "nucl-ex\n", + "cs.CL\n", + "cs.CV\n", + "math.LO\n", + "astro-ph.EP\n", + "hep-ph\n", + "math.CO\n", + "stat.CO\n", + "econ.GN\n", + "quant-ph\n", + "q-bio.NC\n", + "math.DG\n", + "math.CO\n", + "cs.LG\n", + "math-ph\n", + "cond-mat.mes-hall\n", + "cs.AI\n", + "gr-qc\n", + "cs.CV\n", + "astro-ph.CO\n", + "cond-mat.mtrl-sci\n", + "stat.ML\n", + "cs.CV\n", + "math.DS\n", + "cs.DM\n", + "math.DS\n", + "cs.LG\n", + "math.OC\n", + "math.OC\n", + "cs.CV\n", + "cs.CC\n", + "eess.SP\n", + "cs.CV\n", + "astro-ph.HE\n", + "hep-ex\n", + "cs.LG\n", + "cs.LG\n", + "math.CO\n", + "cs.AI\n", + "astro-ph.SR\n", + 
"cs.NE\n", + "cs.CY\n", + "astro-ph.SR\n", + "math.CO\n", + "cs.CV\n", + "math.GR\n", + "gr-qc\n", + "math.NT\n", + "math.RT\n", + "cs.IT\n", + "stat.ML\n", + "cs.LG\n", + "cs.DL\n", + "cs.DS\n", + "math-ph\n", + "cs.DC\n", + "physics.gen-ph\n", + "cs.LG\n", + "physics.atom-ph\n", + "astro-ph.CO\n", + "cs.LG\n", + "math.DS\n", + "cs.RO\n", + "cs.LG\n", + "astro-ph.IM\n", + "hep-th\n", + "hep-ph\n", + "astro-ph.CO\n", + "math.AT\n", + "gr-qc\n", + "cs.DS\n", + "math.RT\n", + "cs.CE\n", + "quant-ph\n", + "quant-ph\n", + "math.SG\n", + "cond-mat.stat-mech\n", + "cs.CV\n", + "cs.DC\n", + "cs.MA\n", + "math.AG\n", + "cs.RO\n", + "cs.MA\n", + "cond-mat.supr-con\n", + "physics.atom-ph\n", + "eess.IV\n", + "math.CO\n", + "cs.LG\n", + "cs.LG\n", + "cs.CR\n", + "physics.ins-det\n", + "stat.ME\n", + "cs.LG\n", + "cs.LG\n", + "cs.CV\n", + "cs.LO\n", + "cs.LG\n", + "eess.SP\n", + "math.AP\n", + "astro-ph.IM\n", + "cs.CV\n", + "cs.DM\n", + "cs.CV\n", + "cs.IT\n", + "math.SG\n", + "astro-ph.GA\n", + "math.CV\n", + "quant-ph\n", + "cs.LG\n", + "hep-ph\n", + "cs.LG\n", + "cs.LG\n", + "math-ph\n", + "cs.LG\n", + "cs.LG\n", + "physics.hist-ph\n", + "eess.IV\n", + "cs.LG\n", + "cs.FL\n", + "math.CV\n", + "cs.CL\n", + "cs.LG\n", + "eess.IV\n", + "astro-ph.GA\n", + "cs.LO\n", + "cs.CR\n", + "stat.ME\n", + "cs.LG\n", + "physics.flu-dyn\n", + "cs.CV\n", + "cs.IT\n", + "cs.SC\n", + "cs.CV\n", + "cs.IT\n", + "cs.CV\n", + "cs.CV\n", + "math.AP\n", + "math.AG\n", + "cs.CL\n", + "cs.AR\n", + "eess.SP\n", + "math.RT\n", + "math.OC\n", + "cs.RO\n", + "q-bio.GN\n", + "cs.CR\n", + "quant-ph\n", + "cs.CL\n", + "cs.LG\n", + "math.MG\n", + "cond-mat.soft\n", + "astro-ph.CO\n", + "cs.CL\n", + "cs.LG\n", + "math.PR\n", + "astro-ph.GA\n", + "cs.MA\n", + "physics.gen-ph\n", + "cs.LG\n", + "cs.LG\n", + "cond-mat.mes-hall\n", + "cs.RO\n", + "cs.CV\n", + "astro-ph.CO\n", + "cond-mat.mtrl-sci\n", + "stat.ML\n", + "cs.LG\n", + "astro-ph.HE\n", + "cs.LG\n", + "astro-ph.SR\n", + "gr-qc\n", + "quant-ph\n", + 
"cond-mat.quant-gas\n", + "quant-ph\n", + "astro-ph.SR\n", + "cs.LG\n", + "cs.CV\n", + "cs.LG\n", + "cs.RO\n", + "cs.LG\n", + "cs.CL\n", + "gr-qc\n", + "math.MG\n", + "math.PR\n", + "physics.ins-det\n", + "math.FA\n", + "quant-ph\n", + "math.RT\n", + "q-bio.QM\n", + "quant-ph\n", + "quant-ph\n", + "cs.LG\n", + "quant-ph\n", + "math.AT\n", + "math.AP\n", + "cs.DS\n", + "astro-ph.IM\n", + "math.NT\n", + "cs.LG\n", + "econ.EM\n", + "math.PR\n", + "physics.plasm-ph\n", + "cond-mat.mes-hall\n", + "quant-ph\n", + "cs.CL\n", + "cs.CL\n", + "cond-mat.mes-hall\n", + "hep-th\n", + "astro-ph.SR\n", + "cs.CV\n", + "math.AG\n", + "cs.LG\n", + "cs.LG\n", + "cs.LG\n", + "hep-ex\n", + "nucl-th\n", + "quant-ph\n", + "cond-mat.mtrl-sci\n", + "hep-ph\n", + "cond-mat.str-el\n", + "cs.CL\n", + "cond-mat.stat-mech\n", + "math-ph\n", + "cs.FL\n", + "cs.LG\n", + "gr-qc\n", + "math.OC\n", + "math.NT\n", + "quant-ph\n", + "math.OC\n", + "astro-ph.IM\n", + "econ.GN\n", + "cs.LG\n", + "cs.LG\n", + "cs.SE\n", + "cond-mat.mtrl-sci\n", + "physics.optics\n", + "cs.CE\n", + "astro-ph.CO\n", + "cs.CL\n", + "cs.AR\n", + "math.CO\n", + "math.PR\n", + "cs.CV\n", + "math.CO\n", + "hep-ph\n", + "cs.LG\n", + "physics.flu-dyn\n", + "cond-mat.mtrl-sci\n", + "math.NA\n", + "cs.DC\n", + "math.CO\n", + "cs.IR\n", + "cs.DM\n", + "cs.LG\n", + "q-bio.NC\n", + "econ.TH\n", + "eess.SP\n", + "cs.IR\n", + "eess.IV\n", + "math.GR\n", + "cs.IT\n", + "astro-ph.GA\n", + "hep-th\n", + "cs.AI\n", + "physics.flu-dyn\n", + "physics.ed-ph\n", + "quant-ph\n", + "hep-th\n", + "gr-qc\n", + "hep-ph\n", + "astro-ph.CO\n", + "math.MG\n", + "hep-th\n", + "math.OC\n", + "astro-ph.IM\n", + "cs.NE\n", + "math.AT\n", + "astro-ph.HE\n", + "gr-qc\n", + "math.CO\n", + "quant-ph\n", + "cs.CV\n", + "cond-mat.mes-hall\n", + "physics.class-ph\n", + "math.CO\n", + "cs.CV\n", + "hep-ph\n", + "physics.ins-det\n", + "math.GR\n", + "cs.LG\n", + "cs.PL\n", + "cs.RO\n", + "cs.SD\n", + "math.CO\n", + "astro-ph.IM\n", + "cs.LG\n", + "cs.SE\n", + 
"cs.CL\n", + "eess.IV\n", + "cs.CC\n", + "cs.RO\n", + "hep-th\n", + "cs.CR\n", + "cs.LG\n", + "math.RT\n", + "cs.LG\n", + "cs.LG\n", + "astro-ph.SR\n", + "cs.LG\n", + "cs.NI\n", + "math.CA\n", + "math.AP\n", + "math.GR\n", + "cond-mat.soft\n", + "math.RA\n", + "nucl-th\n", + "math.CO\n", + "math-ph\n", + "hep-th\n", + "cond-mat.supr-con\n", + "stat.ME\n", + "stat.ME\n", + "cs.CV\n", + "cs.LG\n", + "cs.LG\n", + "cs.IT\n", + "cs.CV\n", + "physics.med-ph\n", + "cs.CR\n", + "q-bio.QM\n", + "math.CO\n", + "physics.app-ph\n", + "physics.optics\n", + "cs.CV\n", + "physics.optics\n", + "cs.LO\n", + "cond-mat.str-el\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "cs.DC\n", + "math.GN\n", + "cs.DM\n", + "cs.CV\n", + "cond-mat.soft\n", + "eess.SP\n", + "math.AP\n", + "cond-mat.other\n", + "cs.RO\n", + "math.DS\n", + "cs.LG\n", + "cs.LG\n", + "cs.GT\n", + "cs.IR\n", + "math.OC\n", + "hep-ph\n", + "cs.DS\n", + "cs.LG\n", + "cond-mat.supr-con\n", + "cs.GT\n", + "cs.DS\n", + "cs.LG\n", + "math.SG\n", + "math.DS\n", + "cs.GT\n", + "math.CO\n", + "q-bio.PE\n", + "gr-qc\n", + "cs.CY\n", + "cs.CV\n", + "cs.IR\n", + "astro-ph.CO\n", + "cs.DM\n", + "cs.IT\n", + "physics.ins-det\n", + "cs.CR\n", + "cs.CL\n", + "cs.CV\n", + "cs.DL\n", + "math.DG\n", + "math.NA\n", + "cs.IT\n", + "cs.CV\n", + "math.AG\n", + "hep-ph\n", + "gr-qc\n", + "hep-th\n", + "cs.LG\n", + "hep-ph\n", + "cs.NE\n", + "math.QA\n", + "hep-ph\n", + "hep-th\n", + "eess.SP\n", + "cs.LG\n", + "cs.GT\n", + "cs.CL\n", + "cs.CV\n", + "astro-ph.HE\n", + "cs.IT\n", + "stat.ME\n", + "cs.DS\n", + "cs.CV\n", + "cs.HC\n", + "math.SP\n", + "math.AP\n", + "stat.CO\n", + "cs.DC\n", + "cs.CV\n", + "cs.SD\n", + "hep-ph\n", + "astro-ph.SR\n", + "cond-mat.mtrl-sci\n", + "cs.LG\n", + "cs.LG\n", + "cs.CL\n", + "cs.CV\n", + "q-bio.QM\n", + "quant-ph\n", + "math.AP\n", + "cs.GR\n", + "hep-ph\n", + "astro-ph.GA\n", + "math.PR\n", + "cs.DS\n", + "cs.SI\n", + "cond-mat.soft\n", + "nlin.SI\n", + "math.PR\n", + "physics.ins-det\n", + 
"cond-mat.mtrl-sci\n", + "math.GR\n", + "cond-mat.mtrl-sci\n", + "physics.geo-ph\n", + "cs.CV\n", + "cs.CL\n", + "stat.ML\n", + "physics.flu-dyn\n", + "eess.SP\n", + "cs.LG\n", + "cs.GT\n", + "cs.CV\n", + "cs.DB\n", + "cs.LG\n", + "math.DS\n", + "math.CO\n", + "physics.flu-dyn\n", + "physics.ed-ph\n", + "cs.LG\n", + "math.NA\n", + "cs.CV\n", + "cs.LG\n", + "cs.RO\n", + "physics.chem-ph\n", + "cs.CL\n", + "hep-ph\n", + "astro-ph.HE\n", + "astro-ph.GA\n", + "quant-ph\n", + "math.NA\n", + "astro-ph.SR\n", + "physics.chem-ph\n", + "cs.LG\n", + "cs.LG\n", + "cs.LG\n", + "cs.DS\n", + "math.GR\n", + "physics.flu-dyn\n", + "cs.LO\n", + "math.DG\n", + "cs.LG\n", + "cs.CV\n", + "cs.CV\n", + "cs.FL\n", + "math.NT\n", + "math.AG\n", + "cond-mat.mtrl-sci\n", + "cs.IT\n", + "cs.SI\n", + "cs.RO\n", + "eess.SY\n", + "astro-ph.CO\n", + "hep-th\n", + "math.CO\n", + "q-bio.GN\n", + "cs.CV\n", + "stat.ME\n", + "math.ST\n", + "cs.LG\n", + "math.CO\n", + "cs.LG\n", + "cs.LG\n", + "eess.SY\n", + "hep-ph\n", + "cs.DB\n", + "cs.CR\n", + "math.OC\n", + "cs.LG\n", + "gr-qc\n", + "astro-ph.SR\n", + "quant-ph\n", + "cs.LG\n", + "cs.LG\n", + "hep-ex\n", + "quant-ph\n", + "cs.LG\n", + "cond-mat.stat-mech\n", + "physics.optics\n", + "cs.CR\n", + "math.DG\n", + "cs.LG\n", + "cs.LG\n", + "cs.LG\n", + "quant-ph\n", + "physics.soc-ph\n", + "cs.CV\n", + "eess.IV\n", + "cs.CV\n", + "physics.plasm-ph\n", + "cs.CV\n", + "math.OA\n", + "cs.CV\n", + "cs.MA\n", + "cs.LG\n", + "cs.ET\n", + "cs.DC\n", + "math.DS\n", + "cs.LG\n", + "cs.LG\n", + "gr-qc\n", + "cs.LG\n", + "cs.LG\n", + "cond-mat.supr-con\n", + "cond-mat.mtrl-sci\n", + "cs.GR\n", + "cs.CV\n", + "astro-ph.CO\n", + "q-fin.ST\n", + "cs.LG\n", + "astro-ph.CO\n", + "stat.ME\n", + "math.AG\n", + "astro-ph.CO\n", + "cs.GT\n", + "cs.CR\n", + "cs.LG\n", + "cs.CG\n", + "cs.IT\n", + "hep-th\n", + "astro-ph.GA\n", + "hep-th\n", + "hep-ph\n", + "cs.CV\n", + "gr-qc\n", + "cond-mat.quant-gas\n", + "math.PR\n", + "nucl-ex\n", + "quant-ph\n", + "quant-ph\n", + 
"math.GR\n", + "cs.LG\n", + "math-ph\n", + "cs.IT\n", + "astro-ph.SR\n", + "cs.CC\n", + "cs.DL\n", + "cond-mat.mtrl-sci\n", + "math.PR\n", + "hep-lat\n", + "math.CO\n", + "physics.flu-dyn\n", + "physics.optics\n", + "cs.CV\n", + "physics.ins-det\n", + "math.PR\n", + "math.AP\n", + "cs.CY\n", + "cs.LG\n", + "hep-th\n", + "astro-ph.SR\n", + "cs.GT\n", + "cs.LG\n", + "cs.CV\n", + "cs.IT\n", + "cs.LG\n", + "math.CO\n", + "hep-ph\n", + "math.FA\n", + "cs.LG\n", + "cs.CV\n", + "cs.CY\n", + "math.CO\n", + "cs.CV\n", + "math.DS\n", + "cond-mat.soft\n", + "math.NA\n", + "math.SP\n", + "quant-ph\n", + "cs.CV\n", + "physics.soc-ph\n", + "astro-ph.HE\n", + "gr-qc\n", + "cs.DL\n", + "math.DS\n", + "math.GR\n", + "cond-mat.mtrl-sci\n", + "math.NT\n", + "math.DG\n", + "cs.CR\n", + "math.NT\n", + "stat.ML\n", + "cs.CV\n", + "cs.CV\n", + "cs.LG\n", + "cs.LO\n", + "physics.optics\n", + "cs.LG\n", + "cond-mat.mes-hall\n", + "cs.CV\n", + "q-bio.MN\n", + "math.FA\n", + "cs.DS\n", + "cs.RO\n", + "hep-ph\n", + "cs.CV\n", + "math.ST\n", + "cs.AI\n", + "cs.LG\n", + "astro-ph.GA\n", + "astro-ph.CO\n", + "astro-ph.IM\n", + "cs.LG\n", + "physics.space-ph\n", + "astro-ph.GA\n", + "hep-th\n", + "cs.CL\n", + "hep-th\n", + "astro-ph.EP\n", + "astro-ph.CO\n", + "hep-ph\n", + "cs.CV\n", + "hep-ex\n", + "physics.atom-ph\n", + "cs.CL\n", + "math.NT\n", + "astro-ph.EP\n", + "cs.CV\n", + "astro-ph.SR\n", + "econ.EM\n", + "astro-ph.CO\n", + "cs.CR\n", + "astro-ph.HE\n", + "cs.IR\n", + "stat.ML\n", + "econ.GN\n", + "cs.PF\n", + "physics.geo-ph\n", + "astro-ph.SR\n", + "physics.acc-ph\n", + "quant-ph\n", + "astro-ph.HE\n", + "cs.CL\n", + "gr-qc\n", + "physics.chem-ph\n", + "cond-mat.soft\n", + "math.AG\n", + "math.CO\n", + "math.CO\n", + "cond-mat.str-el\n", + "math.PR\n", + "math.CO\n", + "cs.CL\n", + "cond-mat.stat-mech\n", + "nucl-th\n", + "math.AG\n", + "cond-mat.mes-hall\n", + "math.AG\n", + "cs.CR\n", + "cs.CL\n", + "cs.CL\n", + "cs.CL\n", + "cs.CL\n", + "eess.SP\n", + "cs.CL\n", + "math.LO\n", + 
"cs.CL\n", + "cs.CL\n", + "cs.IT\n", + "physics.app-ph\n", + "cs.CL\n", + "cs.CV\n", + "cond-mat.quant-gas\n", + "cs.CL\n", + "astro-ph.CO\n", + "cond-mat.soft\n", + "cs.CV\n", + "cs.DC\n", + "physics.optics\n", + "cond-mat.mtrl-sci\n", + "cs.CL\n", + "math.RT\n", + "cs.CL\n", + "gr-qc\n", + "cond-mat.mes-hall\n", + "math.OA\n", + "cs.CV\n", + "physics.optics\n", + "cs.CL\n", + "math.GR\n", + "cs.CL\n", + "cs.LG\n", + "cs.CL\n", + "q-bio.BM\n", + "math.AP\n", + "math.DG\n", + "eess.AS\n", + "cs.CL\n", + "cs.CV\n", + "stat.ME\n", + "math.FA\n", + "hep-ph\n", + "cs.LG\n", + "math.OC\n", + "cond-mat.supr-con\n", + "eess.IV\n", + "math.AG\n", + "physics.soc-ph\n", + "math.NT\n", + "hep-ph\n", + "cs.RO\n", + "quant-ph\n", + "physics.chem-ph\n", + "cs.AR\n", + "cs.LG\n", + "cs.CC\n", + "cs.NI\n", + "hep-ph\n", + "hep-th\n", + "astro-ph.HE\n", + "math.NA\n", + "math.CO\n", + "astro-ph.HE\n", + "cs.CV\n", + "cond-mat.stat-mech\n", + "math.DS\n", + "cs.CR\n", + "cs.CY\n", + "cs.LG\n", + "q-bio.PE\n", + "cs.CL\n", + "math.OC\n", + "cs.DC\n", + "nucl-th\n", + "astro-ph.GA\n", + "astro-ph.SR\n", + "astro-ph.CO\n", + "hep-th\n", + "cs.CL\n", + "hep-ph\n", + "math.CO\n", + "hep-th\n", + "math.NT\n", + "cs.CV\n", + "cs.CL\n", + "cs.CL\n", + "gr-qc\n", + "math.AG\n", + "stat.ME\n", + "cs.AI\n", + "math.OC\n", + "math.OC\n", + "physics.chem-ph\n", + "cs.IT\n", + "cs.CV\n", + "math.GR\n", + "gr-qc\n", + "math.NT\n", + "quant-ph\n", + "stat.ML\n", + "quant-ph\n", + "cs.LG\n", + "hep-th\n", + "cs.GT\n", + "math.NA\n", + "cs.NI\n", + "math.AP\n", + "math.RT\n", + "cs.CV\n", + "cs.CL\n", + "cs.CV\n", + "cond-mat.str-el\n", + "eess.SY\n", + "cs.LG\n", + "math.AT\n", + "cond-mat.mtrl-sci\n", + "cs.CL\n", + "math.GR\n", + "cs.CL\n", + "physics.app-ph\n", + "eess.SY\n", + "quant-ph\n", + "math.CO\n", + "cond-mat.str-el\n", + "quant-ph\n", + "cs.IR\n", + "hep-ph\n", + "cs.LG\n", + "cs.CL\n", + "cs.CV\n", + "math.GN\n", + "cs.IT\n", + "cs.MM\n", + "cs.SI\n", + "math.CO\n", + "math.CO\n", + 
"cs.SE\n", + "cs.LG\n", + "cs.CL\n", + "eess.SY\n", + "hep-th\n", + "cs.PF\n", + "gr-qc\n", + "quant-ph\n", + "cs.SD\n", + "cs.CL\n", + "cs.IR\n", + "cs.LG\n", + "physics.optics\n", + "cs.LG\n", + "gr-qc\n", + "hep-th\n", + "cs.LO\n", + "cs.LO\n", + "cs.MS\n", + "stat.ML\n", + "cond-mat.mes-hall\n", + "cs.CV\n", + "physics.chem-ph\n", + "cs.DB\n", + "physics.chem-ph\n", + "math.PR\n", + "cs.DL\n", + "math.CO\n", + "stat.ML\n", + "cs.AI\n", + "physics.optics\n", + "stat.ME\n", + "cs.LG\n", + "physics.optics\n", + "math.NT\n", + "hep-ph\n", + "math.AG\n", + "hep-ph\n", + "astro-ph.HE\n", + "astro-ph.CO\n", + "astro-ph.GA\n", + "astro-ph.CO\n", + "gr-qc\n", + "cs.IR\n", + "math.CO\n", + "cs.CL\n", + "hep-lat\n", + "cs.CV\n", + "cs.LG\n", + "math.OC\n", + "math.SG\n", + "math.AP\n", + "hep-th\n", + "quant-ph\n", + "eess.SP\n", + "astro-ph.SR\n", + "math.RA\n", + "math.GT\n", + "math.CO\n", + "cs.CL\n", + "quant-ph\n", + "cs.LG\n", + "math.CO\n", + "cs.SE\n", + "cs.CV\n", + "eess.SP\n", + "physics.gen-ph\n", + "cond-mat.stat-mech\n", + "cs.GT\n", + "physics.soc-ph\n", + "cs.LG\n", + "math.OC\n", + "physics.flu-dyn\n", + "physics.chem-ph\n", + "physics.ins-det\n", + "cs.DS\n", + "cs.DS\n", + "cs.CV\n", + "cond-mat.dis-nn\n", + "physics.med-ph\n", + "math.FA\n", + "math.GM\n", + "cs.CV\n", + "cs.CL\n", + "cond-mat.mtrl-sci\n", + "astro-ph.SR\n", + "cs.CV\n", + "cs.DC\n", + "cs.IT\n", + "math.FA\n", + "cs.LG\n", + "hep-ph\n", + "math.AG\n", + "cs.LG\n", + "physics.comp-ph\n", + "physics.optics\n", + "quant-ph\n", + "cs.CL\n", + "stat.ML\n", + "cs.DC\n", + "cs.CV\n", + "math.GT\n", + "cs.AR\n", + "cs.CL\n", + "math.AT\n", + "quant-ph\n", + "math.CT\n", + "cs.LG\n", + "math.AP\n", + "cs.AI\n", + "cs.SE\n", + "cs.CL\n", + "math.CO\n", + "cs.DS\n", + "physics.med-ph\n", + "physics.ao-ph\n", + "math.OC\n", + "math.CO\n", + "cs.CR\n", + "astro-ph.EP\n", + "cs.IT\n", + "cs.GT\n", + "math-ph\n", + "astro-ph.IM\n", + "math.OC\n", + "math.GT\n", + "cs.LG\n", + "cs.CY\n", + 
"econ.TH\n", + "math.RT\n", + "cs.AI\n", + "astro-ph.IM\n", + "astro-ph.CO\n", + "math.CO\n", + "cs.LG\n", + "physics.chem-ph\n", + "cs.CL\n", + "cs.CV\n", + "cs.CL\n", + "cs.CR\n", + "cs.GT\n", + "stat.AP\n", + "cs.IT\n", + "math.CO\n", + "math.ST\n", + "cs.CV\n", + "math.DG\n", + "math.CO\n", + "math.OA\n", + "cs.CY\n", + "math.AP\n", + "cs.CL\n", + "physics.class-ph\n", + "cs.IT\n", + "cs.CV\n", + "cs.CL\n", + "cs.RO\n", + "cs.LG\n", + "gr-qc\n", + "math-ph\n", + "math.OA\n", + "cs.SE\n", + "cs.NE\n", + "cond-mat.soft\n", + "cs.SE\n", + "cs.LG\n", + "cs.CV\n", + "cs.LG\n", + "math.GR\n", + "cs.CV\n", + "cs.MM\n", + "math.LO\n", + "quant-ph\n", + "cs.LG\n", + "quant-ph\n", + "math.AC\n", + "physics.flu-dyn\n", + "cs.AI\n", + "astro-ph.GA\n", + "quant-ph\n", + "math.CO\n", + "quant-ph\n", + "cond-mat.mtrl-sci\n", + "cs.CV\n", + "cs.CL\n", + "cs.DC\n", + "hep-ex\n", + "cs.CL\n", + "math.NT\n", + "cs.IT\n", + "cs.LG\n", + "cs.CV\n", + "quant-ph\n", + "cs.LG\n", + "physics.optics\n", + "gr-qc\n", + "math-ph\n", + "astro-ph.EP\n", + "astro-ph.HE\n", + "cs.CV\n", + "stat.ME\n", + "cs.CL\n", + "quant-ph\n", + "cs.LG\n", + "math.OC\n", + "hep-ph\n", + "cs.CV\n", + "econ.TH\n", + "math.AP\n", + "quant-ph\n", + "cs.AI\n", + "astro-ph.GA\n", + "astro-ph.SR\n", + "cs.CL\n", + "eess.AS\n", + "cs.CV\n", + "cs.LG\n", + "cond-mat.quant-gas\n", + "nucl-th\n", + "quant-ph\n", + "cs.CV\n", + "cs.LG\n", + "cs.CV\n", + "cs.CV\n", + "nucl-th\n", + "gr-qc\n", + "cs.LG\n", + "cs.DB\n", + "cond-mat.soft\n", + "cs.DS\n", + "cs.LO\n", + "hep-th\n", + "eess.SY\n", + "cs.CV\n", + "astro-ph.IM\n", + "cs.LG\n", + "physics.flu-dyn\n", + "cs.PL\n", + "math.PR\n", + "cs.IR\n", + "gr-qc\n", + "hep-ex\n", + "cond-mat.soft\n", + "quant-ph\n", + "cs.LG\n", + "cs.CL\n", + "astro-ph.IM\n", + "cs.LG\n", + "cs.IR\n", + "astro-ph.CO\n", + "math.GT\n", + "cs.CV\n", + "cs.LG\n", + "cs.CL\n", + "cs.IT\n", + "cond-mat.supr-con\n", + "cs.DC\n", + "eess.AS\n", + "math.CO\n", + "eess.IV\n", + "astro-ph.CO\n", + 
"cs.CL\n", + "cs.CV\n", + "cs.LG\n", + "physics.gen-ph\n", + "cond-mat.stat-mech\n", + "cs.LG\n", + "eess.IV\n", + "hep-ph\n", + "cs.LG\n", + "astro-ph.SR\n", + "cs.LG\n", + "cs.CV\n", + "math.GM\n", + "cond-mat.dis-nn\n", + "cs.LG\n", + "cs.CV\n", + "physics.plasm-ph\n", + "cs.LG\n", + "hep-ph\n", + "math.OC\n", + "cs.CL\n", + "cs.LG\n", + "astro-ph.EP\n", + "physics.data-an\n", + "cs.CV\n", + "astro-ph.SR\n", + "quant-ph\n", + "quant-ph\n", + "astro-ph.HE\n", + "cs.LO\n", + "cs.CV\n", + "physics.space-ph\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "physics.optics\n", + "cond-mat.mtrl-sci\n", + "cs.LG\n", + "cs.LG\n", + "cs.SE\n", + "cs.SI\n", + "quant-ph\n", + "cond-mat.dis-nn\n", + "quant-ph\n", + "astro-ph.SR\n", + "math.PR\n", + "astro-ph.EP\n", + "math.ST\n", + "cs.LG\n", + "gr-qc\n", + "math.AP\n", + "physics.optics\n", + "cs.CV\n", + "math.CO\n", + "math.NT\n", + "cs.RO\n", + "math.OC\n", + "cs.CV\n", + "cs.IR\n", + "math.CO\n", + "astro-ph.IM\n", + "cs.IT\n", + "cond-mat.str-el\n", + "cs.LG\n", + "cs.IR\n", + "physics.hist-ph\n", + "quant-ph\n", + "hep-ex\n", + "hep-ph\n", + "cond-mat.supr-con\n", + "math.CO\n", + "cs.CL\n", + "math.DG\n", + "cs.CL\n", + "stat.AP\n", + "math.CO\n", + "nlin.SI\n", + "physics.flu-dyn\n", + "cs.CL\n", + "math.NT\n", + "cs.CR\n", + "cs.CR\n", + "stat.ML\n", + "quant-ph\n", + "cs.IR\n", + "cond-mat.dis-nn\n", + "cs.DC\n", + "cs.SI\n", + "cs.CR\n", + "math.PR\n", + "gr-qc\n", + "cs.CV\n", + "quant-ph\n", + "eess.SP\n", + "cs.LG\n", + "math-ph\n", + "physics.bio-ph\n", + "cs.SE\n", + "cond-mat.other\n", + "math.AP\n", + "astro-ph.EP\n", + "math.AP\n", + "physics.flu-dyn\n", + "cs.CV\n", + "cs.LG\n", + "stat.ML\n", + "cs.CV\n", + "astro-ph.GA\n", + "hep-lat\n", + "cs.CV\n", + "cs.CL\n", + "astro-ph.IM\n", + "astro-ph.CO\n", + "hep-th\n", + "cs.LG\n", + "cs.CV\n", + "cs.LG\n", + "cs.RO\n", + "cs.LG\n", + "cs.LG\n", + "quant-ph\n", + "cs.CL\n", + "math.CO\n", + "math.PR\n", + "cs.CL\n", + "gr-qc\n", + "physics.chem-ph\n", + 
"math.GM\n", + "cs.DC\n", + "cs.CG\n", + "math.DG\n", + "math.CO\n", + "stat.ML\n", + "cs.DC\n", + "cs.CR\n", + "cs.LG\n", + "cs.CL\n", + "cs.NI\n", + "astro-ph.GA\n", + "cs.FL\n", + "math.FA\n", + "q-bio.QM\n", + "cs.LG\n", + "cs.DC\n", + "quant-ph\n", + "physics.optics\n", + "math.PR\n", + "hep-ph\n", + "astro-ph.GA\n", + "hep-th\n", + "astro-ph.GA\n", + "hep-th\n", + "astro-ph.SR\n", + "math.DG\n", + "astro-ph.CO\n", + "astro-ph.GA\n", + "stat.AP\n", + "stat.ML\n", + "cs.CL\n", + "cond-mat.stat-mech\n", + "astro-ph.EP\n", + "physics.chem-ph\n", + "math.GR\n", + "cs.RO\n", + "cs.CR\n", + "math-ph\n", + "cs.LO\n", + "astro-ph.IM\n", + "quant-ph\n", + "physics.optics\n", + "hep-ex\n", + "cs.CR\n", + "math.RT\n", + "math.CO\n", + "cs.IR\n", + "cs.AI\n", + "hep-ex\n", + "math.CO\n", + "cs.LG\n", + "cs.CR\n", + "cond-mat.mtrl-sci\n", + "cs.RO\n", + "cond-mat.mtrl-sci\n", + "math.AP\n", + "cs.CV\n", + "quant-ph\n", + "q-fin.TR\n", + "cs.IT\n", + "cs.MS\n", + "astro-ph.GA\n", + "cs.CL\n", + "math.CO\n", + "astro-ph.GA\n", + "eess.SP\n", + "cs.LO\n", + "physics.geo-ph\n", + "cs.LG\n", + "cs.IT\n", + "cs.CV\n", + "cs.SI\n", + "math.PR\n", + "cs.LG\n", + "cs.CR\n", + "cs.SI\n", + "cs.CL\n", + "cs.CL\n", + "eess.SP\n", + "quant-ph\n", + "math.PR\n", + "math.CO\n", + "cs.IT\n", + "cs.CV\n", + "math.PR\n", + "cs.PL\n", + "math.CA\n", + "cs.PL\n", + "math.AP\n", + "cs.DL\n", + "math.CA\n", + "cs.CL\n", + "quant-ph\n", + "cs.AI\n", + "math.OC\n", + "cs.DB\n", + "cs.CV\n", + "physics.flu-dyn\n", + "cs.LG\n", + "cs.DS\n", + "cs.NI\n", + "hep-ph\n", + "physics.ins-det\n", + "cs.LO\n", + "math.ST\n", + "cs.CL\n", + "physics.optics\n", + "cs.CL\n", + "gr-qc\n", + "cs.SD\n", + "quant-ph\n", + "cs.LG\n", + "hep-ex\n", + "math.GT\n", + "cs.LO\n", + "cond-mat.supr-con\n", + "astro-ph.GA\n", + "cs.DS\n", + "hep-lat\n", + "gr-qc\n", + "astro-ph.HE\n", + "cs.IT\n", + "cs.CV\n", + "math.NT\n", + "math.GM\n", + "cond-mat.mes-hall\n", + "cs.NI\n", + "cs.CL\n", + "math.CO\n", + "cs.CV\n", + 
"cs.CR\n", + "cs.SD\n", + "cs.IT\n", + "math.CO\n", + "math.RT\n", + "math.DG\n", + "quant-ph\n", + "cs.DS\n", + "physics.hist-ph\n", + "cs.LG\n", + "math.NT\n", + "econ.TH\n", + "cs.DM\n", + "cs.IR\n", + "cond-mat.mes-hall\n", + "cs.CV\n", + "cond-mat.mtrl-sci\n", + "astro-ph.HE\n", + "cs.AR\n", + "math.CO\n", + "math.NT\n", + "hep-ph\n", + "cond-mat.mes-hall\n", + "cond-mat.mes-hall\n", + "math.GT\n", + "math.MG\n", + "math.NA\n", + "stat.ML\n", + "cs.FL\n", + "hep-ph\n", + "cs.CV\n", + "math.RT\n", + "gr-qc\n", + "cs.RO\n", + "gr-qc\n", + "math.CO\n", + "hep-th\n", + "cs.CL\n", + "eess.IV\n", + "quant-ph\n", + "cond-mat.mes-hall\n", + "cs.AI\n", + "cs.GT\n", + "math.AG\n", + "cs.CV\n", + "hep-ph\n", + "cs.CR\n", + "astro-ph.IM\n", + "cs.CV\n", + "cs.GT\n", + "cond-mat.soft\n", + "q-bio.QM\n", + "physics.flu-dyn\n", + "cs.LG\n", + "math.CV\n", + "stat.ME\n", + "cs.GR\n", + "cond-mat.stat-mech\n", + "cs.LG\n", + "eess.AS\n", + "astro-ph.HE\n", + "cs.CV\n", + "math.FA\n", + "physics.optics\n", + "cond-mat.stat-mech\n", + "cs.AR\n", + "cs.IT\n", + "stat.ME\n", + "cond-mat.quant-gas\n", + "astro-ph.HE\n", + "hep-ph\n", + "physics.ao-ph\n", + "cond-mat.str-el\n", + "hep-ph\n", + "astro-ph.GA\n", + "eess.SY\n", + "cs.CC\n", + "cs.RO\n", + "hep-th\n", + "physics.data-an\n", + "cs.GT\n", + "cs.SE\n", + "cs.CG\n", + "math.CT\n", + "cs.CR\n", + "quant-ph\n", + "cs.CL\n", + "hep-ph\n", + "hep-ph\n", + "quant-ph\n", + "cs.SE\n", + "math.CO\n", + "cs.RO\n", + "cs.CV\n", + "cs.DM\n", + "math.AP\n", + "stat.ME\n", + "math-ph\n", + "math.AG\n", + "cs.CV\n", + "cond-mat.mes-hall\n", + "cs.NE\n", + "math.NT\n", + "cs.CV\n", + "cs.CR\n", + "cs.IT\n", + "math.CO\n", + "cs.IR\n", + "math.FA\n", + "cs.LG\n", + "cs.CV\n", + "astro-ph.GA\n", + "physics.med-ph\n", + "cs.CV\n", + "math.CV\n", + "math.RT\n", + "physics.flu-dyn\n", + "cs.AI\n", + "cond-mat.mes-hall\n", + "math.AG\n", + "cond-mat.stat-mech\n", + "cs.CR\n", + "cond-mat.mes-hall\n", + "quant-ph\n", + "cs.CL\n", + "cs.SE\n", + 
"cs.SD\n", + "cs.IT\n", + "hep-th\n", + "math.AP\n", + "hep-ex\n", + "cs.LG\n", + "cs.LG\n", + "stat.ML\n", + "cs.SI\n", + "cs.DC\n", + "cond-mat.mtrl-sci\n", + "econ.TH\n", + "cond-mat.supr-con\n", + "math.DS\n", + "hep-th\n", + "cs.SC\n", + "stat.ME\n", + "hep-th\n", + "cs.AI\n", + "hep-th\n", + "cs.IT\n", + "math.OC\n", + "cond-mat.mtrl-sci\n", + "cs.CL\n", + "math.CO\n", + "cs.CV\n", + "cs.LG\n", + "cs.GR\n", + "stat.ME\n", + "cs.CL\n", + "math.AP\n", + "math-ph\n", + "gr-qc\n", + "gr-qc\n", + "math.GR\n", + "math.AG\n", + "cs.CL\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "cond-mat.stat-mech\n", + "math.GN\n", + "math.GR\n", + "stat.ML\n", + "physics.optics\n", + "quant-ph\n", + "stat.ME\n", + "physics.atom-ph\n", + "math.NT\n", + "physics.comp-ph\n", + "quant-ph\n", + "cs.RO\n", + "cs.DC\n", + "quant-ph\n", + "physics.optics\n", + "physics.chem-ph\n", + "math.AP\n", + "cs.CV\n", + "physics.data-an\n", + "cs.CR\n", + "stat.ML\n", + "cs.CL\n", + "cs.IT\n", + "eess.AS\n", + "math.DS\n", + "gr-qc\n", + "eess.IV\n", + "math.AC\n", + "cs.IT\n", + "hep-ph\n", + "math.RT\n", + "hep-th\n", + "cs.CG\n", + "cond-mat.mes-hall\n", + "cs.IT\n", + "math.OC\n", + "physics.med-ph\n", + "cs.LG\n", + "cs.CL\n", + "astro-ph.SR\n", + "math.OC\n", + "gr-qc\n", + "cs.CR\n", + "cs.LG\n", + "cs.LG\n", + "physics.comp-ph\n", + "math.PR\n", + "cs.LG\n", + "astro-ph.GA\n", + "astro-ph.CO\n", + "gr-qc\n", + "cs.CL\n", + "cond-mat.mtrl-sci\n", + "math.DG\n", + "cs.CL\n", + "eess.SP\n", + "physics.gen-ph\n", + "cs.CV\n", + "cs.PL\n", + "q-bio.NC\n", + "cs.DL\n", + "eess.IV\n", + "math.AG\n", + "cs.CC\n", + "cs.CL\n", + "nucl-ex\n", + "eess.SY\n", + "cs.CV\n", + "cs.CR\n", + "hep-ex\n", + "cond-mat.mtrl-sci\n", + "cs.CV\n", + "q-bio.NC\n", + "cs.CL\n", + "math.CO\n", + "cs.CV\n", + "cs.DM\n", + "hep-ex\n", + "cs.CY\n", + "cs.CR\n", + "hep-ph\n", + "cs.CR\n", + "math.OC\n", + "cs.CY\n", + "cs.SI\n", + "cs.AI\n", + "stat.ML\n", + "physics.geo-ph\n", + "cs.AI\n", + "econ.TH\n", + "cs.SI\n", + 
"cond-mat.soft\n", + "econ.GN\n", + "astro-ph.SR\n", + "cs.LG\n", + "physics.plasm-ph\n", + "math.NT\n", + "cond-mat.mtrl-sci\n", + "hep-ph\n", + "cs.IR\n", + "cs.CV\n", + "math.RA\n", + "math.CO\n", + "math.CO\n", + "math.RT\n", + "cs.DB\n", + "cs.IT\n", + "cs.AI\n", + "physics.med-ph\n", + "math.FA\n", + "math.GT\n", + "gr-qc\n", + "math.LO\n", + "math.CO\n", + "astro-ph.SR\n", + "cs.CV\n", + "physics.plasm-ph\n", + "q-fin.MF\n", + "math.PR\n", + "cs.NE\n", + "quant-ph\n", + "hep-ex\n", + "cs.LG\n", + "cs.RO\n", + "cs.CY\n", + "quant-ph\n", + "astro-ph.HE\n", + "cond-mat.stat-mech\n", + "math.GT\n", + "astro-ph.SR\n", + "astro-ph.HE\n", + "astro-ph.CO\n", + "quant-ph\n", + "hep-ph\n", + "physics.atom-ph\n", + "cs.LG\n", + "physics.comp-ph\n", + "stat.AP\n", + "cs.CV\n", + "gr-qc\n", + "cs.IR\n", + "math.DS\n", + "astro-ph.HE\n", + "astro-ph.GA\n", + "astro-ph.GA\n", + "cs.CV\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "gr-qc\n", + "nlin.CD\n", + "cs.LG\n", + "cs.LG\n", + "math.CO\n", + "eess.SP\n", + "astro-ph.CO\n", + "cond-mat.mtrl-sci\n", + "cond-mat.mtrl-sci\n", + "cs.HC\n", + "eess.SP\n", + "math.OC\n", + "cs.CV\n", + "cs.LG\n", + "cs.PL\n", + "cs.CL\n", + "physics.optics\n", + "math.RA\n", + "astro-ph.GA\n", + "quant-ph\n", + "stat.ME\n", + "stat.ML\n", + "cs.CL\n", + "hep-ex\n", + "cs.LG\n", + "hep-th\n", + "cs.CR\n", + "quant-ph\n", + "econ.TH\n", + "cs.DC\n", + "quant-ph\n", + "cs.SE\n", + "stat.AP\n", + "cs.CR\n", + "cond-mat.soft\n", + "hep-th\n", + "math.CT\n", + "cs.CR\n", + "hep-ph\n", + "math.NA\n", + "gr-qc\n", + "cs.SI\n", + "stat.ME\n", + "math.OC\n", + "cond-mat.soft\n", + "physics.geo-ph\n", + "hep-ex\n", + "math.CO\n", + "hep-ph\n", + "quant-ph\n", + "quant-ph\n", + "cs.CV\n", + "math.CO\n", + "physics.flu-dyn\n", + "physics.flu-dyn\n", + "cs.IR\n", + "cs.LG\n", + "cs.LG\n", + "cs.IR\n", + "cond-mat.mes-hall\n", + "cond-mat.stat-mech\n", + "cs.LG\n", + "astro-ph.CO\n", + "cs.CL\n", + "gr-qc\n", + "cs.ET\n", + "cs.IT\n", + "quant-ph\n", + 
"cs.CR\n", + "astro-ph.EP\n", + "cs.CL\n", + "cs.CL\n", + "physics.chem-ph\n", + "math-ph\n", + "cs.RO\n", + "cs.CG\n", + "eess.SP\n", + "hep-ph\n", + "cs.LG\n", + "astro-ph.CO\n", + "math.PR\n", + "cs.CV\n", + "cs.CL\n", + "quant-ph\n", + "physics.optics\n", + "math.PR\n", + "astro-ph.GA\n", + "cs.CL\n", + "astro-ph.HE\n", + "gr-qc\n", + "q-fin.CP\n", + "hep-ph\n", + "hep-ex\n", + "astro-ph.EP\n", + "quant-ph\n", + "astro-ph.GA\n", + "astro-ph.SR\n", + "cs.CV\n", + "astro-ph.CO\n", + "cs.CV\n", + "astro-ph.EP\n", + "cond-mat.mes-hall\n", + "stat.ME\n", + "cs.DB\n", + "cs.DS\n", + "cs.LG\n", + "gr-qc\n", + "astro-ph.CO\n", + "astro-ph.CO\n", + "astro-ph.CO\n", + "stat.ME\n", + "hep-ph\n", + "math.RA\n", + "math.DG\n", + "math.RT\n", + "astro-ph.CO\n", + "nucl-th\n", + "cs.LG\n", + "cond-mat.str-el\n", + "quant-ph\n", + "hep-ex\n", + "astro-ph.CO\n", + "astro-ph.CO\n", + "physics.chem-ph\n", + "quant-ph\n", + "cs.AI\n", + "cs.LG\n", + "cond-mat.mtrl-sci\n", + "eess.IV\n", + "cs.GT\n", + "cs.CR\n", + "cs.LG\n", + "cs.DB\n", + "quant-ph\n", + "cs.CR\n", + "stat.ME\n", + "cs.HC\n", + "astro-ph.CO\n", + "math.NT\n", + "cs.CR\n", + "cs.CV\n", + "astro-ph.GA\n", + "math.RT\n", + "astro-ph.HE\n", + "quant-ph\n", + "astro-ph.GA\n", + "math.OC\n", + "math-ph\n", + "cs.CL\n", + "math.RA\n", + "gr-qc\n", + "eess.SP\n", + "stat.ML\n", + "math.OC\n", + "astro-ph.GA\n", + "cond-mat.mes-hall\n", + "gr-qc\n", + "q-bio.BM\n", + "cs.CL\n", + "math.AG\n", + "physics.comp-ph\n", + "quant-ph\n", + "physics.ins-det\n", + "math.GT\n", + "astro-ph.EP\n", + "hep-th\n", + "physics.comp-ph\n", + "cs.DS\n", + "cond-mat.mtrl-sci\n", + "cs.LG\n", + "cs.DC\n", + "cs.PL\n", + "q-bio.PE\n", + "quant-ph\n", + "cs.RO\n", + "math.FA\n", + "math.PR\n", + "math.LO\n", + "hep-ph\n", + "cs.CL\n", + "cs.CL\n", + "hep-ex\n", + "math.CO\n", + "hep-ph\n", + "astro-ph.HE\n", + "astro-ph.HE\n", + "astro-ph.EP\n", + "cs.CR\n", + "astro-ph.HE\n", + "hep-ph\n", + "astro-ph.EP\n", + "hep-th\n", + "cs.IT\n", + 
"cond-mat.stat-mech\n", + "physics.hist-ph\n", + "math.CO\n", + "cs.SE\n", + "cs.IT\n", + "cs.MA\n", + "quant-ph\n", + "hep-ph\n", + "cs.IT\n", + "math.FA\n", + "astro-ph.HE\n", + "cond-mat.mtrl-sci\n", + "eess.SP\n", + "cs.SE\n", + "math.DS\n", + "math.PR\n", + "econ.EM\n", + "astro-ph.IM\n", + "cs.HC\n", + "math.LO\n", + "math.DG\n", + "cs.DS\n", + "quant-ph\n", + "cs.SE\n", + "hep-th\n", + "eess.IV\n", + "hep-ex\n", + "physics.flu-dyn\n", + "cs.CL\n", + "physics.atom-ph\n", + "quant-ph\n", + "math-ph\n", + "math.GT\n", + "quant-ph\n", + "stat.CO\n", + "cs.CC\n", + "cond-mat.mtrl-sci\n", + "gr-qc\n", + "hep-ph\n", + "gr-qc\n", + "astro-ph.HE\n", + "quant-ph\n", + "cs.DS\n", + "cs.IT\n", + "math.AG\n", + "astro-ph.HE\n", + "cs.MA\n", + "hep-ph\n", + "math.GR\n", + "cs.NI\n", + "physics.optics\n", + "math.OA\n", + "cs.RO\n", + "math.AP\n", + "physics.ins-det\n", + "cs.GT\n", + "cs.CR\n", + "econ.TH\n", + "quant-ph\n", + "cs.LG\n", + "cs.CG\n", + "math.AP\n", + "quant-ph\n", + "cs.DC\n", + "physics.plasm-ph\n", + "cs.LG\n", + "cs.CV\n", + "astro-ph.CO\n", + "cs.LG\n", + "astro-ph.CO\n", + "math.NT\n", + "econ.TH\n", + "hep-ph\n", + "math.AP\n", + "cs.LG\n", + "physics.ins-det\n", + "cond-mat.mes-hall\n", + "quant-ph\n", + "math.PR\n", + "math.DS\n", + "cs.LG\n", + "cs.CL\n", + "physics.soc-ph\n", + "cs.CV\n", + "math.NT\n", + "cs.IR\n", + "hep-ph\n", + "quant-ph\n", + "astro-ph.GA\n", + "hep-ph\n", + "astro-ph.GA\n", + "astro-ph.CO\n", + "astro-ph.HE\n", + "cond-mat.supr-con\n", + "physics.optics\n", + "cs.CV\n", + "physics.plasm-ph\n", + "cs.IT\n", + "cs.PL\n", + "math.CO\n", + "astro-ph.SR\n", + "math.NA\n", + "math.NA\n", + "q-bio.NC\n", + "cs.LO\n", + "cs.PL\n", + "gr-qc\n", + "math.MG\n", + "astro-ph.GA\n", + "cs.IR\n", + "math.NT\n", + "cs.CV\n", + "math.GT\n", + "math.ST\n", + "cs.IT\n", + "math.LO\n", + "quant-ph\n", + "cs.CR\n", + "cs.SD\n", + "stat.ME\n", + "cs.MA\n", + "cs.GT\n", + "nucl-ex\n", + "cs.CL\n", + "math.AP\n", + "math.OC\n", + "q-fin.ST\n", + 
"math.PR\n", + "cs.IR\n", + "math.AP\n", + "astro-ph.HE\n", + "math.FA\n", + "math.CO\n", + "cs.DB\n", + "quant-ph\n", + "math.GR\n", + "cs.SE\n", + "cs.GT\n", + "eess.SP\n", + "cs.RO\n", + "cs.NI\n", + "cs.CV\n", + "math.AG\n", + "cs.IR\n", + "cs.LG\n", + "stat.AP\n", + "astro-ph.GA\n", + "cs.RO\n", + "math.AC\n", + "math.PR\n", + "cs.IT\n", + "econ.TH\n", + "math.OC\n", + "math.NT\n", + "cs.NI\n", + "cs.CR\n", + "cs.CL\n", + "cs.CC\n", + "cs.CL\n", + "cs.CL\n", + "cs.LG\n", + "physics.ao-ph\n", + "cond-mat.mtrl-sci\n", + "cs.CL\n", + "quant-ph\n", + "math.SP\n", + "cs.RO\n", + "math.NT\n", + "quant-ph\n", + "cs.CL\n", + "math.NT\n", + "cs.CR\n", + "math.CO\n", + "econ.EM\n", + "cond-mat.stat-mech\n", + "nucl-ex\n", + "astro-ph.GA\n", + "astro-ph.SR\n", + "cs.DS\n", + "q-bio.GN\n", + "econ.TH\n", + "cond-mat.mtrl-sci\n", + "cs.IT\n", + "math.CT\n", + "physics.ao-ph\n", + "eess.SP\n", + "cs.IR\n", + "physics.optics\n", + "cond-mat.mes-hall\n", + "cs.CL\n", + "math.RT\n", + "physics.app-ph\n", + "math.CO\n", + "cs.CL\n", + "stat.ME\n", + "astro-ph.CO\n", + "cs.IT\n", + "cs.IT\n", + "quant-ph\n", + "q-bio.PE\n", + "cs.CV\n", + "gr-qc\n", + "math.CO\n", + "math.CO\n", + "cs.CL\n", + "cs.NI\n", + "math.AC\n", + "cs.DM\n", + "math.DG\n", + "cs.AI\n", + "cond-mat.stat-mech\n", + "cs.NI\n", + "math.CO\n", + "hep-ph\n", + "stat.CO\n", + "hep-th\n", + "math.NA\n", + "astro-ph.IM\n", + "quant-ph\n", + "cs.CL\n", + "physics.flu-dyn\n", + "cs.CG\n", + "hep-th\n", + "math.DS\n", + "cs.DS\n", + "eess.IV\n", + "math.CO\n", + "cs.CV\n", + "math.FA\n", + "cs.CL\n", + "stat.AP\n", + "cond-mat.mtrl-sci\n", + "econ.TH\n", + "cs.LG\n", + "stat.CO\n", + "stat.CO\n", + "cs.CL\n", + "cond-mat.soft\n", + "cs.CR\n", + "cs.CV\n", + "eess.SY\n", + "cs.AI\n", + "astro-ph.GA\n", + "hep-ph\n", + "eess.IV\n", + "cs.CV\n", + "astro-ph.SR\n", + "hep-th\n", + "astro-ph.SR\n", + "hep-ph\n", + "astro-ph.HE\n", + "astro-ph.CO\n", + "math.CO\n", + "cs.IT\n", + "astro-ph.CO\n", + "econ.GN\n", + 
"math.DG\n", + "cond-mat.str-el\n", + "math.ST\n", + "cs.CV\n", + "math.OC\n", + "hep-th\n", + "physics.chem-ph\n", + "astro-ph.IM\n", + "cond-mat.stat-mech\n", + "stat.OT\n", + "physics.chem-ph\n", + "cs.CY\n", + "cond-mat.mes-hall\n", + "cs.CR\n", + "stat.ML\n", + "cs.CV\n", + "stat.ME\n", + "cs.LG\n", + "econ.EM\n", + "cs.CL\n" + ] + }, + { + "ename": "UnexpectedEmptyPageError", + "evalue": "Page of results was unexpectedly empty (http://export.arxiv.org/api/query?search_query=all&id_list=&sortBy=submittedDate&sortOrder=descending&start=3200&max_results=100)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mUnexpectedEmptyPageError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/85/hyc80kf10k7b2q8d_m56jbyh0000gn/T/ipykernel_51171/4192306019.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0msort_by\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marxiv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSortCriterion\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSubmittedDate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m )\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msearch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprimary_category\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprimary_category\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m 
\u001b[0;34m'cs'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36mresults\u001b[0;34m(self, search)\u001b[0m\n\u001b[1;32m 583\u001b[0m ))\n\u001b[1;32m 584\u001b[0m \u001b[0mpage_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpage_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 585\u001b[0;31m \u001b[0mfeed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_feed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpage_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 586\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;31m# NOTE: this is an ugly fix for a known bug. 
The totalresults\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36m_parse_feed\u001b[0;34m(self, url, first_page)\u001b[0m\n\u001b[1;32m 637\u001b[0m \"\"\"\n\u001b[1;32m 638\u001b[0m \u001b[0;31m# Invoke the recursive helper with initial available retries.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 639\u001b[0;31m return self.__try_parse_feed(\n\u001b[0m\u001b[1;32m 640\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_page\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36m__try_parse_feed\u001b[0;34m(self, url, first_page, retries_left, last_err)\u001b[0m\n\u001b[1;32m 679\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mretries_left\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 681\u001b[0;31m return self.__try_parse_feed(\n\u001b[0m\u001b[1;32m 682\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 683\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_page\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36m__try_parse_feed\u001b[0;34m(self, url, first_page, retries_left, last_err)\u001b[0m\n\u001b[1;32m 679\u001b[0m 
\u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mretries_left\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 681\u001b[0;31m return self.__try_parse_feed(\n\u001b[0m\u001b[1;32m 682\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 683\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_page\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36m__try_parse_feed\u001b[0;34m(self, url, first_page, retries_left, last_err)\u001b[0m\n\u001b[1;32m 679\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mretries_left\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 681\u001b[0;31m return self.__try_parse_feed(\n\u001b[0m\u001b[1;32m 682\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 683\u001b[0m \u001b[0mfirst_page\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfirst_page\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.9/site-packages/arxiv/arxiv.py\u001b[0m in \u001b[0;36m__try_parse_feed\u001b[0;34m(self, url, first_page, retries_left, last_err)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[0;31m# Feed was never returned in 
# Collect recent arXiv submissions whose primary category is not computer
# science, then assemble them into `df_additional`.
#
# Relies on `arxiv` and `pd` (pandas) being imported by earlier cells.

# Upper bound on the number of most-recent submissions requested from arXiv.
MAX_RESULTS = 7000

additional_cat = []      # primary category of each kept paper, e.g. 'math.CO'
additional_title = []    # title of each kept paper
additional_summary = []  # abstract of each kept paper

search = arxiv.Search(
    query='all',
    max_results=MAX_RESULTS,
    sort_by=arxiv.SortCriterion.SubmittedDate,
)

try:
    for result in search.results():
        # Keep only papers whose primary category does not start with 'cs'.
        # NOTE(review): presumably because the base dataset already covers
        # CS papers -- confirm against the rest of the notebook.
        if result.primary_category[:2] != 'cs':
            additional_cat.append(result.primary_category)
            additional_title.append(result.title)
            additional_summary.append(result.summary)
except arxiv.UnexpectedEmptyPageError as err:
    # The arXiv API can return an empty page part-way through a long
    # pagination (a previous run failed at start=3200 of 7000). Everything
    # fetched before the failure is already in the lists, so keep the partial
    # result instead of leaving the cell in a failed state.
    print(f"arXiv returned an empty page; stopping early: {err}")

# One summary line instead of printing every category.
print(f"Collected {len(additional_cat)} non-cs papers")

# The three lists are filled in lockstep, so they always have equal length.
# Column names and order match the original: summary, category, title.
df_additional = pd.DataFrame({
    'summary': additional_summary,
    'category': additional_cat,
    'title': additional_title,
})
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarycategorytitle
0This paper outlines a complete methodology for...eess.SYModeling, robust control synthesis and worst-c...
1We show that associativity of the tree-level O...hep-thOn Effective Field Theories with Celestial Duals
2Let dimensions $d\\ge 3$, and codimensions $c\\g...math.DGHomologically area minimizing surfaces with no...
3Carbon-enhanced metal-poor (CEMP) stars make-u...astro-ph.GAOver 2.7 Million Carbon-Enhanced Metal-Poor st...
4This paper studies the features of a homopolym...cond-mat.softEnd-pulled polymer translocation through a man...
............
1958Predicting structural and energetic properties...physics.chem-phTransferring Chemical and Energetic Knowledge ...
1959When normal metals (NMs) are attached to topol...cond-mat.mes-hallProximity-induced zero-energy states indisting...
1960Nowcasting can play a key role in giving polic...stat.MLBenchmarking Econometric and Machine Learning ...
1961There is an increasing trend of research in me...stat.MEComparison of Effect Size Measures for Mediati...
1962Cluster-robust inference is widely used in mod...econ.EMLeverage, Influence, and the Jackknife in Clus...
\n", + "

1963 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " summary category \\\n", + "0 This paper outlines a complete methodology for... eess.SY \n", + "1 We show that associativity of the tree-level O... hep-th \n", + "2 Let dimensions $d\\ge 3$, and codimensions $c\\g... math.DG \n", + "3 Carbon-enhanced metal-poor (CEMP) stars make-u... astro-ph.GA \n", + "4 This paper studies the features of a homopolym... cond-mat.soft \n", + "... ... ... \n", + "1958 Predicting structural and energetic properties... physics.chem-ph \n", + "1959 When normal metals (NMs) are attached to topol... cond-mat.mes-hall \n", + "1960 Nowcasting can play a key role in giving polic... stat.ML \n", + "1961 There is an increasing trend of research in me... stat.ME \n", + "1962 Cluster-robust inference is widely used in mod... econ.EM \n", + "\n", + " title \n", + "0 Modeling, robust control synthesis and worst-c... \n", + "1 On Effective Field Theories with Celestial Duals \n", + "2 Homologically area minimizing surfaces with no... \n", + "3 Over 2.7 Million Carbon-Enhanced Metal-Poor st... \n", + "4 End-pulled polymer translocation through a man... \n", + "... ... \n", + "1958 Transferring Chemical and Energetic Knowledge ... \n", + "1959 Proximity-induced zero-energy states indisting... \n", + "1960 Benchmarking Econometric and Machine Learning ... \n", + "1961 Comparison of Effect Size Measures for Mediati... \n", + "1962 Leverage, Influence, and the Jackknife in Clus... 
\n", + "\n", + "[1963 rows x 3 columns]" + ] + }, + "execution_count": 230, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_additional" + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "metadata": {}, + "outputs": [], + "source": [ + "df_additional['summary'] = df_additional['summary'].apply(lambda x : x.lower())\n", + "df_additional['title'] = df_additional['title'].apply(lambda x : x.lower())" + ] + }, + { + "cell_type": "code", + "execution_count": 232, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_cat(term_line):\n", + " try:\n", + " return term_line.split('.')[0]\n", + " except Exception:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [], + "source": [ + "df_additional['tag'] = df_additional['category'].apply(lambda x : extract_cat(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "metadata": {}, + "outputs": [], + "source": [ + "df_additional = df_additional.drop(['category'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'eess': 102,\n", + " 'hep-th': 66,\n", + " 'math': 573,\n", + " 'astro-ph': 292,\n", + " 'cond-mat': 181,\n", + " 'gr-qc': 73,\n", + " 'stat': 96,\n", + " 'hep-ex': 29,\n", + " 'physics': 197,\n", + " 'quant-ph': 139,\n", + " 'hep-ph': 84,\n", + " 'q-bio': 34,\n", + " 'econ': 33,\n", + " 'hep-lat': 7,\n", + " 'nlin': 8,\n", + " 'nucl-th': 11,\n", + " 'math-ph': 25,\n", + " 'nucl-ex': 7,\n", + " 'q-fin': 6})" + ] + }, + "execution_count": 235, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(df_additional['tag'])" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarytitletag
0this paper outlines a complete methodology for...modeling, robust control synthesis and worst-c...eess
1we show that associativity of the tree-level o...on effective field theories with celestial dualshep-th
2let dimensions $d\\ge 3$, and codimensions $c\\g...homologically area minimizing surfaces with no...math
3carbon-enhanced metal-poor (cemp) stars make-u...over 2.7 million carbon-enhanced metal-poor st...astro-ph
4this paper studies the features of a homopolym...end-pulled polymer translocation through a man...cond-mat
\n", + "
" + ], + "text/plain": [ + " summary \\\n", + "0 this paper outlines a complete methodology for... \n", + "1 we show that associativity of the tree-level o... \n", + "2 let dimensions $d\\ge 3$, and codimensions $c\\g... \n", + "3 carbon-enhanced metal-poor (cemp) stars make-u... \n", + "4 this paper studies the features of a homopolym... \n", + "\n", + " title tag \n", + "0 modeling, robust control synthesis and worst-c... eess \n", + "1 on effective field theories with celestial duals hep-th \n", + "2 homologically area minimizing surfaces with no... math \n", + "3 over 2.7 million carbon-enhanced metal-poor st... astro-ph \n", + "4 end-pulled polymer translocation through a man... cond-mat " + ] + }, + "execution_count": 236, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_additional.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [], + "source": [ + "new_additional = df_additional[['summary','tag','title']].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarytagtitle
0this paper outlines a complete methodology for...eessmodeling, robust control synthesis and worst-c...
1we show that associativity of the tree-level o...hep-thon effective field theories with celestial duals
2let dimensions $d\\ge 3$, and codimensions $c\\g...mathhomologically area minimizing surfaces with no...
3carbon-enhanced metal-poor (cemp) stars make-u...astro-phover 2.7 million carbon-enhanced metal-poor st...
4this paper studies the features of a homopolym...cond-matend-pulled polymer translocation through a man...
\n", + "
" + ], + "text/plain": [ + " summary tag \\\n", + "0 this paper outlines a complete methodology for... eess \n", + "1 we show that associativity of the tree-level o... hep-th \n", + "2 let dimensions $d\\ge 3$, and codimensions $c\\g... math \n", + "3 carbon-enhanced metal-poor (cemp) stars make-u... astro-ph \n", + "4 this paper studies the features of a homopolym... cond-mat \n", + "\n", + " title \n", + "0 modeling, robust control synthesis and worst-c... \n", + "1 on effective field theories with celestial duals \n", + "2 homologically area minimizing surfaces with no... \n", + "3 over 2.7 million carbon-enhanced metal-poor st... \n", + "4 end-pulled polymer translocation through a man... " + ] + }, + "execution_count": 238, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_additional.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": {}, + "outputs": [], + "source": [ + "frames = [data, new_additional]" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": {}, + "outputs": [], + "source": [ + "merged_data = pd.concat(frames)" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarytagtitle
0we propose an architecture for vqa which utili...csdual recurrent attention units for visual ques...
1recent approaches based on artificial neural n...cssequential short-text classification with recu...
2we introduce the multiresolution recurrent neu...csmultiresolution recurrent neural networks: an ...
3multi-task learning is motivated by the observ...statlearning what to share between loosely related...
4we present milabot: a deep reinforcement learn...csa deep reinforcement learning chatbot
\n", + "
" + ], + "text/plain": [ + " summary tag \\\n", + "0 we propose an architecture for vqa which utili... cs \n", + "1 recent approaches based on artificial neural n... cs \n", + "2 we introduce the multiresolution recurrent neu... cs \n", + "3 multi-task learning is motivated by the observ... stat \n", + "4 we present milabot: a deep reinforcement learn... cs \n", + "\n", + " title \n", + "0 dual recurrent attention units for visual ques... \n", + "1 sequential short-text classification with recu... \n", + "2 multiresolution recurrent neural networks: an ... \n", + "3 learning what to share between loosely related... \n", + "4 a deep reinforcement learning chatbot " + ] + }, + "execution_count": 241, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'cs': 34597,\n", + " 'stat': 4878,\n", + " 'astro-ph': 351,\n", + " 'q-bio': 354,\n", + " 'eess': 177,\n", + " 'cond-mat': 246,\n", + " 'math': 1185,\n", + " 'physics': 413,\n", + " 'quant-ph': 205,\n", + " 'q-fin': 36,\n", + " 'gr-qc': 77,\n", + " 'nlin': 55,\n", + " 'cmp-lg': 110,\n", + " 'econ': 38,\n", + " 'hep-ex': 33,\n", + " 'hep-th': 67,\n", + " 'nucl-th': 12,\n", + " 'hep-ph': 86,\n", + " 'hep-lat': 9,\n", + " 'adap-org': 2,\n", + " 'math-ph': 25,\n", + " 'nucl-ex': 7})" + ] + }, + "execution_count": 242, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(merged_data['tag'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Как видим ситуация стала лучше. 
Теперь данные можно почистить от лишнего" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /Users/seal/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 243, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nltk\n", + "nltk.download('stopwords')" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": {}, + "outputs": [], + "source": [ + "trash = nltk.corpus.stopwords.words('english')" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": {}, + "outputs": [], + "source": [ + "def clean_data(line):\n", + " output= [i for i in line.split() if i not in trash]\n", + " return ' '.join(output)" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "metadata": {}, + "outputs": [], + "source": [ + "merged_data['summary'] = merged_data['summary'].apply(lambda x : clean_data(x))\n", + "merged_data['title'] = merged_data['title'].apply(lambda x : clean_data(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summarytagtitle
0propose architecture vqa utilizes recurrent la...csdual recurrent attention units visual question...
1recent approaches based artificial neural netw...cssequential short-text classification recurrent...
2introduce multiresolution recurrent neural net...csmultiresolution recurrent neural networks: app...
3multi-task learning motivated observation huma...statlearning share loosely related tasks
4present milabot: deep reinforcement learning c...csdeep reinforcement learning chatbot
\n", + "
" + ], + "text/plain": [ + " summary tag \\\n", + "0 propose architecture vqa utilizes recurrent la... cs \n", + "1 recent approaches based artificial neural netw... cs \n", + "2 introduce multiresolution recurrent neural net... cs \n", + "3 multi-task learning motivated observation huma... stat \n", + "4 present milabot: deep reinforcement learning c... cs \n", + "\n", + " title \n", + "0 dual recurrent attention units visual question... \n", + "1 sequential short-text classification recurrent... \n", + "2 multiresolution recurrent neural networks: app... \n", + "3 learning share loosely related tasks \n", + "4 deep reinforcement learning chatbot " + ] + }, + "execution_count": 247, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Закодируем таргет для более удобной работы" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cs 34597\n", + "stat 4878\n", + "math 1185\n", + "physics 413\n", + "q-bio 354\n", + "astro-ph 351\n", + "cond-mat 246\n", + "quant-ph 205\n", + "eess 177\n", + "cmp-lg 110\n", + "hep-ph 86\n", + "gr-qc 77\n", + "hep-th 67\n", + "nlin 55\n", + "econ 38\n", + "q-fin 36\n", + "hep-ex 33\n", + "math-ph 25\n", + "nucl-th 12\n", + "hep-lat 9\n", + "nucl-ex 7\n", + "adap-org 2\n", + "Name: tag, dtype: int64" + ] + }, + "execution_count": 249, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_data.tag.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 250, + "metadata": {}, + "outputs": [], + "source": [ + "merged_data = merged_data.drop(merged_data[merged_data.tag == 'adap-org'].index)\n", + "merged_data = 
merged_data.drop(merged_data[merged_data.tag == 'nucl-ex'].index)\n", + "merged_data = merged_data.drop(merged_data[merged_data.tag == 'hep-lat'].index)" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [], + "source": [ + "label_encoder = LabelEncoder()\n", + "merged_data['tags'] = label_encoder.fit_transform(merged_data['tag'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Поскольку модель должна обрабатывать запросы как содержащие summary и title сразу, так и содержащие только одно заполненное поле, сольём эти данные в одну колонку" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [], + "source": [ + "merged_data['full_info'] = merged_data['title'] + '. ' + merged_data['summary']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Избавимся от лишнего" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [], + "source": [ + "merged_data = merged_data.drop(['summary', 'tag', 'title'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tagsfull_info
03dual recurrent attention units visual question...
13sequential short-text classification recurrent...
23multiresolution recurrent neural networks: app...
318learning share loosely related tasks. multi-ta...
43deep reinforcement learning chatbot. present m...
\n", + "
" + ], + "text/plain": [ + " tags full_info\n", + "0 3 dual recurrent attention units visual question...\n", + "1 3 sequential short-text classification recurrent...\n", + "2 3 multiresolution recurrent neural networks: app...\n", + "3 18 learning share loosely related tasks. multi-ta...\n", + "4 3 deep reinforcement learning chatbot. present m..." + ] + }, + "execution_count": 254, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [], + "source": [ + "from imblearn.datasets import make_imbalance" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [], + "source": [ + "X = merged_data.drop(['tags'], axis=1)\n", + "y = merged_data['tags']" + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({3: 34584,\n", + " 18: 4877,\n", + " 0: 351,\n", + " 15: 354,\n", + " 5: 177,\n", + " 2: 246,\n", + " 10: 1185,\n", + " 14: 413,\n", + " 17: 205,\n", + " 16: 36,\n", + " 6: 77,\n", + " 12: 55,\n", + " 1: 110,\n", + " 4: 38,\n", + " 7: 33,\n", + " 9: 67,\n", + " 13: 12,\n", + " 8: 86,\n", + " 11: 25})" + ] + }, + "execution_count": 257, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Теперь выбросим часть данных cs и stats" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": {}, + "outputs": [], + "source": [ + "X_balanced, y_balanced = make_imbalance(X, y,\n", + " sampling_strategy={0: 351,\n", + " 1: 110,\n", + " 2: 246,\n", + " 3: 7279,\n", + " 4: 38,\n", + " 5: 177,\n", + " 6: 77,\n", + " 7: 33,\n", + " 8: 86,\n", + " 9: 67,\n", + " 10: 1185,\n", + " 11: 25, \n", + " 12: 55, \n", + " 13: 12, \n", + " 14: 413, \n", + " 15: 354,\n", + " 16: 36, \n", + " 17: 205, \n", + " 18: 
3231\n", + " },\n", + " random_state=123)" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({0: 351,\n", + " 1: 110,\n", + " 2: 246,\n", + " 3: 7279,\n", + " 4: 38,\n", + " 5: 177,\n", + " 6: 77,\n", + " 7: 33,\n", + " 8: 86,\n", + " 9: 67,\n", + " 10: 1185,\n", + " 11: 25,\n", + " 12: 55,\n", + " 13: 12,\n", + " 14: 413,\n", + " 15: 354,\n", + " 16: 36,\n", + " 17: 205,\n", + " 18: 3231})" + ] + }, + "execution_count": 259, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(y_balanced)" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "X_balanced['tags'] = y_balanced" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [], + "source": [ + "#X_balanced.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "# возьмём маленький размер тестовой выборки, чтобы в train попало как можно больше классов\n", + "train, test = train_test_split(X_balanced, test_size=0.15, shuffle=True,random_state=123)" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "metadata": {}, + "outputs": [], + "source": [ + "train.to_csv('data_train.csv', index=None)\n", + "test.to_csv('data_test.csv', index=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Будем считать, что данные подготовлены к тому, чтобы что-то на них обучить." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [], + "source": [ + "name_dict = {3: 'cs',\n", + " 18: 'stat',\n", + " 10: 'math',\n", + " 14: 'physics',\n", + " 15: 'q-bio',\n", + " 0: 'astro-ph', \n", + " 2: 'cond-mat',\n", + " 17: 'quant-ph', \n", + " 5: 'eess', \n", + " 1: 'cmp-lg',\n", + " 8: 'hep-ph', \n", + " 6: 'gr-qc',\n", + " 9: 'hep-th', \n", + " 12: 'nlin', \n", + " 4: 'econ', \n", + " 16: 'q-fin', \n", + " 7: 'hep-ex', \n", + " 11: 'math-ph',\n", + " 13: 'nucl-th'}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}