{ "cells": [ { "cell_type": "code", "execution_count": 329, "id": "6cf95722", "metadata": { "cellId": "eziodlb8kics09v3tpfeks" }, "outputs": [], "source": [ "#!g1.1\n", "from sklearn.preprocessing import LabelEncoder\n", "import transformers\n", "import torch\n", "import nltk\n", "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 330, "id": "c692d165", "metadata": { "cellId": "lvwy4cb1dnfnk3n391yiq" }, "outputs": [], "source": [ "#!g1.1\n", "df = pd.read_json('arxiv-metadata-oai-snapshot.json', lines = True)" ] }, { "cell_type": "code", "execution_count": 331, "id": "9b51b145", "metadata": { "cellId": "l71bdxo21obg5fforh5ppi" }, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "submitter | \n", "authors | \n", "title | \n", "comments | \n", "journal-ref | \n", "doi | \n", "report-no | \n", "categories | \n", "license | \n", "abstract | \n", "versions | \n", "update_date | \n", "authors_parsed | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0704.0001 | \n", "Pavel Nadolsky | \n", "C. Bal\\'azs, E. L. Berger, P. M. Nadolsky, C.-... | \n", "Calculation of prompt diphoton production cros... | \n", "37 pages, 15 figures; published version | \n", "Phys.Rev.D76:013009,2007 | \n", "10.1103/PhysRevD.76.013009 | \n", "ANL-HEP-PR-07-12 | \n", "hep-ph | \n", "None | \n", "A fully differential calculation in perturba... | \n", "[{'version': 'v1', 'created': 'Mon, 2 Apr 2007... | \n", "2008-11-26 | \n", "[[Balázs, C., ], [Berger, E. L., ], [Nadolsky,... | \n", "
1 | \n", "0704.0002 | \n", "Louis Theran | \n", "Ileana Streinu and Louis Theran | \n", "Sparsity-certifying Graph Decompositions | \n", "To appear in Graphs and Combinatorics | \n", "None | \n", "None | \n", "None | \n", "math.CO cs.CG | \n", "http://arxiv.org/licenses/nonexclusive-distrib... | \n", "We describe a new algorithm, the $(k,\\ell)$-... | \n", "[{'version': 'v1', 'created': 'Sat, 31 Mar 200... | \n", "2008-12-13 | \n", "[[Streinu, Ileana, ], [Theran, Louis, ]] | \n", "
2 | \n", "0704.0003 | \n", "Hongjun Pan | \n", "Hongjun Pan | \n", "The evolution of the Earth-Moon system based o... | \n", "23 pages, 3 figures | \n", "None | \n", "None | \n", "None | \n", "physics.gen-ph | \n", "None | \n", "The evolution of Earth-Moon system is descri... | \n", "[{'version': 'v1', 'created': 'Sun, 1 Apr 2007... | \n", "2008-01-13 | \n", "[[Pan, Hongjun, ]] | \n", "
3 | \n", "0704.0004 | \n", "David Callan | \n", "David Callan | \n", "A determinant of Stirling cycle numbers counts... | \n", "11 pages | \n", "None | \n", "None | \n", "None | \n", "math.CO | \n", "None | \n", "We show that a determinant of Stirling cycle... | \n", "[{'version': 'v1', 'created': 'Sat, 31 Mar 200... | \n", "2007-05-23 | \n", "[[Callan, David, ]] | \n", "
4 | \n", "0704.0005 | \n", "Alberto Torchinsky | \n", "Wael Abu-Shammala and Alberto Torchinsky | \n", "From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a... | \n", "None | \n", "Illinois J. Math. 52 (2008) no.2, 681-689 | \n", "None | \n", "None | \n", "math.CA math.FA | \n", "None | \n", "In this paper we show how to compute the $\\L... | \n", "[{'version': 'v1', 'created': 'Mon, 2 Apr 2007... | \n", "2013-10-15 | \n", "[[Abu-Shammala, Wael, ], [Torchinsky, Alberto, ]] | \n", "
\n", " | id | \n", "submitter | \n", "authors | \n", "title | \n", "comments | \n", "journal-ref | \n", "doi | \n", "report-no | \n", "categories | \n", "license | \n", "abstract | \n", "versions | \n", "update_date | \n", "authors_parsed | \n", "tag | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0704.0001 | \n", "Pavel Nadolsky | \n", "C. Bal\\'azs, E. L. Berger, P. M. Nadolsky, C.-... | \n", "Calculation of prompt diphoton production cros... | \n", "37 pages, 15 figures; published version | \n", "Phys.Rev.D76:013009,2007 | \n", "10.1103/PhysRevD.76.013009 | \n", "ANL-HEP-PR-07-12 | \n", "hep-ph | \n", "None | \n", "A fully differential calculation in perturba... | \n", "[{'version': 'v1', 'created': 'Mon, 2 Apr 2007... | \n", "2008-11-26 | \n", "[[Balázs, C., ], [Berger, E. L., ], [Nadolsky,... | \n", "hep-ph | \n", "
1 | \n", "0704.0002 | \n", "Louis Theran | \n", "Ileana Streinu and Louis Theran | \n", "Sparsity-certifying Graph Decompositions | \n", "To appear in Graphs and Combinatorics | \n", "None | \n", "None | \n", "None | \n", "math.CO cs.CG | \n", "http://arxiv.org/licenses/nonexclusive-distrib... | \n", "We describe a new algorithm, the $(k,\\ell)$-... | \n", "[{'version': 'v1', 'created': 'Sat, 31 Mar 200... | \n", "2008-12-13 | \n", "[[Streinu, Ileana, ], [Theran, Louis, ]] | \n", "math | \n", "
2 | \n", "0704.0003 | \n", "Hongjun Pan | \n", "Hongjun Pan | \n", "The evolution of the Earth-Moon system based o... | \n", "23 pages, 3 figures | \n", "None | \n", "None | \n", "None | \n", "physics.gen-ph | \n", "None | \n", "The evolution of Earth-Moon system is descri... | \n", "[{'version': 'v1', 'created': 'Sun, 1 Apr 2007... | \n", "2008-01-13 | \n", "[[Pan, Hongjun, ]] | \n", "physics | \n", "
3 | \n", "0704.0004 | \n", "David Callan | \n", "David Callan | \n", "A determinant of Stirling cycle numbers counts... | \n", "11 pages | \n", "None | \n", "None | \n", "None | \n", "math.CO | \n", "None | \n", "We show that a determinant of Stirling cycle... | \n", "[{'version': 'v1', 'created': 'Sat, 31 Mar 200... | \n", "2007-05-23 | \n", "[[Callan, David, ]] | \n", "math | \n", "
4 | \n", "0704.0005 | \n", "Alberto Torchinsky | \n", "Wael Abu-Shammala and Alberto Torchinsky | \n", "From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a... | \n", "None | \n", "Illinois J. Math. 52 (2008) no.2, 681-689 | \n", "None | \n", "None | \n", "math.CA math.FA | \n", "None | \n", "In this paper we show how to compute the $\\L... | \n", "[{'version': 'v1', 'created': 'Mon, 2 Apr 2007... | \n", "2013-10-15 | \n", "[[Abu-Shammala, Wael, ], [Torchinsky, Alberto, ]] | \n", "math | \n", "