Michael-Geis committed on
Commit
cd5ad0c
1 Parent(s): b777cd0

got APSP_40, updated req.txt with pyarrow, updated log

Browse files
Files changed (3) hide show
  1. collection.ipynb +1137 -11
  2. project_log.ipynb +21 -0
  3. requirements.txt +1 -0
collection.ipynb CHANGED
@@ -36981,23 +36981,1149 @@
36981
  },
36982
  {
36983
  "cell_type": "code",
36984
- "execution_count": 157,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36985
  "metadata": {},
36986
  "outputs": [
36987
  {
36988
- "ename": "NameError",
36989
- "evalue": "name 'y' is not defined",
36990
- "output_type": "error",
36991
- "traceback": [
36992
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
36993
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
36994
- "Cell \u001b[1;32mIn[157], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m y\n",
36995
- "\u001b[1;31mNameError\u001b[0m: name 'y' is not defined"
36996
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36997
  }
36998
  ],
36999
  "source": [
37000
- "y"
37001
  ]
37002
  }
37003
  ],
 
36981
  },
36982
  {
36983
  "cell_type": "code",
36984
+ "execution_count": 15,
36985
+ "metadata": {},
36986
+ "outputs": [],
36987
+ "source": [
36988
+ "import data_storage\n",
36989
+ "from data_storage import ArXivData\n",
36990
+ "\n",
36991
+ "pde_data = ArXivData()\n",
36992
+ "\n",
36993
+ "pde_data.load_from_query(query=\"cat:math.AP OR cat:math.SP\", max_results=20)\n",
36994
+ "pde_data.save_as_feather(path_to_data_dir=\"./data\", dataset_file_name=\"test.feather\")"
36995
+ ]
36996
+ },
36997
+ {
36998
+ "cell_type": "code",
36999
+ "execution_count": 13,
37000
  "metadata": {},
37001
  "outputs": [
37002
  {
37003
+ "data": {
37004
+ "text/html": [
37005
+ "<div>\n",
37006
+ "<style scoped>\n",
37007
+ " .dataframe tbody tr th:only-of-type {\n",
37008
+ " vertical-align: middle;\n",
37009
+ " }\n",
37010
+ "\n",
37011
+ " .dataframe tbody tr th {\n",
37012
+ " vertical-align: top;\n",
37013
+ " }\n",
37014
+ "\n",
37015
+ " .dataframe thead th {\n",
37016
+ " text-align: right;\n",
37017
+ " }\n",
37018
+ "</style>\n",
37019
+ "<table border=\"1\" class=\"dataframe\">\n",
37020
+ " <thead>\n",
37021
+ " <tr style=\"text-align: right;\">\n",
37022
+ " <th></th>\n",
37023
+ " <th>title</th>\n",
37024
+ " <th>summary</th>\n",
37025
+ " <th>id</th>\n",
37026
+ " <th>msc_tags</th>\n",
37027
+ " </tr>\n",
37028
+ " </thead>\n",
37029
+ " <tbody>\n",
37030
+ " <tr>\n",
37031
+ " <th>0</th>\n",
37032
+ " <td>The Calderón problem for space-time fractional...</td>\n",
37033
+ " <td>We study an inverse problem for variable coeff...</td>\n",
37034
+ " <td>2205.12509v3</td>\n",
37035
+ " <td>None</td>\n",
37036
+ " </tr>\n",
37037
+ " <tr>\n",
37038
+ " <th>1</th>\n",
37039
+ " <td>Spectral Theory for Schrödinger operators on c...</td>\n",
37040
+ " <td>Spectral properties of Schr\\\"odinger operators...</td>\n",
37041
+ " <td>2303.01924v2</td>\n",
37042
+ " <td>None</td>\n",
37043
+ " </tr>\n",
37044
+ " <tr>\n",
37045
+ " <th>2</th>\n",
37046
+ " <td>The total Q-curvature, volume entropy and poly...</td>\n",
37047
+ " <td>In this paper, we investigate a conformally fl...</td>\n",
37048
+ " <td>2306.15623v3</td>\n",
37049
+ " <td>[53C18, 53C20, 58J90]</td>\n",
37050
+ " </tr>\n",
37051
+ " <tr>\n",
37052
+ " <th>3</th>\n",
37053
+ " <td>Hilbert Complexes with Mixed Boundary Conditio...</td>\n",
37054
+ " <td>We show that the biharmonic Hilbert complex wi...</td>\n",
37055
+ " <td>2207.11778v3</td>\n",
37056
+ " <td>None</td>\n",
37057
+ " </tr>\n",
37058
+ " <tr>\n",
37059
+ " <th>4</th>\n",
37060
+ " <td>Lifting of fractional Sobolev mappings to nonc...</td>\n",
37061
+ " <td>Given a compact Riemannian manifolds $\\mathcal...</td>\n",
37062
+ " <td>2301.07663v2</td>\n",
37063
+ " <td>[58D15, 46E35]</td>\n",
37064
+ " </tr>\n",
37065
+ " <tr>\n",
37066
+ " <th>5</th>\n",
37067
+ " <td>Homogenization of the two-dimensional evolutio...</td>\n",
37068
+ " <td>We consider the evolutionary compressible Navi...</td>\n",
37069
+ " <td>2210.09070v3</td>\n",
37070
+ " <td>[35B27, 76M50, 76N06]</td>\n",
37071
+ " </tr>\n",
37072
+ " <tr>\n",
37073
+ " <th>6</th>\n",
37074
+ " <td>A note on the Long-Time behaviour of Stochasti...</td>\n",
37075
+ " <td>This paper presents an investigation into the ...</td>\n",
37076
+ " <td>2306.16130v2</td>\n",
37077
+ " <td>None</td>\n",
37078
+ " </tr>\n",
37079
+ " <tr>\n",
37080
+ " <th>7</th>\n",
37081
+ " <td>Localization for random quasi-one-dimensional ...</td>\n",
37082
+ " <td>In this paper we review results of Anderson lo...</td>\n",
37083
+ " <td>2305.05224v2</td>\n",
37084
+ " <td>None</td>\n",
37085
+ " </tr>\n",
37086
+ " <tr>\n",
37087
+ " <th>8</th>\n",
37088
+ " <td>Orthogonal modes of a fully anisotropic and he...</td>\n",
37089
+ " <td>The aim of this short note is to give a synthe...</td>\n",
37090
+ " <td>2303.14984v3</td>\n",
37091
+ " <td>None</td>\n",
37092
+ " </tr>\n",
37093
+ " <tr>\n",
37094
+ " <th>9</th>\n",
37095
+ " <td>Ginzburg-Landau Equations on Non-compact Riema...</td>\n",
37096
+ " <td>We study the Ginzburg-Landau equations on line...</td>\n",
37097
+ " <td>2203.14179v3</td>\n",
37098
+ " <td>[35Q56, 35J66]</td>\n",
37099
+ " </tr>\n",
37100
+ " <tr>\n",
37101
+ " <th>10</th>\n",
37102
+ " <td>Global well-posedness for the 2D Euler-Boussin...</td>\n",
37103
+ " <td>This present paper is dedicated to the study o...</td>\n",
37104
+ " <td>2306.10670v2</td>\n",
37105
+ " <td>[35Q35, 35B65, 35R11, 76B03]</td>\n",
37106
+ " </tr>\n",
37107
+ " <tr>\n",
37108
+ " <th>11</th>\n",
37109
+ " <td>Stochastic homogenization of Gaussian fields o...</td>\n",
37110
+ " <td>In this article, we study stochastic homogeniz...</td>\n",
37111
+ " <td>2201.12013v2</td>\n",
37112
+ " <td>[60K37, 60G15, 60G60, 35B27, 60J60, 60G20]</td>\n",
37113
+ " </tr>\n",
37114
+ " <tr>\n",
37115
+ " <th>12</th>\n",
37116
+ " <td>Spectra of generators of Markovian evolution i...</td>\n",
37117
+ " <td>We determine spectra of single-particle transl...</td>\n",
37118
+ " <td>2206.09879v2</td>\n",
37119
+ " <td>[81V99, 46N50, 82C10, 47A10]</td>\n",
37120
+ " </tr>\n",
37121
+ " <tr>\n",
37122
+ " <th>13</th>\n",
37123
+ " <td>Non-perturbative localization on the strip and...</td>\n",
37124
+ " <td>We prove non-perturbative Anderson localizatio...</td>\n",
37125
+ " <td>2306.15122v2</td>\n",
37126
+ " <td>None</td>\n",
37127
+ " </tr>\n",
37128
+ " <tr>\n",
37129
+ " <th>14</th>\n",
37130
+ " <td>Reconstructing anisotropic conductivities on t...</td>\n",
37131
+ " <td>We consider an electrically conductive compact...</td>\n",
37132
+ " <td>2202.12056v3</td>\n",
37133
+ " <td>None</td>\n",
37134
+ " </tr>\n",
37135
+ " <tr>\n",
37136
+ " <th>15</th>\n",
37137
+ " <td>Future stability of expanding spatially homoge...</td>\n",
37138
+ " <td>Spatially homogeneous FLRW solutions constitut...</td>\n",
37139
+ " <td>2306.17774v1</td>\n",
37140
+ " <td>None</td>\n",
37141
+ " </tr>\n",
37142
+ " <tr>\n",
37143
+ " <th>16</th>\n",
37144
+ " <td>Autonomous and asymptotically quasiconvex func...</td>\n",
37145
+ " <td>We obtain local regularity for minimizers of a...</td>\n",
37146
+ " <td>2306.17768v1</td>\n",
37147
+ " <td>[35J47, 35B65, 46E30]</td>\n",
37148
+ " </tr>\n",
37149
+ " <tr>\n",
37150
+ " <th>17</th>\n",
37151
+ " <td>A Coefficient Inverse Problem for the Mean Fie...</td>\n",
37152
+ " <td>A Coefficient Inverse Problem (CIP) of the det...</td>\n",
37153
+ " <td>2306.03349v2</td>\n",
37154
+ " <td>None</td>\n",
37155
+ " </tr>\n",
37156
+ " <tr>\n",
37157
+ " <th>18</th>\n",
37158
+ " <td>Nonuniqueness results for constant sixth order...</td>\n",
37159
+ " <td>We prove nonuniqueness results for constant si...</td>\n",
37160
+ " <td>2306.00679v2</td>\n",
37161
+ " <td>[35J60, 35B09, 35J30, 35B40, 53C18, 34C23, 58J55]</td>\n",
37162
+ " </tr>\n",
37163
+ " <tr>\n",
37164
+ " <th>19</th>\n",
37165
+ " <td>Asymptotic limits of the principal spectrum po...</td>\n",
37166
+ " <td>This work examines the limits of the principal...</td>\n",
37167
+ " <td>2306.17734v1</td>\n",
37168
+ " <td>[92D40, 92D50, 35P15, 35K57]</td>\n",
37169
+ " </tr>\n",
37170
+ " </tbody>\n",
37171
+ "</table>\n",
37172
+ "</div>"
37173
+ ],
37174
+ "text/plain": [
37175
+ " title \\\n",
37176
+ "0 The Calderón problem for space-time fractional... \n",
37177
+ "1 Spectral Theory for Schrödinger operators on c... \n",
37178
+ "2 The total Q-curvature, volume entropy and poly... \n",
37179
+ "3 Hilbert Complexes with Mixed Boundary Conditio... \n",
37180
+ "4 Lifting of fractional Sobolev mappings to nonc... \n",
37181
+ "5 Homogenization of the two-dimensional evolutio... \n",
37182
+ "6 A note on the Long-Time behaviour of Stochasti... \n",
37183
+ "7 Localization for random quasi-one-dimensional ... \n",
37184
+ "8 Orthogonal modes of a fully anisotropic and he... \n",
37185
+ "9 Ginzburg-Landau Equations on Non-compact Riema... \n",
37186
+ "10 Global well-posedness for the 2D Euler-Boussin... \n",
37187
+ "11 Stochastic homogenization of Gaussian fields o... \n",
37188
+ "12 Spectra of generators of Markovian evolution i... \n",
37189
+ "13 Non-perturbative localization on the strip and... \n",
37190
+ "14 Reconstructing anisotropic conductivities on t... \n",
37191
+ "15 Future stability of expanding spatially homoge... \n",
37192
+ "16 Autonomous and asymptotically quasiconvex func... \n",
37193
+ "17 A Coefficient Inverse Problem for the Mean Fie... \n",
37194
+ "18 Nonuniqueness results for constant sixth order... \n",
37195
+ "19 Asymptotic limits of the principal spectrum po... \n",
37196
+ "\n",
37197
+ " summary id \\\n",
37198
+ "0 We study an inverse problem for variable coeff... 2205.12509v3 \n",
37199
+ "1 Spectral properties of Schr\\\"odinger operators... 2303.01924v2 \n",
37200
+ "2 In this paper, we investigate a conformally fl... 2306.15623v3 \n",
37201
+ "3 We show that the biharmonic Hilbert complex wi... 2207.11778v3 \n",
37202
+ "4 Given a compact Riemannian manifolds $\\mathcal... 2301.07663v2 \n",
37203
+ "5 We consider the evolutionary compressible Navi... 2210.09070v3 \n",
37204
+ "6 This paper presents an investigation into the ... 2306.16130v2 \n",
37205
+ "7 In this paper we review results of Anderson lo... 2305.05224v2 \n",
37206
+ "8 The aim of this short note is to give a synthe... 2303.14984v3 \n",
37207
+ "9 We study the Ginzburg-Landau equations on line... 2203.14179v3 \n",
37208
+ "10 This present paper is dedicated to the study o... 2306.10670v2 \n",
37209
+ "11 In this article, we study stochastic homogeniz... 2201.12013v2 \n",
37210
+ "12 We determine spectra of single-particle transl... 2206.09879v2 \n",
37211
+ "13 We prove non-perturbative Anderson localizatio... 2306.15122v2 \n",
37212
+ "14 We consider an electrically conductive compact... 2202.12056v3 \n",
37213
+ "15 Spatially homogeneous FLRW solutions constitut... 2306.17774v1 \n",
37214
+ "16 We obtain local regularity for minimizers of a... 2306.17768v1 \n",
37215
+ "17 A Coefficient Inverse Problem (CIP) of the det... 2306.03349v2 \n",
37216
+ "18 We prove nonuniqueness results for constant si... 2306.00679v2 \n",
37217
+ "19 This work examines the limits of the principal... 2306.17734v1 \n",
37218
+ "\n",
37219
+ " msc_tags \n",
37220
+ "0 None \n",
37221
+ "1 None \n",
37222
+ "2 [53C18, 53C20, 58J90] \n",
37223
+ "3 None \n",
37224
+ "4 [58D15, 46E35] \n",
37225
+ "5 [35B27, 76M50, 76N06] \n",
37226
+ "6 None \n",
37227
+ "7 None \n",
37228
+ "8 None \n",
37229
+ "9 [35Q56, 35J66] \n",
37230
+ "10 [35Q35, 35B65, 35R11, 76B03] \n",
37231
+ "11 [60K37, 60G15, 60G60, 35B27, 60J60, 60G20] \n",
37232
+ "12 [81V99, 46N50, 82C10, 47A10] \n",
37233
+ "13 None \n",
37234
+ "14 None \n",
37235
+ "15 None \n",
37236
+ "16 [35J47, 35B65, 46E30] \n",
37237
+ "17 None \n",
37238
+ "18 [35J60, 35B09, 35J30, 35B40, 53C18, 34C23, 58J55] \n",
37239
+ "19 [92D40, 92D50, 35P15, 35K57] "
37240
+ ]
37241
+ },
37242
+ "execution_count": 13,
37243
+ "metadata": {},
37244
+ "output_type": "execute_result"
37245
+ }
37246
+ ],
37247
+ "source": [
37248
+ "data = ArXivData()\n",
37249
+ "data.load_from_feather(dataset_file_name=\"test.feather\", path_to_data_dir=\"./data\")\n",
37250
+ "\n",
37251
+ "data.metadata"
37252
+ ]
37253
+ },
37254
+ {
37255
+ "cell_type": "code",
37256
+ "execution_count": 5,
37257
+ "metadata": {},
37258
+ "outputs": [
37259
+ {
37260
+ "data": {
37261
+ "text/html": [
37262
+ "<div>\n",
37263
+ "<style scoped>\n",
37264
+ " .dataframe tbody tr th:only-of-type {\n",
37265
+ " vertical-align: middle;\n",
37266
+ " }\n",
37267
+ "\n",
37268
+ " .dataframe tbody tr th {\n",
37269
+ " vertical-align: top;\n",
37270
+ " }\n",
37271
+ "\n",
37272
+ " .dataframe thead th {\n",
37273
+ " text-align: right;\n",
37274
+ " }\n",
37275
+ "</style>\n",
37276
+ "<table border=\"1\" class=\"dataframe\">\n",
37277
+ " <thead>\n",
37278
+ " <tr style=\"text-align: right;\">\n",
37279
+ " <th></th>\n",
37280
+ " <th>General Relativity and Quantum Cosmology</th>\n",
37281
+ " <th>Mathematical Physics</th>\n",
37282
+ " <th>Analysis of PDEs</th>\n",
37283
+ " <th>Classical Analysis and ODEs</th>\n",
37284
+ " <th>Differential Geometry</th>\n",
37285
+ " <th>Dynamical Systems</th>\n",
37286
+ " <th>Functional Analysis</th>\n",
37287
+ " <th>Probability</th>\n",
37288
+ " <th>Spectral Theory</th>\n",
37289
+ " <th>Quantum Physics</th>\n",
37290
+ " </tr>\n",
37291
+ " </thead>\n",
37292
+ " <tbody>\n",
37293
+ " <tr>\n",
37294
+ " <th>0</th>\n",
37295
+ " <td>0</td>\n",
37296
+ " <td>0</td>\n",
37297
+ " <td>1</td>\n",
37298
+ " <td>0</td>\n",
37299
+ " <td>0</td>\n",
37300
+ " <td>0</td>\n",
37301
+ " <td>0</td>\n",
37302
+ " <td>0</td>\n",
37303
+ " <td>0</td>\n",
37304
+ " <td>0</td>\n",
37305
+ " </tr>\n",
37306
+ " <tr>\n",
37307
+ " <th>1</th>\n",
37308
+ " <td>0</td>\n",
37309
+ " <td>0</td>\n",
37310
+ " <td>1</td>\n",
37311
+ " <td>0</td>\n",
37312
+ " <td>0</td>\n",
37313
+ " <td>0</td>\n",
37314
+ " <td>1</td>\n",
37315
+ " <td>0</td>\n",
37316
+ " <td>1</td>\n",
37317
+ " <td>0</td>\n",
37318
+ " </tr>\n",
37319
+ " <tr>\n",
37320
+ " <th>2</th>\n",
37321
+ " <td>0</td>\n",
37322
+ " <td>0</td>\n",
37323
+ " <td>1</td>\n",
37324
+ " <td>0</td>\n",
37325
+ " <td>1</td>\n",
37326
+ " <td>0</td>\n",
37327
+ " <td>0</td>\n",
37328
+ " <td>0</td>\n",
37329
+ " <td>0</td>\n",
37330
+ " <td>0</td>\n",
37331
+ " </tr>\n",
37332
+ " <tr>\n",
37333
+ " <th>3</th>\n",
37334
+ " <td>0</td>\n",
37335
+ " <td>0</td>\n",
37336
+ " <td>1</td>\n",
37337
+ " <td>0</td>\n",
37338
+ " <td>0</td>\n",
37339
+ " <td>0</td>\n",
37340
+ " <td>1</td>\n",
37341
+ " <td>0</td>\n",
37342
+ " <td>0</td>\n",
37343
+ " <td>0</td>\n",
37344
+ " </tr>\n",
37345
+ " <tr>\n",
37346
+ " <th>4</th>\n",
37347
+ " <td>0</td>\n",
37348
+ " <td>0</td>\n",
37349
+ " <td>1</td>\n",
37350
+ " <td>1</td>\n",
37351
+ " <td>0</td>\n",
37352
+ " <td>0</td>\n",
37353
+ " <td>1</td>\n",
37354
+ " <td>0</td>\n",
37355
+ " <td>0</td>\n",
37356
+ " <td>0</td>\n",
37357
+ " </tr>\n",
37358
+ " <tr>\n",
37359
+ " <th>5</th>\n",
37360
+ " <td>0</td>\n",
37361
+ " <td>0</td>\n",
37362
+ " <td>1</td>\n",
37363
+ " <td>0</td>\n",
37364
+ " <td>0</td>\n",
37365
+ " <td>0</td>\n",
37366
+ " <td>0</td>\n",
37367
+ " <td>0</td>\n",
37368
+ " <td>0</td>\n",
37369
+ " <td>0</td>\n",
37370
+ " </tr>\n",
37371
+ " <tr>\n",
37372
+ " <th>6</th>\n",
37373
+ " <td>0</td>\n",
37374
+ " <td>0</td>\n",
37375
+ " <td>1</td>\n",
37376
+ " <td>0</td>\n",
37377
+ " <td>0</td>\n",
37378
+ " <td>0</td>\n",
37379
+ " <td>0</td>\n",
37380
+ " <td>1</td>\n",
37381
+ " <td>0</td>\n",
37382
+ " <td>0</td>\n",
37383
+ " </tr>\n",
37384
+ " <tr>\n",
37385
+ " <th>7</th>\n",
37386
+ " <td>0</td>\n",
37387
+ " <td>1</td>\n",
37388
+ " <td>0</td>\n",
37389
+ " <td>0</td>\n",
37390
+ " <td>0</td>\n",
37391
+ " <td>0</td>\n",
37392
+ " <td>0</td>\n",
37393
+ " <td>0</td>\n",
37394
+ " <td>1</td>\n",
37395
+ " <td>0</td>\n",
37396
+ " </tr>\n",
37397
+ " <tr>\n",
37398
+ " <th>8</th>\n",
37399
+ " <td>0</td>\n",
37400
+ " <td>0</td>\n",
37401
+ " <td>1</td>\n",
37402
+ " <td>0</td>\n",
37403
+ " <td>0</td>\n",
37404
+ " <td>0</td>\n",
37405
+ " <td>0</td>\n",
37406
+ " <td>0</td>\n",
37407
+ " <td>0</td>\n",
37408
+ " <td>0</td>\n",
37409
+ " </tr>\n",
37410
+ " <tr>\n",
37411
+ " <th>9</th>\n",
37412
+ " <td>0</td>\n",
37413
+ " <td>0</td>\n",
37414
+ " <td>1</td>\n",
37415
+ " <td>0</td>\n",
37416
+ " <td>0</td>\n",
37417
+ " <td>0</td>\n",
37418
+ " <td>0</td>\n",
37419
+ " <td>0</td>\n",
37420
+ " <td>0</td>\n",
37421
+ " <td>0</td>\n",
37422
+ " </tr>\n",
37423
+ " <tr>\n",
37424
+ " <th>10</th>\n",
37425
+ " <td>0</td>\n",
37426
+ " <td>0</td>\n",
37427
+ " <td>1</td>\n",
37428
+ " <td>0</td>\n",
37429
+ " <td>0</td>\n",
37430
+ " <td>0</td>\n",
37431
+ " <td>0</td>\n",
37432
+ " <td>0</td>\n",
37433
+ " <td>0</td>\n",
37434
+ " <td>0</td>\n",
37435
+ " </tr>\n",
37436
+ " <tr>\n",
37437
+ " <th>11</th>\n",
37438
+ " <td>0</td>\n",
37439
+ " <td>0</td>\n",
37440
+ " <td>1</td>\n",
37441
+ " <td>0</td>\n",
37442
+ " <td>0</td>\n",
37443
+ " <td>0</td>\n",
37444
+ " <td>0</td>\n",
37445
+ " <td>1</td>\n",
37446
+ " <td>0</td>\n",
37447
+ " <td>0</td>\n",
37448
+ " </tr>\n",
37449
+ " <tr>\n",
37450
+ " <th>12</th>\n",
37451
+ " <td>0</td>\n",
37452
+ " <td>1</td>\n",
37453
+ " <td>0</td>\n",
37454
+ " <td>0</td>\n",
37455
+ " <td>0</td>\n",
37456
+ " <td>0</td>\n",
37457
+ " <td>0</td>\n",
37458
+ " <td>0</td>\n",
37459
+ " <td>1</td>\n",
37460
+ " <td>1</td>\n",
37461
+ " </tr>\n",
37462
+ " <tr>\n",
37463
+ " <th>13</th>\n",
37464
+ " <td>0</td>\n",
37465
+ " <td>1</td>\n",
37466
+ " <td>0</td>\n",
37467
+ " <td>0</td>\n",
37468
+ " <td>0</td>\n",
37469
+ " <td>0</td>\n",
37470
+ " <td>0</td>\n",
37471
+ " <td>0</td>\n",
37472
+ " <td>1</td>\n",
37473
+ " <td>0</td>\n",
37474
+ " </tr>\n",
37475
+ " <tr>\n",
37476
+ " <th>14</th>\n",
37477
+ " <td>0</td>\n",
37478
+ " <td>0</td>\n",
37479
+ " <td>1</td>\n",
37480
+ " <td>0</td>\n",
37481
+ " <td>0</td>\n",
37482
+ " <td>0</td>\n",
37483
+ " <td>0</td>\n",
37484
+ " <td>0</td>\n",
37485
+ " <td>0</td>\n",
37486
+ " <td>0</td>\n",
37487
+ " </tr>\n",
37488
+ " <tr>\n",
37489
+ " <th>15</th>\n",
37490
+ " <td>1</td>\n",
37491
+ " <td>1</td>\n",
37492
+ " <td>1</td>\n",
37493
+ " <td>0</td>\n",
37494
+ " <td>1</td>\n",
37495
+ " <td>0</td>\n",
37496
+ " <td>0</td>\n",
37497
+ " <td>0</td>\n",
37498
+ " <td>0</td>\n",
37499
+ " <td>0</td>\n",
37500
+ " </tr>\n",
37501
+ " <tr>\n",
37502
+ " <th>16</th>\n",
37503
+ " <td>0</td>\n",
37504
+ " <td>0</td>\n",
37505
+ " <td>1</td>\n",
37506
+ " <td>0</td>\n",
37507
+ " <td>0</td>\n",
37508
+ " <td>0</td>\n",
37509
+ " <td>0</td>\n",
37510
+ " <td>0</td>\n",
37511
+ " <td>0</td>\n",
37512
+ " <td>0</td>\n",
37513
+ " </tr>\n",
37514
+ " <tr>\n",
37515
+ " <th>17</th>\n",
37516
+ " <td>0</td>\n",
37517
+ " <td>0</td>\n",
37518
+ " <td>1</td>\n",
37519
+ " <td>0</td>\n",
37520
+ " <td>0</td>\n",
37521
+ " <td>0</td>\n",
37522
+ " <td>0</td>\n",
37523
+ " <td>0</td>\n",
37524
+ " <td>0</td>\n",
37525
+ " <td>0</td>\n",
37526
+ " </tr>\n",
37527
+ " <tr>\n",
37528
+ " <th>18</th>\n",
37529
+ " <td>0</td>\n",
37530
+ " <td>0</td>\n",
37531
+ " <td>1</td>\n",
37532
+ " <td>0</td>\n",
37533
+ " <td>1</td>\n",
37534
+ " <td>0</td>\n",
37535
+ " <td>0</td>\n",
37536
+ " <td>0</td>\n",
37537
+ " <td>0</td>\n",
37538
+ " <td>0</td>\n",
37539
+ " </tr>\n",
37540
+ " <tr>\n",
37541
+ " <th>19</th>\n",
37542
+ " <td>0</td>\n",
37543
+ " <td>0</td>\n",
37544
+ " <td>1</td>\n",
37545
+ " <td>0</td>\n",
37546
+ " <td>0</td>\n",
37547
+ " <td>1</td>\n",
37548
+ " <td>0</td>\n",
37549
+ " <td>0</td>\n",
37550
+ " <td>0</td>\n",
37551
+ " <td>0</td>\n",
37552
+ " </tr>\n",
37553
+ " </tbody>\n",
37554
+ "</table>\n",
37555
+ "</div>"
37556
+ ],
37557
+ "text/plain": [
37558
+ " General Relativity and Quantum Cosmology Mathematical Physics \\\n",
37559
+ "0 0 0 \n",
37560
+ "1 0 0 \n",
37561
+ "2 0 0 \n",
37562
+ "3 0 0 \n",
37563
+ "4 0 0 \n",
37564
+ "5 0 0 \n",
37565
+ "6 0 0 \n",
37566
+ "7 0 1 \n",
37567
+ "8 0 0 \n",
37568
+ "9 0 0 \n",
37569
+ "10 0 0 \n",
37570
+ "11 0 0 \n",
37571
+ "12 0 1 \n",
37572
+ "13 0 1 \n",
37573
+ "14 0 0 \n",
37574
+ "15 1 1 \n",
37575
+ "16 0 0 \n",
37576
+ "17 0 0 \n",
37577
+ "18 0 0 \n",
37578
+ "19 0 0 \n",
37579
+ "\n",
37580
+ " Analysis of PDEs Classical Analysis and ODEs Differential Geometry \\\n",
37581
+ "0 1 0 0 \n",
37582
+ "1 1 0 0 \n",
37583
+ "2 1 0 1 \n",
37584
+ "3 1 0 0 \n",
37585
+ "4 1 1 0 \n",
37586
+ "5 1 0 0 \n",
37587
+ "6 1 0 0 \n",
37588
+ "7 0 0 0 \n",
37589
+ "8 1 0 0 \n",
37590
+ "9 1 0 0 \n",
37591
+ "10 1 0 0 \n",
37592
+ "11 1 0 0 \n",
37593
+ "12 0 0 0 \n",
37594
+ "13 0 0 0 \n",
37595
+ "14 1 0 0 \n",
37596
+ "15 1 0 1 \n",
37597
+ "16 1 0 0 \n",
37598
+ "17 1 0 0 \n",
37599
+ "18 1 0 1 \n",
37600
+ "19 1 0 0 \n",
37601
+ "\n",
37602
+ " Dynamical Systems Functional Analysis Probability Spectral Theory \\\n",
37603
+ "0 0 0 0 0 \n",
37604
+ "1 0 1 0 1 \n",
37605
+ "2 0 0 0 0 \n",
37606
+ "3 0 1 0 0 \n",
37607
+ "4 0 1 0 0 \n",
37608
+ "5 0 0 0 0 \n",
37609
+ "6 0 0 1 0 \n",
37610
+ "7 0 0 0 1 \n",
37611
+ "8 0 0 0 0 \n",
37612
+ "9 0 0 0 0 \n",
37613
+ "10 0 0 0 0 \n",
37614
+ "11 0 0 1 0 \n",
37615
+ "12 0 0 0 1 \n",
37616
+ "13 0 0 0 1 \n",
37617
+ "14 0 0 0 0 \n",
37618
+ "15 0 0 0 0 \n",
37619
+ "16 0 0 0 0 \n",
37620
+ "17 0 0 0 0 \n",
37621
+ "18 0 0 0 0 \n",
37622
+ "19 1 0 0 0 \n",
37623
+ "\n",
37624
+ " Quantum Physics \n",
37625
+ "0 0 \n",
37626
+ "1 0 \n",
37627
+ "2 0 \n",
37628
+ "3 0 \n",
37629
+ "4 0 \n",
37630
+ "5 0 \n",
37631
+ "6 0 \n",
37632
+ "7 0 \n",
37633
+ "8 0 \n",
37634
+ "9 0 \n",
37635
+ "10 0 \n",
37636
+ "11 0 \n",
37637
+ "12 1 \n",
37638
+ "13 0 \n",
37639
+ "14 0 \n",
37640
+ "15 0 \n",
37641
+ "16 0 \n",
37642
+ "17 0 \n",
37643
+ "18 0 \n",
37644
+ "19 0 "
37645
+ ]
37646
+ },
37647
+ "execution_count": 5,
37648
+ "metadata": {},
37649
+ "output_type": "execute_result"
37650
+ }
37651
+ ],
37652
+ "source": [
37653
+ "data.arxiv_subjects"
37654
+ ]
37655
+ },
37656
+ {
37657
+ "cell_type": "code",
37658
+ "execution_count": 20,
37659
+ "metadata": {},
37660
+ "outputs": [],
37661
+ "source": [
37662
+ "new = ArXivData()\n",
37663
+ "new.load_from_query(query=\"cat:math.AP OR cat:math.SP\", max_results=40000)\n",
37664
+ "new.save_as_feather(path_to_data_dir=\"./data\", dataset_file_name=\"APSP_40.feather\")"
37665
+ ]
37666
+ },
37667
+ {
37668
+ "cell_type": "code",
37669
+ "execution_count": 23,
37670
+ "metadata": {},
37671
+ "outputs": [
37672
+ {
37673
+ "data": {
37674
+ "text/html": [
37675
+ "<div>\n",
37676
+ "<style scoped>\n",
37677
+ " .dataframe tbody tr th:only-of-type {\n",
37678
+ " vertical-align: middle;\n",
37679
+ " }\n",
37680
+ "\n",
37681
+ " .dataframe tbody tr th {\n",
37682
+ " vertical-align: top;\n",
37683
+ " }\n",
37684
+ "\n",
37685
+ " .dataframe thead th {\n",
37686
+ " text-align: right;\n",
37687
+ " }\n",
37688
+ "</style>\n",
37689
+ "<table border=\"1\" class=\"dataframe\">\n",
37690
+ " <thead>\n",
37691
+ " <tr style=\"text-align: right;\">\n",
37692
+ " <th></th>\n",
37693
+ " <th>Cosmology and Nongalactic Astrophysics</th>\n",
37694
+ " <th>Earth and Planetary Astrophysics</th>\n",
37695
+ " <th>Astrophysics of Galaxies</th>\n",
37696
+ " <th>High Energy Astrophysical Phenomena</th>\n",
37697
+ " <th>Instrumentation and Methods for Astrophysics</th>\n",
37698
+ " <th>Solar and Stellar Astrophysics</th>\n",
37699
+ " <th>Disordered Systems and Neural Networks</th>\n",
37700
+ " <th>Mesoscale and Nanoscale Physics</th>\n",
37701
+ " <th>Materials Science</th>\n",
37702
+ " <th>Other Condensed Matter</th>\n",
37703
+ " <th>...</th>\n",
37704
+ " <th>Mathematical Finance</th>\n",
37705
+ " <th>Portfolio Management</th>\n",
37706
+ " <th>Pricing of Securities</th>\n",
37707
+ " <th>Risk Management</th>\n",
37708
+ " <th>Trading and Market Microstructure</th>\n",
37709
+ " <th>Quantum Physics</th>\n",
37710
+ " <th>Applications</th>\n",
37711
+ " <th>Computation</th>\n",
37712
+ " <th>Methodology</th>\n",
37713
+ " <th>Other Statistics</th>\n",
37714
+ " </tr>\n",
37715
+ " </thead>\n",
37716
+ " <tbody>\n",
37717
+ " <tr>\n",
37718
+ " <th>0</th>\n",
37719
+ " <td>0</td>\n",
37720
+ " <td>0</td>\n",
37721
+ " <td>0</td>\n",
37722
+ " <td>0</td>\n",
37723
+ " <td>0</td>\n",
37724
+ " <td>0</td>\n",
37725
+ " <td>0</td>\n",
37726
+ " <td>0</td>\n",
37727
+ " <td>0</td>\n",
37728
+ " <td>0</td>\n",
37729
+ " <td>...</td>\n",
37730
+ " <td>0</td>\n",
37731
+ " <td>0</td>\n",
37732
+ " <td>0</td>\n",
37733
+ " <td>0</td>\n",
37734
+ " <td>0</td>\n",
37735
+ " <td>0</td>\n",
37736
+ " <td>0</td>\n",
37737
+ " <td>0</td>\n",
37738
+ " <td>0</td>\n",
37739
+ " <td>0</td>\n",
37740
+ " </tr>\n",
37741
+ " <tr>\n",
37742
+ " <th>1</th>\n",
37743
+ " <td>0</td>\n",
37744
+ " <td>0</td>\n",
37745
+ " <td>0</td>\n",
37746
+ " <td>0</td>\n",
37747
+ " <td>0</td>\n",
37748
+ " <td>0</td>\n",
37749
+ " <td>0</td>\n",
37750
+ " <td>0</td>\n",
37751
+ " <td>0</td>\n",
37752
+ " <td>0</td>\n",
37753
+ " <td>...</td>\n",
37754
+ " <td>0</td>\n",
37755
+ " <td>0</td>\n",
37756
+ " <td>0</td>\n",
37757
+ " <td>0</td>\n",
37758
+ " <td>0</td>\n",
37759
+ " <td>0</td>\n",
37760
+ " <td>0</td>\n",
37761
+ " <td>0</td>\n",
37762
+ " <td>0</td>\n",
37763
+ " <td>0</td>\n",
37764
+ " </tr>\n",
37765
+ " <tr>\n",
37766
+ " <th>2</th>\n",
37767
+ " <td>0</td>\n",
37768
+ " <td>0</td>\n",
37769
+ " <td>0</td>\n",
37770
+ " <td>0</td>\n",
37771
+ " <td>0</td>\n",
37772
+ " <td>0</td>\n",
37773
+ " <td>0</td>\n",
37774
+ " <td>0</td>\n",
37775
+ " <td>0</td>\n",
37776
+ " <td>0</td>\n",
37777
+ " <td>...</td>\n",
37778
+ " <td>0</td>\n",
37779
+ " <td>0</td>\n",
37780
+ " <td>0</td>\n",
37781
+ " <td>0</td>\n",
37782
+ " <td>0</td>\n",
37783
+ " <td>0</td>\n",
37784
+ " <td>0</td>\n",
37785
+ " <td>0</td>\n",
37786
+ " <td>0</td>\n",
37787
+ " <td>0</td>\n",
37788
+ " </tr>\n",
37789
+ " <tr>\n",
37790
+ " <th>3</th>\n",
37791
+ " <td>0</td>\n",
37792
+ " <td>0</td>\n",
37793
+ " <td>0</td>\n",
37794
+ " <td>0</td>\n",
37795
+ " <td>0</td>\n",
37796
+ " <td>0</td>\n",
37797
+ " <td>0</td>\n",
37798
+ " <td>0</td>\n",
37799
+ " <td>0</td>\n",
37800
+ " <td>0</td>\n",
37801
+ " <td>...</td>\n",
37802
+ " <td>0</td>\n",
37803
+ " <td>0</td>\n",
37804
+ " <td>0</td>\n",
37805
+ " <td>0</td>\n",
37806
+ " <td>0</td>\n",
37807
+ " <td>0</td>\n",
37808
+ " <td>0</td>\n",
37809
+ " <td>0</td>\n",
37810
+ " <td>0</td>\n",
37811
+ " <td>0</td>\n",
37812
+ " </tr>\n",
37813
+ " <tr>\n",
37814
+ " <th>4</th>\n",
37815
+ " <td>0</td>\n",
37816
+ " <td>0</td>\n",
37817
+ " <td>0</td>\n",
37818
+ " <td>0</td>\n",
37819
+ " <td>0</td>\n",
37820
+ " <td>0</td>\n",
37821
+ " <td>0</td>\n",
37822
+ " <td>0</td>\n",
37823
+ " <td>0</td>\n",
37824
+ " <td>0</td>\n",
37825
+ " <td>...</td>\n",
37826
+ " <td>0</td>\n",
37827
+ " <td>0</td>\n",
37828
+ " <td>0</td>\n",
37829
+ " <td>0</td>\n",
37830
+ " <td>0</td>\n",
37831
+ " <td>0</td>\n",
37832
+ " <td>0</td>\n",
37833
+ " <td>0</td>\n",
37834
+ " <td>0</td>\n",
37835
+ " <td>0</td>\n",
37836
+ " </tr>\n",
37837
+ " <tr>\n",
37838
+ " <th>...</th>\n",
37839
+ " <td>...</td>\n",
37840
+ " <td>...</td>\n",
37841
+ " <td>...</td>\n",
37842
+ " <td>...</td>\n",
37843
+ " <td>...</td>\n",
37844
+ " <td>...</td>\n",
37845
+ " <td>...</td>\n",
37846
+ " <td>...</td>\n",
37847
+ " <td>...</td>\n",
37848
+ " <td>...</td>\n",
37849
+ " <td>...</td>\n",
37850
+ " <td>...</td>\n",
37851
+ " <td>...</td>\n",
37852
+ " <td>...</td>\n",
37853
+ " <td>...</td>\n",
37854
+ " <td>...</td>\n",
37855
+ " <td>...</td>\n",
37856
+ " <td>...</td>\n",
37857
+ " <td>...</td>\n",
37858
+ " <td>...</td>\n",
37859
+ " <td>...</td>\n",
37860
+ " </tr>\n",
37861
+ " <tr>\n",
37862
+ " <th>39995</th>\n",
37863
+ " <td>0</td>\n",
37864
+ " <td>0</td>\n",
37865
+ " <td>0</td>\n",
37866
+ " <td>0</td>\n",
37867
+ " <td>0</td>\n",
37868
+ " <td>0</td>\n",
37869
+ " <td>0</td>\n",
37870
+ " <td>0</td>\n",
37871
+ " <td>0</td>\n",
37872
+ " <td>0</td>\n",
37873
+ " <td>...</td>\n",
37874
+ " <td>0</td>\n",
37875
+ " <td>0</td>\n",
37876
+ " <td>0</td>\n",
37877
+ " <td>0</td>\n",
37878
+ " <td>0</td>\n",
37879
+ " <td>0</td>\n",
37880
+ " <td>0</td>\n",
37881
+ " <td>0</td>\n",
37882
+ " <td>0</td>\n",
37883
+ " <td>0</td>\n",
37884
+ " </tr>\n",
37885
+ " <tr>\n",
37886
+ " <th>39996</th>\n",
37887
+ " <td>0</td>\n",
37888
+ " <td>0</td>\n",
37889
+ " <td>0</td>\n",
37890
+ " <td>0</td>\n",
37891
+ " <td>0</td>\n",
37892
+ " <td>0</td>\n",
37893
+ " <td>0</td>\n",
37894
+ " <td>0</td>\n",
37895
+ " <td>0</td>\n",
37896
+ " <td>0</td>\n",
37897
+ " <td>...</td>\n",
37898
+ " <td>0</td>\n",
37899
+ " <td>0</td>\n",
37900
+ " <td>0</td>\n",
37901
+ " <td>0</td>\n",
37902
+ " <td>0</td>\n",
37903
+ " <td>0</td>\n",
37904
+ " <td>0</td>\n",
37905
+ " <td>0</td>\n",
37906
+ " <td>0</td>\n",
37907
+ " <td>0</td>\n",
37908
+ " </tr>\n",
37909
+ " <tr>\n",
37910
+ " <th>39997</th>\n",
37911
+ " <td>0</td>\n",
37912
+ " <td>0</td>\n",
37913
+ " <td>0</td>\n",
37914
+ " <td>0</td>\n",
37915
+ " <td>0</td>\n",
37916
+ " <td>0</td>\n",
37917
+ " <td>0</td>\n",
37918
+ " <td>0</td>\n",
37919
+ " <td>0</td>\n",
37920
+ " <td>0</td>\n",
37921
+ " <td>...</td>\n",
37922
+ " <td>0</td>\n",
37923
+ " <td>0</td>\n",
37924
+ " <td>0</td>\n",
37925
+ " <td>0</td>\n",
37926
+ " <td>0</td>\n",
37927
+ " <td>0</td>\n",
37928
+ " <td>0</td>\n",
37929
+ " <td>0</td>\n",
37930
+ " <td>0</td>\n",
37931
+ " <td>0</td>\n",
37932
+ " </tr>\n",
37933
+ " <tr>\n",
37934
+ " <th>39998</th>\n",
37935
+ " <td>0</td>\n",
37936
+ " <td>0</td>\n",
37937
+ " <td>0</td>\n",
37938
+ " <td>0</td>\n",
37939
+ " <td>0</td>\n",
37940
+ " <td>0</td>\n",
37941
+ " <td>0</td>\n",
37942
+ " <td>0</td>\n",
37943
+ " <td>0</td>\n",
37944
+ " <td>0</td>\n",
37945
+ " <td>...</td>\n",
37946
+ " <td>0</td>\n",
37947
+ " <td>0</td>\n",
37948
+ " <td>0</td>\n",
37949
+ " <td>0</td>\n",
37950
+ " <td>0</td>\n",
37951
+ " <td>0</td>\n",
37952
+ " <td>0</td>\n",
37953
+ " <td>0</td>\n",
37954
+ " <td>0</td>\n",
37955
+ " <td>0</td>\n",
37956
+ " </tr>\n",
37957
+ " <tr>\n",
37958
+ " <th>39999</th>\n",
37959
+ " <td>0</td>\n",
37960
+ " <td>0</td>\n",
37961
+ " <td>0</td>\n",
37962
+ " <td>0</td>\n",
37963
+ " <td>0</td>\n",
37964
+ " <td>0</td>\n",
37965
+ " <td>0</td>\n",
37966
+ " <td>0</td>\n",
37967
+ " <td>0</td>\n",
37968
+ " <td>0</td>\n",
37969
+ " <td>...</td>\n",
37970
+ " <td>0</td>\n",
37971
+ " <td>0</td>\n",
37972
+ " <td>0</td>\n",
37973
+ " <td>0</td>\n",
37974
+ " <td>0</td>\n",
37975
+ " <td>0</td>\n",
37976
+ " <td>0</td>\n",
37977
+ " <td>0</td>\n",
37978
+ " <td>0</td>\n",
37979
+ " <td>0</td>\n",
37980
+ " </tr>\n",
37981
+ " </tbody>\n",
37982
+ "</table>\n",
37983
+ "<p>40000 rows × 125 columns</p>\n",
37984
+ "</div>"
37985
+ ],
37986
+ "text/plain": [
37987
+ " Cosmology and Nongalactic Astrophysics \\\n",
37988
+ "0 0 \n",
37989
+ "1 0 \n",
37990
+ "2 0 \n",
37991
+ "3 0 \n",
37992
+ "4 0 \n",
37993
+ "... ... \n",
37994
+ "39995 0 \n",
37995
+ "39996 0 \n",
37996
+ "39997 0 \n",
37997
+ "39998 0 \n",
37998
+ "39999 0 \n",
37999
+ "\n",
38000
+ " Earth and Planetary Astrophysics Astrophysics of Galaxies \\\n",
38001
+ "0 0 0 \n",
38002
+ "1 0 0 \n",
38003
+ "2 0 0 \n",
38004
+ "3 0 0 \n",
38005
+ "4 0 0 \n",
38006
+ "... ... ... \n",
38007
+ "39995 0 0 \n",
38008
+ "39996 0 0 \n",
38009
+ "39997 0 0 \n",
38010
+ "39998 0 0 \n",
38011
+ "39999 0 0 \n",
38012
+ "\n",
38013
+ " High Energy Astrophysical Phenomena \\\n",
38014
+ "0 0 \n",
38015
+ "1 0 \n",
38016
+ "2 0 \n",
38017
+ "3 0 \n",
38018
+ "4 0 \n",
38019
+ "... ... \n",
38020
+ "39995 0 \n",
38021
+ "39996 0 \n",
38022
+ "39997 0 \n",
38023
+ "39998 0 \n",
38024
+ "39999 0 \n",
38025
+ "\n",
38026
+ " Instrumentation and Methods for Astrophysics \\\n",
38027
+ "0 0 \n",
38028
+ "1 0 \n",
38029
+ "2 0 \n",
38030
+ "3 0 \n",
38031
+ "4 0 \n",
38032
+ "... ... \n",
38033
+ "39995 0 \n",
38034
+ "39996 0 \n",
38035
+ "39997 0 \n",
38036
+ "39998 0 \n",
38037
+ "39999 0 \n",
38038
+ "\n",
38039
+ " Solar and Stellar Astrophysics Disordered Systems and Neural Networks \\\n",
38040
+ "0 0 0 \n",
38041
+ "1 0 0 \n",
38042
+ "2 0 0 \n",
38043
+ "3 0 0 \n",
38044
+ "4 0 0 \n",
38045
+ "... ... ... \n",
38046
+ "39995 0 0 \n",
38047
+ "39996 0 0 \n",
38048
+ "39997 0 0 \n",
38049
+ "39998 0 0 \n",
38050
+ "39999 0 0 \n",
38051
+ "\n",
38052
+ " Mesoscale and Nanoscale Physics Materials Science \\\n",
38053
+ "0 0 0 \n",
38054
+ "1 0 0 \n",
38055
+ "2 0 0 \n",
38056
+ "3 0 0 \n",
38057
+ "4 0 0 \n",
38058
+ "... ... ... \n",
38059
+ "39995 0 0 \n",
38060
+ "39996 0 0 \n",
38061
+ "39997 0 0 \n",
38062
+ "39998 0 0 \n",
38063
+ "39999 0 0 \n",
38064
+ "\n",
38065
+ " Other Condensed Matter ... Mathematical Finance \\\n",
38066
+ "0 0 ... 0 \n",
38067
+ "1 0 ... 0 \n",
38068
+ "2 0 ... 0 \n",
38069
+ "3 0 ... 0 \n",
38070
+ "4 0 ... 0 \n",
38071
+ "... ... ... ... \n",
38072
+ "39995 0 ... 0 \n",
38073
+ "39996 0 ... 0 \n",
38074
+ "39997 0 ... 0 \n",
38075
+ "39998 0 ... 0 \n",
38076
+ "39999 0 ... 0 \n",
38077
+ "\n",
38078
+ " Portfolio Management Pricing of Securities Risk Management \\\n",
38079
+ "0 0 0 0 \n",
38080
+ "1 0 0 0 \n",
38081
+ "2 0 0 0 \n",
38082
+ "3 0 0 0 \n",
38083
+ "4 0 0 0 \n",
38084
+ "... ... ... ... \n",
38085
+ "39995 0 0 0 \n",
38086
+ "39996 0 0 0 \n",
38087
+ "39997 0 0 0 \n",
38088
+ "39998 0 0 0 \n",
38089
+ "39999 0 0 0 \n",
38090
+ "\n",
38091
+ " Trading and Market Microstructure Quantum Physics Applications \\\n",
38092
+ "0 0 0 0 \n",
38093
+ "1 0 0 0 \n",
38094
+ "2 0 0 0 \n",
38095
+ "3 0 0 0 \n",
38096
+ "4 0 0 0 \n",
38097
+ "... ... ... ... \n",
38098
+ "39995 0 0 0 \n",
38099
+ "39996 0 0 0 \n",
38100
+ "39997 0 0 0 \n",
38101
+ "39998 0 0 0 \n",
38102
+ "39999 0 0 0 \n",
38103
+ "\n",
38104
+ " Computation Methodology Other Statistics \n",
38105
+ "0 0 0 0 \n",
38106
+ "1 0 0 0 \n",
38107
+ "2 0 0 0 \n",
38108
+ "3 0 0 0 \n",
38109
+ "4 0 0 0 \n",
38110
+ "... ... ... ... \n",
38111
+ "39995 0 0 0 \n",
38112
+ "39996 0 0 0 \n",
38113
+ "39997 0 0 0 \n",
38114
+ "39998 0 0 0 \n",
38115
+ "39999 0 0 0 \n",
38116
+ "\n",
38117
+ "[40000 rows x 125 columns]"
38118
+ ]
38119
+ },
38120
+ "execution_count": 23,
38121
+ "metadata": {},
38122
+ "output_type": "execute_result"
38123
  }
38124
  ],
38125
  "source": [
38126
+ "new.arxiv_subjects"
38127
  ]
38128
  }
38129
  ],
project_log.ipynb CHANGED
@@ -89,6 +89,27 @@
89
  " - any data cleaning procedures will occur in the pipeline here\n",
90
  "3. Plug into topic model(s)"
91
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  }
93
  ],
94
  "metadata": {
 
89
  " - any data cleaning procedures will occur in the pipeline here\n",
90
  "3. Plug into topic model(s)"
91
  ]
92
+ },
93
+ {
94
+ "attachments": {},
95
+ "cell_type": "markdown",
96
+ "metadata": {},
97
+ "source": [
98
+ "## 07/03/2023\n",
99
+ "\n",
100
+ "#### Modified data_storage.py\n",
101
+ "\n",
102
+ "Done:\n",
103
+ "1. Wrote `load_from_feather` and `save_to_feather`\n",
104
+ "1. Pulled metadata for 40k papers in PDE and spectral theory and stored it as 'APSP_40.feather'\n",
105
+ "\n",
106
+ "To Do:\n",
107
+ "1. Make sure the class functionality works correctly when a query returns no results.\n",
108
+ "\n",
109
+ "\n",
110
+ "#### Miscellaneous\n",
111
+ "1. Install `tabbed out` extension for exiting delimiter environments with tab.\n"
112
+ ]
113
  }
114
  ],
115
  "metadata": {
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  pandas
2
  numpy
3
  arxiv
 
1
+ pyarrow
2
  pandas
3
  numpy
4
  arxiv