Michael-Geis committed on
Commit
7cc8002
•
1 Parent(s): f649f3f

reorganized

Browse files
.gitignore CHANGED
@@ -12,4 +12,5 @@ settings.json
12
  collection.ipynb
13
  testing.ipynb
14
  testnb.ipynb
15
- output
 
 
12
  collection.ipynb
13
  testing.ipynb
14
  testnb.ipynb
15
+ output
16
+ notebooks/
src/cleaning.py → cleaning.py RENAMED
File without changes
config/main.yaml DELETED
@@ -1,13 +0,0 @@
1
-
2
-
3
- input_id:
4
- id: 1602.00730
5
-
6
-
7
-
8
-
9
-
10
- paths:
11
- path_to_library: ./data/libraries/APSP_50_allenai-specter
12
- path_to_save_recs: ./output/first_recs.feather
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/embedding.py → embedding.py RENAMED
File without changes
src/library.py → library.py RENAMED
File without changes
src/model.py → model.py RENAMED
File without changes
prediction-flow-sample.ipynb DELETED
@@ -1,1230 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 5,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import importlib"
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "execution_count": 6,
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "import pandas as pd\n",
19
- "\n",
20
- "pd.set_option(\"display.max_colwidth\", 0)"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 7,
26
- "metadata": {},
27
- "outputs": [
28
- {
29
- "name": "stderr",
30
- "output_type": "stream",
31
- "text": [
32
- "c:\\Users\\Leems\\Desktop\\Coding\\Projects\\fritz\\venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
33
- " from .autonotebook import tqdm as notebook_tqdm\n"
34
- ]
35
- },
36
- {
37
- "data": {
38
- "text/plain": [
39
- "<module 'src.storage' from 'c:\\\\Users\\\\Leems\\\\Desktop\\\\Coding\\\\Projects\\\\fritz\\\\src\\\\storage.py'>"
40
- ]
41
- },
42
- "execution_count": 7,
43
- "metadata": {},
44
- "output_type": "execute_result"
45
- }
46
- ],
47
- "source": [
48
- "import src.embedding as embedding\n",
49
- "import src.storage as storage\n",
50
- "from src.storage import ArXivData\n",
51
- "from src.cleaning import TextCleaner\n",
52
- "from src.embedding import Embedder\n",
53
- "from sentence_transformers import util\n",
54
- "\n",
55
- "importlib.reload(embedding)\n",
56
- "importlib.reload(storage)"
57
- ]
58
- },
59
- {
60
- "cell_type": "code",
61
- "execution_count": 8,
62
- "metadata": {},
63
- "outputs": [
64
- {
65
- "ename": "PermissionError",
66
- "evalue": "[Errno 13] Permission denied: './data/libraries/APSP_50_allenai-specter/'",
67
- "output_type": "error",
68
- "traceback": [
69
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
70
- "\u001b[1;31mPermissionError\u001b[0m Traceback (most recent call last)",
71
- "Cell \u001b[1;32mIn[8], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m path_to_library_embeddings \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m./data/libraries/APSP_50_allenai-specter/embeddings.feather\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 5\u001b[0m library \u001b[39m=\u001b[39m ArXivData()\n\u001b[1;32m----> 6\u001b[0m library\u001b[39m.\u001b[39;49mload_from_feather(library_path)\n",
72
- "File \u001b[1;32mc:\\Users\\Leems\\Desktop\\Coding\\Projects\\fritz\\src\\storage.py:32\u001b[0m, in \u001b[0;36mArXivData.load_from_feather\u001b[1;34m(self, path_to_dataset)\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mload_from_feather\u001b[39m(\u001b[39mself\u001b[39m, path_to_dataset):\n\u001b[0;32m 27\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Loads metadata from a saved feather file.\u001b[39;00m\n\u001b[0;32m 28\u001b[0m \n\u001b[0;32m 29\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[0;32m 30\u001b[0m \u001b[39m path_to_dataset: path to the feather file containing the dataset.\u001b[39;00m\n\u001b[0;32m 31\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 32\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_returned_metadata \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_feather(path_to_dataset)\n\u001b[0;32m 33\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmetadata \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_returned_metadata\n\u001b[0;32m 34\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39marxiv_subjects \u001b[39m=\u001b[39m clean\u001b[39m.\u001b[39mOHE_arxiv_subjects(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmetadata)\n",
73
- "File \u001b[1;32mc:\\Users\\Leems\\Desktop\\Coding\\Projects\\fritz\\venv\\Lib\\site-packages\\pandas\\io\\feather_format.py:144\u001b[0m, in \u001b[0;36mread_feather\u001b[1;34m(path, columns, use_threads, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpyarrow\u001b[39;00m \u001b[39mimport\u001b[39;00m feather\n\u001b[0;32m 142\u001b[0m check_dtype_backend(dtype_backend)\n\u001b[1;32m--> 144\u001b[0m \u001b[39mwith\u001b[39;00m get_handle(\n\u001b[0;32m 145\u001b[0m path, \u001b[39m\"\u001b[39;49m\u001b[39mrb\u001b[39;49m\u001b[39m\"\u001b[39;49m, storage_options\u001b[39m=\u001b[39;49mstorage_options, is_text\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[0;32m 146\u001b[0m ) \u001b[39mas\u001b[39;00m handles:\n\u001b[0;32m 147\u001b[0m \u001b[39mif\u001b[39;00m dtype_backend \u001b[39mis\u001b[39;00m lib\u001b[39m.\u001b[39mno_default:\n\u001b[0;32m 148\u001b[0m \u001b[39mreturn\u001b[39;00m feather\u001b[39m.\u001b[39mread_feather(\n\u001b[0;32m 149\u001b[0m handles\u001b[39m.\u001b[39mhandle, columns\u001b[39m=\u001b[39mcolumns, use_threads\u001b[39m=\u001b[39m\u001b[39mbool\u001b[39m(use_threads)\n\u001b[0;32m 150\u001b[0m )\n",
74
- "File \u001b[1;32mc:\\Users\\Leems\\Desktop\\Coding\\Projects\\fritz\\venv\\Lib\\site-packages\\pandas\\io\\common.py:868\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 859\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(\n\u001b[0;32m 860\u001b[0m handle,\n\u001b[0;32m 861\u001b[0m ioargs\u001b[39m.\u001b[39mmode,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 864\u001b[0m newline\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 865\u001b[0m )\n\u001b[0;32m 866\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 867\u001b[0m \u001b[39m# Binary mode\u001b[39;00m\n\u001b[1;32m--> 868\u001b[0m handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(handle, ioargs\u001b[39m.\u001b[39;49mmode)\n\u001b[0;32m 869\u001b[0m handles\u001b[39m.\u001b[39mappend(handle)\n\u001b[0;32m 871\u001b[0m \u001b[39m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
75
- "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: './data/libraries/APSP_50_allenai-specter/'"
76
- ]
77
- }
78
- ],
79
- "source": [
80
- "## Load library\n",
81
- "library_path = \"./data/libraries/APSP_50_allenai-specter/\"\n",
82
- "path_to_library_embeddings = (\n",
83
- " \"./data/libraries/APSP_50_allenai-specter/embeddings.feather\"\n",
84
- ")\n",
85
- "\n",
86
- "library = ArXivData()\n",
87
- "library.load_from_feather(library_path)"
88
- ]
89
- },
90
- {
91
- "cell_type": "code",
92
- "execution_count": 40,
93
- "metadata": {},
94
- "outputs": [],
95
- "source": [
96
- "## Prepare the Library\n",
97
- "\n",
98
- "cleaner = TextCleaner()\n",
99
- "embedder = Embedder()\n",
100
- "\n",
101
- "clean_library = cleaner.transform(library)\n",
102
- "prepped_library = embedder.transform(\n",
103
- " X=clean_library, load_from_file=True, path_to_embeddings=path_to_library_embeddings\n",
104
- ")"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": 48,
110
- "metadata": {},
111
- "outputs": [],
112
- "source": [
113
- "## retrieve and pre-process the input\n",
114
- "\n",
115
- "input_id = \"1602.00730\"\n",
116
- "\n",
117
- "## create query string\n",
118
- "\n",
119
- "id_list = [input_id]\n",
120
- "\n",
121
- "input_article = ArXivData()\n",
122
- "input_article.load_from_id_list(id_list=id_list)"
123
- ]
124
- },
125
- {
126
- "cell_type": "code",
127
- "execution_count": 49,
128
- "metadata": {},
129
- "outputs": [
130
- {
131
- "name": "stderr",
132
- "output_type": "stream",
133
- "text": [
134
- "c:\\Users\\Leems\\Desktop\\Coding\\Projects\\Fritz\\cleaning.py:23: SettingWithCopyWarning: \n",
135
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
136
- "\n",
137
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
138
- " X.metadata.msc_tags[X.metadata.msc_tags.notna()] = X.metadata.msc_tags[\n",
139
- "Batches: 100%|██████████| 1/1 [00:00<00:00, 4.39it/s]\n"
140
- ]
141
- }
142
- ],
143
- "source": [
144
- "## Clean and process the input\n",
145
- "\n",
146
- "clean_input_article = cleaner.transform(input_article)\n",
147
- "prepped_input_article = embedder.transform(\n",
148
- " X=clean_input_article,\n",
149
- " model_name=\"allenai-specter\",\n",
150
- " path_to_embeddings=\"./data/input_embedding.feather\",\n",
151
- ")"
152
- ]
153
- },
154
- {
155
- "cell_type": "code",
156
- "execution_count": 55,
157
- "metadata": {},
158
- "outputs": [],
159
- "source": [
160
- "## Perform the search and return the closest matches\n",
161
- "\n",
162
- "matches = util.semantic_search(\n",
163
- " query_embeddings=prepped_input_article.embeddings,\n",
164
- " corpus_embeddings=prepped_library.embeddings,\n",
165
- " top_k=5,\n",
166
- ")"
167
- ]
168
- },
169
- {
170
- "cell_type": "code",
171
- "execution_count": 57,
172
- "metadata": {},
173
- "outputs": [],
174
- "source": [
175
- "indices = [dict[\"corpus_id\"] for dict in matches[0]]"
176
- ]
177
- },
178
- {
179
- "cell_type": "code",
180
- "execution_count": 59,
181
- "metadata": {},
182
- "outputs": [
183
- {
184
- "data": {
185
- "text/plain": [
186
- "[38787, 39127, 9786, 49609, 14857]"
187
- ]
188
- },
189
- "execution_count": 59,
190
- "metadata": {},
191
- "output_type": "execute_result"
192
- }
193
- ],
194
- "source": [
195
- "indices"
196
- ]
197
- },
198
- {
199
- "cell_type": "code",
200
- "execution_count": 60,
201
- "metadata": {},
202
- "outputs": [
203
- {
204
- "data": {
205
- "text/html": [
206
- "<div>\n",
207
- "<style scoped>\n",
208
- " .dataframe tbody tr th:only-of-type {\n",
209
- " vertical-align: middle;\n",
210
- " }\n",
211
- "\n",
212
- " .dataframe tbody tr th {\n",
213
- " vertical-align: top;\n",
214
- " }\n",
215
- "\n",
216
- " .dataframe thead th {\n",
217
- " text-align: right;\n",
218
- " }\n",
219
- "</style>\n",
220
- "<table border=\"1\" class=\"dataframe\">\n",
221
- " <thead>\n",
222
- " <tr style=\"text-align: right;\">\n",
223
- " <th></th>\n",
224
- " <th>title</th>\n",
225
- " <th>abstract</th>\n",
226
- " <th>id</th>\n",
227
- " <th>arxiv_subjects</th>\n",
228
- " <th>msc_tags</th>\n",
229
- " <th>doc_strings</th>\n",
230
- " </tr>\n",
231
- " </thead>\n",
232
- " <tbody>\n",
233
- " <tr>\n",
234
- " <th>38787</th>\n",
235
- " <td>C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian</td>\n",
236
- " <td>This article concerns new off-diagonal estimates on the remainder and its derivatives in the pointwise Weyl law on a compact n-dimensional Riemannian manifold. As an application, we prove that near any non self-focal point, the scaling limit of the spectral projector of the Laplacian onto frequency windows of constant size is a normalized Bessel function depending only on n.</td>\n",
237
- " <td>1602.00730v1</td>\n",
238
- " <td>[math.AP, math-ph, math.DG, math.FA, math.MP, math.SP]</td>\n",
239
- " <td>None</td>\n",
240
- " <td>C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian This article concerns new off-diagonal estimates on the remainder and its derivatives in the pointwise Weyl law on a compact n-dimensional Riemannian manifold. As an application, we prove that near any non self-focal point, the scaling limit of the spectral projector of the Laplacian onto frequency windows of constant size is a normalized Bessel function depending only on n.</td>\n",
241
- " </tr>\n",
242
- " <tr>\n",
243
- " <th>39127</th>\n",
244
- " <td>Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law</td>\n",
245
- " <td>Let (M, g) be a compact smooth Riemannian manifold. We obtain new off-diagonal estimates as {ambda} tend to infinity for the remainder in the pointwise Weyl Law for the kernel of the spectral projector of the Laplacian onto functions with frequency at most {ambda}. A corollary is that, when rescaled around a non self-focal point, the kernel of the spectral projector onto the frequency interval (ambda, ambda + 1] has a universal scaling limit as {ambda} goes to infinity (depending only on the dimension of M). Our results also imply that if M has no conjugate points, then immersions of M into Euclidean space by an orthonormal basis of eigenfunctions with frequencies in (ambda, ambda + 1] are embeddings for all {ambda} sufficiently large.</td>\n",
246
- " <td>1411.0658v3</td>\n",
247
- " <td>[math.SP, math.AP, math.DG]</td>\n",
248
- " <td>None</td>\n",
249
- " <td>Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law Let (M, g) be a compact smooth Riemannian manifold. We obtain new off-diagonal estimates as {ambda} tend to infinity for the remainder in the pointwise Weyl Law for the kernel of the spectral projector of the Laplacian onto functions with frequency at most {ambda}. A corollary is that, when rescaled around a non self-focal point, the kernel of the spectral projector onto the frequency interval (ambda, ambda + 1] has a universal scaling limit as {ambda} goes to infinity (depending only on the dimension of M). Our results also imply that if M has no conjugate points, then immersions of M into Euclidean space by an orthonormal basis of eigenfunctions with frequencies in (ambda, ambda + 1] are embeddings for all {ambda} sufficiently large.</td>\n",
250
- " </tr>\n",
251
- " <tr>\n",
252
- " <th>9786</th>\n",
253
- " <td>A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points</td>\n",
254
- " <td>In this paper, we study the two-point Weyl Law for the Laplace-Beltrami operator on a smooth, compact Riemannian manifold LATEX with no conjugate points. That is, we find the asymptotic behavior of the Schwartz kernel, LATEX of the projection operator from LATEX onto the direct sum of eigenspaces with eigenvalue smaller than LATEX as LATEX In the regime where LATEX are restricted to a compact neighborhood of the diagonal in LATEX we obtain a uniform logarithmic improvement in the remainder of the asymptotic expansion for LATEX and its derivatives of all orders, which generalizes a result of Berard, who treated the on-diagonal case LATEX When LATEX avoid a compact neighborhood of the diagonal, we obtain this same improvement in an upper bound for LATEX Our results imply that the rescaled covariance kernel of a monochromatic random wave locally converges in the LATEX topology to a universal scaling limit at an inverse logarithmic rate.</td>\n",
255
- " <td>1905.05136v3</td>\n",
256
- " <td>[math.AP, math.SP]</td>\n",
257
- " <td>[Asymptotic distributions of eigenvalues in context of PDEs]</td>\n",
258
- " <td>A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points In this paper, we study the two-point Weyl Law for the Laplace-Beltrami operator on a smooth, compact Riemannian manifold LATEX with no conjugate points. That is, we find the asymptotic behavior of the Schwartz kernel, LATEX of the projection operator from LATEX onto the direct sum of eigenspaces with eigenvalue smaller than LATEX as LATEX In the regime where LATEX are restricted to a compact neighborhood of the diagonal in LATEX we obtain a uniform logarithmic improvement in the remainder of the asymptotic expansion for LATEX and its derivatives of all orders, which generalizes a result of Berard, who treated the on-diagonal case LATEX When LATEX avoid a compact neighborhood of the diagonal, we obtain this same improvement in an upper bound for LATEX Our results imply that the rescaled covariance kernel of a monochromatic random wave locally converges in the LATEX topology to a universal scaling limit at an inverse logarithmic rate.</td>\n",
259
- " </tr>\n",
260
- " <tr>\n",
261
- " <th>49609</th>\n",
262
- " <td>The blowup along the diagonal of the spectral function of the Laplacian</td>\n",
263
- " <td>We formulate a precise conjecture about the universal behavior near the diagonal of the spectral function of the Laplacian of a smooth compact Riemann manifold. We prove this conjecture when the manifold and the metric are real analytic, and we also present an alternate proof when the manifold is the round sphere.</td>\n",
264
- " <td>1103.1276v4</td>\n",
265
- " <td>[math.DG, math-ph, math.AP, math.MP]</td>\n",
266
- " <td>[Spectral problems; spectral geometry; scattering theory on manifolds, Second-order elliptic equations]</td>\n",
267
- " <td>The blowup along the diagonal of the spectral function of the Laplacian We formulate a precise conjecture about the universal behavior near the diagonal of the spectral function of the Laplacian of a smooth compact Riemann manifold. We prove this conjecture when the manifold and the metric are real analytic, and we also present an alternate proof when the manifold is the round sphere.</td>\n",
268
- " </tr>\n",
269
- " <tr>\n",
270
- " <th>14857</th>\n",
271
- " <td>Growth of high LATEX norms for eigenfunctions: an application of geodesic beams</td>\n",
272
- " <td>This work concerns LATEX norms of high energy Laplace eigenfunctions, LATEX LATEX In 1988, Sogge gave optimal estimates on the growth of LATEX for a general compact Riemannian manifold. The goal of this article is to give general dynamical conditions guaranteeing quantitative improvements in LATEX estimates for LATEX where LATEX is the critical exponent. We also apply previous results of the authors to obtain quantitative improvements in concrete geometric settings including all product manifolds. These are the first results improving estimates for the LATEX growth of eigenfunctions that only require dynamical assumptions. In contrast with previous improvements, our assumptions are local in the sense that they depend only on the geodesics passing through a shrinking neighborhood of a given set in LATEX Moreover, the article gives a structure theorem for eigenfunctions which saturate the quantitatively improved LATEX bound. Modulo an error, the theorem describes these eigenfunctions as finite sums of quasimodes which, roughly, approximate zonal harmonics on the sphere scaled by LATEX</td>\n",
273
- " <td>2003.04597v2</td>\n",
274
- " <td>[math.AP, math.SP]</td>\n",
275
- " <td>None</td>\n",
276
- " <td>Growth of high LATEX norms for eigenfunctions: an application of geodesic beams This work concerns LATEX norms of high energy Laplace eigenfunctions, LATEX LATEX In 1988, Sogge gave optimal estimates on the growth of LATEX for a general compact Riemannian manifold. The goal of this article is to give general dynamical conditions guaranteeing quantitative improvements in LATEX estimates for LATEX where LATEX is the critical exponent. We also apply previous results of the authors to obtain quantitative improvements in concrete geometric settings including all product manifolds. These are the first results improving estimates for the LATEX growth of eigenfunctions that only require dynamical assumptions. In contrast with previous improvements, our assumptions are local in the sense that they depend only on the geodesics passing through a shrinking neighborhood of a given set in LATEX Moreover, the article gives a structure theorem for eigenfunctions which saturate the quantitatively improved LATEX bound. Modulo an error, the theorem describes these eigenfunctions as finite sums of quasimodes which, roughly, approximate zonal harmonics on the sphere scaled by LATEX</td>\n",
277
- " </tr>\n",
278
- " </tbody>\n",
279
- "</table>\n",
280
- "</div>"
281
- ],
282
- "text/plain": [
283
- " title \\\n",
284
- "38787 C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian \n",
285
- "39127 Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law \n",
286
- "9786 A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points \n",
287
- "49609 The blowup along the diagonal of the spectral function of the Laplacian \n",
288
- "14857 Growth of high LATEX norms for eigenfunctions: an application of geodesic beams \n",
289
- "\n",
290
- " abstract \\\n",
291
- "38787 This article concerns new off-diagonal estimates on the remainder and its derivatives in the pointwise Weyl law on a compact n-dimensional Riemannian manifold. As an application, we prove that near any non self-focal point, the scaling limit of the spectral projector of the Laplacian onto frequency windows of constant size is a normalized Bessel function depending only on n. \n",
292
- "39127 Let (M, g) be a compact smooth Riemannian manifold. We obtain new off-diagonal estimates as {ambda} tend to infinity for the remainder in the pointwise Weyl Law for the kernel of the spectral projector of the Laplacian onto functions with frequency at most {ambda}. A corollary is that, when rescaled around a non self-focal point, the kernel of the spectral projector onto the frequency interval (ambda, ambda + 1] has a universal scaling limit as {ambda} goes to infinity (depending only on the dimension of M). Our results also imply that if M has no conjugate points, then immersions of M into Euclidean space by an orthonormal basis of eigenfunctions with frequencies in (ambda, ambda + 1] are embeddings for all {ambda} sufficiently large. \n",
293
- "9786 In this paper, we study the two-point Weyl Law for the Laplace-Beltrami operator on a smooth, compact Riemannian manifold LATEX with no conjugate points. That is, we find the asymptotic behavior of the Schwartz kernel, LATEX of the projection operator from LATEX onto the direct sum of eigenspaces with eigenvalue smaller than LATEX as LATEX In the regime where LATEX are restricted to a compact neighborhood of the diagonal in LATEX we obtain a uniform logarithmic improvement in the remainder of the asymptotic expansion for LATEX and its derivatives of all orders, which generalizes a result of Berard, who treated the on-diagonal case LATEX When LATEX avoid a compact neighborhood of the diagonal, we obtain this same improvement in an upper bound for LATEX Our results imply that the rescaled covariance kernel of a monochromatic random wave locally converges in the LATEX topology to a universal scaling limit at an inverse logarithmic rate. \n",
294
- "49609 We formulate a precise conjecture about the universal behavior near the diagonal of the spectral function of the Laplacian of a smooth compact Riemann manifold. We prove this conjecture when the manifold and the metric are real analytic, and we also present an alternate proof when the manifold is the round sphere. \n",
295
- "14857 This work concerns LATEX norms of high energy Laplace eigenfunctions, LATEX LATEX In 1988, Sogge gave optimal estimates on the growth of LATEX for a general compact Riemannian manifold. The goal of this article is to give general dynamical conditions guaranteeing quantitative improvements in LATEX estimates for LATEX where LATEX is the critical exponent. We also apply previous results of the authors to obtain quantitative improvements in concrete geometric settings including all product manifolds. These are the first results improving estimates for the LATEX growth of eigenfunctions that only require dynamical assumptions. In contrast with previous improvements, our assumptions are local in the sense that they depend only on the geodesics passing through a shrinking neighborhood of a given set in LATEX Moreover, the article gives a structure theorem for eigenfunctions which saturate the quantitatively improved LATEX bound. Modulo an error, the theorem describes these eigenfunctions as finite sums of quasimodes which, roughly, approximate zonal harmonics on the sphere scaled by LATEX \n",
296
- "\n",
297
- " id arxiv_subjects \\\n",
298
- "38787 1602.00730v1 [math.AP, math-ph, math.DG, math.FA, math.MP, math.SP] \n",
299
- "39127 1411.0658v3 [math.SP, math.AP, math.DG] \n",
300
- "9786 1905.05136v3 [math.AP, math.SP] \n",
301
- "49609 1103.1276v4 [math.DG, math-ph, math.AP, math.MP] \n",
302
- "14857 2003.04597v2 [math.AP, math.SP] \n",
303
- "\n",
304
- " msc_tags \\\n",
305
- "38787 None \n",
306
- "39127 None \n",
307
- "9786 [Asymptotic distributions of eigenvalues in context of PDEs] \n",
308
- "49609 [Spectral problems; spectral geometry; scattering theory on manifolds, Second-order elliptic equations] \n",
309
- "14857 None \n",
310
- "\n",
311
- " doc_strings \n",
312
- "38787 C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian This article concerns new off-diagonal estimates on the remainder and its derivatives in the pointwise Weyl law on a compact n-dimensional Riemannian manifold. As an application, we prove that near any non self-focal point, the scaling limit of the spectral projector of the Laplacian onto frequency windows of constant size is a normalized Bessel function depending only on n. \n",
313
- "39127 Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law Let (M, g) be a compact smooth Riemannian manifold. We obtain new off-diagonal estimates as {ambda} tend to infinity for the remainder in the pointwise Weyl Law for the kernel of the spectral projector of the Laplacian onto functions with frequency at most {ambda}. A corollary is that, when rescaled around a non self-focal point, the kernel of the spectral projector onto the frequency interval (ambda, ambda + 1] has a universal scaling limit as {ambda} goes to infinity (depending only on the dimension of M). Our results also imply that if M has no conjugate points, then immersions of M into Euclidean space by an orthonormal basis of eigenfunctions with frequencies in (ambda, ambda + 1] are embeddings for all {ambda} sufficiently large. \n",
314
- "9786 A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points In this paper, we study the two-point Weyl Law for the Laplace-Beltrami operator on a smooth, compact Riemannian manifold LATEX with no conjugate points. That is, we find the asymptotic behavior of the Schwartz kernel, LATEX of the projection operator from LATEX onto the direct sum of eigenspaces with eigenvalue smaller than LATEX as LATEX In the regime where LATEX are restricted to a compact neighborhood of the diagonal in LATEX we obtain a uniform logarithmic improvement in the remainder of the asymptotic expansion for LATEX and its derivatives of all orders, which generalizes a result of Berard, who treated the on-diagonal case LATEX When LATEX avoid a compact neighborhood of the diagonal, we obtain this same improvement in an upper bound for LATEX Our results imply that the rescaled covariance kernel of a monochromatic random wave locally converges in the LATEX topology to a universal scaling limit at an inverse logarithmic rate. \n",
315
- "49609 The blowup along the diagonal of the spectral function of the Laplacian We formulate a precise conjecture about the universal behavior near the diagonal of the spectral function of the Laplacian of a smooth compact Riemann manifold. We prove this conjecture when the manifold and the metric are real analytic, and we also present an alternate proof when the manifold is the round sphere. \n",
316
- "14857 Growth of high LATEX norms for eigenfunctions: an application of geodesic beams This work concerns LATEX norms of high energy Laplace eigenfunctions, LATEX LATEX In 1988, Sogge gave optimal estimates on the growth of LATEX for a general compact Riemannian manifold. The goal of this article is to give general dynamical conditions guaranteeing quantitative improvements in LATEX estimates for LATEX where LATEX is the critical exponent. We also apply previous results of the authors to obtain quantitative improvements in concrete geometric settings including all product manifolds. These are the first results improving estimates for the LATEX growth of eigenfunctions that only require dynamical assumptions. In contrast with previous improvements, our assumptions are local in the sense that they depend only on the geodesics passing through a shrinking neighborhood of a given set in LATEX Moreover, the article gives a structure theorem for eigenfunctions which saturate the quantitatively improved LATEX bound. Modulo an error, the theorem describes these eigenfunctions as finite sums of quasimodes which, roughly, approximate zonal harmonics on the sphere scaled by LATEX "
317
- ]
318
- },
319
- "execution_count": 60,
320
- "metadata": {},
321
- "output_type": "execute_result"
322
- }
323
- ],
324
- "source": [
325
- "## Retrieve indices\n",
326
- "\n",
327
- "prepped_library._returned_metadata.iloc[indices]"
328
- ]
329
- },
330
- {
331
- "cell_type": "code",
332
- "execution_count": 18,
333
- "metadata": {},
334
- "outputs": [],
335
- "source": [
336
- "id_list = [\"1602.00730\"]"
337
- ]
338
- },
339
- {
340
- "cell_type": "code",
341
- "execution_count": 10,
342
- "metadata": {},
343
- "outputs": [
344
- {
345
- "data": {
346
- "text/plain": [
347
- "<module 'src.search' from 'c:\\\\Users\\\\Leems\\\\Desktop\\\\Coding\\\\Projects\\\\fritz\\\\src\\\\search.py'>"
348
- ]
349
- },
350
- "execution_count": 10,
351
- "metadata": {},
352
- "output_type": "execute_result"
353
- }
354
- ],
355
- "source": [
356
- "import src.embedding as embedding\n",
357
- "import src.search as search\n",
358
- "import importlib\n",
359
- "from src.storage import Fetch\n",
360
- "from src.cleaning import TextCleaner\n",
361
- "from src.embedding import Embedder\n",
362
- "from src.search import Search\n",
363
- "\n",
364
- "importlib.reload(embedding)\n",
365
- "importlib.reload(search)"
366
- ]
367
- },
368
- {
369
- "cell_type": "code",
370
- "execution_count": 11,
371
- "metadata": {},
372
- "outputs": [
373
- {
374
- "ename": "NameError",
375
- "evalue": "name 'id_list' is not defined",
376
- "output_type": "error",
377
- "traceback": [
378
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
379
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
380
- "Cell \u001b[1;32mIn[11], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m## Fetch metadata of input\u001b[39;00m\n\u001b[0;32m 2\u001b[0m getter \u001b[39m=\u001b[39m Fetch()\n\u001b[1;32m----> 3\u001b[0m into_cleaner \u001b[39m=\u001b[39m getter\u001b[39m.\u001b[39mtransform(X\u001b[39m=\u001b[39mid_list)\n",
381
- "\u001b[1;31mNameError\u001b[0m: name 'id_list' is not defined"
382
- ]
383
- }
384
- ],
385
- "source": [
386
- "## Fetch metadata of input\n",
387
- "getter = Fetch()\n",
388
- "into_cleaner = getter.transform(X=id_list)"
389
- ]
390
- },
391
- {
392
- "cell_type": "code",
393
- "execution_count": 12,
394
- "metadata": {},
395
- "outputs": [
396
- {
397
- "ename": "NameError",
398
- "evalue": "name 'into_cleaner' is not defined",
399
- "output_type": "error",
400
- "traceback": [
401
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
402
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
403
- "Cell \u001b[1;32mIn[12], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m cleaner \u001b[39m=\u001b[39m TextCleaner()\n\u001b[1;32m----> 3\u001b[0m into_embedder \u001b[39m=\u001b[39m cleaner\u001b[39m.\u001b[39mtransform(into_cleaner)\n",
404
- "\u001b[1;31mNameError\u001b[0m: name 'into_cleaner' is not defined"
405
- ]
406
- }
407
- ],
408
- "source": [
409
- "cleaner = TextCleaner()\n",
410
- "\n",
411
- "into_embedder = cleaner.transform(into_cleaner)"
412
- ]
413
- },
414
- {
415
- "cell_type": "code",
416
- "execution_count": 13,
417
- "metadata": {},
418
- "outputs": [
419
- {
420
- "ename": "NameError",
421
- "evalue": "name 'into_embedder' is not defined",
422
- "output_type": "error",
423
- "traceback": [
424
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
425
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
426
- "Cell \u001b[1;32mIn[13], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m embedder \u001b[39m=\u001b[39m Embedder(model_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mallenai-specter\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 2\u001b[0m into_search \u001b[39m=\u001b[39m embedder\u001b[39m.\u001b[39mtransform(into_embedder)\n",
427
- "\u001b[1;31mNameError\u001b[0m: name 'into_embedder' is not defined"
428
- ]
429
- }
430
- ],
431
- "source": [
432
- "embedder = Embedder(model_name=\"allenai-specter\")\n",
433
- "into_search = embedder.transform(into_embedder)"
434
- ]
435
- },
436
- {
437
- "cell_type": "code",
438
- "execution_count": 14,
439
- "metadata": {},
440
- "outputs": [
441
- {
442
- "ename": "NameError",
443
- "evalue": "name 'into_search' is not defined",
444
- "output_type": "error",
445
- "traceback": [
446
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
447
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
448
- "Cell \u001b[1;32mIn[14], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m search \u001b[39m=\u001b[39m Search(path_to_library\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m./data/libraries/APSP_50_allenai-specter/\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m----> 3\u001b[0m search\u001b[39m.\u001b[39mtransform(X\u001b[39m=\u001b[39minto_search)\u001b[39m.\u001b[39mid\u001b[39m.\u001b[39mto_list()\n",
449
- "\u001b[1;31mNameError\u001b[0m: name 'into_search' is not defined"
450
- ]
451
- }
452
- ],
453
- "source": [
454
- "search = Search(path_to_library=\"./data/libraries/APSP_50_allenai-specter/\")\n",
455
- "\n",
456
- "search.transform(X=into_search).id.to_list()"
457
- ]
458
- },
459
- {
460
- "cell_type": "code",
461
- "execution_count": 15,
462
- "metadata": {},
463
- "outputs": [
464
- {
465
- "ename": "NameError",
466
- "evalue": "name 'id_list' is not defined",
467
- "output_type": "error",
468
- "traceback": [
469
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
470
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
471
- "Cell \u001b[1;32mIn[15], line 13\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msklearn\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpipeline\u001b[39;00m \u001b[39mimport\u001b[39;00m Pipeline\n\u001b[0;32m 3\u001b[0m pipe \u001b[39m=\u001b[39m Pipeline(\n\u001b[0;32m 4\u001b[0m [\n\u001b[0;32m 5\u001b[0m (\u001b[39m\"\u001b[39m\u001b[39mfetch\u001b[39m\u001b[39m\"\u001b[39m, Fetch()),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 9\u001b[0m ]\n\u001b[0;32m 10\u001b[0m )\n\u001b[1;32m---> 13\u001b[0m pipe\u001b[39m.\u001b[39mtransform(X\u001b[39m=\u001b[39mid_list)\n",
472
- "\u001b[1;31mNameError\u001b[0m: name 'id_list' is not defined"
473
- ]
474
- }
475
- ],
476
- "source": [
477
- "from sklearn.pipeline import Pipeline\n",
478
- "\n",
479
- "pipe = Pipeline(\n",
480
- " [\n",
481
- " (\"fetch\", Fetch()),\n",
482
- " (\"clean\", TextCleaner()),\n",
483
- " (\"embed\", Embedder(model_name=\"allenai-specter\")),\n",
484
- " (\"search\", Search(path_to_library=\"./data/libraries/APSP_50_allenai-specter/\")),\n",
485
- " ]\n",
486
- ")\n",
487
- "\n",
488
- "\n",
489
- "pipe.transform(X=id_list)"
490
- ]
491
- },
492
- {
493
- "cell_type": "code",
494
- "execution_count": 16,
495
- "metadata": {},
496
- "outputs": [],
497
- "source": [
498
- "import src.model\n",
499
- "import importlib\n",
500
- "\n",
501
- "importlib.reload(src.model)\n",
502
- "from src.model import main"
503
- ]
504
- },
505
- {
506
- "cell_type": "code",
507
- "execution_count": 17,
508
- "metadata": {},
509
- "outputs": [
510
- {
511
- "data": {
512
- "text/html": [
513
- "<div>\n",
514
- "<style scoped>\n",
515
- " .dataframe tbody tr th:only-of-type {\n",
516
- " vertical-align: middle;\n",
517
- " }\n",
518
- "\n",
519
- " .dataframe tbody tr th {\n",
520
- " vertical-align: top;\n",
521
- " }\n",
522
- "\n",
523
- " .dataframe thead th {\n",
524
- " text-align: right;\n",
525
- " }\n",
526
- "</style>\n",
527
- "<table border=\"1\" class=\"dataframe\">\n",
528
- " <thead>\n",
529
- " <tr style=\"text-align: right;\">\n",
530
- " <th></th>\n",
531
- " <th>title</th>\n",
532
- " <th>abstract</th>\n",
533
- " <th>id</th>\n",
534
- " <th>arxiv_subjects</th>\n",
535
- " <th>msc_tags</th>\n",
536
- " </tr>\n",
537
- " </thead>\n",
538
- " <tbody>\n",
539
- " <tr>\n",
540
- " <th>9786</th>\n",
541
- " <td>A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points</td>\n",
542
- " <td>In this paper, we study the two-point Weyl Law for the Laplace-Beltrami\\noperator on a smooth, compact Riemannian manifold $M$ with no conjugate points.\\nThat is, we find the asymptotic behavior of the Schwartz kernel,\\n$E_\\lambda(x,y)$, of the projection operator from $L^2(M)$ onto the direct sum\\nof eigenspaces with eigenvalue smaller than $\\lambda^2$ as $\\lambda \\to\\infty$.\\nIn the regime where $x,y$ are restricted to a compact neighborhood of the\\ndiagonal in $M\\times M$, we obtain a uniform logarithmic improvement in the\\nremainder of the asymptotic expansion for $E_\\lambda$ and its derivatives of\\nall orders, which generalizes a result of B\\'erard, who treated the on-diagonal\\ncase $E_\\lambda(x,x)$. When $x,y$ avoid a compact neighborhood of the diagonal,\\nwe obtain this same improvement in an upper bound for $E_\\lambda$. Our results\\nimply that the rescaled covariance kernel of a monochromatic random wave\\nlocally converges in the $C^\\infty$ topology to a universal scaling limit at an\\ninverse logarithmic rate.</td>\n",
543
- " <td>1905.05136v3</td>\n",
544
- " <td>[math.AP, math.SP]</td>\n",
545
- " <td>[35P20]</td>\n",
546
- " </tr>\n",
547
- " <tr>\n",
548
- " <th>39127</th>\n",
549
- " <td>Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law</td>\n",
550
- " <td>Let (M, g) be a compact smooth Riemannian manifold. We obtain new\\noff-diagonal estimates as {\\lambda} tend to infinity for the remainder in the\\npointwise Weyl Law for the kernel of the spectral projector of the Laplacian\\nonto functions with frequency at most {\\lambda}. A corollary is that, when\\nrescaled around a non self-focal point, the kernel of the spectral projector\\nonto the frequency interval (\\lambda, \\lambda + 1] has a universal scaling\\nlimit as {\\lambda} goes to infinity (depending only on the dimension of M). Our\\nresults also imply that if M has no conjugate points, then immersions of M into\\nEuclidean space by an orthonormal basis of eigenfunctions with frequencies in\\n(\\lambda, \\lambda + 1] are embeddings for all {\\lambda} sufficiently large.</td>\n",
551
- " <td>1411.0658v3</td>\n",
552
- " <td>[math.SP, math.AP, math.DG]</td>\n",
553
- " <td>None</td>\n",
554
- " </tr>\n",
555
- " <tr>\n",
556
- " <th>46524</th>\n",
557
- " <td>On $L^p$-resolvent estimates and the density of eigenvalues for compact Riemannian manifolds</td>\n",
558
- " <td>We address an interesting question raised by Dos Santos Ferreira, Kenig and\\nSalo about regions ${\\mathcal R}_g\\subset {\\mathbb C}$ for which there can be\\nuniform $L^{\\frac{2n}{n+2}}\\to L^{\\frac{2n}{n-2}}$ resolvent estimates for\\n$\\Delta_g+\\zeta$, $\\zeta \\in {\\mathcal R}_g$, where $\\Delta_g$ is the\\nLaplace-Beltrami operator with metric $g$ on a given compact boundaryless\\nRiemannian manifold of dimension $n\\ge3$. This is related to earlier work of\\nKenig, Ruiz and the third author for the Euclidean Laplacian, in which case the\\nregion is the entire complex plane minus any disc centered at the origin.\\nPresently, we show that for the round metric on the sphere, $S^n$, the\\nresolvent estimates in Ferreira et al, involving a much smaller region, are\\nessentially optimal. We do this by establishing sharp bounds based on the\\ndistance from $\\zeta$ to the spectrum of $\\Delta_{S^n}$.\\n In the other direction, we also show that the bounds in \\cite{Kenig} can be\\nsharpened logarithmically for manifolds with nonpositive curvature, and by\\npowers in the case of the torus, ${\\mathbb T}^n={\\mathbb R}^n/{\\mathbb Z}^n$,\\nwith the flat metric. The latter improves earlier bounds of Shen.\\n Further improvements for the torus are obtained using recent techniques of\\nthe first author and his work with Guth based on the multilinear estimates of\\nBennett, Carbery and Tao. Our approach also allows us to give a natural\\nnecessary condition for favorable resolvent estimates that is based on a\\nmeasurement of the density of the spectrum of $\\sqrt{-\\Delta_g}$, and,\\nmoreover, a necessary and sufficient condition based on natural improved\\nspectral projection estimates for shrinking intervals.</td>\n",
559
- " <td>1204.3927v3</td>\n",
560
- " <td>[math.AP, math.CA]</td>\n",
561
- " <td>[58J50]</td>\n",
562
- " </tr>\n",
563
- " <tr>\n",
564
- " <th>38787</th>\n",
565
- " <td>C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian</td>\n",
566
- " <td>This article concerns new off-diagonal estimates on the remainder and its\\nderivatives in the pointwise Weyl law on a compact n-dimensional Riemannian\\nmanifold. As an application, we prove that near any non self-focal point, the\\nscaling limit of the spectral projector of the Laplacian onto frequency windows\\nof constant size is a normalized Bessel function depending only on n.</td>\n",
567
- " <td>1602.00730v1</td>\n",
568
- " <td>[math.AP, math-ph, math.DG, math.FA, math.MP, math.SP]</td>\n",
569
- " <td>None</td>\n",
570
- " </tr>\n",
571
- " <tr>\n",
572
- " <th>38299</th>\n",
573
- " <td>A lower bound for the $Θ$ function on manifolds without conjugate points</td>\n",
574
- " <td>In this short note, we prove that the usual $\\Theta$ function on a Riemannian\\nmanifold without conjugate points is uniformly bounded from below. This extends\\na result of Green in two dimensions. This elementary lemma implies that the\\nB\\'erard remainder in the Weyl law is valid for a manifold without conjugate\\npoints, without any restriction on the dimension.</td>\n",
575
- " <td>1603.05697v1</td>\n",
576
- " <td>[math.DG, math.SP]</td>\n",
577
- " <td>None</td>\n",
578
- " </tr>\n",
579
- " </tbody>\n",
580
- "</table>\n",
581
- "</div>"
582
- ],
583
- "text/plain": [
584
- " title \\\n",
585
- "9786 A logarithmic improvement in the two-point Weyl law for manifolds without conjugate points \n",
586
- "39127 Scaling Limit for the Kernel of the Spectral Projector and Remainder Estimates in the Pointwise Weyl Law \n",
587
- "46524 On $L^p$-resolvent estimates and the density of eigenvalues for compact Riemannian manifolds \n",
588
- "38787 C-infinity Scaling Asymptotics for the Spectral Function of the Laplacian \n",
589
- "38299 A lower bound for the $Θ$ function on manifolds without conjugate points \n",
590
- "\n",
591
- " abstract \\\n",
592
- "9786 In this paper, we study the two-point Weyl Law for the Laplace-Beltrami\\noperator on a smooth, compact Riemannian manifold $M$ with no conjugate points.\\nThat is, we find the asymptotic behavior of the Schwartz kernel,\\n$E_\\lambda(x,y)$, of the projection operator from $L^2(M)$ onto the direct sum\\nof eigenspaces with eigenvalue smaller than $\\lambda^2$ as $\\lambda \\to\\infty$.\\nIn the regime where $x,y$ are restricted to a compact neighborhood of the\\ndiagonal in $M\\times M$, we obtain a uniform logarithmic improvement in the\\nremainder of the asymptotic expansion for $E_\\lambda$ and its derivatives of\\nall orders, which generalizes a result of B\\'erard, who treated the on-diagonal\\ncase $E_\\lambda(x,x)$. When $x,y$ avoid a compact neighborhood of the diagonal,\\nwe obtain this same improvement in an upper bound for $E_\\lambda$. Our results\\nimply that the rescaled covariance kernel of a monochromatic random wave\\nlocally converges in the $C^\\infty$ topology to a universal scaling limit at an\\ninverse logarithmic rate. \n",
593
- "39127 Let (M, g) be a compact smooth Riemannian manifold. We obtain new\\noff-diagonal estimates as {\\lambda} tend to infinity for the remainder in the\\npointwise Weyl Law for the kernel of the spectral projector of the Laplacian\\nonto functions with frequency at most {\\lambda}. A corollary is that, when\\nrescaled around a non self-focal point, the kernel of the spectral projector\\nonto the frequency interval (\\lambda, \\lambda + 1] has a universal scaling\\nlimit as {\\lambda} goes to infinity (depending only on the dimension of M). Our\\nresults also imply that if M has no conjugate points, then immersions of M into\\nEuclidean space by an orthonormal basis of eigenfunctions with frequencies in\\n(\\lambda, \\lambda + 1] are embeddings for all {\\lambda} sufficiently large. \n",
594
- "46524 We address an interesting question raised by Dos Santos Ferreira, Kenig and\\nSalo about regions ${\\mathcal R}_g\\subset {\\mathbb C}$ for which there can be\\nuniform $L^{\\frac{2n}{n+2}}\\to L^{\\frac{2n}{n-2}}$ resolvent estimates for\\n$\\Delta_g+\\zeta$, $\\zeta \\in {\\mathcal R}_g$, where $\\Delta_g$ is the\\nLaplace-Beltrami operator with metric $g$ on a given compact boundaryless\\nRiemannian manifold of dimension $n\\ge3$. This is related to earlier work of\\nKenig, Ruiz and the third author for the Euclidean Laplacian, in which case the\\nregion is the entire complex plane minus any disc centered at the origin.\\nPresently, we show that for the round metric on the sphere, $S^n$, the\\nresolvent estimates in Ferreira et al, involving a much smaller region, are\\nessentially optimal. We do this by establishing sharp bounds based on the\\ndistance from $\\zeta$ to the spectrum of $\\Delta_{S^n}$.\\n In the other direction, we also show that the bounds in \\cite{Kenig} can be\\nsharpened logarithmically for manifolds with nonpositive curvature, and by\\npowers in the case of the torus, ${\\mathbb T}^n={\\mathbb R}^n/{\\mathbb Z}^n$,\\nwith the flat metric. The latter improves earlier bounds of Shen.\\n Further improvements for the torus are obtained using recent techniques of\\nthe first author and his work with Guth based on the multilinear estimates of\\nBennett, Carbery and Tao. Our approach also allows us to give a natural\\nnecessary condition for favorable resolvent estimates that is based on a\\nmeasurement of the density of the spectrum of $\\sqrt{-\\Delta_g}$, and,\\nmoreover, a necessary and sufficient condition based on natural improved\\nspectral projection estimates for shrinking intervals. \n",
595
- "38787 This article concerns new off-diagonal estimates on the remainder and its\\nderivatives in the pointwise Weyl law on a compact n-dimensional Riemannian\\nmanifold. As an application, we prove that near any non self-focal point, the\\nscaling limit of the spectral projector of the Laplacian onto frequency windows\\nof constant size is a normalized Bessel function depending only on n. \n",
596
- "38299 In this short note, we prove that the usual $\\Theta$ function on a Riemannian\\nmanifold without conjugate points is uniformly bounded from below. This extends\\na result of Green in two dimensions. This elementary lemma implies that the\\nB\\'erard remainder in the Weyl law is valid for a manifold without conjugate\\npoints, without any restriction on the dimension. \n",
597
- "\n",
598
- " id arxiv_subjects \\\n",
599
- "9786 1905.05136v3 [math.AP, math.SP] \n",
600
- "39127 1411.0658v3 [math.SP, math.AP, math.DG] \n",
601
- "46524 1204.3927v3 [math.AP, math.CA] \n",
602
- "38787 1602.00730v1 [math.AP, math-ph, math.DG, math.FA, math.MP, math.SP] \n",
603
- "38299 1603.05697v1 [math.DG, math.SP] \n",
604
- "\n",
605
- " msc_tags \n",
606
- "9786 [35P20] \n",
607
- "39127 None \n",
608
- "46524 [58J50] \n",
609
- "38787 None \n",
610
- "38299 None "
611
- ]
612
- },
613
- "execution_count": 17,
614
- "metadata": {},
615
- "output_type": "execute_result"
616
- }
617
- ],
618
- "source": [
619
- "recs = main(id_list=[\"1905.05136v3\"])\n",
620
- "\n",
621
- "recs.head()"
622
- ]
623
- },
624
- {
625
- "cell_type": "code",
626
- "execution_count": 1,
627
- "metadata": {},
628
- "outputs": [],
629
- "source": [
630
- "import pandas as pd\n",
631
- "\n",
632
- "lib = pd.read_feather(\"./data/libraries/APSP_50_allenai-specter/metadata.feather\")"
633
- ]
634
- },
635
- {
636
- "cell_type": "code",
637
- "execution_count": 2,
638
- "metadata": {},
639
- "outputs": [
640
- {
641
- "data": {
642
- "text/html": [
643
- "<div>\n",
644
- "<style scoped>\n",
645
- " .dataframe tbody tr th:only-of-type {\n",
646
- " vertical-align: middle;\n",
647
- " }\n",
648
- "\n",
649
- " .dataframe tbody tr th {\n",
650
- " vertical-align: top;\n",
651
- " }\n",
652
- "\n",
653
- " .dataframe thead th {\n",
654
- " text-align: right;\n",
655
- " }\n",
656
- "</style>\n",
657
- "<table border=\"1\" class=\"dataframe\">\n",
658
- " <thead>\n",
659
- " <tr style=\"text-align: right;\">\n",
660
- " <th></th>\n",
661
- " <th>title</th>\n",
662
- " <th>abstract</th>\n",
663
- " <th>id</th>\n",
664
- " <th>arxiv_subjects</th>\n",
665
- " <th>msc_tags</th>\n",
666
- " </tr>\n",
667
- " </thead>\n",
668
- " <tbody>\n",
669
- " <tr>\n",
670
- " <th>0</th>\n",
671
- " <td>Post-Lie algebras in Regularity Structures</td>\n",
672
- " <td>In this work, we construct the deformed Butche...</td>\n",
673
- " <td>2208.00514v5</td>\n",
674
- " <td>[math.PR, math.AP, math.RA]</td>\n",
675
- " <td>None</td>\n",
676
- " </tr>\n",
677
- " <tr>\n",
678
- " <th>1</th>\n",
679
- " <td>Borderline gradient regularity estimates for q...</td>\n",
680
- " <td>In this paper, we study some regularity issues...</td>\n",
681
- " <td>2307.02420v1</td>\n",
682
- " <td>[math.AP]</td>\n",
683
- " <td>None</td>\n",
684
- " </tr>\n",
685
- " <tr>\n",
686
- " <th>2</th>\n",
687
- " <td>Deep Learning Hydrodynamic Forecasting for Flo...</td>\n",
688
- " <td>Hydrodynamic flood modeling improves hydrologi...</td>\n",
689
- " <td>2305.12052v2</td>\n",
690
- " <td>[cs.LG, math.AP, physics.flu-dyn]</td>\n",
691
- " <td>None</td>\n",
692
- " </tr>\n",
693
- " <tr>\n",
694
- " <th>3</th>\n",
695
- " <td>Gradient estimates for the non-stationary Stok...</td>\n",
696
- " <td>For the non-stationary Stokes system, it is we...</td>\n",
697
- " <td>2306.16480v2</td>\n",
698
- " <td>[math.AP]</td>\n",
699
- " <td>[35Q30, 35B65]</td>\n",
700
- " </tr>\n",
701
- " <tr>\n",
702
- " <th>4</th>\n",
703
- " <td>Puiseux asymptotic expansions for convection-d...</td>\n",
704
- " <td>This article completes the study of the influe...</td>\n",
705
- " <td>2307.02387v1</td>\n",
706
- " <td>[math.AP]</td>\n",
707
- " <td>[35K20, 35R02, 35B40, 35B25, 35B45, 35K57, 35Q49]</td>\n",
708
- " </tr>\n",
709
- " <tr>\n",
710
- " <th>...</th>\n",
711
- " <td>...</td>\n",
712
- " <td>...</td>\n",
713
- " <td>...</td>\n",
714
- " <td>...</td>\n",
715
- " <td>...</td>\n",
716
- " </tr>\n",
717
- " <tr>\n",
718
- " <th>49995</th>\n",
719
- " <td>Singular Limits for Thin Film Superconductors ...</td>\n",
720
- " <td>We consider singular limits of the three-dimen...</td>\n",
721
- " <td>1209.3696v1</td>\n",
722
- " <td>[math.AP, math-ph, math.MP]</td>\n",
723
- " <td>[35J50, 35Q56, 49J45]</td>\n",
724
- " </tr>\n",
725
- " <tr>\n",
726
- " <th>49996</th>\n",
727
- " <td>Energy partition for the linear radial wave eq...</td>\n",
728
- " <td>We consider the radial free wave equation in a...</td>\n",
729
- " <td>1209.3678v1</td>\n",
730
- " <td>[math.AP]</td>\n",
731
- " <td>[35L05]</td>\n",
732
- " </tr>\n",
733
- " <tr>\n",
734
- " <th>49997</th>\n",
735
- " <td>Spectral stability for subsonic traveling puls...</td>\n",
736
- " <td>We consider the spectral stability of certain ...</td>\n",
737
- " <td>1209.3666v1</td>\n",
738
- " <td>[math.AP]</td>\n",
739
- " <td>[35B35, 35B40, 35G30]</td>\n",
740
- " </tr>\n",
741
- " <tr>\n",
742
- " <th>49998</th>\n",
743
- " <td>On the extension property of Reifenberg-flat d...</td>\n",
744
- " <td>We provide a detailed proof of the fact that a...</td>\n",
745
- " <td>1209.3602v1</td>\n",
746
- " <td>[math.AP]</td>\n",
747
- " <td>[49Q20, 49Q05, 46E35]</td>\n",
748
- " </tr>\n",
749
- " <tr>\n",
750
- " <th>49999</th>\n",
751
- " <td>BMO estimates for nonvariational operators wit...</td>\n",
752
- " <td>We consider a class of nonvariational linear o...</td>\n",
753
- " <td>1209.3601v1</td>\n",
754
- " <td>[math.AP]</td>\n",
755
- " <td>[35B45]</td>\n",
756
- " </tr>\n",
757
- " </tbody>\n",
758
- "</table>\n",
759
- "<p>50000 rows Γ— 5 columns</p>\n",
760
- "</div>"
761
- ],
762
- "text/plain": [
763
- " title \\\n",
764
- "0 Post-Lie algebras in Regularity Structures \n",
765
- "1 Borderline gradient regularity estimates for q... \n",
766
- "2 Deep Learning Hydrodynamic Forecasting for Flo... \n",
767
- "3 Gradient estimates for the non-stationary Stok... \n",
768
- "4 Puiseux asymptotic expansions for convection-d... \n",
769
- "... ... \n",
770
- "49995 Singular Limits for Thin Film Superconductors ... \n",
771
- "49996 Energy partition for the linear radial wave eq... \n",
772
- "49997 Spectral stability for subsonic traveling puls... \n",
773
- "49998 On the extension property of Reifenberg-flat d... \n",
774
- "49999 BMO estimates for nonvariational operators wit... \n",
775
- "\n",
776
- " abstract id \\\n",
777
- "0 In this work, we construct the deformed Butche... 2208.00514v5 \n",
778
- "1 In this paper, we study some regularity issues... 2307.02420v1 \n",
779
- "2 Hydrodynamic flood modeling improves hydrologi... 2305.12052v2 \n",
780
- "3 For the non-stationary Stokes system, it is we... 2306.16480v2 \n",
781
- "4 This article completes the study of the influe... 2307.02387v1 \n",
782
- "... ... ... \n",
783
- "49995 We consider singular limits of the three-dimen... 1209.3696v1 \n",
784
- "49996 We consider the radial free wave equation in a... 1209.3678v1 \n",
785
- "49997 We consider the spectral stability of certain ... 1209.3666v1 \n",
786
- "49998 We provide a detailed proof of the fact that a... 1209.3602v1 \n",
787
- "49999 We consider a class of nonvariational linear o... 1209.3601v1 \n",
788
- "\n",
789
- " arxiv_subjects \\\n",
790
- "0 [math.PR, math.AP, math.RA] \n",
791
- "1 [math.AP] \n",
792
- "2 [cs.LG, math.AP, physics.flu-dyn] \n",
793
- "3 [math.AP] \n",
794
- "4 [math.AP] \n",
795
- "... ... \n",
796
- "49995 [math.AP, math-ph, math.MP] \n",
797
- "49996 [math.AP] \n",
798
- "49997 [math.AP] \n",
799
- "49998 [math.AP] \n",
800
- "49999 [math.AP] \n",
801
- "\n",
802
- " msc_tags \n",
803
- "0 None \n",
804
- "1 None \n",
805
- "2 None \n",
806
- "3 [35Q30, 35B65] \n",
807
- "4 [35K20, 35R02, 35B40, 35B25, 35B45, 35K57, 35Q49] \n",
808
- "... ... \n",
809
- "49995 [35J50, 35Q56, 49J45] \n",
810
- "49996 [35L05] \n",
811
- "49997 [35B35, 35B40, 35G30] \n",
812
- "49998 [49Q20, 49Q05, 46E35] \n",
813
- "49999 [35B45] \n",
814
- "\n",
815
- "[50000 rows x 5 columns]"
816
- ]
817
- },
818
- "execution_count": 2,
819
- "metadata": {},
820
- "output_type": "execute_result"
821
- }
822
- ],
823
- "source": [
824
- "lib"
825
- ]
826
- },
827
- {
828
- "cell_type": "code",
829
- "execution_count": 3,
830
- "metadata": {},
831
- "outputs": [],
832
- "source": [
833
- "emb = pd.read_feather(\"./data/libraries/APSP_50_allenai-specter/embeddings.feather\")"
834
- ]
835
- },
836
- {
837
- "cell_type": "code",
838
- "execution_count": 4,
839
- "metadata": {},
840
- "outputs": [
841
- {
842
- "data": {
843
- "text/html": [
844
- "<div>\n",
845
- "<style scoped>\n",
846
- " .dataframe tbody tr th:only-of-type {\n",
847
- " vertical-align: middle;\n",
848
- " }\n",
849
- "\n",
850
- " .dataframe tbody tr th {\n",
851
- " vertical-align: top;\n",
852
- " }\n",
853
- "\n",
854
- " .dataframe thead th {\n",
855
- " text-align: right;\n",
856
- " }\n",
857
- "</style>\n",
858
- "<table border=\"1\" class=\"dataframe\">\n",
859
- " <thead>\n",
860
- " <tr style=\"text-align: right;\">\n",
861
- " <th></th>\n",
862
- " <th>0</th>\n",
863
- " <th>1</th>\n",
864
- " <th>2</th>\n",
865
- " <th>3</th>\n",
866
- " <th>4</th>\n",
867
- " <th>5</th>\n",
868
- " <th>6</th>\n",
869
- " <th>7</th>\n",
870
- " <th>8</th>\n",
871
- " <th>9</th>\n",
872
- " <th>...</th>\n",
873
- " <th>758</th>\n",
874
- " <th>759</th>\n",
875
- " <th>760</th>\n",
876
- " <th>761</th>\n",
877
- " <th>762</th>\n",
878
- " <th>763</th>\n",
879
- " <th>764</th>\n",
880
- " <th>765</th>\n",
881
- " <th>766</th>\n",
882
- " <th>767</th>\n",
883
- " </tr>\n",
884
- " </thead>\n",
885
- " <tbody>\n",
886
- " <tr>\n",
887
- " <th>0</th>\n",
888
- " <td>-0.354270</td>\n",
889
- " <td>0.422403</td>\n",
890
- " <td>-0.105672</td>\n",
891
- " <td>-0.129077</td>\n",
892
- " <td>0.289177</td>\n",
893
- " <td>0.382220</td>\n",
894
- " <td>0.183098</td>\n",
895
- " <td>0.102091</td>\n",
896
- " <td>0.635695</td>\n",
897
- " <td>1.120547</td>\n",
898
- " <td>...</td>\n",
899
- " <td>0.843546</td>\n",
900
- " <td>-0.591661</td>\n",
901
- " <td>1.413266</td>\n",
902
- " <td>0.980099</td>\n",
903
- " <td>1.254564</td>\n",
904
- " <td>-0.756020</td>\n",
905
- " <td>0.614037</td>\n",
906
- " <td>0.139899</td>\n",
907
- " <td>0.117359</td>\n",
908
- " <td>0.159412</td>\n",
909
- " </tr>\n",
910
- " <tr>\n",
911
- " <th>1</th>\n",
912
- " <td>-0.005778</td>\n",
913
- " <td>0.875256</td>\n",
914
- " <td>0.844623</td>\n",
915
- " <td>-0.913219</td>\n",
916
- " <td>-0.220542</td>\n",
917
- " <td>0.457574</td>\n",
918
- " <td>0.819090</td>\n",
919
- " <td>0.658583</td>\n",
920
- " <td>-0.206531</td>\n",
921
- " <td>0.899738</td>\n",
922
- " <td>...</td>\n",
923
- " <td>-0.000560</td>\n",
924
- " <td>-0.572531</td>\n",
925
- " <td>0.789380</td>\n",
926
- " <td>1.063664</td>\n",
927
- " <td>-0.072007</td>\n",
928
- " <td>0.111034</td>\n",
929
- " <td>0.270689</td>\n",
930
- " <td>0.319568</td>\n",
931
- " <td>1.085690</td>\n",
932
- " <td>0.670377</td>\n",
933
- " </tr>\n",
934
- " <tr>\n",
935
- " <th>2</th>\n",
936
- " <td>0.115181</td>\n",
937
- " <td>-0.087180</td>\n",
938
- " <td>0.114065</td>\n",
939
- " <td>0.246189</td>\n",
940
- " <td>0.714248</td>\n",
941
- " <td>0.402952</td>\n",
942
- " <td>0.313888</td>\n",
943
- " <td>0.908008</td>\n",
944
- " <td>0.219879</td>\n",
945
- " <td>1.368971</td>\n",
946
- " <td>...</td>\n",
947
- " <td>0.806028</td>\n",
948
- " <td>-0.331930</td>\n",
949
- " <td>1.068578</td>\n",
950
- " <td>1.111367</td>\n",
951
- " <td>-0.686173</td>\n",
952
- " <td>-0.046650</td>\n",
953
- " <td>-0.116867</td>\n",
954
- " <td>0.380806</td>\n",
955
- " <td>0.239970</td>\n",
956
- " <td>0.928296</td>\n",
957
- " </tr>\n",
958
- " <tr>\n",
959
- " <th>3</th>\n",
960
- " <td>0.052282</td>\n",
961
- " <td>0.800266</td>\n",
962
- " <td>0.831988</td>\n",
963
- " <td>0.155950</td>\n",
964
- " <td>-0.213863</td>\n",
965
- " <td>0.179749</td>\n",
966
- " <td>1.394324</td>\n",
967
- " <td>0.505120</td>\n",
968
- " <td>-0.341608</td>\n",
969
- " <td>0.040288</td>\n",
970
- " <td>...</td>\n",
971
- " <td>0.333275</td>\n",
972
- " <td>-1.103323</td>\n",
973
- " <td>0.387326</td>\n",
974
- " <td>1.064309</td>\n",
975
- " <td>0.196870</td>\n",
976
- " <td>0.380791</td>\n",
977
- " <td>1.301055</td>\n",
978
- " <td>0.288548</td>\n",
979
- " <td>0.353034</td>\n",
980
- " <td>0.239037</td>\n",
981
- " </tr>\n",
982
- " <tr>\n",
983
- " <th>4</th>\n",
984
- " <td>0.095074</td>\n",
985
- " <td>0.099750</td>\n",
986
- " <td>0.638213</td>\n",
987
- " <td>-1.026867</td>\n",
988
- " <td>0.020405</td>\n",
989
- " <td>0.488524</td>\n",
990
- " <td>0.555310</td>\n",
991
- " <td>0.269329</td>\n",
992
- " <td>-0.769490</td>\n",
993
- " <td>0.888668</td>\n",
994
- " <td>...</td>\n",
995
- " <td>0.319892</td>\n",
996
- " <td>-0.673623</td>\n",
997
- " <td>0.750743</td>\n",
998
- " <td>0.930013</td>\n",
999
- " <td>0.033606</td>\n",
1000
- " <td>0.261526</td>\n",
1001
- " <td>0.425253</td>\n",
1002
- " <td>0.908287</td>\n",
1003
- " <td>1.101179</td>\n",
1004
- " <td>0.378441</td>\n",
1005
- " </tr>\n",
1006
- " <tr>\n",
1007
- " <th>...</th>\n",
1008
- " <td>...</td>\n",
1009
- " <td>...</td>\n",
1010
- " <td>...</td>\n",
1011
- " <td>...</td>\n",
1012
- " <td>...</td>\n",
1013
- " <td>...</td>\n",
1014
- " <td>...</td>\n",
1015
- " <td>...</td>\n",
1016
- " <td>...</td>\n",
1017
- " <td>...</td>\n",
1018
- " <td>...</td>\n",
1019
- " <td>...</td>\n",
1020
- " <td>...</td>\n",
1021
- " <td>...</td>\n",
1022
- " <td>...</td>\n",
1023
- " <td>...</td>\n",
1024
- " <td>...</td>\n",
1025
- " <td>...</td>\n",
1026
- " <td>...</td>\n",
1027
- " <td>...</td>\n",
1028
- " <td>...</td>\n",
1029
- " </tr>\n",
1030
- " <tr>\n",
1031
- " <th>49995</th>\n",
1032
- " <td>-0.522521</td>\n",
1033
- " <td>-0.330984</td>\n",
1034
- " <td>0.136525</td>\n",
1035
- " <td>-0.450189</td>\n",
1036
- " <td>-0.076839</td>\n",
1037
- " <td>1.248817</td>\n",
1038
- " <td>0.334444</td>\n",
1039
- " <td>0.873641</td>\n",
1040
- " <td>0.188449</td>\n",
1041
- " <td>0.102323</td>\n",
1042
- " <td>...</td>\n",
1043
- " <td>-0.237047</td>\n",
1044
- " <td>-0.376410</td>\n",
1045
- " <td>1.547221</td>\n",
1046
- " <td>1.126172</td>\n",
1047
- " <td>-0.722363</td>\n",
1048
- " <td>0.549418</td>\n",
1049
- " <td>0.979395</td>\n",
1050
- " <td>0.055092</td>\n",
1051
- " <td>0.610912</td>\n",
1052
- " <td>-0.126857</td>\n",
1053
- " </tr>\n",
1054
- " <tr>\n",
1055
- " <th>49996</th>\n",
1056
- " <td>-0.470180</td>\n",
1057
- " <td>-0.401946</td>\n",
1058
- " <td>0.982030</td>\n",
1059
- " <td>-0.207640</td>\n",
1060
- " <td>0.532523</td>\n",
1061
- " <td>0.231821</td>\n",
1062
- " <td>0.380483</td>\n",
1063
- " <td>1.066097</td>\n",
1064
- " <td>0.130898</td>\n",
1065
- " <td>0.458105</td>\n",
1066
- " <td>...</td>\n",
1067
- " <td>0.178845</td>\n",
1068
- " <td>-0.644469</td>\n",
1069
- " <td>1.544612</td>\n",
1070
- " <td>0.765639</td>\n",
1071
- " <td>0.171692</td>\n",
1072
- " <td>0.082497</td>\n",
1073
- " <td>0.258444</td>\n",
1074
- " <td>0.898845</td>\n",
1075
- " <td>-0.184204</td>\n",
1076
- " <td>-0.039506</td>\n",
1077
- " </tr>\n",
1078
- " <tr>\n",
1079
- " <th>49997</th>\n",
1080
- " <td>-1.095224</td>\n",
1081
- " <td>0.074697</td>\n",
1082
- " <td>0.357558</td>\n",
1083
- " <td>-0.289866</td>\n",
1084
- " <td>0.776415</td>\n",
1085
- " <td>1.029506</td>\n",
1086
- " <td>1.334372</td>\n",
1087
- " <td>0.711085</td>\n",
1088
- " <td>-0.037792</td>\n",
1089
- " <td>0.165926</td>\n",
1090
- " <td>...</td>\n",
1091
- " <td>-0.528522</td>\n",
1092
- " <td>-0.889131</td>\n",
1093
- " <td>1.200090</td>\n",
1094
- " <td>1.039473</td>\n",
1095
- " <td>0.167707</td>\n",
1096
- " <td>0.511078</td>\n",
1097
- " <td>-0.065529</td>\n",
1098
- " <td>0.447770</td>\n",
1099
- " <td>0.551285</td>\n",
1100
- " <td>0.328493</td>\n",
1101
- " </tr>\n",
1102
- " <tr>\n",
1103
- " <th>49998</th>\n",
1104
- " <td>-0.344982</td>\n",
1105
- " <td>0.121328</td>\n",
1106
- " <td>0.305855</td>\n",
1107
- " <td>-0.381629</td>\n",
1108
- " <td>-0.181680</td>\n",
1109
- " <td>0.434278</td>\n",
1110
- " <td>1.460984</td>\n",
1111
- " <td>0.992868</td>\n",
1112
- " <td>0.167097</td>\n",
1113
- " <td>1.005540</td>\n",
1114
- " <td>...</td>\n",
1115
- " <td>0.580155</td>\n",
1116
- " <td>-0.436302</td>\n",
1117
- " <td>0.818202</td>\n",
1118
- " <td>0.528767</td>\n",
1119
- " <td>0.078137</td>\n",
1120
- " <td>0.811233</td>\n",
1121
- " <td>0.269796</td>\n",
1122
- " <td>0.241384</td>\n",
1123
- " <td>-0.356777</td>\n",
1124
- " <td>0.245386</td>\n",
1125
- " </tr>\n",
1126
- " <tr>\n",
1127
- " <th>49999</th>\n",
1128
- " <td>-0.758263</td>\n",
1129
- " <td>0.188403</td>\n",
1130
- " <td>0.582321</td>\n",
1131
- " <td>-1.106614</td>\n",
1132
- " <td>0.063970</td>\n",
1133
- " <td>0.288577</td>\n",
1134
- " <td>0.510509</td>\n",
1135
- " <td>0.543814</td>\n",
1136
- " <td>-0.262185</td>\n",
1137
- " <td>0.727537</td>\n",
1138
- " <td>...</td>\n",
1139
- " <td>0.017372</td>\n",
1140
- " <td>-0.628980</td>\n",
1141
- " <td>1.412982</td>\n",
1142
- " <td>1.034429</td>\n",
1143
- " <td>0.289884</td>\n",
1144
- " <td>-0.282774</td>\n",
1145
- " <td>0.831488</td>\n",
1146
- " <td>0.248558</td>\n",
1147
- " <td>0.771177</td>\n",
1148
- " <td>-0.124385</td>\n",
1149
- " </tr>\n",
1150
- " </tbody>\n",
1151
- "</table>\n",
1152
- "<p>50000 rows × 768 columns</p>\n",
1153
- "</div>"
1154
- ],
1155
- "text/plain": [
1156
- " 0 1 2 3 4 5 6 \\\n",
1157
- "0 -0.354270 0.422403 -0.105672 -0.129077 0.289177 0.382220 0.183098 \n",
1158
- "1 -0.005778 0.875256 0.844623 -0.913219 -0.220542 0.457574 0.819090 \n",
1159
- "2 0.115181 -0.087180 0.114065 0.246189 0.714248 0.402952 0.313888 \n",
1160
- "3 0.052282 0.800266 0.831988 0.155950 -0.213863 0.179749 1.394324 \n",
1161
- "4 0.095074 0.099750 0.638213 -1.026867 0.020405 0.488524 0.555310 \n",
1162
- "... ... ... ... ... ... ... ... \n",
1163
- "49995 -0.522521 -0.330984 0.136525 -0.450189 -0.076839 1.248817 0.334444 \n",
1164
- "49996 -0.470180 -0.401946 0.982030 -0.207640 0.532523 0.231821 0.380483 \n",
1165
- "49997 -1.095224 0.074697 0.357558 -0.289866 0.776415 1.029506 1.334372 \n",
1166
- "49998 -0.344982 0.121328 0.305855 -0.381629 -0.181680 0.434278 1.460984 \n",
1167
- "49999 -0.758263 0.188403 0.582321 -1.106614 0.063970 0.288577 0.510509 \n",
1168
- "\n",
1169
- " 7 8 9 ... 758 759 760 \\\n",
1170
- "0 0.102091 0.635695 1.120547 ... 0.843546 -0.591661 1.413266 \n",
1171
- "1 0.658583 -0.206531 0.899738 ... -0.000560 -0.572531 0.789380 \n",
1172
- "2 0.908008 0.219879 1.368971 ... 0.806028 -0.331930 1.068578 \n",
1173
- "3 0.505120 -0.341608 0.040288 ... 0.333275 -1.103323 0.387326 \n",
1174
- "4 0.269329 -0.769490 0.888668 ... 0.319892 -0.673623 0.750743 \n",
1175
- "... ... ... ... ... ... ... ... \n",
1176
- "49995 0.873641 0.188449 0.102323 ... -0.237047 -0.376410 1.547221 \n",
1177
- "49996 1.066097 0.130898 0.458105 ... 0.178845 -0.644469 1.544612 \n",
1178
- "49997 0.711085 -0.037792 0.165926 ... -0.528522 -0.889131 1.200090 \n",
1179
- "49998 0.992868 0.167097 1.005540 ... 0.580155 -0.436302 0.818202 \n",
1180
- "49999 0.543814 -0.262185 0.727537 ... 0.017372 -0.628980 1.412982 \n",
1181
- "\n",
1182
- " 761 762 763 764 765 766 767 \n",
1183
- "0 0.980099 1.254564 -0.756020 0.614037 0.139899 0.117359 0.159412 \n",
1184
- "1 1.063664 -0.072007 0.111034 0.270689 0.319568 1.085690 0.670377 \n",
1185
- "2 1.111367 -0.686173 -0.046650 -0.116867 0.380806 0.239970 0.928296 \n",
1186
- "3 1.064309 0.196870 0.380791 1.301055 0.288548 0.353034 0.239037 \n",
1187
- "4 0.930013 0.033606 0.261526 0.425253 0.908287 1.101179 0.378441 \n",
1188
- "... ... ... ... ... ... ... ... \n",
1189
- "49995 1.126172 -0.722363 0.549418 0.979395 0.055092 0.610912 -0.126857 \n",
1190
- "49996 0.765639 0.171692 0.082497 0.258444 0.898845 -0.184204 -0.039506 \n",
1191
- "49997 1.039473 0.167707 0.511078 -0.065529 0.447770 0.551285 0.328493 \n",
1192
- "49998 0.528767 0.078137 0.811233 0.269796 0.241384 -0.356777 0.245386 \n",
1193
- "49999 1.034429 0.289884 -0.282774 0.831488 0.248558 0.771177 -0.124385 \n",
1194
- "\n",
1195
- "[50000 rows x 768 columns]"
1196
- ]
1197
- },
1198
- "execution_count": 4,
1199
- "metadata": {},
1200
- "output_type": "execute_result"
1201
- }
1202
- ],
1203
- "source": [
1204
- "emb"
1205
- ]
1206
- }
1207
- ],
1208
- "metadata": {
1209
- "kernelspec": {
1210
- "display_name": "Python 3",
1211
- "language": "python",
1212
- "name": "python3"
1213
- },
1214
- "language_info": {
1215
- "codemirror_mode": {
1216
- "name": "ipython",
1217
- "version": 3
1218
- },
1219
- "file_extension": ".py",
1220
- "mimetype": "text/x-python",
1221
- "name": "python",
1222
- "nbconvert_exporter": "python",
1223
- "pygments_lexer": "ipython3",
1224
- "version": "3.11.4"
1225
- },
1226
- "orig_nbformat": 4
1227
- },
1228
- "nbformat": 4,
1229
- "nbformat_minor": 2
1230
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
project_log.ipynb DELETED
@@ -1,170 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "metadata": {},
7
- "source": [
8
- "# Project log"
9
- ]
10
- },
11
- {
12
- "attachments": {},
13
- "cell_type": "markdown",
14
- "metadata": {},
15
- "source": [
16
- "## Monday 6.26.2023\n",
17
- "\n",
18
- "- Created log to record important progress\n",
19
- "\n",
20
- "Restructuring project\n",
21
- "- Remove extraneous files from \n",
22
- "the data folder. The relevant 'data' for this project consists of \n",
23
- " - The arxiv metadata the model is trained on. For the prototype we use 20k PDE/Spectral theory articles titled 'APSP.parquet'\n",
24
- " - The MSC tag database. A json dictionary mapping the 5 digit codes e.g. 38A17 to their corresponding english names.\n",
25
- "\n"
26
- ]
27
- },
28
- {
29
- "attachments": {},
30
- "cell_type": "markdown",
31
- "metadata": {},
32
- "source": [
33
- "## Thursday 6.29.2023\n",
34
- "\n",
35
- "- Created data_storage.py\n",
36
- "- This houses all functions and classes related to data storage\n",
37
- "- Created a class ArXivData to store arxiv metadata\n",
38
- " - Designed to be passed to embedding class to vectorize\n",
39
- " - The embedding class should call the cleaning methods under the hood.\n",
40
- "- Can load raw metadata from a query. \n",
41
- " - Only stores id, title, abstract, categories\n",
42
- " - Faster than previous version, now can retrieve 1k articles in ~10 seconds\n",
43
- "\n",
44
- "#### Todo: Write ArXivData methods\n",
45
- " 1. `get_full_metadata`: take a list of ids and retrieve all of the available metadata as a generator.\n",
46
- " 1. `load_from_file`: load arxiv data from a parquet file.\n",
47
- " 1. `save_to_file`: store data as a parquet file\n",
48
- " 2. How to improve the query functionality so that we can make larger queries, say all math articles in the last year.\n",
49
- " - need a way of breaking up an arxiv api call into pieces. How exactly does the code work? Creating the generator object doesn't \n",
50
- " actually load the webpages.\n",
51
- " \n",
52
- "\n",
53
- "#### Todo: In `load_from_query` function, fix the problem that the categories tags are not returned properly\n",
54
- " - Make one column for arxiv tags, one column for msc tags\n",
55
- " - store msc tags as their english names in a list\n",
56
- " - store arxiv cats one-hot-encoded as a separate dataframe attribute\n",
57
- "\n",
58
- "#### Idea for the pipeline\n",
59
- " 1. Load data in the ArXivData class whether from file or from query\n",
60
- " 2. Pass to embedding class to either create or load the necessary embeddings and prepare it to be easily fed into a topic model\n",
61
- " - What exactly is needed?\n",
62
- " 3. Pass to topic model (BERTopic, LSA, LDA, PCA) experiment with multiple.\n",
63
- "\n",
64
- "#### EDA tools needed\n",
65
- " 1. Semantic analysis of MSC tags to choose the best one out of the labels for the 'category'\n",
66
- " 2. Are there better ideas that don't just amount to labeling based on semantic similarity with the tag?\n",
67
- " - an EDA question: Are the tagged MSC tags the top most semantically similar to the title/abstract?\n",
68
- "\n",
69
- "\n"
70
- ]
71
- },
72
- {
73
- "attachments": {},
74
- "cell_type": "markdown",
75
- "metadata": {},
76
- "source": [
77
- "## 07/02/2023\n",
78
- "\n",
79
- "- Read a Medium article about using config files to set up highly modular data analysis pipelines.\n",
80
- "- Interested in setting this up here.\n",
81
- "\n",
82
- "#### Outline of pipeline architecture\n",
83
- "\n",
84
- "1. Load dataset \n",
85
- " - option to load from file or from querying arxiv directly\n",
86
- " - stores raw title and abstract, id #s, msc_tags as english, and categories (OHE) as a separate dataframe\n",
87
- "2. Load embeddings\n",
88
- " - option to load from file or generate using sentence transformers directly.\n",
89
- " - any data cleaning procedures will occur in the pipeline here\n",
90
- "3. Plug into topic model(s)"
91
- ]
92
- },
93
- {
94
- "attachments": {},
95
- "cell_type": "markdown",
96
- "metadata": {},
97
- "source": [
98
- "## 07/03/2023\n",
99
- "\n",
100
- "#### Modified data_storage.py\n",
101
- "\n",
102
- "Done:\n",
103
- "1. Wrote `load_from_feather` and `save_to_feather`\n",
104
- "1. Pulled and stored metadata for 40k papers in pde and spectral theory called 'APSP_40.feather'\n",
105
- "\n",
106
- "To Do:\n",
107
- "1. Write comments for the methods in the arXivData class.\n",
108
- "1. Make sure the class functionality works correctly when a query returns no results.\n",
109
- "\n",
110
- "\n",
111
- "#### Miscellaneous\n",
112
- "1. Install `tabbed out` extension for exiting delimiter environments with tab.\n"
113
- ]
114
- },
115
- {
116
- "attachments": {},
117
- "cell_type": "markdown",
118
- "metadata": {},
119
- "source": [
120
- "## 07/04/2023\n",
121
- "\n",
122
- "#### Create embedding module, `embedding.py`\n",
123
- "\n",
124
- "Functions\n",
125
- "1. Take in an arXivData class object\n",
126
- "1. generate embeddings for the clean text\n",
127
- "1. compute the most semantically similar msc tags\n",
128
- "1. output the np array containing the embeddings\n",
129
- "1. output the np array in which row i is \n",
130
- " - the embedding vector of the most similar msc tag, if there are msc tags\n",
131
- " - NAN if there are no msc tags.\n",
132
- "\n",
133
- "\n",
134
- "Stopping in the middle of step 3, which is the function `rank_msc_tags` in embedding.py\n",
135
- "\n",
136
- "need to add the dataclass decorator from the data storage module to my arXivData class.\n"
137
- ]
138
- },
139
- {
140
- "cell_type": "markdown",
141
- "metadata": {},
142
- "source": [
143
- "## 07/14/2023\n",
144
- "\n",
145
- "#### Modified embedding module\n",
146
- "1. Added functions to generate and load the embeddings of msc and arxiv subject tags.\n",
147
- "2. Saved these embeddings in the data directory as parquet files, with the index of a row equal to the word that row vector encodes."
148
- ]
149
- },
150
- {
151
- "cell_type": "markdown",
152
- "metadata": {},
153
- "source": []
154
- }
155
- ],
156
- "metadata": {
157
- "kernelspec": {
158
- "display_name": "Python 3",
159
- "language": "python",
160
- "name": "python3"
161
- },
162
- "language_info": {
163
- "name": "python",
164
- "version": "3.10.11"
165
- },
166
- "orig_nbformat": 4
167
- },
168
- "nbformat": 4,
169
- "nbformat_minor": 2
170
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/search.py → search.py RENAMED
File without changes
src/__init__.py DELETED
File without changes
src/storage.py → storage.py RENAMED
File without changes