Michael-Geis committed on
Commit
0e1d213
1 Parent(s): eaf77fe

removed useless notebooks

Browse files
Files changed (3) hide show
  1. data-exploration.ipynb +0 -516
  2. email-extraction.ipynb +0 -19
  3. library_class.py +0 -25
data-exploration.ipynb DELETED
@@ -1,516 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "metadata": {},
7
- "source": [
8
- "# EDA for cleaned arXiv dataset"
9
- ]
10
- },
11
- {
12
- "attachments": {},
13
- "cell_type": "markdown",
14
- "metadata": {},
15
- "source": [
16
- "## Imports"
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": 1,
22
- "metadata": {},
23
- "outputs": [],
24
- "source": [
25
- "import pandas as pd\n",
26
- "import numpy as np"
27
- ]
28
- },
29
- {
30
- "attachments": {},
31
- "cell_type": "markdown",
32
- "metadata": {},
33
- "source": [
34
- "## Which subject tag occurs the most frequently in our 175k dataset?"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": 2,
40
- "metadata": {},
41
- "outputs": [
42
- {
43
- "data": {
44
- "text/html": [
45
- "<div>\n",
46
- "<style scoped>\n",
47
- " .dataframe tbody tr th:only-of-type {\n",
48
- " vertical-align: middle;\n",
49
- " }\n",
50
- "\n",
51
- " .dataframe tbody tr th {\n",
52
- " vertical-align: top;\n",
53
- " }\n",
54
- "\n",
55
- " .dataframe thead th {\n",
56
- " text-align: right;\n",
57
- " }\n",
58
- "</style>\n",
59
- "<table border=\"1\" class=\"dataframe\">\n",
60
- " <thead>\n",
61
- " <tr style=\"text-align: right;\">\n",
62
- " <th></th>\n",
63
- " <th>Accelerator Physics</th>\n",
64
- " <th>Adaptation and Self-Organizing Systems</th>\n",
65
- " <th>Algebraic Geometry</th>\n",
66
- " <th>Algebraic Topology</th>\n",
67
- " <th>Analysis of PDEs</th>\n",
68
- " <th>Applications</th>\n",
69
- " <th>Applied Physics</th>\n",
70
- " <th>Artificial Intelligence</th>\n",
71
- " <th>Astrophysics</th>\n",
72
- " <th>Astrophysics of Galaxies</th>\n",
73
- " <th>...</th>\n",
74
- " <th>Strongly Correlated Electrons</th>\n",
75
- " <th>Subcellular Processes</th>\n",
76
- " <th>Superconductivity</th>\n",
77
- " <th>Symbolic Computation</th>\n",
78
- " <th>Symplectic Geometry</th>\n",
79
- " <th>Systems and Control</th>\n",
80
- " <th>Theoretical Economics</th>\n",
81
- " <th>Tissues and Organs</th>\n",
82
- " <th>Trading and Market Microstructure</th>\n",
83
- " <th>UNK</th>\n",
84
- " </tr>\n",
85
- " </thead>\n",
86
- " <tbody>\n",
87
- " <tr>\n",
88
- " <th>0</th>\n",
89
- " <td>False</td>\n",
90
- " <td>False</td>\n",
91
- " <td>False</td>\n",
92
- " <td>False</td>\n",
93
- " <td>False</td>\n",
94
- " <td>False</td>\n",
95
- " <td>False</td>\n",
96
- " <td>False</td>\n",
97
- " <td>False</td>\n",
98
- " <td>False</td>\n",
99
- " <td>...</td>\n",
100
- " <td>False</td>\n",
101
- " <td>False</td>\n",
102
- " <td>False</td>\n",
103
- " <td>False</td>\n",
104
- " <td>False</td>\n",
105
- " <td>False</td>\n",
106
- " <td>False</td>\n",
107
- " <td>False</td>\n",
108
- " <td>False</td>\n",
109
- " <td>False</td>\n",
110
- " </tr>\n",
111
- " <tr>\n",
112
- " <th>1</th>\n",
113
- " <td>False</td>\n",
114
- " <td>False</td>\n",
115
- " <td>False</td>\n",
116
- " <td>False</td>\n",
117
- " <td>False</td>\n",
118
- " <td>False</td>\n",
119
- " <td>False</td>\n",
120
- " <td>False</td>\n",
121
- " <td>False</td>\n",
122
- " <td>False</td>\n",
123
- " <td>...</td>\n",
124
- " <td>False</td>\n",
125
- " <td>False</td>\n",
126
- " <td>False</td>\n",
127
- " <td>False</td>\n",
128
- " <td>False</td>\n",
129
- " <td>False</td>\n",
130
- " <td>False</td>\n",
131
- " <td>False</td>\n",
132
- " <td>False</td>\n",
133
- " <td>False</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>2</th>\n",
137
- " <td>False</td>\n",
138
- " <td>False</td>\n",
139
- " <td>False</td>\n",
140
- " <td>False</td>\n",
141
- " <td>False</td>\n",
142
- " <td>False</td>\n",
143
- " <td>False</td>\n",
144
- " <td>False</td>\n",
145
- " <td>False</td>\n",
146
- " <td>False</td>\n",
147
- " <td>...</td>\n",
148
- " <td>False</td>\n",
149
- " <td>False</td>\n",
150
- " <td>False</td>\n",
151
- " <td>False</td>\n",
152
- " <td>False</td>\n",
153
- " <td>False</td>\n",
154
- " <td>False</td>\n",
155
- " <td>False</td>\n",
156
- " <td>False</td>\n",
157
- " <td>False</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>3</th>\n",
161
- " <td>False</td>\n",
162
- " <td>False</td>\n",
163
- " <td>False</td>\n",
164
- " <td>False</td>\n",
165
- " <td>False</td>\n",
166
- " <td>False</td>\n",
167
- " <td>False</td>\n",
168
- " <td>False</td>\n",
169
- " <td>False</td>\n",
170
- " <td>False</td>\n",
171
- " <td>...</td>\n",
172
- " <td>False</td>\n",
173
- " <td>False</td>\n",
174
- " <td>False</td>\n",
175
- " <td>False</td>\n",
176
- " <td>False</td>\n",
177
- " <td>False</td>\n",
178
- " <td>False</td>\n",
179
- " <td>False</td>\n",
180
- " <td>False</td>\n",
181
- " <td>False</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>4</th>\n",
185
- " <td>False</td>\n",
186
- " <td>False</td>\n",
187
- " <td>False</td>\n",
188
- " <td>False</td>\n",
189
- " <td>False</td>\n",
190
- " <td>False</td>\n",
191
- " <td>False</td>\n",
192
- " <td>False</td>\n",
193
- " <td>False</td>\n",
194
- " <td>False</td>\n",
195
- " <td>...</td>\n",
196
- " <td>False</td>\n",
197
- " <td>False</td>\n",
198
- " <td>False</td>\n",
199
- " <td>False</td>\n",
200
- " <td>False</td>\n",
201
- " <td>False</td>\n",
202
- " <td>False</td>\n",
203
- " <td>False</td>\n",
204
- " <td>False</td>\n",
205
- " <td>False</td>\n",
206
- " </tr>\n",
207
- " </tbody>\n",
208
- "</table>\n",
209
- "<p>5 rows × 150 columns</p>\n",
210
- "</div>"
211
- ],
212
- "text/plain": [
213
- " Accelerator Physics Adaptation and Self-Organizing Systems \\\n",
214
- "0 False False \n",
215
- "1 False False \n",
216
- "2 False False \n",
217
- "3 False False \n",
218
- "4 False False \n",
219
- "\n",
220
- " Algebraic Geometry Algebraic Topology Analysis of PDEs Applications \\\n",
221
- "0 False False False False \n",
222
- "1 False False False False \n",
223
- "2 False False False False \n",
224
- "3 False False False False \n",
225
- "4 False False False False \n",
226
- "\n",
227
- " Applied Physics Artificial Intelligence Astrophysics \\\n",
228
- "0 False False False \n",
229
- "1 False False False \n",
230
- "2 False False False \n",
231
- "3 False False False \n",
232
- "4 False False False \n",
233
- "\n",
234
- " Astrophysics of Galaxies ... Strongly Correlated Electrons \\\n",
235
- "0 False ... False \n",
236
- "1 False ... False \n",
237
- "2 False ... False \n",
238
- "3 False ... False \n",
239
- "4 False ... False \n",
240
- "\n",
241
- " Subcellular Processes Superconductivity Symbolic Computation \\\n",
242
- "0 False False False \n",
243
- "1 False False False \n",
244
- "2 False False False \n",
245
- "3 False False False \n",
246
- "4 False False False \n",
247
- "\n",
248
- " Symplectic Geometry Systems and Control Theoretical Economics \\\n",
249
- "0 False False False \n",
250
- "1 False False False \n",
251
- "2 False False False \n",
252
- "3 False False False \n",
253
- "4 False False False \n",
254
- "\n",
255
- " Tissues and Organs Trading and Market Microstructure UNK \n",
256
- "0 False False False \n",
257
- "1 False False False \n",
258
- "2 False False False \n",
259
- "3 False False False \n",
260
- "4 False False False \n",
261
- "\n",
262
- "[5 rows x 150 columns]"
263
- ]
264
- },
265
- "execution_count": 2,
266
- "metadata": {},
267
- "output_type": "execute_result"
268
- }
269
- ],
270
- "source": [
271
- "cats = pd.read_parquet('./data/arXiv_cat.parquet')\n",
272
- "cats.head()"
273
- ]
274
- },
275
- {
276
- "cell_type": "code",
277
- "execution_count": 3,
278
- "metadata": {},
279
- "outputs": [
280
- {
281
- "data": {
282
- "text/plain": [
283
- "Analysis of PDEs 18944\n",
284
- "Combinatorics 18930\n",
285
- "Optimization and Control 18284\n",
286
- "Mathematical Physics 16381\n",
287
- "Probability 15343\n",
288
- "dtype: int64"
289
- ]
290
- },
291
- "execution_count": 3,
292
- "metadata": {},
293
- "output_type": "execute_result"
294
- }
295
- ],
296
- "source": [
297
- "## Calculate the number of times each tag appears\n",
298
- "\n",
299
- "totals = cats.sum(axis=0).sort_values(ascending=False)\n",
300
- "totals.head()"
301
- ]
302
- },
303
- {
304
- "cell_type": "code",
305
- "execution_count": 5,
306
- "metadata": {},
307
- "outputs": [
308
- {
309
- "data": {
310
- "text/html": [
311
- "<div>\n",
312
- "<style scoped>\n",
313
- " .dataframe tbody tr th:only-of-type {\n",
314
- " vertical-align: middle;\n",
315
- " }\n",
316
- "\n",
317
- " .dataframe tbody tr th {\n",
318
- " vertical-align: top;\n",
319
- " }\n",
320
- "\n",
321
- " .dataframe thead th {\n",
322
- " text-align: right;\n",
323
- " }\n",
324
- "</style>\n",
325
- "<table border=\"1\" class=\"dataframe\">\n",
326
- " <thead>\n",
327
- " <tr style=\"text-align: right;\">\n",
328
- " <th></th>\n",
329
- " <th>raw_title</th>\n",
330
- " <th>clean_title</th>\n",
331
- " <th>hyph_in_title</th>\n",
332
- " <th>raw_abstract</th>\n",
333
- " <th>clean_abstract</th>\n",
334
- " <th>hyph_in_abstract</th>\n",
335
- " <th>authors_parsed</th>\n",
336
- " <th>cat</th>\n",
337
- " <th>update_date</th>\n",
338
- " <th>id</th>\n",
339
- " </tr>\n",
340
- " </thead>\n",
341
- " <tbody>\n",
342
- " <tr>\n",
343
- " <th>42</th>\n",
344
- " <td>The Prolongation Problem for the Heavenly Equa...</td>\n",
345
- " <td>The Prolongation Problem for the Heavenly Equa...</td>\n",
346
- " <td>None</td>\n",
347
- " <td>We provide an exact regular solution of an o...</td>\n",
348
- " <td>We provide an exact regular solution of an o...</td>\n",
349
- " <td>None</td>\n",
350
- " <td>[['Palese', 'M.', '', 'Dept. Math. Univ. of To...</td>\n",
351
- " <td>[math.AP, math-ph, math.MP]</td>\n",
352
- " <td>2022-09-21</td>\n",
353
- " <td>math/0311218</td>\n",
354
- " </tr>\n",
355
- " <tr>\n",
356
- " <th>55</th>\n",
357
- " <td>Null Controllability for a Degenerate Structur...</td>\n",
358
- " <td>Null Controllability for a Degenerate Structur...</td>\n",
359
- " <td>None</td>\n",
360
- " <td>In this paper, we consider the infinite dime...</td>\n",
361
- " <td>In this paper, we consider the infinite dime...</td>\n",
362
- " <td>[final-state]</td>\n",
363
- " <td>[['Simporé', 'Yacouba', ''], ['gantouh', 'Yass...</td>\n",
364
- " <td>[math.OC, math.AP]</td>\n",
365
- " <td>2022-09-09</td>\n",
366
- " <td>2209.03645</td>\n",
367
- " </tr>\n",
368
- " <tr>\n",
369
- " <th>59</th>\n",
370
- " <td>Voting models and semilinear parabolic equations</td>\n",
371
- " <td>Voting models and semilinear parabolic equations</td>\n",
372
- " <td>None</td>\n",
373
- " <td>We present probabilistic interpretations of ...</td>\n",
374
- " <td>We present probabilistic interpretations of ...</td>\n",
375
- " <td>[semi-linear, Fisher-KPP, group-based, pushmi-...</td>\n",
376
- " <td>[['An', 'Jing', ''], ['Henderson', 'Christophe...</td>\n",
377
- " <td>[math.AP, math.PR]</td>\n",
378
- " <td>2022-09-09</td>\n",
379
- " <td>2209.03435</td>\n",
380
- " </tr>\n",
381
- " <tr>\n",
382
- " <th>72</th>\n",
383
- " <td>Flows of $G_2$-Structures associated to Calabi...</td>\n",
384
- " <td>Flows of LATEX associated to Calabi-Yau Manif...</td>\n",
385
- " <td>[Calabi-Yau]</td>\n",
386
- " <td>We establish a correspondence between a para...</td>\n",
387
- " <td>We establish a correspondence between a para...</td>\n",
388
- " <td>[Monge-Ampere, Monge-Ampere, torsion-free, Ric...</td>\n",
389
- " <td>[['Picard', 'Sébastien', ''], ['Suan', 'Caleb'...</td>\n",
390
- " <td>[math.DG, math.AP]</td>\n",
391
- " <td>2022-09-09</td>\n",
392
- " <td>2209.03411</td>\n",
393
- " </tr>\n",
394
- " <tr>\n",
395
- " <th>78</th>\n",
396
- " <td>On the dynamics of vortices in viscous 2D flows</td>\n",
397
- " <td>On the dynamics of vortices in viscous 2D flows</td>\n",
398
- " <td>None</td>\n",
399
- " <td>We study the 2D Navier--Stokes solution star...</td>\n",
400
- " <td>We study the 2D Navier--Stokes solution star...</td>\n",
401
- " <td>None</td>\n",
402
- " <td>[['Ceci', 'Stefano', ''], ['Seis', 'Christian'...</td>\n",
403
- " <td>[math.AP]</td>\n",
404
- " <td>2022-09-09</td>\n",
405
- " <td>2203.07185</td>\n",
406
- " </tr>\n",
407
- " </tbody>\n",
408
- "</table>\n",
409
- "</div>"
410
- ],
411
- "text/plain": [
412
- " raw_title \\\n",
413
- "42 The Prolongation Problem for the Heavenly Equa... \n",
414
- "55 Null Controllability for a Degenerate Structur... \n",
415
- "59 Voting models and semilinear parabolic equations \n",
416
- "72 Flows of $G_2$-Structures associated to Calabi... \n",
417
- "78 On the dynamics of vortices in viscous 2D flows \n",
418
- "\n",
419
- " clean_title hyph_in_title \\\n",
420
- "42 The Prolongation Problem for the Heavenly Equa... None \n",
421
- "55 Null Controllability for a Degenerate Structur... None \n",
422
- "59 Voting models and semilinear parabolic equations None \n",
423
- "72 Flows of LATEX associated to Calabi-Yau Manif... [Calabi-Yau] \n",
424
- "78 On the dynamics of vortices in viscous 2D flows None \n",
425
- "\n",
426
- " raw_abstract \\\n",
427
- "42 We provide an exact regular solution of an o... \n",
428
- "55 In this paper, we consider the infinite dime... \n",
429
- "59 We present probabilistic interpretations of ... \n",
430
- "72 We establish a correspondence between a para... \n",
431
- "78 We study the 2D Navier--Stokes solution star... \n",
432
- "\n",
433
- " clean_abstract \\\n",
434
- "42 We provide an exact regular solution of an o... \n",
435
- "55 In this paper, we consider the infinite dime... \n",
436
- "59 We present probabilistic interpretations of ... \n",
437
- "72 We establish a correspondence between a para... \n",
438
- "78 We study the 2D Navier--Stokes solution star... \n",
439
- "\n",
440
- " hyph_in_abstract \\\n",
441
- "42 None \n",
442
- "55 [final-state] \n",
443
- "59 [semi-linear, Fisher-KPP, group-based, pushmi-... \n",
444
- "72 [Monge-Ampere, Monge-Ampere, torsion-free, Ric... \n",
445
- "78 None \n",
446
- "\n",
447
- " authors_parsed \\\n",
448
- "42 [['Palese', 'M.', '', 'Dept. Math. Univ. of To... \n",
449
- "55 [['Simporé', 'Yacouba', ''], ['gantouh', 'Yass... \n",
450
- "59 [['An', 'Jing', ''], ['Henderson', 'Christophe... \n",
451
- "72 [['Picard', 'Sébastien', ''], ['Suan', 'Caleb'... \n",
452
- "78 [['Ceci', 'Stefano', ''], ['Seis', 'Christian'... \n",
453
- "\n",
454
- " cat update_date id \n",
455
- "42 [math.AP, math-ph, math.MP] 2022-09-21 math/0311218 \n",
456
- "55 [math.OC, math.AP] 2022-09-09 2209.03645 \n",
457
- "59 [math.AP, math.PR] 2022-09-09 2209.03435 \n",
458
- "72 [math.DG, math.AP] 2022-09-09 2209.03411 \n",
459
- "78 [math.AP] 2022-09-09 2203.07185 "
460
- ]
461
- },
462
- "execution_count": 5,
463
- "metadata": {},
464
- "output_type": "execute_result"
465
- }
466
- ],
467
- "source": [
468
- "## Create the dataset of all pde articles\n",
469
- "\n",
470
- "full_data = pd.read_parquet('./data/arXiv_clean.parquet')\n",
471
- "pde = full_data.loc[cats['Analysis of PDEs'] == True]\n",
472
- "pde.head()"
473
- ]
474
- },
475
- {
476
- "attachments": {},
477
- "cell_type": "markdown",
478
- "metadata": {},
479
- "source": [
480
- "## Next goal: Does the raw arXiv dataset contain the MSC subject information?\n",
481
- "\n",
482
- "No, it doesn't -- this was verified in a Kaggle notebook. It has only arXiv subject tag information."
483
- ]
484
- },
485
- {
486
- "attachments": {},
487
- "cell_type": "markdown",
488
- "metadata": {},
489
- "source": [
490
- "## 1. Frequently occurring author names. Discover some large subsets of the data consisting of papers that have at least one author with a given name. What are the most commonly occurring names?"
491
- ]
492
- }
493
- ],
494
- "metadata": {
495
- "kernelspec": {
496
- "display_name": "Python 3",
497
- "language": "python",
498
- "name": "python3"
499
- },
500
- "language_info": {
501
- "codemirror_mode": {
502
- "name": "ipython",
503
- "version": 3
504
- },
505
- "file_extension": ".py",
506
- "mimetype": "text/x-python",
507
- "name": "python",
508
- "nbconvert_exporter": "python",
509
- "pygments_lexer": "ipython3",
510
- "version": "3.10.11"
511
- },
512
- "orig_nbformat": 4
513
- },
514
- "nbformat": 4,
515
- "nbformat_minor": 2
516
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
email-extraction.ipynb DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": []
9
- }
10
- ],
11
- "metadata": {
12
- "language_info": {
13
- "name": "python"
14
- },
15
- "orig_nbformat": 4
16
- },
17
- "nbformat": 4,
18
- "nbformat_minor": 2
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
library_class.py DELETED
@@ -1,25 +0,0 @@
1
- import util
2
- import pandas as pd
3
- import os
4
-
5
- class Library(object):
6
-
7
- def load_from_file(self,library_name):
8
- self.raw_lib = pd.read_parquet(os.path.join('./data',library_name))
9
-
10
- def load_from_query(self,query_string,max_results):
11
- self.raw_lib = util.query_to_df(query_string,max_results)
12
-
13
- def clean_library(self):
14
-
15
- ## drop columns that we aren't going to modify
16
- cols = ['title','summary','authors','primary_category','categories']
17
- input_lib = self.raw_lib[cols].copy()
18
-
19
- input_lib['title'] = input_lib['title'].apply(util.cleanse)
20
- input_lib['summary'] = input_lib['summary'].apply(util.cleanse)
21
- input_lib['hyph_in_summary'] = input_lib['summary'].apply(util.find_hyph)
22
- input_lib['hyph_in_title'] = input_lib['title'].apply(util.find_hyph)
23
- input_lib['msc_tags'] = input_lib.categories.apply(util.find_msc).apply(util.msc_to_eng)
24
-
25
- self.clean_lib = input_lib