Upload meshutils.py
Browse files- meshutils.py +52 -0
meshutils.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
|
4 |
+
from collections import defaultdict
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
# load file
|
9 |
+
def df_mem(df):
    """Return the memory footprint of ``df`` formatted like ``'2.0 Mb'``.

    Per-column byte counts are taken with the index included and with
    ``deep=True`` (so object contents are measured), summed, and divided
    by 1024 twice before being rendered with one decimal place.
    """
    bytes_per_column = df.memory_usage(index=True, deep=True).values
    size_in_mb = bytes_per_column.sum() / 1024 / 1024
    return '{:.1f} Mb'.format(size_in_mb)
|
11 |
+
|
12 |
+
|
13 |
+
def load_df(file_name, nrows=1000, header='infer', names=None):
    """Read a pipe-delimited (``|``) file into a pandas DataFrame.

    Args:
        file_name: Path or file-like object handed to ``pd.read_csv``.
        nrows: Maximum number of rows to read (1000 by default); forwarded
            unchanged to ``pd.read_csv``.
        header: Forwarded to ``pd.read_csv`` (``'infer'`` by default).
        names: Optional column names, forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame.
    """
    read_options = {
        'sep': '|',
        'nrows': nrows,
        'low_memory': False,
        'header': header,
        'names': names,
    }
    frame = pd.read_csv(file_name, **read_options)
    # Debug aid (disabled): report file name, row count and df_mem(frame).
    return frame
|
17 |
+
|
18 |
+
|
19 |
+
# Map Studies to Mesh
# nrows=None overrides load_df's 1000-row default; only the two columns
# needed for the lookup are kept.
df_mesh_ct = load_df('asset/data/browse_conditions.txt', nrows=None)[['nct_id', 'downcase_mesh_term']]

## search mesh_term
# Lookup table: nct_id -> set of downcase_mesh_term values listed for it.
nct_to_mesh_term = defaultdict(set)

for study_id, mesh_term in df_mesh_ct.itertuples(index=False, name=None):
    nct_to_mesh_term[study_id].add(mesh_term)
|
28 |
+
|
29 |
+
###==========================================================================================================
|
30 |
+
|
31 |
+
# # Map Mesh to Keywords
|
32 |
+
# df_mesh_kw = load_df('data/keywords.txt', nrows=None)
|
33 |
+
# df_mesh_kw = df_mesh_kw[['nct_id', 'downcase_name']]
|
34 |
+
|
35 |
+
# ## get mesh keywords
|
36 |
+
# nct_to_mesh_kywd = defaultdict(set)
|
37 |
+
|
38 |
+
# for row in df_mesh_kw[['nct_id','downcase_name']].itertuples():
|
39 |
+
# nct_to_mesh_kywd[row[1]].add(row[2])
|
40 |
+
|
41 |
+
###==========================================================================================================
|
42 |
+
# original mesh function in creator py
|
43 |
+
###==========================================================================================================
|
44 |
+
# load mesh dataframe
df_mesh = pd.read_csv('asset/data/df_mesh.csv', encoding='unicode_escape')

# Map Mesh Term to ID
# Each (name, ui) row becomes one dict entry; when a name repeats, the
# last row's ui wins.
mesh_term_to_id = dict(df_mesh[['name', 'ui']].itertuples(index=False, name=None))
|