mishtert committed on
Commit
06de737
1 Parent(s): 32da6be

Upload meshutils.py

Browse files
Files changed (1) hide show
  1. meshutils.py +52 -0
meshutils.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
+ from collections import defaultdict
5
+
6
+
7
+
8
+ # load file
9
def df_mem(df):
    """Return the deep memory footprint of ``df`` as a string like ``'1.5 Mb'``.

    The figure is the sum of ``df.memory_usage(index=True, deep=True)`` (so the
    index is included), divided by 1024 twice and formatted with one decimal.

    Args:
        df: Object exposing ``memory_usage(index=..., deep=...)``, e.g. a
            pandas DataFrame.

    Returns:
        The formatted size string.
    """
    total_bytes = df.memory_usage(index=True, deep=True).values.sum()
    # NOTE(review): the value is bytes / 1024**2; the "Mb" suffix is kept
    # byte-for-byte so any consumer of this string is unaffected.
    return '%.1f Mb' % (total_bytes / 1024 / 1024)
11
+
12
+
13
def load_df(file_name, nrows=1000, header='infer', names=None, sep='|'):
    """Read a delimited text file into a pandas DataFrame.

    Args:
        file_name: Path or file-like object handed to ``pd.read_csv``.
        nrows: Maximum number of data rows to read; ``None`` reads the
            whole file. Defaults to 1000.
        header: Forwarded to ``pd.read_csv`` (``'infer'`` by default).
        names: Optional column names, forwarded to ``pd.read_csv``.
        sep: Field delimiter. Defaults to ``'|'``, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The loaded DataFrame.
    """
    return pd.read_csv(
        file_name,
        sep=sep,
        nrows=nrows,
        low_memory=False,
        header=header,
        names=names,
    )
17
+
18
+
19
+ # Map Studies to Mesh
20
# Load the full browse_conditions table (nrows=None reads every row) and keep
# only the two columns needed for the study -> MeSH term lookup.
df_mesh_ct = load_df('asset/data/browse_conditions.txt', nrows=None)
df_mesh_ct = df_mesh_ct[['nct_id', 'downcase_mesh_term']]

## search mesh_term
# Maps each nct_id to the set of downcase_mesh_term values listed for it.
nct_to_mesh_term = defaultdict(set)

# Pair the two columns directly; the row index is not needed for the mapping.
for nct_id, mesh_term in zip(df_mesh_ct['nct_id'], df_mesh_ct['downcase_mesh_term']):
    nct_to_mesh_term[nct_id].add(mesh_term)
28
+
29
+ ###==========================================================================================================
30
+
31
+ # # Map Mesh to Keywords
32
+ # df_mesh_kw = load_df('data/keywords.txt', nrows=None)
33
+ # df_mesh_kw = df_mesh_kw[['nct_id', 'downcase_name']]
34
+
35
+ # ## get mesh keywords
36
+ # nct_to_mesh_kywd = defaultdict(set)
37
+
38
+ # for row in df_mesh_kw[['nct_id','downcase_name']].itertuples():
39
+ # nct_to_mesh_kywd[row[1]].add(row[2])
40
+
41
+ ###==========================================================================================================
42
+ # original mesh function in creator py
43
+ ###==========================================================================================================
44
+ # load mesh dataframe
45
+
46
# Load the MeSH table; the file is decoded with the 'unicode_escape' codec.
df_mesh = pd.read_csv('asset/data/df_mesh.csv', encoding='unicode_escape')

# Map Mesh Term to ID
# Built from the 'name' and 'ui' columns; when a name occurs more than once,
# the last row wins, exactly as with sequential dict assignment.
mesh_term_to_id = dict(zip(df_mesh['name'], df_mesh['ui']))