Alsentzer committed on
Commit
568c4f3
1 Parent(s): 5356ad2

initial commit

Browse files
app.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ import ast
5
+
6
+ '''
7
+ Causal Gene Discovery Model
8
+ /home/ema30/zaklab/rare_disease_dx/checkpoints/aligner/04_30_22:13:29:55_lr_1e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_gene_multisimilarity/all_udn_patients_kg_8.9.21_kgsolved_manual_baylor_nobgm_distractor_genes_5_candidates_mapped_only_genes
9
+
10
+ Patients-Like-Me Model
11
+ /home/ema30/zaklab/rare_disease_dx/checkpoints/patient_NCA/04_26_22:17:38:30_lr_5e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_patient_patient_NCA/mygene2_all_sim_all_udn_patients_kg_8.9.21_kgsolved_with_phenotypes
12
+
13
+ Disease Characterization Model
14
+ /home/ema30/zaklab/rare_disease_dx/checkpoints/patient_NCA/05_13_22:08:00:32_lr_1e-05_val_simulated_pats.disease_split_val_sim_pats_kg_8.9.21_kg_losstype_pd_NCA/mygene2_all_sim_all_udn_patients_kg_8.9.21_kgsolved_with_phenotypes
15
+ '''
16
+
17
+
18
+
19
# Precomputed SHEPHERD outputs, read once at import time. The file names are
# relative, so they resolve against the current working directory.
# The *_scores_df tables hold per-patient candidate rankings; the *_attn_df
# tables hold per-patient phenotype attention weights. The two tuples below
# are positionally paired: the n-th name receives the n-th file.
(
    gene_scores_df,
    exomiser_gene_scores_df,
    patient_scores_df,
    dx_scores_df,
    plm_attn_df,
    dx_attn_df,
    gene_attn_df,
    exomiser_gene_attn_df,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'gene_discovery_scores.csv',
        'exomiser_gene_discovery_scores.csv',
        'patients_like_me_scores.csv',
        'dx_characterization_scores.csv',
        'patients_like_me_scores_attn.csv',
        'dx_characterization_scores_attn.csv',
        'gene_discovery_scores_attn.csv',
        'exomiser_gene_discovery_scores_attn.csv',
    )
)
27
+
28
# Curated display labels keyed by demo patient ID. get_patient() prefers these
# over labels derived from the score tables. The asterisks are Markdown
# emphasis markers (the text is rendered through gr.Markdown).
diseases_map = {
    'UDN-P1': 'POLR3-related leukodystrophy',
    'UDN-P2': 'Novel Syndrome',
    'UDN-P3': 'Coffin-Lowry syndrome',
    'UDN-P4': 'autosomal recessive spastic paraplegia type 76',
    'UDN-P5': 'atypical presentation of familial cold autoinflammatory syndrome',
    'UDN-P6': '*GATAD2B*-associated syndrome',
    'UDN-P7': 'AR limb-girdle muscular atrophy type 2D',
    'UDN-P8': '*ATP5PO*-related Leigh syndrome',
    'UDN-P9': 'Spondyloepimetaphyseal dysplasia, Isidor-Toutain type',
}

# Curated causal-gene labels. Patients missing here (UDN-P1, UDN-P2) fall back
# to the rows flagged correct_gene_label == 1 in gene_scores_df.
genes_map = {
    'UDN-P3': 'RPS6KA3',
    'UDN-P4': 'CAPN1',
    'UDN-P5': 'NLRP12, RAPGEFL1',
    'UDN-P6': 'GATAD2B',
    'UDN-P7': 'SGCA',
    # Letter "O", not digit zero: matches the disease label for UDN-P8 above.
    'UDN-P8': 'ATP5PO',
    'UDN-P9': 'RPL13',
}
32
+
33
+
34
+
35
def get_patient(patient_id, attn_df):
    '''
    Build the Markdown summary shown above a selected patient's results.

    The causal gene and the disease come from the curated `genes_map` /
    `diseases_map` when the patient is listed there. Otherwise they are the
    comma-joined entries flagged as correct (label == 1) for that patient in
    `gene_scores_df` and `dx_scores_df`. Phenotypes are every `phenotypes`
    value that `attn_df` holds for the patient, comma-joined in table order.

    Args:
        patient_id: ID compared against the `patient_id` columns.
        attn_df: attention table with `patient_id` and `phenotypes` columns.

    Returns:
        A Markdown/HTML string listing the selected patient, causal gene,
        disease and phenotypes.
    '''
    if patient_id not in genes_map:
        candidate_rows = gene_scores_df[gene_scores_df['patient_id'] == patient_id]
        causal_rows = candidate_rows[candidate_rows['correct_gene_label'] == 1]
        gene = ', '.join(causal_rows['genes'].tolist())
    else:
        gene = genes_map[patient_id]

    if patient_id not in diseases_map:
        dx_rows = dx_scores_df[dx_scores_df['patient_id'] == patient_id]
        true_dx_rows = dx_rows[dx_rows['correct_label'] == 1]
        disease = ', '.join(true_dx_rows['diseases'].tolist())
    else:
        disease = diseases_map[patient_id]

    own_attention = attn_df[attn_df['patient_id'] == patient_id]
    phenotypes = ', '.join(own_attention['phenotypes'].tolist())

    # NOTE(review): the text starts with a newline and every line ends with
    # one, mirroring the multi-line literal as it appears in this copy of the
    # file (whose leading whitespace was not recoverable) -- confirm.
    return (
        '\n'
        f'**Selected Patient:** {patient_id}<br>\n'
        f'**Causal Gene:** *{gene}*<br>\n'
        f'**Disease:** {disease}<br>\n'
        f'**Phenotypes:** {phenotypes}<br><br>\n'
    )
60
+
61
+
62
def read_file(filename):
    '''
    Return the entire contents of `filename`, opened in text mode.
    '''
    with open(filename, 'r') as handle:
        return handle.read()
66
+
67
+
68
def causal_gene_discovery(patient_id, prioritization_type):
    '''
    Assemble the four outputs of the "Causal Gene Discovery" tab.

    Args:
        patient_id: ID compared against the `patient_id` columns.
        prioritization_type: 'Variant Filtered' selects the exomiser score and
            attention tables; any other value selects the default gene tables.

    Returns:
        A tuple `(patient, scores, attention, kg_html)`:
        patient   -- Markdown summary from `get_patient`;
        scores    -- the patient's candidate genes sorted by descending
                     similarity, as Markdown links, with the causal gene's
                     row wrapped in a green/bold span;
        attention -- the patient's phenotype attention rows sorted by
                     descending attention (rounded to 4 decimals);
        kg_html   -- an <iframe> pointing at the patient's hosted
                     knowledge-graph page.
    '''
    variant_filtered = prioritization_type == 'Variant Filtered'
    score_table = exomiser_gene_scores_df if variant_filtered else gene_scores_df
    attention_table = exomiser_gene_attn_df if variant_filtered else gene_attn_df

    def emphasize(text):
        return f'<span style="color:green">**{text}**</span>'

    # Candidate genes: best score first; scores become 3-decimal strings so
    # that they can carry Markdown/HTML emphasis.
    ranked = score_table[score_table['patient_id'] == patient_id]
    ranked = ranked.sort_values('similarities', ascending=False)
    ranked['similarities'] = ranked['similarities'].round(3).astype(str)
    ranked['genes'] = ranked['genes'].apply(
        lambda symbol: f'<u>[{symbol}](https://www.genecards.org/cgi-bin/carddisp.pl?gene={symbol})</u>'
    )

    # Highlight the causal gene's score and name.
    causal_rows = ranked['correct_gene_label'] == 1
    for column in ('similarities', 'genes'):
        ranked.loc[causal_rows, column] = ranked.loc[causal_rows, column].apply(emphasize)

    ranked = ranked.drop(columns=['patient_id', 'correct_gene_label'])
    ranked = ranked.rename(columns={'similarities': 'SHEPHERD Score', 'genes': 'Candidate Genes'})

    # Phenotype attention, highest first.
    attention = attention_table[attention_table['patient_id'] == patient_id]
    attention = attention.sort_values('attention', ascending=False)
    attention['attention'] = attention['attention'].round(4)
    attention = attention.drop(columns=['patient_id', 'degrees'])

    # Knowledge-graph neighbourhood, embedded from an externally hosted page.
    kg_page = f'https://michellemli.github.io/test_html/{patient_id}.html'
    kg_html = f'''<iframe id="igraph" scrolling="no" style="border:none; width: 100%; height: 600px" seamless="seamless" src="{kg_page}"></iframe>'''

    # The summary always reads phenotypes from the default gene attention
    # table, regardless of `prioritization_type`.
    patient = get_patient(patient_id, gene_attn_df)

    return patient, ranked, attention, kg_html
113
+
114
+
115
def patients_like_me(patient_id, k=10):
    '''
    Assemble the three outputs of the "Patients Like Me" tab.

    Args:
        patient_id: ID compared against the `patient_id` columns.
        k: maximum number of similar patients to return.

    Returns:
        A tuple `(patient, similar, attention)`:
        patient   -- Markdown summary from `get_patient`;
        similar   -- up to `k` candidate patients, most similar first, with
                     gene and disease rendered as Markdown links and rows
                     whose `correct_label` is 1 wrapped in a green/bold span;
        attention -- the patient's phenotype attention rows from
                     `plm_attn_df`, sorted by descending attention.
    '''
    def emphasize(text):
        return f'<span style="color:green">**{text}**</span>'

    similar = patient_scores_df[patient_scores_df['patient_id'] == patient_id]
    similar = similar.sort_values('similarities', ascending=False)

    # Disease name linking to its Orphanet page; gene linking to GeneCards.
    similar['diseases'] = [
        f'<u>[{disease}](https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert={orpha_id})</u>'
        for disease, orpha_id in zip(similar['diseases'], similar['disease_ids'])
    ]
    similar['genes'] = similar['genes'].apply(
        lambda symbol: f'<u>[{symbol}](https://www.genecards.org/cgi-bin/carddisp.pl?gene={symbol})</u>'
    )

    # Highlight candidates flagged with correct_label == 1.
    matches = similar['correct_label'] == 1
    for column in ('candidate_patients', 'genes', 'diseases'):
        similar.loc[matches, column] = similar.loc[matches, column].apply(emphasize)

    similar = similar.drop(columns=['patient_id', 'similarities', 'correct_label', 'disease_ids'])
    similar = similar.rename(columns={
        'candidate_patients': 'Candidate Patient',
        'genes': 'Candidate Patient\'s Gene',
        'diseases': 'Candidate Patient\'s Disease',
    })
    similar = similar.head(k)

    # Phenotype attention, highest first.
    attention = plm_attn_df[plm_attn_df['patient_id'] == patient_id]
    attention = attention.sort_values('attention', ascending=False)
    attention['attention'] = attention['attention'].round(4)
    attention = attention.drop(columns=['patient_id', 'degrees'])

    patient = get_patient(patient_id, plm_attn_df)

    return patient, similar, attention
152
+
153
+
154
# Some `disease_ids` cells hold a disease name instead of an ID; map them to
# the numeric ID the rest of the pipeline keys on.
_DX_ID_PLACEHOLDERS = {
    'Coxa vara': '2812',
    'Multiple epiphyseal dysplasia': '2654',
}

# IDs that need a hand-written link instead of the generated Orphanet one
# (per the original code: "one disease couldn't map to orphanet").
_DX_LINK_OVERRIDES = {
    '33657': '<u>[leukodystrophy, hypomyelinating, 20](https://www.omim.org/entry/619071)</u>',
    '2654': '<u>[Multiple epiphyseal dysplasia](https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=EN&Expert=251)</u>',
    '2812': '<u>[Coxa vara](https://omim.org/entry/122750)</u>',
}


def _dx_markdown_link(name, raw_id):
    '''Return the Markdown link for one disease row of `dx_scores_df`.'''
    raw_id = str(raw_id)
    for label, numeric_id in _DX_ID_PLACEHOLDERS.items():
        if label in raw_id:
            raw_id = numeric_id

    # Cells look like "[268]" or "10737"; a list contributes its first entry.
    disease_id = ast.literal_eval(raw_id)
    if isinstance(disease_id, list):
        disease_id = disease_id[0]

    # Match the whole ID. Searching for the digits inside the rendered URL
    # would also hit unrelated IDs that merely contain them (e.g. 126540).
    override = _DX_LINK_OVERRIDES.get(str(disease_id))
    if override is not None:
        return override
    return f'<u>[{name}](https://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert={disease_id})</u>'


def disease_characterization(patient_id, k=10):
    '''
    Assemble the three outputs of the "Disease Characterization" tab.

    Args:
        patient_id: ID compared against the `patient_id` columns.
        k: maximum number of diseases to return.

    Returns:
        A tuple `(patient, diseases, attention)`:
        patient   -- Markdown summary from `get_patient`;
        diseases  -- up to `k` diseases, most similar first, in a single
                     'Disease' column of Markdown links;
        attention -- the patient's phenotype attention rows from
                     `dx_attn_df`, sorted by descending attention.

    Raises:
        ValueError, SyntaxError: if a `disease_ids` cell is not a Python
            literal (propagated from `ast.literal_eval`).
        IndexError: if a `disease_ids` cell is an empty list.
    '''
    top = dx_scores_df[dx_scores_df['patient_id'] == patient_id]
    # Copy the slice so the column assignment below does not write through a
    # view and trigger pandas' chained-assignment warning.
    top = top.sort_values('similarities', ascending=False).head(k).copy()

    top['diseases'] = [
        _dx_markdown_link(name, raw_id)
        for name, raw_id in zip(top['diseases'], top['disease_ids'])
    ]
    top = top.drop(columns=['patient_id', 'similarities', 'correct_label', 'disease_ids'])
    top = top.rename(columns={'diseases': 'Disease'})

    # Phenotype attention, highest first.
    attention = dx_attn_df[dx_attn_df['patient_id'] == patient_id]
    attention = attention.sort_values('attention', ascending=False)
    attention['attention'] = attention['attention'].round(4)
    attention = attention.drop(columns=['patient_id', 'degrees'])

    patient = get_patient(patient_id, dx_attn_df)

    return patient, top, attention
201
+
202
def get_umap(umap_type):
    '''
    Return the <embed> HTML snippet for one of the hosted UMAP pages.

    Args:
        umap_type: 'disease' or 'patient'.

    Raises:
        NotImplementedError: for any other `umap_type`.
    '''
    embed_template = '<embed style="border: none;" src="{}" dpi="300" width="100%" height="750px" />'

    if umap_type == 'disease':
        return embed_template.format(
            'https://michellemli.github.io/test_html/shepherd_disease_characterization_umap.html'
        )
    if umap_type == 'patient':
        return embed_template.format(
            'https://michellemli.github.io/test_html/shepherd_patient_umap.html'
        )
    raise NotImplementedError
220
+
221
+ #return f'''<iframe id="igraph" scrolling="no" style="border:none; width: 100%; height: 750px" seamless="seamless" src="{html_file}"></iframe>'''
222
+
223
+
224
# Gradio UI: one tab per SHEPHERD task. Each tab has a patient dropdown whose
# `change` event calls the matching function above and fills that tab's
# Markdown / Dataframe / HTML outputs.
# NOTE(review): indentation was not recoverable from this copy of the file;
# the nesting below (in particular how much of the first tab sits inside
# gr.Column, and demo.launch() being outside the Blocks context) is
# reconstructed -- confirm against the original.
with gr.Blocks() as demo: #css="#gene_attn_accordion {text-align: center}" css="kg_neigh {width: 70%}"
    gr.Markdown('<center><h1>AI-assisted Rare Disease Diagnosis with SHEPHERD</h1></center>')
    #gr.Markdown('<center><h2>A few SHot Explainable Predictor for Hard-to-diagnosE Rare Diseases</h2></center>')

    with gr.Tabs():
        # Tab 1: candidate-gene ranking, phenotype attention and KG view.
        with gr.TabItem("Causal Gene Discovery"):
            with gr.Column():
                gr.Markdown('<center><h2>Select a patient to view SHEPHERD\'s predictions</h2></center>')
                gene_dropdown = gr.Dropdown(choices=['UDN-P1', 'UDN-P2'], label='Rare Disease Patients', type='value') #value='UDN-P1',
                # The radio values are compared against 'Variant Filtered' in causal_gene_discovery().
                gene_radio = gr.Radio(choices=['Expert Curated', 'Variant Filtered'], value='Expert Curated', label='Type of Gene List')
                patient_info = gr.Markdown() #get_patient('UDN-P1')

                with gr.Accordion(label=f'SHEPHERD\'s Ranking of Patient\'s Candidate Genes', open=True, elem_id='gene_accordion'):
                    #gr.Markdown(f'<center><h3>SHEPHERD\'s Ranking of Patient\'s Candidate Genes</h3></center>')
                    gr.Markdown('The patient\'s causal gene (i.e. gene harboring a variant that explains the patient\'s symptoms) is colored in green.')
                    # datatype='markdown' because the cells contain Markdown links and HTML spans.
                    gene_dataframe = gr.Dataframe(max_rows=5, elem_id="gene_df", datatype = 'markdown', headers=['Candidate Genes', 'SHEPHERD Score' ], overflow_row_behaviour='paginate') # label='Candidate Genes', show_label=False,
                with gr.Accordion(label=f'SHEPHERD\'s Attention to Patient\'s Phenotypes', open=False, elem_id='gene_attn_accordion'):
                    #gr.Markdown(f'<center><h3>SHEPHERD\'s Attention to Patient\'s Phenotypes</h3></center>')
                    gene_attn_dataframe = gr.Dataframe(max_rows=5, elem_id="gene_attn_df", headers=['Phenotypes', 'Attention' ], overflow_row_behaviour='paginate') # label='Candidate Genes', show_label=False,
                with gr.Accordion(label=f'Visualization of Patient\'s Neighborhood in the Knowledge Graph', open=False, elem_id='kg_neigh_accordion'):
                    #kg_neighborhood_image = gr.Image(elem_id='kg_neigh')#.style(height=200, width=200)
                    # Receives the <iframe> string returned by causal_gene_discovery().
                    kg_neighborhood_image = gr.HTML(elem_id = 'kg_patient_neighborhood')

                #gene_button = gr.Button("Go")

        # Tab 2: patient UMAP plus the most similar patients.
        with gr.TabItem("Patients Like Me"):
            gr.HTML(get_umap('patient'))
            gr.Markdown('<center><h2>Select a patient to view SHEPHERD\'s predictions</h2></center>')
            patient_dropdown = gr.Dropdown(choices=['UDN-P3','UDN-P4','UDN-P5','UDN-P6'], label='Rare Disease Patients', type='value')
            p_patient_info = gr.Markdown()
            with gr.Accordion(label=f'Most Similar Patients according to SHEPHERD', open=True, elem_id='pt_accordion'): #
                patient_dataframe = gr.Dataframe(max_rows=10, datatype = 'markdown', show_label=False, elem_id="pat_df", headers=['Candidate Patient', 'Candidate Patient\'s Gene', 'Candidate Patient\'s Disease' ]) #'Candidate Patient\'s Phenotypes'
            #patient_button = gr.Button("Go")
            with gr.Accordion(label='SHEPHERD\'s Attention to Patient\'s Phenotypes', open=False, elem_id='pt_attn_accordion'):
                pt_attn_dataframe = gr.Dataframe(max_rows=5, elem_id="pt_attn_df", headers=['Phenotypes', 'Attention' ], overflow_row_behaviour='paginate')


        # Tab 3: disease UMAP plus the most similar diseases.
        with gr.TabItem("Disease Characterization"):
            gr.HTML(get_umap('disease'))
            gr.Markdown('<center><h2>Select a patient to view SHEPHERD\'s predictions</h2></center>')
            dx_dropdown = gr.Dropdown(choices=['UDN-P7','UDN-P8','UDN-P9','UDN-P2'], label='Rare Disease Patients', type='value')
            dx_patient_info = gr.Markdown()
            # NOTE(review): elem_id 'pt_accordion' is also used by the accordion in the
            # "Patients Like Me" tab -- confirm whether a distinct ID was intended.
            with gr.Accordion(label='Top 10 Most Similar Diseases according to SHEPHERD', open=True, elem_id='pt_accordion'): #
                # NOTE(review): header is 'Diseases' while disease_characterization()
                # returns a column named 'Disease' -- confirm which label should show.
                dx_dataframe = gr.Dataframe(max_rows=10, datatype = 'markdown', show_label=False, elem_id="dx_df", headers=['Diseases'])
            with gr.Accordion(label='SHEPHERD\'s Attention to Patient\'s Phenotypes', open=False, elem_id='dx_attn_accordion'):
                dx_attn_dataframe = gr.Dataframe(max_rows=5, elem_id="dx_attn_df", headers=['Phenotypes', 'Attention' ], overflow_row_behaviour='paginate')

            #dx_button = gr.Button("Go")

    # Event wiring. The gene tab recomputes when either the patient or the
    # gene-list type changes; both events pass the same two inputs.
    gene_dropdown.change(causal_gene_discovery, inputs=[gene_dropdown,gene_radio], outputs=[patient_info, gene_dataframe, gene_attn_dataframe, kg_neighborhood_image])
    gene_radio.change(causal_gene_discovery, inputs=[gene_dropdown,gene_radio], outputs=[patient_info, gene_dataframe, gene_attn_dataframe, kg_neighborhood_image])

    # Only the dropdown is passed, so `k` keeps its default of 10 in both functions.
    patient_dropdown.change(patients_like_me, inputs=patient_dropdown, outputs=[p_patient_info, patient_dataframe, pt_attn_dataframe])
    dx_dropdown.change(disease_characterization, inputs=dx_dropdown, outputs=[dx_patient_info, dx_dataframe, dx_attn_dataframe])

    #gene_dropdown.change(get_patient, inputs=gene_dropdown, outputs=patient_info)
    #gene_button.click(causal_gene_discovery, inputs=gene_dropdown, outputs=[gene_dataframe,gene_attn_dataframe, kg_neighborhood_image])
    #patient_button.click(patients_like_me, inputs=patient_dropdown, outputs=patient_dataframe)
    #dx_button.click(disease_characterization, inputs=dx_dropdown, outputs=dx_dataframe)

demo.launch( ) #server_port=50018, share=True
dx_characterization_scores.csv ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,diseases,similarities,correct_label,disease_ids
2
+ UDN-P7,autosomal recessive limb-girdle muscular dystrophy type 2B,0.0004265006864443,0,[268]
3
+ UDN-P7,GNE myopathy,0.0004263115406502,0,[602]
4
+ UDN-P7,MYH7-related late-onset scapuloperoneal muscular dystrophy,0.0004259833076503,0,[437572]
5
+ UDN-P7,"Emery-Dreifuss muscular dystrophy 2, autosomal dominant",0.0004257017571944,0,[264]
6
+ UDN-P7,autosomal recessive limb-girdle muscular dystrophy type 2G,0.0004251024511177,0,[34514]
7
+ UDN-P7,autosomal recessive limb-girdle muscular dystrophy type 2C,0.0004250344645697,0,[353]
8
+ UDN-P7,myofibrillar myopathy 4,0.0004249221819918,0,[98912]
9
+ UDN-P7,autosomal recessive limb-girdle muscular dystrophy type 2Q,0.0004244803567416,0,[254361]
10
+ UDN-P7,distal myopathy with posterior leg and anterior hand involvement,0.0004238819237798,0,[63273]
11
+ UDN-P7,autosomal recessive limb-girdle muscular dystrophy type 2I,0.000423846533522,0,[34515]
12
+ UDN-P9,multiple epiphyseal dysplasia type 1,0.0004178489616606,0,[93308]
13
+ UDN-P9,progressive pseudorheumatoid arthropathy of childhood,0.0004172090848442,0,[1159]
14
+ UDN-P9,multiple epiphyseal dysplasia type 5,0.0004147714353166,0,[93311]
15
+ UDN-P9,"metaphyseal chondrodysplasia, Spahr type",0.000414393376559,0,[2501]
16
+ UDN-P9,Multiple epiphyseal dysplasia,0.0004142468678764,0,Multiple epiphyseal dysplasia
17
+ UDN-P9,spondyloepiphyseal dysplasia congenita,0.0004140565870329,0,[94068]
18
+ UDN-P9,"spondyloepiphyseal dysplasia tarda, X-linked",0.0004133584443479,0,10737
19
+ UDN-P9,"spondylometaphyseal dysplasia, 'corner fracture' type",0.0004127403371967,0,[93315]
20
+ UDN-P9,pseudoachondroplasia,0.0004125143168494,0,[750]
21
+ UDN-P9,Coxa vara,0.000412239722209,0,Coxa vara
22
+ UDN-P8,combined oxidative phosphorylation deficiency 39,0.0003771049669012,0,[565624]
23
+ UDN-P8,"leukodystrophy, hypomyelinating, 20",0.0003728091833181,0,33657
24
+ UDN-P8,pyruvate dehydrogenase E3-binding protein deficiency,0.0003713524783961,0,[255182]
25
+ UDN-P8,intellectual disability-epilepsy-extrapyramidal syndrome,0.000369677611161,0,[468620]
26
+ UDN-P8,combined oxidative phosphorylation defect type 27,0.0003694282495416,0,[477774]
27
+ UDN-P8,severe Canavan disease,0.0003670282894745,0,[314911]
28
+ UDN-P8,guanidinoacetate methyltransferase deficiency,0.0003666807315312,0,[382]
29
+ UDN-P8,"neurodevelopmental disorder, mitochondrial, with abnormal movements and lactic acidosis, with or without seizures",0.0003646771074272,0,[572798]
30
+ UDN-P8,pyruvate dehydrogenase phosphatase deficiency,0.0003641054208856,0,[79246]
31
+ UDN-P8,childhood-onset motor and cognitive regression syndrome with extrapyramidal movement disorder,0.00036309598363,0,[500180]
32
+ UDN-P2,methylmalonic aciduria and homocystinuria type cblF,0.0003374280931893,0,[79284]
33
+ UDN-P2,neonatal hemochromatosis,0.0003332369087729,0,[446]
34
+ UDN-P2,homozygous 11P15-p14 deletion syndrome,0.0003293412737548,0,11678
35
+ UDN-P2,ALG8-CDG,0.000328178924974,0,[79325]
36
+ UDN-P2,congenital anemia,0.000324971275404,0,577
37
+ UDN-P2,familial thyroid dyshormonogenesis 1,0.0003192963486071,0,20716
38
+ UDN-P2,congenital hypothyroidism due to maternal intake of antithyroid drugs,0.0003163111105095,0,[226313]
39
+ UDN-P2,"methylmalonic acidemia with homocystinuria, type cblJ",0.000315255660098,0,[369955]
40
+ UDN-P2,COG6-CGD,0.0003150861884932,0,[464443]
41
+ UDN-P2,congenital toxoplasmosis,0.0003135850711259,0,[858]
dx_characterization_scores_attn.csv ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,phenotypes,degrees,attention
2
+ UDN-P2,Abnormality of vision,111,0.0989853367209434
3
+ UDN-P2,Global developmental delay,1330,0.1393505930900573
4
+ UDN-P2,Abnormality of the liver,65,0.0837447270750999
5
+ UDN-P2,Failure to thrive in infancy,1,0.1541623473167419
6
+ UDN-P2,Exocrine pancreatic insufficiency,4,0.0962460488080978
7
+ UDN-P2,Abdominal pain,257,0.0696877762675285
8
+ UDN-P2,Asthma,177,0.0721269026398658
9
+ UDN-P2,Duodenal atresia,34,0.0989027693867683
10
+ UDN-P2,Intestinal malrotation,91,0.0801789388060569
11
+ UDN-P2,Gastroparesis,23,0.106614664196968
12
+ UDN-P9,Flat glenoid fossa,1,0.0665825009346008
13
+ UDN-P9,Abnormality of the vertebral column,76,0.0361983776092529
14
+ UDN-P9,Platyspondyly,129,0.0615288130939006
15
+ UDN-P9,Cutis marmorata,62,0.0318356454372406
16
+ UDN-P9,Wormian bones,1,0.0537327453494071
17
+ UDN-P9,Scoliosis,801,0.0558811277151107
18
+ UDN-P9,Spondylometaphyseal dysplasia,9,0.0940656214952468
19
+ UDN-P9,Coxa vara,56,0.0579654388129711
20
+ UDN-P9,Abnormality of the lower limb,48,0.0458225421607494
21
+ UDN-P9,Thoracic scoliosis,30,0.0464530810713768
22
+ UDN-P9,Genu varum,57,0.073094867169857
23
+ UDN-P9,Tibial bowing,43,0.0525600053369998
24
+ UDN-P9,Abnormal form of the vertebral bodies,138,0.0546819157898426
25
+ UDN-P9,Short stature,1167,0.0585363544523716
26
+ UDN-P9,Abnormality of the femoral metaphysis,10,0.0352263785898685
27
+ UDN-P9,Abnormality of femoral epiphysis,10,0.0409332402050495
28
+ UDN-P9,Abnormality of lower limb epiphysis morphology,6,0.0124136246740818
29
+ UDN-P9,Acetabular dysplasia,1,0.0529508516192436
30
+ UDN-P9,Short femoral neck,1,0.0695368871092796
31
+ UDN-P7,Elevated serum creatine kinase,219,0.156127393245697
32
+ UDN-P7,Generalized muscle weakness,1,0.139058843255043
33
+ UDN-P7,Muscular dystrophy,52,0.1362844258546829
34
+ UDN-P7,Proximal muscle weakness,120,0.1536363512277603
35
+ UDN-P7,Calf muscle pseudohypertrophy,1,0.1570710241794586
36
+ UDN-P7,Abnormal muscle fiber morphology,39,0.0849768370389938
37
+ UDN-P7,Toe walking,1,0.1728451550006866
38
+ UDN-P8,Muscle weakness,392,0.0540948249399662
39
+ UDN-P8,Hypertrophic cardiomyopathy,153,0.0568107031285762
40
+ UDN-P8,Cerebral atrophy,231,0.0622156262397766
41
+ UDN-P8,Hypoplasia of the corpus callosum,332,0.0678444355726242
42
+ UDN-P8,Limb hypertonia,47,0.0716666877269744
43
+ UDN-P8,Lactic acidosis,122,0.0437235236167907
44
+ UDN-P8,Abnormality of temperature regulation,23,0.0598855391144752
45
+ UDN-P8,Respiratory failure requiring assisted ventilation,31,0.0432068929076194
46
+ UDN-P8,Cerebral hypomyelination,19,0.0751257538795471
47
+ UDN-P8,Infantile axial hypotonia,2,0.0626801550388336
48
+ UDN-P8,EEG with focal slow activity,4,0.0187200624495744
49
+ UDN-P8,EEG with generalized slow activity,13,0.0373874828219413
50
+ UDN-P8,Infantile spasms,3,0.0651202574372291
51
+ UDN-P8,Elevated brain lactate level by MRS,4,0.0452156253159046
52
+ UDN-P8,Profound global developmental delay,1,0.0835469588637352
53
+ UDN-P8,Abnormal cerebral morphology,25,0.0396268218755722
54
+ UDN-P8,Cerebral visual impairment,97,0.0617175139486789
55
+ UDN-P8,Oral-pharyngeal dysphagia,1,0.0514112450182437
exomiser_gene_discovery_scores.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,genes,similarities,correct_gene_label
2
+ UDN-P1,UBE3A,0.77525794506073,0
3
+ UDN-P1,POLR3A,0.7635941505432129,1
4
+ UDN-P2,AK2,0.7269436120986938,0
5
+ UDN-P2,INSR,0.7229311466217041,0
6
+ UDN-P2,SMC1A,0.7217983603477478,0
7
+ UDN-P1,KMT2E,0.705829381942749,0
8
+ UDN-P1,TNIK,0.6911647319793701,0
9
+ UDN-P1,ORC4,0.683530867099762,0
10
+ UDN-P2,UBE3A,0.6692474484443665,0
11
+ UDN-P1,CTU2,0.6311229467391968,0
12
+ UDN-P1,TGIF1,0.561279833316803,0
13
+ UDN-P2,NKX2-3,0.5290572643280029,0
14
+ UDN-P2,SSTR2,0.524275541305542,0
15
+ UDN-P2,PABPC1,0.5164581537246704,0
16
+ UDN-P1,TBP,0.5104368329048157,0
17
+ UDN-P1,MED16,0.5011780858039856,0
18
+ UDN-P1,DVL3,0.4911206364631653,0
19
+ UDN-P2,NBPF1,0.4773442149162292,0
20
+ UDN-P2,GLYR1,0.4720344543457031,1
21
+ UDN-P2,PBRM1,0.4607828855514526,0
exomiser_gene_discovery_scores_attn.csv ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,phenotypes,degrees,attention
2
+ UDN-P2,Abnormality of vision,111,0.1183807775378227
3
+ UDN-P2,Global developmental delay,1330,0.1499209105968475
4
+ UDN-P2,Abnormality of the liver,65,0.0950504913926124
5
+ UDN-P2,Failure to thrive in infancy,1,0.1553362309932708
6
+ UDN-P2,Exocrine pancreatic insufficiency,4,0.0865935012698173
7
+ UDN-P2,Abdominal pain,257,0.068713754415512
8
+ UDN-P2,Asthma,177,0.0740201994776725
9
+ UDN-P2,Duodenal atresia,34,0.064612977206707
10
+ UDN-P2,Intestinal malrotation,91,0.0643364712595939
11
+ UDN-P2,Gastroparesis,23,0.1230346783995628
12
+ UDN-P1,Thick lower lip vermilion,1,0.0236996822059154
13
+ UDN-P1,Hydrocephalus,304,0.0230413302779197
14
+ UDN-P1,Microcephaly,1006,0.0316418893635273
15
+ UDN-P1,Low anterior hairline,4,0.0270521882921457
16
+ UDN-P1,Mandibular prognathia,6,0.026597810909152
17
+ UDN-P1,Alacrima,1,0.0138036329299211
18
+ UDN-P1,Thick eyebrow,8,0.0170831736177206
19
+ UDN-P1,Exotropia,94,0.0167111363261938
20
+ UDN-P1,Synophrys,3,0.0241384990513324
21
+ UDN-P1,Delayed eruption of teeth,131,0.0224792174994945
22
+ UDN-P1,Abnormal sternum morphology,51,0.0120808342471718
23
+ UDN-P1,Hypertension,278,0.0193403456360101
24
+ UDN-P1,Hypotonia,911,0.0355296544730663
25
+ UDN-P1,Global developmental delay,1330,0.0301800202578306
26
+ UDN-P1,Dystonia,360,0.0236831586807966
27
+ UDN-P1,Absent speech,1,0.0214502979069948
28
+ UDN-P1,Failure to thrive,648,0.0351606905460357
29
+ UDN-P1,Growth delay,435,0.0286059584468603
30
+ UDN-P1,Pancreatitis,63,0.0134885320439934
31
+ UDN-P1,Overlapping toe,2,0.0212031248956918
32
+ UDN-P1,Constipation,270,0.0162120759487152
33
+ UDN-P1,Gastroesophageal reflux,268,0.0238284692168235
34
+ UDN-P1,Cerebral atrophy,231,0.0270951297134161
35
+ UDN-P1,Hypoplasia of the corpus callosum,332,0.0298781190067529
36
+ UDN-P1,Respiratory insufficiency,325,0.0320345684885978
37
+ UDN-P1,Ventriculomegaly,409,0.0279995389282703
38
+ UDN-P1,Coarse hair,2,0.032378576695919
39
+ UDN-P1,Drooling,85,0.0170378927141428
40
+ UDN-P1,Febrile seizure (within the age range of 3 months to 6 years),62,0.0118239596486091
41
+ UDN-P1,Developmental regression,203,0.0243585351854562
42
+ UDN-P1,Abnormal cerebral white matter morphology,76,0.0176227763295173
43
+ UDN-P1,Short stature,1167,0.0382893718779087
44
+ UDN-P1,Chronic pancreatitis,1,0.0170067008584737
45
+ UDN-P1,Premature loss of primary teeth,1,0.0167370326817035
46
+ UDN-P1,Premature loss of teeth,32,0.0260231960564851
47
+ UDN-P1,Chronic lung disease,1,0.0176592171192169
48
+ UDN-P1,Moderate sensorineural hearing impairment,1,0.009951589629054
49
+ UDN-P1,Laryngeal cleft,1,0.0077759157866239
50
+ UDN-P1,Laryngeal calcification,1,0.0125970430672168
51
+ UDN-P1,Low hanging columella,2,0.0163100585341453
52
+ UDN-P1,Prominent eyelashes,1,0.024880614131689
53
+ UDN-P1,Gastrostomy tube feeding in infancy,1,0.0276554562151432
54
+ UDN-P1,Midface retrusion,1,0.0258742850273847
55
+ UDN-P1,Premature adrenarche,1,0.0266624484211206
56
+ UDN-P1,T2 hypointense brainstem,1,0.0004453512665349
57
+ UDN-P1,T2 hypointense basal ganglia,49,0.0048909387551248
gene_discovery_scores.csv ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,genes,similarities,correct_gene_label
2
+ UDN-P2,CTH,0.3697766065597534,0
3
+ UDN-P2,WASHC2A,0.1429679691791534,0
4
+ UDN-P2,GLYR1,0.4873602986335754,1
5
+ UDN-P2,PRKACA,0.2901750802993774,0
6
+ UDN-P2,SMPD3,0.0044486671686172,0
7
+ UDN-P1,ANO3,0.1011797934770584,0
8
+ UDN-P1,TYMP,-0.1458933055400848,0
9
+ UDN-P1,ORC4,0.6925094127655029,0
10
+ UDN-P1,DST,-0.0270717144012451,0
11
+ UDN-P1,NDUFAF5,0.1227701157331466,0
12
+ UDN-P1,WDFY4,0.5533968806266785,0
13
+ UDN-P1,TOPORS,-0.1653318554162979,0
14
+ UDN-P1,SLK,-0.1769217252731323,0
15
+ UDN-P1,GMPPA,0.2679024338722229,0
16
+ UDN-P1,APC,0.2313176691532135,0
17
+ UDN-P1,INSL3,0.0432731509208679,0
18
+ UDN-P1,ZFYVE26,0.3379367589950561,0
19
+ UDN-P1,KAT6A,0.7698580026626587,0
20
+ UDN-P1,POLR3A,0.7476195693016052,1
21
+ UDN-P1,DYNAP,-0.2454878091812133,0
22
+ UDN-P1,NCOR2,0.2544351518154144,0
23
+ UDN-P1,PIWIL3,-0.2566407322883606,0
gene_discovery_scores_attn.csv ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,phenotypes,degrees,attention
2
+ UDN-P2,Abnormality of vision,111,0.1178910732269287
3
+ UDN-P2,Global developmental delay,1330,0.1363666206598281
4
+ UDN-P2,Abnormality of the liver,65,0.0964500233530998
5
+ UDN-P2,Failure to thrive in infancy,1,0.1844952553510666
6
+ UDN-P2,Exocrine pancreatic insufficiency,4,0.0708952322602272
7
+ UDN-P2,Abdominal pain,257,0.0660589486360549
8
+ UDN-P2,Asthma,177,0.0678283423185348
9
+ UDN-P2,Duodenal atresia,34,0.0732858255505561
10
+ UDN-P2,Intestinal malrotation,91,0.0733114182949066
11
+ UDN-P2,Gastroparesis,23,0.1134173870086669
12
+ UDN-P1,Thick lower lip vermilion,1,0.0254656299948692
13
+ UDN-P1,Hydrocephalus,304,0.0226140450686216
14
+ UDN-P1,Microcephaly,1006,0.0317770205438137
15
+ UDN-P1,Low anterior hairline,4,0.0236586183309555
16
+ UDN-P1,Mandibular prognathia,6,0.0236049313098192
17
+ UDN-P1,Alacrima,1,0.0144500378519296
18
+ UDN-P1,Thick eyebrow,8,0.0233250595629215
19
+ UDN-P1,Exotropia,94,0.0174672622233629
20
+ UDN-P1,Synophrys,3,0.0241911858320236
21
+ UDN-P1,Delayed eruption of teeth,131,0.0234366580843925
22
+ UDN-P1,Abnormal sternum morphology,51,0.0120793534442782
23
+ UDN-P1,Hypertension,278,0.0188926625996828
24
+ UDN-P1,Hypotonia,911,0.03265380859375
25
+ UDN-P1,Global developmental delay,1330,0.027355071157217
26
+ UDN-P1,Dystonia,360,0.0240448117256164
27
+ UDN-P1,Absent speech,1,0.022760335355997
28
+ UDN-P1,Failure to thrive,648,0.0337891578674316
29
+ UDN-P1,Growth delay,435,0.0274257939308881
30
+ UDN-P1,Pancreatitis,63,0.0125568015500903
31
+ UDN-P1,Overlapping toe,2,0.0273327771574258
32
+ UDN-P1,Constipation,270,0.0161661840975284
33
+ UDN-P1,Gastroesophageal reflux,268,0.0214648023247718
34
+ UDN-P1,Cerebral atrophy,231,0.0250347331166267
35
+ UDN-P1,Hypoplasia of the corpus callosum,332,0.0269252881407737
36
+ UDN-P1,Respiratory insufficiency,325,0.0316686779260635
37
+ UDN-P1,Ventriculomegaly,409,0.0280382838100194
38
+ UDN-P1,Coarse hair,2,0.024032786488533
39
+ UDN-P1,Drooling,85,0.0192697960883378
40
+ UDN-P1,Febrile seizure (within the age range of 3 months to 6 years),62,0.0105393650010228
41
+ UDN-P1,Developmental regression,203,0.0249004419893026
42
+ UDN-P1,Abnormal cerebral white matter morphology,76,0.0189827494323253
43
+ UDN-P1,Short stature,1167,0.036681056022644
44
+ UDN-P1,Chronic pancreatitis,1,0.0087832231074571
45
+ UDN-P1,Premature loss of primary teeth,1,0.0137059595435857
46
+ UDN-P1,Premature loss of teeth,32,0.0200045183300972
47
+ UDN-P1,Chronic lung disease,1,0.030693894252181
48
+ UDN-P1,Moderate sensorineural hearing impairment,1,0.0128651773557066
49
+ UDN-P1,Laryngeal cleft,1,0.0062378644943237
50
+ UDN-P1,Laryngeal calcification,1,0.0187820736318826
51
+ UDN-P1,Low hanging columella,2,0.0193525701761245
52
+ UDN-P1,Prominent eyelashes,1,0.0317739509046077
53
+ UDN-P1,Gastrostomy tube feeding in infancy,1,0.0312498826533556
54
+ UDN-P1,Midface retrusion,1,0.0272352565079927
55
+ UDN-P1,Premature adrenarche,1,0.0155880162492394
56
+ UDN-P1,T2 hypointense brainstem,1,0.0003417030384298
57
+ UDN-P1,T2 hypointense basal ganglia,49,0.0108007565140724
patients_like_me_scores.csv ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,candidate_patients,similarities,correct_label,genes,diseases,disease_ids
2
+ UDN-P4,SIM-P7513,0.0001346864301012,0.0,REEP1,Autosomal dominant spastic paraplegia type 31,101011
3
+ UDN-P4,SIM-P38172,0.0001341508032055,0.0,KIF1A,Autosomal spastic paraplegia type 30,101010
4
+ UDN-P4,SIM-P31298,0.0001340870658168,0.0,DDHD1,Autosomal recessive spastic paraplegia type 28,101008
5
+ UDN-P4,SIM-P19068,0.0001340282615274,1.0,CAPN1,Autosomal recessive spastic paraplegia type 76,488594
6
+ UDN-P4,SIM-P19513,0.0001340070302831,0.0,ATL1,Autosomal dominant spastic paraplegia type 3,100984
7
+ UDN-P4,SIM-P36075,0.000133997775265,0.0,MTPAP,Autosomal recessive spastic ataxia-optic atrophy-dysarthria syndrome,254343
8
+ UDN-P4,SIM-P18901,0.000133881723741,0.0,TDP1,Spinocerebellar ataxia with axonal neuropathy type 1,94124
9
+ UDN-P4,SIM-P20117,0.000133869878482,0.0,SPAST,Autosomal dominant spastic paraplegia type 4,100985
10
+ UDN-P4,SIM-P41708,0.0001337969733867,0.0,SPG19,Autosomal dominant spastic paraplegia type 19,100999
11
+ UDN-P4,SIM-P24993,0.0001337462454102,0.0,SPG19,Autosomal dominant spastic paraplegia type 19,100999
12
+ UDN-P6,SIM-P34720,0.0001335124979959,0.0,PMM2,PMM2-CDG,79318
13
+ UDN-P6,SIM-P6961,0.000133484849357,0.0,AP1S2,X-linked intellectual disability-hypotonia-facial dysmorphism-aggressive behavior syndrome,85329
14
+ UDN-P6,SIM-P39957,0.0001328073558397,0.0,TCF4,Pitt-Hopkins syndrome,2896
15
+ UDN-P6,SIM-P13143,0.0001326532074017,0.0,SSR4,SSR4-CDG,370927
16
+ UDN-P6,SIM-P21183,0.0001325271441601,0.0,KAT6A,Autosomal dominant intellectual disability-craniofacial anomalies-cardiac defects syndrome,457193
17
+ UDN-P6,SIM-P27029,0.0001323336182394,0.0,ATRX,Chudley-Lowry-Hoar syndrome,93971
18
+ UDN-P6,SIM-P26917,0.0001322419411735,0.0,NONO,Macrocephaly-intellectual disability-left ventricular non compaction syndrome,466791
19
+ UDN-P6,SIM-P13788,0.0001321789313806,0.0,NRXN1,Pitt-Hopkins-like syndrome,221150
20
+ UDN-P6,SIM-P3592,0.0001321435265708,0.0,FRMD4A,Severe intellectual disability-corpus callosum agenesis-facial dysmorphism-cerebellar ataxia syndrome,466688
21
+ UDN-P6,SIM-P24214,0.0001321362215094,0.0,AP1S2,X-linked intellectual disability-hypotonia-facial dysmorphism-aggressive behavior syndrome,85329
22
+ UDN-P5,SIM-P12152,0.0001249182969331,0.0,MYD88,Waldenström macroglobulinemia,33226
23
+ UDN-P5,SIM-P23023,0.0001241921563632,0.0,TNFRSF1A,Tumor necrosis factor receptor 1 associated periodic syndrome,32960
24
+ UDN-P5,SIM-P30461,0.0001231428614119,0.0,MYD88,Waldenström macroglobulinemia,33226
25
+ UDN-P5,SIM-P6717,0.0001229638874065,0.0,MYD88,Waldenström macroglobulinemia,33226
26
+ UDN-P5,SIM-P30356,0.00012291384337,0.0,PSTPIP1,Pyogenic arthritis-pyoderma gangrenosum-acne syndrome,69126
27
+ UDN-P5,SIM-P35189,0.0001224889274453,0.0,NLRC4,Familial cold urticaria,47045
28
+ UDN-P5,SIM-P20277,0.0001221649872604,0.0,CPOX,Hereditary coproporphyria,79273
29
+ UDN-P5,SIM-P7603,0.000121750497783,0.0,NLRP3,Familial cold urticaria,47045
30
+ UDN-P5,SIM-P8138,0.0001210815316881,0.0,MEFV,Familial Mediterranean fever,342
31
+ UDN-P5,SIM-P36107,0.0001210761329275,0.0,TNFRSF1A,Tumor necrosis factor receptor 1 associated periodic syndrome,32960
32
+ UDN-P3,SIM-P17696,0.0001152271579485,0.0,GRIA3,X-linked intellectual disability due to GRIA3 mutations,364028
33
+ UDN-P3,SIM-P12018,0.0001116794228437,1.0,RPS6KA3,Symptomatic form of Coffin-Lowry syndrome in female carriers,276630
34
+ UDN-P3,SIM-P13072,0.0001079059220501,0.0,THOC2,X-linked intellectual disability-short stature-overweight syndrome,457240
35
+ UDN-P3,SIM-P27682,0.0001075164036592,0.0,AP1S2,Fried syndrome,85335
36
+ UDN-P3,SIM-P17495,0.0001061590883182,0.0,SMS,"X-linked intellectual disability, Snyder type",3063
37
+ UDN-P3,SIM-P12040,0.000106000341475,0.0,AP1S2,Fried syndrome,85335
38
+ UDN-P3,SIM-P38032,0.0001052861043717,0.0,UBE3A,Angelman syndrome due to a point mutation,411511
39
+ UDN-P3,SIM-P15144,0.0001048136182362,0.0,PHF8,"X-linked intellectual disability, Siderius type",85287
40
+ UDN-P3,SIM-P2560,0.0001046940378728,0.0,AP1S2,Fried syndrome,85335
41
+ UDN-P3,SIM-P13027,0.000104646103864,0.0,FMR1,Fragile X syndrome,908
patients_like_me_scores_attn.csv ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ patient_id,phenotypes,degrees,attention
2
+ UDN-P4,Nystagmus,554,0.052525907754898
3
+ UDN-P4,Horizontal nystagmus,56,0.0585071220993995
4
+ UDN-P4,Spasticity,411,0.0557427555322647
5
+ UDN-P4,Dysarthria,369,0.0397082976996898
6
+ UDN-P4,Brisk reflexes,1,0.0718949064612388
7
+ UDN-P4,Headache,257,0.0289554204791784
8
+ UDN-P4,Unsteady gait,103,0.0579096935689449
9
+ UDN-P4,Limb dysmetria,9,0.0552756935358047
10
+ UDN-P4,Upper motor neuron dysfunction,44,0.0472515001893043
11
+ UDN-P4,Steppage gait,32,0.0555065162479877
12
+ UDN-P4,Paresthesia,116,0.0359307341277599
13
+ UDN-P4,EMG abnormality,99,0.0366697795689106
14
+ UDN-P4,Proportionate short stature,29,0.0580268092453479
15
+ UDN-P4,Lumbar spinal canal stenosis,1,0.0060006459243595
16
+ UDN-P4,Impaired distal vibration sensation,13,0.0658590272068977
17
+ UDN-P4,Abnormal pyramidal sign,147,0.0405740477144718
18
+ UDN-P4,Cervical spondylosis,3,0.042157094925642
19
+ UDN-P4,Spondylolisthesis at L5-S1,1,0.0032220152206718
20
+ UDN-P4,Distal lower limb amyotrophy,23,0.0685843080282211
21
+ UDN-P4,Weakness due to upper motor neuron dysfunction,10,0.029885521158576
22
+ UDN-P4,Ankle clonus,1,0.0551076084375381
23
+ UDN-P4,Brain atrophy,65,0.0347045883536338
24
+ UDN-P3,Low anterior hairline,4,0.0839744806289672
25
+ UDN-P3,Psychosis,91,0.0314286798238754
26
+ UDN-P3,Oligomenorrhea,29,0.0356979817152023
27
+ UDN-P3,Tapered finger,1,0.0815264210104942
28
+ UDN-P3,Mental deterioration,127,0.0507740229368209
29
+ UDN-P3,Joint laxity,166,0.0528562292456626
30
+ UDN-P3,Pes planus,224,0.0508626289665699
31
+ UDN-P3,Sleep disturbance,191,0.0410025417804718
32
+ UDN-P3,Abnormal brainstem morphology,35,0.055194191634655
33
+ UDN-P3,Abnormal rapid eye movement sleep,6,0.0391974486410617
34
+ UDN-P3,Scoliosis,801,0.0617553479969501
35
+ UDN-P3,"High, narrow palate",4,0.0490836761891841
36
+ UDN-P3,Short nose,3,0.0597070343792438
37
+ UDN-P3,Abnormality of the clivus,3,0.0444605834782123
38
+ UDN-P3,Prominent forehead,1,0.0760357454419136
39
+ UDN-P3,Abnormal occipital bone morphology,1,0.031018890440464
40
+ UDN-P3,Abnormal upper to lower segment ratio,3,0.0362525805830955
41
+ UDN-P3,Increased head circumference,14,0.0620610117912292
42
+ UDN-P3,Low levels of vitamin D,1,0.0350590944290161
43
+ UDN-P3,Thyroiditis,31,0.0220514684915542
44
+ UDN-P5,Cutis marmorata,62,0.0696348845958709
45
+ UDN-P5,Skin rash,146,0.0380239747464656
46
+ UDN-P5,Muscle weakness,392,0.0529503189027309
47
+ UDN-P5,Joint hypermobility,142,0.1087778359651565
48
+ UDN-P5,Exocrine pancreatic insufficiency,4,0.0637081339955329
49
+ UDN-P5,Weight loss,253,0.0690883025527
50
+ UDN-P5,Recurrent fever,66,0.0378629937767982
51
+ UDN-P5,Constipation,270,0.0589890293776989
52
+ UDN-P5,Migraine,101,0.0587076544761657
53
+ UDN-P5,Episodic abdominal pain,1,0.0709817335009574
54
+ UDN-P5,Recurrent infections,214,0.082623191177845
55
+ UDN-P5,Arthralgia,217,0.0823353752493858
56
+ UDN-P5,Easy fatigability,2,0.0924237966537475
57
+ UDN-P5,Gastritis,13,0.0227577649056911
58
+ UDN-P5,Peripheral neuropathy,215,0.0911349803209304
59
+ UDN-P6,Urinary incontinence,101,0.0282541252672672
60
+ UDN-P6,Narrow mouth,1,0.0286488384008407
61
+ UDN-P6,Thin upper lip vermilion,222,0.0543260090053081
62
+ UDN-P6,Macrocephaly,355,0.0478888861835002
63
+ UDN-P6,Hypertelorism,755,0.0466706231236457
64
+ UDN-P6,Smooth philtrum,1,0.0513933710753917
65
+ UDN-P6,Low-set ears,604,0.0431698933243751
66
+ UDN-P6,Exotropia,94,0.0236861575394868
67
+ UDN-P6,Global developmental delay,1330,0.0373244658112525
68
+ UDN-P6,Generalized hypotonia,762,0.0430077537894248
69
+ UDN-P6,Absent speech,1,0.0480412095785141
70
+ UDN-P6,Failure to thrive,648,0.0439043194055557
71
+ UDN-P6,Growth delay,435,0.0379057824611663
72
+ UDN-P6,Gastroesophageal reflux,268,0.0282815545797348
73
+ UDN-P6,Chronic diarrhea,9,0.0330403670668602
74
+ UDN-P6,Myopathic facies,2,0.0682374909520149
75
+ UDN-P6,Broad-based gait,3,0.0389422886073589
76
+ UDN-P6,Gait imbalance,6,0.0347393229603767
77
+ UDN-P6,Bowel incontinence,51,0.0291043147444725
78
+ UDN-P6,Delayed skeletal maturation,290,0.0552288256585598
79
+ UDN-P6,Anisocoria,16,0.0166235510259866
80
+ UDN-P6,Monocular strabismus,1,0.0136106135323643
81
+ UDN-P6,Prominent forehead,1,0.0529596321284771
82
+ UDN-P6,Chronic constipation,3,0.0185318272560834
83
+ UDN-P6,Small pituitary gland,11,0.0520597584545612
84
+ UDN-P6,Low levels of vitamin D,1,0.0244190022349357
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas
2
+ gradio
3
+