Kévin Yauy committed on
Commit
86f91be
1 Parent(s): 3d7977d

fix(dependencies): fix updates

Browse files

Former-commit-id: 24d597c3e610a89214ed8c2113986c7b1326ff61

Files changed (2) hide show
  1. phenogenius_app.py +107 -106
  2. pyproject.toml +2 -2
phenogenius_app.py CHANGED
@@ -55,12 +55,12 @@ image_chuga = Image.open("data/img/logo-chuga.png")
55
  st.sidebar.image(image_chuga, caption=None, width=60)
56
 
57
 
58
- @st.cache(max_entries=50)
59
  def convert_df(df):
60
  return df.to_csv(sep="\t").encode("utf-8")
61
 
62
 
63
- @st.cache(allow_output_mutation=True, max_entries=50)
64
  def load_data():
65
  matrix = pd.read_csv(
66
  "data/resources/ohe_all_thesaurus_weighted.tsv.gz",
@@ -71,7 +71,7 @@ def load_data():
71
  return matrix
72
 
73
 
74
- @st.cache(allow_output_mutation=True, max_entries=50)
75
  def load_umap_cohort():
76
  matrix = pd.read_csv(
77
  "data/resources/umap_loc_cohort.tsv",
@@ -81,7 +81,7 @@ def load_umap_cohort():
81
  return matrix
82
 
83
 
84
- @st.cache(allow_output_mutation=True, max_entries=50)
85
  def load_cohort():
86
  matrix = pd.read_csv(
87
  "data/resources/cohort_diag.tsv",
@@ -90,8 +90,8 @@ def load_cohort():
90
  return matrix
91
 
92
 
93
- @st.cache(
94
- hash_funcs={"Pickle": lambda _: None}, allow_output_mutation=True, max_entries=50
95
  )
96
  def load_nmf_model():
97
  with open("data/resources/pheno_NMF_390_model_42.pkl", "rb") as pickle_file:
@@ -101,7 +101,7 @@ def load_nmf_model():
101
  return pheno_NMF, reduced
102
 
103
 
104
- @st.cache(allow_output_mutation=True, max_entries=50)
105
  def symbol_to_id_to_dict():
106
  # from NCBI
107
  ncbi_df = pd.read_csv("data/resources/Homo_sapiens.gene_info.gz", sep="\t")
@@ -113,8 +113,8 @@ def symbol_to_id_to_dict():
113
  return ncbi_to_dict_ncbi, ncbi_to_dict
114
 
115
 
116
- @st.cache(
117
- hash_funcs={"_json.Scanner": hash}, allow_output_mutation=True, max_entries=50
118
  )
119
  def load_hp_ontology():
120
  with open("data/resources/hpo_obo.json") as json_data:
@@ -122,7 +122,7 @@ def load_hp_ontology():
122
  return data_dict
123
 
124
 
125
- @st.cache(allow_output_mutation=True, max_entries=50)
126
  def hpo_description_to_id():
127
  data_dict = {}
128
  for key, value in hp_onto.items():
@@ -130,8 +130,8 @@ def hpo_description_to_id():
130
  return data_dict
131
 
132
 
133
- @st.cache(
134
- hash_funcs={"_json.Scanner": hash}, allow_output_mutation=True, max_entries=50
135
  )
136
  def load_cluster_data():
137
  with open("data/resources/cluster_info.json") as json_data:
@@ -139,7 +139,7 @@ def load_cluster_data():
139
  return data_dict
140
 
141
 
142
- @st.cache(allow_output_mutation=True, max_entries=50)
143
  def load_topic_data():
144
  topic = pd.read_csv(
145
  "data/resources/main_topics_hpo_390_42_filtered_norm_004.tsv",
@@ -149,8 +149,8 @@ def load_topic_data():
149
  return topic
150
 
151
 
152
- @st.cache(
153
- hash_funcs={"_json.Scanner": hash}, allow_output_mutation=True, max_entries=50
154
  )
155
  def load_similarity_dict():
156
  with open("data/resources/similarity_dict_threshold_80.json") as json_data:
@@ -158,15 +158,15 @@ def load_similarity_dict():
158
  return data_dict
159
 
160
 
161
- @st.cache(
162
- hash_funcs={"Pickle": lambda _: None}, allow_output_mutation=True, max_entries=50
163
- )
164
- def load_projection():
165
- with open("data/resources/clustering_model.pkl", "rb") as pickle_file:
166
- cluster = pk.load(pickle_file)
167
- with open("data/resources/umap_projection.pkl", "rb") as pickle_file:
168
- umap = pk.load(pickle_file)
169
- return cluster, umap
170
 
171
 
172
  def get_symbol(gene):
@@ -308,8 +308,8 @@ if submit_button:
308
  hpo = get_hpo_id(hpo_raw)
309
  data = load_data()
310
  pheno_NMF, reduced = load_nmf_model()
311
- cluster, umap = load_projection()
312
- umap_cohort = load_umap_cohort()
313
  cohort = load_cohort()
314
  cluster_info = load_cluster_data()
315
  topic = load_topic_data()
@@ -435,88 +435,89 @@ if submit_button:
435
  key="download-csv-proj",
436
  )
437
 
438
- patient_transposed = sklearn.preprocessing.normalize(
439
- np.array(patient_df_info["mean_score"]).reshape(1, -1), norm="l1"
440
- )
441
- del patient_df_info
442
-
443
- patient_nmf_umap = umap.transform(pd.DataFrame(patient_transposed))
444
- del patient_transposed
445
-
446
- with st.expander("See projection in cohort"):
447
- umap_cohort["dist"] = abs(umap_cohort["x"] - patient_nmf_umap[0, 0]) + abs(
448
- umap_cohort["y"] - patient_nmf_umap[0, 1]
449
- )
450
- del patient_nmf_umap
451
- closest_patient = umap_cohort.nsmallest(3, "dist")
452
- st.write("Closest patients in the cohort are: ", closest_patient)
453
- st.write("Closest patient: ", cohort.loc[closest_patient.index[0]])
454
- st.write(
455
- get_hpo_name_list(
456
- cohort.loc[closest_patient.index[0]].hpo_list.split(","),
457
- hp_onto,
458
- )
459
- )
460
-
461
- cluster_selected = cluster_info[str(closest_patient["cluster"].values[0])]
462
- st.write("Selected cluster: ", closest_patient["cluster"].values[0])
463
- st.write("Number of patient in cluster: ", cluster_selected["n_patients"])
464
- del closest_patient
465
-
466
- gene_in_cluster = pd.DataFrame.from_dict(
467
- dict(Counter(cluster_selected["gene_list"])), orient="index"
468
- )
469
- gene_in_cluster.columns = ["count"]
470
- if gene_diag:
471
- if gene_diag in gene_in_cluster.index:
472
- st.write("Gene diag in cluster", gene_in_cluster.loc[gene_diag, :])
473
-
474
- st.write(
475
- "Gene(s) involved in cluster: ",
476
- gene_in_cluster.sort_values("count", ascending=False),
477
- )
478
- del gene_in_cluster
479
-
480
- group_involved = cluster_selected["group"]
481
- if (
482
- isinstance(group_involved, float)
483
- and math.isnan(float(group_involved)) == False
484
- ):
485
- topic_involved = topic.loc[topic_involved, :]
486
- st.write(
487
- "Group(s) of symptoms statistically enriched: ", topic_involved
488
- )
489
- elif isinstance(group_involved, str):
490
- group_list = [int(x) for x in cluster_selected["group"].split(",")]
491
- topic_involved = topic.loc[group_list, :]
492
- st.write(
493
- "Group(s) of symptoms statistically enriched: ", topic_involved
494
- )
495
- del topic_involved
496
- del group_involved
497
-
498
- dict_count_print = {}
499
- dict_count = dict(Counter(cluster_selected["hpo_list"]))
500
- dict_count_sorted = sorted(
501
- dict_count.items(), key=lambda x: x[1], reverse=True
502
- )
503
- del cluster_selected
504
- for element in dict_count_sorted:
505
- dict_count_print[element[0]] = {
506
- "description": hp_onto[element[0]]["name"],
507
- "count": element[1],
508
- }
509
- st.write(
510
- "HPOs declared in cluster:",
511
- pd.DataFrame.from_dict(dict_count_print, orient="index"),
512
- )
513
- del dict_count
514
- del dict_count_print
515
- del dict_count_sorted
516
-
517
  sim_dict, hpo_list_add = get_similar_terms(hpo_list, similarity_terms_dict)
518
  similar_list = list(set(hpo_list_add) - set(hpo_list))
519
- similar_list_desc = get_hpo_name_list(similar_list, hp_onto)
 
520
  if similar_list_desc:
521
  with st.expander("See symptoms with similarity > 80%"):
522
  similar_list_desc_df = pd.DataFrame.from_dict(
 
55
  st.sidebar.image(image_chuga, caption=None, width=60)
56
 
57
 
58
+ @st.cache_data(max_entries=50)
59
  def convert_df(df):
60
  return df.to_csv(sep="\t").encode("utf-8")
61
 
62
 
63
+ @st.cache_data(max_entries=50)
64
  def load_data():
65
  matrix = pd.read_csv(
66
  "data/resources/ohe_all_thesaurus_weighted.tsv.gz",
 
71
  return matrix
72
 
73
 
74
+ @st.cache_data(max_entries=50)
75
  def load_umap_cohort():
76
  matrix = pd.read_csv(
77
  "data/resources/umap_loc_cohort.tsv",
 
81
  return matrix
82
 
83
 
84
+ @st.cache_data( max_entries=50)
85
  def load_cohort():
86
  matrix = pd.read_csv(
87
  "data/resources/cohort_diag.tsv",
 
90
  return matrix
91
 
92
 
93
+ @st.cache_data(
94
+ hash_funcs={"Pickle": lambda _: None}, max_entries=50
95
  )
96
  def load_nmf_model():
97
  with open("data/resources/pheno_NMF_390_model_42.pkl", "rb") as pickle_file:
 
101
  return pheno_NMF, reduced
102
 
103
 
104
+ @st.cache_data(max_entries=50)
105
  def symbol_to_id_to_dict():
106
  # from NCBI
107
  ncbi_df = pd.read_csv("data/resources/Homo_sapiens.gene_info.gz", sep="\t")
 
113
  return ncbi_to_dict_ncbi, ncbi_to_dict
114
 
115
 
116
+ @st.cache_data(
117
+ hash_funcs={"_json.Scanner": hash}, max_entries=50
118
  )
119
  def load_hp_ontology():
120
  with open("data/resources/hpo_obo.json") as json_data:
 
122
  return data_dict
123
 
124
 
125
+ @st.cache_data(max_entries=50)
126
  def hpo_description_to_id():
127
  data_dict = {}
128
  for key, value in hp_onto.items():
 
130
  return data_dict
131
 
132
 
133
+ @st.cache_data(
134
+ hash_funcs={"_json.Scanner": hash}, max_entries=50
135
  )
136
  def load_cluster_data():
137
  with open("data/resources/cluster_info.json") as json_data:
 
139
  return data_dict
140
 
141
 
142
+ @st.cache_data(max_entries=50)
143
  def load_topic_data():
144
  topic = pd.read_csv(
145
  "data/resources/main_topics_hpo_390_42_filtered_norm_004.tsv",
 
149
  return topic
150
 
151
 
152
+ @st.cache_data(
153
+ hash_funcs={"_json.Scanner": hash}, max_entries=50
154
  )
155
  def load_similarity_dict():
156
  with open("data/resources/similarity_dict_threshold_80.json") as json_data:
 
158
  return data_dict
159
 
160
 
161
+ # @st.cache_data(
162
+ # hash_funcs={"Pickle": lambda _: None}, max_entries=50
163
+ # )
164
+ # def load_projection():
165
+ # with open("data/resources/clustering_model.pkl", "rb") as pickle_file:
166
+ # cluster = pk.load(pickle_file)
167
+ # with open("data/resources/umap_projection.pkl", "rb") as pickle_file:
168
+ # umap = pk.load(pickle_file)
169
+ # return cluster, umap
170
 
171
 
172
  def get_symbol(gene):
 
308
  hpo = get_hpo_id(hpo_raw)
309
  data = load_data()
310
  pheno_NMF, reduced = load_nmf_model()
311
+ # cluster, umap = load_projection()
312
+ # umap_cohort = load_umap_cohort()
313
  cohort = load_cohort()
314
  cluster_info = load_cluster_data()
315
  topic = load_topic_data()
 
435
  key="download-csv-proj",
436
  )
437
 
438
+ # patient_transposed = sklearn.preprocessing.normalize(
439
+ # np.array(patient_df_info["mean_score"]).reshape(1, -1), norm="l1"
440
+ # )
441
+ # del patient_df_info
442
+ #
443
+ # patient_nmf_umap = umap.transform(pd.DataFrame(patient_transposed))
444
+ # del patient_transposed
445
+ #
446
+ # with st.expander("See projection in cohort"):
447
+ # umap_cohort["dist"] = abs(umap_cohort["x"] - patient_nmf_umap[0, 0]) + abs(
448
+ # umap_cohort["y"] - patient_nmf_umap[0, 1]
449
+ # )
450
+ # del patient_nmf_umap
451
+ # closest_patient = umap_cohort.nsmallest(3, "dist")
452
+ # st.write("Closest patients in the cohort are: ", closest_patient)
453
+ # st.write("Closest patient: ", cohort.loc[closest_patient.index[0]])
454
+ # st.write(
455
+ # get_hpo_name_list(
456
+ # cohort.loc[closest_patient.index[0]].hpo_list.split(","),
457
+ # hp_onto,
458
+ # )
459
+ # )
460
+ #
461
+ # cluster_selected = cluster_info[str(closest_patient["cluster"].values[0])]
462
+ # st.write("Selected cluster: ", closest_patient["cluster"].values[0])
463
+ # st.write("Number of patient in cluster: ", cluster_selected["n_patients"])
464
+ # del closest_patient
465
+ #
466
+ # gene_in_cluster = pd.DataFrame.from_dict(
467
+ # dict(Counter(cluster_selected["gene_list"])), orient="index"
468
+ # )
469
+ # gene_in_cluster.columns = ["count"]
470
+ # if gene_diag:
471
+ # if gene_diag in gene_in_cluster.index:
472
+ # st.write("Gene diag in cluster", gene_in_cluster.loc[gene_diag, :])
473
+ #
474
+ # st.write(
475
+ # "Gene(s) involved in cluster: ",
476
+ # gene_in_cluster.sort_values("count", ascending=False),
477
+ # )
478
+ # del gene_in_cluster
479
+ #
480
+ # group_involved = cluster_selected["group"]
481
+ # if (
482
+ # isinstance(group_involved, float)
483
+ # and math.isnan(float(group_involved)) == False
484
+ # ):
485
+ # topic_involved = topic.loc[topic_involved, :]
486
+ # st.write(
487
+ # "Group(s) of symptoms statistically enriched: ", topic_involved
488
+ # )
489
+ # elif isinstance(group_involved, str):
490
+ # group_list = [int(x) for x in cluster_selected["group"].split(",")]
491
+ # topic_involved = topic.loc[group_list, :]
492
+ # st.write(
493
+ # "Group(s) of symptoms statistically enriched: ", topic_involved
494
+ # )
495
+ # del topic_involved
496
+ # del group_involved
497
+ #
498
+ # dict_count_print = {}
499
+ # dict_count = dict(Counter(cluster_selected["hpo_list"]))
500
+ # dict_count_sorted = sorted(
501
+ # dict_count.items(), key=lambda x: x[1], reverse=True
502
+ # )
503
+ # del cluster_selected
504
+ # for element in dict_count_sorted:
505
+ # dict_count_print[element[0]] = {
506
+ # "description": hp_onto[element[0]]["name"],
507
+ # "count": element[1],
508
+ # }
509
+ # st.write(
510
+ # "HPOs declared in cluster:",
511
+ # pd.DataFrame.from_dict(dict_count_print, orient="index"),
512
+ # )
513
+ # del dict_count
514
+ # del dict_count_print
515
+ # del dict_count_sorted
516
+ #
517
  sim_dict, hpo_list_add = get_similar_terms(hpo_list, similarity_terms_dict)
518
  similar_list = list(set(hpo_list_add) - set(hpo_list))
519
+ similar_list_desc = get_hpo_name_list(similar_list, hp_onto)
520
+
521
  if similar_list_desc:
522
  with st.expander("See symptoms with similarity > 80%"):
523
  similar_list_desc_df = pd.DataFrame.from_dict(
pyproject.toml CHANGED
@@ -5,13 +5,12 @@ description = ""
5
  authors = ["kevin.yauy <kevin.yauy@seqone.fr>"]
6
 
7
  [tool.poetry.dependencies]
8
- python = "^3.8"
9
  pandas = ">=1.3.0"
10
  pandarallel = "^1.6.1"
11
  scikit-learn = "^1.1.1"
12
  ujson = "^5.4.0"
13
  streamlit = "^1.11.1"
14
- umap-learn = "^0.5.3"
15
  matplotlib = "3.5"
16
  seaborn = "^0.11.2"
17
  plotnine = "^0.9.0"
@@ -19,6 +18,7 @@ obonet = "^0.3.0"
19
  multipy = "^0.16"
20
  psrecord = "^1.2"
21
  numpy = ">=1.20,<1.24"
 
22
 
23
  [tool.poetry.dev-dependencies]
24
  pytest = "^5.2"
 
5
  authors = ["kevin.yauy <kevin.yauy@seqone.fr>"]
6
 
7
  [tool.poetry.dependencies]
8
+ python = "~3.8"
9
  pandas = ">=1.3.0"
10
  pandarallel = "^1.6.1"
11
  scikit-learn = "^1.1.1"
12
  ujson = "^5.4.0"
13
  streamlit = "^1.11.1"
 
14
  matplotlib = "3.5"
15
  seaborn = "^0.11.2"
16
  plotnine = "^0.9.0"
 
18
  multipy = "^0.16"
19
  psrecord = "^1.2"
20
  numpy = ">=1.20,<1.24"
21
+ umap-learn = "^0.5.4"
22
 
23
  [tool.poetry.dev-dependencies]
24
  pytest = "^5.2"