ctheodoris committed on
Commit
42e9bf9
1 Parent(s): 188029e

Fixes to stats and adding gene dict attempt number 2 (#13)

Browse files

- fix in silico perturber (f4048822734452938a6f2b614534590ed54201bc)

MANIFEST.in CHANGED
@@ -1,2 +1,3 @@
1
  include geneformer/gene_median_dictionary.pkl
2
  include geneformer/token_dictionary.pkl
 
 
1
  include geneformer/gene_median_dictionary.pkl
2
  include geneformer/token_dictionary.pkl
3
+ include geneformer/gene_name_id_dict.pkl
geneformer/in_silico_perturber_stats.py CHANGED
@@ -150,10 +150,10 @@ def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
150
  null_shifts = []
151
 
152
  for dict_i in dict_list:
153
- token_tuples += dict_i.get((token, "cell_emb"),[])
154
 
155
  for dict_i in null_dict_list:
156
- null_tuples += dict_i.get((token, "cell_emb"),[])
157
 
158
  cos_sims_full_df.loc[i, "Test_avg_shift"] = np.mean(test_shifts)
159
  cos_sims_full_df.loc[i, "Null_avg_shift"] = np.mean(null_shifts)
@@ -302,6 +302,7 @@ class InSilicoPerturberStats:
302
  self.gene_id_name_dict = invert_dict(self.gene_name_id_dict)
303
 
304
  # obtain total gene list
 
305
  gene_list = get_gene_list(dict_list)
306
 
307
  # initiate results dataframe
@@ -314,7 +315,6 @@ class InSilicoPerturberStats:
314
  for genes in gene_list]}, \
315
  index=[i for i in range(len(gene_list))])
316
 
317
- dict_list = read_dictionaries(input_data_directory, "cell")
318
  if self.mode == "goal_state_shift":
319
  cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
320
 
 
150
  null_shifts = []
151
 
152
  for dict_i in dict_list:
153
+ test_shifts += dict_i.get((token, "cell_emb"),[])
154
 
155
  for dict_i in null_dict_list:
156
+ null_shifts += dict_i.get((token, "cell_emb"),[])
157
 
158
  cos_sims_full_df.loc[i, "Test_avg_shift"] = np.mean(test_shifts)
159
  cos_sims_full_df.loc[i, "Null_avg_shift"] = np.mean(null_shifts)
 
302
  self.gene_id_name_dict = invert_dict(self.gene_name_id_dict)
303
 
304
  # obtain total gene list
305
+ dict_list = read_dictionaries(input_data_directory, "cell")
306
  gene_list = get_gene_list(dict_list)
307
 
308
  # initiate results dataframe
 
315
  for genes in gene_list]}, \
316
  index=[i for i in range(len(gene_list))])
317
 
 
318
  if self.mode == "goal_state_shift":
319
  cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
320