giovp committed on
Commit
42acd14
1 Parent(s): b23ca9d

Update tokenizer to normalize by the per-cell total counts in adata.obs["n_counts"] instead of adata.X.sum(1)

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +6 -1
geneformer/tokenizer.py CHANGED
@@ -183,7 +183,12 @@ class TranscriptomeTokenizer:
183
  filter_pass_loc, coding_miRNA_loc # filter cells and genes
184
  ]
185
 
186
- X_norm = (adata_filter.X / adata.X.sum(1) * 10_000 / norm_factor_vector).tocsr()
 
 
 
 
 
187
 
188
  tokenized_cells += [
189
  tokenize_cell(X_norm[i, ...].A.flatten(), coding_miRNA_tokens)
 
183
  filter_pass_loc, coding_miRNA_loc # filter cells and genes
184
  ]
185
 
186
+ X_norm = (
187
+ adata_filter.X
188
+ / adata.obs["n_counts"].values.reshape(-1, 1)
189
+ * 10_000
190
+ / norm_factor_vector
191
+ ).tocsr()
192
 
193
  tokenized_cells += [
194
  tokenize_cell(X_norm[i, ...].A.flatten(), coding_miRNA_tokens)