Christina Theodoris committed on
Commit
e78c44d
1 Parent(s): 2181aa4

Modify tokenizer to allow renaming attribute names between loom and .dataset

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +4 -3
geneformer/tokenizer.py CHANGED
@@ -106,7 +106,8 @@ class TranscriptomeTokenizer:
106
 
107
  def tokenize_files(self, loom_data_directory):
108
  tokenized_cells = []
109
- cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.keys()}
 
110
 
111
  # loops through directories to tokenize .loom files
112
  for loom_file_path in loom_data_directory.glob("*.loom"):
@@ -115,8 +116,8 @@ class TranscriptomeTokenizer:
115
  loom_file_path
116
  )
117
  tokenized_cells += file_tokenized_cells
118
- for k in cell_metadata.keys():
119
- cell_metadata[k] += file_cell_metadata[k]
120
 
121
  return tokenized_cells, cell_metadata
122
 
 
106
 
107
  def tokenize_files(self, loom_data_directory):
108
  tokenized_cells = []
109
+ loom_cell_attr = [attr_key for attr_key in self.custom_attr_name_dict.keys()]
110
+ cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
111
 
112
  # loops through directories to tokenize .loom files
113
  for loom_file_path in loom_data_directory.glob("*.loom"):
 
116
  loom_file_path
117
  )
118
  tokenized_cells += file_tokenized_cells
119
+ for k in loom_cell_attr:
120
+ cell_metadata[self.custom_attr_name_dict[k]] += file_cell_metadata[k]
121
 
122
  return tokenized_cells, cell_metadata
123