ricomnl committed on
Commit
5cb733f
1 Parent(s): b6ca566

Generalized

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +1 -4
geneformer/tokenizer.py CHANGED
@@ -288,10 +288,7 @@ class TranscriptomeTokenizer:
288
  # create dataset
289
  def dict_generator():
290
  for i in range(len(tokenized_cells)):
291
- yield {
292
- 'input_ids': dataset_dict['input_ids'][i],
293
- 'cell_type': dataset_dict['cell_type'][i]
294
- }
295
  output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
296
 
297
  # truncate dataset
 
288
  # create dataset
289
  def dict_generator():
290
  for i in range(len(tokenized_cells)):
291
+ yield {k: dataset_dict[k][i] for k in dataset_dict.keys()}
 
 
 
292
  output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
293
 
294
  # truncate dataset