theislab
/

Nicheformer

transcriptomics

Model card · Files and versions

aletlvl committed on Mar 25

Commit

a27a63d

·

verified ·

1 Parent(s): ac2a80e

Tokenization fixed

Files changed (1) hide show

tokenization_nicheformer.py +11 -2

tokenization_nicheformer.py CHANGED Viewed

@@ -277,7 +277,7 @@ class NicheformerTokenizer(PreTrainedTokenizer):
         """
         if isinstance(data, ad.AnnData):
             adata = data.copy()
             # Align with reference model if available
             if hasattr(self, '_load_reference_model'):
                 reference_model = self._load_reference_model()
@@ -287,11 +287,16 @@ class NicheformerTokenizer(PreTrainedTokenizer):
                     for col in ['modality', 'specie', 'assay']:
                         if col in adata.obs.columns:
                             original_types[col] = adata.obs[col].dtype
                     # Concatenate and then remove the reference
                     adata = ad.concat([reference_model, adata], join='outer', axis=0)
                     adata = adata[1:]
                     # Restore original column types after concatenation
                     for col, dtype in original_types.items():
                         if col in adata.obs.columns:
@@ -299,6 +304,10 @@ class NicheformerTokenizer(PreTrainedTokenizer):
                                 adata.obs[col] = adata.obs[col].astype(dtype)
                             except Exception as e:
                                 print(f"Warning: Could not convert {col} back to {dtype}: {e}")
             # Get gene expression data
             X = adata.X

         """
         if isinstance(data, ad.AnnData):
             adata = data.copy()
             # Align with reference model if available
             if hasattr(self, '_load_reference_model'):
                 reference_model = self._load_reference_model()
                     for col in ['modality', 'specie', 'assay']:
                         if col in adata.obs.columns:
                             original_types[col] = adata.obs[col].dtype
+                    print(f"modality dtype: {adata.obs['modality'].dtype}")
+                    print(f"specie dtype: {adata.obs['specie'].dtype}")
+                    print(f"assay dtype: {adata.obs['assay'].dtype}")
                     # Concatenate and then remove the reference
                     adata = ad.concat([reference_model, adata], join='outer', axis=0)
                     adata = adata[1:]
                     # Restore original column types after concatenation
                     for col, dtype in original_types.items():
                         if col in adata.obs.columns:
                                 adata.obs[col] = adata.obs[col].astype(dtype)
                             except Exception as e:
                                 print(f"Warning: Could not convert {col} back to {dtype}: {e}")
+                    print(f"modality dtype: {adata.obs['modality'].dtype}")
+                    print(f"specie dtype: {adata.obs['specie'].dtype}")
+                    print(f"assay dtype: {adata.obs['assay'].dtype}")
             # Get gene expression data
             X = adata.X