Jiqing committed
Commit f0b2e2a
1 Parent(s): b733fa5

Update README.md

Files changed (1): README.md (+1 -3)
README.md CHANGED
@@ -10,7 +10,7 @@ ProtST for binary localization
 ## Running script
 ```python
 from transformers import AutoModel, AutoTokenizer, HfArgumentParser, TrainingArguments, Trainer
-from transformers.data.data_collator import DataCollatorForLanguageModeling, DataCollatorForTokenClassification, DataCollatorWithPadding
+from transformers.data.data_collator import DataCollatorWithPadding
 from transformers.trainer_pt_utils import get_parameter_names
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
 from datasets import load_dataset
@@ -125,8 +125,6 @@ if __name__ == "__main__":
     for split in ["train", "validation", "test"]:
         raw_dataset[split] = raw_dataset[split].map(func_tokenize_protein, batched=False, remove_columns=["Unnamed: 0", "prot_seq", "localization"])

-    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.0)
-    data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

     transformers.utils.logging.set_verbosity_info()
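Why the surviving collator is the right one: binary localization is sequence-level classification, so each batch only needs its tokenized inputs padded to a common length. DataCollatorForLanguageModeling (MLM masking) and DataCollatorForTokenClassification (per-token label padding) add machinery this task never uses. Below is a minimal sketch of the behavior, not part of the commit; the ESM checkpoint is a stand-in tokenizer for illustration (the README uses the ProtST model's own tokenizer):

```python
from transformers import AutoTokenizer
from transformers.data.data_collator import DataCollatorWithPadding

# Stand-in protein tokenizer for illustration only; substitute the
# tokenizer loaded for the ProtST checkpoint in the actual script.
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Two protein sequences of different lengths with sequence-level labels.
# The collator pads both to the longest sequence in the batch and returns
# stacked tensors ready to be fed to the Trainer.
features = [
    {**tokenizer("MKTAYIAKQR"), "labels": 1},
    {**tokenizer("MKT"), "labels": 0},
]
batch = data_collator(features)
print(batch["input_ids"].shape)       # e.g. torch.Size([2, 12])
print(batch["attention_mask"].shape)  # same shape; 0s mark the padding
print(batch["labels"])                # tensor([1, 0])
```

Because padding is decided per batch rather than across the whole dataset, short sequences never pay the memory cost of the longest protein in the corpus.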