lucio committed on
Commit 25b96d6
1 Parent(s): d5c631c

Training in progress, step 500

Files changed (23)
  1. .ipynb_checkpoints/eval-checkpoint.py +61 -3
  2. .ipynb_checkpoints/run-checkpoint.sh +1 -0
  3. .ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py +1 -0
  4. config.json +3 -2
  5. eval.py +61 -3
  6. pytorch_model.bin +1 -1
  7. requirements.txt +3 -0
  8. run.sh +1 -0
  9. run_speech_recognition_ctc.py +1 -0
  10. runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643785646.6555233/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.1 +3 -0
  11. runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0 +3 -0
  12. runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 +0 -0
  13. runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 +0 -0
  14. runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 +0 -0
  15. runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 +0 -0
  16. runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 +0 -0
  17. runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 +0 -0
  18. runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 +0 -0
  19. runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 +0 -0
  20. runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 +0 -0
  21. runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 +0 -0
  22. special_tokens_map.json +1 -1
  23. training_args.bin +1 -1
.ipynb_checkpoints/eval-checkpoint.py CHANGED
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import argparse
+import functools
 import re
 from typing import Dict

-from datasets import Audio, Dataset, load_dataset, load_metric
+from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric

-from transformers import AutoFeatureExtractor, pipeline
+from transformers import AutoFeatureExtractor, AutoTokenizer, pipeline


 def log_results(result: Dataset, args: Dict[str, str]):
@@ -63,12 +64,50 @@ def normalize_text(text: str) -> str:
     return text


+def create_vocabulary_from_data(
+    datasets: DatasetDict,
+    word_delimiter_token = None,
+    unk_token = None,
+    pad_token = None,
+):
+    # Given training and test labels create vocabulary
+    def extract_all_chars(batch):
+        all_text = " ".join(batch["target"])
+        vocab = list(set(all_text))
+        return {"vocab": [vocab], "all_text": [all_text]}
+
+    vocabs = datasets.map(
+        extract_all_chars,
+        batched=True,
+        batch_size=-1,
+        keep_in_memory=True,
+        remove_columns=datasets["test"].column_names,
+    )
+
+
+    vocab_dict = {v: k for k, v in enumerate(sorted(list(vocabs["test"]["vocab"])))}
+
+    # replace white space with delimiter token
+    if word_delimiter_token is not None:
+        vocab_dict[word_delimiter_token] = vocab_dict[" "]
+        del vocab_dict[" "]
+
+    # add unk and pad token
+    if unk_token is not None:
+        vocab_dict[unk_token] = len(vocab_dict)
+
+    if pad_token is not None:
+        vocab_dict[pad_token] = len(vocab_dict)
+
+    return vocab_dict
+
+
 def main(args):
     # load dataset
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

     # for testing: only process the first two examples as a test
-    # dataset = dataset.select(range(10))
+    dataset = dataset.select(range(10))

     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
@@ -90,6 +129,7 @@ def main(args):
         batch["target"] = normalize_text(batch["sentence"])
         return batch

+
     # run inference on all examples
     result = dataset.map(map_to_pred, remove_columns=dataset.column_names)

@@ -97,6 +137,21 @@ def main(args):
     # do not change function below
     log_results(result, args)

+    if args.check_vocab:
+        tokenizer = AutoTokenizer.from_pretrained(args.model_id)
+        unk_token = "[UNK]"
+        pad_token = "[PAD]"
+        word_delimiter_token = "|"
+        raw_datasets = DatasetDict({"test": result})
+        vocab_dict = create_vocabulary_from_data(
+            raw_datasets,
+            word_delimiter_token=word_delimiter_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+        )
+        print(vocab_dict)
+        print(set(vocab_dict) - set(tokenizer.get_vocab()))
+

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -123,6 +178,9 @@ if __name__ == "__main__":
     parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
     )
+    parser.add_argument(
+        "--check_vocab", action="store_true", help="Verify that normalized target text is within character set"
+    )
     args = parser.parse_args()

     main(args)
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -16,6 +16,7 @@ python xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --save_steps="500" \
     --eval_steps="500" \
     --logging_steps="100" \
+    --min_duration_in_seconds="0.5" \
     --layerdrop="0.0" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py CHANGED
@@ -553,6 +553,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
+        "zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
config.json CHANGED
@@ -100,8 +100,9 @@
     1
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.16.0.dev0",
+  "transformers_version": "4.17.0.dev0",
   "use_weighted_layer_sum": false,
   "vocab_size": 32,
-  "xvector_output_dim": 512
+  "xvector_output_dim": 512,
+  "zero_infinity": true
 }
eval.py CHANGED
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import argparse
+import functools
 import re
 from typing import Dict

-from datasets import Audio, Dataset, load_dataset, load_metric
+from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric

-from transformers import AutoFeatureExtractor, pipeline
+from transformers import AutoFeatureExtractor, AutoTokenizer, pipeline


 def log_results(result: Dataset, args: Dict[str, str]):
@@ -63,12 +64,50 @@ def normalize_text(text: str) -> str:
     return text


+def create_vocabulary_from_data(
+    datasets: DatasetDict,
+    word_delimiter_token = None,
+    unk_token = None,
+    pad_token = None,
+):
+    # Given training and test labels create vocabulary
+    def extract_all_chars(batch):
+        all_text = " ".join(batch["target"])
+        vocab = list(set(all_text))
+        return {"vocab": [vocab], "all_text": [all_text]}
+
+    vocabs = datasets.map(
+        extract_all_chars,
+        batched=True,
+        batch_size=-1,
+        keep_in_memory=True,
+        remove_columns=datasets["test"].column_names,
+    )
+
+
+    vocab_dict = {v: k for k, v in enumerate(sorted(list(vocabs["test"]["vocab"])))}
+
+    # replace white space with delimiter token
+    if word_delimiter_token is not None:
+        vocab_dict[word_delimiter_token] = vocab_dict[" "]
+        del vocab_dict[" "]
+
+    # add unk and pad token
+    if unk_token is not None:
+        vocab_dict[unk_token] = len(vocab_dict)
+
+    if pad_token is not None:
+        vocab_dict[pad_token] = len(vocab_dict)
+
+    return vocab_dict
+
+
 def main(args):
     # load dataset
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

     # for testing: only process the first two examples as a test
-    # dataset = dataset.select(range(10))
+    dataset = dataset.select(range(10))

     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
@@ -90,6 +129,7 @@ def main(args):
         batch["target"] = normalize_text(batch["sentence"])
         return batch

+
     # run inference on all examples
     result = dataset.map(map_to_pred, remove_columns=dataset.column_names)

@@ -97,6 +137,21 @@ def main(args):
     # do not change function below
     log_results(result, args)

+    if args.check_vocab:
+        tokenizer = AutoTokenizer.from_pretrained(args.model_id)
+        unk_token = "[UNK]"
+        pad_token = "[PAD]"
+        word_delimiter_token = "|"
+        raw_datasets = DatasetDict({"test": result})
+        vocab_dict = create_vocabulary_from_data(
+            raw_datasets,
+            word_delimiter_token=word_delimiter_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+        )
+        print(vocab_dict)
+        print(set(vocab_dict) - set(tokenizer.get_vocab()))
+

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -123,6 +178,9 @@ if __name__ == "__main__":
     parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
     )
+    parser.add_argument(
+        "--check_vocab", action="store_true", help="Verify that normalized target text is within character set"
+    )
     args = parser.parse_args()

     main(args)
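
Note on the --check_vocab addition to eval.py above: it builds a character-level vocabulary from the normalized target column and prints the set difference against the tokenizer's vocabulary, so any character that normalization leaves in the references but that the model cannot emit becomes visible. A minimal standalone sketch of that check, with toy sentences and a hypothetical character set (not part of the commit):

    # Sketch of the --check_vocab idea: characters present in the normalized
    # references but absent from the model's vocabulary can never be produced
    # by CTC decoding. Toy data; the real script compares against
    # AutoTokenizer.get_vocab() on the evaluation dataset.
    targets = ["salom dunyo", "o'zbek tilida misol"]  # hypothetical normalized references
    model_vocab = set("abdefghijklmnopqrstuvxyz |")   # hypothetical tokenizer characters

    reference_chars = set(" ".join(targets))
    # replace plain space with the CTC word-delimiter token before comparing
    reference_chars = {("|" if c == " " else c) for c in reference_chars}

    print("characters not covered:", reference_chars - model_vocab)  # here: {"'"}
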
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f992aa6854d16e2db041cc2d1e625cd6f276fca51497aacd3b9d9563932eab38
+oid sha256:8b71f72aac1d492daee452de8218f59a78d50f29656419218b12834c50edc15f
 size 1262054897
requirements.txt ADDED
@@ -0,0 +1,3 @@
+unidecode
+tensorboard
+
run.sh CHANGED
@@ -16,6 +16,7 @@ python xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --save_steps="500" \
     --eval_steps="500" \
     --logging_steps="100" \
+    --min_duration_in_seconds="0.5" \
     --layerdrop="0.0" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \
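
Note on the --min_duration_in_seconds="0.5" flag added to run.sh above: clips shorter than half a second are dropped before training, since very short inputs can yield fewer CTC frames than target labels. A rough sketch of this kind of duration filtering with the datasets library, using a toy in-memory dataset and assuming 16 kHz audio (not the training script's exact code):

    from datasets import Dataset  # same library the scripts already use

    min_duration_in_seconds = 0.5
    sampling_rate = 16_000  # assumption: 16 kHz input, as XLS-R/Wav2Vec2-style models expect

    # toy stand-in for a loaded speech dataset: each row carries a raw waveform
    toy = Dataset.from_dict(
        {
            "audio": [
                {"array": [0.0] * 4_000, "sampling_rate": sampling_rate},   # 0.25 s -> dropped
                {"array": [0.0] * 24_000, "sampling_rate": sampling_rate},  # 1.50 s -> kept
            ]
        }
    )

    def long_enough(example):
        # duration in seconds = number of samples / sampling rate
        return len(example["audio"]["array"]) / example["audio"]["sampling_rate"] >= min_duration_in_seconds

    filtered = toy.filter(long_enough)
    print(len(toy), "->", len(filtered))  # 2 -> 1
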
run_speech_recognition_ctc.py CHANGED
@@ -553,6 +553,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
+        "zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,
runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643785646.6555233/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9500cee755dde8faf816fe7422af4bc642d5207e5a7715fc6d36a685700c4666
+size 4799
runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:412cb17226cf304713e4986e1eab9dffa422ea44ea63de0bdd747546e911336e
+size 5852
runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 CHANGED
Binary files a/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 and b/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 differ
 
runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 CHANGED
Binary files a/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 and b/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 differ
 
runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 CHANGED
Binary files a/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 and b/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 differ
 
runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 CHANGED
Binary files a/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 and b/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 differ
 
runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 CHANGED
Binary files a/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 and b/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 differ
 
runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 CHANGED
Binary files a/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 and b/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 differ
 
runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 CHANGED
Binary files a/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 and b/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 differ
 
runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 CHANGED
Binary files a/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 and b/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 differ
 
runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 CHANGED
Binary files a/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 and b/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 differ
 
runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 CHANGED
Binary files a/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 and b/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 differ
 
special_tokens_map.json CHANGED
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ef714526a959c262fd3874035daab89db41dfb4f72a127557e4538085974074
+oid sha256:39c0625450d0afa8d2e897190721a9173256a42e1f889cdecc94feee325632c3
 size 3055