princeton-nlp committed
Commit b08c9d1
1 Parent(s): 1600167

Upload 10 files

config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "roberta-base",
+   "architectures": [
+     "RobertaForMabel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "transformers_version": "4.2.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50265
+ }
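
The config declares a custom RobertaForMabel architecture on a standard 12-layer roberta-base backbone, saved with transformers 4.2.1. A minimal loading sketch, assuming the uploaded files sit in a hypothetical local directory ./mabel-roberta-base; AutoModel can load the shared RoBERTa encoder (model_type is "roberta"), while the MABEL-specific head would require the authors' own model class:

    from transformers import AutoConfig, AutoModel, AutoTokenizer

    path = "./mabel-roberta-base"  # hypothetical path to these files

    config = AutoConfig.from_pretrained(path)        # parses config.json above
    tokenizer = AutoTokenizer.from_pretrained(path)  # uses vocab.json / merges.txt
    model = AutoModel.from_pretrained(path)          # loads the RoBERTa encoder;
                                                     # any MABEL head weights are
                                                     # skipped with a warning

    print(config.architectures)                          # ['RobertaForMabel']
    print(config.hidden_size, config.num_hidden_layers)  # 768 12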
log.txt ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d41b808e3ed51e72b3dc7a1d4a533e65ffe30de29bf6db0ab57602f8996efb67
+ size 498872747
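
pytorch_model.bin is stored through Git LFS, so the diff shows only the three-line pointer file: spec version, the SHA-256 of the real blob, and its size in bytes (~499 MB). A standard-library sketch for verifying a downloaded copy against the pointer, assuming the binary has already been fetched (e.g. via git lfs pull):

    import hashlib
    import os

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so the ~499 MB blob never sits in memory at once.
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            while block := f.read(chunk_size):
                digest.update(block)
        return digest.hexdigest()

    # Values copied from the pointer file above.
    EXPECTED_OID = "d41b808e3ed51e72b3dc7a1d4a533e65ffe30de29bf6db0ab57602f8996efb67"
    EXPECTED_SIZE = 498872747

    path = "pytorch_model.bin"  # the real binary, not the pointer
    assert os.path.getsize(path) == EXPECTED_SIZE
    assert sha256_of(path) == EXPECTED_OID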
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "name_or_path": "roberta-base"}
train_results.txt ADDED
@@ -0,0 +1,3 @@
+ epoch = 2.0
+ train_runtime = 2105.7691
+ train_samples_per_second = 1.055
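
The throughput figure appears to be optimizer steps per second rather than true samples per second: dividing the 2222 global steps recorded in trainer_state.json below by the logged runtime reproduces 1.055 exactly, which matches how early transformers 4.x releases computed this metric.

    steps, runtime = 2222, 2105.7691
    print(round(steps / runtime, 3))  # 1.055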
trainer_state.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.0,
+   "global_step": 2222,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.45,
+       "learning_rate": 7.74977497749775e-05,
+       "loss": 3.0294,
+       "step": 500
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 5.4995499549954995e-05,
+       "loss": 1.7905,
+       "step": 1000
+     },
+     {
+       "epoch": 1.35,
+       "learning_rate": 3.2493249324932494e-05,
+       "loss": 1.5222,
+       "step": 1500
+     },
+     {
+       "epoch": 1.8,
+       "learning_rate": 9.990999099909991e-06,
+       "loss": 1.3883,
+       "step": 2000
+     },
+     {
+       "epoch": 2.0,
+       "step": 2222,
+       "train_runtime": 2105.7691,
+       "train_samples_per_second": 1.055
+     }
+   ],
+   "max_steps": 2222,
+   "num_train_epochs": 2,
+   "total_flos": 161621779478999040,
+   "trial_name": null,
+   "trial_params": null
+ }
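
The four logged learning rates fall on a straight line that hits zero at step 2222, consistent with the Trainer's default linear-decay schedule with no warmup and a peak learning rate of 1e-4. The peak is inferred by extrapolating the log, not stated anywhere in these files; a sketch that reproduces the log_history values:

    PEAK_LR, MAX_STEPS = 1e-4, 2222  # peak inferred from the logged points

    def lr_at(step):
        # Linear decay from PEAK_LR at step 0 to zero at MAX_STEPS.
        return PEAK_LR * (1 - step / MAX_STEPS)

    for step in (500, 1000, 1500, 2000):
        print(step, lr_at(step))
    # Matches the log entries up to float rounding:
    # 500   7.74977497749775e-05
    # 1000  5.49954995499550e-05
    # 1500  3.24932493249325e-05
    # 2000  9.99099909991000e-06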
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce55beaeb9614e44d8e53a4843a8949219ea394a0194f7c7cab8ea8b77a846be
+ size 2159
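
training_args.bin is a pickled TrainingArguments object (also behind an LFS pointer, though at 2,159 bytes it is tiny). One way to inspect the hyperparameters it records, assuming a compatible transformers version is importable:

    import torch

    # Unpickling needs transformers installed, since the file stores a
    # transformers.TrainingArguments instance; recent torch versions also
    # require weights_only=False to load arbitrary pickles.
    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.learning_rate)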
vocab.json ADDED
The diff for this file is too large to render. See raw diff