Nighter commited on
Commit
801879e
1 Parent(s): 9b23899

Delete models

Browse files
models/checkpoint-1000/config.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "_name_or_path": "allenai/longformer-base-4096",
3
- "architectures": [
4
- "LongformerForQuestionAnswering"
5
- ],
6
- "attention_mode": "longformer",
7
- "attention_probs_dropout_prob": 0.1,
8
- "attention_window": [
9
- 512,
10
- 512,
11
- 512,
12
- 512,
13
- 512,
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "bos_token_id": 0,
23
- "eos_token_id": 2,
24
- "gradient_checkpointing": false,
25
- "hidden_act": "gelu",
26
- "hidden_dropout_prob": 0.1,
27
- "hidden_size": 768,
28
- "ignore_attention_mask": false,
29
- "initializer_range": 0.02,
30
- "intermediate_size": 3072,
31
- "layer_norm_eps": 1e-05,
32
- "max_position_embeddings": 4098,
33
- "model_type": "longformer",
34
- "num_attention_heads": 12,
35
- "num_hidden_layers": 12,
36
- "onnx_export": false,
37
- "pad_token_id": 1,
38
- "sep_token_id": 2,
39
- "torch_dtype": "float32",
40
- "transformers_version": "4.35.0",
41
- "type_vocab_size": 1,
42
- "vocab_size": 50265
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/checkpoint-1000/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:72b079e4512fc1ab489f928dd8507b9dffed6b66476cb6e0fee37fa23e031a33
3
- size 592315616
 
 
 
 
models/checkpoint-1000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f54ec241c82997be03e2aaa9809317f72aafb5ad6482d58692c69452a27e433
3
- size 1184789445
 
 
 
 
models/checkpoint-1000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f53b1d2a50f10a43ca81f48c5a81ecc9c58446114c9d8793e1fcacb686f82f64
3
- size 14575
 
 
 
 
models/checkpoint-1000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:72613c3db44bc51082dbd8daac1b7493525ab4d2e8ea3698a81b418a19bd4aba
3
- size 627
 
 
 
 
models/checkpoint-1000/trainer_state.json DELETED
@@ -1,139 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.024622051509331756,
5
- "eval_steps": 50,
6
- "global_step": 1000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 9.5e-05,
14
- "loss": 5.161,
15
- "step": 50
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 9e-05,
20
- "loss": 5.2417,
21
- "step": 100
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 8.5e-05,
26
- "loss": 5.2132,
27
- "step": 150
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 8e-05,
32
- "loss": 5.1672,
33
- "step": 200
34
- },
35
- {
36
- "epoch": 0.01,
37
- "learning_rate": 7.500000000000001e-05,
38
- "loss": 5.0757,
39
- "step": 250
40
- },
41
- {
42
- "epoch": 0.01,
43
- "learning_rate": 7e-05,
44
- "loss": 5.0972,
45
- "step": 300
46
- },
47
- {
48
- "epoch": 0.01,
49
- "learning_rate": 6.500000000000001e-05,
50
- "loss": 5.2191,
51
- "step": 350
52
- },
53
- {
54
- "epoch": 0.01,
55
- "learning_rate": 6e-05,
56
- "loss": 5.1446,
57
- "step": 400
58
- },
59
- {
60
- "epoch": 0.01,
61
- "learning_rate": 5.500000000000001e-05,
62
- "loss": 5.1846,
63
- "step": 450
64
- },
65
- {
66
- "epoch": 0.01,
67
- "learning_rate": 5e-05,
68
- "loss": 5.1827,
69
- "step": 500
70
- },
71
- {
72
- "epoch": 0.01,
73
- "learning_rate": 4.5e-05,
74
- "loss": 4.9039,
75
- "step": 550
76
- },
77
- {
78
- "epoch": 0.01,
79
- "learning_rate": 4e-05,
80
- "loss": 5.0134,
81
- "step": 600
82
- },
83
- {
84
- "epoch": 0.02,
85
- "learning_rate": 3.5e-05,
86
- "loss": 5.1791,
87
- "step": 650
88
- },
89
- {
90
- "epoch": 0.02,
91
- "learning_rate": 3e-05,
92
- "loss": 5.0575,
93
- "step": 700
94
- },
95
- {
96
- "epoch": 0.02,
97
- "learning_rate": 2.5e-05,
98
- "loss": 5.0015,
99
- "step": 750
100
- },
101
- {
102
- "epoch": 0.02,
103
- "learning_rate": 2e-05,
104
- "loss": 5.0853,
105
- "step": 800
106
- },
107
- {
108
- "epoch": 0.02,
109
- "learning_rate": 1.5e-05,
110
- "loss": 5.0391,
111
- "step": 850
112
- },
113
- {
114
- "epoch": 0.02,
115
- "learning_rate": 1e-05,
116
- "loss": 5.1997,
117
- "step": 900
118
- },
119
- {
120
- "epoch": 0.02,
121
- "learning_rate": 5e-06,
122
- "loss": 5.0387,
123
- "step": 950
124
- },
125
- {
126
- "epoch": 0.02,
127
- "learning_rate": 0.0,
128
- "loss": 4.8919,
129
- "step": 1000
130
- }
131
- ],
132
- "logging_steps": 50,
133
- "max_steps": 1000,
134
- "num_train_epochs": 1,
135
- "save_steps": 500,
136
- "total_flos": 5225784115200000.0,
137
- "trial_name": null,
138
- "trial_params": null
139
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/checkpoint-1000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:858178eccbe5c575b20f211c1cdbf7566d777add66e5a8bb6a44678d1773e11c
3
- size 4091
 
 
 
 
models/checkpoint-500/config.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "_name_or_path": "allenai/longformer-base-4096",
3
- "architectures": [
4
- "LongformerForQuestionAnswering"
5
- ],
6
- "attention_mode": "longformer",
7
- "attention_probs_dropout_prob": 0.1,
8
- "attention_window": [
9
- 512,
10
- 512,
11
- 512,
12
- 512,
13
- 512,
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "bos_token_id": 0,
23
- "eos_token_id": 2,
24
- "gradient_checkpointing": false,
25
- "hidden_act": "gelu",
26
- "hidden_dropout_prob": 0.1,
27
- "hidden_size": 768,
28
- "ignore_attention_mask": false,
29
- "initializer_range": 0.02,
30
- "intermediate_size": 3072,
31
- "layer_norm_eps": 1e-05,
32
- "max_position_embeddings": 4098,
33
- "model_type": "longformer",
34
- "num_attention_heads": 12,
35
- "num_hidden_layers": 12,
36
- "onnx_export": false,
37
- "pad_token_id": 1,
38
- "sep_token_id": 2,
39
- "torch_dtype": "float32",
40
- "transformers_version": "4.35.0",
41
- "type_vocab_size": 1,
42
- "vocab_size": 50265
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/checkpoint-500/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2460df31587bb48597110a1567445f0cacc27f2ee6e77a6d91f17c50177981bc
3
- size 592315616
 
 
 
 
models/checkpoint-500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f756aa84451538e2dce168e9a224c8155e48329cfb136a15b2049f907a7e3587
3
- size 1184789445
 
 
 
 
models/checkpoint-500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cfd58e02f4808773b2c3954bfc7fbba88d4d8075a08c8ec124654f3457993a9
3
- size 14575
 
 
 
 
models/checkpoint-500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1772beaff8e1a54e1110d9989a781f461d3e253f0c02cade590c2e900e7d022
3
- size 627
 
 
 
 
models/checkpoint-500/trainer_state.json DELETED
@@ -1,79 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.012311025754665878,
5
- "eval_steps": 50,
6
- "global_step": 500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 9.5e-05,
14
- "loss": 5.161,
15
- "step": 50
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 9e-05,
20
- "loss": 5.2417,
21
- "step": 100
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 8.5e-05,
26
- "loss": 5.2132,
27
- "step": 150
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 8e-05,
32
- "loss": 5.1672,
33
- "step": 200
34
- },
35
- {
36
- "epoch": 0.01,
37
- "learning_rate": 7.500000000000001e-05,
38
- "loss": 5.0757,
39
- "step": 250
40
- },
41
- {
42
- "epoch": 0.01,
43
- "learning_rate": 7e-05,
44
- "loss": 5.0972,
45
- "step": 300
46
- },
47
- {
48
- "epoch": 0.01,
49
- "learning_rate": 6.500000000000001e-05,
50
- "loss": 5.2191,
51
- "step": 350
52
- },
53
- {
54
- "epoch": 0.01,
55
- "learning_rate": 6e-05,
56
- "loss": 5.1446,
57
- "step": 400
58
- },
59
- {
60
- "epoch": 0.01,
61
- "learning_rate": 5.500000000000001e-05,
62
- "loss": 5.1846,
63
- "step": 450
64
- },
65
- {
66
- "epoch": 0.01,
67
- "learning_rate": 5e-05,
68
- "loss": 5.1827,
69
- "step": 500
70
- }
71
- ],
72
- "logging_steps": 50,
73
- "max_steps": 1000,
74
- "num_train_epochs": 1,
75
- "save_steps": 500,
76
- "total_flos": 2612892057600000.0,
77
- "trial_name": null,
78
- "trial_params": null
79
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/checkpoint-500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:858178eccbe5c575b20f211c1cdbf7566d777add66e5a8bb6a44678d1773e11c
3
- size 4091
 
 
 
 
models/config.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "_name_or_path": "allenai/longformer-base-4096",
3
- "architectures": [
4
- "LongformerForQuestionAnswering"
5
- ],
6
- "attention_mode": "longformer",
7
- "attention_probs_dropout_prob": 0.1,
8
- "attention_window": [
9
- 512,
10
- 512,
11
- 512,
12
- 512,
13
- 512,
14
- 512,
15
- 512,
16
- 512,
17
- 512,
18
- 512,
19
- 512,
20
- 512
21
- ],
22
- "bos_token_id": 0,
23
- "eos_token_id": 2,
24
- "gradient_checkpointing": false,
25
- "hidden_act": "gelu",
26
- "hidden_dropout_prob": 0.1,
27
- "hidden_size": 768,
28
- "ignore_attention_mask": false,
29
- "initializer_range": 0.02,
30
- "intermediate_size": 3072,
31
- "layer_norm_eps": 1e-05,
32
- "max_position_embeddings": 4098,
33
- "model_type": "longformer",
34
- "num_attention_heads": 12,
35
- "num_hidden_layers": 12,
36
- "onnx_export": false,
37
- "pad_token_id": 1,
38
- "sep_token_id": 2,
39
- "torch_dtype": "float32",
40
- "transformers_version": "4.35.0",
41
- "type_vocab_size": 1,
42
- "vocab_size": 50265
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/eval_results.txt DELETED
@@ -1,5 +0,0 @@
1
- epoch = 0.02
2
- eval_loss = 5.012203216552734
3
- eval_runtime = 5098.5061
4
- eval_samples_per_second = 3.983
5
- eval_steps_per_second = 3.983
 
 
 
 
 
 
models/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
models/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:72b079e4512fc1ab489f928dd8507b9dffed6b66476cb6e0fee37fa23e031a33
3
- size 592315616
 
 
 
 
models/runs/Dec02_17-07-54_47f49ef5dd0b/events.out.tfevents.1701536890.47f49ef5dd0b.102.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac195f5620133debbc1489b20458444af76e3f1e4cfdef3e819d64559f1907c6
3
- size 7836
 
 
 
 
models/runs/Dec02_17-07-54_47f49ef5dd0b/events.out.tfevents.1701544466.47f49ef5dd0b.102.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa9668cadd28a078978c436c550310718abb00a01d9fb3b1b4d335cfbce0bcfd
3
- size 359
 
 
 
 
models/special_tokens_map.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
models/tokenizer_config.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": true,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 4096,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "LongformerTokenizer",
55
- "trim_offsets": true,
56
- "unk_token": "<unk>"
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:858178eccbe5c575b20f211c1cdbf7566d777add66e5a8bb6a44678d1773e11c
3
- size 4091
 
 
 
 
models/vocab.json DELETED
The diff for this file is too large to render. See raw diff