jimypbr committed on
Commit
01aa69e
1 Parent(s): ef425c9

Training in progress, step 500

Browse files
config.json CHANGED
@@ -26,7 +26,7 @@
26
  "position_biased_input": false,
27
  "relative_attention": true,
28
  "torch_dtype": "float16",
29
- "transformers_version": "4.19.0.dev0",
30
  "type_vocab_size": 0,
31
  "vocab_size": 50265
32
  }
 
26
  "position_biased_input": false,
27
  "relative_attention": true,
28
  "torch_dtype": "float16",
29
+ "transformers_version": "4.18.0",
30
  "type_vocab_size": 0,
31
  "vocab_size": 50265
32
  }
ipu_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "enable_half_first_order_momentum": true,
5
  "enable_half_partials": true,
6
  "executable_cache_dir": "./exe_cache",
 
7
  "gradient_accumulation_steps": 32,
8
  "inference_device_iterations": 4,
9
  "inference_replication_factor": 4,
@@ -21,13 +22,14 @@
21
  0.1
22
  ],
23
  "optimizer_state_offchip": true,
24
- "optimum_version": "1.0.0",
25
  "output_mode": "final",
26
  "profile_dir": "",
27
  "recompute_checkpoint_every_layer": true,
28
  "replicated_tensor_sharding": true,
29
  "replication_factor": 4,
30
  "seed": 1984,
31
- "transformers_version": "4.19.0.dev0",
 
32
  "use_popdist": false
33
  }
 
4
  "enable_half_first_order_momentum": true,
5
  "enable_half_partials": true,
6
  "executable_cache_dir": "./exe_cache",
7
+ "execute_encoder_on_cpu_for_generation": false,
8
  "gradient_accumulation_steps": 32,
9
  "inference_device_iterations": 4,
10
  "inference_replication_factor": 4,
 
22
  0.1
23
  ],
24
  "optimizer_state_offchip": true,
25
+ "optimum_version": "1.1.1",
26
  "output_mode": "final",
27
  "profile_dir": "",
28
  "recompute_checkpoint_every_layer": true,
29
  "replicated_tensor_sharding": true,
30
  "replication_factor": 4,
31
  "seed": 1984,
32
+ "sharded_execution_for_inference": false,
33
+ "transformers_version": "4.18.0",
34
  "use_popdist": false
35
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c53de7a6f355194542f010b25d2591fb9ca83acda557a8734b3b17ed060f8d9
3
  size 277274547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1f09f6119a29f383a3d1ea74bf0c26235173cda966dd57182282e10454a2b6
3
  size 277274547
runs/May23_13-05-50_gbnwp-pod015-2.ipu.graphcore.ai/1653308353.434315/events.out.tfevents.1653308353.gbnwp-pod015-2.ipu.graphcore.ai ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95929573ae79478ad324196f12e15d087032bd0cc2365bcca195d1249961d885
3
+ size 3919
runs/May23_13-05-50_gbnwp-pod015-2.ipu.graphcore.ai/events.out.tfevents.1653308353.gbnwp-pod015-2.ipu.graphcore.ai ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dae6908f3231386220522caf514e158d17f209d360490f753490571ecb536f
3
+ size 10664
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -53,7 +67,8 @@
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
- "trim_offsets": true
 
57
  },
58
  "post_processor": {
59
  "type": "TemplateProcessing",
@@ -133,7 +148,8 @@
133
  "decoder": {
134
  "type": "ByteLevel",
135
  "add_prefix_space": true,
136
- "trim_offsets": true
 
137
  },
138
  "model": {
139
  "type": "BPE",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 384,
6
+ "strategy": "OnlySecond",
7
+ "stride": 128
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 384
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
67
  "pre_tokenizer": {
68
  "type": "ByteLevel",
69
  "add_prefix_space": false,
70
+ "trim_offsets": true,
71
+ "use_regex": true
72
  },
73
  "post_processor": {
74
  "type": "TemplateProcessing",
 
148
  "decoder": {
149
  "type": "ByteLevel",
150
  "add_prefix_space": true,
151
+ "trim_offsets": true,
152
+ "use_regex": true
153
  },
154
  "model": {
155
  "type": "BPE",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:182fd1bfb055d8a087a848be1e08eb491fd553ac431da5330a9b5e2216525bbf
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ecb4a491a038022196f4b2927d2d70e5ef54d75e818e8c1e37227d522c2a430
3
+ size 2735