DaJulster committed on
Commit
5351182
1 Parent(s): 18720e5

Training in progress, step 500

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/bart-large",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "add_bias_logits": false,
@@ -11,18 +11,18 @@
11
  "bos_token_id": 0,
12
  "classif_dropout": 0.1,
13
  "classifier_dropout": 0.0,
14
- "d_model": 1024,
15
- "decoder_attention_heads": 16,
16
- "decoder_ffn_dim": 4096,
17
  "decoder_layerdrop": 0.0,
18
- "decoder_layers": 12,
19
  "decoder_start_token_id": 2,
20
  "dropout": 0.1,
21
  "early_stopping": true,
22
- "encoder_attention_heads": 16,
23
- "encoder_ffn_dim": 4096,
24
  "encoder_layerdrop": 0.0,
25
- "encoder_layers": 12,
26
  "eos_token_id": 2,
27
  "forced_bos_token_id": 0,
28
  "forced_eos_token_id": 2,
@@ -43,8 +43,9 @@
43
  "model_type": "bart",
44
  "no_repeat_ngram_size": 3,
45
  "normalize_before": false,
 
46
  "num_beams": 4,
47
- "num_hidden_layers": 12,
48
  "pad_token_id": 1,
49
  "scale_embedding": false,
50
  "task_specific_params": {
 
1
  {
2
+ "_name_or_path": "facebook/bart-base",
3
  "activation_dropout": 0.1,
4
  "activation_function": "gelu",
5
  "add_bias_logits": false,
 
11
  "bos_token_id": 0,
12
  "classif_dropout": 0.1,
13
  "classifier_dropout": 0.0,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
  "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 6,
19
  "decoder_start_token_id": 2,
20
  "dropout": 0.1,
21
  "early_stopping": true,
22
+ "encoder_attention_heads": 12,
23
+ "encoder_ffn_dim": 3072,
24
  "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 6,
26
  "eos_token_id": 2,
27
  "forced_bos_token_id": 0,
28
  "forced_eos_token_id": 2,
 
43
  "model_type": "bart",
44
  "no_repeat_ngram_size": 3,
45
  "normalize_before": false,
46
+ "normalize_embedding": true,
47
  "num_beams": 4,
48
+ "num_hidden_layers": 6,
49
  "pad_token_id": 1,
50
  "scale_embedding": false,
51
  "task_specific_params": {
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a6a180c498eceeed8d4f55e7583519142bd96cd71d94347855fc73033ba8993
3
- size 1625426996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0de2e2b119385f61efe50e026b9fb5ffe6765bab41548c4cd12a9ec96547c139
3
+ size 557912620
runs/Apr11_13-00-37_86457e1bc633/events.out.tfevents.1712840438.86457e1bc633.25.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce6f4a16bd632471f07edfa2fe2fd23d0b07ea2973e38b456f0d9f47721fb03
3
+ size 6016
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e7ffad5565cb40d9f93c977ef583fe9576dfdb95495aa31210dcbf4b6d054ae
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:444927edb1b349de9e01397c79fe58ea8fff3d496f44843bb4256094d35a403d
3
  size 5048