kanishka committed on
Commit
72f5daa
1 Parent(s): 011fe36

Training in progress, step 2000

Browse files
Files changed (6) hide show
  1. config.json +2 -2
  2. merges.txt +1 -0
  3. pytorch_model.bin +2 -2
  4. tokenizer.json +0 -0
  5. training_args.bin +1 -1
  6. vocab.json +0 -0
config.json CHANGED
@@ -17,7 +17,7 @@
17
  "init_std": 0.02,
18
  "layer_norm_elementwise_affine": true,
19
  "layerdrop": 0.0,
20
- "max_position_embeddings": 130,
21
  "model_type": "opt",
22
  "num_attention_heads": 8,
23
  "num_hidden_layers": 8,
@@ -26,6 +26,6 @@
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.32.1",
28
  "use_cache": true,
29
- "vocab_size": 8193,
30
  "word_embed_proj_dim": 256
31
  }
 
17
  "init_std": 0.02,
18
  "layer_norm_elementwise_affine": true,
19
  "layerdrop": 0.0,
20
+ "max_position_embeddings": 128,
21
  "model_type": "opt",
22
  "num_attention_heads": 8,
23
  "num_hidden_layers": 8,
 
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.32.1",
28
  "use_cache": true,
29
+ "vocab_size": 8192,
30
  "word_embed_proj_dim": 256
31
  }
merges.txt CHANGED
@@ -8153,3 +8153,4 @@ oss ible
8153
  Ġmilk man
8154
  Ġz a
8155
  Ġhear s
 
 
8153
  Ġmilk man
8154
  Ġz a
8155
  Ġhear s
8156
+ Ġfix es
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b8f3892ea96c63c69455c0489fb6cf5a1ac2f9a06a4c95715cd0c54ef5d1523
3
- size 33843613
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f5f70b60da16b5f3acab78a9d8e96562e4df4f77fc9b32cdb39fb1e719bfe8d
3
+ size 33840541
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41106a57e71f9c495fd7238cb0306eeb5faf15b3fe505385d0440dc3eaed91b8
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39e91d80205cad423d540a55f78920ed0716d676fd086072be57cb61d24433e
3
  size 4155
vocab.json CHANGED
The diff for this file is too large to render. See raw diff