shiva-shiva-shiva committed
Commit d4819b1
Parent(s): a65347f
Training in progress, step 500
Files changed:
- .gitignore +1 -0
- config.json +32 -0
- pytorch_model.bin +3 -0
- runs/Apr11_05-35-05_fb29cda6ca0b/1681191337.5036135/events.out.tfevents.1681191337.fb29cda6ca0b.924.1 +3 -0
- runs/Apr11_05-35-05_fb29cda6ca0b/events.out.tfevents.1681191337.fb29cda6ca0b.924.0 +3 -0
- runs/Apr11_05-37-49_fb29cda6ca0b/1681191509.2655795/events.out.tfevents.1681191509.fb29cda6ca0b.26540.1 +3 -0
- runs/Apr11_05-37-49_fb29cda6ca0b/events.out.tfevents.1681191509.fb29cda6ca0b.26540.0 +3 -0
- training_args.bin +3 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+checkpoint-*/
config.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "_name_or_path": "bigscience/bloom-560m",
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "BloomForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "bias_dropout_fusion": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "masked_softmax_fusion": true,
+  "model_type": "bloom",
+  "n_head": 16,
+  "n_inner": null,
+  "n_layer": 24,
+  "offset_alibi": 100,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "skip_bias_add": true,
+  "skip_bias_add_qkv": false,
+  "slow_but_exact": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.4",
+  "unk_token_id": 0,
+  "use_cache": true,
+  "vocab_size": 250880
+}
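
For reference, a config.json like the one above is what transformers uses to rebuild the model class when the checkpoint is loaded. A minimal sketch, assuming this repo (or a local checkout of it) is available at a hypothetical path ./bloom-560m-step-500, and that the tokenizer is taken from the base bigscience/bloom-560m model since no tokenizer files are part of this commit:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

model_dir = "./bloom-560m-step-500"  # hypothetical local checkout of this repo

# config.json above: BloomForCausalLM with hidden_size=1024, n_layer=24, n_head=16
config = AutoConfig.from_pretrained(model_dir)

# pytorch_model.bin is loaded against that config
model = AutoModelForCausalLM.from_pretrained(model_dir, config=config)

# tokenizer files are not in this commit, so fall back to the base model's
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")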
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4315c60a5e310b12ce9102efc726635f1cd43ed7dfe2f192a2ac91cb9a050cc0
+size 2236957537
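
The diff above is only the Git LFS pointer; the actual ~2.2 GB weights file lives in LFS and has to be pulled (e.g. with git lfs pull) before it can be used. A minimal sketch of sanity-checking the downloaded weights with torch, where the tensor key name is an assumption based on the Bloom architecture rather than something recorded in this commit:

import torch

# Load the real weights file, not the 3-line pointer shown in the diff.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")

# Key name assumed from BloomForCausalLM; adjust if the checkpoint differs.
emb = state_dict["transformer.word_embeddings.weight"]
print(emb.shape)  # expected [250880, 1024] per vocab_size and hidden_size in config.json

print(len(state_dict), "tensors in the checkpoint")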
runs/Apr11_05-35-05_fb29cda6ca0b/1681191337.5036135/events.out.tfevents.1681191337.fb29cda6ca0b.924.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b7fb8b2f70c476f0ff605c44b93bec6d9e8baff03c09e236012b98e4326a89a
+size 5856
runs/Apr11_05-35-05_fb29cda6ca0b/events.out.tfevents.1681191337.fb29cda6ca0b.924.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5348897018868678a0de853c8a3d13020d8f75c084b22da1521b6036df180a7a
+size 88
runs/Apr11_05-37-49_fb29cda6ca0b/1681191509.2655795/events.out.tfevents.1681191509.fb29cda6ca0b.26540.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47b9184ad67d0eb9f4f187a7151b9054fbcaed0a6cd8b4fb6a921235f90ff544
+size 5856
runs/Apr11_05-37-49_fb29cda6ca0b/events.out.tfevents.1681191509.fb29cda6ca0b.26540.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63502cea24f32bfa2d0f9218c05e372c3794c4fd99ace15ded54af3c34ffb811
+size 4271
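
The events.out.tfevents.* files above are TensorBoard logs written during training, one set per run directory. A minimal sketch of reading them back with the tensorboard package; the run path comes from this commit, while the scalar tag name (train/loss) is an assumption about what the Trainer logged:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

run_dir = "runs/Apr11_05-37-49_fb29cda6ca0b"  # one of the run directories added here

acc = EventAccumulator(run_dir)
acc.Reload()  # parse every event file found under run_dir

print(acc.Tags()["scalars"])  # list of logged scalar tags

# "train/loss" is an assumed tag name; pick one from the printout above.
for event in acc.Scalars("train/loss"):
    print(event.step, event.value)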
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c583c8f7a9957753b62e469798e6aa719efd53ea953da449bc37fc0e23c0cae4
+size 3579
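
training_args.bin is the TrainingArguments object that the Trainer serializes with torch.save alongside the run. A minimal sketch of inspecting it, assuming the LFS blob has been pulled and a compatible transformers version (4.27.x per config.json) is installed; the attributes printed are standard TrainingArguments fields, not values read from this commit:

import torch

# This is a pickle, so only load files from sources you trust.
args = torch.load("training_args.bin")

print(type(args).__name__)  # expected: TrainingArguments
print(args.output_dir)
print(args.learning_rate, args.per_device_train_batch_size)
print(args.save_steps)  # likely 500, matching the "step 500" commit message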