Training in progress, step 100
Browse files- config.json +57 -0
- logs/events.out.tfevents.1725365849.47-G3-032988-23.94553.0 +3 -0
- logs/events.out.tfevents.1725366320.47-G3-032988-23.94778.0 +3 -0
- logs/events.out.tfevents.1725366980.47-G3-032988-23.94921.0 +3 -0
- logs/events.out.tfevents.1725367630.47-G3-032988-23.95193.0 +3 -0
- logs/events.out.tfevents.1725367932.47-G3-032988-23.95279.0 +3 -0
- logs/events.out.tfevents.1725368012.47-G3-032988-23.95311.0 +3 -0
- logs/events.out.tfevents.1725368705.47-G3-032988-23.95540.0 +3 -0
- logs/events.out.tfevents.1725368909.47-G3-032988-23.95594.0 +3 -0
- logs/events.out.tfevents.1725370529.47-G3-032988-23.96095.0 +3 -0
- logs/events.out.tfevents.1725371054.47-G3-032988-23.96252.0 +3 -0
- logs/events.out.tfevents.1725371151.47-G3-032988-23.96406.0 +3 -0
- logs/events.out.tfevents.1725371298.47-G3-032988-23.96443.0 +3 -0
- logs/events.out.tfevents.1725371464.47-G3-032988-23.96443.1 +3 -0
- logs/events.out.tfevents.1725371651.47-G3-032988-23.96543.0 +3 -0
- logs/events.out.tfevents.1725371747.47-G3-032988-23.96584.0 +3 -0
- logs/events.out.tfevents.1725371807.47-G3-032988-23.96616.0 +3 -0
- logs/events.out.tfevents.1725371858.47-G3-032988-23.96665.0 +3 -0
- logs/events.out.tfevents.1725371887.47-G3-032988-23.96682.0 +3 -0
- logs/events.out.tfevents.1725372066.47-G3-032988-23.96721.0 +3 -0
- logs/events.out.tfevents.1725372087.47-G3-032988-23.96752.0 +3 -0
- logs/events.out.tfevents.1725372183.47-G3-032988-23.96772.0 +3 -0
- logs/events.out.tfevents.1725372278.47-G3-032988-23.96899.0 +3 -0
- logs/events.out.tfevents.1725372333.47-G3-032988-23.96939.0 +3 -0
- logs/events.out.tfevents.1725372428.47-G3-032988-23.96963.0 +3 -0
- logs/events.out.tfevents.1725372543.47-G3-032988-23.97014.0 +3 -0
- logs/events.out.tfevents.1725373066.47-G3-032988-23.97120.0 +3 -0
- logs/events.out.tfevents.1725373913.47-G3-032988-23.97402.0 +3 -0
- logs/events.out.tfevents.1725373967.47-G3-032988-23.97402.1 +3 -0
- logs/events.out.tfevents.1725373975.47-G3-032988-23.97435.0 +3 -0
- logs/events.out.tfevents.1725374063.47-G3-032988-23.97486.0 +3 -0
- model.safetensors +3 -0
- training_args.bin +3 -0
config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ammarnasr/t5mimo-seq2seq-conv",
|
3 |
+
"architectures": [
|
4 |
+
"T5MIMOForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"auto_map": {
|
7 |
+
"AutoConfig": "ammarnasr/t5mimo-seq2seq-conv--configuration_t5mimo.T5MIMOConfig",
|
8 |
+
"AutoModel": "ammarnasr/t5mimo-seq2seq-conv--modeling_t5mimo.T5MIMOModel",
|
9 |
+
"AutoModelForSeq2SeqLM": "ammarnasr/t5mimo-seq2seq-conv--modeling_t5mimo.T5MIMOForConditionalGeneration"
|
10 |
+
},
|
11 |
+
"chronos_config": {
|
12 |
+
"context_length": 512,
|
13 |
+
"eos_token_id": 1,
|
14 |
+
"model_type": "seq2seq",
|
15 |
+
"n_special_tokens": 2,
|
16 |
+
"n_tokens": 4096,
|
17 |
+
"num_samples": 20,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"prediction_length": 64,
|
20 |
+
"temperature": 1.0,
|
21 |
+
"tokenizer_class": "MeanScaleUniformBins",
|
22 |
+
"tokenizer_kwargs": {
|
23 |
+
"high_limit": 15,
|
24 |
+
"low_limit": -15
|
25 |
+
},
|
26 |
+
"top_k": 50,
|
27 |
+
"top_p": 1.0,
|
28 |
+
"use_eos_token": true
|
29 |
+
},
|
30 |
+
"classifier_dropout": 0.0,
|
31 |
+
"d_ff": 1024,
|
32 |
+
"d_kv": 64,
|
33 |
+
"d_model": 256,
|
34 |
+
"decoder_start_token_id": 0,
|
35 |
+
"dense_act_fn": "relu",
|
36 |
+
"dropout_rate": 0.1,
|
37 |
+
"eos_token_id": 1,
|
38 |
+
"feed_forward_proj": "relu",
|
39 |
+
"initializer_factor": 0.05,
|
40 |
+
"is_encoder_decoder": true,
|
41 |
+
"is_gated_act": false,
|
42 |
+
"is_mimo": true,
|
43 |
+
"layer_norm_epsilon": 1e-06,
|
44 |
+
"model_type": "t5mimo",
|
45 |
+
"num_decoder_layers": 4,
|
46 |
+
"num_filters": 64,
|
47 |
+
"num_heads": 4,
|
48 |
+
"num_layers": 4,
|
49 |
+
"num_seqs": 3,
|
50 |
+
"pad_token_id": 0,
|
51 |
+
"relative_attention_max_distance": 128,
|
52 |
+
"relative_attention_num_buckets": 32,
|
53 |
+
"torch_dtype": "float32",
|
54 |
+
"transformers_version": "4.41.1",
|
55 |
+
"use_cache": true,
|
56 |
+
"vocab_size": 4096
|
57 |
+
}
|
logs/events.out.tfevents.1725365849.47-G3-032988-23.94553.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b930e052f562e9c0c0eb8aabfd03729f7a27ac94013f53282eb22e902b9595e
|
3 |
+
size 11262
|
logs/events.out.tfevents.1725366320.47-G3-032988-23.94778.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82f5775ec41616ee95c08fda107710a3c507fde234ec6d1554ef26b294eb4aed
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725366980.47-G3-032988-23.94921.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d798144985f424223d416f8abb5616332ab19e0d20b09bd28f815f7437355c73
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725367630.47-G3-032988-23.95193.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d837691c8cb4ea7e316eea5b96b8e925ca8189d725ca19fceae5f6c28b26c09
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725367932.47-G3-032988-23.95279.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b9439cb039e2acdee2d2f21a308056958d7685d4bff720a4281c88ca1612565
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725368012.47-G3-032988-23.95311.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:375e494d2c33547379ff0672cb08469cac770a8601998d28cffa419fcfba27e3
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725368705.47-G3-032988-23.95540.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac693084ec34a7f9e436dc7609b658be9196947b0b9be6935e3245914940da6e
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725368909.47-G3-032988-23.95594.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6627af2bd6551d8059bddb10ff3a31b18daeeff6687804629ee56e4867ba1acc
|
3 |
+
size 5330
|
logs/events.out.tfevents.1725370529.47-G3-032988-23.96095.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:798a2d441713ec8891982758562b340071e59c2e01294a7477f49f14e58e4f39
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371054.47-G3-032988-23.96252.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5fee05a427a527e2905913fcd475d592e227d1da494bf8d85e706ec9fdfeaf8
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371151.47-G3-032988-23.96406.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99cf9ca4e648dc38e8818e196c6bd2d401b037ffd499acb4a32864faecdaf90b
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371298.47-G3-032988-23.96443.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1a290e8adfddec247a2059aea88ae45bf4101f5b6cb2a1cfa4de9e802558141
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371464.47-G3-032988-23.96443.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c35585f4878e7391d315475f201391f625716b85c1b36e65b950db3717d45795
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371651.47-G3-032988-23.96543.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d06569d181ef017f1fe6f18324a6b63a89bacd0be2957d248df4b1e6e3de8b4f
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371747.47-G3-032988-23.96584.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6d46fe5d4c6d5be65382973d4a98a0bab68c81853c5c9ab80223e1ab01e2314
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371807.47-G3-032988-23.96616.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dab189ad338400d2283929df8f97448d3a79e7f1e16fb83bbf0c37dc9789634
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371858.47-G3-032988-23.96665.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5854db3b79856c06b5294986f3786e05a401f26cc365e991a34471c55bd9ee6c
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725371887.47-G3-032988-23.96682.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:250c232ec0658de0c7bd02a4aa321f4a164c2569bdfb7d712e8f9714d45e6b7a
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372066.47-G3-032988-23.96721.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49d6cdcab7818434d91cb43115578d15a0e879d0e8071bdf9259ad69cb516c94
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372087.47-G3-032988-23.96752.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:239a63af8523f2557ea7c3ffeaa179c6eb90ef159703c329437020cc41feea64
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372183.47-G3-032988-23.96772.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22526aa2686a132c7ed28d39d11c2e14466d57b05d34e770fc6e79e93c41dcd7
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372278.47-G3-032988-23.96899.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5190738b6cd671769c2742ca205768262dc0379f4c666b6d06ffdf867bb65edd
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372333.47-G3-032988-23.96939.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc56ada837f92f01b5c9824337cf591ddd9328e932f39892a00a026dc76ef72a
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372428.47-G3-032988-23.96963.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b889c91d5dd81439fd8cd7626737bf929e5fe8a9c5cd63d9fd20eebec556018e
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725372543.47-G3-032988-23.97014.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ccebda3505ea1b95e4f3999eb076ddbe20426aa1bc27d2d30608e2aa24e976a
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725373066.47-G3-032988-23.97120.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7075a70ace908b12f88b8bd2cd0bb7b9a6c4c56fab05842e388d5a8fd4c3b9ff
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725373913.47-G3-032988-23.97402.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:905f83a68643fdd4eb7e7d2ab411b260015862954399741b17b71289eec817da
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725373967.47-G3-032988-23.97402.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77481a9be6480d429a5496a1ff7fe244867787766f2ecaab8a7e7c7902ff0cdd
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725373975.47-G3-032988-23.97435.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d41f570d8cba3caea3eb8fd9b9503f9651374a57436c381c93c3b522740fa900
|
3 |
+
size 5675
|
logs/events.out.tfevents.1725374063.47-G3-032988-23.97486.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:757956ebe3eb688526614502f7e618807808e8343fbe8da0db36ba1fa9a96e20
|
3 |
+
size 9815
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4c7dd376c9c46a620656ad0774b36090cadaaa11df5846a14044ad5272fc4aa
|
3 |
+
size 33649068
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e61c1d00a04ecfcd00b1254ffec11171a866a405c651ed13ccadfbda4dc2d1b
|
3 |
+
size 5048
|