maxidl committed on
Commit
14a79a1
1 Parent(s): b3c9758

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full list.
Files changed (50) hide show
  1. README.md +59 -0
  2. all_results.json +8 -0
  3. config.json +31 -0
  4. generation_config.json +6 -0
  5. model-00001-of-00059.safetensors +3 -0
  6. model-00002-of-00059.safetensors +3 -0
  7. model-00003-of-00059.safetensors +3 -0
  8. model-00004-of-00059.safetensors +3 -0
  9. model-00005-of-00059.safetensors +3 -0
  10. model-00006-of-00059.safetensors +3 -0
  11. model-00007-of-00059.safetensors +3 -0
  12. model-00008-of-00059.safetensors +3 -0
  13. model-00009-of-00059.safetensors +3 -0
  14. model-00010-of-00059.safetensors +3 -0
  15. model-00011-of-00059.safetensors +3 -0
  16. model-00012-of-00059.safetensors +3 -0
  17. model-00013-of-00059.safetensors +3 -0
  18. model-00014-of-00059.safetensors +3 -0
  19. model-00015-of-00059.safetensors +3 -0
  20. model-00016-of-00059.safetensors +3 -0
  21. model-00017-of-00059.safetensors +3 -0
  22. model-00018-of-00059.safetensors +3 -0
  23. model-00019-of-00059.safetensors +3 -0
  24. model-00020-of-00059.safetensors +3 -0
  25. model-00021-of-00059.safetensors +3 -0
  26. model-00022-of-00059.safetensors +3 -0
  27. model-00023-of-00059.safetensors +3 -0
  28. model-00024-of-00059.safetensors +3 -0
  29. model-00025-of-00059.safetensors +3 -0
  30. model-00026-of-00059.safetensors +3 -0
  31. model-00027-of-00059.safetensors +3 -0
  32. model-00028-of-00059.safetensors +3 -0
  33. model-00029-of-00059.safetensors +3 -0
  34. model-00030-of-00059.safetensors +3 -0
  35. model-00031-of-00059.safetensors +3 -0
  36. model-00032-of-00059.safetensors +3 -0
  37. model-00033-of-00059.safetensors +3 -0
  38. model-00034-of-00059.safetensors +3 -0
  39. model-00035-of-00059.safetensors +3 -0
  40. model-00036-of-00059.safetensors +3 -0
  41. model-00037-of-00059.safetensors +3 -0
  42. model-00038-of-00059.safetensors +3 -0
  43. model-00039-of-00059.safetensors +3 -0
  44. model-00040-of-00059.safetensors +3 -0
  45. model-00041-of-00059.safetensors +3 -0
  46. model-00042-of-00059.safetensors +3 -0
  47. model-00043-of-00059.safetensors +3 -0
  48. model-00044-of-00059.safetensors +3 -0
  49. model-00045-of-00059.safetensors +3 -0
  50. model-00046-of-00059.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: mistral-community/Mixtral-8x22B-v0.1
3
+ tags:
4
+ - alignment-handbook
5
+ - generated_from_trainer
6
+ datasets:
7
+ - maxidl/instruct-en-de
8
+ model-index:
9
+ - name: Mixtral-8x22B-v0.1-Instruct-sft-en-de
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # Mixtral-8x22B-v0.1-Instruct-sft-en-de
17
+
18
+ This model is a fine-tuned version of [mistral-community/Mixtral-8x22B-v0.1](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) on the maxidl/instruct-en-de dataset.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 1e-05
38
+ - train_batch_size: 1
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 64
43
+ - total_train_batch_size: 64
44
+ - total_eval_batch_size: 512
45
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
+ - lr_scheduler_type: cosine
47
+ - lr_scheduler_warmup_steps: 50
48
+ - num_epochs: 3
49
+
50
+ ### Training results
51
+
52
+
53
+
54
+ ### Framework versions
55
+
56
+ - Transformers 4.39.3
57
+ - Pytorch 2.1.2
58
+ - Datasets 2.18.0
59
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.6362597953351998,
4
+ "train_runtime": 82257.6345,
5
+ "train_samples": 1226825,
6
+ "train_samples_per_second": 0.673,
7
+ "train_steps_per_second": 0.011
8
+ }
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "mistral-community/Mixtral-8x22B-v0.1",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 6144,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16384,
13
+ "max_position_embeddings": 65536,
14
+ "model_type": "mixtral",
15
+ "num_attention_heads": 48,
16
+ "num_experts_per_tok": 2,
17
+ "num_hidden_layers": 56,
18
+ "num_key_value_heads": 8,
19
+ "num_local_experts": 8,
20
+ "output_router_logits": false,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_theta": 1000000,
23
+ "router_aux_loss_coef": 0.001,
24
+ "router_jitter_noise": 0.0,
25
+ "sliding_window": null,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.39.3",
29
+ "use_cache": true,
30
+ "vocab_size": 32000
31
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.39.3"
6
+ }
model-00001-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:079a6bfb8134e242a1a926916dc5979001d06dbea95ac03949bbdd7964fc3ef0
3
+ size 4998663696
model-00002-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4f207f37a87d30ea0aea9062242b58cdc36b28f41763eaa8b33edf23283726
3
+ size 4806799120
model-00003-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f67c51db1feeb762a3f2f283b17794cc26946ec5d10202196ac09abd27a8ed
3
+ size 4806799120
model-00004-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec8673a3b2fd4abc1ce8a363be2f2fa2a1e7dbf3196a1d1a3e02c213d117f89
3
+ size 4806799120
model-00005-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c94971d4ea7128d98acbfc870a6039cecac238731716a530daaa805f40e6673
3
+ size 4806799120
model-00006-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1c3efc5a6524d923f1ce9f3917b5cd120091a095433dc5a7fe74bfbe0b4dfaf
3
+ size 4806799120
model-00007-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4fb048d1831c3b0df91415fb13602c1d8a87a591c77033386bf6bfe68a38d7
3
+ size 4806799120
model-00008-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c68d117bcdcd314a2d91313374e66f76595396d062f51eed2b20ca1b88d4b0c
3
+ size 4806799120
model-00009-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb9876d704f899ea09301840b3273f1182e4c607f1dedf2af925110c6d0da9d
3
+ size 4806799120
model-00010-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c66fd33b4a98ad9c94acb84387b2e91ada10c904adc2cb2075cf0e799869f4
3
+ size 4806799120
model-00011-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b788bdea4a304c702020a8b44ea2fe7684f41af6b0e19c1ff666e0d9de948f
3
+ size 4806799136
model-00012-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b26a4868ed838550accd5068cde930b18338c631a6ca896a1cc28a694eef6a0b
3
+ size 4806799152
model-00013-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5069e165ba66c326283820ec535cd2b7ed415942001ea8926c758eec7496bedd
3
+ size 4806799152
model-00014-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34efa7884574fc1adaf2c40eae9faf22b9f155a3e8fa78b31a07aeaf564f3408
3
+ size 4806799152
model-00015-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9799c40810f0aeba296a94afdeeaa1cd7df6d9353a7c4e0dae9c0a48bce722b
3
+ size 4806799152
model-00016-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c407f6ae0207cec1ee3f7939e5a44c6060c758ba355a4ec494c0df2c69bd492
3
+ size 4806799152
model-00017-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8ab3bd128ae31a7e0b62771eb074ca5919f5e890796ccf393bbfed8d28b6dd
3
+ size 4806799152
model-00018-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d93b028127cbbd9efea3ee9c908b2a195639a5a4ffce3bf76fa95084a808b21
3
+ size 4806799152
model-00019-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7a4539fdce302ccb7997310fb48d87f135f476bd507e54b80e109a4ab4fc422
3
+ size 4806799152
model-00020-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb286e54f91df2d52650f95732d7f0c55b9605cc8da7e6786605986649681978
3
+ size 4806799152
model-00021-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b6e16d521111a38368e997d3532e808e31f310e1a27fdc9da4b01cca21f45ab
3
+ size 4806799152
model-00022-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b609c1b90ac4d0574a64ac9cb8e7627d1b97943d9ac52d01a91bd53db30dda9b
3
+ size 4806799152
model-00023-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac5baf955c98a7d9f7d267f9f1f69ed77ccf6719a479d21441b19c9cb8a7fe2
3
+ size 4806799152
model-00024-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9f67ad5693d19f3601e986f1c43ac5fe1e0b69d5adc1143b4db1f9132b6a89
3
+ size 4932529864
model-00025-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d3481c8893b857369c2dadc927dd67e29ec29523062dd0df45a0ef2bf1ca632
3
+ size 4995542848
model-00026-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd4b67da7d7282fa2d2427db16cef8039009dc19399ea69f00c70d278c6e280f
3
+ size 4995542848
model-00027-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce21800a1db3c61f4381f62355aac2f17e91c9e4dcbeb9239f2334a91697f3c4
3
+ size 4932628288
model-00028-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db3de287487cf40f5be1d773aca30a99eaabd52dd37b58d3882c7229d3a6d18
3
+ size 4806774344
model-00029-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2de1e2359812785161997736be4042e665eec44e230c6ac2273bc85e0dc9153
3
+ size 4806799144
model-00030-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2847def46af8605e13cab7ea455c230daac8854124de40a9c5aede78b9283422
3
+ size 4806799144
model-00031-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7833913e2a702c5de0e41da1d9683c6fc7e9eda930bfbe67b28bb4609a01d474
3
+ size 4806799144
model-00032-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d3be2741318ef784b7f5605a3fad0e55923f0bc1a34d7edf7728e3917105f5
3
+ size 4806799144
model-00033-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bba74c5e4530546134938937e34e71c92c6b7190914891c194cb3bcbf5ac66d
3
+ size 4806799152
model-00034-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bcc2b91ba5454223c9fe8d3e7c54562fde28327cffa26a1d64396b65e0105cd
3
+ size 4806799152
model-00035-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6367b5d5b08be04ee6fd9e471d2ec3f5f9ca6a33b011d9c2452b2fa8b0a68a21
3
+ size 4806799152
model-00036-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fc380a75a6f4bb6d1352a8e8ea7235cf8fb0ddf2c477ea6a6b414fe4cf91c6
3
+ size 4806799152
model-00037-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40984790ffc43c281dd8e77bd11fea4878f3ca2b289b8853cf82fd6c1942ab49
3
+ size 4806799152
model-00038-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6fda121793088e34677b31f415abe129026bbbabe8f29b4f05c7960dd8e42b4
3
+ size 4806799152
model-00039-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645196b7ea939ed1a11a06141453eefd63ab0295166a31a310f12a7532e0afa8
3
+ size 4806799152
model-00040-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542bb168e38f3b9358c441ab1c1c3fb77e2d05a36cb42189aa42a5f19e1ced08
3
+ size 4806799152
model-00041-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc31593820e8c3a3e34ffbf56ec49b9d9a892e8c3b9c52c4900454c508f3fa8
3
+ size 4806799152
model-00042-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08f2ce4ad1bb832bf1e15ec71637df89fc197736bcbff57c70de09e01a092088
3
+ size 4806799152
model-00043-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a9c418ca2f8ebefd22aca84a7bedc2f6349bf8d0a3e778c94af8ceb011c195
3
+ size 4806799152
model-00044-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cb8b5f6154ea6e61006c51ca2bad2355fb60770ccf48b4c948a2e178331343
3
+ size 4806799152
model-00045-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f021662a34400f51ad8caa39751ce9fca8300edc33f8ba1bee398bb54c92c936
3
+ size 4806799152
model-00046-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd98eb51bb71ca610723f597db38d36282b23bab762d63c86da33f7ad263af7
3
+ size 4806799152