zaenalium committed on
Commit
318b843
1 Parent(s): 6990b35

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: Mistral-Indo
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # Mistral-Indo
13
 
14
- This model was trained from scratch on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - MBZUAI/Bactrian-X
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: Mistral-Indo
10
+ results:
11
+ - task:
12
+ name: Causal Language Modeling
13
+ type: text-generation
14
+ dataset:
15
+ name: MBZUAI/Bactrian-X id
16
+ type: MBZUAI/Bactrian-X
17
+ args: id
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.2462286333254075
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # Mistral-Indo
28
 
29
+ This model was trained from scratch on the MBZUAI/Bactrian-X id dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 5.4269
32
+ - Accuracy: 0.2462
33
 
34
  ## Model description
35
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.2462286333254075,
4
+ "eval_loss": 5.426878929138184,
5
+ "eval_runtime": 8.9817,
6
+ "eval_samples": 37,
7
+ "eval_samples_per_second": 4.119,
8
+ "eval_steps_per_second": 2.115,
9
+ "perplexity": 227.43828551721677,
10
+ "total_flos": 3.670508706988032e+16,
11
+ "train_loss": 3.2965724309285482,
12
+ "train_runtime": 9164.1975,
13
+ "train_samples": 767,
14
+ "train_samples_per_second": 1.674,
15
+ "train_steps_per_second": 0.21
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.2462286333254075,
4
+ "eval_loss": 5.426878929138184,
5
+ "eval_runtime": 8.9817,
6
+ "eval_samples": 37,
7
+ "eval_samples_per_second": 4.119,
8
+ "eval_steps_per_second": 2.115,
9
+ "perplexity": 227.43828551721677
10
+ }
runs/Jan30_15-35-55_b59b81b92666/events.out.tfevents.1706638215.b59b81b92666.26.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fd3a294eb3d69f405361efb7626818d06977edd5c6dc6408984a5dea3c29522
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 3.670508706988032e+16,
4
+ "train_loss": 3.2965724309285482,
5
+ "train_runtime": 9164.1975,
6
+ "train_samples": 767,
7
+ "train_samples_per_second": 1.674,
8
+ "train_steps_per_second": 0.21
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1920,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 2.08,
13
+ "learning_rate": 4.4791666666666673e-05,
14
+ "loss": 6.9421,
15
+ "step": 200
16
+ },
17
+ {
18
+ "epoch": 4.17,
19
+ "learning_rate": 3.958333333333333e-05,
20
+ "loss": 4.8424,
21
+ "step": 400
22
+ },
23
+ {
24
+ "epoch": 6.25,
25
+ "learning_rate": 3.4375e-05,
26
+ "loss": 4.0383,
27
+ "step": 600
28
+ },
29
+ {
30
+ "epoch": 8.33,
31
+ "learning_rate": 2.916666666666667e-05,
32
+ "loss": 3.4406,
33
+ "step": 800
34
+ },
35
+ {
36
+ "epoch": 10.42,
37
+ "learning_rate": 2.3958333333333334e-05,
38
+ "loss": 2.9599,
39
+ "step": 1000
40
+ },
41
+ {
42
+ "epoch": 12.5,
43
+ "learning_rate": 1.8750000000000002e-05,
44
+ "loss": 2.5537,
45
+ "step": 1200
46
+ },
47
+ {
48
+ "epoch": 14.58,
49
+ "learning_rate": 1.3541666666666666e-05,
50
+ "loss": 2.2096,
51
+ "step": 1400
52
+ },
53
+ {
54
+ "epoch": 16.67,
55
+ "learning_rate": 8.333333333333334e-06,
56
+ "loss": 1.944,
57
+ "step": 1600
58
+ },
59
+ {
60
+ "epoch": 18.75,
61
+ "learning_rate": 3.125e-06,
62
+ "loss": 1.7395,
63
+ "step": 1800
64
+ },
65
+ {
66
+ "epoch": 20.0,
67
+ "step": 1920,
68
+ "total_flos": 3.670508706988032e+16,
69
+ "train_loss": 3.2965724309285482,
70
+ "train_runtime": 9164.1975,
71
+ "train_samples_per_second": 1.674,
72
+ "train_steps_per_second": 0.21
73
+ }
74
+ ],
75
+ "logging_steps": 200,
76
+ "max_steps": 1920,
77
+ "num_input_tokens_seen": 0,
78
+ "num_train_epochs": 20,
79
+ "save_steps": 200,
80
+ "total_flos": 3.670508706988032e+16,
81
+ "train_batch_size": 2,
82
+ "trial_name": null,
83
+ "trial_params": null
84
+ }