hanasim committed on
Commit 4807529
1 Parent(s): 3805c5d

End of training
README.md CHANGED
@@ -1,7 +1,12 @@
 ---
+language:
+- ml
 license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
 tags:
+- automatic-speech-recognition
+- mozilla-foundation/common_voice_16_0
+- mms
 - generated_from_trainer
 datasets:
 - common_voice_16_0
@@ -14,15 +19,15 @@ model-index:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: common_voice_16_0
+      name: MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - ML
       type: common_voice_16_0
       config: ml
       split: test
-      args: ml
+      args: 'Config: ml, Training split: train+validation, Eval split: test'
     metrics:
     - name: Wer
      type: wer
-      value: 0.5345542501727713
+      value: 0.5348997926744989
 ---

 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +35,10 @@ should probably proofread and complete it, then remove this comment. -->

 # breeze-listen-w2v2-ml

-This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the common_voice_16_0 dataset.
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the MOZILLA-FOUNDATION/COMMON_VOICE_16_0 - ML dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2698
-- Wer: 0.5346
+- Loss: 0.2666
+- Wer: 0.5349

 ## Model description

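For reference, a minimal inference sketch for the model described in the updated card. The repo id `simpragma/breeze-listen-w2v2-ml` and the ISO 639-3 adapter code `mal` are assumptions inferred from the training log below; the card itself only names the base model and dataset.

```python
# Hedged inference sketch for the fine-tuned MMS adapter model.
# Assumptions: the checkpoint is published as "simpragma/breeze-listen-w2v2-ml"
# (inferred from the save path in the training log), the processor/tokenizer were
# pushed alongside it, and the adapter target language is the ISO 639-3 code "mal".
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

MODEL_ID = "simpragma/breeze-listen-w2v2-ml"  # assumed repo id

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID, target_lang="mal", ignore_mismatched_sizes=True)

def transcribe(waveform, sampling_rate=16_000):
    """Transcribe a mono 16 kHz float waveform (e.g. loaded with torchaudio or librosa)."""
    inputs = processor(waveform, sampling_rate=sampling_rate, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
```

Passing `target_lang` at load time mirrors the standard MMS adapter workflow in transformers; whether `ignore_mismatched_sizes=True` is actually needed depends on how the fine-tuned `lm_head` was saved.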
 
adapter.mal.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b41817255284f3ae007dc04b8e4eb15d1b4e3feac18c049ba9bd1282af988ead
+size 9018880
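The three added lines are a Git LFS pointer, not the weights themselves: the ~9 MB adapter payload is fetched by its sha256 object id on `git lfs pull`. A short sketch for inspecting the downloaded tensors with the `safetensors` library (only the file name comes from this commit):

```python
# Inspect the adapter tensors once `git lfs pull` has replaced the pointer
# file with the real ~9 MB payload.
from safetensors import safe_open

with safe_open("adapter.mal.safetensors", framework="pt") as f:
    total = 0
    for name in f.keys():
        tensor = f.get_tensor(name)
        total += tensor.numel()
        print(f"{name}: {tuple(tensor.shape)}")
    print(f"total parameters in adapter: {total:,}")
```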
all_results.json ADDED
@@ -0,0 +1,14 @@
+{
+    "epoch": 4.0,
+    "eval_loss": 0.2665550410747528,
+    "eval_runtime": 160.6538,
+    "eval_samples": 663,
+    "eval_samples_per_second": 4.127,
+    "eval_steps_per_second": 0.517,
+    "eval_wer": 0.5348997926744989,
+    "train_loss": 2.1205503649827913,
+    "train_runtime": 5112.0325,
+    "train_samples": 1968,
+    "train_samples_per_second": 1.54,
+    "train_steps_per_second": 0.385
+}
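The throughput figures in `all_results.json` follow directly from the sample counts, epoch count, and wall-clock runtimes; a quick consistency check with the numbers above:

```python
# Sanity-check the reported throughput: samples/sec and steps/sec are derived
# from the sample counts, epoch count, and wall-clock runtimes in the JSON.
eval_samples, eval_runtime = 663, 160.6538
train_samples, num_epochs, train_runtime = 1968, 4.0, 5112.0325
global_steps = 1968  # from trainer_state.json below

print(round(eval_samples / eval_runtime, 3))                 # ~4.127 eval samples/s
print(round(train_samples * num_epochs / train_runtime, 2))  # ~1.54  train samples/s
print(round(global_steps / train_runtime, 3))                # ~0.385 train steps/s
```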
breeze-listen-w2v2-ml.log CHANGED
@@ -132,3 +132,20 @@ weight_decay=0.0,
 {'eval_loss': 0.27460750937461853, 'eval_wer': 0.5393918451969593, 'eval_runtime': 160.7333, 'eval_samples_per_second': 4.125, 'eval_steps_per_second': 0.516, 'epoch': 3.25}
 {'eval_loss': 0.26981213688850403, 'eval_wer': 0.5345542501727713, 'eval_runtime': 160.1257, 'eval_samples_per_second': 4.14, 'eval_steps_per_second': 0.518, 'epoch': 3.66}
 {'train_runtime': 5112.0325, 'train_samples_per_second': 1.54, 'train_steps_per_second': 0.385, 'train_loss': 2.1205503649827913, 'epoch': 4.0}
+***** train metrics *****
+  epoch = 4.0
+  train_loss = 2.1206
+  train_runtime = 1:25:12.03
+  train_samples = 1968
+  train_samples_per_second = 1.54
+  train_steps_per_second = 0.385
+01/29/2024 21:22:32 - INFO - __main__ - *** Evaluate ***
+***** eval metrics *****
+  epoch = 4.0
+  eval_loss = 0.2666
+  eval_runtime = 0:02:40.65
+  eval_samples = 663
+  eval_samples_per_second = 4.127
+  eval_steps_per_second = 0.517
+  eval_wer = 0.5349
+01/29/2024 21:25:13 - INFO - __main__ - Saving adapter weights under /cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml/adapter.mal.safetensors...
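The human-readable runtimes in the log are the same values recorded in seconds in the JSON results files; for example:

```python
# The log's "train_runtime = 1:25:12.03" and "eval_runtime = 0:02:40.65" are the
# JSON values 5112.0325 s and 160.6538 s rendered as H:MM:SS.
from datetime import timedelta

print(timedelta(seconds=5112.0325))  # 1:25:12.032500
print(timedelta(seconds=160.6538))   # 0:02:40.653800
```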
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 4.0,
+    "eval_loss": 0.2665550410747528,
+    "eval_runtime": 160.6538,
+    "eval_samples": 663,
+    "eval_samples_per_second": 4.127,
+    "eval_steps_per_second": 0.517,
+    "eval_wer": 0.5348997926744989
+}
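`eval_wer` is word error rate on the Common Voice 16.0 Malayalam test split, i.e. roughly 53.5 word-level edits per 100 reference words. A minimal sketch of how such a score is computed with the `evaluate` library; the strings are hypothetical placeholders, not outputs of this run:

```python
# Minimal word error rate sketch with the `evaluate` library.
# The strings below are hypothetical placeholders, not outputs of this model.
import evaluate

wer_metric = evaluate.load("wer")
predictions = ["the cat sat on mat"]
references = ["the cat sat on the mat"]
print(wer_metric.compute(predictions=predictions, references=references))  # ~0.167 (1 deletion / 6 words)
```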
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 4.0,
+    "train_loss": 2.1205503649827913,
+    "train_runtime": 5112.0325,
+    "train_samples": 1968,
+    "train_samples_per_second": 1.54,
+    "train_steps_per_second": 0.385
+}
trainer_state.json ADDED
@@ -0,0 +1,129 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 200,
+  "global_step": 1968,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.41,
+      "eval_loss": 5.472805500030518,
+      "eval_runtime": 162.595,
+      "eval_samples_per_second": 4.078,
+      "eval_steps_per_second": 0.51,
+      "eval_wer": 1.075673807878369,
+      "step": 200
+    },
+    {
+      "epoch": 0.81,
+      "eval_loss": 5.127437114715576,
+      "eval_runtime": 163.1607,
+      "eval_samples_per_second": 4.063,
+      "eval_steps_per_second": 0.509,
+      "eval_wer": 1.003800967519005,
+      "step": 400
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.0007890792291220557,
+      "loss": 6.5037,
+      "step": 500
+    },
+    {
+      "epoch": 1.22,
+      "eval_loss": 0.6166694760322571,
+      "eval_runtime": 161.3235,
+      "eval_samples_per_second": 4.11,
+      "eval_steps_per_second": 0.514,
+      "eval_wer": 0.8130615065653075,
+      "step": 600
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 0.328411728143692,
+      "eval_runtime": 162.053,
+      "eval_samples_per_second": 4.091,
+      "eval_steps_per_second": 0.512,
+      "eval_wer": 0.582930200414651,
+      "step": 800
+    },
+    {
+      "epoch": 2.03,
+      "learning_rate": 0.0005214132762312634,
+      "loss": 1.0482,
+      "step": 1000
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 0.3169207274913788,
+      "eval_runtime": 165.1028,
+      "eval_samples_per_second": 4.016,
+      "eval_steps_per_second": 0.503,
+      "eval_wer": 0.5666897028334485,
+      "step": 1000
+    },
+    {
+      "epoch": 2.44,
+      "eval_loss": 0.28758111596107483,
+      "eval_runtime": 160.9496,
+      "eval_samples_per_second": 4.119,
+      "eval_steps_per_second": 0.516,
+      "eval_wer": 0.5425017277125086,
+      "step": 1200
+    },
+    {
+      "epoch": 2.85,
+      "eval_loss": 0.2846720516681671,
+      "eval_runtime": 161.8788,
+      "eval_samples_per_second": 4.096,
+      "eval_steps_per_second": 0.513,
+      "eval_wer": 0.5521769177608846,
+      "step": 1400
+    },
+    {
+      "epoch": 3.05,
+      "learning_rate": 0.00025374732334047106,
+      "loss": 0.4314,
+      "step": 1500
+    },
+    {
+      "epoch": 3.25,
+      "eval_loss": 0.27460750937461853,
+      "eval_runtime": 160.7333,
+      "eval_samples_per_second": 4.125,
+      "eval_steps_per_second": 0.516,
+      "eval_wer": 0.5393918451969593,
+      "step": 1600
+    },
+    {
+      "epoch": 3.66,
+      "eval_loss": 0.26981213688850403,
+      "eval_runtime": 160.1257,
+      "eval_samples_per_second": 4.14,
+      "eval_steps_per_second": 0.518,
+      "eval_wer": 0.5345542501727713,
+      "step": 1800
+    },
+    {
+      "epoch": 4.0,
+      "step": 1968,
+      "total_flos": 2.984133759613272e+18,
+      "train_loss": 2.1205503649827913,
+      "train_runtime": 5112.0325,
+      "train_samples_per_second": 1.54,
+      "train_steps_per_second": 0.385
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1968,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 200,
+  "total_flos": 2.984133759613272e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
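`log_history` interleaves training-loss entries (every 500 steps, per `logging_steps`) with evaluation entries (every 200 steps, per `eval_steps`); eval WER falls from about 1.08 at step 200 to 0.53 at step 1800. A short sketch to pull the eval curve out of this file:

```python
# Extract the eval WER curve from trainer_state.json's log_history.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_wer" in entry:  # skip the pure training-loss entries
        print(f"step {entry['step']:>4}: eval_wer={entry['eval_wer']:.4f}, eval_loss={entry['eval_loss']:.4f}")
```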