mazesmazes committed on
Commit
72a24d8
·
verified ·
1 Parent(s): fd72c1b

Training in progress, step 1000

Browse files
Files changed (4) hide show
  1. config.json +6 -16
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
config.json CHANGED
@@ -46,7 +46,7 @@
46
  }
47
  },
48
  "audio_token_id": 59260,
49
- "dtype": "bfloat16",
50
  "hidden_size": 2048,
51
  "model_type": "glmasr",
52
  "num_mel_bins": 128,
@@ -122,7 +122,7 @@
122
  },
123
  "do_sample": false,
124
  "downsample_rate": 5,
125
- "dtype": "bfloat16",
126
  "encoder": {
127
  "_name_or_path": "zai-org/GLM-ASR-Nano-2512",
128
  "architectures": [
@@ -166,7 +166,7 @@
166
  }
167
  },
168
  "audio_token_id": 59260,
169
- "dtype": "bfloat16",
170
  "hidden_size": 2048,
171
  "model_type": "glmasr",
172
  "num_mel_bins": 128,
@@ -237,9 +237,6 @@
237
  "eos_token_id": 151645,
238
  "freeze_language_model": false,
239
  "freeze_projector": false,
240
- "freq_mask_length": 27,
241
- "inference_warmup_tokens": 10,
242
- "label_smoothing": 0.0,
243
  "length_penalty": 1.0,
244
  "llm_dim": 1024,
245
  "lora_alpha": 32,
@@ -254,23 +251,18 @@
254
  "up_proj",
255
  "down_proj"
256
  ],
257
- "max_new_tokens": 128,
258
  "min_new_tokens": 0,
259
- "model_dtype": "bfloat16",
260
  "model_type": "asr_model",
261
  "no_repeat_ngram_size": 0,
262
  "num_beams": 1,
263
  "num_experts": 4,
264
  "num_experts_per_tok": 2,
265
- "num_freq_masks": 2,
266
- "num_time_masks": 2,
267
  "pad_token_id": 151643,
268
  "pipeline_tag": "automatic-speech-recognition",
269
  "pretrained_model_path": "mazesmazes/tiny-audio-embedded",
270
- "projector_dropout": 0.0,
271
  "projector_hidden_dim": 2048,
272
- "projector_init_std": 0.02,
273
- "projector_num_layers": 2,
274
  "projector_pool_stride": 4,
275
  "projector_type": "mlp",
276
  "qformer_hidden_size": null,
@@ -290,7 +282,7 @@
290
  "attention_bias": false,
291
  "attention_dropout": 0.0,
292
  "bos_token_id": null,
293
- "dtype": "bfloat16",
294
  "eos_token_id": 151645,
295
  "head_dim": 128,
296
  "hidden_act": "silu",
@@ -346,12 +338,10 @@
346
  "vocab_size": 151670
347
  },
348
  "text_model_id": "Qwen/Qwen3-0.6B",
349
- "time_mask_length": 100,
350
  "top_k": null,
351
  "top_p": null,
352
  "transformers_version": "5.6.1",
353
  "use_cache": false,
354
  "use_lora": false,
355
- "use_specaugment": true,
356
  "vocab_size": 151670
357
  }
 
46
  }
47
  },
48
  "audio_token_id": 59260,
49
+ "dtype": "float32",
50
  "hidden_size": 2048,
51
  "model_type": "glmasr",
52
  "num_mel_bins": 128,
 
122
  },
123
  "do_sample": false,
124
  "downsample_rate": 5,
125
+ "dtype": "float32",
126
  "encoder": {
127
  "_name_or_path": "zai-org/GLM-ASR-Nano-2512",
128
  "architectures": [
 
166
  }
167
  },
168
  "audio_token_id": 59260,
169
+ "dtype": "float32",
170
  "hidden_size": 2048,
171
  "model_type": "glmasr",
172
  "num_mel_bins": 128,
 
237
  "eos_token_id": 151645,
238
  "freeze_language_model": false,
239
  "freeze_projector": false,
 
 
 
240
  "length_penalty": 1.0,
241
  "llm_dim": 1024,
242
  "lora_alpha": 32,
 
251
  "up_proj",
252
  "down_proj"
253
  ],
254
+ "max_new_tokens": 256,
255
  "min_new_tokens": 0,
256
+ "model_dtype": "float32",
257
  "model_type": "asr_model",
258
  "no_repeat_ngram_size": 0,
259
  "num_beams": 1,
260
  "num_experts": 4,
261
  "num_experts_per_tok": 2,
 
 
262
  "pad_token_id": 151643,
263
  "pipeline_tag": "automatic-speech-recognition",
264
  "pretrained_model_path": "mazesmazes/tiny-audio-embedded",
 
265
  "projector_hidden_dim": 2048,
 
 
266
  "projector_pool_stride": 4,
267
  "projector_type": "mlp",
268
  "qformer_hidden_size": null,
 
282
  "attention_bias": false,
283
  "attention_dropout": 0.0,
284
  "bos_token_id": null,
285
+ "dtype": "float32",
286
  "eos_token_id": 151645,
287
  "head_dim": 128,
288
  "hidden_act": "silu",
 
338
  "vocab_size": 151670
339
  },
340
  "text_model_id": "Qwen/Qwen3-0.6B",
 
341
  "top_k": null,
342
  "top_p": null,
343
  "transformers_version": "5.6.1",
344
  "use_cache": false,
345
  "use_lora": false,
 
346
  "vocab_size": 151670
347
  }
generation_config.json CHANGED
@@ -6,7 +6,7 @@
6
  151643
7
  ],
8
  "length_penalty": 1.0,
9
- "max_new_tokens": 128,
10
  "min_new_tokens": 0,
11
  "no_repeat_ngram_size": 0,
12
  "num_beams": 1,
 
6
  151643
7
  ],
8
  "length_penalty": 1.0,
9
+ "max_new_tokens": 256,
10
  "min_new_tokens": 0,
11
  "no_repeat_ngram_size": 0,
12
  "num_beams": 1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a55b8ac11eccbfd0b8b99049ce9bf61b925eac0c0555cbc851655d947771644
3
- size 1216765200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb90942db267303d69bdea633cf63552c2c39a7982770a9ff1cbfca06b2d82d
3
+ size 2433494416
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1d8254e80afa51faadc78d03c6c356349ceecfcba493d79a26fe65541649e65
3
  size 5329
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c48a0a666b225969b861fcb86f68f1011c614c2b04bc3f107efa23ad74095078
3
  size 5329