sravan-gorugantu committed on
Commit
97bd0e0
1 Parent(s): 3df153b

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,60 +1,12 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-base",
3
- "activation_dropout": 0.0,
4
- "adapter_attn_dim": null,
5
- "adapter_kernel_size": 3,
6
- "adapter_stride": 2,
7
- "add_adapter": false,
8
- "apply_spec_augment": true,
9
  "architectures": [
10
- "Wav2Vec2ForSequenceClassification"
11
  ],
12
- "attention_dropout": 0.1,
13
- "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
- "codevector_dim": 256,
16
- "contrastive_logits_temperature": 0.1,
17
- "conv_bias": false,
18
- "conv_dim": [
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512,
25
- 512
26
- ],
27
- "conv_kernel": [
28
- 10,
29
- 3,
30
- 3,
31
- 3,
32
- 3,
33
- 2,
34
- 2
35
- ],
36
- "conv_stride": [
37
- 5,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2,
43
- 2
44
- ],
45
- "ctc_loss_reduction": "sum",
46
- "ctc_zero_infinity": false,
47
- "diversity_loss_weight": 0.1,
48
- "do_stable_layer_norm": false,
49
- "eos_token_id": 2,
50
- "feat_extract_activation": "gelu",
51
- "feat_extract_norm": "group",
52
- "feat_proj_dropout": 0.1,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "freeze_feat_extract_train": true,
56
  "hidden_act": "gelu",
57
- "hidden_dropout": 0.1,
58
  "hidden_size": 768,
59
  "id2label": {
60
  "0": "onboard",
@@ -70,61 +22,16 @@
70
  "return": "2",
71
  "unknown": "3"
72
  },
73
- "layer_norm_eps": 1e-05,
74
- "layerdrop": 0.0,
75
- "mask_channel_length": 10,
76
- "mask_channel_min_space": 1,
77
- "mask_channel_other": 0.0,
78
- "mask_channel_prob": 0.0,
79
- "mask_channel_selection": "static",
80
- "mask_feature_length": 10,
81
- "mask_feature_min_masks": 0,
82
- "mask_feature_prob": 0.0,
83
- "mask_time_length": 10,
84
- "mask_time_min_masks": 2,
85
- "mask_time_min_space": 1,
86
- "mask_time_other": 0.0,
87
- "mask_time_prob": 0.05,
88
- "mask_time_selection": "static",
89
- "model_type": "wav2vec2",
90
- "no_mask_channel_overlap": false,
91
- "no_mask_time_overlap": false,
92
- "num_adapter_layers": 3,
93
  "num_attention_heads": 12,
94
- "num_codevector_groups": 2,
95
- "num_codevectors_per_group": 320,
96
- "num_conv_pos_embedding_groups": 16,
97
- "num_conv_pos_embeddings": 128,
98
- "num_feat_extract_layers": 7,
99
  "num_hidden_layers": 12,
100
- "num_negatives": 100,
101
- "output_hidden_size": 768,
102
- "pad_token_id": 0,
103
- "proj_codevector_dim": 256,
104
- "tdnn_dilation": [
105
- 1,
106
- 2,
107
- 3,
108
- 1,
109
- 1
110
- ],
111
- "tdnn_dim": [
112
- 512,
113
- 512,
114
- 512,
115
- 512,
116
- 1500
117
- ],
118
- "tdnn_kernel": [
119
- 5,
120
- 3,
121
- 3,
122
- 1,
123
- 1
124
- ],
125
  "torch_dtype": "float32",
126
- "transformers_version": "4.38.1",
127
- "use_weighted_layer_sum": false,
128
- "vocab_size": 32,
129
- "xvector_output_dim": 512
130
  }
 
1
  {
2
+ "_name_or_path": "MIT/ast-finetuned-speech-commands-v2",
 
 
 
 
 
 
3
  "architectures": [
4
+ "ASTForAudioClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "frequency_stride": 10,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
  "0": "onboard",
 
22
  "return": "2",
23
  "unknown": "3"
24
  },
25
+ "layer_norm_eps": 1e-12,
26
+ "max_length": 128,
27
+ "model_type": "audio-spectrogram-transformer",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "num_attention_heads": 12,
 
 
 
 
 
29
  "num_hidden_layers": 12,
30
+ "num_mel_bins": 128,
31
+ "patch_size": 16,
32
+ "problem_type": "single_label_classification",
33
+ "qkv_bias": true,
34
+ "time_stride": 10,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "torch_dtype": "float32",
36
+ "transformers_version": "4.38.1"
 
 
 
37
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0b163bd8a3ab5a7e8d0794e086e78de1f37e93f9e67c085e2535e99da29d74d
3
- size 378304424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80324a692b6c11a88514e69c1fddae8d697625f17ef304584bc0672882c6639
3
+ size 341515296
preprocessor_config.json CHANGED
@@ -1,9 +1,13 @@
1
  {
2
  "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
  "feature_size": 1,
 
 
 
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
  "return_attention_mask": false,
8
- "sampling_rate": 16000
 
9
  }
 
1
  {
2
  "do_normalize": true,
3
+ "feature_extractor_type": "ASTFeatureExtractor",
4
  "feature_size": 1,
5
+ "max_length": 128,
6
+ "mean": -6.845978,
7
+ "num_mel_bins": 128,
8
  "padding_side": "right",
9
  "padding_value": 0.0,
10
  "return_attention_mask": false,
11
+ "sampling_rate": 16000,
12
+ "std": 5.5654526
13
  }
runs/Apr15_10-30-24_voice-ai-transformer/events.out.tfevents.1713177065.voice-ai-transformer.69074.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ea44906e4839065ef5e02ccf61e76d608a051dc735ac48b95a35655a0e905a
3
+ size 4719
runs/Apr15_12-01-24_voice-ai-transformer/events.out.tfevents.1713182525.voice-ai-transformer.136042.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:593de290c5d6f45bd40e3bd092d25a9a680b89f3d8909c125c80c9b411f19366
3
+ size 4717
runs/Apr15_12-31-27_voice-ai-transformer/events.out.tfevents.1713184293.voice-ai-transformer.215553.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1383ea3878524082dbc4dd11c30daf660a0df0baac5512f1a33fe9339e8700f7
3
+ size 4717
runs/Apr15_13-54-56_voice-ai-transformer/events.out.tfevents.1713189302.voice-ai-transformer.282817.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fad1af2690fc313d92c0abb5f842c22f04bf86d77f91699bb37afb106ed0259
3
+ size 10265
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a88e161f66768b25ae4ebd206e69408d0d3a1268ddec6caf408a8862da9e5094
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ccb13220a3671b782b6e2dcb8a97ae212fe9433f34ab9c53c56f5ae5312c574
3
  size 4920