Titouan committed on
Commit
330fe4c
1 Parent(s): efe1a56

update LeBenchmark

Browse files
Files changed (6) hide show
  1. README.md +1 -1
  2. asr.ckpt +1 -1
  3. config.json +14 -7
  4. hyperparams.yaml +1 -1
  5. tokenizer.ckpt +0 -0
  6. wav2vec2.ckpt +1 -1
README.md CHANGED
@@ -37,7 +37,7 @@ The performance of the model is the following:
37
  This ASR system is composed of 2 different but linked blocks:
38
  - Tokenizer (unigram) that transforms words into subword units and trained with
39
  the train transcriptions (train.tsv) of CommonVoice (FR).
40
- - Acoustic model (wav2vec2.0 + CTC/Attention). A pretrained wav2vec 2.0 model ([wav2vec2-large-xlsr-53-french](https://huggingface.co/facebook/wav2vec2-large-xlsr-53-french)) is combined with two DNN layers and finetuned on CommonVoice FR.
41
  The obtained final acoustic representation is given to the CTC and attention decoders.
42
 
43
 
 
37
  This ASR system is composed of 2 different but linked blocks:
38
  - Tokenizer (unigram) that transforms words into subword units and trained with
39
  the train transcriptions (train.tsv) of CommonVoice (FR).
40
+ - Acoustic model (wav2vec2.0 + CTC/Attention). A pretrained wav2vec 2.0 model ([LeBenchmark/wav2vec2-FR-M-large](https://huggingface.co/LeBenchmark/wav2vec2-FR-M-large)) is combined with two DNN layers and finetuned on CommonVoice FR.
41
  The obtained final acoustic representation is given to the CTC and attention decoders.
42
 
43
 
asr.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee40bc648d23dccd4d6d8cf77eb317aede679218ad192c96ad631921e7561024
3
  size 60570064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9b32cfe8a7d10fa852874b5507661d95cdb8c9c8dd9add45976e786e08c52e
3
  size 60570064
config.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "activation_dropout": 0.1,
3
  "apply_spec_augment": true,
4
  "architectures": [
5
- "Wav2Vec2ForCTC"
6
  ],
7
  "attention_dropout": 0.1,
8
  "bos_token_id": 1,
@@ -42,20 +42,27 @@
42
  "feat_extract_dropout": 0.0,
43
  "feat_extract_norm": "layer",
44
  "feat_proj_dropout": 0.1,
45
- "final_dropout": 0.1,
46
  "gradient_checkpointing": false,
47
  "hidden_act": "gelu",
48
  "hidden_dropout": 0.1,
49
- "hidden_dropout_prob": 0.1,
50
  "hidden_size": 1024,
51
  "initializer_range": 0.02,
52
  "intermediate_size": 4096,
53
  "layer_norm_eps": 1e-05,
54
  "layerdrop": 0.1,
 
 
 
 
 
55
  "mask_feature_length": 10,
56
  "mask_feature_prob": 0.0,
57
  "mask_time_length": 10,
58
- "mask_time_prob": 0.05,
 
 
 
59
  "model_type": "wav2vec2",
60
  "num_attention_heads": 16,
61
  "num_conv_pos_embedding_groups": 16,
@@ -63,6 +70,6 @@
63
  "num_feat_extract_layers": 7,
64
  "num_hidden_layers": 24,
65
  "pad_token_id": 0,
66
- "transformers_version": "4.4.0.dev0",
67
- "vocab_size": 49
68
  }
 
1
  {
2
+ "activation_dropout": 0.0,
3
  "apply_spec_augment": true,
4
  "architectures": [
5
+ "Wav2Vec2Model"
6
  ],
7
  "attention_dropout": 0.1,
8
  "bos_token_id": 1,
 
42
  "feat_extract_dropout": 0.0,
43
  "feat_extract_norm": "layer",
44
  "feat_proj_dropout": 0.1,
45
+ "final_dropout": 0.0,
46
  "gradient_checkpointing": false,
47
  "hidden_act": "gelu",
48
  "hidden_dropout": 0.1,
 
49
  "hidden_size": 1024,
50
  "initializer_range": 0.02,
51
  "intermediate_size": 4096,
52
  "layer_norm_eps": 1e-05,
53
  "layerdrop": 0.1,
54
+ "mask_channel_length": 10,
55
+ "mask_channel_min_space": 1,
56
+ "mask_channel_other": 0.0,
57
+ "mask_channel_prob": 0.0,
58
+ "mask_channel_selection": "static",
59
  "mask_feature_length": 10,
60
  "mask_feature_prob": 0.0,
61
  "mask_time_length": 10,
62
+ "mask_time_min_space": 1,
63
+ "mask_time_other": 0.0,
64
+ "mask_time_prob": 0.075,
65
+ "mask_time_selection": "static",
66
  "model_type": "wav2vec2",
67
  "num_attention_heads": 16,
68
  "num_conv_pos_embedding_groups": 16,
 
70
  "num_feat_extract_layers": 7,
71
  "num_hidden_layers": 24,
72
  "pad_token_id": 0,
73
+ "transformers_version": "4.5.1",
74
+ "vocab_size": 32
75
  }
hyperparams.yaml CHANGED
@@ -5,7 +5,7 @@
5
  # ################################
6
 
7
  sample_rate: 16000
8
- wav2vec2_hub: facebook/wav2vec2-large-xlsr-53-french
9
 
10
  # BPE parameters
11
  token_type: unigram # ["unigram", "bpe", "char"]
 
5
  # ################################
6
 
7
  sample_rate: 16000
8
+ wav2vec2_hub: LeBenchmark/wav2vec2-FR-M-large
9
 
10
  # BPE parameters
11
  token_type: unigram # ["unigram", "bpe", "char"]
tokenizer.ckpt CHANGED
Binary files a/tokenizer.ckpt and b/tokenizer.ckpt differ
 
wav2vec2.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5675c122faaa76ed0e81e658a98a7bd6e498cd79f2f171b158a6dae10985c49c
3
  size 1261930757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1869d41bd746312a183ce45f3119696d6a275680b0a01a7e5d2ebeba7e8a42
3
  size 1261930757