lewtun HF staff committed on
Commit
b4b1070
1 Parent(s): 11e5015

commit files to HF hub

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - superb
4
+ tags:
5
+ - library:s3prl
6
+ - benchmark:superb
7
+ - type:model
8
+ ---
9
+
10
+ # Fine-tuned s3prl model
11
+
12
+ Upstream Model: superb-test-org/test-submission-with-example-expert
13
+
14
+ ## Model description
15
+
16
+ [More information needed]
17
+
18
+ ## Intended uses & limitations
19
+
20
+ [More information needed]
21
+
22
+ ## How to use
23
+
24
+ [More information needed]
25
+
26
+ ## Limitations and bias
27
+
28
+ [More information needed]
29
+
30
+ ## Training data
31
+
32
+ [More information needed]
33
+
34
+ ## Training procedure
35
+
36
+ [More information needed]
37
+
38
+ ## Evaluation results
39
+
40
+ [More information needed]
41
+
asr/asr_lr01/README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: superb
3
+ benchmark: superb
4
+ task: asr
5
+ datasets:
6
+ - superb
7
+ tags:
8
+ - automatic-speech-recognition
9
+ - ${upstream_model}
10
+ widget:
11
+ - label: Librispeech sample 1
12
+ src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
13
+ ---
14
+
15
+ # Fine-tuned s3prl model for ASR
asr/asr_lr01/args_2021-09-24-18-59-50.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_resume: false
2
+ backend: nccl
3
+ cache_dir: null
4
+ config: ./downstream/asr/config.yaml
5
+ device: cuda
6
+ disable_cudnn: false
7
+ downstream: asr
8
+ downstream_variant: null
9
+ evaluate_split: test
10
+ expdir: result/downstream/asr_lr01
11
+ expname: asr_lr01
12
+ from_hf_hub: true
13
+ hf_hub_org: superb
14
+ hub: huggingface
15
+ init_ckpt: null
16
+ local_rank: null
17
+ mode: train
18
+ override: config.downstream_expert.datarc.libri_root='/data/lewis/superb/LibriSpeech',,config.downstream_expert.datarc.bucket_file='/data/lewis/superb/LibriSpeech/len_for_bucket',,config.runner.total_steps=10,,config.runner.save_step=5
19
+ past_exp: null
20
+ push_to_hf_hub: 'True'
21
+ seed: 1337
22
+ upstream: superb-test-org/test-submission-with-weights
23
+ upstream_ckpt: null
24
+ upstream_feature_selection: hidden_states
25
+ upstream_layer_selection: null
26
+ upstream_model_config: null
27
+ upstream_model_name: model.pt
28
+ upstream_refresh: false
29
+ upstream_trainable: false
30
+ verbose: false
asr/asr_lr01/args_2021-09-24-19-59-45.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_resume: false
2
+ backend: nccl
3
+ cache_dir: null
4
+ config: ./downstream/asr/config.yaml
5
+ device: cuda
6
+ disable_cudnn: false
7
+ downstream: asr
8
+ downstream_variant: null
9
+ evaluate_split: test
10
+ expdir: result/downstream/asr_lr01
11
+ expname: asr_lr01
12
+ from_hf_hub: true
13
+ hf_hub_org: superb
14
+ hub: huggingface
15
+ init_ckpt: null
16
+ local_rank: null
17
+ mode: train
18
+ override: config.downstream_expert.datarc.libri_root='/data/lewis/superb/LibriSpeech',,config.downstream_expert.datarc.bucket_file='/data/lewis/superb/LibriSpeech/len_for_bucket',,config.runner.total_steps=10,,config.runner.save_step=5
19
+ past_exp: null
20
+ push_to_hf_hub: 'True'
21
+ seed: 1337
22
+ upstream: superb-test-org/test-submission-with-example-expert
23
+ upstream_ckpt: null
24
+ upstream_feature_selection: hidden_states
25
+ upstream_layer_selection: null
26
+ upstream_model_config: null
27
+ upstream_model_name: model.pt
28
+ upstream_refresh: false
29
+ upstream_trainable: false
30
+ verbose: false
asr/asr_lr01/char.dict ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ | 1980202
2
+ E 1091870
3
+ T 789572
4
+ A 689048
5
+ O 647720
6
+ N 591778
7
+ I 585614
8
+ H 557204
9
+ S 545238
10
+ R 499568
11
+ D 380912
12
+ L 344952
13
+ U 242014
14
+ M 217730
15
+ C 210734
16
+ W 204598
17
+ F 195086
18
+ G 174098
19
+ Y 168548
20
+ P 146722
21
+ B 129608
22
+ V 81496
23
+ K 65070
24
+ ' 19660
25
+ X 12530
26
+ J 12062
27
+ Q 8164
28
+ Z 4916
asr/asr_lr01/config_2021-09-24-18-59-50.yaml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ downstream_expert:
2
+ datarc:
3
+ batch_size: 32
4
+ bucket_file: /data/lewis/superb/LibriSpeech/len_for_bucket
5
+ decoder_args:
6
+ beam: 5
7
+ beam_threshold: 25
8
+ criterion: ctc
9
+ decoder_type: None
10
+ kenlm_model: /path/to/KenLM
11
+ lexicon: /path/to/4-gram.arpa
12
+ lm_weight: 2
13
+ nbest: 1
14
+ sil_weight: 0
15
+ unk_weight: -math.inf
16
+ word_score: -1
17
+ dev-clean:
18
+ - dev-clean
19
+ dev-other:
20
+ - dev-other
21
+ dict_path: ./downstream/asr/char.dict
22
+ eval_batch_size: 1
23
+ libri_root: /data/lewis/superb/LibriSpeech
24
+ num_workers: 12
25
+ test-clean:
26
+ - test-clean
27
+ test-other:
28
+ - test-other
29
+ train:
30
+ - train-clean-100
31
+ train_batch_size: 32
32
+ zero_infinity: true
33
+ modelrc:
34
+ RNNs:
35
+ bidirection: true
36
+ dim:
37
+ - 1024
38
+ - 1024
39
+ dropout:
40
+ - 0.2
41
+ - 0.2
42
+ layer_norm:
43
+ - false
44
+ - false
45
+ module: LSTM
46
+ proj:
47
+ - false
48
+ - false
49
+ sample_rate:
50
+ - 1
51
+ - 1
52
+ sample_style: concat
53
+ total_rate: -1
54
+ Wav2Letter:
55
+ total_rate: 320
56
+ project_dim: 1024
57
+ select: RNNs
58
+ optimizer:
59
+ lr: 0.0001
60
+ name: TorchOptim
61
+ torch_optim_name: Adam
62
+ runner:
63
+ eval_dataloaders:
64
+ - dev-clean
65
+ eval_step: 2000
66
+ gradient_accumulate_steps: 1
67
+ gradient_clipping: 1
68
+ log_step: 100
69
+ max_keep: 1
70
+ save_step: 5
71
+ total_steps: 10
72
+ specaug:
73
+ apply_freq_mask: true
74
+ apply_time_mask: true
75
+ apply_time_warp: true
76
+ freq_mask_width_range:
77
+ - 0
78
+ - 50
79
+ num_freq_mask: 4
80
+ num_time_mask: 2
81
+ time_mask_width_range:
82
+ - 0
83
+ - 40
84
+ time_warp_window: 5
asr/asr_lr01/config_2021-09-24-19-59-45.yaml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ downstream_expert:
2
+ datarc:
3
+ batch_size: 32
4
+ bucket_file: /data/lewis/superb/LibriSpeech/len_for_bucket
5
+ decoder_args:
6
+ beam: 5
7
+ beam_threshold: 25
8
+ criterion: ctc
9
+ decoder_type: None
10
+ kenlm_model: /path/to/KenLM
11
+ lexicon: /path/to/4-gram.arpa
12
+ lm_weight: 2
13
+ nbest: 1
14
+ sil_weight: 0
15
+ unk_weight: -math.inf
16
+ word_score: -1
17
+ dev-clean:
18
+ - dev-clean
19
+ dev-other:
20
+ - dev-other
21
+ dict_path: ./downstream/asr/char.dict
22
+ eval_batch_size: 1
23
+ libri_root: /data/lewis/superb/LibriSpeech
24
+ num_workers: 12
25
+ test-clean:
26
+ - test-clean
27
+ test-other:
28
+ - test-other
29
+ train:
30
+ - train-clean-100
31
+ train_batch_size: 32
32
+ zero_infinity: true
33
+ modelrc:
34
+ RNNs:
35
+ bidirection: true
36
+ dim:
37
+ - 1024
38
+ - 1024
39
+ dropout:
40
+ - 0.2
41
+ - 0.2
42
+ layer_norm:
43
+ - false
44
+ - false
45
+ module: LSTM
46
+ proj:
47
+ - false
48
+ - false
49
+ sample_rate:
50
+ - 1
51
+ - 1
52
+ sample_style: concat
53
+ total_rate: -1
54
+ Wav2Letter:
55
+ total_rate: 320
56
+ project_dim: 1024
57
+ select: RNNs
58
+ optimizer:
59
+ lr: 0.0001
60
+ name: TorchOptim
61
+ torch_optim_name: Adam
62
+ runner:
63
+ eval_dataloaders:
64
+ - dev-clean
65
+ eval_step: 2000
66
+ gradient_accumulate_steps: 1
67
+ gradient_clipping: 1
68
+ log_step: 100
69
+ max_keep: 1
70
+ save_step: 5
71
+ total_steps: 10
72
+ specaug:
73
+ apply_freq_mask: true
74
+ apply_time_mask: true
75
+ apply_time_warp: true
76
+ freq_mask_width_range:
77
+ - 0
78
+ - 50
79
+ num_freq_mask: 4
80
+ num_time_mask: 2
81
+ time_mask_width_range:
82
+ - 0
83
+ - 40
84
+ time_warp_window: 5
asr/asr_lr01/events.out.tfevents.1632502799.vorace ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6a353ec80108ab9875ffb6037ba9ee749d688e38355ffbac51e617066ad53dd
3
+ size 40
asr/asr_lr01/events.out.tfevents.1632506421.vorace ADDED
File without changes
asr/asr_lr01/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d9a9f55e0073badbe9464012212a5a48e8a9c24c0f9deda3157899b8736b0d
3
+ size 513965839
asr/asr_lr01/model.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from s3prl.downstream.runner import Runner
2
+ from typing import Dict
3
+ import torch
4
+ import os
5
+
6
+
7
class PreTrainedModel(Runner):
    """s3prl downstream ``Runner`` wrapper for ASR inference.

    Loads a fine-tuned checkpoint (``model.ckpt``) together with its
    character dictionary (``char.dict``) from *path*, and exposes a simple
    ``__call__`` API that maps a raw waveform to the transcribed text.
    """

    def __init__(self, path=""):
        """
        Initialize downstream model.

        Args:
            path: Directory containing ``model.ckpt`` and ``char.dict``.
        """
        ckp_file = os.path.join(path, "model.ckpt")
        # NOTE(review): torch.load unpickles arbitrary objects — only load
        # checkpoints from trusted sources.
        ckp = torch.load(ckp_file, map_location='cpu')
        # Rewire the saved args so the Runner restores this exact checkpoint
        # on CPU in inference mode.
        ckp["Args"].init_ckpt = ckp_file
        ckp["Args"].mode = "inference"
        ckp["Args"].device = "cpu"
        # The dict path stored in the checkpoint points at the training
        # machine; redirect it to the copy shipped next to the checkpoint.
        ckp["Config"]["downstream_expert"]["datarc"]["dict_path"] = os.path.join(path, 'char.dict')

        # Idiomatic super() call instead of explicit Runner.__init__(self, ...).
        super().__init__(ckp["Args"], ckp["Config"])

    def __call__(self, inputs) -> Dict[str, str]:
        """
        Args:
            inputs (:obj:`np.array`):
                The raw waveform of audio received. By default at 16KHz.
        Return:
            A :obj:`dict` like ``{"text": "XXX"}`` containing the detected
            text from the input audio.
        """
        # Put every sub-module (upstream, featurizer, downstream) in eval mode.
        for entry in self.all_entries:
            entry.model.eval()

        inputs = [torch.FloatTensor(inputs)]

        with torch.no_grad():
            features = self.upstream.model(inputs)
            features = self.featurizer.model(inputs, features)
            preds = self.downstream.model.inference(features, [])
        return {"text": preds[0]}