cociweb committed on Jan 9

Commit

a7dfa95

•

1 Parent(s): d6ba077

Quantized models added

Browse files

Files changed (24) hide show

fp16/README.md +54 -0
fp16/config.json +231 -0
fp16/hash.json +10 -0
fp16/model.bin +3 -0
fp16/preprocessor_config.json +14 -0
fp16/tokenizer_config.json +0 -0
vocabulary.json → fp16/vocabulary.json +0 -0
vocabulary.txt → fp16/vocabulary.txt +0 -0
fp32/README.md +88 -0
fp32/config.json +154 -0
hash.json → fp32/hash.json +0 -0
model.bin → fp32/model.bin +0 -0
fp32/preprocessor_config.json +14 -0
fp32/tokenizer_config.json +0 -0
fp32/vocabulary.json +0 -0
fp32/vocabulary.txt +0 -0
int8/README.md +53 -0
int8/config.json +231 -0
int8/hash.json +10 -0
int8/model.bin +3 -0
int8/preprocessor_config.json +14 -0
int8/tokenizer_config.json +0 -0
int8/vocabulary.json +0 -0
int8/vocabulary.txt +0 -0

fp16/README.md ADDED Viewed

	@@ -0,0 +1,54 @@

+---
+language:
+  - hu
+tags:
+  - audio
+  - automatic-speech-recognition
+datasets:
+- mozilla-foundation/common_voice_16_0
+base_model: openai/whisper-base
+license: mit
+library_name: ctranslate2
+---
+# Whisper base model for CTranslate2
+This repository contains the conversion of a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) to the [CTranslate2](https://github.com/OpenNMT/CTranslate2) model format. The fine-tuning was done by [@sarpba](https://huggingface.co/sarpba) on Mozilla Foundation's Common Voice 16 dataset.
+This model can be used in CTranslate2 or projects based on CTranslate2 such as [faster-whisper](https://github.com/systran/faster-whisper).
+## Example
+```python
+from faster_whisper import WhisperModel
+model = WhisperModel("base")
+segments, info = model.transcribe("audio.mp3")
+for segment in segments:
+    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+```
+## Conversion details
+The original model was converted with the following command:
+```
+ct2-transformers-converter --model Hungarians/whisper-base-cv16-hu-v2 --output_dir faster-whisper-base-cv16-v2-fp16.hu \
+    --quantization fp16 --low_cpu_mem_usage --copy_files tokenizer_config.json preprocessor_config.json
+```
+## HASH calculation
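+The hashes are computed with `md5sum`, run in the model directory (the `hash.json` entry itself is the MD5 of an empty file, because the output file is created before it is hashed):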
+```
+find ./ -maxdepth 1 -type f -not -path '*/\.*'  -exec md5sum {} \; | tr -d ' ' | jq -R 'split("./") | {(.[1]): (.[0])}' | jq -s 'add' > hash.json
+```
+Note that the model weights are saved in FP16. This type can be changed when the model is loaded using the [`compute_type` option in CTranslate2](https://opennmt.net/CTranslate2/quantization.html).
+## More information
+**For more information about the original model, see its [model card](https://huggingface.co/Hungarians/whisper-base-cv16-hu-v2).**

fp16/config.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+  "alignment_heads": [
+    [
+      3,
+      1
+    ],
+    [
+      4,
+      2
+    ],
+    [
+      4,
+      3
+    ],
+    [
+      4,
+      7
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      5,
+      2
+    ],
+    [
+      5,
+      4
+    ],
+    [
+      5,
+      6
+    ]
+  ],
+  "lang_ids": [
+    50259,
+    50260,
+    50261,
+    50262,
+    50263,
+    50264,
+    50265,
+    50266,
+    50267,
+    50268,
+    50269,
+    50270,
+    50271,
+    50272,
+    50273,
+    50274,
+    50275,
+    50276,
+    50277,
+    50278,
+    50279,
+    50280,
+    50281,
+    50282,
+    50283,
+    50284,
+    50285,
+    50286,
+    50287,
+    50288,
+    50289,
+    50290,
+    50291,
+    50292,
+    50293,
+    50294,
+    50295,
+    50296,
+    50297,
+    50298,
+    50299,
+    50300,
+    50301,
+    50302,
+    50303,
+    50304,
+    50305,
+    50306,
+    50307,
+    50308,
+    50309,
+    50310,
+    50311,
+    50312,
+    50313,
+    50314,
+    50315,
+    50316,
+    50317,
+    50318,
+    50319,
+    50320,
+    50321,
+    50322,
+    50323,
+    50324,
+    50325,
+    50326,
+    50327,
+    50328,
+    50329,
+    50330,
+    50331,
+    50332,
+    50333,
+    50334,
+    50335,
+    50336,
+    50337,
+    50338,
+    50339,
+    50340,
+    50341,
+    50342,
+    50343,
+    50344,
+    50345,
+    50346,
+    50347,
+    50348,
+    50349,
+    50350,
+    50351,
+    50352,
+    50353,
+    50354,
+    50355,
+    50356,
+    50357
+  ],
+  "suppress_ids": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "suppress_ids_begin": [
+    220,
+    50257
+  ]
+}

fp16/hash.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "README.md": "59f14159d6a5e2e8cb61efb0e1d22bab",
+  "config.json": "ece6185c2317573e8f550b44ee557df5",
+  "model.bin": "80b54f9f010d2c322d32ca585e49e2c0",
+  "preprocessor_config.json": "15d1d7ee1cc6801b71f8ab68966aed86",
+  "tokenizer_config.json": "938ef60d4aca3c286e99d583410da1f4",
+  "vocabulary.json": "aebe7623626c8f3f61cc5208ff29c348",
+  "vocabulary.txt": "980d7011195d0c733bd374e31708717f",
+  "hash.json": "d41d8cd98f00b204e9800998ecf8427e"
+}

fp16/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7139d61de28220c9ea0833a1c4adc37009f66bb6b5c84d193b8ef77b276b982
+size 145217607

fp16/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

fp16/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocabulary.json → fp16/vocabulary.json RENAMED Viewed

File without changes

vocabulary.txt → fp16/vocabulary.txt RENAMED Viewed

File without changes

fp32/README.md ADDED Viewed

	@@ -0,0 +1,88 @@

+---
+datasets:
+  - mozilla-foundation/common_voice_16_0
+language:
+- hu
+widget:
+  - example_title: Sample 1
+    src: >-
+      https://huggingface.co/datasets/Hungarians/samples/resolve/main/Sample1.flac
+  - example_title: Sample 2
+    src: >-
+      https://huggingface.co/datasets/Hungarians/samples/resolve/main/Sample2.flac
+license: apache-2.0
+base_model: openai/whisper-base
+tags:
+- generated_from_trainer
+metrics:
+- wer
+model-index:
+- name: Whisper Base Hu v2
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Whisper Base Hu v2
+This model is a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) on the Common Voice 16.0 dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1599
+- Wer Ortho: 12.6641
+- Wer: 11.4171
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2.75e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: constant_with_warmup
+- lr_scheduler_warmup_steps: 500
+- training_steps: 15000
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Wer Ortho | Wer     |
+|:-------------:|:-----:|:-----:|:---------------:|:---------:|:-------:|
+| 0.199         | 0.33  | 1000  | 0.3838          | 36.7548   | 33.5517 |
+| 0.3037        | 0.67  | 2000  | 0.3131          | 31.2748   | 28.3664 |
+| 0.221         | 1.0   | 3000  | 0.2546          | 27.1739   | 24.1773 |
+| 0.1562        | 1.34  | 4000  | 0.2319          | 23.9341   | 21.3341 |
+| 0.1623        | 1.67  | 5000  | 0.2101          | 21.4079   | 18.9623 |
+| 0.077         | 2.01  | 6000  | 0.1818          | 18.5415   | 16.2852 |
+| 0.078         | 2.34  | 7000  | 0.1846          | 17.8339   | 15.7456 |
+| 0.0818        | 2.68  | 8000  | 0.1712          | 16.4669   | 14.5983 |
+| 0.0352        | 3.01  | 9000  | 0.1669          | 15.6178   | 14.0676 |
+| 0.0413        | 3.35  | 10000 | 0.1673          | 14.9464   | 13.4539 |
+| 0.0454        | 3.68  | 11000 | 0.1649          | 14.5459   | 12.7542 |
+| 0.0225        | 4.02  | 12000 | 0.1589          | 13.5885   | 12.2087 |
+| 0.0269        | 4.35  | 13000 | 0.1638          | 14.3864   | 12.8343 |
+| 0.0299        | 4.69  | 14000 | 0.1621          | 13.0555   | 11.7610 |
+| 0.0171        | 5.02  | 15000 | 0.1599          | 12.6641   | 11.4171 |
+### Framework versions
+- Transformers 4.36.2
+- Pytorch 2.1.0+cu121
+- Datasets 2.16.1
+- Tokenizers 0.15.0

fp32/config.json ADDED Viewed

	@@ -0,0 +1,154 @@

+{
+  "_name_or_path": "openai/whisper-base",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": [
+    [
+      1,
+      50259
+    ],
+    [
+      2,
+      50359
+    ],
+    [
+      3,
+      50363
+    ]
+  ],
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 6,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "use_cache": false,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

hash.json → fp32/hash.json RENAMED Viewed

File without changes

model.bin → fp32/model.bin RENAMED Viewed

File without changes

fp32/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

fp32/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

fp32/vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

fp32/vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

int8/README.md ADDED Viewed

	@@ -0,0 +1,53 @@

+---
+language:
+  - hu
+tags:
+  - audio
+  - automatic-speech-recognition
+datasets:
+- mozilla-foundation/common_voice_16_0
+base_model: openai/whisper-base
+license: mit
+library_name: ctranslate2
+---
+# Whisper base model for CTranslate2
+This repository contains the conversion of a fine-tuned version of [openai/whisper-base](https://huggingface.co/openai/whisper-base) to the [CTranslate2](https://github.com/OpenNMT/CTranslate2) model format. The fine-tuning was done by [@sarpba](https://huggingface.co/sarpba) on Mozilla Foundation's Common Voice 16 dataset.
+This model can be used in CTranslate2 or projects based on CTranslate2 such as [faster-whisper](https://github.com/systran/faster-whisper).
+## Example
+```python
+from faster_whisper import WhisperModel
+model = WhisperModel("base")
+segments, info = model.transcribe("audio.mp3")
+for segment in segments:
+    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+```
+## Conversion details
+The original model was converted with the following command:
+```
+ct2-transformers-converter --model Hungarians/whisper-base-cv16-hu-v2 --output_dir faster-whisper-base-cv16-v2-int8.hu \
+    --quantization int8 --low_cpu_mem_usage --copy_files tokenizer_config.json preprocessor_config.json
+```
+Note that the model weights are saved in INT8. This type can be changed when the model is loaded using the [`compute_type` option in CTranslate2](https://opennmt.net/CTranslate2/quantization.html).
+## HASH calculation
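+The hashes are computed with `md5sum`, run in the model directory (the `hash.json` entry itself is the MD5 of an empty file, because the output file is created before it is hashed):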
+```
+find ./ -maxdepth 1 -type f -not -path '*/\.*'  -exec md5sum {} \; | tr -d ' ' | jq -R 'split("./") | {(.[1]): (.[0])}' | jq -s 'add' > hash.json
+```
+## More information
+**For more information about the original model, see its [model card](https://huggingface.co/Hungarians/whisper-base-cv16-hu-v2).**

int8/config.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+  "alignment_heads": [
+    [
+      3,
+      1
+    ],
+    [
+      4,
+      2
+    ],
+    [
+      4,
+      3
+    ],
+    [
+      4,
+      7
+    ],
+    [
+      5,
+      1
+    ],
+    [
+      5,
+      2
+    ],
+    [
+      5,
+      4
+    ],
+    [
+      5,
+      6
+    ]
+  ],
+  "lang_ids": [
+    50259,
+    50260,
+    50261,
+    50262,
+    50263,
+    50264,
+    50265,
+    50266,
+    50267,
+    50268,
+    50269,
+    50270,
+    50271,
+    50272,
+    50273,
+    50274,
+    50275,
+    50276,
+    50277,
+    50278,
+    50279,
+    50280,
+    50281,
+    50282,
+    50283,
+    50284,
+    50285,
+    50286,
+    50287,
+    50288,
+    50289,
+    50290,
+    50291,
+    50292,
+    50293,
+    50294,
+    50295,
+    50296,
+    50297,
+    50298,
+    50299,
+    50300,
+    50301,
+    50302,
+    50303,
+    50304,
+    50305,
+    50306,
+    50307,
+    50308,
+    50309,
+    50310,
+    50311,
+    50312,
+    50313,
+    50314,
+    50315,
+    50316,
+    50317,
+    50318,
+    50319,
+    50320,
+    50321,
+    50322,
+    50323,
+    50324,
+    50325,
+    50326,
+    50327,
+    50328,
+    50329,
+    50330,
+    50331,
+    50332,
+    50333,
+    50334,
+    50335,
+    50336,
+    50337,
+    50338,
+    50339,
+    50340,
+    50341,
+    50342,
+    50343,
+    50344,
+    50345,
+    50346,
+    50347,
+    50348,
+    50349,
+    50350,
+    50351,
+    50352,
+    50353,
+    50354,
+    50355,
+    50356,
+    50357
+  ],
+  "suppress_ids": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50358,
+    50359,
+    50360,
+    50361,
+    50362
+  ],
+  "suppress_ids_begin": [
+    220,
+    50257
+  ]
+}

int8/hash.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "README.md": "4e446e5c8faabe7acac2893a14cbf581",
+  "config.json": "ce9b225b30072c0d86b6c93d49384821",
+  "hash.json": "d41d8cd98f00b204e9800998ecf8427e",
+  "preprocessor_config.json": "1160eec05b29412dd1389678cb25b3e0",
+  "tokenizer_config.json": "731573a1b27530e2fd2dd62fde94ef76",
+  "vocabulary.json": "aebe7623626c8f3f61cc5208ff29c348",
+  "vocabulary.txt": "980d7011195d0c733bd374e31708717f",
+  "model.bin": "574e83865681b5ee98d682b8e22dc258"
+}

int8/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5056fe79745315cbd0ebaec94bbc4dd4520b75c82ef87c9d7edd83ee23bfa8dd
+size 79120439

int8/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

int8/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

int8/vocabulary.json ADDED Viewed

The diff for this file is too large to render. See raw diff

int8/vocabulary.txt ADDED Viewed

The diff for this file is too large to render. See raw diff