cahya committed on
Commit
2f2d456
1 Parent(s): c63c37f

clean up the repo

Files changed (10)
  1. 5gram.arpa +0 -0
  2. 5gram.bin +0 -3
  3. 5gram.txt +0 -0
  4. README.md.0 +0 -129
  5. arg.txt +0 -34
  6. er2 +0 -259
  7. err +0 -214
  8. ngram.py +0 -25
  9. test-vocab.py +0 -22
  10. wav2vec2-base-turkish +0 -1
5gram.arpa DELETED
The diff for this file is too large to render. See raw diff
 
5gram.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7a76859c96afc4fa223dc7e5cb4d000926ec82f25ffbf560afec88ad39ed8783
- size 1831539
 
5gram.txt DELETED
The diff for this file is too large to render. See raw diff
 
README.md.0 DELETED
@@ -1,129 +0,0 @@
- ---
- language: tr
- datasets:
- - common_voice
- metrics:
- - wer
- tags:
- - audio
- - automatic-speech-recognition
- - speech
- - common_voice
- - generated_from_trainer
- - tr
- - robust-speech-event
- license: apache-2.0
- model-index:
- - name: Wav2Vec2 Base Turkish by Cahya
-   results:
-   - task:
-       name: Speech Recognition
-       type: automatic-speech-recognition
-     dataset:
-       name: Common Voice tr
-       type: common_voice
-       args: tr
-     metrics:
-     - name: Test WER
-       type: wer
-       value: 13.70
- ---
-
- # Wav2Vec2-Base-Turkish-Artificial-CV
-
- This is Wav2Vec2-Base-Turkish-Artificial-CV, a version of
- [cahya/wav2vec2-base-turkish-artificial](https://huggingface.co/cahya/wav2vec2-base-turkish-artificial)
- fine-tuned on the [Turkish Common Voice dataset](https://huggingface.co/datasets/common_voice).
-
- When using this model, make sure that your speech input is sampled at 16 kHz.
-
- ## Usage
- The model can be used directly (without a language model) as follows:
- ```python
- import torch
- import torchaudio
- from datasets import load_dataset
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-
- test_dataset = load_dataset("common_voice", "tr", split="test[:2%]")
-
- processor = Wav2Vec2Processor.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
- model = Wav2Vec2ForCTC.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
-
-
- # Preprocessing the datasets.
- # We need to read the audio files as arrays
- def speech_file_to_array_fn(batch):
-     speech_array, sampling_rate = torchaudio.load(batch["path"])
-     resampler = torchaudio.transforms.Resample(sampling_rate, 16_000)
-     batch["speech"] = resampler(speech_array).squeeze().numpy()
-     return batch
-
- test_dataset = test_dataset.map(speech_file_to_array_fn)
- inputs = processor(test_dataset[:2]["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
-
- with torch.no_grad():
-     logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
-
- predicted_ids = torch.argmax(logits, dim=-1)
-
- print("Prediction:", processor.batch_decode(predicted_ids))
- print("Reference:", test_dataset[:2]["sentence"])
- ```
-
-
- ## Evaluation
-
- The model can be evaluated as follows on the Turkish test data of Common Voice.
-
- ```python
- import torch
- import torchaudio
- from datasets import load_dataset, load_metric
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
- import re
-
- test_dataset = load_dataset("common_voice", "tr", split="test")
- wer = load_metric("wer")
-
- processor = Wav2Vec2Processor.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
- model = Wav2Vec2ForCTC.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
- model.to("cuda")
-
- chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\‘\”\'\`…\’»«]'
-
- # Preprocessing the datasets.
- # We need to read the audio files as arrays
- def speech_file_to_array_fn(batch):
-     batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
-     speech_array, sampling_rate = torchaudio.load(batch["path"])
-     resampler = torchaudio.transforms.Resample(sampling_rate, 16_000)
-     batch["speech"] = resampler(speech_array).squeeze().numpy()
-     return batch
-
- test_dataset = test_dataset.map(speech_file_to_array_fn)
-
- # Run inference over the test set and collect the predicted strings.
- def evaluate(batch):
-     inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
-
-     with torch.no_grad():
-         logits = model(inputs.input_values.to("cuda")).logits
-
-     pred_ids = torch.argmax(logits, dim=-1)
-     batch["pred_strings"] = processor.batch_decode(pred_ids)
-     return batch
-
- result = test_dataset.map(evaluate, batched=True, batch_size=8)
-
- print("WER: {:.2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
- ```
-
- **Test Result**: 13.70 %
-
- ## Training
-
- The Common Voice `train`, `validation`, `other`, and `invalidated` splits were used for training.
-
- The script used for training can be found [here](https://github.com/cahya-wirawan/indonesian-speech-recognition).
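
The training section of the deleted README names the Common Voice splits that were combined. As a minimal, hedged sketch (the real training script linked above applies additional filtering and preprocessing that is not reproduced here), those splits could be loaded and concatenated with `datasets` like this:

```python
# Sketch: load and concatenate the Common Voice splits named in the README's
# training section. The actual training script performs extra cleaning not shown here.
from datasets import load_dataset, concatenate_datasets

splits = ["train", "validation", "other", "invalidated"]
parts = [load_dataset("common_voice", "tr", split=s) for s in splits]
train_dataset = concatenate_datasets(parts)
print(train_dataset)
```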
 
arg.txt DELETED
@@ -1,34 +0,0 @@
- --dataset_name="common_voice"
- --model_name_or_path="cahya/wav2vec2-base-turkish-artificial-cv"
- --dataset_config_name="tr"
- --output_dir="./output"
- --overwrite_output_dir
- --num_train_epochs="1"
- --per_device_train_batch_size="2"
- --per_device_eval_batch_size="2"
- --gradient_accumulation_steps="4"
- --learning_rate="7.5e-7"
- --warmup_steps="2000"
- --length_column_name="input_length"
- --evaluation_strategy="steps"
- --text_column_name="sentence"
- --save_steps="500"
- --eval_steps="500"
- --logging_steps="100"
- --layerdrop="0.0"
- --activation_dropout="0.1"
- --save_total_limit="3"
- --freeze_feature_encoder
- --feat_proj_dropout="0.0"
- --mask_time_prob="0.75"
- --mask_time_length="10"
- --mask_feature_prob="0.25"
- --mask_feature_length="64"
- --gradient_checkpointing
- --use_auth_token
- --fp16=false
- --group_by_length
- --do_train=true
- --do_eval=true
- --push_to_hub
- --chars_to_ignore , ? . ! \; \: \"\" \% \' \" \' \' \` … \’ » « \‘ '“' '”' � é û
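
The deleted arg.txt held the command-line flags for the fine-tuning run. As a hedged sketch of how the trainer-level flags map onto `transformers.TrainingArguments` when `run_speech_recognition_ctc.py` parses them (the model- and data-specific flags such as `--layerdrop`, `--mask_time_prob`, and `--chars_to_ignore` go to the script's own argument dataclasses and are not shown):

```python
# Sketch only: the values are copied from arg.txt above; the real script builds this
# object via HfArgumentParser rather than constructing it by hand.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./output",
    overwrite_output_dir=True,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=7.5e-7,
    warmup_steps=2000,
    length_column_name="input_length",
    evaluation_strategy="steps",
    save_steps=500,
    eval_steps=500,
    logging_steps=100,
    save_total_limit=3,
    gradient_checkpointing=True,
    fp16=False,
    group_by_length=True,
    do_train=True,
    do_eval=True,
    push_to_hub=True,
)
```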
 
er2 DELETED
@@ -1,259 +0,0 @@
- loading configuration file https://huggingface.co/cahya/wav2vec2-base-turkish-artificial-cv/resolve/main/config.json from cache at /home/cahya/.cache/huggingface/transformers/47f005d7b541562c0734cfe1b8aaf7f644846084b33a9247f5810d5a16d001a7.1c2175954f7220a41c71683d239699eb295d40ec92ac51faac3b85ad4bef2ad8
- /home/cahya/Work/MachineLearning/transformers/src/transformers/configuration_utils.py:353: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
- warnings.warn(
- Model config Wav2Vec2Config {
- "_name_or_path": "cahya/wav2vec2-base-turkish-artificial-cv",
- "activation_dropout": 0.055,
- "adapter_kernel_size": 3,
- "adapter_stride": 2,
- "add_adapter": false,
- "apply_spec_augment": true,
- "architectures": [
- "Wav2Vec2ForCTC"
- ],
- "attention_dropout": 0.094,
- "bos_token_id": 1,
- "classifier_proj_size": 256,
- "codevector_dim": 256,
- "contrastive_logits_temperature": 0.1,
- "conv_bias": false,
- "conv_dim": [
- 512,
- 512,
- 512,
- 512,
- 512,
- 512,
- 512
- ],
- "conv_kernel": [
- 10,
- 3,
- 3,
- 3,
- 3,
- 2,
- 2
- ],
- "conv_stride": [
- 5,
- 2,
- 2,
- 2,
- 2,
- 2,
- 2
- ],
- "ctc_loss_reduction": "mean",
- "ctc_zero_infinity": true,
- "diversity_loss_weight": 0.1,
- "do_stable_layer_norm": false,
- "eos_token_id": 2,
- "feat_extract_activation": "gelu",
- "feat_extract_norm": "group",
- "feat_proj_dropout": 0.04,
- "feat_quantizer_dropout": 0.0,
- "final_dropout": 0.1,
- "gradient_checkpointing": true,
- "hidden_act": "gelu",
- "hidden_dropout": 0.047,
- "hidden_size": 768,
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "layer_norm_eps": 1e-05,
- "layerdrop": 0.041,
- "mask_feature_length": 10,
- "mask_feature_min_masks": 0,
- "mask_feature_prob": 0.0,
- "mask_time_length": 10,
- "mask_time_min_masks": 2,
- "mask_time_prob": 0.4,
- "model_type": "wav2vec2",
- "num_adapter_layers": 3,
- "num_attention_heads": 12,
- "num_codevector_groups": 2,
- "num_codevectors_per_group": 320,
- "num_conv_pos_embedding_groups": 16,
- "num_conv_pos_embeddings": 128,
- "num_feat_extract_layers": 7,
- "num_hidden_layers": 12,
- "num_negatives": 100,
- "output_hidden_size": 768,
- "pad_token_id": 39,
- "proj_codevector_dim": 256,
- "tdnn_dilation": [
- 1,
- 2,
- 3,
- 1,
- 1
- ],
- "tdnn_dim": [
- 512,
- 512,
- 512,
- 512,
- 1500
- ],
- "tdnn_kernel": [
- 5,
- 3,
- 3,
- 1,
- 1
- ],
- "transformers_version": "4.17.0.dev0",
- "use_weighted_layer_sum": false,
- "vocab_size": 40,
- "xvector_output_dim": 512
- }
-
-
  0%| | 0/1 [00:00<?, ?ba/s]
-
  0%| | 0/1 [00:00<?, ?ba/s]
- Didn't find file ./output/tokenizer_config.json. We won't load it.
- Didn't find file ./output/added_tokens.json. We won't load it.
- Didn't find file ./output/special_tokens_map.json. We won't load it.
- Didn't find file ./output/tokenizer.json. We won't load it.
- loading file ./output/vocab.json
- loading file None
- loading file None
- loading file None
- loading file None
- file ./output/config.json not found
- Adding <s> to the vocabulary
- Adding </s> to the vocabulary
- Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
- loading configuration file https://huggingface.co/cahya/wav2vec2-base-turkish-artificial-cv/resolve/main/config.json from cache at /home/cahya/.cache/huggingface/transformers/47f005d7b541562c0734cfe1b8aaf7f644846084b33a9247f5810d5a16d001a7.1c2175954f7220a41c71683d239699eb295d40ec92ac51faac3b85ad4bef2ad8
- Model config Wav2Vec2Config {
- "_name_or_path": "cahya/wav2vec2-base-turkish-artificial-cv",
- "activation_dropout": 0.055,
- "adapter_kernel_size": 3,
- "adapter_stride": 2,
- "add_adapter": false,
- "apply_spec_augment": true,
- "architectures": [
- "Wav2Vec2ForCTC"
- ],
- "attention_dropout": 0.094,
- "bos_token_id": 1,
- "classifier_proj_size": 256,
- "codevector_dim": 256,
- "contrastive_logits_temperature": 0.1,
- "conv_bias": false,
- "conv_dim": [
- 512,
- 512,
- 512,
- 512,
- 512,
- 512,
- 512
- ],
- "conv_kernel": [
- 10,
- 3,
- 3,
- 3,
- 3,
- 2,
- 2
- ],
- "conv_stride": [
- 5,
- 2,
- 2,
- 2,
- 2,
- 2,
- 2
- ],
- "ctc_loss_reduction": "mean",
- "ctc_zero_infinity": true,
- "diversity_loss_weight": 0.1,
- "do_stable_layer_norm": false,
- "eos_token_id": 2,
- "feat_extract_activation": "gelu",
- "feat_extract_norm": "group",
- "feat_proj_dropout": 0.04,
- "feat_quantizer_dropout": 0.0,
- "final_dropout": 0.1,
- "gradient_checkpointing": true,
- "hidden_act": "gelu",
- "hidden_dropout": 0.047,
- "hidden_size": 768,
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "layer_norm_eps": 1e-05,
- "layerdrop": 0.041,
- "mask_feature_length": 10,
- "mask_feature_min_masks": 0,
- "mask_feature_prob": 0.0,
- "mask_time_length": 10,
- "mask_time_min_masks": 2,
- "mask_time_prob": 0.4,
- "model_type": "wav2vec2",
- "num_adapter_layers": 3,
- "num_attention_heads": 12,
- "num_codevector_groups": 2,
- "num_codevectors_per_group": 320,
- "num_conv_pos_embedding_groups": 16,
- "num_conv_pos_embeddings": 128,
- "num_feat_extract_layers": 7,
- "num_hidden_layers": 12,
- "num_negatives": 100,
- "output_hidden_size": 768,
- "pad_token_id": 39,
- "proj_codevector_dim": 256,
- "tdnn_dilation": [
- 1,
- 2,
- 3,
- 1,
- 1
- ],
- "tdnn_dim": [
- 512,
- 512,
- 512,
- 512,
- 1500
- ],
- "tdnn_kernel": [
- 5,
- 3,
- 3,
- 1,
- 1
- ],
- "transformers_version": "4.17.0.dev0",
- "use_weighted_layer_sum": false,
- "vocab_size": 40,
- "xvector_output_dim": 512
- }
-
- loading feature extractor configuration file https://huggingface.co/cahya/wav2vec2-base-turkish-artificial-cv/resolve/main/preprocessor_config.json from cache at /home/cahya/.cache/huggingface/transformers/34433162acde7e1ca4a265d8ae309442e4ddadff37e6e37d2d37eb7133f65f8f.fcd266b775b7f33ba9b607a0fee7cc615aeb2eb281586f046280492ea380ae23
- Feature extractor Wav2Vec2FeatureExtractor {
- "do_normalize": true,
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
- "feature_size": 1,
- "padding_side": "right",
- "padding_value": 0.0,
- "return_attention_mask": true,
- "sampling_rate": 16000
- }
-
- loading weights file https://huggingface.co/cahya/wav2vec2-base-turkish-artificial-cv/resolve/main/pytorch_model.bin from cache at /home/cahya/.cache/huggingface/transformers/3b3f7d0041c2b08b031c8357e39249bdbc06c8bfcd5a9f8891c7f259b07a0b85.356b4eec0d55a5c4d2d480c2dd2ea2cc0c867771bc39b8cdc97b629e4206482c
- Traceback (most recent call last):
- File "run_speech_recognition_ctc.py", line 745, in <module>
- main()
- File "run_speech_recognition_ctc.py", line 552, in main
- model = AutoModelForCTC.from_pretrained(
- File "/home/cahya/Work/MachineLearning/transformers/src/transformers/models/auto/auto_factory.py", line 447, in from_pretrained
- return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
- File "/home/cahya/Work/MachineLearning/transformers/src/transformers/modeling_utils.py", line 1528, in from_pretrained
- model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_state_dict_into_model(
- File "/home/cahya/Work/MachineLearning/transformers/src/transformers/modeling_utils.py", line 1682, in _load_state_dict_into_model
- raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
- RuntimeError: Error(s) in loading state_dict for Wav2Vec2ForCTC:
- size mismatch for lm_head.weight: copying a param with shape torch.Size([40, 768]) from checkpoint, the shape in current model is torch.Size([41, 768]).
- size mismatch for lm_head.bias: copying a param with shape torch.Size([40]) from checkpoint, the shape in current model is torch.Size([41]).
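
The traceback in this deleted log shows the checkpoint's CTC head expecting 40 classes while the tokenizer rebuilt from `./output/vocab.json` ended up one token larger. A hedged sketch of a pre-flight check that would surface this mismatch before loading (the local `./output/vocab.json` path is taken from the log and may differ on other setups):

```python
# Sketch: compare the checkpoint's vocab_size with the rebuilt tokenizer's size
# to catch the 40-vs-41 mismatch reported above before from_pretrained() fails.
from transformers import AutoConfig, Wav2Vec2CTCTokenizer

config = AutoConfig.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
tokenizer = Wav2Vec2CTCTokenizer("./output/vocab.json")  # vocab built by the training script

print("checkpoint vocab_size:", config.vocab_size)  # 40 in the log above
print("tokenizer vocab size :", len(tokenizer))      # 41 after extra tokens are added

if config.vocab_size != len(tokenizer):
    print("Mismatch: rebuild the vocab to match the checkpoint, or resize the lm_head, "
          "before resuming fine-tuning.")
```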
 
err DELETED
@@ -1,214 +0,0 @@
- training_args.do_train: True
- 01/28/2022 11:13:09 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False
- 01/28/2022 11:13:09 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
- _n_gpu=1,
- adafactor=False,
- adam_beta1=0.9,
- adam_beta2=0.999,
- adam_epsilon=1e-08,
- bf16=False,
- bf16_full_eval=False,
- dataloader_drop_last=False,
- dataloader_num_workers=0,
- dataloader_pin_memory=True,
- ddp_bucket_cap_mb=None,
- ddp_find_unused_parameters=None,
- debug=[],
- deepspeed=None,
- disable_tqdm=False,
- do_eval=True,
- do_predict=False,
- do_train=True,
- eval_accumulation_steps=None,
- eval_steps=500,
- evaluation_strategy=IntervalStrategy.STEPS,
- fp16=False,
- fp16_backend=auto,
- fp16_full_eval=False,
- fp16_opt_level=O1,
- gradient_accumulation_steps=4,
- gradient_checkpointing=True,
- greater_is_better=None,
- group_by_length=True,
- half_precision_backend=auto,
- hub_model_id=None,
- hub_strategy=HubStrategy.EVERY_SAVE,
- hub_token=<HUB_TOKEN>,
- ignore_data_skip=False,
- label_names=None,
- label_smoothing_factor=0.0,
- learning_rate=7.5e-07,
- length_column_name=input_length,
- load_best_model_at_end=False,
- local_rank=-1,
- log_level=-1,
- log_level_replica=-1,
- log_on_each_node=True,
- logging_dir=./output/runs/Jan28_11-13-09_arjuna,
- logging_first_step=False,
- logging_nan_inf_filter=True,
- logging_steps=100,
- logging_strategy=IntervalStrategy.STEPS,
- lr_scheduler_type=SchedulerType.LINEAR,
- max_grad_norm=1.0,
- max_steps=-1,
- metric_for_best_model=None,
- mp_parameters=,
- no_cuda=False,
- num_train_epochs=1.0,
- optim=OptimizerNames.ADAMW_HF,
- output_dir=./output,
- overwrite_output_dir=True,
- past_index=-1,
- per_device_eval_batch_size=2,
- per_device_train_batch_size=2,
- prediction_loss_only=False,
- push_to_hub=True,
- push_to_hub_model_id=None,
- push_to_hub_organization=None,
- push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
- remove_unused_columns=True,
- report_to=['tensorboard'],
- resume_from_checkpoint=None,
- run_name=./output,
- save_on_each_node=False,
- save_steps=500,
- save_strategy=IntervalStrategy.STEPS,
- save_total_limit=3,
- seed=42,
- sharded_ddp=[],
- skip_memory_metrics=True,
- tf32=None,
- tpu_metrics_debug=False,
- tpu_num_cores=None,
- use_legacy_prediction_loop=False,
- warmup_ratio=0.0,
- warmup_steps=2000,
- weight_decay=0.0,
- xpu_backend=None,
- )
- do_train: True
- load train
- 01/28/2022 11:13:09 - WARNING - datasets.builder - Reusing dataset common_voice (/home/cahya/.cache/huggingface/datasets/common_voice/tr/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd)
- 01/28/2022 11:13:10 - WARNING - datasets.builder - Reusing dataset common_voice (/home/cahya/.cache/huggingface/datasets/common_voice/tr/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd)
- char ignored: [',', '?', '.', '!', ';', ':', '""', '%', "'", '"', "'", "'", '`', '…', '’', '»', '«', '‘', '“', '”', '�', 'é', 'û'] [,?.!;:""%'"''`…’»«‘“”�éû]
- 01/28/2022 11:13:10 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/cahya/.cache/huggingface/datasets/common_voice/tr/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd/cache-a0df3a81748e62dd.arrow
- 01/28/2022 11:13:10 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/cahya/.cache/huggingface/datasets/common_voice/tr/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd/cache-859966f17c7349fb.arrow
- config: Wav2Vec2Config {
- "_name_or_path": "cahya/wav2vec2-base-turkish-artificial-cv",
- "activation_dropout": 0.055,
- "adapter_kernel_size": 3,
- "adapter_stride": 2,
- "add_adapter": false,
- "apply_spec_augment": true,
- "architectures": [
- "Wav2Vec2ForCTC"
- ],
- "attention_dropout": 0.094,
- "bos_token_id": 1,
- "classifier_proj_size": 256,
- "codevector_dim": 256,
- "contrastive_logits_temperature": 0.1,
- "conv_bias": false,
- "conv_dim": [
- 512,
- 512,
- 512,
- 512,
- 512,
- 512,
- 512
- ],
- "conv_kernel": [
- 10,
- 3,
- 3,
- 3,
- 3,
- 2,
- 2
- ],
- "conv_stride": [
- 5,
- 2,
- 2,
- 2,
- 2,
- 2,
- 2
- ],
- "ctc_loss_reduction": "mean",
- "ctc_zero_infinity": true,
- "diversity_loss_weight": 0.1,
- "do_stable_layer_norm": false,
- "eos_token_id": 2,
- "feat_extract_activation": "gelu",
- "feat_extract_norm": "group",
- "feat_proj_dropout": 0.04,
- "feat_quantizer_dropout": 0.0,
- "final_dropout": 0.1,
- "gradient_checkpointing": true,
- "hidden_act": "gelu",
- "hidden_dropout": 0.047,
- "hidden_size": 768,
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "layer_norm_eps": 1e-05,
- "layerdrop": 0.041,
- "mask_feature_length": 10,
- "mask_feature_min_masks": 0,
- "mask_feature_prob": 0.0,
- "mask_time_length": 10,
- "mask_time_min_masks": 2,
- "mask_time_prob": 0.4,
- "model_type": "wav2vec2",
- "num_adapter_layers": 3,
- "num_attention_heads": 12,
- "num_codevector_groups": 2,
- "num_codevectors_per_group": 320,
- "num_conv_pos_embedding_groups": 16,
- "num_conv_pos_embeddings": 128,
- "num_feat_extract_layers": 7,
- "num_hidden_layers": 12,
- "num_negatives": 100,
- "output_hidden_size": 768,
- "pad_token_id": 39,
- "proj_codevector_dim": 256,
- "tdnn_dilation": [
- 1,
- 2,
- 3,
- 1,
- 1
- ],
- "tdnn_dim": [
- 512,
- 512,
- 512,
- 512,
- 1500
- ],
- "tdnn_kernel": [
- 5,
- 3,
- 3,
- 1,
- 1
- ],
- "transformers_version": "4.17.0.dev0",
- "use_weighted_layer_sum": false,
- "vocab_size": 40,
- "xvector_output_dim": 512
- }
-
- dataset: DatasetDict({
- train: Dataset({
- features: ['client_id', 'path', 'audio', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment', 'target_text'],
- num_rows: 3478
- })
- eval: Dataset({
- features: ['client_id', 'path', 'audio', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment', 'target_text'],
- num_rows: 1647
- })
- })
- vocab: {'-': 1, 'a': 2, 'b': 3, 'c': 4, 'd': 5, 'e': 6, 'f': 7, 'g': 8, 'h': 9, 'i': 10, 'j': 11, 'k': 12, 'l': 13, 'm': 14, 'n': 15, 'o': 16, 'p': 17, 'q': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'v': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27, 'â': 28, 'ç': 29, 'ë': 30, 'î': 31, 'ö': 32, 'ü': 33, 'ğ': 34, 'ı': 35, 'ş': 36, '̇': 37, '|': 0, '[UNK]': 38, '[PAD]': 39}
 
ngram.py DELETED
@@ -1,25 +0,0 @@
- from transformers import AutoProcessor
- from transformers import Wav2Vec2ProcessorWithLM
- from huggingface_hub import Repository
- from pyctcdecode import build_ctcdecoder
-
- model_name = "cahya/wav2vec2-base-turkish-artificial-cv"
- processor = AutoProcessor.from_pretrained(model_name)
-
- vocab_dict = processor.tokenizer.get_vocab()
- sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
-
- decoder = build_ctcdecoder(
-     labels=list(sorted_vocab_dict.keys()),
-     kenlm_model_path="5gram.arpa",
- )
-
- processor_with_lm = Wav2Vec2ProcessorWithLM(
-     feature_extractor=processor.feature_extractor,
-     tokenizer=processor.tokenizer,
-     decoder=decoder
- )
-
- #repo = Repository(local_dir="wav2vec2-base-turkish", clone_from=model_name)
- processor_with_lm.save_pretrained("wav2vec2-base-turkish")
- #repo.push_to_hub(commit_message="Upload lm-boosted decoder")
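
The deleted ngram.py wires the (also deleted) 5gram.arpa KenLM model into a `Wav2Vec2ProcessorWithLM` and saves it to a local `wav2vec2-base-turkish` directory. A hedged sketch of how that saved, LM-boosted processor could then be used for inference; directory and model names follow the script above and would need adjusting if they were renamed before this cleanup:

```python
# Sketch: inference with the LM-boosted processor saved by ngram.py.
import torch
from transformers import AutoModelForCTC, Wav2Vec2ProcessorWithLM

model = AutoModelForCTC.from_pretrained("cahya/wav2vec2-base-turkish-artificial-cv")
processor = Wav2Vec2ProcessorWithLM.from_pretrained("wav2vec2-base-turkish")

def transcribe(speech):
    # `speech` is assumed to be a 1-D float array sampled at 16 kHz.
    inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    # batch_decode on Wav2Vec2ProcessorWithLM runs pyctcdecode beam search with the 5-gram LM.
    return processor.batch_decode(logits.numpy()).text
```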
 
test-vocab.py DELETED
@@ -1,22 +0,0 @@
- import torch
- from datasets import load_dataset
- from transformers import AutoModelForCTC, AutoProcessor
- import torchaudio.functional as F
-
- model_id = "cahya/wav2vec2-base-turkish"
-
- sample_iter = iter(load_dataset("common_voice", "tr", split="test", streaming=True))
-
- sample = next(sample_iter)
- resampled_audio = F.resample(torch.tensor(sample["audio"]["array"]), 48_000, 16_000).numpy()
-
- model = AutoModelForCTC.from_pretrained(model_id)
- processor = AutoProcessor.from_pretrained(model_id)
-
- input_values = processor(resampled_audio, return_tensors="pt").input_values
-
- with torch.no_grad():
-     logits = model(input_values).logits
-
- transcription = processor.batch_decode(logits.numpy()).text
- print(transcription)
 
wav2vec2-base-turkish DELETED
@@ -1 +0,0 @@
- Subproject commit 84a5ba89d7a3f162d409b42e1b515d9bf2a8d021