mattchurgin committed on
Commit
745e35f
1 Parent(s): b78080b

Training in progress, step 5

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 51, "</s>": 52}
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "mattchurgin/xls-r-eng",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
1
  {
2
+ "_name_or_path": "hf-test/xls-r-dummy",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
preprocessor_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_processor_class": null,
3
  "do_normalize": true,
4
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
5
  "feature_size": 1,
1
  {
 
2
  "do_normalize": true,
3
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
  "feature_size": 1,
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58eddd2cf0682445faa984d804066cdcb33d226b8255e030fa92403cfa2f100b
3
+ size 143910
run.sh CHANGED
@@ -1,6 +1,6 @@
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="mozilla-foundation/common_voice_7_0" \
3
- --model_name_or_path="mattchurgin/xls-r-eng" \
4
  --dataset_config_name="ab" \
5
  --output_dir="./" \
6
  --overwrite_output_dir \
1
  python run_speech_recognition_ctc.py \
2
  --dataset_name="mozilla-foundation/common_voice_7_0" \
3
+ --model_name_or_path="hf-test/xls-r-dummy" \
4
  --dataset_config_name="ab" \
5
  --output_dir="./" \
6
  --overwrite_output_dir \
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
test_install.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCTC, AutoProcessor
2
+ from datasets import load_dataset
3
+ import torch
4
+
5
+ dummy_dataset = load_dataset("common_voice", "ab", split="test")
6
+
7
+ model = AutoModelForCTC.from_pretrained("hf-internal-testing/tiny-random-wav2vec2")
8
+
9
+ model.to("cuda")
10
+
11
+ processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-wav2vec2")
12
+
13
+ input_values = processor(dummy_dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=16_000).input_values
14
+ input_values = input_values.to("cuda")
15
+
16
+ logits = model(input_values).logits
17
+
18
+ assert logits.shape[-1] == 32
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdf2d3958633fdb29fe0d2d7d5677f2ba26d588d895af42a9d045f43bd17f85
3
+ size 2991
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"!": 1, ",": 2, "-": 3, ".": 4, ":": 5, ";": 6, "?": 7, "а": 8, "б": 9, "в": 10, "г": 11, "д": 12, "е": 13, "ж": 14, "з": 15, "и": 16, "к": 17, "л": 18, "м": 19, "н": 20, "о": 21, "п": 22, "р": 23, "с": 24, "т": 25, "у": 26, "ф": 27, "х": 28, "ц": 29, "ч": 30, "ш": 31, "ы": 32, "ь": 33, "џ": 34, "қ": 35, "ҟ": 36, "ҩ": 37, "ҭ": 38, "ҳ": 39, "ҵ": 40, "ҷ": 41, "ҽ": 42, "ҿ": 43, "ә": 44, "ӡ": 45, "ӷ": 46, "ԥ": 47, "–": 48, "|": 0, "[UNK]": 49, "[PAD]": 50}