xezpeleta committed on
Commit
70e3d99
1 Parent(s): 4088a0a

Training in progress, step 1000

Browse files
added_tokens.json CHANGED
@@ -17,7 +17,6 @@
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
20
- "<|endoftext|>": 50257,
21
  "<|en|>": 50259,
22
  "<|es|>": 50262,
23
  "<|et|>": 50307,
@@ -30,6 +29,7 @@
30
  "<|gu|>": 50333,
31
  "<|haw|>": 50352,
32
  "<|ha|>": 50354,
 
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
@@ -38,7 +38,6 @@
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
41
- "<|iw|>": 50279,
42
  "<|ja|>": 50266,
43
  "<|jw|>": 50356,
44
  "<|ka|>": 50329,
 
17
  "<|da|>": 50285,
18
  "<|de|>": 50261,
19
  "<|el|>": 50281,
 
20
  "<|en|>": 50259,
21
  "<|es|>": 50262,
22
  "<|et|>": 50307,
 
29
  "<|gu|>": 50333,
30
  "<|haw|>": 50352,
31
  "<|ha|>": 50354,
32
+ "<|he|>": 50279,
33
  "<|hi|>": 50276,
34
  "<|hr|>": 50291,
35
  "<|ht|>": 50339,
 
38
  "<|id|>": 50275,
39
  "<|is|>": 50311,
40
  "<|it|>": 50274,
 
41
  "<|ja|>": 50266,
42
  "<|jw|>": 50356,
43
  "<|ka|>": 50329,
nohup.out ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c700aca1251b4c28be3894c734c12a20f221265bbf4c5c01ff35a97f692a6d52
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde53cf5fed853761b41e7ea0af91878d15f20c79d2499a3c3bf33d1288db232
3
  size 967102601
run.sh CHANGED
@@ -1,6 +1,6 @@
1
  python run_speech_recognition_seq2seq_streaming.py \
2
  --model_name_or_path="openai/whisper-small" \
3
- --dataset_name="mozilla-foundation/common_voice_11_0" \
4
  --dataset_config_name="eu" \
5
  --language="basque" \
6
  --train_split_name="train+validation" \
 
1
  python run_speech_recognition_seq2seq_streaming.py \
2
  --model_name_or_path="openai/whisper-small" \
3
+ --dataset_name="mozilla-foundation/common_voice_13_0" \
4
  --dataset_config_name="eu" \
5
  --language="basque" \
6
  --train_split_name="train+validation" \
runs/Jul20_11-05-05_tknadmin-System-Product-Name/1689843926.5927894/events.out.tfevents.1689843926.tknadmin-System-Product-Name.2399.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da669066da0745142fafdae02990c3ba43e82dcf98bd6ded128e5f548f9536f
3
+ size 5884
runs/Jul20_11-05-05_tknadmin-System-Product-Name/events.out.tfevents.1689843926.tknadmin-System-Product-Name.2399.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b34c8a964304c86412222ffbf172dded1cd8302326d17eb5246c5ec42d396c61
3
+ size 10864
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
- "<|iw|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
@@ -124,7 +124,7 @@
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
- "content": "",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
 
22
  "<|hi|>",
23
  "<|fi|>",
24
  "<|vi|>",
25
+ "<|he|>",
26
  "<|uk|>",
27
  "<|el|>",
28
  "<|ms|>",
 
124
  },
125
  "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
+ "content": "<|endoftext|>",
128
  "lstrip": false,
129
  "normalized": true,
130
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -27,7 +27,7 @@
27
  "tokenizer_class": "WhisperTokenizer",
28
  "unk_token": {
29
  "__type": "AddedToken",
30
- "content": "",
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
 
27
  "tokenizer_class": "WhisperTokenizer",
28
  "unk_token": {
29
  "__type": "AddedToken",
30
+ "content": "<|endoftext|>",
31
  "lstrip": false,
32
  "normalized": true,
33
  "rstrip": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d55d0427f58602fb4bf5f34f2662f2de850d5c66dbc6cd9f3df3b36274ce51
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d621f52817860d9aa272439631c6b2bfec18ad203c2627c0ed03f1989afcad1
3
  size 3643
vocab.json CHANGED
@@ -314,6 +314,7 @@
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
 
317
  "=": 28,
318
  "=\"": 13114,
319
  "=\"#": 34106,
 
314
  ";;": 35746,
315
  "<": 27,
316
  "</": 3433,
317
+ "<|endoftext|>": 50257,
318
  "=": 28,
319
  "=\"": 13114,
320
  "=\"#": 34106,