versae committed on
Commit
72a6a19
1 Parent(s): 4f995e8

31ig5poi: saving weights and logs of step 0k

Files changed (34)
  1. events.out.tfevents.1659427021.t1v-n-eedfb410-w-0.3982953.0.v2 +3 -0
  2. run.recover.sh +50 -0
  3. run_flax_speech_recognition_ctc.py +32 -11
  4. special_tokens_map.json +14 -0
  5. wandb/debug-internal.log +1 -1
  6. wandb/debug.log +1 -1
  7. wandb/latest-run +1 -1
  8. wandb/run-20220731_183904-2sjxhpmt/files/config.yaml +6 -0
  9. wandb/run-20220731_183904-2sjxhpmt/files/media/table/eval/step_32k_32000_c06fd8316235f01c9293.table.json +1 -0
  10. wandb/run-20220731_183904-2sjxhpmt/files/output.log +973 -0
  11. wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json +0 -0
  12. wandb/run-20220731_183904-2sjxhpmt/logs/debug-internal.log +0 -0
  13. wandb/run-20220731_183904-2sjxhpmt/logs/debug.log +164 -0
  14. wandb/run-20220731_183904-2sjxhpmt/run-2sjxhpmt.wandb +2 -2
  15. wandb/run-20220802_073947-3q3jac0b/files/code/run_flax_speech_recognition_ctc.py +1615 -0
  16. wandb/run-20220802_073947-3q3jac0b/files/config.yaml +33 -0
  17. wandb/run-20220802_073947-3q3jac0b/files/diff.patch +0 -0
  18. wandb/run-20220802_073947-3q3jac0b/files/output.log +457 -0
  19. wandb/run-20220802_073947-3q3jac0b/files/requirements.txt +158 -0
  20. wandb/run-20220802_073947-3q3jac0b/files/wandb-metadata.json +69 -0
  21. wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json +1 -0
  22. wandb/run-20220802_073947-3q3jac0b/logs/debug-internal.log +160 -0
  23. wandb/run-20220802_073947-3q3jac0b/logs/debug.log +139 -0
  24. wandb/run-20220802_073947-3q3jac0b/run-3q3jac0b.wandb +3 -0
  25. wandb/run-20220802_074501-31ig5poi/files/code/run_flax_speech_recognition_ctc.py +1625 -0
  26. wandb/run-20220802_074501-31ig5poi/files/config.yaml +27 -0
  27. wandb/run-20220802_074501-31ig5poi/files/diff.patch +0 -0
  28. wandb/run-20220802_074501-31ig5poi/files/output.log +0 -0
  29. wandb/run-20220802_074501-31ig5poi/files/requirements.txt +158 -0
  30. wandb/run-20220802_074501-31ig5poi/files/wandb-metadata.json +69 -0
  31. wandb/run-20220802_074501-31ig5poi/files/wandb-summary.json +1 -0
  32. wandb/run-20220802_074501-31ig5poi/logs/debug-internal.log +412 -0
  33. wandb/run-20220802_074501-31ig5poi/logs/debug.log +23 -0
  34. wandb/run-20220802_074501-31ig5poi/run-31ig5poi.wandb +3 -0
events.out.tfevents.1659427021.t1v-n-eedfb410-w-0.3982953.0.v2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd937a73f61eaed788dd46684124c265653d88ae7c7f577630591908581135f1
+ size 40
run.recover.sh ADDED
@@ -0,0 +1,50 @@
+ # Step... (33100 | Loss: 0.4021756649017334, Learning Rate: 0.00033713760785758495, Gradient Norm: 10.125)
+ WANDB_ENTITY=NbAiLab WANDB_PROJECT=wav2vec2 python run_flax_speech_recognition_ctc.py \
+     --model_name_or_path="./" \
+     --hub_model_id="NbAiLab/wav2vec2-1b-npsc-nst" \
+     --tokenizer_name="./" \
+     --output_dir="./" \
+     --overwrite_output_dir \
+     --num_train_epochs="40" \
+     --per_device_train_batch_size="8" \
+     --per_device_eval_batch_size="8" \
+     --gradient_accumulation_steps="1" \
+     --precision="full_mixed" \
+     --matmul_precision="bfloat16" \
+     --learning_rate="0.00033713760785758495" \
+     --skip_steps="33100" \
+     --warmup_steps="0" \
+     --length_column_name="input_length" \
+     --evaluation_strategy="steps" \
+     --text_column_name="text" \
+     --save_steps="4000" \
+     --eval_steps="4000" \
+     --logging_steps="100" \
+     --layerdrop="0.041" \
+     --attention_dropout="0.094" \
+     --activation_dropout="0.055" \
+     --hidden_dropout="0.047" \
+     --save_total_limit="5" \
+     --freeze_feature_encoder \
+     --feat_proj_dropout="0.04" \
+     --mask_time_prob="0.082" \
+     --mask_time_length="10" \
+     --mask_feature_prob="0.25" \
+     --mask_feature_length="64" \
+     --gradient_checkpointing \
+     --min_duration_in_seconds="0.5" \
+     --max_duration_in_seconds="20.0" \
+     --use_auth_token \
+     --seed="42" \
+     --group_by_length \
+     --do_train --do_eval \
+     --push_to_hub \
+     --preprocessing_num_workers="32" \
+     --ctc_zero_infinity \
+     --do_lower_case \
+     --wandb_project="wav2vec2" \
+     --wandb_name="wav2vec2-1b-npsc-nst (cont.)" \
+     --remove_punctuation
+
+
+ # --fp16
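Note on the recovery script above: the --skip_steps="33100" and --learning_rate="0.00033713760785758495" values mirror the last "Step..." line printed by the interrupted run, which is kept as the comment on the first line of the script. A small, hypothetical helper like the one below (not part of this repository; the regex and variable names are illustrative assumptions) could extract those two values from such a log line:

    import re

    # The last progress line printed by the interrupted run (copied from its console
    # output; run.recover.sh keeps it as a comment for reference).
    last_step_line = (
        "# Step... (33100 | Loss: 0.4021756649017334, "
        "Learning Rate: 0.00033713760785758495, Gradient Norm: 10.125)"
    )

    # Hypothetical extraction of the two recovery flags.
    match = re.search(r"Step\.\.\. \((\d+) \|.*Learning Rate: ([0-9.eE+-]+)", last_step_line)
    skip_steps = int(match.group(1))        # -> 33100, used for --skip_steps
    learning_rate = float(match.group(2))   # -> 0.00033713760785758495, used for --learning_rate
    print(skip_steps, learning_rate)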
run_flax_speech_recognition_ctc.py CHANGED
@@ -298,6 +298,12 @@ class DataTrainingArguments:
     remove_punctuation: bool = field(
         default=False, metadata={"help": "Whether or not to remove punctuation during training."}
     )
+    skip_steps: Optional[int] = field(
+        default=0,
+        metadata={
+            "help": "Skip this number of steps. Useful to continue training"
+        },
+    )
 
 
 # @flax.struct.dataclass
@@ -993,15 +999,25 @@ def main():
         dtype = jnp.float32
         training_args.mixed_precision = False
 
-    model = FlaxWav2Vec2ForCTC.from_pretrained(
-        model_args.model_name_or_path,
-        config=config,
-        dtype=dtype,
-        cache_dir=model_args.cache_dir,
-        revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
-        from_pt=True,
-    )
+    try:
+        model = FlaxWav2Vec2ForCTC.from_pretrained(
+            model_args.model_name_or_path,
+            config=config,
+            dtype=dtype,
+            cache_dir=model_args.cache_dir,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+        )
+    except:
+        model = FlaxWav2Vec2ForCTC.from_pretrained(
+            model_args.model_name_or_path,
+            config=config,
+            dtype=dtype,
+            cache_dir=model_args.cache_dir,
+            revision=model_args.model_revision,
+            use_auth_token=True if model_args.use_auth_token else None,
+            from_pt=True,
+        )
 
     # 6. Resample speech dataset ALWAYS
     raw_datasets = raw_datasets.cast_column(
@@ -1494,7 +1510,8 @@
     logger.info(f" Fuse matmuls: {config.fuse_matmuls}")
 
     train_time = cur_step = 0
-    epochs = tqdm(range(num_epochs), desc=f"Epoch ... (1/{num_epochs})", position=0)
+    skip_epochs = data_args.skip_steps % (num_train_samples // batch_size_per_update)
+    epochs = tqdm(range(skip_epochs, num_epochs), desc=f"Epoch ... ({skip_epochs + 1}/{num_epochs})", position=0)
     for epoch in epochs:
         if training_args.do_train:
             # ======================== Training ================================
@@ -1512,12 +1529,16 @@
             samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx]
             batch = data_collator(samples)
             batch = shard(batch.data)
+
+            cur_step = epoch * (num_train_samples // batch_size_per_update) + step
+            if cur_step <= data_args.skip_steps:
+                continue
+
             try:
                 state, train_metric = p_train_step(state, batch)
             except TypeError as e:
                 logger.warning("Encountered following error: \n", e)
 
-            cur_step = epoch * (num_train_samples // batch_size_per_update) + step
 
             if cur_step % training_args.logging_steps == 0:
                 # Save metrics
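The key behavioral change in this diff is the new skip_steps argument: on restart, the training loop recomputes the global step for every batch and skips updates that the interrupted run already performed. The following standalone sketch illustrates that arithmetic with assumed values (40 epochs and 4729 update steps per epoch, matching the "x/4729" progress bars in the logs below); it is an illustration, not code from the repository.

    # Standalone sketch of the resume-by-skipping idea (assumed sizes, for illustration only).
    num_epochs = 40
    steps_per_epoch = 4729      # assumed: matches the "x/4729" progress bars in the training logs
    skip_steps = 33100          # the value passed via --skip_steps in run.recover.sh

    for epoch in range(num_epochs):
        for step in range(steps_per_epoch):
            cur_step = epoch * steps_per_epoch + step
            if cur_step <= skip_steps:
                continue        # batch already consumed by the interrupted run
            # This is the first update a restarted run would actually execute.
            print(f"resuming at epoch {epoch}, step {step} (global step {cur_step})")
            break
        else:
            continue
        break

With these numbers the sketch prints "resuming at epoch 6, step 4727 (global step 33101)", i.e. a restarted run fast-forwards through the first six epochs and most of the seventh before performing a training step again.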
special_tokens_map.json CHANGED
@@ -231,6 +231,20 @@
     "rstrip": false,
     "single_word": false
   },
+  {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   {
     "content": "</s>",
     "lstrip": false,
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
- run-20220731_183904-2sjxhpmt/logs/debug-internal.log
+ run-20220802_074501-31ig5poi/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
- run-20220731_183904-2sjxhpmt/logs/debug.log
+ run-20220802_074501-31ig5poi/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
- run-20220731_183904-2sjxhpmt
+ run-20220802_074501-31ig5poi
wandb/run-20220731_183904-2sjxhpmt/files/config.yaml CHANGED
@@ -18,6 +18,12 @@ _wandb:
   - 3
   - 11
   - 12
+  2:
+  - 1
+  - 2
+  - 3
+  - 11
+  - 12
   3:
   - 13
   4: 3.8.10
wandb/run-20220731_183904-2sjxhpmt/files/media/table/eval/step_32k_32000_c06fd8316235f01c9293.table.json ADDED
@@ -0,0 +1 @@
+ {"columns": ["label_str", "pred_str"], "data": [["men n\u00e5r n\u00e5r senterpartiet \u00f8nsker \u00e5 bruke denne anledningen til \u00e5 svekke e\u00f8s[UNK]avtalen s\u00e5 s\u00e5 er det det er lov \u00e5 fors\u00f8ke det men jeg er veldig glad for at det fors\u00f8ket ikke lykkes og at det fortsatt er trygt og godt flertall denne salen for \u00e5 slutte opp om e\u00f8s[UNK]avtalen eee og at denne utredningen n\u00e5 ikke blir noe av", "men n\u00e5 ema senterpartdie \u00f8nske \u00e5 bruke denne anleedning til \u00e5 svekke veseamvtalen s\u00e5 s\u00e5 er det det e love fors\u00f8ke det men e er veldig glad for at det fors\u00f8ke ikke lykkes og at e fortsatt er e trykt \u00e5 godte flderte ad dene salen for \u00e5 o slutte opp og med veseavtalen e og at dene utredningen n\u00e5 ikke blir na"], ["vil statsr\u00e5den p\u00e5 bakgrunn av de reaksjoner i finnmark og det forventede nei fra finnmark fylkesting til den fremforhandla avtalen sj\u00f8l ta ansvar for flertallets tvangsvedtak og ikke overlate gjennomf\u00f8ringa til fylkespolitikere som er motstandere av sammensl\u00e5inga", "vel statsr\u00e5den p\u00e5 bakgrunnene av dede aksjoner i finnmarlk og defor velte de nei fra finn mark fylkesting til den fremfor ehandlea avtalen skj\u00f8ll kaansvarg for flertalets tvangsvedtak og ikke over late gjennomf\u00f8ring a a det fylkes politike e som er motst\u00f8ndereav sammensl\u00e5ingar"], ["i samme \u00e5nd vil jeg ogs\u00e5 nevne forslaget fra kristelig folkeparti og arbeiderpartiet som stortinget voterte over tidligere i \u00e5r om \u00e5 starte prosessen med \u00e5 trekke fondet ut av spillselskaper med begrunnelsen at eierskapet i disse selskapene truer norsk tippings spillmonopol i norge", "i samme og vil agts\u00e5 nevne forslaget fra krise i folkepartit og arbeiderpartiet som stortinget ho terte over tidligere \u00e5r og n\u00e5 starte prosessen med traekke f\u00e5n den ut av spillse skaper med begremnnelsen at edeskape i disse selskapene truer norsk tippings spille med nopol norge"], ["takk president det er jo og verdt \u00e5 minne om at n\u00e5r vi snakker om oljefondet og oljepengebruk s\u00e5 er det noen partier i denne salen som er veldig glad i \u00e5 bruke penger mer oljepenger og mer p\u00e5 over offentlige eee finanser og over offentlig statsbudsjett men stemmer imot mange av prosjektene som bringer disse inntektene inn til statskassa", "dakk president det jog e vert o minner aaetee ne en snakke maldje for ne o die pengebruk s\u00e5 er e norgen partiear i denne salden som veldig glad jo brukar pengear mer alje penger og eer p\u00e5 og ver offentleggere f nanse ogve r offentlig statsbudsjett men demme mot mange prosjektene som bringe desse innpakten innen tel statsk assa"], ["vi har konkurransedyktige b\u00e5de l\u00f8nninger men og vi har god kompetanse i norge som og kunne ha v\u00e6rt videreutvikla gjennom at en hadde flytta en st\u00f8rre del av forvaltninga til norge og derfor s\u00e5 fremmer vi og forslag om \u00e5 utrede det", "ve har konkernansee dyktige b\u00e5de l\u00f8nninga men n\u00e5 re av god kompetanse i norge som m\u00e5 kunn v\u00e5r ee vi ar etvikla gj\u00f8nat enn hadde flytta en st\u00f8rre del av forvaltninga til norge mog derfor s\u00e5 fremav vei ogg forslag om og uttredee det"], ["ikke bare p\u00e5 gjennom arbeid men ogs\u00e5 for p\u00e5 mange andre samfunnsarenaer eee innafor frivilligheten innafor kultur og idrett som er viktig eee for \u00e5 f\u00e5 forst\u00e5elsen eee for eee qqq eee at at ogs\u00e5 barna skal delta p\u00e5 p\u00e5 de 
arenaene", "ikke bare p\u00e5 gjennomm arbeid men ogs\u00e5 for p\u00e5 mange andre samfunn saee earena e innafar previlligheeten innanfor kulture idrer som e viktig ee for \u00e5 f\u00e5 forst\u00e5elsen foree ee at at ogs\u00e5 barna skal delta p\u00e5 p\u00e5 de ar ena aven"], ["samtidig s\u00e5 var jeg jo byr\u00e5d i oslo p\u00e5 den tiden da dette ble innf\u00f8rt og da h\u00f8rte jeg egentlig det motsatte ogs\u00e5 av det som representanten b\u00f8hler tar opp nemlig en bekymring for at utsatte ungdom ogs\u00e5 ble trukket ut av skolen fordi de fire tusen kronene eller tiltakspengene at det ble ogs\u00e5 en viktig del av b\u00e5de ungdommen og til og med ogs\u00e5 kanskje til og med familienes \u00f8konomi", "samtidig o vae jeg byr\u00e5d d slo p\u00e5 demt ide da dete bleee innf\u00f8rt og dat heardte e kke e motsate te ogs\u00e5 av de som representanten b\u00f8r e t r opp nemlig en bekymmring for ate utsatte ungdom og s\u00e5 be plukket ut av skolen for di de fire tusen kroner tildags pengene at de t ble ogs\u00e5 e viktig dele av b\u00e5 de ungdome g ti eg gs\u00e5 kanskje til eee familienes ee \u00f8konomi"], ["milj\u00f8partiet de gr\u00f8nne har ingen tilhenger av store og dyre veiutbygginger p\u00e5 bekostning av mere og bedre milj\u00f8vennlige l\u00f8sninger xxx president men vi er like sterke tilhengere som alle andre partier av \u00e5 investere i sikkerhet p\u00e5 de veiene og tunellene vi har", "milj\u00f8pard e i e \u00f8nne ar ingen tilg enger a store og dyre veieutbygginge p\u00e5 bekostning av mere og bedre milje\u00f8vendige l\u00f8sninger milj \u00f8 par ee president men vi er like sterke tildgegere som alle andre partier av \u00e5g investere i sikkerhet p\u00e5 de vegene og tunnelende vi har"], ["eee n\u00e5r storbritannia skal ut av av eu s\u00e5 ser de ikke p\u00e5 e\u00f8s som et aktuelt alternativ eee men for norge \u00e5 skulle eee f\u00f8lge etter storbritannia og og velge \u00e5 g\u00e5 ut av eus indre marked det er ikke et alternativ jeg vil anbefale denne salen", "eee n\u00e5 s\u00e5 bita jeg skal ut av av eu s\u00e5 s er et ikke p\u00e5 er vss s m et ee aktuelt alterna tiv ee men for norge oag skulle ee f\u00f8ller t el stul beta ned oppb o velgdea g\u00e5 ut av a jus indre marked eddete eerkkeet alternative er vel ambefale dennde salllemnd"], ["for ei god skole forutsetter at dyktige l\u00e6rere og andre voksenpersoner gis tid og tillit til \u00e5 f\u00f8lge opp hver enkelt elev og til \u00e5 tilrettelegge undervisninga s\u00e5nn at elevene kan l\u00e6re p\u00e5 den m\u00e5ten de l\u00e6rer best", "forigg\u00e5skolee foredtsetdt at tektig e l\u00e6rdrere og andre vaksen personar ge tid ogg tildlidt ti f\u00f8lge opp kver endkelt elev og tel ag til reftelegg undervisningar som at elevandn kan ld\u00e6r p\u00e5 den m\u00e5ten de l\u00e6r basst"], ["det er innf\u00f8rt rentetak p\u00e5 l\u00e5n og kreditt i mange land i verden og rentetit e re rentetak vil inneb\u00e6re at det ikke lenger vil v\u00e6re l\u00f8nnsomt for bankene \u00e5 gi l\u00e5n til de personene som har h\u00f8yest kredittrisiko det vil si den gruppa som st\u00e5r i kj eee st\u00f8rst fare for \u00e5 f\u00e5 betalingsvansker", "det er inn f\u00f8rt remd te takk p\u00e5 lond og kredikk in mangeran i verden og rem tiet itt rrendte tak vi ineb \u00e6re at ede ikke lenge l \u00e6re l\u00f8nnnsamt for bandkend og gei l\u00e5d til de personen som hadrd h\u00f8yes predit tri siko det vel si den gruppa som slt\u00e5r i ke st\u00f8rst faret for \u00e5 f\u00e5 betalingsvamnskel"], ["jeg mener det 
som er avgj\u00f8rende n\u00e5 er at vi s\u00f8rger for at de ungdommene vi her snakker om som i utgangspunktet er utsatte mange faller ut av skolen de har f\u00e5 formelle kvalifikasjoner om ikke vi klarer \u00e5 kvalifisere de i ungdomslivet deres ja s\u00e5 vil de f\u00e5 store utfordringer n\u00e5r de bli n\u00e5r de blir eldre", "dje men ne det se av erde n\u00e5 er at det serke for at de ungdo en ji hegrg snakker om som mid utgan s onk tet er utsatt e mange tall iut a s olenr det ha f\u00e5 for melle p\u00e5 f asjoner og mikke vi klare og kalfi se rer i i ungdommslivet de res jal s\u00e5s\u00e5 vil die fo storvere utfordringer nd\u00e5r det blir n\u00e5r de blir eldre"], ["og vegtrafikksentralen starta umiddelbart brannventilasjon s\u00e5nn at r\u00f8yken fr\u00e5 brannen blei ventilert \u00e5tte og ein halv kilometer mot gudvangen ikkje sant men r\u00f8yken blokkerte dermed den einaste evakueringsmuligheten for trafikantane p\u00e5 gudvangen[UNK]sida av brannen", "og vi da fikksteen ta sterkt og midle ver brandnv ind ti l s sj\u00e5n sar r\u00f8yken fra b randnend dlde vendtelert otdte en hall hilom met tel mot gudvndgninnsannd med r\u00f8lken blokkelt de dermed den eineaste anakerings m buligheten for trafikkentane fo guvang aesidar abrende"], ["da har statsministeren og og inkluderingsministeren eee invitert sammen med meg til et frokostm\u00f8te i statsministerboligen hvor vi har invitert veldig mange kvinner fra minoritetsmilj\u00f8a som skal fortelle hvordan dem opplever det herre her og hva vi kan gj\u00f8re for \u00e5 bedre deres situasjon", "da har saasmisteere n\u00e5 ag ingkee ringsmainiseteren eee invitert e samdmlen me ee tel trokaossm\u00f8dt el stafminisembolin no r vi har invitert veldig ange kvinner fra minlitetsmiljad so\u00e5 s kale for tjelle hvordan dem oplever er er og va vde kan gj\u00f8r for \u00e5 bedre dem mers sipuasjone pad"], ["president eg registrerer i at i den behandlinga av stortingsmeldinga som representanten lundteigen refererer til s\u00e5 gjekk eee senterpartiet vekk fr\u00e5 dei resultatm\u00e5la for sj\u00f8lvforsyningsgrad som dei sj\u00f8l f\u00f8reslo fr\u00e5 stortinget sist dei sat i regjering", "presidendt jeg eg registrera eei at de den n behandlingea av stortings meldindga som ee repbresentanten ludndt e en referera tidl s o gikk e ee sen te pa atiee vekk f or a de repsjuletatm\u00e5dlad for sj\u00f8l forslyndningsrad s om de i sj\u00f8ld fo slor fra stortineget sistde satt i regjerindg"], ["noen av de som har gitt mest direkte st\u00f8tte til senterpartiet sitt innta initiativ er alts\u00e5 europabevegelsen og europ europabevegelsen sin leder eee heidi nordby lunde eee som sier i en uttalelse at europabevegelsen st\u00f8tter forslag om et regjeringsoppnevnt utvalg", "dn \u00e5 ona jeg som ha ar gitdt mesdt di rettestettette semtepartietsit inetalkl initiativ eer agts\u00e5 europ av vegelsen og er op p eeurofav vegelsen siln leder ee hei de n rbelunder ee soam sei en utalelse at ero avegelsen st\u00f8tta forslag og at regjeringsoppnevndt utvalg"], ["det som er faren er jo at utviklinga som har skjedd under eee siv jensen som finansminister vil fortsette nemlig at det vil bli \u00f8kt eiendomsskatt ute i kommunene at det vil bli \u00f8kt eiendomsskatt p\u00e5 bolig at det vil bli \u00f8kt eiendomsskatt p\u00e5 anna n\u00e6ringsliv fordi at kommunene ikke har noe oljefond \u00e5 hente ifra", "det som er faren er jo at et utktvrikling a s\u00e5 skjedd un det e siki jansen ag finangsminister e fortsetpter ne mig at dt vel bli \u00f8kt ege 
omskat ut i kommunene at det vil ble \u00f8kt eie omsk t f\u00e5 golig at det vil be \u00f8kte ege d omsk dft p\u00e5 ann a n\u00e6ringsliv fordi at kommunen ikke ha nok olje fan n hendter fra"], ["viss vi g\u00e5r tilbake igjen til det som var omstridt i nittennittito om innhaldet i e\u00f8s[UNK]avtalen s\u00e5 er det jo slik at i e\u00f8s[UNK]tilhengjarane har tatt feil p\u00e5 alle punkt og avtalen har hatt den verknaden som e\u00f8s[UNK]motstandarane sa", "dvin fel g r tilbak i e n til de som ar omsitritt nitt n ittito om innhald i \u00f8savtalen s\u00e5 er det jo slik at de ir ef ti henga de en har tat feil p\u00e5 alle punkt o avt ene hatt den verknaden som ir s otstanderdrene sa"], ["jeg eller xxx milj\u00f8partiet de gr\u00f8nne f\u00e5r ikke stemt fordi vi ikke faller inn under utbytteordningen men vi er alts\u00e5 en del av det eee enstemmige flertallet som st\u00e5r bak denne eee innstillingen og den forrige innstillingen", "ji eg i eller milj\u00f8for de milj\u00f8fer dei d rene for ikke stemt[UNK] fordi vi ikke fa l er inn nder e utbute ordningen men vi er algts\u00e5 en del av det e ense mige flertallen s st\u00e5 bak denne e ingnstiringer o n e fo g ed til en"], ["president jeg mener det er veldig bra at flertallet her sl\u00e5r fast at klimaendringene vil kunne p\u00e5virke verdiene av finansforetakenes investeringer og klimarisikoutvalget skal fram til desember totusenogatten vurdere betydningen av klimarisiko for norsk \u00f8konomi", "president jeg mener de veldig bra at flertallet hersog fast at klimendringean vil kunne p\u00e5virke verdian a finansforetakenes invensteringer og klimar is k o itv get ska l fremtil det semder to tisenatten vurderde betidningen av kimarisiko for norsk \u00f8konomi"], ["p\u00e5 samme m\u00e5te som barnehage er et velferdsgode slik eee representanten trettes trettebergstuen sa s\u00e5 er ogs\u00e5 kontantst\u00f8tte el velferdsgode en mulighet en valgfrihet for foreldrene fram til barnet er to \u00e5r", "p\u00e5 samme m\u00e5te som barneaget er et velfargdskode slik e representanten trettes stretteberst en sa s\u00e5 er ogs\u00e5 konmtontstetter veg lfergskode en mulighet en valgfrihet for fareldrene fram til barne er to \u00e5r"], ["dette mener vi fremdeles er eee hensyn som er viktig \u00e5 ha med seg i debatten og vi mener at det er ingenting i dag eee som er nytt som eee viser til at situasjonen her har eee har eee endret seg", "dette mener vi fremdel es er ee henlsyn som vir viktig og h medrd seg i debatten og vi mener at de ingenting i dag e som med nytt som ee viser til at situasjonen her har har r en endret seg"], ["men bestillinger fra flertallet p\u00e5 eller fra et samla storting si side ifra en samla komite er at den ekstra arbeidsgiveravgifta skal fjernes og det er utrolig viktig at det ikke blir skapt uklarhet rundt det slik at bransjen veit hva en har \u00e5 forholde seg til", "d n bestillninger fra flert l et p\u00e5 eller for eit sammle a storting s\u00e5 side r fer en samle a komite er at en ekstra arbeidsgivede a giffta skal fjernes og de utkrolig riketig at e kke bli sk a et uklarighet rundt det slik at irangjen veiktk n e har o f\u00e5 holde seg t ili"], ["der har vi jo tatt grep allerede men jeg tror forenklinger som bidrar til \u00e5 f\u00e5 kostnadene ned for bygging av nye boliger f\u00e5 balanse i de markedene som er ute av balanse alts\u00e5 mellom tilbud og ettersp\u00f8rsel er noe av det som skal til for at ikke veksten i boligprisene stiger eee mye i \u00e5rene fremover", "der har ei jo ta tt grep allerede 
troliforenklinger som vider er til \u00e5 f\u00e5 kost n at en e for byggeing a ny bolige f\u00e5 balangse i de markede som er ut ta vel an se som mellom tilbud e etsp\u00f8rs e og de som ska l ti for a ike beksen i boligprisene figer mie i \u00e5rene fremoee"], ["i den nye modellen m\u00e5 avdelingsoversykepleier og avdelingsoverlege drive et utstrakt samarbeide for \u00e5 ivareta pasienters interesser og behov[UNK]", "i den nye modellen m\u00e5 avdelingsover sykepleier o avdelingsoverleiget[UNK] drive et utstragt samarbeidet for \u00e5 ivarethar pasienters inteeresser og behov[UNK]"], ["og inn forbi olje[UNK] og gassbransjen og rederisektoren s\u00e5 er det s\u00e5nn at en kvinnelig toppleder tjener enog\u00e5tti komma seks prosent av den l\u00f8nninga som mannlige toppledere i andre eee i samme yrke da f\u00e5r", "og enen forbi ole og gassbransene og rederi sektoren se re son at den krinnelige topple der scjjene ei n\u00e5r i ofti komm r seks prosent av den l\u00f8nninger som m nlege topp lederar i andre i samme \u00f8rke d\u00e5r for"], ["forslaget betyr ogs\u00e5 at det no igangsetjast arbeid med \u00e5 finna andre modellar slik at den ekstraordin\u00e6re arbeidsgjevaravgifta p\u00e5 fem prosent kan avviklast og erstattast med ei ordning som ikkje er ein s\u00e6rskatt p\u00e5 arbeid", "forslaget betyr ogs\u00e5 at el noe gang settes arbeid med \u00e5 finne andre modellae slik at den eksturenede adbeidsgiver avgifter p\u00e5 fem prosent kal av vikla set og er stattas med ordninge som ikkje er e scerrskape p\u00e5 arbeid"], ["inneb\u00e6rer det at man i brevet til komiteen eee har eee lagt seg p\u00e5 en linje hvor man egentlig advarer mot det vedtaket vi n\u00e5 skal gj\u00f8re eller kan vi forvente at det ligger en god utredning i statsbudsjettet", "inne b\u00e6rer det at man i d revet til komiten see har lagt seg p\u00e5 en linj e hvor man entlig adbarer mot de vedtak ve m\u00e5 skal gj\u00f8re eller kan vi for vente at det ligge eg o utredning i stasbudsjeet"], ["i tillegg s\u00e5 kommer jo det elementet med at hvis man g\u00e5r en turnus som tilsier at du kanskje har et morgenskift eee og veksler til skift inn forbi kort tid kveld morgen og kanskje p\u00e5 igjen neste morgen og s\u00e5 videre at hviletida ikke er lang nok s\u00e5 er det en stor belastning", "i tillegs\u00e5 kommer jo det elemente med at e hvis man g\u00e5r en turne som til a t ve kanskje har et moere amskift ee og veks tla sa gkif ine for bi ort tid krele mre han og kans\u00e5e p\u00e5 i en neste m\u00e5ran er s\u00e5 vider t vile ti ed i si lange er s\u00e5 den stod belasd neg"], ["eee jeg er glad president for at vi har en av verdens mest rauseste ordninger og at det er en h\u00f8yre[UNK] frp[UNK] og venstre[UNK]regjering som skal legge fram den mest offensive eee likestilte foreldrepermisjonen som noen gang er gjort i norge", "e e gla e si den for at vi hare en a verdensme st rusestordningar og at en h\u00f8yre e fer pr og vens er e egjering som skal legge fram den mest offensive e i ldikestil te freldrepermisjoe som noen gang e m gr e norge"], ["eee og eee det vil jo eee som en del av det ligge eee i den vurderinga \u00e5 se p\u00e5 hvordan skal vi eee s\u00f8rge for at eee b barnefamiliene f\u00e5r en st\u00f8tte som som treffer dem", "e ee o g e de vil d j oe e som en del v det er lin ge e i den vurderinga o s etp po hvordan skal ve s\u00f8rger for at ee barnefamilien e foren st\u00f8tte som som treftear"], ["det er slik et revidert eee budsjett som oftest er og hovedhensikten er alts\u00e5 \u00e5 f\u00e5 seg 
forelagt oppdaterte tall analyser foreta justeringer der det trengs og f\u00e5 svar fra eee regjeringa p\u00e5 ulike anmodningsforslag som har blitt fremma tidligere", "det slike e r viderep budsjett sm ofde st er om hovedenegsdikten er alts\u00e5om \u00e5 f\u00e5 eg forelagt op pa terte tal analysar foreta jusderinga dee d eg trengs og f\u00e5 svare fr\u00e5 e regjeringa fog ulige adn menningsforslage som v r vortdi fremmea tidl ere"], ["hvorfor meiner statsr\u00e5den at bnl og fellesforbundet ikke er relevant \u00e5 ha med i ekspertutvalget spesielt med tanke p\u00e5 at bakgrunnen for saken er rapporten enkelt \u00e5 v\u00e6re seri\u00f8s som nettopp de her akt\u00f8rene sto bak", "horfei me en eg statsr\u00e5den ad bene elle og fellesforbun de ikke eir relevant og hamet i eksperteut valget spesielt med tanke p\u00e5 at bakegrun for saken er ra porten enkelt ei s er ig\u00f8s som nettop di her ag tj\u00f8rane sto bake"], ["det m\u00e5 samarbeides p\u00e5 tvers av sektorene for \u00e5 gj\u00f8re det lettere \u00e5 kombinere deltakelse i programmet med ordin\u00e6rt arbeid fornye og forbedre norskoppl\u00e6ringen og bidra til at alle f\u00e5r et godt grunnlag for l\u00e6ring og deltagelse i arbeids[UNK] og samfunnsliv", "deg m\u00e5 saba arbeidets po\u00e5 tversf av sektorene for \u00e5 gj\u00f8re de letter og kommbuner e deldtagels bregrammet med orden\u00e6rt arbeidd for nye forbede e skoppl\u00e6ringen og bidrattil at alle for e godt grounnlag for l\u00e6ring og bedledtargelse i arbeidsog samfunnseligr"], ["kan monica m\u00e6land si at det her er en godt gjennomarbeida klok reform som har hatt gode lokale prosesser som har lak skapt lokal begeistring eller har det blitt en prestisje bare \u00e5 gjennomf\u00f8re det koste hva det koste vil p\u00e5 grunn av at det var en del av kommunereformen", "kan bo de an er l and s i t e har e en godt igjennomarbeid dag klo k re form som r h t gode lokale prosesser som ar lagkt t skapt lokal begeistring ellea r de bgitt en pestirs jeg bare gjennomf\u00f8re de koste va det kostet vil p\u00e5 gunn ag l var en del av kom m unereformen"], ["eee det er noen av oss som mener at lavere skatter og avgifter er bra for husholdningene er bra for norsk n\u00e6ringsliv eee fordi man f\u00e5r beholde verdiskapingen sin selv beholde mer av inntektene sine selv fremfor at det er politikerne som skal fordele pengene p\u00e5 deres vegne", "e de er noen a som mener at laver skater avegfter er bra for us oldningen er bdra for nor o n e engs ligt e fri man for beholddet verdiskapring e n sin selg beholder med ag enntektene sine seg fremfr t de r politikerne som skal fordele pengene p\u00e5 deres mene"], ["stortinget ber regjeringa komme attende til stortinget p\u00e5 eigna m\u00e5te seinast i samband med statsbudsjettet for tjuetjue med framlegg om ei provenyn\u00f8ytral omlegging av finansskatten og der forh\u00f8gd arbeidsgivaravgift vert fjerna", "nstortinget bed regjeringa komm at e en dett e stortinget p\u00e5 egna m\u00e5tte seinast i sambane med statsbudsjett for tjuetjueat med framlegg o meg kro ed ny n\u00f8r teiall e omlegging a fi nnanks skatten og d er for h\u00f8gde arbeidsgive g gift hvert tfijeina"], ["eee jeg innr\u00f8mmer at det er uheldig at for enkelte s\u00e5 s\u00e5 kan det virke eee urimelig men eee her eee oppveies det eee av de positive effektene som eee botidskravet er ment \u00e5 ha", "ee dr\u00f8mmea at det e uheldig at for enkelte s\u00e5o s\u00e5m kan de virke urimelig men her e oppveyes dig e a de positive efektdaem som bodsgrave e ment a 
ha"], ["det viktige n\u00e5r ein inng\u00e5r internasjonalt forpliktande avtalar er at dei forpliktelsene ein teke p\u00e5 seg er kjende og blir behandla og tatt stilling til i samband med at ein inng\u00e5r den faktiske avtalen", "det e vikktig er nor ein inn g\u00e5r internasjonal forpliktae d e avtalige erg at der forpliktelsan og en te ke p\u00e5efseg e kjendt og blir bihandla og tatt stilling tigle i sambalen med at d en inng\u00e5rde edn faktisk avtalen"], ["president p\u00e5 h\u00f8ringa om finansmarkedsmeldinga s\u00e5 kom det et unisont krav om \u00e5 f\u00e5 gjort noe med smb[UNK]rabatten og f\u00e5 innf\u00f8rt smb[UNK]rabatten i norge", "president b\u00e5 h\u00f8ring a om fenansmarkedsmeldinga so kom de et undisioont grav om mo fo gjort nakke med e m beee rabatte n o fo innf\u00f8rte saom beea rabatten i norge"], ["appelsin[UNK] banan[UNK] eple og druer er hovedingredienser i en velsmakende fruktsalat som serveres p\u00e5 en popul\u00e6r restaurant i byen[UNK]", "appelsin[UNK] banan[UNK] eple og druer er hovedingredienser i en velsmakende fruktsalat som serveres p\u00e5 en popul\u00e6r restaurant i byen[UNK]"], ["eee for jeg tror vi ville f\u00e5tt mange sp\u00f8rsm\u00e5l og mange det hav hadde vekket oppmerksomhet om norge hadde brukt brexit som en eee forklaring p\u00e5 at vi \u00f8nsket \u00e5 se p\u00e5 v\u00e5r egen e\u00f8s[UNK]tilknytning", "ee o jeg tror vi ville f\u00e5tt mange sp\u00f8rsmel og mange e hadde vekket merksomhet og norge hadde brukkt berekke sitt som en ee forklaring p\u00e5 at vi \u00f8nsket \u00e5 se p\u00e5 hver egen euveses tilknytning"], ["s\u00e5 eee er det ogs\u00e5 s\u00e5nn at n\u00e5r man fjerner aktivitetskravet s\u00e5 vil det kunne ha likestillingsmessig effekt negativ effekt p\u00e5 yrkesaktivitet", "s\u00e5 ee er det ogs\u00e5 s\u00e5nn at ee nore maen fjerene aktivitets krave so bild det kunne ha likkesti gingsmessige effekt neggativ effekt p\u00e5 yrkes aktivitete"], ["man har glemt at vestens matematikk[UNK] astronomi[UNK] geografi[UNK] navigasjonskunst og medisin mer eller mindre direkte stammer fra araberverdenen[UNK]", "man har glremt tat vesten s maatematikk asteronomi geografi navigasjons kunst og medisin mer eller mindre direkte stamme fra arabeverdenen[UNK]"], ["appelsin[UNK] banan[UNK] eple og druer er hovedingredienser i en velsmakende fruktsalat som serveres p\u00e5 en popul\u00e6r restaurant i byen[UNK]", "appelsin[UNK] banan[UNK] eple og druer er boeidingredienser i en belsmakende fruktsalad som serveres p\u00e5 en populer restaurant i byen[UNK]"], ["jeg vil gj\u00f8re presidenten oppmerksom p\u00e5 at vi har en bestemmelse om at presidenten kan anmode deltakerne i debatten om \u00e5 holde seg til saken men det er alts\u00e5 representantforslaget som ikke har flertall i denne sal som vi behandler og ikke alt mulig annet", "jeg vil gj\u00f8re presidente oppmerksom p\u00e5 at vi har en bestemmelse m at presidenten kan anmodet eee deltagende debatten holde sette s aken og de e ogs \u00e5 representantforslaget som ikke e harer flertale i denne sal som iei behandler og kke alet muliane"], ["ja president n\u00e5 er jo jeg s\u00e5nn sett ikke ansvarlig for saksfeltet s\u00e5 jeg kan ikke eee tale p\u00e5 vegne av kunnskapsministeren p\u00e5 akkurat dette hva kunnskapsministeren da m\u00e5tte \u00f8nske eller ministeren for h\u00f8yere utdanning \u00f8nsker \u00e5 gj\u00f8re", "ja presidente n\u00e5 e r jo eg san ts tt ikke ansvareelig for sakssfel t e s eg kan ikke ee taele p\u00e5 vegne av kunnskappsministerene p\u00e5 ak keort 
dette hva knn skalsministere da mot te \u00f8nske lakket ministere for h\u00f8ye utdanning \u00f8nske j\u00f8rt"], ["eee jeg st\u00f8tter den konklusjonen eee og n\u00e5r statsr\u00e5den skal ta ordet s\u00e5 vil det v\u00e6re interessant \u00e5 h\u00f8re om han allerede n\u00e5 kan si noe om hvordan dette vil bli behandlet i ny nasjonal transportplan", "ee jeg st\u00f8pter en konpesjonen ee og nor statsr\u00e5den skal ta ordet s\u00e5 vile det v\u00e6re interassantt p\u00e5 h\u00f8e m en allerede n\u00e5 kan si n\u00e5 om hvordan dette vil bli behandlet i ny nasjonale kransport lan"], ["president det h\u00f8res veldig hult ut n\u00e5r statsr\u00e5den som er frontfiguren for de som \u00f8nsker \u00e5 bruke tvang seier at det er opp til finnmark \u00e5 styre finnmark", "president det eres beldigd huleltdt t b r statsr\u00e5den som er sromfiguren for de s m e n skal r ke tvanedn sider t et pp til findmark ag styre finmark"], ["eee jeg er opptatt av at dem skal eee n\u00e5 fram og s\u00e5 eee vil jeg samtidig si at det vedtaket som blir fatta her i fjor og som det er et bredt flertall for i stortinget om botidskravet det kjem vi til \u00e5 st\u00e5 p\u00e5", "e er opptatt av at e demn skal e n\u00e5 fram o s\u00e5 e vil e samtidig si at det vedtaket som blir fatt at her i fjor g s\u00e5n t et bredd flertall for i stortinget bot skrave det k e a n i t st\u00e5 p\u00e5"]]}
wandb/run-20220731_183904-2sjxhpmt/files/output.log CHANGED
@@ -33177,3 +33177,976 @@ Configuration saved in /data/wav2vec2-1b-npsc-nst/config.json
 Model weights saved in /data/wav2vec2-1b-npsc-nst/flax_model.msgpack
 tokenizer config file saved in ./tokenizer_config.json
 Special tokens file saved in ./special_tokens_map.json
+ added tokens file saved in ./added_tokens.json
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) | 3725/4729 [2:54:50<46:42, 2.79s/it]
+ Step... (32100 | Loss: 0.3990156650543213, Learning Rate: 0.0003392978978808969, Gradient Norm: 8.1875)
+ Step... (32000/189160 | Eval Loss: 0.6790516376495361 | Eval wer: 0.46884239453991444 | Eval cer: 0.12758939024118068 |): 15% | 6/40 [25:24:29<127:58:40, 13550.60s/it]
+ Training...: 81% | 3825/4729 [2:59:07<35:43, 2.37s/it]
+ Training...: 83% | 3925/4729 [3:03:05<31:40, 2.36s/it]
+ Training...: 85% | 4025/4729 [3:06:56<27:04, 2.31s/it]
+ Training...: 87% | 4125/4729 [3:10:49<23:14, 2.31s/it]
+ Training...: 89% | 4226/4729 [3:14:47<21:18, 2.54s/it]
+ Training...: 91% | 4325/4729 [3:18:42<15:23, 2.29s/it]
+ Training...: 94% | 4426/4729 [3:22:38<12:33, 2.49s/it]
+ Training...: 96% | 4525/4729 [3:26:38<08:24, 2.47s/it]
+ Training...: 98% | 4625/4729 [3:30:35<04:03, 2.34s/it]
+ Training...: 100% | 4726/4729 [3:34:17<00:04, 1.52s/it]
+ Step... (32000/189160 | Eval Loss: 0.6790516376495361 | Eval wer: 0.46884239453991444 | Eval cer: 0.12758939024118068 |): 18% | 7/40 [26:03:55<122:09:29, 13326.35s/it]
+ 2022-08-01 20:55:25.840708: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 6 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ 2022-08-01 20:55:25.843140: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 5 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ 2022-08-01 20:55:25.843561: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 3 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ 2022-08-01 20:55:25.843992: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 2 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ 2022-08-01 20:55:25.844068: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 4 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ 2022-08-01 20:55:25.847392: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2130] Execution of replica 1 failed: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.
+ Training...: 0% | 0/4729 [00:05<?, ?it/s]
+ Step... (32000/189160 | Eval Loss: 0.6790516376495361 | Eval wer: 0.46884239453991444 | Eval cer: 0.12758939024118068 |): 18% | 7/40 [26:04:03<122:53:23, 13406.16s/it]
+ Traceback (most recent call last):
+   File "run_flax_speech_recognition_ctc.py", line 1604, in <module>
+     main()
+   File "run_flax_speech_recognition_ctc.py", line 1516, in main
+     state, train_metric = p_train_step(state, batch)
+ ValueError: RESOURCE_EXHAUSTED: Attempting to reserve 3.93G at the bottom of memory. That was not possible. There are 8.22G free, 0B reserved, and 3.72G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.22G, so compaction will enable this reservation. The nearest obstacle is at 3.72G from the bottom with size 16.0K.: while running replica 1 and partition 0 of a replicated computation (other replicas may have failed as well).
wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220731_183904-2sjxhpmt/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220731_183904-2sjxhpmt/logs/debug.log CHANGED
@@ -21,3 +21,167 @@ config: {}
 2022-07-31 18:39:08,496 INFO MainThread:2983955 [wandb_run.py:_redirect():1689] Redirecting console.
 2022-07-31 18:39:08,498 INFO MainThread:2983955 [wandb_run.py:_redirect():1745] Redirects installed.
 2022-07-31 18:39:08,498 INFO MainThread:2983955 [wandb_init.py:init():633] run started, returning control to user process
+ 2022-08-01 20:55:26,013 INFO MainThread:2983955 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1
+ 2022-08-01 20:55:26,480 INFO MainThread:2983955 [wandb_run.py:_restore():1752] restore
+ 2022-08-01 20:55:28,979 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 2
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 834758
+ total_bytes: 834758
+ }
+
+ 2022-08-01 20:55:29,320 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 2
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 834758
+ total_bytes: 834758
+ }
+
+ 2022-08-01 20:55:30,294 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 834758
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,396 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 834758
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,498 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,600 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,701 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,804 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:30,906 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:31,007 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:31,109 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:31,211 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:32,335 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+
+ 2022-08-01 20:55:32,574 INFO MainThread:2983955 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true
+ exit_result {
+ }
+ file_counts {
+ wandb_count: 6
+ media_count: 8
+ artifact_count: 8
+ other_count: 1
+ }
+ pusher_stats {
+ uploaded_bytes: 1426801
+ total_bytes: 1426801
+ }
+ local_info {
+ }
+
+ 2022-08-01 20:55:34,231 INFO MainThread:2983955 [wandb_run.py:_append_history():2130] rendering history
+ 2022-08-01 20:55:34,231 INFO MainThread:2983955 [wandb_run.py:_append_summary():2085] rendering summary
+ 2022-08-01 20:55:34,232 INFO MainThread:2983955 [wandb_run.py:_append_files():2180] logging synced files
wandb/run-20220731_183904-2sjxhpmt/run-2sjxhpmt.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5464c84f5c37f6cb52fb29c507276a5a2d20531247ce2b6a96d5346a95f7545f
3
- size 37255922
+ oid sha256:2bbe70a2ad4cde2cfdfea692e7a9e25b9cab9f44b8590b2bf563faa5a30973fc
3
+ size 38750598
wandb/run-20220802_073947-3q3jac0b/files/code/run_flax_speech_recognition_ctc.py ADDED
@@ -0,0 +1,1615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2022 The HuggingFace Team All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition.
18
+ """
19
+ # You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments.
20
+
21
+ import logging
22
+ import math
23
+ import os
24
+ import re
25
+ import sys
26
+ import time
27
+ from dataclasses import dataclass, field
28
+ from pathlib import Path
29
+ from typing import Any, Callable, Dict, List, Optional, Union
30
+
31
+ import datasets
32
+ import numpy as np
33
+ from datasets import DatasetDict, load_dataset, load_metric
34
+ from tqdm import tqdm
35
+
36
+ import flax
37
+ import jax
38
+ import jax.numpy as jnp
39
+ import optax
40
+ import transformers
41
+ import wandb as wandb
42
+ from flax import core, jax_utils, struct, traverse_util
43
+ from flax.jax_utils import unreplicate, pad_shard_unpad
44
+ from flax.training.common_utils import get_metrics, shard, shard_prng_key
45
+ from huggingface_hub import Repository
46
+ from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC
47
+ from optax._src import linear_algebra
48
+ from transformers import (
49
+ AutoFeatureExtractor,
50
+ AutoProcessor,
51
+ AutoTokenizer,
52
+ HfArgumentParser,
53
+ TrainingArguments,
54
+ is_tensorboard_available,
55
+ set_seed,
56
+ )
57
+ from transformers.file_utils import get_full_repo_name
58
+ from transformers.utils import check_min_version
59
+ from transformers.utils.versions import require_version
60
+
61
+
62
+ # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
63
+ check_min_version("4.17.0.dev0")
64
+
65
+ require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
66
+
67
+ logger = logging.getLogger(__name__)
68
+
69
+
70
+ @flax.struct.dataclass
71
+ class ModelArguments:
72
+ """
73
+ Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
74
+ """
75
+
76
+ model_name_or_path: str = field(
77
+ metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
78
+ )
79
+ config_name: Optional[str] = field(
80
+ default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
81
+ )
82
+ tokenizer_name: Optional[str] = field(
83
+ default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
84
+ )
85
+ feature_extractor_name: Optional[str] = field(
86
+ default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
87
+ )
88
+ cache_dir: Optional[str] = field(
89
+ default=None,
90
+ metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
91
+ )
92
+ use_fast_tokenizer: bool = field(
93
+ default=True,
94
+ metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
95
+ )
96
+ model_revision: str = field(
97
+ default="main",
98
+ metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
99
+ )
100
+ use_auth_token: bool = field(
101
+ default=False,
102
+ metadata={
103
+ "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
104
+ "with private models)."
105
+ },
106
+ )
107
+ freeze_feature_encoder: bool = field(
108
+ default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
109
+ )
110
+ attention_dropout: float = field(
111
+ default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
112
+ )
113
+ activation_dropout: float = field(
114
+ default=0.1,
115
+ metadata={
116
+ "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler."
117
+ },
118
+ )
119
+ hidden_dropout: float = field(
120
+ default=0.1,
121
+ metadata={
122
+ "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
123
+ },
124
+ )
125
+ feat_proj_dropout: float = field(
126
+ default=0.0,
127
+ metadata={
128
+ "help": "The feat proj dropout probability for feature encoder representations."
129
+ },
130
+ )
131
+ final_dropout: float = field(
132
+ default=0.0,
133
+ metadata={"help": "The dropout probability for the final projection layer."},
134
+ )
135
+ mask_time_prob: float = field(
136
+ default=0.1,
137
+ metadata={
138
+ "help": "The spec aug dropout probability for feature encoder representations."
139
+ },
140
+ )
141
+ mask_time_length: int = field(
142
+ default=10,
143
+ metadata={"help": "Length of vector span to mask along the time axis."},
144
+ )
145
+ mask_feature_prob: float = field(
146
+ default=0.0,
147
+ metadata={
148
+ "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
149
+ "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
150
+ },
151
+ )
152
+ mask_feature_length: int = field(
153
+ default=10,
154
+ metadata={"help": "Length of vector span to mask along the feature axis."},
155
+ )
156
+ layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
157
+ ctc_loss_reduction: Optional[str] = field(
158
+ default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
159
+ )
160
+ ctc_zero_infinity: Optional[bool] = field(
161
+ default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."}
162
+ )
163
+
164
+
165
+ @flax.struct.dataclass
166
+ class DataTrainingArguments:
167
+ """
168
+ Arguments pertaining to what data we are going to input our model for training and eval.
169
+ """
170
+
171
+ dataset_name: str = field(
172
+ default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
173
+ )
174
+ dataset_config_name: Optional[str] = field(
175
+ default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
176
+ )
177
+ text_column: Optional[str] = field(
178
+ default=None,
179
+ metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
180
+ )
181
+ dataset_cache_dir: Optional[str] = field(
182
+ default=None, metadata={"help": "Path to cache directory for saving and loading datasets"}
183
+ )
184
+ overwrite_cache: bool = field(
185
+ default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
186
+ )
187
+ preprocessing_num_workers: Optional[int] = field(
188
+ default=None,
189
+ metadata={"help": "The number of processes to use for the preprocessing."},
190
+ )
191
+ max_train_samples: Optional[int] = field(
192
+ default=None,
193
+ metadata={
194
+ "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
195
+ "value if set."
196
+ },
197
+ )
198
+ max_eval_samples: Optional[int] = field(
199
+ default=None,
200
+ metadata={
201
+ "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
202
+ "value if set."
203
+ },
204
+ )
205
+ max_test_samples: Optional[int] = field(
206
+ default=None,
207
+ metadata={
208
+ "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
209
+ "value if set."
210
+ },
211
+ )
212
+ audio_column_name: str = field(
213
+ default="audio",
214
+ metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
215
+ )
216
+ text_column_name: str = field(
217
+ default="text",
218
+ metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
219
+ )
220
+ max_duration_in_seconds: float = field(
221
+ default=20.0,
222
+ metadata={
223
+ "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
224
+ },
225
+ )
226
+ min_duration_in_seconds: float = field(
227
+ default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
228
+ )
229
+ max_label_length: Optional[int] = field(
230
+ default=512,
231
+ metadata={
232
+ "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
233
+ "than this will be filtered."
234
+ },
235
+ )
236
+ min_label_length: Optional[int] = field(
237
+ default=2,
238
+ metadata={
239
+ "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
240
+ "than this will be filtered."
241
+ },
242
+ )
243
+ pad_input_to_multiple_of: Optional[int] = field(
244
+ default=32000,
245
+ metadata={
246
+ "help": "If set will pad the input sequence to a multiple of the provided value. "
247
+ "This is important to avoid triggering recompilations on TPU."
248
+ },
249
+ )
250
+ pad_target_to_multiple_of: Optional[int] = field(
251
+ default=None,
252
+ metadata={
253
+ "help": "If set will pad the target sequence to a multiple of the provided value. "
254
+ "This is important to avoid triggering recompilations on TPU."
255
+ },
256
+ )
257
+ preprocessing_only: bool = field(
258
+ default=False,
259
+ metadata={
260
+ "help": "Whether to only do data preprocessing and skip training. "
261
+ "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
262
+ "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
263
+ "so that the cached datasets can consequently be loaded in distributed training"
264
+ },
265
+ )
266
+ train_split_name: str = field(
267
+ default="train",
268
+ metadata={
269
+ "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
270
+ },
271
+ )
272
+ eval_split_name: str = field(
273
+ default="validation",
274
+ metadata={
275
+ "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
276
+ },
277
+ )
278
+ do_lower_case: bool = field(
279
+ default=True,
280
+ metadata={"help": "Whether the target text should be lower cased."},
281
+ )
282
+ wandb_project: str = field(
283
+ default="flax-speech-recognition-ctc",
284
+ metadata={"help": "The name of the wandb project."},
285
+ )
286
+ wandb_name: str = field(
287
+ default=None,
288
+ metadata={"help": "The name of the wandb run."},
289
+ )
290
+ wandb_job_type: str = field(
291
+ default="CTC",
292
+ metadata={"help": "The name of the wandb job type."},
293
+ )
294
+ test_split_name: str = field(
295
+ default="test",
296
+ metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"},
297
+ )
298
+ remove_punctuation: bool = field(
299
+ default=False, metadata={"help": "Whether or not to remove punctuation during training."}
300
+ )
301
+ skip_steps: Optional[int] = field(
302
+ default=0,
303
+ metadata={
304
+ "help": "Skip this number of steps. Useful to continue training"
305
+ },
306
+ )
307
+
308
+
309
+ # @flax.struct.dataclass
310
+ @dataclass
311
+ class FlaxTrainingArguments(TrainingArguments):
312
+ precision: str = field(
313
+ default="full",
314
+ metadata={
315
+ "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision"
316
+ "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**"
317
+ },
318
+ )
319
+ matmul_precision: str = field(
320
+ default="default",
321
+ metadata={
322
+ "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. "
323
+ "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). "
324
+ "This configuration option does not change the behaviours of such calls with explicit precision arguments; "
325
+ "it only changes the behaviors of calls with no such argument provided. "
326
+ "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`."
327
+ },
328
+ )
329
+ multisteps: bool = field(
330
+ default=False,
331
+ metadata={
332
+ "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, "
333
+ "a custom gradient accumulation implementation will be employed."
334
+ },
335
+ )
336
+
337
+
338
+ def to_fp32(t):
339
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
340
+
341
+
342
+ def to_bf16(t):
343
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t)
344
+
345
+
346
+ class MixedPrecisionTrainState(struct.PyTreeNode):
347
+ """Train state for use with a single Optax optimizer.
348
+ Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py
349
+
350
+ Synopsis::
351
+
352
+ state = TrainState.create(
353
+ apply_fn=model.apply,
354
+ params=variables['params'],
355
+ tx=tx)
356
+ grad_fn = jax.grad(make_loss_fn(state.apply_fn))
357
+ for batch in data:
358
+ grads = grad_fn(state.params, batch)
359
+ state = state.apply_gradients(grads=grads)
360
+
361
+ Args:
362
+ step: Counter starts at 0 and is incremented by every call to
363
+ `.apply_gradients()`.
364
+ apply_fn: Usually set to `model.apply()`. Kept in this dataclass for
365
+ convenience to have a shorter params list for the `train_step()` function
366
+ in your training loop.
367
+ params: The parameters to be updated by `tx` and used by `apply_fn`.
368
+ tx: An Optax gradient transformation.
369
+ opt_state: The state for `tx`.
370
+ dropout_rng: PRNG key for stochastic operations.
371
+ bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training.
372
+ """
373
+
374
+ step: int
375
+ apply_fn: Callable = struct.field(pytree_node=False)
376
+ get_attention_mask_fn: Callable = struct.field(pytree_node=False)
377
+ params: core.FrozenDict[str, Any]
378
+ tx: optax.GradientTransformation = struct.field(pytree_node=False)
379
+ opt_state: optax.OptState
380
+ dropout_rng: jnp.ndarray
381
+ max_grad_norm: Optional[float] = 1.0
382
+
383
+ def apply_gradients(self, *, grads, to_dtype, **kwargs):
384
+ """Updates `step`, `params`, `opt_state` and `**kwargs` in return value.
385
+
386
+ Note that internally this function calls `.tx.update()` followed by a call
387
+ to `optax.apply_updates()` to update `params` and `opt_state`.
388
+
389
+ Args:
390
+ grads: Gradients that have the same pytree structure as `.params`.
391
+ **kwargs: Additional dataclass attributes that should be `.replace()`-ed.
392
+
393
+ Returns:
394
+ An updated instance of `self` with `step` incremented by one, `params`
395
+ and `opt_state` updated by applying `grads`, and additional attributes
396
+ replaced as specified by `kwargs`.
397
+ """
398
+
399
+ # clip gradients by global l2 norm
400
+ casted_max_grad_norm = to_dtype(self.max_grad_norm)
401
+ g_norm = linear_algebra.global_norm(grads)
402
+ g_norm = jnp.maximum(casted_max_grad_norm, g_norm)
403
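+ # scale by max_norm / max(||g||, max_norm), i.e. min(1, max_norm / ||g||): gradients below the threshold pass through unchanged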
+ grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads)
404
+
405
+ # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training
406
+ # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is)
407
+ updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params)
408
+
409
+ new_params = optax.apply_updates(self.params, updates)
410
+ return self.replace(
411
+ step=self.step + 1,
412
+ params=new_params,
413
+ opt_state=to_dtype(new_opt_state),
414
+ **kwargs,
415
+ )
416
+
417
+ @classmethod
418
+ def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs):
419
+ """Creates a new instance with `step=0` and initialized `opt_state`."""
420
+ # downcast optimizer state to bf16 if mixed-precision training
421
+ opt_state = tx.init(to_dtype(params)) if tx is not None else None
422
+ return cls(
423
+ step=0,
424
+ apply_fn=apply_fn,
425
+ params=params,
426
+ tx=tx,
427
+ opt_state=opt_state,
428
+ **kwargs,
429
+ )
430
+
431
+ def replicate(self):
432
+ return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))
433
+
434
+
435
+ @flax.struct.dataclass
436
+ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
437
+ """
438
+ Data collator that will dynamically pad the inputs received.
439
+ Args:
440
+ processor ([`Wav2Vec2Processor`])
441
+ The processor used for proccessing the data.
442
+ decoder_start_token_id (:obj: `int`)
443
+ The begin-of-sentence of the decoder.
444
+ input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
445
+ Select a strategy to pad the returned input sequences (according to the model's padding side and padding index)
446
+ among:
447
+ * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
448
+ sequence if provided).
449
+ * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
450
+ maximum acceptable input length for the model if that argument is not provided.
451
+ * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
452
+ different lengths).
453
+ target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
454
+ Select a strategy to pad the returned target sequences (according to the model's padding side and padding index).
455
+ See above for details.
456
+ max_input_length (:obj:`float`, `optional`):
457
+ Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
458
+ pad_input_to_multiple_of (:obj:`int`, `optional`):
459
+ If set will pad the input sequence to a multiple of the provided value.
460
+ This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
461
+ 7.5 (Volta).
462
+ pad_target_to_multiple_of (:obj:`int`, `optional`):
463
+ If set will pad the target sequence to a multiple of the provided value.
464
+ This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
465
+ 7.5 (Volta).
466
+ """
467
+
468
+ processor: Any
469
+ input_padding: Union[bool, str] = "longest"
470
+ label_padding: Union[bool, str] = "max_length"
471
+ pad_input_to_multiple_of: Optional[int] = None
472
+ pad_to_multiple_of_label: Optional[int] = None
473
+ max_input_length: Optional[float] = None
474
+ max_label_length: Optional[float] = None
475
+
476
+ def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
477
+ # split inputs and labels since they have to be of different lengths and need
478
+ # different padding methods
479
+ input_features = [{"input_values": feature["input_values"]} for feature in features]
480
+ label_features = [{"input_ids": feature["labels"]} for feature in features]
481
+
482
+ # reformat list to dict and set to pytorch format
483
+ batch = self.processor.feature_extractor.pad(
484
+ input_features,
485
+ max_length=self.max_input_length,
486
+ padding=self.input_padding,
487
+ pad_to_multiple_of=self.pad_input_to_multiple_of,
488
+ return_tensors="np",
489
+ )
490
+
491
+ labels_batch = self.processor.tokenizer.pad(
492
+ label_features,
493
+ max_length=self.max_label_length,
494
+ padding=self.label_padding,
495
+ pad_to_multiple_of=self.pad_to_multiple_of_label,
496
+ return_tensors="np",
497
+ )
498
+
499
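+ # replace padded label positions (attention_mask == 0) with -100 so the CTC loss treats them as padding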
+ labels = labels_batch["input_ids"]
500
+ labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1))
501
+ labels = labels.filled(fill_value=-100)
502
+
503
+ batch["labels"] = labels
504
+
505
+ return batch
506
+
507
+
508
+ def get_grouped_indices(
509
+ dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
510
+ ) -> np.array:
511
+ """
512
+ Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)
513
+ Function that returns a list of indices in which each slice of `batch_size` consecutive indices correspond to elements of similar
514
+ lengths. To do this, the indices are:
515
+
516
+ - randomly permuted (if a JAX rng is specified)
517
+ - grouped in mega-batches of size `mega_batch_mult * batch_size`
518
+ - sorted by length in each mega-batch
519
+
520
+ The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of
521
+ maximum length placed first, so that an OOM happens sooner rather than later.
522
+ """
523
+ lengths = dataset["input_length"]
524
+
525
+ # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller.
526
+ if mega_batch_mult is None:
527
+ mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
528
+ # Just in case, for tiny datasets
529
+ if mega_batch_mult == 0:
530
+ mega_batch_mult = 1
531
+
532
+ # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler.
533
+ num_samples = len(lengths)
534
+ indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples)
535
+
536
+ megabatch_size = mega_batch_mult * batch_size
537
+ megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
538
+ megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches]
539
+
540
+ # The rest is to get the biggest batch first.
541
+ # Since each megabatch is sorted by descending length, the longest element is the first
542
+ megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
543
+ max_idx = np.argmax(megabatch_maximums).item()
544
+ # Switch to put the longest batch in first position
545
+ # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
546
+ megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]
547
+
548
+ megabatches = np.array([i for megabatch in megabatches for i in megabatch])
549
+
550
+ return megabatches
551
+
552
+
553
+ def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
554
+ """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
555
+ the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
556
+ num_samples = len(samples_idx)
557
+ if drop_last:
558
+ samples_to_remove = num_samples % batch_size
559
+ if samples_to_remove != 0:
560
+ samples_idx = samples_idx[:-samples_to_remove]
561
+ sections_split = num_samples // batch_size
562
+ samples_idx = samples_idx.reshape((sections_split, batch_size))
563
+ else:
564
+ sections_split = math.ceil(num_samples / batch_size)
565
+ samples_idx = np.array_split(samples_idx, sections_split)
566
+ return samples_idx
567
+
568
+
569
+ def write_train_metric(summary_writer, train_metrics, train_time, step):
570
+ summary_writer.scalar("train_time", train_time, step)
571
+
572
+ train_metrics = get_metrics(train_metrics)
573
+ for key, vals in train_metrics.items():
574
+ tag = f"train_{key}"
575
+ for i, val in enumerate(vals):
576
+ summary_writer.scalar(tag, val, step - len(vals) + i + 1)
577
+
578
+
579
+ def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
580
+ for metric_name, value in eval_metrics.items():
581
+ summary_writer.scalar(f"eval_{metric_name}", value, step)
582
+
583
+ if pred_str is not None:
584
+ # write output actual predictions for debugging
585
+ summary_writer.text("eval_predictions", "\n".join(pred_str), step)
586
+
587
+
588
+ def write_wandb_log(metrics, step, prefix=None):
589
+ if jax.process_index() == 0:
590
+ log_metrics = {}
591
+ for k, v in metrics.items():
592
+ if "layer" in k:
593
+ log_metrics[f"{k}/"] = v
594
+ elif prefix is not None:
595
+ log_metrics[f"{prefix}/{k}"] = v
596
+ else:
597
+ log_metrics[k] = v
598
+ wandb.log(log_metrics, step)
599
+
600
+
601
+ def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
602
+ if jax.process_index() == 0:
603
+ # convert str data to a wandb compatible format
604
+ str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
605
+ # we'll log the first 50 predictions for each epoch
606
+ wandb.log(
607
+ {
608
+ f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
609
+ columns=["label_str", "pred_str"], data=str_data[:num_log]
610
+ )
611
+ },
612
+ step,
613
+ )
614
+
615
+
616
+ def create_learning_rate_fn(
617
+ num_train_steps: int, num_warmup_steps: int, learning_rate: float
618
+ ) -> Callable[[int], jnp.array]:
619
+ """Returns a linear warmup, linear_decay learning rate function."""
620
+ warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
621
+ decay_fn = optax.linear_schedule(
622
+ init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
623
+ )
624
+ schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
625
+ return schedule_fn
626
+
627
+
628
+ def ctc_loss(
629
+ logits,
630
+ logits_attention_mask,
631
+ labels,
632
+ blank_id,
633
+ loss_reduction="mean",
634
+ output_emission_dict=False,
635
+ log_epsilon=-100000.0,
636
+ ):
637
+ """Computes CTC loss.
638
+ This function performs forward computation over an FSA with `N * 2` states
639
+ where `N` is the max number of labels. The states are split into two groups:
640
+ Phi states and emission states. a phi-state accepts repetition of
641
+ phi (blank)-symbols and transits to emission state when the correct label is
642
+ observed. An emission state accepts repetition of the label and transits to
643
+ the next phi states at any time (so called epsilon-transition).
644
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
645
+ and `N` denotes the time steps in `labels`.
646
+ Args:
647
+ logits: (B, T, K)-array containing log-probabilities of each class.
648
+ logitpaddings: (B, T)-array. Padding indicators for `logits`.
649
+ labels: (B, N)-array containing reference integer labels.
650
+ labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently,
651
+ `labels` must be right-padded, i.e. each row of `labelpaddings` must be
652
+ repetition of zeroes, followed by repetition of ones.
653
+ blank_id: Id for blank token.
654
+ loss_reduction: one of "mean", "sum", "default"
655
+ - "none": no reduction is applied.
656
+ - "mean": output loss will be divided by target lengths and then the
657
+ mean over the batch is taken.
658
+ - "sum": output loss are summed over batch
659
+ output_emission_dict: whether to output additional information about the emission probs
660
+ Returns:
661
+ A pair of `(per_seq_loss, aux)`.
662
+ per_seq_loss:
663
+ (B,)-array containing loss values for each sequence in the batch.
664
+ aux: Dictionary containing interim variables used for computing losses.
665
+ aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
666
+ phi-state corresponding to the n-th label.
667
+ aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
668
+ emission-state corresponding to the n-th label.
669
+ aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
670
+ corresponding to each time frame.
671
+ aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
672
+ corresponding to each time frame.
673
+ """
674
+ # label paddings are indicated by -100
675
+ labelpaddings = labels < 0
676
+ # logit paddings are the inverse of attention_mask
677
+ logitpaddings = ~logits_attention_mask
678
+
679
+ # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py
680
+ batchsize, unused_maxinputlen, num_classes = logits.shape
681
+ batchsize_, maxlabellen = labels.shape
682
+
683
+ logprobs = jax.nn.log_softmax(logits)
684
+ labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32)
685
+
686
+ # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1].
687
+ repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32)
688
+ repeat = jnp.pad(repeat, ((0, 0), (0, 1)))
689
+
690
+ logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1]
691
+ logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1]
692
+
693
+ one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K]
694
+ logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot)
695
+ logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N]
696
+
697
+ logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N]
698
+ logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0)
699
+ logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N]
700
+
701
+ def loop_body(prev, x):
702
+ prev_phi, prev_emit = prev
703
+ # emit-to-phi epsilon transition, except if the next label is repetition
704
+ prev_phi_orig = prev_phi
705
+ prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat))
706
+
707
+ logprob_emit, logprob_phi, pad = x
708
+
709
+ # phi-to-emit transition
710
+ next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit)
711
+ # self-loop transition
712
+ next_phi = prev_phi + logprob_phi
713
+ # emit-to-phi blank transition only when the next label is repetition
714
+ next_phi = next_phi.at[:, 1:].set(
715
+ jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat))
716
+ )
717
+
718
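+ # time frames marked as padding keep the previous forward probabilities unchanged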
+ pad = pad.reshape((batchsize, 1))
719
+ next_emit = pad * prev_emit + (1.0 - pad) * next_emit
720
+ next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi
721
+
722
+ return (next_phi, next_emit), (next_phi, next_emit)
723
+
724
+ xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
725
+ _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)
726
+
727
+ # last row needs to be updated with the last epsilon transition
728
+ logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
729
+ logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)
730
+
731
+ # extract per_seq_loss
732
+ one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1]
733
+ per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)
734
+
735
+ if loss_reduction == "mean":
736
+ target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
737
+ loss = (per_seq_loss / target_lengths).mean()
738
+ elif loss_reduction == "sum":
739
+ loss = per_seq_loss.sum()
740
+ else:
741
+ loss = per_seq_loss
742
+
743
+ if not output_emission_dict:
744
+ return loss
745
+
746
+ return loss, {
747
+ "logalpha_phi": logalpha_phi,
748
+ "logalpha_emit": logalpha_emit,
749
+ "logprobs_phi": logprobs_phi,
750
+ "logprobs_emit": logprobs_emit,
751
+ }
752
+
753
+
754
+ def make_dataset(data_args, seed=42):
755
+ # Pre-processing dataset
756
+ import re
757
+
758
+ def map_nst(entry):
759
+ text = entry["text"].lower()
760
+ text = text.replace("(...vær stille under dette opptaket...)", "")
761
+ text = re.sub('[áàâ]', 'a', text)
762
+ text = re.sub('[ä]', 'æ', text)
763
+ text = re.sub('[éèëê]', 'e', text)
764
+ text = re.sub('[íìïî]', 'i', text)
765
+ text = re.sub('[óòöô]', 'o', text)
766
+ text = re.sub('[ö]', 'ø', text)
767
+ text = re.sub('[ç]', 'c', text)
768
+ text = re.sub('[úùüû]', 'u', text)
769
+ # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
770
+ text = re.sub('\s+', ' ', text)
771
+ return {"text": text}
772
+
773
+ def filter_nst(entry):
774
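+ # the wav2vec2 feature encoder emits roughly one frame per 320 samples, so CTC needs at least as many frames as characters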
+ if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
775
+ return False # Too short
776
+ if re.match(entry["type"], "pIW|CA"):
777
+ return False # Spelling out words
778
+ return True
779
+
780
+ def filter_npsc(entry):
781
+ # False if there are digits in the text
782
+ if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
783
+ return False # Too short
784
+ if re.search("\d", entry["text"]):
785
+ return False
786
+ return True
787
+
788
+ def map_npsc(entry):
789
+ batch = {"text": entry["text"].lower()}
790
+ batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
791
+ batch["text"] = re.sub('[ä]', 'æ', batch["text"])
792
+ batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
793
+ batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
794
+ batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
795
+ batch["text"] = re.sub('[ö]', 'ø', batch["text"])
796
+ batch["text"] = re.sub('[ç]', 'c', batch["text"])
797
+ batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
798
+ batch["text"] = re.sub('\s', ' ', batch["text"])
799
+ batch["text"] = re.sub('<ee>', 'eee', batch["text"])
800
+ batch["text"] = re.sub('<qq>', 'qqq', batch["text"])
801
+ batch["text"] = re.sub('<mm>', 'mmm', batch["text"])
802
+ batch["text"] = re.sub('<inaudible>', 'xxx', batch["text"])
803
+ # batch["text"] = re.sub('<inaudible>', '?', batch["text"])
804
+ if "<" in batch["text"]:
805
+ raise ValueError(batch["text"])
806
+ return batch
807
+
808
+ nst = datasets.load_dataset("NbAiLab/NST", "no-close")
809
+ npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
810
+ # TODO NST_hesitate
811
+
812
+ split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC
813
+ nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed)
814
+ nst[data_args.train_split_name] = nst_train["train"]
815
+ nst[data_args.eval_split_name] = nst_train["test"]
816
+
817
+ nst = nst.filter(filter_nst).map(
818
+ map_nst,
819
+ num_proc=data_args.preprocessing_num_workers,
820
+ desc="filtering NST",
821
+ ).shuffle(seed=seed)
822
+ npsc = npsc.filter(filter_npsc).map(
823
+ map_npsc,
824
+ num_proc=data_args.preprocessing_num_workers,
825
+ desc="filtering NPSC",
826
+ ).shuffle(seed=seed)
827
+
828
+ npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]])
829
+ nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]])
830
+
831
+ combined = {}
832
+ for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name:
833
+ probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples
834
+ probs = (probs / probs.sum()).tolist()
835
+ comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
836
+ combined[split] = comb
837
+
838
+ return datasets.DatasetDict(**combined)
839
+
840
+ def main():
841
+ # 1. Parse input arguments
842
+ # See all possible arguments in src/transformers/training_args.py
843
+ # or by passing the --help flag to this script.
844
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
845
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))
846
+
847
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
848
+ # If we pass only one argument to the script and it's the path to a json file,
849
+ # let's parse it to get our arguments.
850
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
851
+ else:
852
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
853
+
854
+ # 2. Setup logging
855
+ # Make one log on every process with the configuration for debugging.
856
+ logging.basicConfig(
857
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
858
+ datefmt="%m/%d/%Y %H:%M:%S",
859
+ handlers=[logging.StreamHandler(sys.stdout)],
860
+ )
861
+ # Set the verbosity to info of the Transformers logger.
862
+ # We only want one process per machine to log things on the screen.
863
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR)
864
+ if jax.process_index() == 0:
865
+ datasets.utils.logging.set_verbosity_warning()
866
+ transformers.utils.logging.set_verbosity_info()
867
+ else:
868
+ datasets.utils.logging.set_verbosity_error()
869
+ transformers.utils.logging.set_verbosity_error()
870
+
871
+ # Set up wandb run
872
+ if jax.process_index() == 0:
873
+ wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type)
874
+
875
+ logger.info("Training/evaluation parameters %s", training_args)
876
+
877
+ # Set the default TPU matmul precision and display the number of devices
878
+ jax.config.update("jax_default_matmul_precision", training_args.matmul_precision)
879
+ logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}")
880
+
881
+ # 4. Load dataset
882
+
883
+ set_seed(training_args.seed)
884
+ raw_datasets = make_dataset(data_args, seed=training_args.seed)
885
+
886
+ # raw_datasets = DatasetDict()
887
+
888
+ # if training_args.do_train:
889
+ # raw_datasets[data_args.train_split_name] = load_dataset(
890
+ # data_args.dataset_name,
891
+ # data_args.dataset_config_name,
892
+ # split=data_args.train_split_name,
893
+ # cache_dir=data_args.dataset_cache_dir,
894
+ # use_auth_token=True if model_args.use_auth_token else None,
895
+ # )
896
+
897
+ # if training_args.do_eval:
898
+ # raw_datasets[data_args.eval_split_name] = load_dataset(
899
+ # data_args.dataset_name,
900
+ # data_args.dataset_config_name,
901
+ # split=data_args.eval_split_name,
902
+ # cache_dir=data_args.dataset_cache_dir,
903
+ # use_auth_token=True if model_args.use_auth_token else None,
904
+ # )
905
+
906
+ # if training_args.do_predict:
907
+ # test_split = data_args.test_split_name.split("+")
908
+ # for split in test_split:
909
+ # raw_datasets[split] = load_dataset(
910
+ # data_args.dataset_name,
911
+ # data_args.dataset_config_name,
912
+ # split=split,
913
+ # cache_dir=data_args.dataset_cache_dir,
914
+ # use_auth_token=True if model_args.use_auth_token else None,
915
+ # )
916
+
917
+ if not training_args.do_train and not training_args.do_eval and not training_args.do_predict:
918
+ raise ValueError(
919
+ "Cannot not train, not do evaluation and not do prediction. At least one of "
920
+ "training, evaluation or prediction has to be done."
921
+ )
922
+
923
+ # if not training, there is no need to run multiple epochs
924
+ if not training_args.do_train:
925
+ training_args.num_train_epochs = 1
926
+
927
+ if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names:
928
+ raise ValueError(
929
+ f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
930
+ "Make sure to set `--audio_column_name` to the correct audio column - one of "
931
+ f"{', '.join(next(iter(raw_datasets.values())).column_names)}."
932
+ )
933
+
934
+ if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names:
935
+ raise ValueError(
936
+ f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
937
+ "Make sure to set `--text_column_name` to the correct text column - one of "
938
+ f"{', '.join(next(iter(raw_datasets.values())).column_names)}."
939
+ )
940
+
941
+ # 5. Load pretrained model, tokenizer, and feature extractor
942
+ #
943
+ # Distributed training:
944
+ # The .from_pretrained methods guarantee that only one local process can concurrently
945
+ config = Wav2Vec2Config.from_pretrained(
946
+ model_args.config_name if model_args.config_name else model_args.model_name_or_path,
947
+ cache_dir=model_args.cache_dir,
948
+ revision=model_args.model_revision,
949
+ use_auth_token=True if model_args.use_auth_token else None,
950
+ )
951
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
952
+ model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
953
+ cache_dir=model_args.cache_dir,
954
+ revision=model_args.model_revision,
955
+ use_auth_token=True if model_args.use_auth_token else None,
956
+ )
957
+ tokenizer = AutoTokenizer.from_pretrained(
958
+ model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
959
+ cache_dir=model_args.cache_dir,
960
+ revision=model_args.model_revision,
961
+ use_auth_token=True if model_args.use_auth_token else None,
962
+ )
963
+ # update config according to training args, model args, and tokenizer attributes
964
+ config.update(
965
+ {
966
+ "feat_proj_dropout": model_args.feat_proj_dropout,
967
+ "attention_dropout": model_args.attention_dropout,
968
+ "hidden_dropout": model_args.hidden_dropout,
969
+ "final_dropout": model_args.final_dropout,
970
+ "mask_time_prob": model_args.mask_time_prob,
971
+ "mask_time_length": model_args.mask_time_length,
972
+ "mask_feature_prob": model_args.mask_feature_prob,
973
+ "mask_feature_length": model_args.mask_feature_length,
974
+ "gradient_checkpointing": training_args.gradient_checkpointing,
975
+ "layerdrop": model_args.layerdrop,
976
+ "ctc_loss_reduction": model_args.ctc_loss_reduction,
977
+ "ctc_zero_infinity": model_args.ctc_zero_infinity,
978
+ "pad_token_id": tokenizer.pad_token_id,
979
+ "vocab_size": tokenizer.vocab_size, # len(tokenizer),
980
+ "activation_dropout": model_args.activation_dropout,
981
+ }
982
+ )
983
+
984
+ if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr":
985
+ raise ValueError(
986
+ "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to "
987
+ "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus,"
988
+ "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely "
989
+ "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`."
990
+ )
991
+
992
+ if training_args.precision == "full_mixed":
993
+ dtype = jnp.bfloat16
994
+ training_args.mixed_precision = True
995
+ elif training_args.precision == "half_mixed":
996
+ dtype = jnp.bfloat16
997
+ training_args.mixed_precision = False
998
+ else:
999
+ dtype = jnp.float32
1000
+ training_args.mixed_precision = False
1001
+
1002
+ model = FlaxWav2Vec2ForCTC.from_pretrained(
1003
+ model_args.model_name_or_path,
1004
+ config=config,
1005
+ dtype=dtype,
1006
+ cache_dir=model_args.cache_dir,
1007
+ revision=model_args.model_revision,
1008
+ use_auth_token=True if model_args.use_auth_token else None,
1009
+ from_pt=True,
1010
+ )
1011
+
1012
+ # 6. Resample speech dataset ALWAYS
1013
+ raw_datasets = raw_datasets.cast_column(
1014
+ data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
1015
+ )
1016
+
1017
+ # 7. Preprocessing the datasets.
1018
+ # We need to read the audio files as arrays and tokenize the targets.
1019
+ max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate)
1020
+ min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate)
1021
+ max_target_length = data_args.max_label_length
1022
+ min_target_length = data_args.min_label_length
1023
+ pad_input_to_multiple_of = data_args.pad_input_to_multiple_of
1024
+ audio_column_name = data_args.audio_column_name
1025
+ num_workers = data_args.preprocessing_num_workers
1026
+ text_column_name = data_args.text_column_name
1027
+ model_input_name = feature_extractor.model_input_names[0]
1028
+ do_lower_case = data_args.do_lower_case
1029
+ dataset_name = data_args.dataset_name
1030
+ chars_to_ignore = ', ? . ! - ; : " “ % ‘ ” ?'.split(" ")
1031
+ chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]'
1032
+ # gigaspeech_punctuation = {" <comma>": ",", " <period>": ".", " <questionmark>": "?", " <exclamationpoint>": "!"}
1033
+ # gigaspeech_disfluencies = ["<other>", "<sil>"]
1034
+ # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "<a_aside>", "<b_aside>", "<e_aside>", "[laughter-",
1035
+ # "[vocalized-noise]", "_1"]
1036
+ # swb_punctuations = ["{", "}", "[", "]-", "]"]
1037
+ # earnings_disfluencies = ["<crosstalk>", "<affirmative>", "<inaudible>", "inaudible", "<laugh>", "<unk>"]
1038
+ ignore_segments = ["ignore_time_segment_in_scoring", "<noise>", "<music>", "[noise]", "[laughter]", "[silence]",
1039
+ "[vocalized-noise]", "<crosstalk>", "<affirmative>", "<inaudible>", "<laugh>", "<other>", "<sil>", ""]
1040
+
1041
+ if training_args.do_train and data_args.max_train_samples is not None:
1042
+ raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples))
1043
+
1044
+ if training_args.do_eval and data_args.max_eval_samples is not None:
1045
+ raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples))
1046
+
1047
+ if training_args.do_predict and data_args.max_test_samples is not None:
1048
+ raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_test_samples))
1049
+
1050
+ if training_args.do_train and data_args.remove_punctuation:
1051
+
1052
+ def remove_punctuation(batch):
1053
+ batch[text_column_name] = (
1054
+ re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "")
1055
+ )
+ return batch
1056
+
1057
+ raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map(
1058
+ remove_punctuation,
1059
+ num_proc=data_args.preprocessing_num_workers,
1060
+ desc="removing punctuation from train split",
1061
+ )
1062
+
1063
+ # filter data where the targets are ignored in scoring
1064
+ def is_target_labels(input_str):
1065
+ return input_str.lower() not in ignore_segments
1066
+
1067
+ raw_datasets = raw_datasets.filter(
1068
+ is_target_labels,
1069
+ num_proc=num_workers,
1070
+ input_columns=[text_column_name],
1071
+ desc="filtering data where the targets are ignored in scoring",
1072
+ )
1073
+
1074
+ def prepare_dataset(batch):
1075
+ # process audio
1076
+ try:
1077
+ sample = batch[audio_column_name]
1078
+ except ValueError:
1079
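+ # if the audio cannot be decoded, fall back to a dummy one-sample array at the expected sampling rate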
+ sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate}
1080
+ inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
1081
+ # process audio length
1082
+ batch[model_input_name] = inputs.input_values[0]
1083
+ batch["input_length"] = len(batch["input_values"])
1084
+
1085
+ # process targets
1086
+ input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
1087
+
1088
+ # if dataset_name == "google/xtreme_s":
1089
+ # # Finally, we tokenize the processed text
1090
+ # batch["labels"] = tokenizer(input_str).input_ids
1091
+ # batch["labels_length"] = len(batch["labels"])
1092
+ # return batch
1093
+
1094
+ # # Common Voice 9
1095
+ # if input_str.startswith('"') and input_str.endswith('"'):
1096
+ # # we can remove trailing quotation marks as they do not affect the transcription
1097
+ # input_str = input_str[1:-1]
1098
+ # # normalize quotation marks
1099
+ # input_str = re.sub(r'["“”]', '"', input_str)
1100
+ # # normalize apostrophes
1101
+ # input_str = re.sub(r"[’']", "'", input_str)
1102
+ # # normalize hyphens
1103
+ # input_str = re.sub(r"[—–]", "-", input_str)
1104
+ # # replace double quotation marks with single
1105
+ # input_str = input_str.replace('""', '"')
1106
+ # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str):
1107
+ # # for CV9, we'll normalize the text to always finish with punctuation
1108
+ # if input_str[-1] not in [".", "?", "!"]:
1109
+ # input_str = input_str + "."
1110
+
1111
+ # # TEDLIUM-3
1112
+ # # delete the <unk> token from the text and replace spaced apostrophes with un-spaced
1113
+ # input_str = input_str.replace("<unk>", "").replace(" '", "'")
1114
+
1115
+ # # GigaSpeech
1116
+ # for disfluency in gigaspeech_disfluencies:
1117
+ # input_str = input_str.replace(disfluency, "")
1118
+ # # convert spelled out punctuation to symbolic form
1119
+ # for punctuation, replacement in gigaspeech_punctuation.items():
1120
+ # input_str = input_str.replace(punctuation, replacement)
1121
+ # if dataset_name == "speechcolab/gigaspeech" and len(input_str):
1122
+ # # for GS, we'll normalize the text to always finish with punctuation
1123
+ # if input_str[-1] not in [".", "?", "!"]:
1124
+ # input_str = input_str + "."
1125
+
1126
+ # # SWB
1127
+ # for disfluency in swb_disfluencies:
1128
+ # input_str = input_str.replace(disfluency, "")
1129
+ # # remove parenthesised text (test data only)
1130
+ # input_str = re.sub("[\(].*?[\)]", "", input_str)
1131
+ # for punctuation in swb_punctuations:
1132
+ # input_str = input_str.replace(punctuation, "")
1133
+ # # replace anomalous words with their correct transcriptions
1134
+ # split_str = input_str.split("/")
1135
+ # if len(split_str) > 1:
1136
+ # input_str = " ".join(
1137
+ # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]])
1138
+
1139
+ # # Earnings 22
1140
+ # for disfluency in earnings_disfluencies:
1141
+ # input_str = input_str.replace(disfluency, "")
1142
+ # # replace mal-formatted ellipsis
1143
+ # input_str = input_str.replace("…", ".")
1144
+
1145
+ # JIWER compliance
1146
+ # remove multiple spaces
1147
+ input_str = re.sub(r"\s\s+", " ", input_str)
1148
+ # strip trailing spaces
1149
+ input_str = input_str.strip()
1150
+
1151
+ # Finally, we tokenize the processed text
1152
+ batch["labels"] = tokenizer(input_str).input_ids
1153
+ batch["labels_length"] = len(batch["labels"])
1154
+ return batch
1155
+
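# A minimal sketch of what prepare_dataset yields per example, assuming 16 kHz audio and
# a default Wav2Vec2 feature extractor (the actual run loads extractor and tokenizer from "./"):
import numpy as np
from transformers import Wav2Vec2FeatureExtractor
sketch_extractor = Wav2Vec2FeatureExtractor()             # defaults: 16 kHz, raw waveform features
sketch_audio = np.zeros(16000, dtype=np.float32)          # one second of silence
sketch_inputs = sketch_extractor(sketch_audio, sampling_rate=16000)
sketch_input_values = sketch_inputs.input_values[0]       # -> batch["input_values"]
sketch_input_length = len(sketch_input_values)            # -> batch["input_length"], used for length filtering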
1156
+ vectorized_datasets = raw_datasets.map(
1157
+ prepare_dataset,
1158
+ remove_columns=next(iter(raw_datasets.values())).column_names,
1159
+ num_proc=num_workers,
1160
+ desc="preprocess dataset",
1161
+ )
1162
+
1163
+ # filter data with inputs shorter than min_input_length or longer than max_input_length
1164
+ def is_audio_in_length_range(length):
1165
+ return length > min_input_length and length < max_input_length
1166
+
1167
+ vectorized_datasets = vectorized_datasets.filter(
1168
+ is_audio_in_length_range,
1169
+ num_proc=num_workers,
1170
+ input_columns=["input_length"],
1171
+ )
1172
+
1173
+ # filter data with targets shorter than min_target_length or longer than max_target_length
1174
+ def is_labels_in_length_range(length):
1175
+ return length > min_target_length # and length < max_target_length
1176
+
1177
+ vectorized_datasets = vectorized_datasets.filter(
1178
+ is_labels_in_length_range,
1179
+ num_proc=num_workers,
1180
+ input_columns=["labels_length"],
1181
+ )
1182
+
1183
+ # for large datasets it is advised to run the preprocessing on a
1184
+ # single machine first with `args.preprocessing_only` since there will most likely
1185
+ # be a timeout when running the script in distributed mode.
1186
+ # In a second step `args.preprocessing_only` can then be set to `False` to load the
1187
+ # cached dataset
1188
+ if data_args.preprocessing_only:
1189
+ cache = {k: v.cache_files for k, v in vectorized_datasets.items()}
1190
+ logger.info(f"Data preprocessing finished. Files cached at {cache}.")
1191
+ return
1192
+
1193
+ # 8. Load Metrics
1194
+ wer_metric = load_metric("wer")
1195
+ cer_metric = load_metric("cer")
1196
+
1197
+ def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]):
1198
+ padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids))
1199
+
1200
+ pred_str = tokenizer.batch_decode(pred_ids)
1201
+ # we do not want to group tokens when computing the metrics
1202
+ label_str = tokenizer.batch_decode(padded_ids, group_tokens=False)
1203
+
1204
+ wer = wer_metric.compute(predictions=pred_str, references=label_str)
1205
+ cer = cer_metric.compute(predictions=pred_str, references=label_str)
1206
+
1207
+ return {"wer": wer, "cer": cer}, pred_str, label_str
1208
+
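# A small worked example of the label realignment above (hypothetical ids, assumed pad id 0):
# -100 marks padded label positions and is mapped back to the pad token before decoding,
# so padding never counts as a WER/CER error.
import numpy as np
sketch_label_ids = np.array([[12, 7, -100, -100], [3, 4, 5, -100]])
sketch_pad_token_id = 0                                    # assumed pad id
sketch_padded = np.where(sketch_label_ids == -100, sketch_pad_token_id, sketch_label_ids)
# sketch_padded -> [[12  7  0  0]
#                   [ 3  4  5  0]]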
1209
+ # 9. save feature extractor, tokenizer and config
1210
+ feature_extractor.save_pretrained(training_args.output_dir)
1211
+ tokenizer.save_pretrained(training_args.output_dir)
1212
+ config.save_pretrained(training_args.output_dir)
1213
+
1214
+ processor = AutoProcessor.from_pretrained(training_args.output_dir)
1215
+
1216
+ data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding(
1217
+ processor=processor,
1218
+ input_padding="longest",
1219
+ pad_input_to_multiple_of=pad_input_to_multiple_of,
1220
+ max_label_length=data_args.max_label_length,
1221
+ )
1222
+
1223
+ # Enable tensorboard only on the master node
1224
+ has_tensorboard = is_tensorboard_available()
1225
+ if has_tensorboard and jax.process_index() == 0:
1226
+ try:
1227
+ from flax.metrics.tensorboard import SummaryWriter
1228
+
1229
+ summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir))
1230
+ except ImportError as ie:
1231
+ has_tensorboard = False
1232
+ logger.warning(
1233
+ f"Unable to display metrics through TensorBoard because some packages are not installed: {ie}"
1234
+ )
1235
+ else:
1236
+ logger.warning(
1237
+ "Unable to display metrics through TensorBoard because the package is not installed: "
1238
+ "Please run `pip install tensorboard` to enable."
1239
+ )
1240
+
1241
+ # 10. Handle the repository creation
1242
+ if training_args.push_to_hub:
1243
+ with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f:
1244
+ git_lfs_extensions = f.read()
1245
+ if "*.wandb" not in git_lfs_extensions:
1246
+ f.write("*.wandb filter=lfs diff=lfs merge=lfs -text")
1247
+ if training_args.hub_model_id is None:
1248
+ repo_name = get_full_repo_name(
1249
+ Path(training_args.output_dir).absolute().name, token=training_args.hub_token
1250
+ )
1251
+ else:
1252
+ repo_name = training_args.hub_model_id
1253
+ repo = Repository(training_args.output_dir, clone_from=repo_name)
1254
+
1255
+ # 11. Initialize our training
1256
+ rng = jax.random.PRNGKey(training_args.seed)
1257
+ rng, dropout_rng = jax.random.split(rng)
1258
+
1259
+ # Store some constants
1260
+ max_steps = int(training_args.max_steps)
1261
+ gradient_accumulation_steps = int(training_args.gradient_accumulation_steps)
1262
+ train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
1263
+ batch_size_per_update = train_batch_size * gradient_accumulation_steps
1264
+ per_device_eval_batch_size = int(training_args.per_device_eval_batch_size)
1265
+ eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
1266
+ to_dtype = to_bf16 if training_args.mixed_precision else to_fp32
1267
+
1268
+ if training_args.do_train:
1269
+ num_train_samples = len(vectorized_datasets[data_args.train_split_name])
1270
+ steps_per_epoch = num_train_samples // batch_size_per_update
1271
+ if max_steps > 0:
1272
+ num_epochs = -(training_args.max_steps // -steps_per_epoch)
1273
+ total_train_steps = max_steps
1274
+ else:
1275
+ num_epochs = int(training_args.num_train_epochs)
1276
+ total_train_steps = steps_per_epoch * num_epochs
1277
+
1278
+ # Create learning rate schedule
1279
+ # Create learning rate schedule
1280
+ linear_decay_lr_schedule_fn = create_learning_rate_fn(
1281
+ total_train_steps,
1282
+ training_args.warmup_steps,
1283
+ training_args.learning_rate,
1284
+ )
1285
+
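# A sketch of the warmup + linear-decay schedule that create_learning_rate_fn is assumed
# to implement, written with stock optax schedules (names here are illustrative only):
import optax
def sketch_lr_fn(total_steps, warmup_steps, peak_lr):
    warmup = optax.linear_schedule(init_value=0.0, end_value=peak_lr, transition_steps=warmup_steps)
    decay = optax.linear_schedule(init_value=peak_lr, end_value=0.0, transition_steps=total_steps - warmup_steps)
    return optax.join_schedules(schedules=[warmup, decay], boundaries=[warmup_steps])
# the peak value is reached at step == warmup_steps, then the rate decays linearly to 0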
1286
+ # We use Optax's "masking" functionality to not apply weight decay
1287
+ # to bias and LayerNorm scale parameters. decay_mask_fn returns a
1288
+ # mask boolean with the same structure as the parameters.
1289
+ # The mask is True for parameters that should be decayed.
1290
+ # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart.
1291
+ # For FlaxT5, one should correct the layer norm parameter naming
1292
+ # accordingly - see `run_t5_mlm_flax.py` e.g.
1293
+ def decay_mask_fn(params):
1294
+ flat_params = traverse_util.flatten_dict(params)
1295
+ layer_norm_params = [
1296
+ (name, "scale")
1297
+ for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"]
1298
+ ]
1299
+ flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params}
1300
+ return traverse_util.unflatten_dict(flat_mask)
1301
+
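# A toy check of the masking idea on a hypothetical parameter tree: biases and layer-norm
# scales come out False (no weight decay), kernels come out True.
from flax import traverse_util
sketch_params = {"encoder": {"dense": {"kernel": 1.0, "bias": 0.0},
                             "layer_norm": {"scale": 1.0, "bias": 0.0}}}
sketch_flat = traverse_util.flatten_dict(sketch_params)
sketch_ln = [("layer_norm", "scale")]
sketch_mask = {p: (p[-1] != "bias" and p[-2:] not in sketch_ln) for p in sketch_flat}
# traverse_util.unflatten_dict(sketch_mask) ->
# {"encoder": {"dense": {"kernel": True, "bias": False},
#              "layer_norm": {"scale": False, "bias": False}}}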
1302
+ if training_args.adafactor:
1303
+ # Create Adafactor optimizer
1304
+ optim = optax.adafactor(
1305
+ learning_rate=linear_decay_lr_schedule_fn,
1306
+ dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32,
1307
+ weight_decay_rate=training_args.weight_decay,
1308
+ weight_decay_mask=decay_mask_fn,
1309
+ )
1310
+ else:
1311
+ # Create AdamW optimizer
1312
+ optim = optax.adamw(
1313
+ learning_rate=linear_decay_lr_schedule_fn,
1314
+ b1=training_args.adam_beta1,
1315
+ b2=training_args.adam_beta2,
1316
+ eps=training_args.adam_epsilon,
1317
+ weight_decay=training_args.weight_decay,
1318
+ mask=decay_mask_fn,
1319
+ )
1320
+
1321
+ # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. gradient accumulation steps > 1)
1322
+ if training_args.multisteps and gradient_accumulation_steps > 1:
1323
+ optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False)
1324
+ else:
1325
+ num_epochs = 0
1326
+ total_train_steps = 0
1327
+ num_train_samples = 0
1328
+ optim = None
1329
+
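# A minimal sketch of how optax.MultiSteps accumulates gradients (toy shapes, assumed
# AdamW hyper-parameters): the inner optimizer only advances once every k calls to
# update; the other k - 1 calls return zero updates.
import jax.numpy as jnp
import optax
sketch_k = 4
sketch_tx = optax.MultiSteps(optax.adamw(learning_rate=1e-3), every_k_schedule=sketch_k)
sketch_params = {"w": jnp.zeros(3)}
sketch_state = sketch_tx.init(sketch_params)
sketch_grads = {"w": jnp.ones(3)}
for _ in range(sketch_k):                                   # one "real" step after k micro-batches
    sketch_updates, sketch_state = sketch_tx.update(sketch_grads, sketch_state, sketch_params)
    sketch_params = optax.apply_updates(sketch_params, sketch_updates)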
1330
+ # Setup train state
1331
+ state = MixedPrecisionTrainState.create(
1332
+ apply_fn=model.__call__,
1333
+ get_attention_mask_fn=model._get_feature_vector_attention_mask,
1334
+ params=model.params,
1335
+ tx=optim,
1336
+ to_dtype=to_dtype,
1337
+ dropout_rng=dropout_rng,
1338
+ max_grad_norm=training_args.max_grad_norm,
1339
+ )
1340
+
1341
+ # Replicate the train state on each device
1342
+ state = state.replicate()
1343
+ blank_id = model.config.pad_token_id
1344
+
1345
+ # Define gradient update step fn
1346
+ def train_step(state, batch):
1347
+ # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch
1348
+ dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng)
1349
+
1350
+ def compute_loss(params, minibatch):
1351
+ labels = minibatch.pop("labels")
1352
+ logits = state.apply_fn(
1353
+ **minibatch,
1354
+ params=params,
1355
+ dropout_rng=dropout_rng,
1356
+ freeze_feature_encoder=model_args.freeze_feature_encoder,
1357
+ train=True,
1358
+ )[0]
1359
+ logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"])
1360
+ loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean")
1361
+
1362
+ return loss
1363
+
1364
+ grad_fn = jax.value_and_grad(compute_loss)
1365
+
1366
+ if gradient_accumulation_steps == 1 or training_args.multisteps:
1367
+ loss, grad = grad_fn(to_dtype(state.params), batch)
1368
+
1369
+ # Custom gradient accumulation
1370
+ else:
1371
+ # add a first dimension over gradient_accumulation_steps for minibatch slices
1372
+ batch = jax.tree_map(
1373
+ lambda x: x.reshape(
1374
+ gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::]
1375
+ ),
1376
+ batch,
1377
+ )
1378
+
1379
+ def accum_minibatch_step(accum_grad, minibatch):
1380
+ # compute loss, num labels and grad over minibatch and accumulate
1381
+ loss, grad = grad_fn(to_dtype(state.params), minibatch)
1382
+ return jax.tree_map(jnp.add, accum_grad, grad), loss
1383
+
1384
+ # create an initial state for accumulating losses, num labels and gradients
1385
+ init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params))
1386
+ # loop accum minibatch step over the number of gradient accumulation steps
1387
+ grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch)
1388
+
1389
+ # update state
1390
+ new_state = state.apply_gradients(
1391
+ grads=grad,
1392
+ dropout_rng=new_dropout_rng,
1393
+ to_dtype=to_dtype,
1394
+ )
1395
+
1396
+ # compute gradient norms over all layers and globally for detailed monitoring
1397
+ layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad)
1398
+ logs = {
1399
+ "layer_grad_norm": layer_grad_norm,
1400
+ "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)),
1401
+ }
1402
+
1403
+ # compute parameter norms over all layers and globally for detailed monitoring
1404
+ layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params)
1405
+ logs["layer_param_norm"] = layer_param_norm
1406
+ logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm))
1407
+
1408
+ metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)}
1409
+ metrics.update(logs)
1410
+
1411
+ metrics = jax.lax.pmean(metrics, axis_name="batch")
1412
+ # metrics = to_fp32(metrics)
1413
+
1414
+ return new_state, metrics
1415
+
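# A sketch of the CTC objective computed by ctc_loss above, on toy shapes, assuming an
# optax version that ships optax.ctc_loss (the local helper is assumed to behave
# similarly; the blank index is the model's pad_token_id, as with blank_id above).
import jax.numpy as jnp
import optax
B, T, V, N = 2, 50, 32, 10                                  # batch, frames, vocab size, label length
sketch_logits = jnp.zeros((B, T, V))
sketch_logit_paddings = jnp.zeros((B, T))                   # 0.0 = real frame, 1.0 = padded frame
sketch_labels = jnp.ones((B, N), dtype=jnp.int32)
sketch_label_paddings = jnp.zeros((B, N))
sketch_per_example = optax.ctc_loss(sketch_logits, sketch_logit_paddings,
                                    sketch_labels, sketch_label_paddings, blank_id=0)
sketch_loss = sketch_per_example.mean()                     # "mean" reduction, as in loss_reduction="mean"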
1416
+ # Define eval fn
1417
+ def eval_step(params, batch):
1418
+ labels = batch.pop("labels")
1419
+ logits = model(**batch, params=params, train=False)[0]
1420
+
1421
+ logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"])
1422
+ loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean")
1423
+
1424
+ pred_ids = jnp.argmax(logits, axis=-1)
1425
+
1426
+ # summarize metrics
1427
+ metrics = {"loss": loss}
1428
+ metrics = jax.lax.pmean(metrics, axis_name="batch")
1429
+ # metrics = to_fp32(metrics)
1430
+ return metrics, pred_ids
1431
+
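# A sketch of the greedy CTC decoding rule applied to pred_ids downstream (hypothetical
# ids, blank/pad id 0): collapse repeated ids, then drop blanks. This is roughly what
# tokenizer.batch_decode does when token grouping is left enabled.
import itertools
sketch_pred_ids = [0, 5, 5, 0, 0, 7, 7, 7, 0, 3]
sketch_blank_id = 0
sketch_collapsed = [k for k, _ in itertools.groupby(sketch_pred_ids)]    # [0, 5, 0, 7, 0, 3]
sketch_decoded = [i for i in sketch_collapsed if i != sketch_blank_id]   # [5, 7, 3]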
1432
+ # Create parallel version of the train and eval step
1433
+ if training_args.do_train:
1434
+ p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,))
1435
+
1436
+ if training_args.do_eval:
1437
+ p_eval_step = jax.pmap(eval_step, "batch")
1438
+
1439
+ def run_evaluation(step):
1440
+ if training_args.do_eval:
1441
+ # ======================== Evaluating ==============================
1442
+ eval_metrics = []
1443
+ eval_preds = []
1444
+ eval_labels = []
1445
+
1446
+ # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length
1447
+ eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size)
1448
+ eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False)
1449
+
1450
+ for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)):
1451
+ samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx]
1452
+ batch = data_collator(samples)
1453
+ labels = batch["labels"]
1454
+
1455
+ metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size)
1456
+ eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1])))
1457
+ eval_metrics.append(metrics)
1458
+
1459
+ eval_labels.extend(labels)
1460
+
1461
+ # normalize eval metrics
1462
+ eval_metrics = get_metrics(eval_metrics)
1463
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
1464
+ eval_metrics = to_fp32(eval_metrics)
1465
+
1466
+ # always run compute metrics
1467
+ error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels)
1468
+ eval_metrics.update(error_rate_metric)
1469
+ error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()])
1470
+
1471
+ # Print metrics and update progress bar
1472
+ desc = f"Step... ({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})"
1473
+ epochs.write(desc)
1474
+ epochs.desc = desc
1475
+
1476
+ # Save metrics
1477
+ write_wandb_log(eval_metrics, step, prefix="eval")
1478
+ write_wandb_pred(pred_str, label_str, step)
1479
+ # if has_tensorboard and jax.process_index() == 0:
1480
+ # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str)
1481
+
1482
+ def save_checkpoint(step):
1483
+ # save and push checkpoint to the hub
1484
+ if jax.process_index() == 0:
1485
+ params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
1486
+ model.save_pretrained(training_args.output_dir, params=params)
1487
+ tokenizer.save_pretrained(training_args.output_dir)
1488
+ if training_args.push_to_hub:
1489
+ repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False)
1490
+
1491
+ logger.info("***** Running training *****")
1492
+ logger.info(f" Num examples = {num_train_samples}")
1493
+ logger.info(f" Num Epochs = {num_epochs}")
1494
+ logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}")
1495
+ logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}")
1496
+ logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}")
1497
+ logger.info(f" Total optimization steps = {total_train_steps}")
1498
+ logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}")
1499
+ logger.info(f" Use scan: {config.use_scan}")
1500
+ logger.info(f" Fuse matmuls: {config.fuse_matmuls}")
1501
+
1502
+ train_time = cur_step = 0
1503
+ skip_epochs = data_args.skip_steps // (num_train_samples // batch_size_per_update)
1504
+ epochs = tqdm(range(skip_epochs, num_epochs), desc=f"Epoch ... ({skip_epochs + 1}/{num_epochs})", position=0)
1505
+ for epoch in epochs:
1506
+ if training_args.do_train:
1507
+ # ======================== Training ================================
1508
+ train_start = time.time()
1509
+
1510
+ # Create sampling rng
1511
+ rng, input_rng = jax.random.split(rng)
1512
+
1513
+ # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length
1514
+ train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng)
1515
+ train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update)
1516
+
1517
+ # Gather the indices for creating the batch and do a training step
1518
+ for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1):
1519
+ samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx]
1520
+ batch = data_collator(samples)
1521
+ batch = shard(batch.data)
1522
+
1523
+ cur_step = epoch * (num_train_samples // batch_size_per_update) + step
1524
+ if cur_step <= data_args.skip_steps:
1525
+ continue
1526
+
1527
+ try:
1528
+ state, train_metric = p_train_step(state, batch)
1529
+ except TypeError as e:
1530
+ logger.warning(f"Encountered the following error:\n{e}")
1531
+
1532
+
1533
+ if cur_step % training_args.logging_steps == 0:
1534
+ # Save metrics
1535
+ train_metric = unreplicate(train_metric)
1536
+ train_time += time.time() - train_start
1537
+ # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step
1538
+ write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name)
1539
+ # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis)
1540
+ # if has_tensorboard and jax.process_index() == 0:
1541
+ # write_train_metric(summary_writer, train_metrics, train_time, cur_step)
1542
+
1543
+ epochs.write(
1544
+ f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})"
1545
+ )
1546
+
1547
+ if cur_step % total_train_steps == 0:
1548
+ break
1549
+
1550
+ if training_args.eval_steps and cur_step % training_args.eval_steps == 0:
1551
+ run_evaluation(cur_step)
1552
+
1553
+ if cur_step % training_args.save_steps == 0:
1554
+ save_checkpoint(cur_step)
1555
+
1556
+ if training_args.eval_steps == 0 and (epoch + 1) != num_epochs:
1557
+ # run evaluation at the end of the epoch if eval steps are not specified
1558
+ run_evaluation(cur_step)
1559
+ save_checkpoint(cur_step)
1560
+
1561
+ if training_args.do_train:
1562
+ save_checkpoint(cur_step)
1563
+
1564
+ cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training
1565
+
1566
+ if training_args.do_eval:
1567
+ run_evaluation(cur_step)
1568
+
1569
+ # TODO: collapse 'do_predict' into the run_evaluation function
1570
+ if training_args.do_predict:
1571
+ for split in [data_args.test_split_name]:
1572
+ # ======================== Evaluating ==============================
1573
+ eval_metrics = []
1574
+ eval_preds = []
1575
+ eval_labels = []
1576
+
1577
+ # Generate eval set by sequentially sampling indices from the test dataset and grouping by length
1578
+ eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size)
1579
+ eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False)
1580
+
1581
+ for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)):
1582
+ samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx]
1583
+ batch = data_collator(samples)
1584
+ labels = batch["labels"]
1585
+
1586
+ metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size)
1587
+ eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1])))
1588
+ eval_metrics.append(metrics)
1589
+
1590
+ eval_labels.extend(labels)
1591
+
1592
+ # normalize eval metrics
1593
+ eval_metrics = get_metrics(eval_metrics)
1594
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
1595
+ eval_metrics = to_fp32(eval_metrics)
1596
+
1597
+ # always run compute metrics
1598
+ error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels)
1599
+ eval_metrics.update(error_rate_metric)
1600
+ error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()])
1601
+
1602
+ # Print metrics and update progress bar
1603
+ desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})"
1604
+ epochs.write(desc)
1605
+ epochs.desc = desc
1606
+
1607
+ # Save metrics
1608
+ write_wandb_log(eval_metrics, cur_step, prefix=split)
1609
+ write_wandb_pred(pred_str, label_str, cur_step, prefix=split)
1610
+ # if has_tensorboard and jax.process_index() == 0:
1611
+ # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str)
1612
+
1613
+
1614
+ if __name__ == "__main__":
1615
+ main()
wandb/run-20220802_073947-3q3jac0b/files/config.yaml ADDED
@@ -0,0 +1,33 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.12.9
7
+ code_path: code/run_flax_speech_recognition_ctc.py
8
+ framework: huggingface
9
+ huggingface_version: 4.21.0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ python_version: 3.8.10
13
+ start_time: 1659425987
14
+ t:
15
+ 1:
16
+ - 1
17
+ - 2
18
+ - 3
19
+ - 11
20
+ - 12
21
+ 2:
22
+ - 1
23
+ - 2
24
+ - 3
25
+ - 11
26
+ - 12
27
+ 3:
28
+ - 13
29
+ 4: 3.8.10
30
+ 5: 0.12.9
31
+ 6: 4.21.0
32
+ 8:
33
+ - 5
wandb/run-20220802_073947-3q3jac0b/files/diff.patch ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220802_073947-3q3jac0b/files/output.log ADDED
@@ -0,0 +1,457 @@
1
+ INFO:__main__:Training/evaluation parameters FlaxTrainingArguments(
2
+ _n_gpu=0,
3
+ adafactor=False,
4
+ adam_beta1=0.9,
5
+ adam_beta2=0.999,
6
+ adam_epsilon=1e-08,
7
+ auto_find_batch_size=False,
8
+ bf16=False,
9
+ bf16_full_eval=False,
10
+ data_seed=None,
11
+ dataloader_drop_last=False,
12
+ dataloader_num_workers=0,
13
+ dataloader_pin_memory=True,
14
+ ddp_bucket_cap_mb=None,
15
+ ddp_find_unused_parameters=None,
16
+ debug=[],
17
+ deepspeed=None,
18
+ disable_tqdm=False,
19
+ do_eval=True,
20
+ do_predict=False,
21
+ do_train=True,
22
+ eval_accumulation_steps=None,
23
+ eval_delay=0,
24
+ eval_steps=4000,
25
+ evaluation_strategy=steps,
26
+ fp16=False,
27
+ fp16_backend=auto,
28
+ fp16_full_eval=False,
29
+ fp16_opt_level=O1,
30
+ fsdp=[],
31
+ fsdp_min_num_params=0,
32
+ fsdp_transformer_layer_cls_to_wrap=None,
33
+ full_determinism=False,
34
+ gradient_accumulation_steps=1,
35
+ gradient_checkpointing=True,
36
+ greater_is_better=None,
37
+ group_by_length=True,
38
+ half_precision_backend=auto,
39
+ hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst,
40
+ hub_private_repo=False,
41
+ hub_strategy=every_save,
42
+ hub_token=<HUB_TOKEN>,
43
+ ignore_data_skip=False,
44
+ include_inputs_for_metrics=False,
45
+ jit_mode_eval=False,
46
+ label_names=None,
47
+ label_smoothing_factor=0.0,
48
+ learning_rate=0.00033713760785758495,
49
+ length_column_name=input_length,
50
+ load_best_model_at_end=False,
51
+ local_rank=-1,
52
+ log_level=-1,
53
+ log_level_replica=-1,
54
+ log_on_each_node=True,
55
+ logging_dir=./runs/Aug02_07-39-43_t1v-n-eedfb410-w-0,
56
+ logging_first_step=False,
57
+ logging_nan_inf_filter=True,
58
+ logging_steps=100,
59
+ logging_strategy=steps,
60
+ lr_scheduler_type=linear,
61
+ matmul_precision=bfloat16,
62
+ max_grad_norm=1.0,
63
+ max_steps=-1,
64
+ metric_for_best_model=None,
65
+ mp_parameters=,
66
+ multisteps=False,
67
+ no_cuda=False,
68
+ num_train_epochs=40.0,
69
+ optim=adamw_hf,
70
+ output_dir=./,
71
+ overwrite_output_dir=True,
72
+ past_index=-1,
73
+ per_device_eval_batch_size=8,
74
+ per_device_train_batch_size=8,
75
+ precision=full_mixed,
76
+ prediction_loss_only=False,
77
+ push_to_hub=True,
78
+ push_to_hub_model_id=None,
79
+ push_to_hub_organization=None,
80
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
81
+ ray_scope=last,
82
+ remove_unused_columns=True,
83
+ report_to=['tensorboard', 'wandb'],
84
+ resume_from_checkpoint=None,
85
+ run_name=./,
86
+ save_on_each_node=False,
87
+ save_steps=4000,
88
+ save_strategy=steps,
89
+ save_total_limit=5,
90
+ seed=42,
91
+ sharded_ddp=[],
92
+ skip_memory_metrics=True,
93
+ tf32=None,
94
+ torchdynamo=None,
95
+ tpu_metrics_debug=False,
96
+ tpu_num_cores=None,
97
+ use_ipex=False,
98
+ use_legacy_prediction_loop=False,
99
+ warmup_ratio=0.0,
100
+ warmup_steps=0,
101
+ weight_decay=0.0,
102
+ xpu_backend=None,
103
+ )
104
+ INFO:__main__:JAX devices: 8, matmul precision: bfloat16
105
+ WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53)
106
+ 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.91it/s]
107
+ WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc)
108
+ 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 442.56it/s]
109
+ WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow
110
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow
111
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow
112
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow
113
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow
114
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow
115
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow
116
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow
117
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow
118
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow
119
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow
120
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow
121
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow
122
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow
123
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow
124
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow
125
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow
126
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow
127
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow
128
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow
129
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow
130
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow
131
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow
132
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow
133
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow
134
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow
135
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow
136
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow
137
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow
138
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow
139
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow
140
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow
141
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow
142
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow
143
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow
144
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow
145
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow
146
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow
147
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow
148
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow
149
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow
150
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow
151
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow
152
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow
153
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow
154
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow
155
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow
156
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow
157
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow
158
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow
159
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow
160
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow
161
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow
162
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow
163
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow
164
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow
165
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow
166
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow
167
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow
168
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow
169
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow
170
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow
171
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow
172
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow
173
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow
174
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow
175
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow
176
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow
177
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow
178
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow
179
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow
180
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow
181
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow
182
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow
183
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow
184
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow
185
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow
186
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow
187
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow
188
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow
189
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow
190
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow
191
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow
192
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow
193
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow
194
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow
195
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow
196
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow
197
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow
198
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow
199
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow
200
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow
201
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow
202
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow
203
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow
204
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow
205
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow
206
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow
207
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow
208
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow
209
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow
210
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow
211
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow
212
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow
213
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow
214
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow
215
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow
216
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow
217
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow
218
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow
219
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow
220
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow
221
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow
222
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow
223
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow
224
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow
225
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow
226
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow
227
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow
228
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow
229
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow
230
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow
231
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow
232
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow
233
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow
234
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow
235
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow
236
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow
237
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow
238
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow
239
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow
240
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow
241
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow
242
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow
243
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow
244
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow
245
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow
246
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow
247
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow
248
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow
249
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow
250
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow
251
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow
252
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow
253
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow
254
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow
255
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow
256
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow
257
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow
258
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow
259
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow
260
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow
261
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow
262
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow
263
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow
264
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow
265
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow
266
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow
267
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow
268
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow
269
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow
270
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow
271
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow
272
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow
273
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow
274
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow
275
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow
276
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow
277
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow
278
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow
279
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow
280
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow
281
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow
282
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow
283
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow
284
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow
285
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow
286
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow
287
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow
288
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow
289
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow
290
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow
291
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow
292
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow
293
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow
294
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow
295
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow
296
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow
297
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow
298
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow
299
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow
300
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow
301
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow
302
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow
303
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow
304
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow
305
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow
306
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow
307
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow
308
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow
309
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow
310
+ WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow
311
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow
312
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow
313
+ WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow
314
+ loading configuration file ./config.json
315
+ /data/flax/lib/python3.8/site-packages/transformers/configuration_utils.py:368: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
316
+ warnings.warn(
317
+ Model config Wav2Vec2Config {
318
+ "activation_dropout": 0.055,
319
+ "adapter_kernel_size": 3,
320
+ "adapter_stride": 2,
321
+ "add_adapter": false,
322
+ "apply_spec_augment": true,
323
+ "architectures": [
324
+ "Wav2Vec2ForCTC"
325
+ ],
326
+ "attention_dropout": 0.094,
327
+ "bos_token_id": 1,
328
+ "classifier_proj_size": 256,
329
+ "codevector_dim": 1024,
330
+ "contrastive_logits_temperature": 0.1,
331
+ "conv_bias": true,
332
+ "conv_dim": [
333
+ 512,
334
+ 512,
335
+ 512,
336
+ 512,
337
+ 512,
338
+ 512,
339
+ 512
340
+ ],
341
+ "conv_kernel": [
342
+ 10,
343
+ 3,
344
+ 3,
345
+ 3,
346
+ 3,
347
+ 2,
348
+ 2
349
+ ],
350
+ "conv_stride": [
351
+ 5,
352
+ 2,
353
+ 2,
354
+ 2,
355
+ 2,
356
+ 2,
357
+ 2
358
+ ],
359
+ "ctc_loss_reduction": "mean",
360
+ "ctc_zero_infinity": true,
361
+ "diversity_loss_weight": 0.1,
362
+ "do_stable_layer_norm": true,
363
+ "eos_token_id": 2,
364
+ "feat_extract_activation": "gelu",
365
+ "feat_extract_dropout": 0.0,
366
+ "feat_extract_norm": "layer",
367
+ "feat_proj_dropout": 0.04,
368
+ "feat_quantizer_dropout": 0.0,
369
+ "final_dropout": 0.0,
370
+ "fuse_matmuls": false,
371
+ "gradient_checkpointing": true,
372
+ "hidden_act": "gelu",
373
+ "hidden_dropout": 0.047,
374
+ "hidden_size": 1280,
375
+ "initializer_range": 0.02,
376
+ "intermediate_size": 5120,
377
+ "layer_norm_eps": 1e-05,
378
+ "layerdrop": 0.041,
379
+ "mask_feature_length": 64,
380
+ "mask_feature_min_masks": 0,
381
+ "mask_feature_prob": 0.25,
382
+ "mask_time_length": 10,
383
+ "mask_time_min_masks": 2,
384
+ "mask_time_prob": 0.082,
385
+ "model_type": "wav2vec2",
386
+ "num_adapter_layers": 3,
387
+ "num_attention_heads": 16,
388
+ "num_codevector_groups": 2,
389
+ "num_codevectors_per_group": 320,
390
+ "num_conv_pos_embedding_groups": 16,
391
+ "num_conv_pos_embeddings": 128,
392
+ "num_feat_extract_layers": 7,
393
+ "num_hidden_layers": 48,
394
+ "num_negatives": 100,
395
+ "output_hidden_size": 1280,
396
+ "pad_token_id": 38,
397
+ "proj_codevector_dim": 1024,
398
+ "tdnn_dilation": [
399
+ 1,
400
+ 2,
401
+ 3,
402
+ 1,
403
+ 1
404
+ ],
405
+ "tdnn_dim": [
406
+ 512,
407
+ 512,
408
+ 512,
409
+ 512,
410
+ 1500
411
+ ],
412
+ "tdnn_kernel": [
413
+ 5,
414
+ 3,
415
+ 3,
416
+ 1,
417
+ 1
418
+ ],
419
+ "torch_dtype": "float32",
420
+ "transformers_version": "4.21.0",
421
+ "use_scan": false,
422
+ "use_weighted_layer_sum": false,
423
+ "vocab_size": 39,
424
+ "xvector_output_dim": 512
425
+ }
426
+ loading feature extractor configuration file ./preprocessor_config.json
427
+ Feature extractor Wav2Vec2FeatureExtractor {
428
+ "do_normalize": true,
429
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
430
+ "feature_size": 1,
431
+ "padding_side": "right",
432
+ "padding_value": 0,
433
+ "return_attention_mask": true,
434
+ "sampling_rate": 16000
435
+ }
436
+ loading file ./vocab.json
437
+ loading file ./tokenizer_config.json
438
+ loading file ./added_tokens.json
439
+ loading file ./special_tokens_map.json
440
+ Adding <s> to the vocabulary
441
+ Adding </s> to the vocabulary
442
+ loading weights file ./flax_model.msgpack
443
+ Loading PyTorch weights from /data/wav2vec2-1b-npsc-nst/flax_model.msgpack
444
+ Traceback (most recent call last):
445
+ File "run_flax_speech_recognition_ctc.py", line 1615, in <module>
446
+ main()
447
+ File "run_flax_speech_recognition_ctc.py", line 1002, in main
448
+ model = FlaxWav2Vec2ForCTC.from_pretrained(
449
+ File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 783, in from_pretrained
450
+ state = load_pytorch_checkpoint_in_flax_state_dict(model, resolved_archive_file)
451
+ File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_pytorch_utils.py", line 56, in load_pytorch_checkpoint_in_flax_state_dict
452
+ pt_state_dict = torch.load(pt_path, map_location="cpu")
453
+ File "/data/flax/lib/python3.8/site-packages/torch/serialization.py", line 713, in load
454
+ return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
455
+ File "/data/flax/lib/python3.8/site-packages/torch/serialization.py", line 920, in _legacy_load
456
+ magic_number = pickle_module.load(f, **pickle_load_args)
457
+ ValueError: unregistered extension code 167
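Note on the traceback above: the log suggests that `from_pretrained` took the PyTorch-conversion path and called `torch.load` on `flax_model.msgpack`, which is a Flax msgpack checkpoint rather than a pickle, hence the "unregistered extension code" error. The snippet below is a minimal, hypothetical sketch (not part of this repository) of how one might inspect such a checkpoint and load it with the Flax deserializer instead; the path and class names mirror the log, everything else is illustrative.

# Hypothetical sketch, not part of the repository: inspect the checkpoint that
# torch.load rejected above and load it through the Flax deserializer instead.
# The path mirrors the log; the rest is illustrative.
from flax import serialization
from transformers import FlaxWav2Vec2ForCTC

ckpt_path = "/data/wav2vec2-1b-npsc-nst/flax_model.msgpack"

# A Flax checkpoint written by save_pretrained() is a msgpack blob, not a pickle,
# so torch.load() cannot parse it; msgpack_restore() can.
with open(ckpt_path, "rb") as f:
    params = serialization.msgpack_restore(f.read())
print(list(params.keys()))  # top-level module names, if the file is intact

# Loading through transformers without forcing the PyTorch conversion path
# (from_pt=False is the default) uses the Flax loader rather than torch.load().
model = FlaxWav2Vec2ForCTC.from_pretrained("./", from_pt=False)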
wandb/run-20220802_073947-3q3jac0b/files/requirements.txt ADDED
@@ -0,0 +1,158 @@
+ absl-py==1.0.0
+ aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ astunparse==1.6.3
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ backcall==0.2.0
+ cachetools==4.2.4
+ certifi==2021.10.8
+ cffi==1.15.1
+ charset-normalizer==2.0.10
+ chex==0.1.3
+ click==8.0.3
+ cloud-tpu-client==0.10
+ cloud-tpu-profiler==2.4.0
+ clu==0.0.6
+ colorama==0.4.5
+ commonmark==0.9.1
+ configparser==5.2.0
+ contextlib2==21.6.0
+ cycler==0.11.0
+ datasets==2.4.0
+ decorator==5.1.0
+ dill==0.3.4
+ dm-tree==0.1.6
+ docker-pycreds==0.4.0
+ etils==0.6.0
+ exceptiongroup==1.0.0rc8
+ filelock==3.4.2
+ flatbuffers==2.0
+ flax==0.5.3
+ fonttools==4.28.5
+ frozenlist==1.2.0
+ fsspec==2021.11.1
+ future==0.18.2
+ gast==0.4.0
+ gitdb==4.0.9
+ gitpython==3.1.26
+ google-api-core==1.31.5
+ google-api-python-client==1.8.0
+ google-auth-httplib2==0.1.0
+ google-auth-oauthlib==0.4.6
+ google-auth==2.3.3
+ google-pasta==0.2.0
+ googleapis-common-protos==1.54.0
+ grpcio==1.43.0
+ h5py==3.6.0
+ httplib2==0.20.2
+ huggingface-hub==0.2.1
+ hypothesis==6.53.0
+ idna==3.3
+ importlib-metadata==4.10.0
+ importlib-resources==5.4.0
+ ipython==7.31.0
+ jax==0.3.15
+ jaxlib==0.3.15
+ jedi==0.18.1
+ jiwer==2.3.0
+ joblib==1.1.0
+ keras-preprocessing==1.1.2
+ keras==2.7.0
+ kiwisolver==1.3.2
+ libclang==12.0.0
+ librosa==0.9.2
+ libtpu-nightly==0.1.dev20220722
+ llvmlite==0.39.0
+ markdown==3.3.6
+ matplotlib-inline==0.1.3
+ matplotlib==3.5.1
+ ml-collections==0.1.0
+ msgpack==1.0.3
+ multidict==5.2.0
+ multiprocess==0.70.12.2
+ numba==0.56.0
+ numpy==1.22.0
+ oauth2client==4.1.3
+ oauthlib==3.1.1
+ opt-einsum==3.3.0
+ optax==0.1.3
+ packaging==21.3
+ pandas==1.3.5
+ parso==0.8.3
+ pathtools==0.1.2
+ pexpect==4.8.0
+ pickleshare==0.7.5
+ pillow==9.0.0
+ pip==22.2.1
+ pkg-resources==0.0.0
+ pooch==1.6.0
+ promise==2.3
+ prompt-toolkit==3.0.24
+ protobuf==3.19.1
+ psutil==5.9.0
+ ptyprocess==0.7.0
+ pyarrow==6.0.1
+ pyasn1-modules==0.2.8
+ pyasn1==0.4.8
+ pycparser==2.21
+ pyctcdecode==0.4.0
+ pygments==2.11.1
+ pygtrie==2.5.0
+ pyparsing==3.0.6
+ python-dateutil==2.8.2
+ python-levenshtein==0.12.2
+ pytz==2021.3
+ pyyaml==6.0
+ regex==2021.11.10
+ requests-oauthlib==1.3.0
+ requests==2.27.0
+ resampy==0.3.1
+ responses==0.18.0
+ rich==11.2.0
+ rsa==4.8
+ sacremoses==0.0.46
+ scikit-learn==1.1.1
+ scipy==1.7.3
+ sentry-sdk==1.5.2
+ setuptools==44.0.0
+ shortuuid==1.0.8
+ six==1.16.0
+ smmap==5.0.0
+ sortedcontainers==2.4.0
+ soundfile==0.10.3.post1
+ sox==1.4.1
+ subprocess32==3.5.4
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.0
+ tensorboard==2.7.0
+ tensorflow-cpu==2.7.0
+ tensorflow-datasets==4.4.0
+ tensorflow-estimator==2.7.0
+ tensorflow-io-gcs-filesystem==0.23.1
+ tensorflow-metadata==1.5.0
+ tensorflow==2.7.0
+ tensorstore==0.1.21
+ termcolor==1.1.0
+ threadpoolctl==3.1.0
+ tokenizers==0.11.2
+ toolz==0.11.2
+ torch==1.12.0
+ torchaudio==0.12.0+cpu
+ tqdm==4.62.3
+ traitlets==5.1.1
+ transformers==4.21.0
+ typing-extensions==4.3.0
+ uritemplate==3.0.1
+ urllib3==1.26.7
+ wandb==0.12.9
+ wcwidth==0.2.5
+ werkzeug==2.0.2
+ wheel==0.37.1
+ wrapt==1.13.3
+ xxhash==2.0.2
+ yarl==1.7.2
+ yaspin==2.1.0
+ zipp==3.7.0
wandb/run-20220802_073947-3q3jac0b/files/wandb-metadata.json ADDED
@@ -0,0 +1,69 @@
+ {
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
+ "python": "3.8.10",
+ "heartbeatAt": "2022-08-02T07:39:51.312631",
+ "startedAt": "2022-08-02T07:39:47.832662",
+ "docker": null,
+ "cpu_count": 96,
+ "cuda": null,
+ "args": [
+ "--model_name_or_path=./",
+ "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst",
+ "--tokenizer_name=./",
+ "--output_dir=./",
+ "--overwrite_output_dir",
+ "--num_train_epochs=40",
+ "--per_device_train_batch_size=8",
+ "--per_device_eval_batch_size=8",
+ "--gradient_accumulation_steps=1",
+ "--precision=full_mixed",
+ "--matmul_precision=bfloat16",
+ "--learning_rate=0.00033713760785758495",
+ "--skip_steps=33100",
+ "--warmup_steps=0",
+ "--length_column_name=input_length",
+ "--evaluation_strategy=steps",
+ "--text_column_name=text",
+ "--save_steps=4000",
+ "--eval_steps=4000",
+ "--logging_steps=100",
+ "--layerdrop=0.041",
+ "--attention_dropout=0.094",
+ "--activation_dropout=0.055",
+ "--hidden_dropout=0.047",
+ "--save_total_limit=5",
+ "--freeze_feature_encoder",
+ "--feat_proj_dropout=0.04",
+ "--mask_time_prob=0.082",
+ "--mask_time_length=10",
+ "--mask_feature_prob=0.25",
+ "--mask_feature_length=64",
+ "--gradient_checkpointing",
+ "--min_duration_in_seconds=0.5",
+ "--max_duration_in_seconds=20.0",
+ "--use_auth_token",
+ "--seed=42",
+ "--group_by_length",
+ "--do_train",
+ "--do_eval",
+ "--push_to_hub",
+ "--preprocessing_num_workers=32",
+ "--ctc_zero_infinity",
+ "--do_lower_case",
+ "--wandb_project=wav2vec2",
+ "--wandb_name=wav2vec2-1b-npsc-nst (cont.)",
+ "--remove_punctuation"
+ ],
+ "state": "running",
+ "program": "run_flax_speech_recognition_ctc.py",
+ "codePath": "run_flax_speech_recognition_ctc.py",
+ "git": {
+ "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst",
+ "commit": "4f995e8718adff5045133dd384c7aa42ebe89fa9"
+ },
+ "email": "versae@gmail.com",
+ "root": "/data/wav2vec2-1b-npsc-nst",
+ "host": "t1v-n-eedfb410-w-0",
+ "username": "javierr",
+ "executable": "/data/flax/bin/python"
+ }
wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb": {"runtime": 33}}
wandb/run-20220802_073947-3q3jac0b/logs/debug-internal.log ADDED
@@ -0,0 +1,160 @@
1
+ 2022-08-02 07:39:48,765 INFO MainThread:3977817 [internal.py:wandb_internal():87] W&B internal server running at pid: 3977817, started at: 2022-08-02 07:39:48.765289
2
+ 2022-08-02 07:39:48,767 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: check_version
3
+ 2022-08-02 07:39:48,767 INFO WriterThread:3977817 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/run-3q3jac0b.wandb
4
+ 2022-08-02 07:39:48,768 DEBUG SenderThread:3977817 [sender.py:send():234] send: header
5
+ 2022-08-02 07:39:48,768 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: check_version
6
+ 2022-08-02 07:39:48,809 DEBUG SenderThread:3977817 [sender.py:send():234] send: run
7
+ 2022-08-02 07:39:49,023 INFO SenderThread:3977817 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files
8
+ 2022-08-02 07:39:49,023 INFO SenderThread:3977817 [sender.py:_start_run_threads():804] run started: 3q3jac0b with start time 1659425987
9
+ 2022-08-02 07:39:49,023 DEBUG SenderThread:3977817 [sender.py:send():234] send: summary
10
+ 2022-08-02 07:39:49,023 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file wandb-summary.json with policy end
11
+ 2022-08-02 07:39:49,023 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: run_start
12
+ 2022-08-02 07:39:50,029 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json
13
+ 2022-08-02 07:39:51,312 DEBUG HandlerThread:3977817 [meta.py:__init__():40] meta init
14
+ 2022-08-02 07:39:51,312 DEBUG HandlerThread:3977817 [meta.py:__init__():54] meta init done
15
+ 2022-08-02 07:39:51,312 DEBUG HandlerThread:3977817 [meta.py:probe():214] probe
16
+ 2022-08-02 07:39:51,314 DEBUG HandlerThread:3977817 [meta.py:_setup_git():204] setup git
17
+ 2022-08-02 07:39:51,353 DEBUG HandlerThread:3977817 [meta.py:_setup_git():211] setup git done
18
+ 2022-08-02 07:39:51,353 DEBUG HandlerThread:3977817 [meta.py:_save_code():92] save code
19
+ 2022-08-02 07:39:51,366 DEBUG HandlerThread:3977817 [meta.py:_save_code():113] save code done
20
+ 2022-08-02 07:39:51,366 DEBUG HandlerThread:3977817 [meta.py:_save_patches():130] save patches
21
+ 2022-08-02 07:39:52,034 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/code/run_flax_speech_recognition_ctc.py
22
+ 2022-08-02 07:39:52,035 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/diff.patch
23
+ 2022-08-02 07:39:52,035 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/code
24
+ 2022-08-02 07:39:52,344 DEBUG HandlerThread:3977817 [meta.py:_save_patches():172] save patches done
25
+ 2022-08-02 07:39:52,344 DEBUG HandlerThread:3977817 [meta.py:_save_pip():58] save pip
26
+ 2022-08-02 07:39:52,345 DEBUG HandlerThread:3977817 [meta.py:_save_pip():72] save pip done
27
+ 2022-08-02 07:39:52,345 DEBUG HandlerThread:3977817 [meta.py:probe():252] probe done
28
+ 2022-08-02 07:39:52,364 DEBUG SenderThread:3977817 [sender.py:send():234] send: files
29
+ 2022-08-02 07:39:52,364 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now
30
+ 2022-08-02 07:39:52,365 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now
31
+ 2022-08-02 07:39:52,366 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file diff.patch with policy now
32
+ 2022-08-02 07:39:52,372 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: stop_status
33
+ 2022-08-02 07:39:52,372 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: stop_status
34
+ 2022-08-02 07:39:53,034 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/diff.patch
35
+ 2022-08-02 07:39:53,036 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-metadata.json
36
+ 2022-08-02 07:39:53,036 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
37
+ 2022-08-02 07:39:53,036 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/requirements.txt
38
+ 2022-08-02 07:39:53,060 INFO Thread-12 :3977817 [upload_job.py:push():137] Uploaded file /tmp/tmpki5sholowandb/19x6t1r2-code/run_flax_speech_recognition_ctc.py
39
+ 2022-08-02 07:39:53,064 INFO Thread-13 :3977817 [upload_job.py:push():137] Uploaded file /tmp/tmpki5sholowandb/2lizhqc0-diff.patch
40
+ 2022-08-02 07:39:53,447 INFO Thread-11 :3977817 [upload_job.py:push():137] Uploaded file /tmp/tmpki5sholowandb/5wl8ghg8-wandb-metadata.json
41
+ 2022-08-02 07:39:55,035 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
42
+ 2022-08-02 07:39:57,036 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
43
+ 2022-08-02 07:39:59,037 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
44
+ 2022-08-02 07:40:01,038 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
45
+ 2022-08-02 07:40:07,041 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
46
+ 2022-08-02 07:40:07,521 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: stop_status
47
+ 2022-08-02 07:40:07,522 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: stop_status
48
+ 2022-08-02 07:40:19,401 DEBUG SenderThread:3977817 [sender.py:send():234] send: stats
49
+ 2022-08-02 07:40:21,048 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
50
+ 2022-08-02 07:40:22,156 DEBUG SenderThread:3977817 [sender.py:send():234] send: telemetry
51
+ 2022-08-02 07:40:22,156 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
52
+ 2022-08-02 07:40:22,157 DEBUG SenderThread:3977817 [sender.py:send():234] send: exit
53
+ 2022-08-02 07:40:22,157 INFO SenderThread:3977817 [sender.py:send_exit():366] handling exit code: 1
54
+ 2022-08-02 07:40:22,157 INFO SenderThread:3977817 [sender.py:send_exit():368] handling runtime: 33
55
+ 2022-08-02 07:40:22,160 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file wandb-summary.json with policy end
56
+ 2022-08-02 07:40:22,160 INFO SenderThread:3977817 [sender.py:send_exit():374] send defer
57
+ 2022-08-02 07:40:22,160 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
58
+ 2022-08-02 07:40:22,161 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
59
+ 2022-08-02 07:40:22,161 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 0
60
+ 2022-08-02 07:40:22,161 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
61
+ 2022-08-02 07:40:22,161 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 0
62
+ 2022-08-02 07:40:22,161 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 1
63
+ 2022-08-02 07:40:22,162 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
64
+ 2022-08-02 07:40:22,162 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 1
65
+ 2022-08-02 07:40:22,208 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
66
+ 2022-08-02 07:40:22,208 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 1
67
+ 2022-08-02 07:40:22,209 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 2
68
+ 2022-08-02 07:40:22,209 DEBUG SenderThread:3977817 [sender.py:send():234] send: stats
69
+ 2022-08-02 07:40:22,209 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
70
+ 2022-08-02 07:40:22,209 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 2
71
+ 2022-08-02 07:40:22,209 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
72
+ 2022-08-02 07:40:22,209 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 2
73
+ 2022-08-02 07:40:22,210 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 3
74
+ 2022-08-02 07:40:22,210 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
75
+ 2022-08-02 07:40:22,210 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 3
76
+ 2022-08-02 07:40:22,210 DEBUG SenderThread:3977817 [sender.py:send():234] send: summary
77
+ 2022-08-02 07:40:22,210 INFO SenderThread:3977817 [sender.py:_save_file():939] saving file wandb-summary.json with policy end
78
+ 2022-08-02 07:40:22,210 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
79
+ 2022-08-02 07:40:22,210 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 3
80
+ 2022-08-02 07:40:22,210 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 4
81
+ 2022-08-02 07:40:22,211 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
82
+ 2022-08-02 07:40:22,211 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 4
83
+ 2022-08-02 07:40:22,211 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
84
+ 2022-08-02 07:40:22,211 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 4
85
+ 2022-08-02 07:40:22,262 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
86
+ 2022-08-02 07:40:22,374 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 5
87
+ 2022-08-02 07:40:22,374 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
88
+ 2022-08-02 07:40:22,374 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
89
+ 2022-08-02 07:40:22,375 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 5
90
+ 2022-08-02 07:40:22,375 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
91
+ 2022-08-02 07:40:22,375 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 5
92
+ 2022-08-02 07:40:22,375 INFO SenderThread:3977817 [dir_watcher.py:finish():283] shutting down directory watcher
93
+ 2022-08-02 07:40:22,475 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
94
+ 2022-08-02 07:40:23,049 INFO Thread-8 :3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json
95
+ 2022-08-02 07:40:23,050 INFO SenderThread:3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/config.yaml
96
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
97
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files
98
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/config.yaml config.yaml
99
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/diff.patch diff.patch
100
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/requirements.txt requirements.txt
101
+ 2022-08-02 07:40:23,051 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log output.log
102
+ 2022-08-02 07:40:23,052 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json wandb-summary.json
103
+ 2022-08-02 07:40:23,052 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-metadata.json wandb-metadata.json
104
+ 2022-08-02 07:40:23,052 INFO SenderThread:3977817 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py
105
+ 2022-08-02 07:40:23,052 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 6
106
+ 2022-08-02 07:40:23,058 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
107
+ 2022-08-02 07:40:23,059 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
108
+ 2022-08-02 07:40:23,065 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 6
109
+ 2022-08-02 07:40:23,065 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
110
+ 2022-08-02 07:40:23,065 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 6
111
+ 2022-08-02 07:40:23,065 INFO SenderThread:3977817 [file_pusher.py:finish():177] shutting down file pusher
112
+ 2022-08-02 07:40:23,160 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
113
+ 2022-08-02 07:40:23,161 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
114
+ 2022-08-02 07:40:23,262 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
115
+ 2022-08-02 07:40:23,263 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
116
+ 2022-08-02 07:40:23,364 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
117
+ 2022-08-02 07:40:23,364 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
118
+ 2022-08-02 07:40:23,466 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
119
+ 2022-08-02 07:40:23,466 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
120
+ 2022-08-02 07:40:23,546 INFO Thread-14 :3977817 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/config.yaml
121
+ 2022-08-02 07:40:23,555 INFO Thread-16 :3977817 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/output.log
122
+ 2022-08-02 07:40:23,568 INFO Thread-15 :3977817 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/requirements.txt
123
+ 2022-08-02 07:40:23,568 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
124
+ 2022-08-02 07:40:23,568 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
125
+ 2022-08-02 07:40:23,579 INFO Thread-17 :3977817 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/files/wandb-summary.json
126
+ 2022-08-02 07:40:23,670 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
127
+ 2022-08-02 07:40:23,670 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
128
+ 2022-08-02 07:40:23,772 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
129
+ 2022-08-02 07:40:23,772 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
130
+ 2022-08-02 07:40:23,780 INFO Thread-7 :3977817 [sender.py:transition_state():387] send defer: 7
131
+ 2022-08-02 07:40:23,780 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
132
+ 2022-08-02 07:40:23,781 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 7
133
+ 2022-08-02 07:40:23,781 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
134
+ 2022-08-02 07:40:23,781 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 7
135
+ 2022-08-02 07:40:23,873 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
136
+ 2022-08-02 07:40:24,259 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 8
137
+ 2022-08-02 07:40:24,260 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
138
+ 2022-08-02 07:40:24,260 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
139
+ 2022-08-02 07:40:24,260 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 8
140
+ 2022-08-02 07:40:24,261 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
141
+ 2022-08-02 07:40:24,261 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 8
142
+ 2022-08-02 07:40:24,261 INFO SenderThread:3977817 [sender.py:transition_state():387] send defer: 9
143
+ 2022-08-02 07:40:24,261 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: defer
144
+ 2022-08-02 07:40:24,261 INFO HandlerThread:3977817 [handler.py:handle_request_defer():147] handle defer: 9
145
+ 2022-08-02 07:40:24,262 DEBUG SenderThread:3977817 [sender.py:send():234] send: final
146
+ 2022-08-02 07:40:24,262 DEBUG SenderThread:3977817 [sender.py:send():234] send: footer
147
+ 2022-08-02 07:40:24,262 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: defer
148
+ 2022-08-02 07:40:24,262 INFO SenderThread:3977817 [sender.py:send_request_defer():383] handle sender defer: 9
149
+ 2022-08-02 07:40:24,361 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: poll_exit
150
+ 2022-08-02 07:40:24,361 DEBUG SenderThread:3977817 [sender.py:send_request():248] send_request: poll_exit
151
+ 2022-08-02 07:40:24,362 INFO SenderThread:3977817 [file_pusher.py:join():182] waiting for file pusher
152
+ 2022-08-02 07:40:24,617 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: get_summary
153
+ 2022-08-02 07:40:24,618 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: sampled_history
154
+ 2022-08-02 07:40:24,618 DEBUG HandlerThread:3977817 [handler.py:handle_request():130] handle_request: shutdown
155
+ 2022-08-02 07:40:24,619 INFO HandlerThread:3977817 [handler.py:finish():731] shutting down handler
156
+ 2022-08-02 07:40:25,262 INFO WriterThread:3977817 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/run-3q3jac0b.wandb
157
+ 2022-08-02 07:40:25,616 INFO SenderThread:3977817 [sender.py:finish():1070] shutting down sender
158
+ 2022-08-02 07:40:25,616 INFO SenderThread:3977817 [file_pusher.py:finish():177] shutting down file pusher
159
+ 2022-08-02 07:40:25,616 INFO SenderThread:3977817 [file_pusher.py:join():182] waiting for file pusher
160
+ 2022-08-02 07:40:25,619 INFO MainThread:3977817 [internal.py:handle_exit():77] Internal process exited
wandb/run-20220802_073947-3q3jac0b/logs/debug.log ADDED
@@ -0,0 +1,139 @@
1
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'}
2
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_setup.py:_flush():71] setting login settings: {}
3
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/logs/debug.log
4
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_073947-3q3jac0b/logs/debug-internal.log
5
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_init.py:init():404] calling init triggers
6
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_init.py:init():409] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [wandb_init.py:init():460] starting backend
9
+ 2022-08-02 07:39:47,835 INFO MainThread:3976529 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2022-08-02 07:39:47,883 INFO MainThread:3976529 [backend.py:ensure_launched():216] starting backend process...
11
+ 2022-08-02 07:39:47,927 INFO MainThread:3976529 [backend.py:ensure_launched():221] started backend process with pid: 3977817
12
+ 2022-08-02 07:39:47,930 INFO MainThread:3976529 [wandb_init.py:init():469] backend started and connected
13
+ 2022-08-02 07:39:47,948 INFO MainThread:3976529 [wandb_init.py:init():533] updated telemetry
14
+ 2022-08-02 07:39:48,061 INFO MainThread:3976529 [wandb_init.py:init():563] communicating current version
15
+ 2022-08-02 07:39:48,807 INFO MainThread:3976529 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
16
+
17
+ 2022-08-02 07:39:48,808 INFO MainThread:3976529 [wandb_init.py:init():578] communicating run to backend with 30 second timeout
18
+ 2022-08-02 07:39:49,023 INFO MainThread:3976529 [wandb_init.py:init():606] starting run threads in backend
19
+ 2022-08-02 07:39:52,368 INFO MainThread:3976529 [wandb_run.py:_console_start():1810] atexit reg
20
+ 2022-08-02 07:39:52,369 INFO MainThread:3976529 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT
21
+ 2022-08-02 07:39:52,369 INFO MainThread:3976529 [wandb_run.py:_redirect():1689] Redirecting console.
22
+ 2022-08-02 07:39:52,371 INFO MainThread:3976529 [wandb_run.py:_redirect():1745] Redirects installed.
23
+ 2022-08-02 07:39:52,371 INFO MainThread:3976529 [wandb_init.py:init():633] run started, returning control to user process
24
+ 2022-08-02 07:40:20,016 INFO MainThread:3976529 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1
25
+ 2022-08-02 07:40:20,018 INFO MainThread:3976529 [wandb_run.py:_restore():1752] restore
26
+ 2022-08-02 07:40:22,161 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
27
+ wandb_count: 2
28
+ other_count: 1
29
+ }
30
+ pusher_stats {
31
+ uploaded_bytes: 484657
32
+ total_bytes: 484657
33
+ }
34
+
35
+ 2022-08-02 07:40:22,375 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
36
+ wandb_count: 2
37
+ other_count: 1
38
+ }
39
+ pusher_stats {
40
+ uploaded_bytes: 484657
41
+ total_bytes: 484657
42
+ }
43
+
44
+ 2022-08-02 07:40:23,059 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
45
+ wandb_count: 6
46
+ other_count: 1
47
+ }
48
+ pusher_stats {
49
+ uploaded_bytes: 484657
50
+ total_bytes: 544336
51
+ }
52
+
53
+ 2022-08-02 07:40:23,161 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
54
+ wandb_count: 6
55
+ other_count: 1
56
+ }
57
+ pusher_stats {
58
+ uploaded_bytes: 484657
59
+ total_bytes: 544336
60
+ }
61
+
62
+ 2022-08-02 07:40:23,263 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
63
+ wandb_count: 6
64
+ other_count: 1
65
+ }
66
+ pusher_stats {
67
+ uploaded_bytes: 544336
68
+ total_bytes: 544336
69
+ }
70
+
71
+ 2022-08-02 07:40:23,365 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
72
+ wandb_count: 6
73
+ other_count: 1
74
+ }
75
+ pusher_stats {
76
+ uploaded_bytes: 544336
77
+ total_bytes: 544336
78
+ }
79
+
80
+ 2022-08-02 07:40:23,467 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
81
+ wandb_count: 6
82
+ other_count: 1
83
+ }
84
+ pusher_stats {
85
+ uploaded_bytes: 544336
86
+ total_bytes: 544336
87
+ }
88
+
89
+ 2022-08-02 07:40:23,569 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
90
+ wandb_count: 6
91
+ other_count: 1
92
+ }
93
+ pusher_stats {
94
+ uploaded_bytes: 544336
95
+ total_bytes: 544336
96
+ }
97
+
98
+ 2022-08-02 07:40:23,671 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
99
+ wandb_count: 6
100
+ other_count: 1
101
+ }
102
+ pusher_stats {
103
+ uploaded_bytes: 544336
104
+ total_bytes: 544336
105
+ }
106
+
107
+ 2022-08-02 07:40:23,773 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
108
+ wandb_count: 6
109
+ other_count: 1
110
+ }
111
+ pusher_stats {
112
+ uploaded_bytes: 544336
113
+ total_bytes: 544336
114
+ }
115
+
116
+ 2022-08-02 07:40:24,260 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts {
117
+ wandb_count: 6
118
+ other_count: 1
119
+ }
120
+ pusher_stats {
121
+ uploaded_bytes: 544336
122
+ total_bytes: 544336
123
+ }
124
+
125
+ 2022-08-02 07:40:24,616 INFO MainThread:3976529 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true
126
+ exit_result {
127
+ }
128
+ file_counts {
129
+ wandb_count: 6
130
+ other_count: 1
131
+ }
132
+ pusher_stats {
133
+ uploaded_bytes: 544336
134
+ total_bytes: 544336
135
+ }
136
+ local_info {
137
+ }
138
+
139
+ 2022-08-02 07:40:26,145 INFO MainThread:3976529 [wandb_run.py:_append_files():2180] logging synced files
wandb/run-20220802_073947-3q3jac0b/run-3q3jac0b.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbacd374c2df78d05cf4c0353094781230fcd46c659507728e7ee459a84af474
3
+ size 57535
wandb/run-20220802_074501-31ig5poi/files/code/run_flax_speech_recognition_ctc.py ADDED
@@ -0,0 +1,1625 @@
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2022 The HuggingFace Team All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition.
18
+ """
19
+ # You can also adapt this script for your own CTC speech recognition task. Pointers for this are left as comments.
20
+
21
+ import logging
22
+ import math
23
+ import os
24
+ import re
25
+ import sys
26
+ import time
27
+ from dataclasses import dataclass, field
28
+ from pathlib import Path
29
+ from typing import Any, Callable, Dict, List, Optional, Union
30
+
31
+ import datasets
32
+ import numpy as np
33
+ from datasets import DatasetDict, load_dataset, load_metric
34
+ from tqdm import tqdm
35
+
36
+ import flax
37
+ import jax
38
+ import jax.numpy as jnp
39
+ import optax
40
+ import transformers
41
+ import wandb as wandb
42
+ from flax import core, jax_utils, struct, traverse_util
43
+ from flax.jax_utils import unreplicate, pad_shard_unpad
44
+ from flax.training.common_utils import get_metrics, shard, shard_prng_key
45
+ from huggingface_hub import Repository
46
+ from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC
47
+ from optax._src import linear_algebra
48
+ from transformers import (
49
+ AutoFeatureExtractor,
50
+ AutoProcessor,
51
+ AutoTokenizer,
52
+ HfArgumentParser,
53
+ TrainingArguments,
54
+ is_tensorboard_available,
55
+ set_seed,
56
+ )
57
+ from transformers.file_utils import get_full_repo_name
58
+ from transformers.utils import check_min_version
59
+ from transformers.utils.versions import require_version
60
+
61
+
62
+ # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
63
+ check_min_version("4.17.0.dev0")
64
+
65
+ require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
66
+
67
+ logger = logging.getLogger(__name__)
68
+
69
+
70
+ @flax.struct.dataclass
71
+ class ModelArguments:
72
+ """
73
+ Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
74
+ """
75
+
76
+ model_name_or_path: str = field(
77
+ metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
78
+ )
79
+ config_name: Optional[str] = field(
80
+ default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
81
+ )
82
+ tokenizer_name: Optional[str] = field(
83
+ default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
84
+ )
85
+ feature_extractor_name: Optional[str] = field(
86
+ default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
87
+ )
88
+ cache_dir: Optional[str] = field(
89
+ default=None,
90
+ metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
91
+ )
92
+ use_fast_tokenizer: bool = field(
93
+ default=True,
94
+ metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
95
+ )
96
+ model_revision: str = field(
97
+ default="main",
98
+ metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
99
+ )
100
+ use_auth_token: bool = field(
101
+ default=False,
102
+ metadata={
103
+ "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
104
+ "with private models)."
105
+ },
106
+ )
107
+ freeze_feature_encoder: bool = field(
108
+ default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
109
+ )
110
+ attention_dropout: float = field(
111
+ default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
112
+ )
113
+ activation_dropout: float = field(
114
+ default=0.1,
115
+ metadata={
116
+ "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler."
117
+ },
118
+ )
119
+ hidden_dropout: float = field(
120
+ default=0.1,
121
+ metadata={
122
+ "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
123
+ },
124
+ )
125
+ feat_proj_dropout: float = field(
126
+ default=0.0,
127
+ metadata={
128
+ "help": "The feat proj dropout probability for feature encoder representations."
129
+ },
130
+ )
131
+ final_dropout: float = field(
132
+ default=0.0,
133
+ metadata={"help": "The dropout probability for the final projection layer."},
134
+ )
135
+ mask_time_prob: float = field(
136
+ default=0.1,
137
+ metadata={
138
+ "help": "The spec aug dropout probability for feature encoder representations."
139
+ },
140
+ )
141
+ mask_time_length: int = field(
142
+ default=10,
143
+ metadata={"help": "Length of vector span to mask along the time axis."},
144
+ )
145
+ mask_feature_prob: float = field(
146
+ default=0.0,
147
+ metadata={
148
+ "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
149
+ "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
150
+ },
151
+ )
152
+ mask_feature_length: int = field(
153
+ default=10,
154
+ metadata={"help": "Length of vector span to mask along the feature axis."},
155
+ )
156
+ layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
157
+ ctc_loss_reduction: Optional[str] = field(
158
+ default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
159
+ )
160
+ ctc_zero_infinity: Optional[bool] = field(
161
+ default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."}
162
+ )
163
+
164
+
165
+ @flax.struct.dataclass
166
+ class DataTrainingArguments:
167
+ """
168
+ Arguments pertaining to what data we are going to input our model for training and eval.
169
+ """
170
+
171
+ dataset_name: str = field(
172
+ default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
173
+ )
174
+ dataset_config_name: Optional[str] = field(
175
+ default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
176
+ )
177
+ text_column: Optional[str] = field(
178
+ default=None,
179
+ metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
180
+ )
181
+ dataset_cache_dir: Optional[str] = field(
182
+ default=None, metadata={"help": "Path to cache directory for saving and loading datasets"}
183
+ )
184
+ overwrite_cache: bool = field(
185
+ default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
186
+ )
187
+ preprocessing_num_workers: Optional[int] = field(
188
+ default=None,
189
+ metadata={"help": "The number of processes to use for the preprocessing."},
190
+ )
191
+ max_train_samples: Optional[int] = field(
192
+ default=None,
193
+ metadata={
194
+ "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
195
+ "value if set."
196
+ },
197
+ )
198
+ max_eval_samples: Optional[int] = field(
199
+ default=None,
200
+ metadata={
201
+ "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
202
+ "value if set."
203
+ },
204
+ )
205
+ max_test_samples: Optional[int] = field(
206
+ default=None,
207
+ metadata={
208
+ "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
209
+ "value if set."
210
+ },
211
+ )
212
+ audio_column_name: str = field(
213
+ default="audio",
214
+ metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
215
+ )
216
+ text_column_name: str = field(
217
+ default="text",
218
+ metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
219
+ )
220
+ max_duration_in_seconds: float = field(
221
+ default=20.0,
222
+ metadata={
223
+ "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
224
+ },
225
+ )
226
+ min_duration_in_seconds: float = field(
227
+ default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
228
+ )
229
+ max_label_length: Optional[int] = field(
230
+ default=512,
231
+ metadata={
232
+ "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
233
+ "than this will be filtered."
234
+ },
235
+ )
236
+ min_label_length: Optional[int] = field(
237
+ default=2,
238
+ metadata={
239
+ "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
240
+ "than this will be filtered."
241
+ },
242
+ )
243
+ pad_input_to_multiple_of: Optional[int] = field(
244
+ default=32000,
245
+ metadata={
246
+ "help": "If set will pad the input sequence to a multiple of the provided value. "
247
+ "This is important to avoid triggering recompilations on TPU."
248
+ },
249
+ )
250
+ pad_target_to_multiple_of: Optional[int] = field(
251
+ default=None,
252
+ metadata={
253
+ "help": "If set will pad the target sequence to a multiple of the provided value. "
254
+ "This is important to avoid triggering recompilations on TPU."
255
+ },
256
+ )
257
+ preprocessing_only: bool = field(
258
+ default=False,
259
+ metadata={
260
+ "help": "Whether to only do data preprocessing and skip training. "
261
+ "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
262
+ "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
263
+ "so that the cached datasets can consequently be loaded in distributed training"
264
+ },
265
+ )
266
+ train_split_name: str = field(
267
+ default="train",
268
+ metadata={
269
+ "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
270
+ },
271
+ )
272
+ eval_split_name: str = field(
273
+ default="validation",
274
+ metadata={
275
+ "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
276
+ },
277
+ )
278
+ do_lower_case: bool = field(
279
+ default=True,
280
+ metadata={"help": "Whether the target text should be lower cased."},
281
+ )
282
+ wandb_project: str = field(
283
+ default="flax-speech-recognition-ctc",
284
+ metadata={"help": "The name of the wandb project."},
285
+ )
286
+ wandb_name: str = field(
287
+ default=None,
288
+ metadata={"help": "The name of the wandb run."},
289
+ )
290
+ wandb_job_type: str = field(
291
+ default="CTC",
292
+ metadata={"help": "The name of the wandb job type."},
293
+ )
294
+ test_split_name: str = field(
295
+ default="test",
296
+ metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"},
297
+ )
298
+ remove_punctuation: bool = field(
299
+ default=False, metadata={"help": "Whether or not to remove punctuation during training."}
300
+ )
301
+ skip_steps: Optional[int] = field(
302
+ default=0,
303
+ metadata={
304
+ "help": "Skip this number of steps. Useful to continue training"
305
+ },
306
+ )
307
+
308
+
309
+ # @flax.struct.dataclass
310
+ @dataclass
311
+ class FlaxTrainingArguments(TrainingArguments):
312
+ precision: str = field(
313
+ default="full",
314
+ metadata={
315
+ "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision"
316
+ "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**"
317
+ },
318
+ )
319
+ matmul_precision: str = field(
320
+ default="default",
321
+ metadata={
322
+ "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. "
323
+ "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). "
324
+ "This configuration option does not change the behaviours of such calls with explicit precision arguments; "
325
+ "it only changes the behaviors of calls with no such argument provided. "
326
+ "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`."
327
+ },
328
+ )
329
+ multisteps: bool = field(
330
+ default=False,
331
+ metadata={
332
+ "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, "
333
+ "a custom gradient accumulation implementation will be employed."
334
+ },
335
+ )
336
+
337
+
338
+ def to_fp32(t):
339
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
340
+
341
+
342
+ def to_bf16(t):
343
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t)
344
+
345
+
346
+ class MixedPrecisionTrainState(struct.PyTreeNode):
347
+ """Train state for use with a single Optax optimizer.
348
+ Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py
349
+
350
+ Synopsis::
351
+
352
+ state = TrainState.create(
353
+ apply_fn=model.apply,
354
+ params=variables['params'],
355
+ tx=tx)
356
+ grad_fn = jax.grad(make_loss_fn(state.apply_fn))
357
+ for batch in data:
358
+ grads = grad_fn(state.params, batch)
359
+ state = state.apply_gradients(grads=grads)
360
+
361
+ Args:
362
+ step: Counter starts at 0 and is incremented by every call to
363
+ `.apply_gradients()`.
364
+ apply_fn: Usually set to `model.apply()`. Kept in this dataclass for
365
+ convenience to have a shorter params list for the `train_step()` function
366
+ in your training loop.
367
+ params: The parameters to be updated by `tx` and used by `apply_fn`.
368
+ tx: An Optax gradient transformation.
369
+ opt_state: The state for `tx`.
370
+ dropout_rng: PRNG key for stochastic operations.
371
+ bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training.
372
+ """
373
+
374
+ step: int
375
+ apply_fn: Callable = struct.field(pytree_node=False)
376
+ get_attention_mask_fn: Callable = struct.field(pytree_node=False)
377
+ params: core.FrozenDict[str, Any]
378
+ tx: optax.GradientTransformation = struct.field(pytree_node=False)
379
+ opt_state: optax.OptState
380
+ dropout_rng: jnp.ndarray
381
+ max_grad_norm: Optional[float] = 1.0
382
+
383
+ def apply_gradients(self, *, grads, to_dtype, **kwargs):
384
+ """Updates `step`, `params`, `opt_state` and `**kwargs` in return value.
385
+
386
+ Note that internally this function calls `.tx.update()` followed by a call
387
+ to `optax.apply_updates()` to update `params` and `opt_state`.
388
+
389
+ Args:
390
+ grads: Gradients that have the same pytree structure as `.params`.
391
+ **kwargs: Additional dataclass attributes that should be `.replace()`-ed.
392
+
393
+ Returns:
394
+ An updated instance of `self` with `step` incremented by one, `params`
395
+ and `opt_state` updated by applying `grads`, and additional attributes
396
+ replaced as specified by `kwargs`.
397
+ """
398
+
399
+ # clip gradients by global l2 norm
400
+ casted_max_grad_norm = to_dtype(self.max_grad_norm)
401
+ g_norm = linear_algebra.global_norm(grads)
402
+ g_norm = jnp.maximum(casted_max_grad_norm, g_norm)
403
+ grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads)
404
+
405
+ # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training
406
+ # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is)
407
+ updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params)
408
+
409
+ new_params = optax.apply_updates(self.params, updates)
410
+ return self.replace(
411
+ step=self.step + 1,
412
+ params=new_params,
413
+ opt_state=to_dtype(new_opt_state),
414
+ **kwargs,
415
+ )
416
+
417
+ @classmethod
418
+ def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs):
419
+ """Creates a new instance with `step=0` and initialized `opt_state`."""
420
+ # downcast optimizer state to bf16 if mixed-precision training
421
+ opt_state = tx.init(to_dtype(params)) if tx is not None else None
422
+ return cls(
423
+ step=0,
424
+ apply_fn=apply_fn,
425
+ params=params,
426
+ tx=tx,
427
+ opt_state=opt_state,
428
+ **kwargs,
429
+ )
430
+
431
+ def replicate(self):
432
+ return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))
433
+
434
+
435
+ @flax.struct.dataclass
436
+ class FlaxDataCollatorSpeechSeq2SeqWithPadding:
437
+ """
438
+ Data collator that will dynamically pad the inputs received.
439
+ Args:
440
+ processor ([`Wav2Vec2Processor`])
441
+ The processor used for processing the data.
442
+ decoder_start_token_id (:obj: `int`)
443
+ The begin-of-sentence of the decoder.
444
+ input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
445
+ Select a strategy to pad the returned input sequences (according to the model's padding side and padding index)
446
+ among:
447
+ * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
448
+ sequence is provided).
449
+ * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
450
+ maximum acceptable input length for the model if that argument is not provided.
451
+ * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
452
+ different lengths).
453
+ target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
454
+ Select a strategy to pad the returned target sequences (according to the model's padding side and padding index).
455
+ See above for details.
456
+ max_input_length (:obj:`float`, `optional`):
457
+ Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
458
+ pad_input_to_multiple_of (:obj:`int`, `optional`):
459
+ If set will pad the input sequence to a multiple of the provided value.
460
+ This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
461
+ 7.5 (Volta).
462
+ pad_target_to_multiple_of (:obj:`int`, `optional`):
463
+ If set will pad the target sequence to a multiple of the provided value.
464
+ This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
465
+ 7.5 (Volta).
466
+ """
467
+
468
+ processor: Any
469
+ input_padding: Union[bool, str] = "longest"
470
+ label_padding: Union[bool, str] = "max_length"
471
+ pad_input_to_multiple_of: Optional[int] = None
472
+ pad_to_multiple_of_label: Optional[int] = None
473
+ max_input_length: Optional[float] = None
474
+ max_label_length: Optional[float] = None
475
+
476
+ def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
477
+ # split inputs and labels since they have to be of different lengths and need
478
+ # different padding methods
479
+ input_features = [{"input_values": feature["input_values"]} for feature in features]
480
+ label_features = [{"input_ids": feature["labels"]} for feature in features]
481
+
482
+ # reformat list to dict and set to pytorch format
483
+ batch = self.processor.feature_extractor.pad(
484
+ input_features,
485
+ max_length=self.max_input_length,
486
+ padding=self.input_padding,
487
+ pad_to_multiple_of=self.pad_input_to_multiple_of,
488
+ return_tensors="np",
489
+ )
490
+
491
+ labels_batch = self.processor.tokenizer.pad(
492
+ label_features,
493
+ max_length=self.max_label_length,
494
+ padding=self.label_padding,
495
+ pad_to_multiple_of=self.pad_to_multiple_of_label,
496
+ return_tensors="np",
497
+ )
498
+
499
+ labels = labels_batch["input_ids"]
500
+ labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1))
501
+ labels = labels.filled(fill_value=-100)
502
+
503
+ batch["labels"] = labels
504
+
505
+ return batch
506
+
507
+
508
+ def get_grouped_indices(
509
+ dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
510
+ ) -> np.array:
511
+ """
512
+ Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)
513
+ Function that returns a list of indices in which each slice of `batch_size` consecutive indices correspond to elements of similar
514
+ lengths. To do this, the indices are:
515
+
516
+ - randomly permuted (if a JAX rng is specified)
517
+ - grouped in mega-batches of size `mega_batch_mult * batch_size`
518
+ - sorted by length in each mega-batch
519
+
520
+ The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of
521
+ maximum length placed first, so that an OOM happens sooner rather than later.
522
+ """
523
+ lengths = dataset["input_length"]
524
+
525
+ # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller.
526
+ if mega_batch_mult is None:
527
+ mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
528
+ # Just in case, for tiny datasets
529
+ if mega_batch_mult == 0:
530
+ mega_batch_mult = 1
531
+
532
+ # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler.
533
+ num_samples = len(lengths)
534
+ indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples)
535
+
536
+ megabatch_size = mega_batch_mult * batch_size
537
+ megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
538
+ megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches]
539
+
540
+ # The rest is to get the biggest batch first.
541
+ # Since each megabatch is sorted by descending length, the longest element is the first
542
+ megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
543
+ max_idx = np.argmax(megabatch_maximums).item()
544
+ # Switch to put the longest batch in first position
545
+ # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
546
+ megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]
547
+
548
+ megabatches = np.array([i for megabatch in megabatches for i in megabatch])
549
+
550
+ return megabatches
551
+
552
+
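To make the length-grouping above concrete, here is a minimal NumPy-only sketch of the same idea (the lengths, batch size, and variable names below are illustrative, not taken from the script; the final swap that moves the longest mega-batch to the front is omitted):

import numpy as np

# Illustrative input lengths for 12 samples (hypothetical values).
toy_lengths = np.array([5, 42, 7, 30, 3, 25, 18, 9, 50, 11, 2, 27])
batch_size = 4
megabatch_size = 2 * batch_size  # the script uses mega_batch_mult * batch_size

indices = np.arange(len(toy_lengths))
megabatches = [indices[i:i + megabatch_size] for i in range(0, len(indices), megabatch_size)]
# sort each mega-batch by descending length, as get_grouped_indices does
megabatches = [sorted(m, key=lambda i: toy_lengths[i], reverse=True) for m in megabatches]
grouped = np.array([i for megabatch in megabatches for i in megabatch])

print(grouped)               # indices regrouped: 1, 3, 5, 6, 7, 2, 0, 4, 8, 11, 9, 10
print(toy_lengths[grouped])  # lengths are descending inside each mega-batch

Each consecutive slice of batch_size indices now refers to samples of similar length, which keeps padding (and TPU recompilation) to a minimum.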
553
+ def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
554
+ """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
555
+ the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
556
+ num_samples = len(samples_idx)
557
+ if drop_last:
558
+ samples_to_remove = num_samples % batch_size
559
+ if samples_to_remove != 0:
560
+ samples_idx = samples_idx[:-samples_to_remove]
561
+ sections_split = num_samples // batch_size
562
+ samples_idx = samples_idx.reshape((sections_split, batch_size))
563
+ else:
564
+ sections_split = math.ceil(num_samples / batch_size)
565
+ samples_idx = np.array_split(samples_idx, sections_split)
566
+ return samples_idx
567
+
568
+
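With 10 indices and a batch size of 4, the two drop_last branches above behave as follows (toy numbers only; note that np.array_split produces nearly equal chunk sizes rather than full batches plus a remainder):

import math
import numpy as np

samples_idx = np.arange(10)
batch_size = 4

# drop_last=True: trim the remainder, then reshape into full batches
trimmed = samples_idx[: (len(samples_idx) // batch_size) * batch_size]
print(trimmed.reshape(-1, batch_size))        # [[0 1 2 3] [4 5 6 7]]; indices 8 and 9 are dropped

# drop_last=False: keep everything, split into ceil(10 / 4) = 3 chunks
sections = math.ceil(len(samples_idx) / batch_size)
print(np.array_split(samples_idx, sections))  # [array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8, 9])]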
569
+ def write_train_metric(summary_writer, train_metrics, train_time, step):
570
+ summary_writer.scalar("train_time", train_time, step)
571
+
572
+ train_metrics = get_metrics(train_metrics)
573
+ for key, vals in train_metrics.items():
574
+ tag = f"train_{key}"
575
+ for i, val in enumerate(vals):
576
+ summary_writer.scalar(tag, val, step - len(vals) + i + 1)
577
+
578
+
579
+ def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
580
+ for metric_name, value in eval_metrics.items():
581
+ summary_writer.scalar(f"eval_{metric_name}", value, step)
582
+
583
+ if pred_str is not None:
584
+ # write output actual predictions for debugging
585
+ summary_writer.text("eval_predictions", "\n".join(pred_str), step)
586
+
587
+
588
+ def write_wandb_log(metrics, step, prefix=None):
589
+ if jax.process_index() == 0:
590
+ log_metrics = {}
591
+ for k, v in metrics.items():
592
+ if "layer" in k:
593
+ log_metrics[f"{k}/"] = v
594
+ elif prefix is not None:
595
+ log_metrics[f"{prefix}/{k}"] = v
596
+ else:
597
+ log_metrics[k] = v
598
+ wandb.log(log_metrics, step)
599
+
600
+
601
+ def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
602
+ if jax.process_index() == 0:
603
+ # convert str data to a wandb compatible format
604
+ str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
605
+ # we'll log the first 50 predictions for each epoch
606
+ wandb.log(
607
+ {
608
+ f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
609
+ columns=["label_str", "pred_str"], data=str_data[:num_log]
610
+ )
611
+ },
612
+ step,
613
+ )
614
+
615
+
616
+ def create_learning_rate_fn(
617
+ num_train_steps: int, num_warmup_steps: int, learning_rate: float
618
+ ) -> Callable[[int], jnp.array]:
619
+ """Returns a linear warmup, linear_decay learning rate function."""
620
+ warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
621
+ decay_fn = optax.linear_schedule(
622
+ init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
623
+ )
624
+ schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
625
+ return schedule_fn
626
+
627
+
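The schedule returned above is a plain optax schedule and can be probed step by step. A small sketch with illustrative numbers (100 warmup steps, 1000 total steps, peak learning rate 3e-4; not the run's actual values), assuming optax as pinned in requirements.txt:

import optax

warmup = optax.linear_schedule(init_value=0.0, end_value=3e-4, transition_steps=100)
decay = optax.linear_schedule(init_value=3e-4, end_value=0.0, transition_steps=900)
lr_fn = optax.join_schedules(schedules=[warmup, decay], boundaries=[100])

for step in (0, 50, 100, 550, 1000):
    # ramps linearly from 0 to 3e-4 over the first 100 steps, then decays linearly back to 0
    print(step, float(lr_fn(step)))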
628
+ def ctc_loss(
629
+ logits,
630
+ logits_attention_mask,
631
+ labels,
632
+ blank_id,
633
+ loss_reduction="mean",
634
+ output_emission_dict=False,
635
+ log_epsilon=-100000.0,
636
+ ):
637
+ """Computes CTC loss.
638
+ This function performs forward computation over an FSA with `N * 2` states
639
+ where `N` is the max number of labels. The states are split into two groups:
640
+ Phi states and emission states. a phi-state accepts repetition of
641
+ phi (blank)-symbols and transits to emission state when the correct label is
642
+ observed. An emission state accepts repetition of the label and transits to
643
+ the next phi states at any time (so called epsilon-transition).
644
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
645
+ and `N` denotes the time steps in `labels`.
646
+ Args:
647
+ logits: (B, T, K)-array containing log-probabilities of each class.
648
+ logits_attention_mask: (B, T)-array. Attention mask for `logits`: 1/True for real frames, 0/False for padding.
649
+ labels: (B, N)-array containing reference integer labels.
650
+ Padding in `labels` is indicated by negative values (-100, as produced by the data
651
+ collator above). `labels` must be right-padded, i.e. padding values may only appear
652
+ at the end of each row.
653
+ blank_id: Id for blank token.
654
+ loss_reduction: one of "mean", "sum", "default"
655
+ - "none": no reduction is applied.
656
+ - "mean": output loss will be divided by target lengths and then the
657
+ mean over the batch is taken.
658
+ - "sum": output loss are summed over batch
659
+ output_emission_dict: whether to output additional information about the emission probs
660
+ Returns:
661
+ A pair of `(per_seq_loss, aux)`.
662
+ per_seq_loss:
663
+ (B,)-array containing loss values for each sequence in the batch.
664
+ aux: Dictionary containing interim variables used for computing losses.
665
+ aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
666
+ phi-state corresponding to the n-th label.
667
+ aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
668
+ emission-state corresponding to the n-th label.
669
+ aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
670
+ corresponding to each time frame.
671
+ aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
672
+ corresponding to each time frame.
673
+ """
674
+ # label paddings are indicated by -100
675
+ labelpaddings = labels < 0
676
+ # logit paddings are the inverse of attention_mask
677
+ logitpaddings = ~logits_attention_mask
678
+
679
+ # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py
680
+ batchsize, unused_maxinputlen, num_classes = logits.shape
681
+ batchsize_, maxlabellen = labels.shape
682
+
683
+ logprobs = jax.nn.log_softmax(logits)
684
+ labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32)
685
+
686
+ # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1].
687
+ repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32)
688
+ repeat = jnp.pad(repeat, ((0, 0), (0, 1)))
689
+
690
+ logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1]
691
+ logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1]
692
+
693
+ one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K]
694
+ logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot)
695
+ logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N]
696
+
697
+ logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N]
698
+ logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0)
699
+ logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N]
700
+
701
+ def loop_body(prev, x):
702
+ prev_phi, prev_emit = prev
703
+ # emit-to-phi epsilon transition, except if the next label is repetition
704
+ prev_phi_orig = prev_phi
705
+ prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat))
706
+
707
+ logprob_emit, logprob_phi, pad = x
708
+
709
+ # phi-to-emit transition
710
+ next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit)
711
+ # self-loop transition
712
+ next_phi = prev_phi + logprob_phi
713
+ # emit-to-phi blank transition only when the next label is repetition
714
+ next_phi = next_phi.at[:, 1:].set(
715
+ jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat))
716
+ )
717
+
718
+ pad = pad.reshape((batchsize, 1))
719
+ next_emit = pad * prev_emit + (1.0 - pad) * next_emit
720
+ next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi
721
+
722
+ return (next_phi, next_emit), (next_phi, next_emit)
723
+
724
+ xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
725
+ _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)
726
+
727
+ # last row needs to be updated with the last epsilon transition
728
+ logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
729
+ logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)
730
+
731
+ # extract per_seq_loss
732
+ one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1]
733
+ per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)
734
+
735
+ if loss_reduction == "mean":
736
+ target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
737
+ loss = (per_seq_loss / target_lengths).mean()
738
+ elif loss_reduction == "sum":
739
+ loss = per_seq_loss.sum()
740
+ else:
741
+ loss = per_seq_loss
742
+
743
+ if not output_emission_dict:
744
+ return loss
745
+
746
+ return loss, {
747
+ "logalpha_phi": logalpha_phi,
748
+ "logalpha_emit": logalpha_emit,
749
+ "logprobs_phi": logprobs_phi,
750
+ "logprobs_emit": logprobs_emit,
751
+ }
752
+
753
+
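A toy invocation showing the expected shapes and padding conventions for the loss above (batch of 2 with illustrative sizes; this assumes the ctc_loss defined in this script is in scope, e.g. via "from run_flax_speech_recognition_ctc import ctc_loss" run from a checkout of this repository):

import numpy as np
import jax.numpy as jnp
from run_flax_speech_recognition_ctc import ctc_loss  # the function defined above

B, T, K, N = 2, 12, 5, 4  # batch, time frames, vocabulary size, max label length
logits = jnp.asarray(np.random.default_rng(0).normal(size=(B, T, K)))
# attention mask over time frames: True for real frames, False for padded frames
attention_mask = jnp.asarray([[True] * 12, [True] * 8 + [False] * 4])
# labels are right-padded with -100, matching the data collator in this script
labels = jnp.asarray([[1, 2, 3, 4], [2, 3, -100, -100]])

loss = ctc_loss(logits, attention_mask, labels, blank_id=0, loss_reduction="mean")
print(float(loss))  # scalar mean loss over the batch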
754
+ def make_dataset(data_args, seed=42):
755
+ # Pre-processing dataset
756
+ import re
757
+
758
+ def map_nst(entry):
759
+ text = entry["text"].lower()
760
+ text = text.replace("(...vær stille under dette opptaket...)", "")
761
+ text = re.sub('[áàâ]', 'a', text)
762
+ text = re.sub('[ä]', 'æ', text)
763
+ text = re.sub('[éèëê]', 'e', text)
764
+ text = re.sub('[íìïî]', 'i', text)
765
+ text = re.sub('[óòöô]', 'o', text)
766
+ text = re.sub('[ö]', 'ø', text)
767
+ text = re.sub('[ç]', 'c', text)
768
+ text = re.sub('[úùüû]', 'u', text)
769
+ # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
770
+ text = re.sub('\s+', ' ', text)
771
+ return {"text": text}
772
+
773
+ def filter_nst(entry):
774
+ if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
775
+ return False # Too short
776
+ if re.match(entry["type"], "pIW|CA"):
777
+ return False # Spelling out words
778
+ return True
779
+
780
+ def filter_npsc(entry):
781
+ # False if there are digits in the text
782
+ if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
783
+ return False # Too short
784
+ if re.search("\d", entry["text"]):
785
+ return False
786
+ return True
787
+
788
+ def map_npsc(entry):
789
+ batch = {"text": entry["text"].lower()}
790
+ batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
791
+ batch["text"] = re.sub('[ä]', 'æ', batch["text"])
792
+ batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
793
+ batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
794
+ batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
795
+ batch["text"] = re.sub('[ö]', 'ø', batch["text"])
796
+ batch["text"] = re.sub('[ç]', 'c', batch["text"])
797
+ batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
798
+ batch["text"] = re.sub('\s', ' ', batch["text"])
799
+ batch["text"] = re.sub('<ee>', 'eee', batch["text"])
800
+ batch["text"] = re.sub('<qq>', 'qqq', batch["text"])
801
+ batch["text"] = re.sub('<mm>', 'mmm', batch["text"])
802
+ batch["text"] = re.sub('<inaudible>', 'xxx', batch["text"])
803
+ # batch["text"] = re.sub('<inaudible>', '?', batch["text"])
804
+ if "<" in batch["text"]:
805
+ raise ValueError(batch["text"])
806
+ return batch
807
+
808
+ nst = datasets.load_dataset("NbAiLab/NST", "no-close")
809
+ npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
810
+ # TODO NST_hesitate
811
+
812
+ split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC
813
+ nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed)
814
+ nst[data_args.train_split_name] = nst_train["train"]
815
+ nst[data_args.eval_split_name] = nst_train["test"]
816
+
817
+ nst = nst.filter(filter_nst).map(
818
+ map_nst,
819
+ num_proc=data_args.preprocessing_num_workers,
820
+ desc="filtering NST",
821
+ ).shuffle(seed=seed)
822
+ npsc = npsc.filter(filter_npsc).map(
823
+ map_npsc,
824
+ num_proc=data_args.preprocessing_num_workers,
825
+ desc="filtering NPSC",
826
+ ).shuffle(seed=seed)
827
+
828
+ npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]])
829
+ nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]])
830
+
831
+ combined = {}
832
+ for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name:
833
+ probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples
834
+ probs = (probs / probs.sum()).tolist()
835
+ comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
836
+ combined[split] = comb
837
+
838
+ return datasets.DatasetDict(**combined)
839
+
840
+ def main():
841
+ # 1. Parse input arguments
842
+ # See all possible arguments in src/transformers/training_args.py
843
+ # or by passing the --help flag to this script.
844
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
845
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))
846
+
847
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
848
+ # If we pass only one argument to the script and it's the path to a json file,
849
+ # let's parse it to get our arguments.
850
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
851
+ else:
852
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
853
+
854
+ # 2. Setup logging
855
+ # Make one log on every process with the configuration for debugging.
856
+ logging.basicConfig(
857
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
858
+ datefmt="%m/%d/%Y %H:%M:%S",
859
+ handlers=[logging.StreamHandler(sys.stdout)],
860
+ )
861
+ # Set the verbosity to info of the Transformers logger.
862
+ # We only want one process per machine to log things on the screen.
863
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR)
864
+ if jax.process_index() == 0:
865
+ datasets.utils.logging.set_verbosity_warning()
866
+ transformers.utils.logging.set_verbosity_info()
867
+ else:
868
+ datasets.utils.logging.set_verbosity_error()
869
+ transformers.utils.logging.set_verbosity_error()
870
+
871
+ # Set up wandb run
872
+ if jax.process_index() == 0:
873
+ wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type)
874
+
875
+ logger.info("Training/evaluation parameters %s", training_args)
876
+
877
+ # Set the default TPU matmul precision and display the number of devices
878
+ jax.config.update("jax_default_matmul_precision", training_args.matmul_precision)
879
+ logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}")
880
+
881
+ # 4. Load dataset
882
+
883
+ set_seed(training_args.seed)
884
+ raw_datasets = make_dataset(data_args, seed=training_args.seed)
885
+
886
+ # raw_datasets = DatasetDict()
887
+
888
+ # if training_args.do_train:
889
+ # raw_datasets[data_args.train_split_name] = load_dataset(
890
+ # data_args.dataset_name,
891
+ # data_args.dataset_config_name,
892
+ # split=data_args.train_split_name,
893
+ # cache_dir=data_args.dataset_cache_dir,
894
+ # use_auth_token=True if model_args.use_auth_token else None,
895
+ # )
896
+
897
+ # if training_args.do_eval:
898
+ # raw_datasets[data_args.eval_split_name] = load_dataset(
899
+ # data_args.dataset_name,
900
+ # data_args.dataset_config_name,
901
+ # split=data_args.eval_split_name,
902
+ # cache_dir=data_args.dataset_cache_dir,
903
+ # use_auth_token=True if model_args.use_auth_token else None,
904
+ # )
905
+
906
+ # if training_args.do_predict:
907
+ # test_split = data_args.test_split_name.split("+")
908
+ # for split in test_split:
909
+ # raw_datasets[split] = load_dataset(
910
+ # data_args.dataset_name,
911
+ # data_args.dataset_config_name,
912
+ # split=split,
913
+ # cache_dir=data_args.dataset_cache_dir,
914
+ # use_auth_token=True if model_args.use_auth_token else None,
915
+ # )
916
+
917
+ if not training_args.do_train and not training_args.do_eval and not training_args.do_predict:
918
+ raise ValueError(
919
+ "Cannot not train, not do evaluation and not do prediction. At least one of "
920
+ "training, evaluation or prediction has to be done."
921
+ )
922
+
923
+ # if not training, there is no need to run multiple epochs
924
+ if not training_args.do_train:
925
+ training_args.num_train_epochs = 1
926
+
927
+ if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names:
928
+ raise ValueError(
929
+ f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
930
+ "Make sure to set `--audio_column_name` to the correct audio column - one of "
931
+ f"{', '.join(next(iter(raw_datasets.values())).column_names)}."
932
+ )
933
+
934
+ if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names:
935
+ raise ValueError(
936
+ f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
937
+ "Make sure to set `--text_column_name` to the correct text column - one of "
938
+ f"{', '.join(next(iter(raw_datasets.values())).column_names)}."
939
+ )
940
+
941
+ # 5. Load pretrained model, tokenizer, and feature extractor
942
+ #
943
+ # Distributed training:
944
+ # The .from_pretrained methods guarantee that only one local process can concurrently
945
+ config = Wav2Vec2Config.from_pretrained(
946
+ model_args.config_name if model_args.config_name else model_args.model_name_or_path,
947
+ cache_dir=model_args.cache_dir,
948
+ revision=model_args.model_revision,
949
+ use_auth_token=True if model_args.use_auth_token else None,
950
+ )
951
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
952
+ model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
953
+ cache_dir=model_args.cache_dir,
954
+ revision=model_args.model_revision,
955
+ use_auth_token=True if model_args.use_auth_token else None,
956
+ )
957
+ tokenizer = AutoTokenizer.from_pretrained(
958
+ model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
959
+ cache_dir=model_args.cache_dir,
960
+ revision=model_args.model_revision,
961
+ use_auth_token=True if model_args.use_auth_token else None,
962
+ )
963
+ # update config according to training args, model args, and tokenizer attributes
964
+ config.update(
965
+ {
966
+ "feat_proj_dropout": model_args.feat_proj_dropout,
967
+ "attention_dropout": model_args.attention_dropout,
968
+ "hidden_dropout": model_args.hidden_dropout,
969
+ "final_dropout": model_args.final_dropout,
970
+ "mask_time_prob": model_args.mask_time_prob,
971
+ "mask_time_length": model_args.mask_time_length,
972
+ "mask_feature_prob": model_args.mask_feature_prob,
973
+ "mask_feature_length": model_args.mask_feature_length,
974
+ "gradient_checkpointing": training_args.gradient_checkpointing,
975
+ "layerdrop": model_args.layerdrop,
976
+ "ctc_loss_reduction": model_args.ctc_loss_reduction,
977
+ "ctc_zero_infinity": model_args.ctc_zero_infinity,
978
+ "pad_token_id": tokenizer.pad_token_id,
979
+ "vocab_size": tokenizer.vocab_size, # len(tokenizer),
980
+ "activation_dropout": model_args.activation_dropout,
981
+ }
982
+ )
983
+
984
+ if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr":
985
+ raise ValueError(
986
+ "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to "
987
+ "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus,"
988
+ "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely "
989
+ "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`."
990
+ )
991
+
992
+ if training_args.precision == "full_mixed":
993
+ dtype = jnp.bfloat16
994
+ training_args.mixed_precision = True
995
+ elif training_args.precision == "half_mixed":
996
+ dtype = jnp.bfloat16
997
+ training_args.mixed_precision = False
998
+ else:
999
+ dtype = jnp.float32
1000
+ training_args.mixed_precision = False
1001
+
1002
+ try:
1003
+ model = FlaxWav2Vec2ForCTC.from_pretrained(
1004
+ model_args.model_name_or_path,
1005
+ config=config,
1006
+ dtype=dtype,
1007
+ cache_dir=model_args.cache_dir,
1008
+ revision=model_args.model_revision,
1009
+ use_auth_token=True if model_args.use_auth_token else None,
1010
+ )
1011
+ except:
1012
+ model = FlaxWav2Vec2ForCTC.from_pretrained(
1013
+ model_args.model_name_or_path,
1014
+ config=config,
1015
+ dtype=dtype,
1016
+ cache_dir=model_args.cache_dir,
1017
+ revision=model_args.model_revision,
1018
+ use_auth_token=True if model_args.use_auth_token else None,
1019
+ from_pt=True,
1020
+ )
1021
+
1022
+ # 6. Resample speech dataset ALWAYS
1023
+ raw_datasets = raw_datasets.cast_column(
1024
+ data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
1025
+ )
1026
+
1027
+ # 7. Preprocessing the datasets.
1028
+ # We need to read the audio files as arrays and tokenize the targets.
1029
+ max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate)
1030
+ min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate)
1031
+ max_target_length = data_args.max_label_length
1032
+ min_target_length = data_args.min_label_length
1033
+ pad_input_to_multiple_of = data_args.pad_input_to_multiple_of
1034
+ audio_column_name = data_args.audio_column_name
1035
+ num_workers = data_args.preprocessing_num_workers
1036
+ text_column_name = data_args.text_column_name
1037
+ model_input_name = feature_extractor.model_input_names[0]
1038
+ do_lower_case = data_args.do_lower_case
1039
+ dataset_name = data_args.dataset_name
1040
+ chars_to_ignore = ', ? . ! - ; : " “ % ‘ ” ?'.split(" ")
1041
+ chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]'
1042
+ # gigaspeech_punctuation = {" <comma>": ",", " <period>": ".", " <questionmark>": "?", " <exclamationpoint>": "!"}
1043
+ # gigaspeech_disfluencies = ["<other>", "<sil>"]
1044
+ # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "<a_aside>", "<b_aside>", "<e_aside>", "[laughter-",
1045
+ # "[vocalized-noise]", "_1"]
1046
+ # swb_punctuations = ["{", "}", "[", "]-", "]"]
1047
+ # earnings_disfluencies = ["<crosstalk>", "<affirmative>", "<inaudible>", "inaudible", "<laugh>", "<unk>"]
1048
+ ignore_segments = ["ignore_time_segment_in_scoring", "<noise>", "<music>", "[noise]", "[laughter]", "[silence]",
1049
+ "[vocalized-noise]", "<crosstalk>", "<affirmative>", "<inaudible>", "<laugh>", "<other>", "<sil>", ""]
1050
+
1051
+ if training_args.do_train and data_args.max_train_samples is not None:
1052
+ raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples))
1053
+
1054
+ if training_args.do_eval and data_args.max_eval_samples is not None:
1055
+ raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples))
1056
+
1057
+ if training_args.do_predict and data_args.max_test_samples is not None:
1058
+ raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_test_samples))
1059
+
1060
+ if training_args.do_train and data_args.remove_punctuation:
1061
+
1062
+ def remove_punctuation(batch):
1063
+ batch[text_column_name] = (
1064
+ re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "")
1065
+ )
1066
+
1067
+ raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map(
1068
+ remove_punctuation,
1069
+ num_proc=data_args.preprocessing_num_workers,
1070
+ desc="removing punctuation from train split",
1071
+ )
1072
+
1073
+ # filter data where the targets are ignored in scoring
1074
+ def is_target_labels(input_str):
1075
+ return input_str.lower() not in ignore_segments
1076
+
1077
+ raw_datasets = raw_datasets.filter(
1078
+ is_target_labels,
1079
+ num_proc=num_workers,
1080
+ input_columns=[text_column_name],
1081
+ desc="filtering data where the targets are ignored in scoring",
1082
+ )
1083
+
1084
+ def prepare_dataset(batch):
1085
+ # process audio
1086
+ try:
1087
+ sample = batch[audio_column_name]
1088
+ except ValueError:
1089
+ sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate}
1090
+ inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
1091
+ # process audio length
1092
+ batch[model_input_name] = inputs.input_values[0]
1093
+ batch["input_length"] = len(batch["input_values"])
1094
+
1095
+ # process targets
1096
+ input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
1097
+
1098
+ # if dataset_name == "google/xtreme_s":
1099
+ # # Finally, we tokenize the processed text
1100
+ # batch["labels"] = tokenizer(input_str).input_ids
1101
+ # batch["labels_length"] = len(batch["labels"])
1102
+ # return batch
1103
+
1104
+ # # Common Voice 9
1105
+ # if input_str.startswith('"') and input_str.endswith('"'):
1106
+ # # we can remove trailing quotation marks as they do not affect the transcription
1107
+ # input_str = input_str[1:-1]
1108
+ # # normalize quotation marks
1109
+ # input_str = re.sub(r'["“”]', '"', input_str)
1110
+ # # normalize apostrophes
1111
+ # input_str = re.sub(r"[’']", "'", input_str)
1112
+ # # normalize hyphens
1113
+ # input_str = re.sub(r"[—–]", "-", input_str)
1114
+ # # replace double quotation marks with single
1115
+ # input_str = input_str.replace('""', '"')
1116
+ # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str):
1117
+ # # for CV9, we'll normalize the text to always finish with punctuation
1118
+ # if input_str[-1] not in [".", "?", "!"]:
1119
+ # input_str = input_str + "."
1120
+
1121
+ # # TEDLIUM-3
1122
+ # # delete the <unk> token from the text and replace spaced apostrophes with un-spaced
1123
+ # input_str = input_str.replace("<unk>", "").replace(" '", "'")
1124
+
1125
+ # # GigaSpeech
1126
+ # for disfluency in gigaspeech_disfluencies:
1127
+ # input_str = input_str.replace(disfluency, "")
1128
+ # # convert spelled out punctuation to symbolic form
1129
+ # for punctuation, replacement in gigaspeech_punctuation.items():
1130
+ # input_str = input_str.replace(punctuation, replacement)
1131
+ # if dataset_name == "speechcolab/gigaspeech" and len(input_str):
1132
+ # # for GS, we'll normalize the text to always finish with punctuation
1133
+ # if input_str[-1] not in [".", "?", "!"]:
1134
+ # input_str = input_str + "."
1135
+
1136
+ # # SWB
1137
+ # for disfluency in swb_disfluencies:
1138
+ # input_str = input_str.replace(disfluency, "")
1139
+ # # remove parenthesised text (test data only)
1140
+ # input_str = re.sub("[\(].*?[\)]", "", input_str)
1141
+ # for punctuation in swb_punctuations:
1142
+ # input_str = input_str.replace(punctuation, "")
1143
+ # # replace anomalous words with their correct transcriptions
1144
+ # split_str = input_str.split("/")
1145
+ # if len(split_str) > 1:
1146
+ # input_str = " ".join(
1147
+ # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]])
1148
+
1149
+ # # Earnings 22
1150
+ # for disfluency in earnings_disfluencies:
1151
+ # input_str = input_str.replace(disfluency, "")
1152
+ # # replace mal-formatted ellipsis
1153
+ # input_str = input_str.replace("…", ".")
1154
+
1155
+ # JIWER compliance
1156
+ # remove multiple spaces
1157
+ input_str = re.sub(r"\s\s+", " ", input_str)
1158
+ # strip trailing spaces
1159
+ input_str = input_str.strip()
1160
+
1161
+ # Finally, we tokenize the processed text
1162
+ batch["labels"] = tokenizer(input_str).input_ids
1163
+ batch["labels_length"] = len(batch["labels"])
1164
+ return batch
1165
+
1166
+ vectorized_datasets = raw_datasets.map(
1167
+ prepare_dataset,
1168
+ remove_columns=next(iter(raw_datasets.values())).column_names,
1169
+ num_proc=num_workers,
1170
+ desc="preprocess dataset",
1171
+ )
1172
+
1173
+ # filter data with inputs shorter than min_input_length or longer than max_input_length
1174
+ def is_audio_in_length_range(length):
1175
+ return length > min_input_length and length < max_input_length
1176
+
1177
+ vectorized_datasets = vectorized_datasets.filter(
1178
+ is_audio_in_length_range,
1179
+ num_proc=num_workers,
1180
+ input_columns=["input_length"],
1181
+ )
1182
+
1183
+ # filter data with targets shorter than min_target_length or longer than max_target_length
1184
+ def is_labels_in_length_range(length):
1185
+ return length > min_target_length # and length < max_target_length
1186
+
1187
+ vectorized_datasets = vectorized_datasets.filter(
1188
+ is_labels_in_length_range,
1189
+ num_proc=num_workers,
1190
+ input_columns=["labels_length"],
1191
+ )
1192
+
1193
+ # for large datasets it is advised to run the preprocessing on a
1194
+ # single machine first with `args.preprocessing_only` since there will most likely
1195
+ # be a timeout when running the script in distributed mode.
1196
+ # In a second step `args.preprocessing_only` can then be set to `False` to load the
1197
+ # cached dataset
1198
+ if data_args.preprocessing_only:
1199
+ cache = {k: v.cache_files for k, v in vectorized_datasets.items()}
1200
+ logger.info(f"Data preprocessing finished. Files cached at {cache}.")
1201
+ return
1202
+
1203
+ # 8. Load Metrics
1204
+ wer_metric = load_metric("wer")
1205
+ cer_metric = load_metric("cer")
1206
+
1207
+ def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]):
1208
+ padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids))
1209
+
1210
+ pred_str = tokenizer.batch_decode(pred_ids)
1211
+ # we do not want to group tokens when computing the metrics
1212
+ label_str = tokenizer.batch_decode(padded_ids, group_tokens=False)
1213
+
1214
+ wer = wer_metric.compute(predictions=pred_str, references=label_str)
1215
+ cer = cer_metric.compute(predictions=pred_str, references=label_str)
1216
+
1217
+ return {"wer": wer, "cer": cer}, pred_str, label_str
1218
+
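# Minimal sketch (an assumption, not the datasets/jiwer implementation used above):
# the word error rate returned by `wer_metric.compute` is word-level edit distance
# normalised by the number of reference words.
def word_error_rate(prediction: str, reference: str) -> float:
    hyp, ref = prediction.split(), reference.split()
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost)
    return d[len(ref)][len(hyp)] / max(len(ref), 1)

print(word_error_rate("det er fint", "det var fint"))  # 0.333... (one substitution in three words)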
1219
+ # 9. save feature extractor, tokenizer and config
1220
+ feature_extractor.save_pretrained(training_args.output_dir)
1221
+ tokenizer.save_pretrained(training_args.output_dir)
1222
+ config.save_pretrained(training_args.output_dir)
1223
+
1224
+ processor = AutoProcessor.from_pretrained(training_args.output_dir)
1225
+
1226
+ data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding(
1227
+ processor=processor,
1228
+ input_padding="longest",
1229
+ pad_input_to_multiple_of=pad_input_to_multiple_of,
1230
+ max_label_length=data_args.max_label_length,
1231
+ )
1232
+
1233
+ # Enable tensorboard only on the master node
1234
+ has_tensorboard = is_tensorboard_available()
1235
+ if has_tensorboard and jax.process_index() == 0:
1236
+ try:
1237
+ from flax.metrics.tensorboard import SummaryWriter
1238
+
1239
+ summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir))
1240
+ except ImportError as ie:
1241
+ has_tensorboard = False
1242
+ logger.warning(
1243
+ f"Unable to display metrics through TensorBoard because some package are not installed: {ie}"
1244
+ )
1245
+ else:
1246
+ logger.warning(
1247
+ "Unable to display metrics through TensorBoard because the package is not installed: "
1248
+ "Please run `pip install tensorboard` to enable."
1249
+ )
1250
+
1251
+ # 10. Handle the repository creation
1252
+ if training_args.push_to_hub:
1253
+ with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f:
1254
+ git_lfs_extensions = f.read()
1255
+ if "*.wandb" not in git_lfs_extensions:
1256
+ f.write("*.wandb filter=lfs diff=lfs merge=lfs -text")
1257
+ if training_args.hub_model_id is None:
1258
+ repo_name = get_full_repo_name(
1259
+ Path(training_args.output_dir).absolute().name, token=training_args.hub_token
1260
+ )
1261
+ else:
1262
+ repo_name = training_args.hub_model_id
1263
+ repo = Repository(training_args.output_dir, clone_from=repo_name)
1264
+
1265
+ # 11. Initialize our training
1266
+ rng = jax.random.PRNGKey(training_args.seed)
1267
+ rng, dropout_rng = jax.random.split(rng)
1268
+
1269
+ # Store some constants
1270
+ max_steps = int(training_args.max_steps)
1271
+ gradient_accumulation_steps = int(training_args.gradient_accumulation_steps)
1272
+ train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
1273
+ batch_size_per_update = train_batch_size * gradient_accumulation_steps
1274
+ per_device_eval_batch_size = int(training_args.per_device_eval_batch_size)
1275
+ eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
1276
+ to_dtype = to_bf16 if training_args.mixed_precision else to_fp32
1277
+
1278
+ if training_args.do_train:
1279
+ num_train_samples = len(vectorized_datasets[data_args.train_split_name])
1280
+ steps_per_epoch = num_train_samples // batch_size_per_update
1281
+ if max_steps > 0:
1282
+ num_epochs = -(training_args.max_steps // -steps_per_epoch)
1283
+ total_train_steps = max_steps
1284
+ else:
1285
+ num_epochs = int(training_args.num_train_epochs)
1286
+ total_train_steps = steps_per_epoch * num_epochs
1287
+
1288
+ # Create learning rate schedule
1290
+ linear_decay_lr_schedule_fn = create_learning_rate_fn(
1291
+ total_train_steps,
1292
+ training_args.warmup_steps,
1293
+ training_args.learning_rate,
1294
+ )
1295
+
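# Sketch (assumption) of the kind of schedule the helper above returns: linear
# warmup to the peak learning rate followed by linear decay to zero. The exact
# implementation of `create_learning_rate_fn` lives elsewhere in the script and
# may differ; this run uses warmup_steps=0, so the schedule is pure decay.
def linear_warmup_decay(total_steps: int, warmup_steps: int, peak_lr: float):
    def schedule(step: int) -> float:
        if warmup_steps and step < warmup_steps:
            return peak_lr * step / warmup_steps
        decay_steps = max(total_steps - warmup_steps, 1)
        frac = (step - warmup_steps) / decay_steps
        return max(peak_lr * (1.0 - frac), 0.0)
    return schedule

lr_fn = linear_warmup_decay(total_steps=40_000, warmup_steps=0, peak_lr=3.37e-4)
print(lr_fn(0), lr_fn(20_000), lr_fn(40_000))  # peak, half of peak, zero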
1296
+ # We use Optax's "masking" functionality to not apply weight decay
1297
+ # to bias and LayerNorm scale parameters. decay_mask_fn returns a
1298
+ # boolean mask with the same structure as the parameters.
1299
+ # The mask is True for parameters that should be decayed.
1300
+ # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart.
1301
+ # For FlaxT5, one should correct the layer norm parameter naming
1302
+ # accordingly - see `run_t5_mlm_flax.py` e.g.
1303
+ def decay_mask_fn(params):
1304
+ flat_params = traverse_util.flatten_dict(params)
1305
+ layer_norm_params = [
1306
+ (name, "scale")
1307
+ for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"]
1308
+ ]
1309
+ flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params}
1310
+ return traverse_util.unflatten_dict(flat_mask)
1311
+
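# Toy illustration (assumption, separate from the training script) of what
# decay_mask_fn produces: biases and LayerNorm scales are masked out (False),
# while every other parameter (kernels) gets weight decay (True).
from flax import traverse_util

toy_params = {
    "encoder": {
        "attention": {"kernel": 0.0, "bias": 0.0},
        "layer_norm": {"scale": 0.0, "bias": 0.0},
    }
}
layer_norm_params = [
    (name, "scale")
    for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"]
]
flat = traverse_util.flatten_dict(toy_params)
mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat}
print(traverse_util.unflatten_dict(mask))
# {'encoder': {'attention': {'kernel': True, 'bias': False},
#              'layer_norm': {'scale': False, 'bias': False}}}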
1312
+ if training_args.adafactor:
1313
+ # Create Adafactor optimizer
1314
+ optim = optax.adafactor(
1315
+ learning_rate=linear_decay_lr_schedule_fn,
1316
+ dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32,
1317
+ weight_decay_rate=training_args.weight_decay,
1318
+ weight_decay_mask=decay_mask_fn,
1319
+ )
1320
+ else:
1321
+ # Create AdamW optimizer
1322
+ optim = optax.adamw(
1323
+ learning_rate=linear_decay_lr_schedule_fn,
1324
+ b1=training_args.adam_beta1,
1325
+ b2=training_args.adam_beta2,
1326
+ eps=training_args.adam_epsilon,
1327
+ weight_decay=training_args.weight_decay,
1328
+ mask=decay_mask_fn,
1329
+ )
1330
+
1331
+ # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. gradient accumulation steps > 1)
1332
+ if training_args.multisteps and gradient_accumulation_steps > 1:
1333
+ optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False)
1334
+ else:
1335
+ num_epochs = 0
1336
+ total_train_steps = 0
1337
+ num_train_samples = 0
1338
+ optim = None
1339
+
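# Minimal usage sketch (assumption, not part of the committed script): driving
# the AdamW optimizer built above with optax, and wrapping it in
# optax.MultiSteps so that an update is only applied every k micro-batches.
import jax
import jax.numpy as jnp
import optax

params = {"w": jnp.ones((3,))}
tx = optax.adamw(learning_rate=1e-4, weight_decay=0.01)
tx = optax.MultiSteps(tx, every_k_schedule=2)  # accumulate over 2 micro-batches
opt_state = tx.init(params)

def loss_fn(p):
    return jnp.sum(p["w"] ** 2)

for _ in range(2):  # two micro-batches -> one effective parameter update
    grads = jax.grad(loss_fn)(params)
    updates, opt_state = tx.update(grads, opt_state, params)
    params = optax.apply_updates(params, updates)
print(params["w"])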
1340
+ # Setup train state
1341
+ state = MixedPrecisionTrainState.create(
1342
+ apply_fn=model.__call__,
1343
+ get_attention_mask_fn=model._get_feature_vector_attention_mask,
1344
+ params=model.params,
1345
+ tx=optim,
1346
+ to_dtype=to_dtype,
1347
+ dropout_rng=dropout_rng,
1348
+ max_grad_norm=training_args.max_grad_norm,
1349
+ )
1350
+
1351
+ # Replicate the train state on each device
1352
+ state = state.replicate()
1353
+ blank_id = model.config.pad_token_id
1354
+
1355
+ # Define gradient update step fn
1356
+ def train_step(state, batch):
1357
+ # only a single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch
1358
+ dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng)
1359
+
1360
+ def compute_loss(params, minibatch):
1361
+ labels = minibatch.pop("labels")
1362
+ logits = state.apply_fn(
1363
+ **minibatch,
1364
+ params=params,
1365
+ dropout_rng=dropout_rng,
1366
+ freeze_feature_encoder=model_args.freeze_feature_encoder,
1367
+ train=True,
1368
+ )[0]
1369
+ logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"])
1370
+ loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean")
1371
+
1372
+ return loss
1373
+
1374
+ grad_fn = jax.value_and_grad(compute_loss)
1375
+
1376
+ if gradient_accumulation_steps == 1 or training_args.multisteps:
1377
+ loss, grad = grad_fn(to_dtype(state.params), batch)
1378
+
1379
+ # Custom gradient accumulation
1380
+ else:
1381
+ # add a first dimension over gradient_accumulation_steps for minibatch slices
1382
+ batch = jax.tree_map(
1383
+ lambda x: x.reshape(
1384
+ gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::]
1385
+ ),
1386
+ batch,
1387
+ )
1388
+
1389
+ def accum_minibatch_step(accum_grad, minibatch):
1390
+ # compute loss, num labels and grad over minibatch and accumulate
1391
+ loss, grad = grad_fn(to_dtype(state.params), minibatch)
1392
+ return jax.tree_map(jnp.add, accum_grad, grad), loss
1393
+
1394
+ # create an initial state for accumulating losses, num labels and gradients
1395
+ init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params))
1396
+ # loop accum minibatch step over the number of gradient accumulation steps
1397
+ grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch)
1398
+
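# Toy sketch (assumption, not the committed code path): the same
# accumulate-with-scan pattern on a tiny linear-regression loss, showing how
# jax.lax.scan threads the running gradient through the minibatch slices.
import jax
import jax.numpy as jnp

def loss_fn(params, xb, yb):
    pred = xb @ params["w"] + params["b"]
    return jnp.mean((pred - yb) ** 2)

grad_fn = jax.grad(loss_fn)
params = {"w": jnp.zeros((3,)), "b": jnp.zeros(())}
xs = jnp.ones((4, 8, 3))  # 4 accumulation steps x 8 examples x 3 features
ys = jnp.ones((4, 8))

def accum_step(accum_grad, minibatch):
    xb, yb = minibatch
    grad = grad_fn(params, xb, yb)
    return jax.tree_util.tree_map(jnp.add, accum_grad, grad), None

init_grad = jax.tree_util.tree_map(jnp.zeros_like, params)
accum_grad, _ = jax.lax.scan(accum_step, init_grad, (xs, ys))
print(jax.tree_util.tree_map(lambda g: g / 4.0, accum_grad))  # averaged gradient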
1399
+ # update state
1400
+ new_state = state.apply_gradients(
1401
+ grads=grad,
1402
+ dropout_rng=new_dropout_rng,
1403
+ to_dtype=to_dtype,
1404
+ )
1405
+
1406
+ # compute gradient norms over all layers and globally for detailed monitoring
1407
+ layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad)
1408
+ logs = {
1409
+ "layer_grad_norm": layer_grad_norm,
1410
+ "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)),
1411
+ }
1412
+
1413
+ # compute parameter norms over all layers and globally for detailed monitoring
1414
+ layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params)
1415
+ logs["layer_param_norm"] = layer_param_norm
1416
+ logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm))
1417
+
1418
+ metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)}
1419
+ metrics.update(logs)
1420
+
1421
+ metrics = jax.lax.pmean(metrics, axis_name="batch")
1422
+ # metrics = to_fp32(metrics)
1423
+
1424
+ return new_state, metrics
1425
+
1426
+ # Define eval fn
1427
+ def eval_step(params, batch):
1428
+ labels = batch.pop("labels")
1429
+ logits = model(**batch, params=params, train=False)[0]
1430
+
1431
+ logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"])
1432
+ loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean")
1433
+
1434
+ pred_ids = jnp.argmax(logits, axis=-1)
1435
+
1436
+ # summarize metrics
1437
+ metrics = {"loss": loss}
1438
+ metrics = jax.lax.pmean(metrics, axis_name="batch")
1439
+ # metrics = to_fp32(metrics)
1440
+ return metrics, pred_ids
1441
+
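# Illustrative sketch (assumption): the greedy CTC decoding implied by taking the
# argmax of the logits above — tokenizer.batch_decode collapses repeats and drops
# the blank/pad id when mapping `pred_ids` back to text; this is the core of that step.
def greedy_ctc_collapse(ids, blank_id):
    out, prev = [], None
    for i in ids:
        if i != prev and i != blank_id:
            out.append(i)
        prev = i
    return out

print(greedy_ctc_collapse([0, 3, 3, 0, 4, 4, 4, 5], blank_id=0))  # [3, 4, 5]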
1442
+ # Create parallel version of the train and eval step
1443
+ if training_args.do_train:
1444
+ p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,))
1445
+
1446
+ if training_args.do_eval:
1447
+ p_eval_step = jax.pmap(eval_step, "batch")
1448
+
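# Minimal sketch (assumption, separate from the script): how jax.pmap with
# axis_name="batch" lets the step functions above average metrics across devices
# with jax.lax.pmean. The leading array axis must equal the local device count.
import jax
import jax.numpy as jnp

n = jax.local_device_count()

def per_device_mean(x):
    return jax.lax.pmean(jnp.mean(x), axis_name="batch")

sharded = jnp.arange(n * 4, dtype=jnp.float32).reshape(n, 4)
print(jax.pmap(per_device_mean, axis_name="batch")(sharded))  # same mean on every device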
1449
+ def run_evaluation(step):
1450
+ if training_args.do_eval:
1451
+ # ======================== Evaluating ==============================
1452
+ eval_metrics = []
1453
+ eval_preds = []
1454
+ eval_labels = []
1455
+
1456
+ # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length
1457
+ eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size)
1458
+ eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False)
1459
+
1460
+ for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)):
1461
+ samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx]
1462
+ batch = data_collator(samples)
1463
+ labels = batch["labels"]
1464
+
1465
+ metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size)
1466
+ eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1])))
1467
+ eval_metrics.append(metrics)
1468
+
1469
+ eval_labels.extend(labels)
1470
+
1471
+ # normalize eval metrics
1472
+ eval_metrics = get_metrics(eval_metrics)
1473
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
1474
+ eval_metrics = to_fp32(eval_metrics)
1475
+
1476
+ # always run compute metrics
1477
+ error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels)
1478
+ eval_metrics.update(error_rate_metric)
1479
+ error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()])
1480
+
1481
+ # Print metrics and update progress bar
1482
+ desc = f"Step... ({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})"
1483
+ epochs.write(desc)
1484
+ epochs.desc = desc
1485
+
1486
+ # Save metrics
1487
+ write_wandb_log(eval_metrics, step, prefix="eval")
1488
+ write_wandb_pred(pred_str, label_str, step)
1489
+ # if has_tensorboard and jax.process_index() == 0:
1490
+ # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str)
1491
+
1492
+ def save_checkpoint(step):
1493
+ # save and push checkpoint to the hub
1494
+ if jax.process_index() == 0:
1495
+ params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
1496
+ model.save_pretrained(training_args.output_dir, params=params)
1497
+ tokenizer.save_pretrained(training_args.output_dir)
1498
+ if training_args.push_to_hub:
1499
+ repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False)
1500
+
1501
+ logger.info("***** Running training *****")
1502
+ logger.info(f" Num examples = {num_train_samples}")
1503
+ logger.info(f" Num Epochs = {num_epochs}")
1504
+ logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}")
1505
+ logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}")
1506
+ logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}")
1507
+ logger.info(f" Total optimization steps = {total_train_steps}")
1508
+ logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}")
1509
+ logger.info(f" Use scan: {config.use_scan}")
1510
+ logger.info(f" Fuse matmuls: {config.fuse_matmuls}")
1511
+
1512
+ train_time = cur_step = 0
1513
+ skip_epochs = data_args.skip_steps % (num_train_samples // batch_size_per_update)
1514
+ epochs = tqdm(range(skip_epochs, num_epochs), desc=f"Epoch ... ({skip_epochs + 1}/{num_epochs})", position=0)
1515
+ for epoch in epochs:
1516
+ if training_args.do_train:
1517
+ # ======================== Training ================================
1518
+ train_start = time.time()
1519
+
1520
+ # Create sampling rng
1521
+ rng, input_rng = jax.random.split(rng)
1522
+
1523
+ # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length
1524
+ train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng)
1525
+ train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update)
1526
+
1527
+ # Gather the indices for creating the batch and do a training step
1528
+ for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1):
1529
+ samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx]
1530
+ batch = data_collator(samples)
1531
+ batch = shard(batch.data)
1532
+
1533
+ cur_step = epoch * (num_train_samples // batch_size_per_update) + step
1534
+ if cur_step <= data_args.skip_steps:
1535
+ continue
1536
+
1537
+ try:
1538
+ state, train_metric = p_train_step(state, batch)
1539
+ except TypeError as e:
1540
+ logger.warning("Encountered following error: \n", e)
1541
+
1542
+
1543
+ if cur_step % training_args.logging_steps == 0:
1544
+ # Save metrics
1545
+ train_metric = unreplicate(train_metric)
1546
+ train_time += time.time() - train_start
1547
+ # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step
1548
+ write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name)
1549
+ # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis)
1550
+ # if has_tensorboard and jax.process_index() == 0:
1551
+ # write_train_metric(summary_writer, train_metrics, train_time, cur_step)
1552
+
1553
+ epochs.write(
1554
+ f"Step... ({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})"
1555
+ )
1556
+
1557
+ if cur_step % total_train_steps == 0:
1558
+ break
1559
+
1560
+ if training_args.eval_steps and cur_step % training_args.eval_steps == 0:
1561
+ run_evaluation(cur_step)
1562
+
1563
+ if cur_step % training_args.save_steps == 0:
1564
+ save_checkpoint(cur_step)
1565
+
1566
+ if training_args.eval_steps == 0 and (epoch + 1) != num_epochs:
1567
+ # run evaluation at the end of the epoch if eval steps are not specified
1568
+ run_evaluation(cur_step)
1569
+ save_checkpoint(cur_step)
1570
+
1571
+ if training_args.do_train:
1572
+ save_checkpoint(cur_step)
1573
+
1574
+ cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training
1575
+
1576
+ if training_args.do_eval:
1577
+ run_evaluation(cur_step)
1578
+
1579
+ # TODO: collapse 'do_predict' into the run_evaluation function
1580
+ if training_args.do_predict:
1581
+ for split in [data_args.test_split_name]:
1582
+ # ======================== Evaluating ==============================
1583
+ eval_metrics = []
1584
+ eval_preds = []
1585
+ eval_labels = []
1586
+
1587
+ # Generate eval set by sequentially sampling indices from the test dataset and grouping by length
1588
+ eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size)
1589
+ eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False)
1590
+
1591
+ for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)):
1592
+ samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx]
1593
+ batch = data_collator(samples)
1594
+ labels = batch["labels"]
1595
+
1596
+ metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size)
1597
+ eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1])))
1598
+ eval_metrics.append(metrics)
1599
+
1600
+ eval_labels.extend(labels)
1601
+
1602
+ # normalize eval metrics
1603
+ eval_metrics = get_metrics(eval_metrics)
1604
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
1605
+ eval_metrics = to_fp32(eval_metrics)
1606
+
1607
+ # always run compute metrics
1608
+ error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels)
1609
+ eval_metrics.update(error_rate_metric)
1610
+ error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()])
1611
+
1612
+ # Print metrics and update progress bar
1613
+ desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})"
1614
+ epochs.write(desc)
1615
+ epochs.desc = desc
1616
+
1617
+ # Save metrics
1618
+ write_wandb_log(eval_metrics, cur_step, prefix=split)
1619
+ write_wandb_pred(pred_str, label_str, cur_step, prefix=split)
1620
+ # if has_tensorboard and jax.process_index() == 0:
1621
+ # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str)
1622
+
1623
+
1624
+ if __name__ == "__main__":
1625
+ main()
wandb/run-20220802_074501-31ig5poi/files/config.yaml ADDED
@@ -0,0 +1,27 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ cli_version: 0.12.9
7
+ code_path: code/run_flax_speech_recognition_ctc.py
8
+ framework: huggingface
9
+ huggingface_version: 4.21.0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ python_version: 3.8.10
13
+ start_time: 1659426301
14
+ t:
15
+ 1:
16
+ - 1
17
+ - 2
18
+ - 3
19
+ - 11
20
+ - 12
21
+ 3:
22
+ - 13
23
+ 4: 3.8.10
24
+ 5: 0.12.9
25
+ 6: 4.21.0
26
+ 8:
27
+ - 5
wandb/run-20220802_074501-31ig5poi/files/diff.patch ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220802_074501-31ig5poi/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220802_074501-31ig5poi/files/requirements.txt ADDED
@@ -0,0 +1,158 @@
1
+ absl-py==1.0.0
2
+ aiohttp==3.8.1
3
+ aiosignal==1.2.0
4
+ appdirs==1.4.4
5
+ astunparse==1.6.3
6
+ async-timeout==4.0.2
7
+ attrs==21.4.0
8
+ audioread==2.1.9
9
+ backcall==0.2.0
10
+ cachetools==4.2.4
11
+ certifi==2021.10.8
12
+ cffi==1.15.1
13
+ charset-normalizer==2.0.10
14
+ chex==0.1.3
15
+ click==8.0.3
16
+ cloud-tpu-client==0.10
17
+ cloud-tpu-profiler==2.4.0
18
+ clu==0.0.6
19
+ colorama==0.4.5
20
+ commonmark==0.9.1
21
+ configparser==5.2.0
22
+ contextlib2==21.6.0
23
+ cycler==0.11.0
24
+ datasets==2.4.0
25
+ decorator==5.1.0
26
+ dill==0.3.4
27
+ dm-tree==0.1.6
28
+ docker-pycreds==0.4.0
29
+ etils==0.6.0
30
+ exceptiongroup==1.0.0rc8
31
+ filelock==3.4.2
32
+ flatbuffers==2.0
33
+ flax==0.5.3
34
+ fonttools==4.28.5
35
+ frozenlist==1.2.0
36
+ fsspec==2021.11.1
37
+ future==0.18.2
38
+ gast==0.4.0
39
+ gitdb==4.0.9
40
+ gitpython==3.1.26
41
+ google-api-core==1.31.5
42
+ google-api-python-client==1.8.0
43
+ google-auth-httplib2==0.1.0
44
+ google-auth-oauthlib==0.4.6
45
+ google-auth==2.3.3
46
+ google-pasta==0.2.0
47
+ googleapis-common-protos==1.54.0
48
+ grpcio==1.43.0
49
+ h5py==3.6.0
50
+ httplib2==0.20.2
51
+ huggingface-hub==0.2.1
52
+ hypothesis==6.53.0
53
+ idna==3.3
54
+ importlib-metadata==4.10.0
55
+ importlib-resources==5.4.0
56
+ ipython==7.31.0
57
+ jax==0.3.15
58
+ jaxlib==0.3.15
59
+ jedi==0.18.1
60
+ jiwer==2.3.0
61
+ joblib==1.1.0
62
+ keras-preprocessing==1.1.2
63
+ keras==2.7.0
64
+ kiwisolver==1.3.2
65
+ libclang==12.0.0
66
+ librosa==0.9.2
67
+ libtpu-nightly==0.1.dev20220722
68
+ llvmlite==0.39.0
69
+ markdown==3.3.6
70
+ matplotlib-inline==0.1.3
71
+ matplotlib==3.5.1
72
+ ml-collections==0.1.0
73
+ msgpack==1.0.3
74
+ multidict==5.2.0
75
+ multiprocess==0.70.12.2
76
+ numba==0.56.0
77
+ numpy==1.22.0
78
+ oauth2client==4.1.3
79
+ oauthlib==3.1.1
80
+ opt-einsum==3.3.0
81
+ optax==0.1.3
82
+ packaging==21.3
83
+ pandas==1.3.5
84
+ parso==0.8.3
85
+ pathtools==0.1.2
86
+ pexpect==4.8.0
87
+ pickleshare==0.7.5
88
+ pillow==9.0.0
89
+ pip==22.2.1
90
+ pkg-resources==0.0.0
91
+ pooch==1.6.0
92
+ promise==2.3
93
+ prompt-toolkit==3.0.24
94
+ protobuf==3.19.1
95
+ psutil==5.9.0
96
+ ptyprocess==0.7.0
97
+ pyarrow==6.0.1
98
+ pyasn1-modules==0.2.8
99
+ pyasn1==0.4.8
100
+ pycparser==2.21
101
+ pyctcdecode==0.4.0
102
+ pygments==2.11.1
103
+ pygtrie==2.5.0
104
+ pyparsing==3.0.6
105
+ python-dateutil==2.8.2
106
+ python-levenshtein==0.12.2
107
+ pytz==2021.3
108
+ pyyaml==6.0
109
+ regex==2021.11.10
110
+ requests-oauthlib==1.3.0
111
+ requests==2.27.0
112
+ resampy==0.3.1
113
+ responses==0.18.0
114
+ rich==11.2.0
115
+ rsa==4.8
116
+ sacremoses==0.0.46
117
+ scikit-learn==1.1.1
118
+ scipy==1.7.3
119
+ sentry-sdk==1.5.2
120
+ setuptools==44.0.0
121
+ shortuuid==1.0.8
122
+ six==1.16.0
123
+ smmap==5.0.0
124
+ sortedcontainers==2.4.0
125
+ soundfile==0.10.3.post1
126
+ sox==1.4.1
127
+ subprocess32==3.5.4
128
+ tensorboard-data-server==0.6.1
129
+ tensorboard-plugin-wit==1.8.0
130
+ tensorboard==2.7.0
131
+ tensorflow-cpu==2.7.0
132
+ tensorflow-datasets==4.4.0
133
+ tensorflow-estimator==2.7.0
134
+ tensorflow-io-gcs-filesystem==0.23.1
135
+ tensorflow-metadata==1.5.0
136
+ tensorflow==2.7.0
137
+ tensorstore==0.1.21
138
+ termcolor==1.1.0
139
+ threadpoolctl==3.1.0
140
+ tokenizers==0.11.2
141
+ toolz==0.11.2
142
+ torch==1.12.0
143
+ torchaudio==0.12.0+cpu
144
+ tqdm==4.62.3
145
+ traitlets==5.1.1
146
+ transformers==4.21.0
147
+ typing-extensions==4.3.0
148
+ uritemplate==3.0.1
149
+ urllib3==1.26.7
150
+ wandb==0.12.9
151
+ wcwidth==0.2.5
152
+ werkzeug==2.0.2
153
+ wheel==0.37.1
154
+ wrapt==1.13.3
155
+ xxhash==2.0.2
156
+ yarl==1.7.2
157
+ yaspin==2.1.0
158
+ zipp==3.7.0
wandb/run-20220802_074501-31ig5poi/files/wandb-metadata.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
3
+ "python": "3.8.10",
4
+ "heartbeatAt": "2022-08-02T07:45:05.227908",
5
+ "startedAt": "2022-08-02T07:45:01.837235",
6
+ "docker": null,
7
+ "cpu_count": 96,
8
+ "cuda": null,
9
+ "args": [
10
+ "--model_name_or_path=./",
11
+ "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst",
12
+ "--tokenizer_name=./",
13
+ "--output_dir=./",
14
+ "--overwrite_output_dir",
15
+ "--num_train_epochs=40",
16
+ "--per_device_train_batch_size=8",
17
+ "--per_device_eval_batch_size=8",
18
+ "--gradient_accumulation_steps=1",
19
+ "--precision=full_mixed",
20
+ "--matmul_precision=bfloat16",
21
+ "--learning_rate=0.00033713760785758495",
22
+ "--skip_steps=33100",
23
+ "--warmup_steps=0",
24
+ "--length_column_name=input_length",
25
+ "--evaluation_strategy=steps",
26
+ "--text_column_name=text",
27
+ "--save_steps=4000",
28
+ "--eval_steps=4000",
29
+ "--logging_steps=100",
30
+ "--layerdrop=0.041",
31
+ "--attention_dropout=0.094",
32
+ "--activation_dropout=0.055",
33
+ "--hidden_dropout=0.047",
34
+ "--save_total_limit=5",
35
+ "--freeze_feature_encoder",
36
+ "--feat_proj_dropout=0.04",
37
+ "--mask_time_prob=0.082",
38
+ "--mask_time_length=10",
39
+ "--mask_feature_prob=0.25",
40
+ "--mask_feature_length=64",
41
+ "--gradient_checkpointing",
42
+ "--min_duration_in_seconds=0.5",
43
+ "--max_duration_in_seconds=20.0",
44
+ "--use_auth_token",
45
+ "--seed=42",
46
+ "--group_by_length",
47
+ "--do_train",
48
+ "--do_eval",
49
+ "--push_to_hub",
50
+ "--preprocessing_num_workers=32",
51
+ "--ctc_zero_infinity",
52
+ "--do_lower_case",
53
+ "--wandb_project=wav2vec2",
54
+ "--wandb_name=wav2vec2-1b-npsc-nst (cont.)",
55
+ "--remove_punctuation"
56
+ ],
57
+ "state": "running",
58
+ "program": "run_flax_speech_recognition_ctc.py",
59
+ "codePath": "run_flax_speech_recognition_ctc.py",
60
+ "git": {
61
+ "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst",
62
+ "commit": "4f995e8718adff5045133dd384c7aa42ebe89fa9"
63
+ },
64
+ "email": "versae@gmail.com",
65
+ "root": "/data/wav2vec2-1b-npsc-nst",
66
+ "host": "t1v-n-eedfb410-w-0",
67
+ "username": "javierr",
68
+ "executable": "/data/flax/bin/python"
69
+ }
wandb/run-20220802_074501-31ig5poi/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {}
wandb/run-20220802_074501-31ig5poi/logs/debug-internal.log ADDED
@@ -0,0 +1,412 @@
1
+ 2022-08-02 07:45:02,738 INFO MainThread:3984200 [internal.py:wandb_internal():87] W&B internal server running at pid: 3984200, started at: 2022-08-02 07:45:02.737806
2
+ 2022-08-02 07:45:02,739 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: check_version
3
+ 2022-08-02 07:45:02,740 INFO WriterThread:3984200 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/run-31ig5poi.wandb
4
+ 2022-08-02 07:45:02,741 DEBUG SenderThread:3984200 [sender.py:send():234] send: header
5
+ 2022-08-02 07:45:02,741 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: check_version
6
+ 2022-08-02 07:45:02,780 DEBUG SenderThread:3984200 [sender.py:send():234] send: run
7
+ 2022-08-02 07:45:02,948 INFO SenderThread:3984200 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files
8
+ 2022-08-02 07:45:02,948 INFO SenderThread:3984200 [sender.py:_start_run_threads():804] run started: 31ig5poi with start time 1659426301
9
+ 2022-08-02 07:45:02,948 DEBUG SenderThread:3984200 [sender.py:send():234] send: summary
10
+ 2022-08-02 07:45:02,949 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: run_start
11
+ 2022-08-02 07:45:02,949 INFO SenderThread:3984200 [sender.py:_save_file():939] saving file wandb-summary.json with policy end
12
+ 2022-08-02 07:45:03,950 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/wandb-summary.json
13
+ 2022-08-02 07:45:05,227 DEBUG HandlerThread:3984200 [meta.py:__init__():40] meta init
14
+ 2022-08-02 07:45:05,227 DEBUG HandlerThread:3984200 [meta.py:__init__():54] meta init done
15
+ 2022-08-02 07:45:05,227 DEBUG HandlerThread:3984200 [meta.py:probe():214] probe
16
+ 2022-08-02 07:45:05,229 DEBUG HandlerThread:3984200 [meta.py:_setup_git():204] setup git
17
+ 2022-08-02 07:45:05,268 DEBUG HandlerThread:3984200 [meta.py:_setup_git():211] setup git done
18
+ 2022-08-02 07:45:05,269 DEBUG HandlerThread:3984200 [meta.py:_save_code():92] save code
19
+ 2022-08-02 07:45:05,282 DEBUG HandlerThread:3984200 [meta.py:_save_code():113] save code done
20
+ 2022-08-02 07:45:05,282 DEBUG HandlerThread:3984200 [meta.py:_save_patches():130] save patches
21
+ 2022-08-02 07:45:05,954 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/diff.patch
22
+ 2022-08-02 07:45:05,956 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/code/run_flax_speech_recognition_ctc.py
23
+ 2022-08-02 07:45:05,956 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/code
24
+ 2022-08-02 07:45:06,234 DEBUG HandlerThread:3984200 [meta.py:_save_patches():172] save patches done
25
+ 2022-08-02 07:45:06,235 DEBUG HandlerThread:3984200 [meta.py:_save_pip():58] save pip
26
+ 2022-08-02 07:45:06,235 DEBUG HandlerThread:3984200 [meta.py:_save_pip():72] save pip done
27
+ 2022-08-02 07:45:06,235 DEBUG HandlerThread:3984200 [meta.py:probe():252] probe done
28
+ 2022-08-02 07:45:06,238 DEBUG SenderThread:3984200 [sender.py:send():234] send: files
29
+ 2022-08-02 07:45:06,239 INFO SenderThread:3984200 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now
30
+ 2022-08-02 07:45:06,239 INFO SenderThread:3984200 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now
31
+ 2022-08-02 07:45:06,240 INFO SenderThread:3984200 [sender.py:_save_file():939] saving file diff.patch with policy now
32
+ 2022-08-02 07:45:06,247 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
33
+ 2022-08-02 07:45:06,248 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
34
+ 2022-08-02 07:45:06,737 INFO Thread-11 :3984200 [upload_job.py:push():137] Uploaded file /tmp/tmpjt6l9kmzwandb/ox4bihe1-wandb-metadata.json
35
+ 2022-08-02 07:45:06,948 INFO Thread-13 :3984200 [upload_job.py:push():137] Uploaded file /tmp/tmpjt6l9kmzwandb/3itqmoyr-diff.patch
36
+ 2022-08-02 07:45:06,955 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/diff.patch
37
+ 2022-08-02 07:45:06,955 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/wandb-metadata.json
38
+ 2022-08-02 07:45:06,955 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
39
+ 2022-08-02 07:45:06,955 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/requirements.txt
40
+ 2022-08-02 07:45:06,979 INFO Thread-12 :3984200 [upload_job.py:push():137] Uploaded file /tmp/tmpjt6l9kmzwandb/32ez21om-code/run_flax_speech_recognition_ctc.py
41
+ 2022-08-02 07:45:08,956 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
42
+ 2022-08-02 07:45:10,957 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
43
+ 2022-08-02 07:45:12,958 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
44
+ 2022-08-02 07:45:14,959 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
45
+ 2022-08-02 07:45:20,962 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
46
+ 2022-08-02 07:45:21,394 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
47
+ 2022-08-02 07:45:21,395 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
48
+ 2022-08-02 07:45:33,321 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
49
+ 2022-08-02 07:45:34,969 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
50
+ 2022-08-02 07:45:36,594 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
51
+ 2022-08-02 07:45:36,594 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
52
+ 2022-08-02 07:45:40,972 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
53
+ 2022-08-02 07:45:48,976 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
54
+ 2022-08-02 07:45:51,782 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
55
+ 2022-08-02 07:45:51,782 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
56
+ 2022-08-02 07:45:51,977 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
57
+ 2022-08-02 07:46:03,397 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
58
+ 2022-08-02 07:46:03,983 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
59
+ 2022-08-02 07:46:05,984 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
60
+ 2022-08-02 07:46:06,960 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
61
+ 2022-08-02 07:46:06,961 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
62
+ 2022-08-02 07:46:22,172 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
63
+ 2022-08-02 07:46:22,172 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
64
+ 2022-08-02 07:46:33,472 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
65
+ 2022-08-02 07:46:37,502 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
66
+ 2022-08-02 07:46:37,502 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
67
+ 2022-08-02 07:46:47,003 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
68
+ 2022-08-02 07:46:49,004 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
69
+ 2022-08-02 07:46:51,005 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
70
+ 2022-08-02 07:46:52,749 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
71
+ 2022-08-02 07:46:52,749 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
72
+ 2022-08-02 07:46:53,006 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
73
+ 2022-08-02 07:46:55,007 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
74
+ 2022-08-02 07:46:57,008 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
75
+ 2022-08-02 07:46:59,009 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
76
+ 2022-08-02 07:47:01,011 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
77
+ 2022-08-02 07:47:03,012 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
78
+ 2022-08-02 07:47:03,541 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
79
+ 2022-08-02 07:47:05,013 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
80
+ 2022-08-02 07:47:07,013 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
81
+ 2022-08-02 07:47:08,222 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
82
+ 2022-08-02 07:47:08,222 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
83
+ 2022-08-02 07:47:10,015 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
84
+ 2022-08-02 07:47:12,017 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
85
+ 2022-08-02 07:47:14,019 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
86
+ 2022-08-02 07:47:16,020 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
87
+ 2022-08-02 07:47:18,021 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
88
+ 2022-08-02 07:47:20,022 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
89
+ 2022-08-02 07:47:22,023 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
90
+ 2022-08-02 07:47:23,391 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
91
+ 2022-08-02 07:47:23,391 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
92
+ 2022-08-02 07:47:24,025 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
93
+ 2022-08-02 07:47:26,026 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
94
+ 2022-08-02 07:47:28,027 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
95
+ 2022-08-02 07:47:30,028 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
96
+ 2022-08-02 07:47:32,030 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
97
+ 2022-08-02 07:47:33,625 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
98
+ 2022-08-02 07:47:34,031 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
99
+ 2022-08-02 07:47:36,032 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
100
+ 2022-08-02 07:47:38,033 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
101
+ 2022-08-02 07:47:38,541 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
102
+ 2022-08-02 07:47:38,541 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
103
+ 2022-08-02 07:47:40,034 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
104
+ 2022-08-02 07:47:42,035 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
105
+ 2022-08-02 07:47:44,035 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
106
+ 2022-08-02 07:47:46,036 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
107
+ 2022-08-02 07:47:48,037 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
108
+ 2022-08-02 07:47:50,038 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
109
+ 2022-08-02 07:47:52,039 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
110
+ 2022-08-02 07:47:53,697 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
111
+ 2022-08-02 07:47:53,698 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
112
+ 2022-08-02 07:47:54,040 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
113
+ 2022-08-02 07:47:56,041 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
114
+ 2022-08-02 07:47:58,042 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
115
+ 2022-08-02 07:48:00,043 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
116
+ 2022-08-02 07:48:02,044 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
117
+ 2022-08-02 07:48:03,708 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
118
+ 2022-08-02 07:48:04,046 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
119
+ 2022-08-02 07:48:06,047 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
120
+ 2022-08-02 07:48:08,048 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
121
+ 2022-08-02 07:48:08,839 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
122
+ 2022-08-02 07:48:08,839 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
123
+ 2022-08-02 07:48:10,049 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
124
+ 2022-08-02 07:48:12,050 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
125
+ 2022-08-02 07:48:14,053 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
126
+ 2022-08-02 07:48:16,054 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
127
+ 2022-08-02 07:48:18,055 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
128
+ 2022-08-02 07:48:20,056 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
129
+ 2022-08-02 07:48:22,058 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
130
+ 2022-08-02 07:48:23,980 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
131
+ 2022-08-02 07:48:23,981 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
132
+ 2022-08-02 07:48:24,059 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
133
+ 2022-08-02 07:48:26,060 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
134
+ 2022-08-02 07:48:28,061 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
135
+ 2022-08-02 07:48:30,062 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
136
+ 2022-08-02 07:48:32,063 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
137
+ 2022-08-02 07:48:33,795 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
138
+ 2022-08-02 07:48:34,064 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
139
+ 2022-08-02 07:48:36,065 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
140
+ 2022-08-02 07:48:38,066 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
141
+ 2022-08-02 07:48:39,148 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
142
+ 2022-08-02 07:48:39,149 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
143
+ 2022-08-02 07:48:40,068 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
144
+ 2022-08-02 07:48:42,069 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
145
+ 2022-08-02 07:48:44,070 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
146
+ 2022-08-02 07:48:46,071 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
147
+ 2022-08-02 07:48:48,072 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
148
+ 2022-08-02 07:48:50,073 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
149
+ 2022-08-02 07:48:52,074 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
150
+ 2022-08-02 07:48:54,075 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
151
+ 2022-08-02 07:48:54,294 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
152
+ 2022-08-02 07:48:54,294 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
153
+ 2022-08-02 07:48:56,076 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
154
+ 2022-08-02 07:48:58,078 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
155
+ 2022-08-02 07:49:00,079 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
156
+ 2022-08-02 07:49:02,080 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
157
+ 2022-08-02 07:49:03,880 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
158
+ 2022-08-02 07:49:04,081 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
159
+ 2022-08-02 07:49:06,082 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
160
+ 2022-08-02 07:49:08,083 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
161
+ 2022-08-02 07:49:09,441 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
162
+ 2022-08-02 07:49:09,442 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
163
+ 2022-08-02 07:49:10,085 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
164
+ 2022-08-02 07:49:12,085 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
165
+ 2022-08-02 07:49:14,086 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
166
+ 2022-08-02 07:49:16,087 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
167
+ 2022-08-02 07:49:18,092 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
168
+ 2022-08-02 07:49:20,097 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
169
+ 2022-08-02 07:49:22,094 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
170
+ 2022-08-02 07:49:24,095 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
171
+ 2022-08-02 07:49:24,600 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
172
+ 2022-08-02 07:49:24,601 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
173
+ 2022-08-02 07:49:26,096 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
174
+ 2022-08-02 07:49:28,097 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
175
+ 2022-08-02 07:49:30,099 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
176
+ 2022-08-02 07:49:32,100 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
177
+ 2022-08-02 07:49:33,963 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
178
+ 2022-08-02 07:49:34,103 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
179
+ 2022-08-02 07:49:36,104 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
180
+ 2022-08-02 07:49:38,106 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
181
+ 2022-08-02 07:49:39,769 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
182
+ 2022-08-02 07:49:39,769 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
183
+ 2022-08-02 07:49:40,107 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
184
+ 2022-08-02 07:49:42,108 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
185
+ 2022-08-02 07:49:44,109 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
186
+ 2022-08-02 07:49:46,110 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
187
+ 2022-08-02 07:49:48,112 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
188
+ 2022-08-02 07:49:50,113 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
189
+ 2022-08-02 07:49:52,114 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
190
+ 2022-08-02 07:49:54,115 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
191
+ 2022-08-02 07:49:54,953 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
192
+ 2022-08-02 07:49:54,953 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
193
+ 2022-08-02 07:49:56,116 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
194
+ 2022-08-02 07:49:58,118 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
195
+ 2022-08-02 07:50:00,119 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
196
+ 2022-08-02 07:50:02,120 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
197
+ 2022-08-02 07:50:04,058 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
198
+ 2022-08-02 07:50:04,121 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
199
+ 2022-08-02 07:50:06,122 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
200
+ 2022-08-02 07:50:08,123 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
201
+ 2022-08-02 07:50:10,099 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
202
+ 2022-08-02 07:50:10,099 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
203
+ 2022-08-02 07:50:10,124 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
204
+ 2022-08-02 07:50:12,125 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
205
+ 2022-08-02 07:50:14,126 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
206
+ 2022-08-02 07:50:16,127 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
207
+ 2022-08-02 07:50:18,128 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
208
+ 2022-08-02 07:50:20,129 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
209
+ 2022-08-02 07:50:22,130 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
210
+ 2022-08-02 07:50:24,131 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
211
+ 2022-08-02 07:50:25,253 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
212
+ 2022-08-02 07:50:25,254 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
213
+ 2022-08-02 07:50:26,132 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
214
+ 2022-08-02 07:50:28,133 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
215
+ 2022-08-02 07:50:30,134 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
216
+ 2022-08-02 07:50:32,135 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
217
+ 2022-08-02 07:50:34,131 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
218
+ 2022-08-02 07:50:34,136 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
219
+ 2022-08-02 07:50:36,137 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
220
+ 2022-08-02 07:50:38,138 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
221
+ 2022-08-02 07:50:40,139 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
222
+ 2022-08-02 07:50:40,400 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
223
+ 2022-08-02 07:50:40,401 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
224
+ 2022-08-02 07:50:42,140 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
225
+ 2022-08-02 07:50:44,141 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
226
+ 2022-08-02 07:50:46,142 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
227
+ 2022-08-02 07:50:48,143 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
228
+ 2022-08-02 07:50:50,144 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
229
+ 2022-08-02 07:50:52,145 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
230
+ 2022-08-02 07:50:55,146 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
231
+ 2022-08-02 07:50:55,559 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
232
+ 2022-08-02 07:50:55,559 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
233
+ 2022-08-02 07:50:57,147 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
234
+ 2022-08-02 07:50:59,148 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
235
+ 2022-08-02 07:51:01,149 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
236
+ 2022-08-02 07:51:03,150 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
237
+ 2022-08-02 07:51:04,212 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
238
+ 2022-08-02 07:51:05,151 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
239
+ 2022-08-02 07:51:07,152 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
240
+ 2022-08-02 07:51:09,153 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
241
+ 2022-08-02 07:51:10,711 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
242
+ 2022-08-02 07:51:10,712 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
243
+ 2022-08-02 07:51:11,154 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
244
+ 2022-08-02 07:51:13,156 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
245
+ 2022-08-02 07:51:15,157 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
246
+ 2022-08-02 07:51:17,158 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
247
+ 2022-08-02 07:51:19,159 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
248
+ 2022-08-02 07:51:21,160 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
249
+ 2022-08-02 07:51:23,161 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
250
+ 2022-08-02 07:51:25,162 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
251
+ 2022-08-02 07:51:25,866 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
252
+ 2022-08-02 07:51:25,866 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
253
+ 2022-08-02 07:51:27,163 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
254
+ 2022-08-02 07:51:29,164 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
255
+ 2022-08-02 07:51:31,168 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
256
+ 2022-08-02 07:51:34,295 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
257
+ 2022-08-02 07:51:41,020 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
258
+ 2022-08-02 07:51:41,021 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
259
+ 2022-08-02 07:51:51,173 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
260
+ 2022-08-02 07:51:53,175 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
261
+ 2022-08-02 07:51:55,175 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
262
+ 2022-08-02 07:51:56,183 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
263
+ 2022-08-02 07:51:56,183 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
264
+ 2022-08-02 07:51:57,177 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
265
+ 2022-08-02 07:51:59,178 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
266
+ 2022-08-02 07:52:01,179 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
267
+ 2022-08-02 07:52:03,180 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
268
+ 2022-08-02 07:52:04,385 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
269
+ 2022-08-02 07:52:05,181 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
270
+ 2022-08-02 07:52:07,182 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
271
+ 2022-08-02 07:52:09,182 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
272
+ 2022-08-02 07:52:11,183 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
273
+ 2022-08-02 07:52:11,328 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
274
+ 2022-08-02 07:52:11,328 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
275
+ 2022-08-02 07:52:13,190 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
276
+ 2022-08-02 07:52:15,191 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
277
+ 2022-08-02 07:52:17,191 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
278
+ 2022-08-02 07:52:19,193 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
279
+ 2022-08-02 07:52:21,194 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
280
+ 2022-08-02 07:52:23,195 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
281
+ 2022-08-02 07:52:25,196 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
282
+ 2022-08-02 07:52:26,490 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
283
+ 2022-08-02 07:52:26,490 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
284
+ 2022-08-02 07:52:34,455 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
285
+ 2022-08-02 07:52:41,626 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
286
+ 2022-08-02 07:52:41,627 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
287
+ 2022-08-02 07:52:56,839 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
288
+ 2022-08-02 07:52:56,840 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
289
+ 2022-08-02 07:53:03,214 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
290
+ 2022-08-02 07:53:04,529 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
291
+ 2022-08-02 07:53:05,215 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
292
+ 2022-08-02 07:53:07,216 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
293
+ 2022-08-02 07:53:09,217 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
294
+ 2022-08-02 07:53:11,218 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
295
+ 2022-08-02 07:53:12,011 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
296
+ 2022-08-02 07:53:12,011 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
297
+ 2022-08-02 07:53:13,219 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
298
+ 2022-08-02 07:53:15,220 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
299
+ 2022-08-02 07:53:17,221 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
300
+ 2022-08-02 07:53:19,223 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
301
+ 2022-08-02 07:53:21,224 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
302
+ 2022-08-02 07:53:23,225 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
303
+ 2022-08-02 07:53:25,226 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
304
+ 2022-08-02 07:53:27,150 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
305
+ 2022-08-02 07:53:27,150 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
306
+ 2022-08-02 07:53:27,227 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
307
+ 2022-08-02 07:53:29,228 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
308
+ 2022-08-02 07:53:31,229 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
309
+ 2022-08-02 07:53:33,231 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
310
+ 2022-08-02 07:53:34,612 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
311
+ 2022-08-02 07:53:41,234 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
312
+ 2022-08-02 07:53:42,296 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
313
+ 2022-08-02 07:53:42,296 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
314
+ 2022-08-02 07:53:43,237 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
315
+ 2022-08-02 07:53:45,238 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
316
+ 2022-08-02 07:53:47,239 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
317
+ 2022-08-02 07:53:49,240 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
318
+ 2022-08-02 07:53:57,474 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
319
+ 2022-08-02 07:53:57,475 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
320
+ 2022-08-02 07:54:04,679 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
321
+ 2022-08-02 07:54:12,610 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
322
+ 2022-08-02 07:54:12,611 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
323
+ 2022-08-02 07:54:15,252 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
324
+ 2022-08-02 07:54:26,256 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
325
+ 2022-08-02 07:54:27,746 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
326
+ 2022-08-02 07:54:27,747 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
327
+ 2022-08-02 07:54:28,257 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
328
+ 2022-08-02 07:54:30,258 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
329
+ 2022-08-02 07:54:32,259 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
330
+ 2022-08-02 07:54:34,260 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
331
+ 2022-08-02 07:54:34,757 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
332
+ 2022-08-02 07:54:36,262 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
333
+ 2022-08-02 07:54:38,263 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
334
+ 2022-08-02 07:54:40,263 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
335
+ 2022-08-02 07:54:42,264 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
336
+ 2022-08-02 07:54:42,900 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
337
+ 2022-08-02 07:54:42,900 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
338
+ 2022-08-02 07:54:44,265 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
339
+ 2022-08-02 07:54:46,266 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
340
+ 2022-08-02 07:54:48,271 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
341
+ 2022-08-02 07:54:50,272 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
342
+ 2022-08-02 07:54:52,273 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
343
+ 2022-08-02 07:54:54,274 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
344
+ 2022-08-02 07:54:56,275 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
345
+ 2022-08-02 07:54:58,037 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
346
+ 2022-08-02 07:54:58,037 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
347
+ 2022-08-02 07:55:00,277 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
348
+ 2022-08-02 07:55:02,278 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
349
+ 2022-08-02 07:55:04,279 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
350
+ 2022-08-02 07:55:04,837 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
351
+ 2022-08-02 07:55:06,280 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
352
+ 2022-08-02 07:55:08,281 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
353
+ 2022-08-02 07:55:10,282 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
354
+ 2022-08-02 07:55:12,283 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
355
+ 2022-08-02 07:55:13,188 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
356
+ 2022-08-02 07:55:13,188 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
357
+ 2022-08-02 07:55:14,284 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
358
+ 2022-08-02 07:55:16,289 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
359
+ 2022-08-02 07:55:18,290 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
360
+ 2022-08-02 07:55:20,291 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
361
+ 2022-08-02 07:55:22,292 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
362
+ 2022-08-02 07:55:24,293 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
363
+ 2022-08-02 07:55:26,294 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
364
+ 2022-08-02 07:55:28,295 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
365
+ 2022-08-02 07:55:28,327 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
366
+ 2022-08-02 07:55:28,327 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
367
+ 2022-08-02 07:55:30,296 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
368
+ 2022-08-02 07:55:32,297 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
369
+ 2022-08-02 07:55:34,298 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
370
+ 2022-08-02 07:55:34,920 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
371
+ 2022-08-02 07:55:36,299 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
372
+ 2022-08-02 07:55:38,300 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
373
+ 2022-08-02 07:55:40,301 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
374
+ 2022-08-02 07:55:42,303 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
375
+ 2022-08-02 07:55:43,472 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
376
+ 2022-08-02 07:55:43,473 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
377
+ 2022-08-02 07:55:44,304 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
378
+ 2022-08-02 07:55:46,305 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
379
+ 2022-08-02 07:55:48,306 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
380
+ 2022-08-02 07:55:58,641 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
381
+ 2022-08-02 07:55:58,641 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
382
+ 2022-08-02 07:56:05,000 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
383
+ 2022-08-02 07:56:13,774 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
384
+ 2022-08-02 07:56:13,775 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
385
+ 2022-08-02 07:56:28,975 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
386
+ 2022-08-02 07:56:28,975 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
387
+ 2022-08-02 07:56:31,325 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
388
+ 2022-08-02 07:56:35,080 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
389
+ 2022-08-02 07:56:37,328 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
390
+ 2022-08-02 07:56:44,223 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
391
+ 2022-08-02 07:56:44,224 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
392
+ 2022-08-02 07:56:44,331 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
393
+ 2022-08-02 07:56:50,334 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
394
+ 2022-08-02 07:56:56,336 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
395
+ 2022-08-02 07:56:59,451 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
396
+ 2022-08-02 07:56:59,451 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
397
+ 2022-08-02 07:57:02,339 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
398
+ 2022-08-02 07:57:05,159 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
399
+ 2022-08-02 07:57:06,341 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
400
+ 2022-08-02 07:57:08,342 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
401
+ 2022-08-02 07:57:12,344 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
402
+ 2022-08-02 07:57:15,673 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
403
+ 2022-08-02 07:57:15,673 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
404
+ 2022-08-02 07:57:16,346 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
405
+ 2022-08-02 07:57:24,349 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
406
+ 2022-08-02 07:57:26,350 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
407
+ 2022-08-02 07:57:30,823 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
408
+ 2022-08-02 07:57:30,824 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
409
+ 2022-08-02 07:57:35,236 DEBUG SenderThread:3984200 [sender.py:send():234] send: stats
410
+ 2022-08-02 07:57:38,356 INFO Thread-8 :3984200 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/files/output.log
411
+ 2022-08-02 07:57:45,984 DEBUG HandlerThread:3984200 [handler.py:handle_request():130] handle_request: stop_status
412
+ 2022-08-02 07:57:45,985 DEBUG SenderThread:3984200 [sender.py:send_request():248] send_request: stop_status
wandb/run-20220802_074501-31ig5poi/logs/debug.log ADDED
@@ -0,0 +1,23 @@
1
+ 2022-08-02 07:45:01,839 INFO MainThread:3982953 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'}
2
+ 2022-08-02 07:45:01,839 INFO MainThread:3982953 [wandb_setup.py:_flush():71] setting login settings: {}
3
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/logs/debug.log
4
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220802_074501-31ig5poi/logs/debug-internal.log
5
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [wandb_init.py:init():404] calling init triggers
6
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [wandb_init.py:init():409] wandb.init called with sweep_config: {}
7
+ config: {}
8
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [wandb_init.py:init():460] starting backend
9
+ 2022-08-02 07:45:01,840 INFO MainThread:3982953 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
10
+ 2022-08-02 07:45:01,887 INFO MainThread:3982953 [backend.py:ensure_launched():216] starting backend process...
11
+ 2022-08-02 07:45:01,933 INFO MainThread:3982953 [backend.py:ensure_launched():221] started backend process with pid: 3984200
12
+ 2022-08-02 07:45:01,935 INFO MainThread:3982953 [wandb_init.py:init():469] backend started and connected
13
+ 2022-08-02 07:45:01,950 INFO MainThread:3982953 [wandb_init.py:init():533] updated telemetry
14
+ 2022-08-02 07:45:02,064 INFO MainThread:3982953 [wandb_init.py:init():563] communicating current version
15
+ 2022-08-02 07:45:02,778 INFO MainThread:3982953 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
16
+
17
+ 2022-08-02 07:45:02,779 INFO MainThread:3982953 [wandb_init.py:init():578] communicating run to backend with 30 second timeout
18
+ 2022-08-02 07:45:02,948 INFO MainThread:3982953 [wandb_init.py:init():606] starting run threads in backend
19
+ 2022-08-02 07:45:06,243 INFO MainThread:3982953 [wandb_run.py:_console_start():1810] atexit reg
20
+ 2022-08-02 07:45:06,243 INFO MainThread:3982953 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT
21
+ 2022-08-02 07:45:06,244 INFO MainThread:3982953 [wandb_run.py:_redirect():1689] Redirecting console.
22
+ 2022-08-02 07:45:06,246 INFO MainThread:3982953 [wandb_run.py:_redirect():1745] Redirects installed.
23
+ 2022-08-02 07:45:06,246 INFO MainThread:3982953 [wandb_init.py:init():633] run started, returning control to user process
wandb/run-20220802_074501-31ig5poi/run-31ig5poi.wandb ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8002a15dcc52b07d01af74dc49bf603acdebc0a74c8fa5f3f1e322b444bec21c
3
+ size 421207