ylacombe committed on
Commit
8496047
1 Parent(s): 2df3fbd

Update analyze.py

Browse files
Files changed (1) hide show
  1. analyze.py +13 -3
analyze.py CHANGED
@@ -8,7 +8,7 @@ import torch
8
 
9
  from datasets import Dataset, Audio
10
 
11
- from dataspeech import rate_apply, pitch_apply, snr_apply
12
  from metadata_to_text import bins_to_text, speaker_level_relative_to_gender
13
 
14
  Row = Dict[str, Any]
@@ -59,6 +59,16 @@ def analyze(
59
 
60
 
61
  ## 1. Extract continuous tags
 
 
 
 
 
 
 
 
 
 
62
  pitch_dataset = tmp_dataset.map(
63
  pitch_apply,
64
  batched=True,
@@ -89,7 +99,7 @@ def analyze(
89
 
90
  enriched_dataset = pitch_dataset.add_column("snr", snr_dataset["snr"]).add_column("c50", snr_dataset["c50"])
91
  enriched_dataset = enriched_dataset.add_column("speaking_rate", rate_dataset["speaking_rate"]).add_column("phonemes", rate_dataset["phonemes"])
92
-
93
 
94
  ## 2. Map continuous tags to text tags
95
 
@@ -126,7 +136,7 @@ def analyze(
126
  for i,sample in enumerate(batch):
127
  new_sample = {}
128
  new_sample[audio_column_name] = f"<audio src='{sample[audio_column_name][0]['src']}' controls></audio>"
129
- for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr",]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
130
  new_sample[col] = enriched_dataset[col][i]
131
  if "gender" in batch[0] and "speaker_id" in batch[0]:
132
  new_sample["pitch"] = enriched_dataset["pitch"][i]
 
8
 
9
  from datasets import Dataset, Audio
10
 
11
+ from dataspeech import rate_apply, pitch_apply, snr_apply, squim_apply
12
  from metadata_to_text import bins_to_text, speaker_level_relative_to_gender
13
 
14
  Row = Dict[str, Any]
 
59
 
60
 
61
  ## 1. Extract continuous tags
62
+ squim_dataset = tmp_dataset.map(
63
+ squim_apply,
64
+ batched=True,
65
+ batch_size=BATCH_SIZE,
66
+ with_rank=True if torch.cuda.device_count()>0 else False,
67
+ num_proc=torch.cuda.device_count(),
68
+ remove_columns=[audio_column_name], # trick to avoid rewriting audio
69
+ fn_kwargs={"audio_column_name": audio_column_name,},
70
+ )
71
+
72
  pitch_dataset = tmp_dataset.map(
73
  pitch_apply,
74
  batched=True,
 
99
 
100
  enriched_dataset = pitch_dataset.add_column("snr", snr_dataset["snr"]).add_column("c50", snr_dataset["c50"])
101
  enriched_dataset = enriched_dataset.add_column("speaking_rate", rate_dataset["speaking_rate"]).add_column("phonemes", rate_dataset["phonemes"])
102
+ enriched_dataset = enriched_dataset.add_column("stoi", squim_dataset["stoi"]).add_column("si-sdr", squim_dataset["sdr"]).add_column("pesq", squim_dataset["pesq"])
103
 
104
  ## 2. Map continuous tags to text tags
105
 
 
136
  for i,sample in enumerate(batch):
137
  new_sample = {}
138
  new_sample[audio_column_name] = f"<audio src='{sample[audio_column_name][0]['src']}' controls></audio>"
139
+ for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr", "stoi", "pesq", "si-sdr"]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
140
  new_sample[col] = enriched_dataset[col][i]
141
  if "gender" in batch[0] and "speaker_id" in batch[0]:
142
  new_sample["pitch"] = enriched_dataset["pitch"][i]