Update analyze.py

analyze.py (+13 -3)
```diff
@@ -8,7 +8,7 @@ import torch
 
 from datasets import Dataset, Audio
 
-from dataspeech import rate_apply, pitch_apply, snr_apply
+from dataspeech import rate_apply, pitch_apply, snr_apply, squim_apply
 from metadata_to_text import bins_to_text, speaker_level_relative_to_gender
 
 Row = Dict[str, Any]
@@ -59,6 +59,16 @@ def analyze(
 
 
     ## 1. Extract continous tags
+    squim_dataset = tmp_dataset.map(
+        squim_apply,
+        batched=True,
+        batch_size=BATCH_SIZE,
+        with_rank=True if torch.cuda.device_count()>0 else False,
+        num_proc=torch.cuda.device_count(),
+        remove_columns=[audio_column_name], # tricks to avoid rewritting audio
+        fn_kwargs={"audio_column_name": audio_column_name,},
+    )
+
     pitch_dataset = tmp_dataset.map(
         pitch_apply,
         batched=True,
@@ -89,7 +99,7 @@ def analyze(
 
     enriched_dataset = pitch_dataset.add_column("snr", snr_dataset["snr"]).add_column("c50", snr_dataset["c50"])
     enriched_dataset = enriched_dataset.add_column("speaking_rate", rate_dataset["speaking_rate"]).add_column("phonemes", rate_dataset["phonemes"])
-
+    enriched_dataset = enriched_dataset.add_column("stoi", squim_dataset["stoi"]).add_column("si-sdr", squim_dataset["sdr"]).add_column("pesq", squim_dataset["pesq"])
 
     ## 2. Map continuous tags to text tags
 
@@ -126,7 +136,7 @@ def analyze(
     for i,sample in enumerate(batch):
        new_sample = {}
        new_sample[audio_column_name] = f"<audio src='{sample[audio_column_name][0]['src']}' controls></audio>"
-       for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr",]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
+       for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr", "stoi", "pesq", "si-sdr"]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
            new_sample[col] = enriched_dataset[col][i]
        if "gender" in batch[0] and "speaker_id" in batch[0]:
            new_sample["pitch"] = enriched_dataset["pitch"][i]
```
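For context, the new `squim_dataset = tmp_dataset.map(squim_apply, ...)` call relies on `squim_apply` imported from the dataspeech package, whose implementation is not part of this diff. Below is a minimal sketch of what such a batch function could look like, assuming torchaudio's `SQUIM_OBJECTIVE` pipeline (which estimates STOI, PESQ and SI-SDR from the degraded waveform alone, at 16 kHz); the actual dataspeech implementation may differ.

```python
import torch
import torchaudio

# Hypothetical sketch; the real squim_apply lives in dataspeech and may differ.
# SQUIM_OBJECTIVE predicts STOI, PESQ and SI-SDR without a clean reference signal.
squim_model = torchaudio.pipelines.SQUIM_OBJECTIVE.get_model()

def squim_apply(batch, rank=None, audio_column_name="audio"):
    # With `with_rank=True`, datasets passes the worker rank so each process
    # can pin the model to its own GPU; fall back to CPU otherwise.
    if rank is not None and torch.cuda.is_available():
        device = f"cuda:{rank % torch.cuda.device_count()}"
    else:
        device = "cpu"
    model = squim_model.to(device)

    stoi, pesq, sdr = [], [], []
    for audio in batch[audio_column_name]:
        waveform = torch.tensor(audio["array"], dtype=torch.float32).unsqueeze(0)
        # SQUIM expects 16 kHz input; resample if the sample uses another rate.
        if audio["sampling_rate"] != 16_000:
            waveform = torchaudio.functional.resample(
                waveform, audio["sampling_rate"], 16_000
            )
        with torch.inference_mode():
            stoi_i, pesq_i, sdr_i = model(waveform.to(device))
        stoi.append(stoi_i.item())
        pesq.append(pesq_i.item())
        sdr.append(sdr_i.item())

    # Keys match what the diff reads back: squim_dataset["stoi"], ["pesq"], ["sdr"].
    batch["stoi"] = stoi
    batch["pesq"] = pesq
    batch["sdr"] = sdr
    return batch
```

Note that the `.map(...)` call in the diff drops the audio column from its output (`remove_columns=[audio_column_name]`, commented as a trick to avoid rewriting audio), so the mapped result only carries the new metric columns, which are then attached to `enriched_dataset` via `add_column` and surfaced in the display table alongside the existing c50/snr tags.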