ylacombe HF staff committed on
Commit
8496047
1 Parent(s): 2df3fbd

Update analyze.py

Browse files
Files changed (1) hide show
  1. analyze.py +13 -3
analyze.py CHANGED
@@ -8,7 +8,7 @@ import torch
8
 
9
  from datasets import Dataset, Audio
10
 
11
- from dataspeech import rate_apply, pitch_apply, snr_apply
12
  from metadata_to_text import bins_to_text, speaker_level_relative_to_gender
13
 
14
  Row = Dict[str, Any]
@@ -59,6 +59,16 @@ def analyze(
59
 
60
 
61
  ## 1. Extract continous tags
 
 
 
 
 
 
 
 
 
 
62
  pitch_dataset = tmp_dataset.map(
63
  pitch_apply,
64
  batched=True,
@@ -89,7 +99,7 @@ def analyze(
89
 
90
  enriched_dataset = pitch_dataset.add_column("snr", snr_dataset["snr"]).add_column("c50", snr_dataset["c50"])
91
  enriched_dataset = enriched_dataset.add_column("speaking_rate", rate_dataset["speaking_rate"]).add_column("phonemes", rate_dataset["phonemes"])
92
-
93
 
94
  ## 2. Map continuous tags to text tags
95
 
@@ -126,7 +136,7 @@ def analyze(
126
  for i,sample in enumerate(batch):
127
  new_sample = {}
128
  new_sample[audio_column_name] = f"<audio src='{sample[audio_column_name][0]['src']}' controls></audio>"
129
- for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr",]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
130
  new_sample[col] = enriched_dataset[col][i]
131
  if "gender" in batch[0] and "speaker_id" in batch[0]:
132
  new_sample["pitch"] = enriched_dataset["pitch"][i]
 
8
 
9
  from datasets import Dataset, Audio
10
 
11
+ from dataspeech import rate_apply, pitch_apply, snr_apply, squim_apply
12
  from metadata_to_text import bins_to_text, speaker_level_relative_to_gender
13
 
14
  Row = Dict[str, Any]
 
59
 
60
 
61
  ## 1. Extract continous tags
62
+ squim_dataset = tmp_dataset.map(
63
+ squim_apply,
64
+ batched=True,
65
+ batch_size=BATCH_SIZE,
66
+ with_rank=True if torch.cuda.device_count()>0 else False,
67
+ num_proc=torch.cuda.device_count(),
68
+ remove_columns=[audio_column_name], # tricks to avoid rewritting audio
69
+ fn_kwargs={"audio_column_name": audio_column_name,},
70
+ )
71
+
72
  pitch_dataset = tmp_dataset.map(
73
  pitch_apply,
74
  batched=True,
 
99
 
100
  enriched_dataset = pitch_dataset.add_column("snr", snr_dataset["snr"]).add_column("c50", snr_dataset["c50"])
101
  enriched_dataset = enriched_dataset.add_column("speaking_rate", rate_dataset["speaking_rate"]).add_column("phonemes", rate_dataset["phonemes"])
102
+ enriched_dataset = enriched_dataset.add_column("stoi", squim_dataset["stoi"]).add_column("si-sdr", squim_dataset["sdr"]).add_column("pesq", squim_dataset["pesq"])
103
 
104
  ## 2. Map continuous tags to text tags
105
 
 
136
  for i,sample in enumerate(batch):
137
  new_sample = {}
138
  new_sample[audio_column_name] = f"<audio src='{sample[audio_column_name][0]['src']}' controls></audio>"
139
+ for col in ["speaking_rate", "reverberation", "noise", "speech_monotony", "c50", "snr", "stoi", "pesq", "si-sdr"]: # phonemes, speaking_rate, utterance_pitch_std, utterance_pitch_mean
140
  new_sample[col] = enriched_dataset[col][i]
141
  if "gender" in batch[0] and "speaker_id" in batch[0]:
142
  new_sample["pitch"] = enriched_dataset["pitch"][i]