Spaces:

stachu86
/

HeightCeleb-estimator-demo

Running

App Files Files Community

Stanisław Kacprzak committed on Oct 17

Commit

99cda37

•

1 Parent(s): ea5b130

first commit

Browse files

Files changed (6) hide show

Description.md +5 -0
app.py +79 -0
gender_classifier.pickle +3 -0
height_estimator_0.pickle +3 -0
height_estimator_1.pickle +3 -0
requirements.txt +6 -0

Description.md ADDED Viewed

	@@ -0,0 +1,5 @@

+This is a simple demo of a height estimator that works from a speaker's speech sample.
+The height estimator is trained using [HeightCeleb](https://github.com/stachu86/HeightCeleb).
+There are two separate models for male and female speakers; the gender detection model was trained using the TIMIT dataset.
+The details of the dataset and models are described in the [SLT 2024 article](https://arxiv.org/abs/2410.12668).

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from pathlib import Path
+import gradio as gr
+import pickle
+import torchaudio
+import torch
+from speechbrain.inference.speaker import EncoderClassifier
+from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, collect_chunks
+def _load_pickle(path):
+    """Unpickle and return the object stored in the local file *path*."""
+    # NOTE: unpickling can execute arbitrary code; only use this on files shipped with the app.
+    with open(path, "rb") as handle:
+        return pickle.load(handle)
+# Speaker-embedding encoder loaded from the "speechbrain/spkrec-ecapa-voxceleb" source.
+classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")
+# Pickled models: estimator "1" serves male speakers, estimator "0" female speakers.
+gender_clf = _load_pickle("gender_classifier.pickle")
+male_clf = _load_pickle("height_estimator_1.pickle")
+female_clf = _load_pickle("height_estimator_0.pickle")
+# Markdown file whose text is displayed as the article of the demo page.
+article_md = Path("Description.md")
+def read_markdown_file(file_path):
+    """Return the complete text of the UTF-8 encoded file at *file_path*."""
+    return Path(file_path).read_text(encoding="utf-8")
+def metric_to_imperial(height):
+    """Format *height*, given in centimetres, as a feet'inches" string.
+
+    The value is rounded to the nearest whole inch before it is split
+    into feet and remaining inches, e.g. 180 -> 5'11".
+    """
+    total_inches = round(height / 2.54)
+    feet = int(total_inches / 12)
+    leftover_inches = total_inches % 12
+    return f"{feet}'{leftover_inches}\""
+def get_speech(wav):
+    """Return only the parts of *wav* that the Silero VAD marks as speech.
+
+    Args:
+        wav: audio signal as produced by ``read_audio``.
+
+    Returns:
+        The detected speech chunks joined by ``collect_chunks``.
+
+    Raises:
+        gr.Error: if the VAD finds no speech at all in *wav*.
+    """
+    # NOTE(review): the VAD model is loaded on every call; it could be loaded once and
+    # reused if sharing one model instance between requests is acceptable - confirm.
+    model = load_silero_vad()
+    speech_timestamps = get_speech_timestamps(wav, model)
+    if not speech_timestamps:
+        # With no detected segments there is nothing to join; report that clearly
+        # instead of failing inside collect_chunks on an empty list.
+        raise gr.Error("No speech was detected in the audio. Try again with VAD disabled.")
+    return collect_chunks(speech_timestamps, wav)
+def estimate_height(gender, vad, filepath, imperial):
+    """Estimate a speaker's height from an audio recording.
+
+    Args:
+        gender: "Male" or "Female" to force a model; "Detect" or None to
+            let the gender classifier decide.
+        vad: if truthy, keep only the speech detected by ``get_speech``
+            before computing the embedding.
+        filepath: path of the audio file to analyse.
+        imperial: if truthy, report feet and inches instead of centimetres.
+
+    Returns:
+        A string such as "Male 180 cm" or "Female 5'7\"".
+
+    Raises:
+        gr.Error: if no audio file was provided.
+    """
+    if not filepath:
+        # The audio input is empty when the user submits without a recording.
+        raise gr.Error("Please record or upload an audio sample first.")
+    signal = read_audio(filepath)
+    if vad:
+        signal = get_speech(signal)
+    # Drop the leading dimension so the embedding can be fed to the pickled models.
+    embedding = torch.squeeze(classifier.encode_batch(signal), 0)
+    if gender == "Detect" or gender is None:
+        # predict() returns one label per sample; take the single scalar label
+        # rather than using the whole array in truth tests.
+        is_male = bool(gender_clf.predict(embedding)[0])
+    else:
+        is_male = gender == "Male"
+    height_estimator = male_clf if is_male else female_clf
+    height = height_estimator.predict(embedding)[0]
+    if imperial:
+        height = metric_to_imperial(height)
+    else:
+        height = str(round(height)) + " cm"
+    return f"{'Male' if is_male else 'Female'} {height}"
+# Build the demo page: a single Interface, wrapped in a Blocks container that uses the Glass theme.
+theme = gr.themes.Glass()
+with gr.Blocks(theme=theme) as demo:
+    # The inputs are listed in the same order as estimate_height's parameters:
+    # gender, vad, filepath, imperial.
+    gr.Interface(
+        fn=estimate_height, inputs=[
+            gr.Radio(["Detect", "Male", "Female"], label="Gender of a speaker", value="Detect"),
+            gr.Checkbox(label="VAD", info="If there is a lot of silence in your audio, maybe try using VAD"),
+            gr.Audio(label="Audio", type="filepath"),
+            gr.Checkbox(label="Imperial units")
+        ],
+        outputs=[gr.Label(label="Prediction")],
+        title="Speaker height estimator",
+        description="Demo of estimator trained using [HeightCeleb](https://github.com/stachu86/HeightCeleb) dataset",
+        allow_flagging="never",
+        # The article text is read from Description.md once, while the UI is being built.
+        article=read_markdown_file(article_md)
+    )
+# NOTE(review): the positional False is presumably launch()'s `inline` flag - confirm against the Gradio API.
+demo.launch(False, debug=True)

gender_classifier.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfbaeda5674736fa530ddb341e1e102c84fdbc54b57d5d0f47beac088a689a7a
+size 2247

height_estimator_0.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bb6798d2f3b0fb772d1c67a736843022474f5860e2757dab01d7bfc0df2a02f
+size 996179

height_estimator_1.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ed101b447d415e34c0407b2e4d5521f651cef83f7bf6ad9d022a09442b186c2
+size 2799794

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+silero-vad
+torch
+torchaudio
+speechbrain
+scikit-learn==1.4.0