"""Gradio demo that estimates a speaker's height from an audio recording.

A speaker embedding is computed with the SpeechBrain ECAPA encoder and fed to
pickled estimators: one gender classifier and one height estimator per gender.
"""

from pathlib import Path
import pickle
import threading

import gradio as gr
import torch
import torchaudio
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, collect_chunks
from speechbrain.inference.speaker import EncoderClassifier


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE(security): unpickling executes arbitrary code. These files are read
    # from the application's working directory and must come from a trusted
    # source; never point this at user-supplied data.
    with open(path, "rb") as file:
        return pickle.load(file)


classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")

gender_clf = _load_pickle("gender_classifier.pickle")
male_clf = _load_pickle("height_estimator_1.pickle")
female_clf = _load_pickle("height_estimator_0.pickle")

article_md = Path("Description.md")
error_message = "No speech detected or signal too short!"

# The VAD model is loaded lazily on first use and then reused: loading it on
# every request is wasted work. The lock serialises access because the cached
# model is shared between requests.
_vad_lock = threading.Lock()
_vad_model = None


def read_markdown_file(file_path):
    """Return the UTF-8 decoded contents of the file at *file_path*."""
    return Path(file_path).read_text(encoding="utf-8")


def metric_to_imperial(height):
    """Format a height given in centimetres as feet and inches, e.g. ``5'11"``.

    The value is rounded to the nearest whole inch. Heights are expected to be
    non-negative.
    """
    # int() guards against round() handing back a non-int numeric scalar,
    # which would otherwise show up as e.g. 5'9.0" in the formatted string.
    inches = int(round(height / 2.54))
    feet, remainder = divmod(inches, 12)
    return f"{feet}'{remainder}\""


def _get_vad_model():
    """Return the shared Silero VAD model, loading it on first use.

    Must be called with ``_vad_lock`` held.
    """
    global _vad_model
    if _vad_model is None:
        _vad_model = load_silero_vad()
    return _vad_model


def get_speech(wav):
    """Return only the parts of *wav* that the VAD marks as speech.

    Args:
        wav: Audio samples as returned by ``read_audio``.

    Returns:
        The speech chunks concatenated by ``collect_chunks``, or an empty
        slice of *wav* when no speech was detected.
    """
    with _vad_lock:
        speech_timestamps = get_speech_timestamps(wav, _get_vad_model())
    if not speech_timestamps:
        # collect_chunks cannot concatenate an empty list of chunks; return an
        # empty signal so the caller can report "no speech" instead of crashing.
        return wav[:0]
    return collect_chunks(speech_timestamps, wav)


def estimate_height(gender, vad, filepath, imperial):
    """Estimate the speaker's height from an audio file.

    Args:
        gender: ``"Male"``, ``"Female"``, or ``"Detect"``/``None`` to let the
            gender classifier decide.
        vad: When true, keep only the speech detected by the VAD before
            computing the embedding.
        filepath: Path to the audio file, or ``None`` when nothing was given.
        imperial: When true, report feet and inches instead of centimetres.

    Returns:
        A string ``"<Male|Female> <height>"``, or ``error_message`` when there
        is no input file or no signal left to analyse.
    """
    if filepath is None:
        return error_message

    signal = read_audio(filepath)
    if vad:
        signal = get_speech(signal)
    if len(signal) < 1:
        return error_message

    # Drop the leading dimension of the encoder output before handing it to
    # the estimators (assumes the result is then (1, dim) -- TODO confirm).
    embedding = torch.squeeze(classifier.encode_batch(signal), 0)

    if gender is None or gender == "Detect":
        # predict() returns one label per row; take the single prediction
        # rather than relying on the truthiness of a one-element array.
        # Presumably the classifier uses the same 1 = male / 0 = female
        # encoding as the manual choice below -- verify against training code.
        gender = gender_clf.predict(embedding)[0]
    else:
        gender = 1 if gender == "Male" else 0

    height_estimator = male_clf if gender else female_clf
    height = height_estimator.predict(embedding)[0]

    if imperial:
        height = metric_to_imperial(height)
    else:
        height = str(round(height)) + " cm"

    return f"{'Male' if gender else 'Female'} {height}"


theme = gr.themes.Glass()

with gr.Blocks(theme=theme) as demo:
    gr.Interface(
        fn=estimate_height,
        inputs=[
            gr.Radio(["Detect", "Male", "Female"], label="Gender of a speaker", value="Detect"),
            gr.Checkbox(label="VAD", info="If there is a lot of silence in your audio, maybe try using VAD"),
            gr.Audio(label="Audio", type="filepath"),
            gr.Checkbox(label="Imperial units"),
        ],
        outputs=[gr.Label(label="Prediction")],
        title="Speaker height estimator",
        description="Demo of estimator trained using [HeightCeleb](https://github.com/stachu86/HeightCeleb) dataset",
        allow_flagging="never",
        article=read_markdown_file(article_md),
    )

# The first positional parameter of launch() is `inline`; spell it out.
demo.launch(inline=False, debug=True)