# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
import os | |
from sidlingvo import wav_to_lang | |
from huggingface_hub import hf_hub_download | |
import numpy as np | |
# Heading and introductory text passed to the Gradio interface.
title = "Spoken Language Identification"
description = """
A demo of conformer-based spoken language identification.
Paper: https://arxiv.org/abs/2202.12163
Model: https://huggingface.co/tflite-hub/conformer-lang-id
"""

# Hub repository that hosts the model files, and the local directory
# they are downloaded into.
repo_id = "tflite-hub/conformer-lang-id"
model_path = "models"

# Download every file the runner is constructed from, in a fixed order.
for _model_filename in (
    "vad_short_model.tflite",
    "vad_short_mean_stddev.csv",
    "conformer_langid_medium.tflite",
):
    hf_hub_download(
        repo_id=repo_id,
        filename=_model_filename,
        local_dir=model_path,
    )

# Build the runner once at import time so every request reuses it.
_vad_model = os.path.join(model_path, "vad_short_model.tflite")
_vad_stats = os.path.join(model_path, "vad_short_mean_stddev.csv")
_langid_model = os.path.join(model_path, "conformer_langid_medium.tflite")
runner = wav_to_lang.WavToLangRunner(
    vad_model_file=_vad_model,
    vad_mean_stddev_file=_vad_stats,
    langid_model_file=_langid_model,
)
def predict(wav_file):
    """Identify the spoken language of an audio file.

    Args:
        wav_file: Path to the audio file, as supplied by the Gradio audio
            input. May be ``None`` (or empty) when the user submits the
            form without uploading or recording anything.

    Returns:
        A two-line message with the predicted language and the highest
        value found in the probabilities returned by the runner, or a
        short prompt asking for audio when no file was supplied.
    """
    # Gradio hands over None when the audio input is left empty; answer
    # with a readable message instead of failing inside the runner.
    if not wav_file:
        return "No audio provided. Please upload or record an audio clip."

    top_lang, probs = runner.wav_to_lang(wav_file)
    top_lang_prob = np.max(probs)
    return f"Predicted language: {top_lang}\nProbability: {top_lang_prob}"
if __name__ == "__main__":
    # The audio input hands the handler a path to the file on disk.
    audio_input = gr.Audio(type="filepath")

    # One audio input, one text output, wired to predict().
    demo = gr.Interface(
        fn=predict,
        inputs=audio_input,
        outputs="text",
        title=title,
        description=description,
    )
    demo.launch()