yavuzkomecoglu's picture
update model
3268a18
raw
history blame
1.69 kB
import gradio as gr
from utils import SpeechRecognition
# Build the project's SpeechRecognition helper once at import time and call its
# load_model() up front; the `recognition` callback reuses this single
# module-level instance on every request instead of creating a new one.
sp = SpeechRecognition()
sp.load_model()
# Leftover path of a local sample file (not used by the code):
#sample_file = "assets/samples/sample1378.flac"
def recognition(audio_file):
    """Transcribe an uploaded audio file with the module-level recognizer.

    Args:
        audio_file: Object handed over by the Gradio audio input. Only its
            ``name`` attribute is read and passed on as the file location.
            May be ``None`` (presumably when the form is submitted without
            any audio -- confirm against the installed Gradio version).

    Returns:
        The value produced by ``sp.predict_audio_file`` for the loaded
        speech, or an empty string when no audio file was supplied.
    """
    # Guard against a missing upload: dereferencing ``.name`` on None would
    # raise AttributeError and surface as an opaque error in the UI.
    if audio_file is None:
        return ""

    audio_path = audio_file.name
    print("audio_file", audio_path)

    # The loader also returns a sample rate, which this callback does not use.
    # NOTE(review): the page text asks for 16 kHz input; whether the loader
    # resamples is not visible here -- confirm in utils.SpeechRecognition.
    speech, _rate = sp.load_speech_with_file(audio_path)

    result = sp.predict_audio_file(speech)
    print(result)
    return result
# --- Gradio UI configuration -------------------------------------------------
# Audio upload widget; type="file" makes the callback receive a file object.
# NOTE(review): the `gr.inputs` namespace appears to be deprecated in newer
# Gradio releases -- confirm against the pinned Gradio version before upgrading.
inputs = gr.inputs.Audio(type="file", label="Input Audio")

# The callback's return value is rendered as plain text.
outputs = "text"

# Texts shown on the demo page: heading, short intro, and HTML footer.
title = "Turkish Automatic Speech Recognition"
description = (
    "Demo for Turkish Automatic Speech Recognition with Huggingface wav2vec Turkish Model. "
    "To use it, simply upload your audio, or click one of the examples to load them."
)
article = (
    "<p style='text-align: center'>This is the model for "
    "<a href='https://huggingface.co/patrickvonplaten/wav2vec2-common_voice-tr-demo' target='_blank'>"
    "patrickvonplaten/wav2vec2-common_voice-tr-demo</a>, a fine-tuned "
    "<a href='https://huggingface.co/facebook/wav2vec2-large-xlsr-53' target='_blank'>"
    "facebook/wav2vec2-large-xlsr-53</a> model on the "
    "<a href='https://commonvoice.mozilla.org/en/datasets' target='_blank'>"
    "Turkish Common Voice dataset</a>.<br/>"
    "When using this model, make sure that your speech input is sampled at 16kHz.<br/>"
    "<a href='https://github.com/yavuzKomecoglu' target='_blank'>Contact me</a></p>"
)

# One-element rows: each row supplies the single audio input of an example.
examples = [
    ["assets/samples/common_voice_sample_1378.flac"],
    ["assets/samples/common_voice_sample_1589.flac"],
    ["assets/samples/baris_ozcan_sample_1.wav"],
    ["assets/samples/baris_ozcan_sample_2.wav"],
    ["assets/samples/baris_ozcan_sample_3.wav"],
]

# Assemble the interface around the `recognition` callback and start serving.
gr.Interface(
    fn=recognition,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()