# NOTE: the lines previously here were web-page extraction artifacts
# ("Spaces:", build-status text, commit hashes, and a run of line numbers)
# that made this file invalid Python; they carried no program content.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
import malaya_speech
from malaya_speech.utils.astype import float_to_int
from pyctcdecode import build_ctcdecoder
from malaya_speech.utils.char import CTC_VOCAB
from glob import glob
import kenlm
import gradio as gr
import logging
import json
logging.basicConfig(level=logging.INFO)
# Target sample rate expected by the STT models (Hz).
SR = 16000
# Lazy cache of loaded transducer models, keyed by model name.
MODELS = {}
# Model names offered in the dropdown, taken from malaya_speech's registry.
AVAILABLE_MODELS = malaya_speech.stt.available_transducer().index.tolist()
# Bundled example WAV files shown as clickable examples in the UI.
wavs = glob('audio/*.wav')
def load_audio_wav(filename):
    """Load an audio file via malaya_speech.

    Parameters
    ----------
    filename : str
        Path to the audio file to load.

    Returns
    -------
    tuple
        ``(y, sr)`` as returned by ``malaya_speech.load`` — presumably
        samples and sample rate; confirm against malaya_speech docs.
    """
    # Use the configured logger instead of a bare print().
    logging.info('loading %s', filename)
    y, sr = malaya_speech.load(filename)
    return y, sr
def tts(upload, record, model):
    """Transcribe speech from an uploaded WAV file or a microphone recording.

    NOTE(review): despite the name, this is speech-to-text, not
    text-to-speech. The name is kept because gr.Interface is wired to it.

    Parameters
    ----------
    upload : str or None
        Path to an uploaded WAV file (gradio ``type='filepath'``).
    record : tuple or None
        ``(sample_rate, samples)`` from the gradio microphone component.
    model : str
        Name of the transducer model to decode with.

    Returns
    -------
    str
        Greedy-decoded transcription.

    Raises
    ------
    ValueError
        If neither an upload nor a recording is provided (previously this
        crashed with an opaque error inside the loader).
    """
    if record:
        rate, y = record
    elif upload:
        y, rate = load_audio_wav(upload)
    else:
        raise ValueError('provide an uploaded WAV file or a microphone recording')
    # Stereo audio arrives as (n_samples, 2); keep the first channel only.
    if len(y.shape) == 2:
        y = y.T[0]
    logging.info('decoding %d samples with %s', len(y), model)
    # The models expect 16 kHz input.
    y_16k = malaya_speech.resample(y, rate, SR)
    # Lazily load and cache the requested model. Mutating the MODELS dict
    # does not require a `global` declaration.
    if model not in MODELS:
        logging.info('%s not in MODELS', model)
        MODELS[model] = malaya_speech.stt.deep_transducer(model=model)
    return MODELS[model].greedy_decoder([y_16k])[0]
# One example row per bundled audio file: (upload path, no recording,
# default 'conformer' model) — matches the Interface's input order.
examples = [[wav_path, None, 'conformer'] for wav_path in wavs]
# Wire the transcription function into a gradio UI: two audio inputs
# (file upload or microphone) plus a model dropdown, text output.
demo = gr.Interface(
    fn=tts,
    inputs=[
        gr.Audio(source='upload', label = 'upload WAV file', type='filepath'),
        gr.Audio(source='microphone', label = 'or record using microphone'),
        gr.components.Dropdown(label='Available models', choices=AVAILABLE_MODELS, value = 'conformer'),
    ],
    outputs=['text'],
    examples=examples,
    # Do not pre-run the examples at startup; transcription is expensive.
    cache_examples=False,
    title='ASR TRANSDUCER - TNB VOICE',
    description='Fastest'
)
# Bind to all interfaces so the app is reachable inside a container.
demo.launch(server_name='0.0.0.0')