MMS / app.py
GreenRaptor's picture
Update app.py
2e00f3e
raw
history blame
2.18 kB
import gradio as gr
import argparse
import soundfile as sf
import tempfile
from pathlib import Path
import os
import subprocess
import sys
import re
# from transformers import AutoProcessor, AutoModelForPreTraining
# processor = AutoProcessor.from_pretrained("patrickvonplaten/mms-1b")
# model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
def _as_path_list(audio):
    """Normalise *audio* to a list of paths; a lone path becomes a one-item list."""
    if audio is None:
        return []
    if isinstance(audio, (str, os.PathLike)):
        # A bare string must not be iterated character by character.
        return [audio] if os.fspath(audio) else []
    return list(audio)


def process(audio, model, lang, format):
    """Transcribe audio files by running ``infer.py`` in a subprocess.

    A temporary manifest directory (``dev.tsv``, ``dev.uid``, ``dev.ltr``,
    ``dev.wrd``) is written, ``infer.py`` is invoked on it, and the resulting
    ``hypo.word`` file is read back.  Each hypothesis is printed to stdout and
    also collected for the caller.

    Args:
        audio: A single audio file path or an iterable of paths.  ``None``
            or an empty value yields an empty result without running anything.
        model: Checkpoint path passed as ``common_eval.path``.
        lang: Language code; the subset is selected as ``"<lang>:dev"``.
        format: Value passed as ``common_eval.post_process``.

    Returns:
        A list with one cleaned hypothesis string per line of ``hypo.word``.

    Raises:
        FileNotFoundError: If inference did not produce ``hypo.word``.
    """
    audio_paths = _as_path_list(audio)
    if not audio_paths:
        return []

    n_files = len(audio_paths)
    with tempfile.TemporaryDirectory() as tmpdir:
        print(">>> preparing tmp manifest dir ...", file=sys.stderr)
        tmpdir = Path(tmpdir)

        with open(tmpdir / "dev.tsv", "w") as fw:
            # First manifest line is "/" -- presumably the root directory the
            # entries are resolved against, so paths should be absolute.
            fw.write("/\n")
            for path in audio_paths:
                # Close the sound file as soon as its frame count is known.
                with sf.SoundFile(path) as snd:
                    nsample = snd.frames
                fw.write(f"{path}\t{nsample}\n")
        with open(tmpdir / "dev.uid", "w") as fw:
            # One id line per input file, so the line count matches dev.tsv.
            fw.writelines(f"{path}\n" for path in audio_paths)
        # Placeholder references: one dummy line per input file.
        with open(tmpdir / "dev.ltr", "w") as fw:
            fw.write("d u m m y | d u m m y\n" * n_files)
        with open(tmpdir / "dev.wrd", "w") as fw:
            fw.write("dummy dummy\n" * n_files)

        # Argument vector instead of a shell string: values containing spaces
        # or shell metacharacters can no longer break or alter the command.
        cmd = [
            "python",
            "infer.py",
            "-m",
            "decoding.type=viterbi",
            "dataset.max_tokens=4000000",
            "distributed_training.distributed_world_size=1",
            f"common_eval.path='{model}'",
            f"task.data={tmpdir}",
            f"dataset.gen_subset={lang}:dev",
            f"common_eval.post_process={format}",
            f"decoding.results_path={tmpdir}",
        ]
        env = {
            **os.environ,
            "PYTHONPATH": ".",
            "PREFIX": "INFER",
            "HYDRA_FULL_ERROR": "1",
        }
        print(">>> loading model & running inference ...", file=sys.stderr)
        completed = subprocess.run(cmd, env=env, stdout=subprocess.DEVNULL)
        if completed.returncode != 0:
            # Report the failure; a missing hypo.word below still raises
            # FileNotFoundError exactly as before.
            print(
                f">>> infer.py exited with status {completed.returncode}",
                file=sys.stderr,
            )

        hypotheses = []
        with open(tmpdir / "hypo.word") as fr:
            for ii, hypo in enumerate(fr):
                # Drop the trailing parenthesised token (presumably an
                # utterance id appended by infer.py -- TODO confirm).
                hypo = re.sub(r"\(\S+\)$", "", hypo).strip()
                print(f'===============\nInput: {audio_paths[ii]}\nOutput: {hypo}')
                hypotheses.append(hypo)
    return hypotheses


def transcribe(audio):
    """Gradio callback: transcribe one recording and return the text.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when no
            recording is available.

    Returns:
        The transcription (hypotheses joined by newlines), or ``""`` when
        there is nothing to transcribe.
    """
    if not audio:
        return ""
    model = "base_300m.pt"
    lang = "eng"
    post_process = "letter"
    return "\n".join(process(audio, model, lang, post_process))
# Build the web UI: a microphone recording goes to `transcribe`, whose
# result is shown in a textbox.  The interface is launched on import/run.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath")],
    outputs=["textbox"],
    title='MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
    live=True,
)
demo.launch()