Spaces:

spookyspaghetti
/

Speech-Analyser

Runtime error

Speech-Analyser / app.py

Create app.py

0432ec8 over 1 year ago

1.53 kB

	# Deployment dependencies
	# -----------------------
	# The commands that used to live here were notebook shell magics
	# (`!pip ...`, `!apt-get ...`, `!mkdir`, `!wget`). They are not valid Python,
	# so a plain `app.py` containing them cannot be parsed. They are preserved
	# below as a manifest; on a Hugging Face Space they belong in the
	# repository's dependency files rather than in the application module.
	#
	# requirements.txt (pip packages):
	#     git+https://github.com/PrithivirajDamodaran/Gramformer.git
	#     gradio
	#     wget
	#     text-unidecode
	#     matplotlib>=3.3.2
	#     git+https://github.com/NVIDIA/NeMo.git@r1.13.0#egg=nemo_toolkit[all]
	#
	# packages.txt (Debian packages):
	#     sox
	#     libsndfile1
	#     ffmpeg
	#
	# NOTE(review): the original `!pip install matplotlib>=3.3.2` was unquoted,
	# so a shell would treat `>=3.3.2` as an output redirection instead of a
	# version constraint. In requirements.txt the specifier is read as intended.
	#
	# spaCy model setup that was run after the installs:
	#     python -m spacy download en_core_web_md
	#     python -m spacy link en_core_web_md en
	# NOTE(review): `spacy link` is presumably only available on spaCy 2.x;
	# confirm against the spaCy version the other packages pull in.
	#
	# Example ASR config that was downloaded into ./configs (not referenced by
	# the code visible in this file):
	#     https://raw.githubusercontent.com/NVIDIA/NeMo/r1.13.0/examples/asr/conf/config.yaml

	## NeMo release branch the dependency manifest above is pinned to.
	BRANCH = 'r1.13.0'

	import gradio as gr
	import time
	from nemo.collections.asr.models import ASRModel
	import torch
	# Pick the first GPU when CUDA is available, otherwise fall back to the CPU,
	# so `device` is always bound regardless of the host hardware.
	if torch.cuda.is_available():
	    device = torch.device('cuda:0')
	else:
	    device = torch.device('cpu')

	# Load the pretrained Citrinet speech-to-text checkpoint unconditionally (the
	# app needs it on CPU-only hosts too) and place its weights on `device`.
	asr_model = ASRModel.from_pretrained(
	    model_name='stt_en_citrinet_1024',
	    map_location=device,
	)

	from gramformer import Gramformer
	import torch

	def set_seed(seed):
	    """Seed PyTorch's random number generators.

	    Args:
	        seed: Integer seed handed to ``torch.manual_seed`` and, when CUDA is
	            available, to ``torch.cuda.manual_seed_all``.
	    """
	    torch.manual_seed(seed)
	    # Only touch the CUDA generators when a GPU is actually present.
	    if torch.cuda.is_available():
	        torch.cuda.manual_seed_all(seed)

	# Fix the torch RNG state before the grammar model is constructed.
	set_seed(seed=1212)

	# Grammar model instance: models=1 selects the corrector (2 would be the
	# detector); use_gpu=False keeps it on the CPU.
	gf = Gramformer(models=1, use_gpu=False)

	def transcribe(audio):
	    """Speech to text using Nvidia NeMo, followed by grammar correction.

	    Args:
	        audio: Path of the audio file to transcribe, or ``None`` when no
	            audio was supplied.

	    Returns:
	        A ``(text, correct)`` tuple. ``text`` is the first transcript
	        returned by ``asr_model.transcribe`` for the file and ``correct`` is
	        the first candidate produced by ``gf.correct`` for that transcript.
	        Both are empty strings when ``audio`` is ``None``; ``correct`` falls
	        back to ``text`` when the corrector yields no candidate.
	    """
	    # Nothing to transcribe: return empty outputs instead of passing None
	    # through to the ASR model.
	    if audio is None:
	        return "", ""

	    text = asr_model.transcribe(paths2audio_files=[audio])[0]

	    # Take the first candidate if there is one; an empty result would make
	    # `list(...)[0]` raise IndexError, so fall back to the raw transcript.
	    candidates = gf.correct(text, max_candidates=1)
	    correct = next(iter(candidates), text)
	    return text, correct

	# Gradio needs three things: the function to call, its input components and
	# its output components. One audio input (delivered as a file path) feeds
	# `transcribe`, whose two return values land in two text boxes.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=[gr.components.Audio(type="filepath")],
	    outputs=["textbox", "textbox"],
	)
	demo.launch()