Spaces:

qgyd2021
/

vm_sound_classification

Sleeping

App Files Files Community

vm_sound_classification / main.py

HoneyTian

update

1204717 8 months ago

raw

history blame

4.55 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	import argparse
	from functools import lru_cache
	import json
	from pathlib import Path
	import platform
	import shutil
	import tempfile
	import zipfile

	import gradio as gr
	import numpy as np
	import torch

	from project_settings import environment, project_path
	from toolbox.torch.utils.data.vocabulary import Vocabulary


	def get_args():
	    """Parse the command-line options of the demo.

	    Options:
	        --examples_dir: directory holding the example audio files; defaults
	            to ``<project_path>/data/examples``.
	        --trained_model_dir: directory holding the packaged models;
	            defaults to ``<project_path>/trained_models``.
	        --server_port: port for the web server; defaults to the
	            ``server_port`` entry of ``environment``, or 7860 when absent.

	    Returns:
	        The ``argparse.Namespace`` produced by ``parse_args()``.
	    """
	    # Resolve the defaults up front so the option declarations stay short.
	    default_examples_dir = (project_path / "data/examples").as_posix()
	    default_model_dir = (project_path / "trained_models").as_posix()
	    default_port = environment.get("server_port", 7860)

	    parser = argparse.ArgumentParser()
	    parser.add_argument("--examples_dir", default=default_examples_dir, type=str)
	    parser.add_argument("--trained_model_dir", default=default_model_dir, type=str)
	    parser.add_argument("--server_port", default=default_port, type=int)
	    return parser.parse_args()


	@lru_cache(maxsize=100)
	def load_model(model_file: Path):
	    """Unpack a packaged model archive and load its model and vocabulary.

	    The archive is expected to unpack into a directory named after the
	    archive's stem, containing ``trace_model.zip`` (loaded with
	    ``torch.jit.load``) and a ``vocabulary`` directory (loaded with
	    ``Vocabulary.from_files``).

	    Results are memoised by ``lru_cache``, so a given ``model_file`` is only
	    unpacked and loaded the first time it is requested.

	    Args:
	        model_file: path of the ``.zip`` model archive.

	    Returns:
	        dict with two entries: ``"model"`` (the loaded module, switched to
	        eval mode) and ``"vocabulary"``.
	    """
	    # Each call gets its own scratch directory. A fixed, shared directory
	    # that is wiped on entry lets concurrent loads (or another process)
	    # delete files that are still being read, and leaves the extracted tree
	    # behind when loading fails. TemporaryDirectory removes it on every exit
	    # path.
	    with tempfile.TemporaryDirectory(prefix="vm_sound_classification_") as tmp_dir:
	        out_root = Path(tmp_dir)
	        with zipfile.ZipFile(model_file, "r") as f_zip:
	            f_zip.extractall(path=out_root)

	        tgt_path = out_root / model_file.stem
	        jit_model_file = tgt_path / "trace_model.zip"
	        vocab_path = tgt_path / "vocabulary"

	        # Both artefacts must be loaded before the scratch directory is
	        # removed at the end of this ``with`` block.
	        vocabulary = Vocabulary.from_files(vocab_path.as_posix())

	        with open(jit_model_file.as_posix(), "rb") as f:
	            model = torch.jit.load(f)
	        model.eval()

	    return {
	        "model": model,
	        "vocabulary": vocabulary,
	    }


	def click_button(audio: tuple,
	                 model_name: str,
	                 ground_true: str) -> tuple:
	    """Classify one audio clip with the selected model.

	    Args:
	        audio: ``(sample_rate, signal)`` pair; only ``signal`` is used.
	        model_name: stem of the archive to load, i.e.
	            ``trained_models/<model_name>.zip``.
	        ground_true: expected label shown in the UI; not used for inference.

	    Returns:
	        ``(label, probability)``: the predicted label string and its softmax
	        probability as a Python float rounded to 4 decimals.

	    Raises:
	        gr.Error: when no audio was supplied.
	    """
	    # Without this guard a missing clip fails with an opaque
	    # "cannot unpack non-iterable NoneType" error.
	    if audio is None:
	        raise gr.Error("audio is required.")

	    _, signal = audio

	    # NOTE(review): this path is relative to the current working directory
	    # and does not follow the --trained_model_dir option.
	    model_file = Path("trained_models/{}.zip".format(model_name))
	    d = load_model(model_file)

	    model = d["model"]
	    vocabulary = d["vocabulary"]

	    # Scale samples by 2**15; assumes 16-bit integer PCM - TODO confirm.
	    inputs = torch.tensor(signal / (1 << 15), dtype=torch.float32)
	    # Add a leading batch dimension of size 1.
	    inputs = torch.unsqueeze(inputs, dim=0)

	    with torch.no_grad():
	        logits = model.forward(inputs)
	        probs = torch.nn.functional.softmax(logits, dim=-1)
	        label_idx = torch.argmax(probs, dim=-1)

	    # Convert to native Python scalars so the UI receives plain int/float
	    # values rather than numpy scalars (rounding a float32 yields a float32).
	    label_idx = int(label_idx.cpu().numpy()[0])
	    prob = float(probs.cpu().numpy()[0][label_idx])

	    label_str = vocabulary.get_token_from_index(label_idx, namespace="labels")

	    return label_str, round(prob, 4)


	def main():
	    """Build the Gradio demo and start serving it.

	    Model choices are the stems of the ``*.zip`` files in
	    ``--trained_model_dir``. Examples are the ``*.wav`` files found under
	    ``--examples_dir``, each labelled with the name of its parent directory.

	    Raises:
	        FileNotFoundError: when ``--trained_model_dir`` holds no ``*.zip``
	            model archive.
	    """
	    args = get_args()

	    examples_dir = Path(args.examples_dir)
	    trained_model_dir = Path(args.trained_model_dir)

	    # models
	    # glob() yields entries in filesystem order; sort so the default model is
	    # the same on every machine.
	    model_choices = sorted(
	        filename.stem for filename in trained_model_dir.glob("*.zip")
	    )
	    if not model_choices:
	        raise FileNotFoundError(
	            "no *.zip model found in: {}".format(trained_model_dir.as_posix())
	        )
	    default_model = model_choices[0]

	    # examples
	    # The pattern must be relative: Path.glob() rejects patterns that start
	    # with "/". "**/*.wav" searches the examples directory recursively.
	    examples = [
	        [filename.as_posix(), default_model, filename.parent.name]
	        for filename in sorted(examples_dir.glob("**/*.wav"))
	    ]

	    # ui
	    brief_description = """
	国际语音智能外呼系统, 电话声音分类.
	"""

	    with gr.Blocks() as blocks:
	        gr.Markdown(value=brief_description)

	        with gr.Row():
	            # left column: inputs
	            with gr.Column(scale=3):
	                c_audio = gr.Audio(label="audio")
	                with gr.Row():
	                    with gr.Column(scale=3):
	                        c_model_name = gr.Dropdown(
	                            choices=model_choices,
	                            value=default_model,
	                            label="model_name",
	                        )
	                    with gr.Column(scale=3):
	                        c_ground_true = gr.Textbox(label="ground_true")

	                c_button = gr.Button("run", variant="primary")
	            # right column: outputs
	            with gr.Column(scale=3):
	                c_label = gr.Textbox(label="label")
	                c_probability = gr.Number(label="probability")

	        gr.Examples(
	            examples,
	            inputs=[c_audio, c_model_name, c_ground_true],
	            outputs=[c_label, c_probability],
	            fn=click_button,
	            examples_per_page=5,
	        )

	        c_button.click(
	            click_button,
	            inputs=[c_audio, c_model_name, c_ground_true],
	            outputs=[c_label, c_probability],
	        )

	    # Bind to loopback on Windows and to all interfaces elsewhere.
	    on_windows = platform.system() == "Windows"
	    blocks.queue().launch(
	        share=False,
	        server_name="127.0.0.1" if on_windows else "0.0.0.0",
	        server_port=args.server_port,
	    )


	# Launch the demo only when this file is executed as a script.
	if __name__ == "__main__":
	    main()