import os
import gradio as gr
import numpy as np
import soundfile as sf
import torchaudio
from speechbrain.pretrained.interfaces import foreign_class
from app_utils import preprocess_video_and_rank
from authors import AUTHORS
# Importing necessary components for the Gradio app
from description import DESCRIPTION_DYNAMIC # , DESCRIPTION_STATIC
# import scipy.io.wavfile as wav
from paraformer import AudioReader, CttPunctuator, FSMNVad, ParaformerOffline
os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
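# Assumption: the no_proxy entries keep requests to the local Gradio server from
# being routed through any system-wide HTTP proxy.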
########################### Speech section ######################################
classifier = foreign_class(
source="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP", # ".\\emotion-recognition-wav2vec2-IEMOCAP"
pymodule_file="custom_interface.py",
classname="CustomEncoderWav2vec2Classifier",
savedir="pretrained_models/local-speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
)
ASR_model = ParaformerOffline()
vad = FSMNVad()
punc = CttPunctuator()
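# Speech pipeline components: FSMNVad splits a recording into speech segments,
# ParaformerOffline transcribes each segment, CttPunctuator restores punctuation,
# and the SpeechBrain wav2vec2 classifier (fine-tuned on IEMOCAP) predicts the emotion.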
def classify_continuous(audio):
    print(type(audio))
    print(audio)
    if audio is None:  # nothing was recorded
        return "none", "none", "none"
    sample_rate, signal = audio  # microphone input from Gradio: (rate, waveform)
    signal = signal.astype(np.float32)
    signal /= np.max(np.abs(signal))  # normalize to [-1, 1]
    sf.write("a.wav", signal, sample_rate)
    # Reload and resample to 16 kHz, the rate the ASR and VAD models expect.
    signal, sample_rate = torchaudio.load("a.wav")
    signal1 = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(
        signal
    )
    torchaudio.save("out.wav", signal1, 16000, encoding="PCM_S", bits_per_sample=16)
    speech, sample_rate = AudioReader.read_wav_file("out.wav")
    # Voice activity detection first, then ASR and punctuation on each segment.
    segments = vad.segments_offline(speech)
    text_results = ""
    for part in segments:
        # Segment bounds are in milliseconds; *16 converts them to 16 kHz sample indices.
        _result = ASR_model.infer_offline(
            speech[part[0] * 16 : part[1] * 16],
            hot_words="任意热词 空格分开",  # "any hot words, separated by spaces"
        )
        text_results += punc.punctuate(_result)[0]
    # Emotion classification on the resampled waveform.
    out_prob, score, index, text_lab = classifier.classify_batch(signal1)
    return text_results, out_prob.squeeze(0).numpy(), text_lab[-1]
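# Usage sketch (hypothetical values): the function can also be called directly with
# the same (sample_rate, waveform) tuple that Gradio's microphone component provides:
#   text, probs, label = classify_continuous((sr, waveform))
# where `text` is the punctuated transcript, `probs` the emotion probability vector,
# and `label` the predicted emotion class.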
######################################### Video section ###################################
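# clear_dynamic_info returns freshly constructed, empty components; Gradio treats
# components returned from an event handler as updates, so this clears the video,
# plot, and score outputs.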
def clear_dynamic_info():
    return (
        gr.Video(value=None),
        gr.Plot(value=None),
        gr.Textbox(""),
    )
################################## Set up the individual apps ####################
with gr.Blocks(css="app.css") as video:
    with gr.Tab("Dynamic App"):
        gr.Markdown(value=DESCRIPTION_DYNAMIC)
        with gr.Row():
            with gr.Column(scale=2):
                input_video = gr.Video(
                    sources=["webcam", "upload"], elem_classes="video1"
                )
                with gr.Row():
                    clear_btn_dynamic = gr.Button(
                        value="Clear", interactive=True, scale=1
                    )
                    # submit_dynamic = gr.Button(
                    #     value="Submit", interactive=True, scale=1, elem_classes="submit"
                    # )
                    submit_and_rank = gr.Button(
                        value="Score", interactive=True, scale=1, elem_classes="submit"
                    )
            with gr.Column(scale=2, elem_classes="dl4"):
                with gr.Row():
                    output_score = gr.Textbox(label="scores")
                    output_statistics = gr.Plot(
                        label="Statistics of emotions", elem_classes="stat"
                    )
        gr.Examples(
            [
                "videos/video1.mp4",
                "videos/video2.mp4",
                "videos/sample.webm",
                "videos/cnm.mp4",
            ],
            [input_video],
        )
    with gr.Tab("Authors"):
        gr.Markdown(value=AUTHORS)

    clear_btn_dynamic.click(
        fn=clear_dynamic_info,
        inputs=[],
        outputs=[
            input_video,
            output_statistics,
            output_score,
        ],
        queue=True,
    )
    submit_and_rank.click(
        fn=preprocess_video_and_rank,
        inputs=input_video,
        outputs=[
            output_statistics,
            output_score,
        ],
    )
####################################
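# Speech tab: gr.Interface feeds the microphone recording, delivered as a
# (sample_rate, numpy array) tuple, into classify_continuous and displays the
# transcript plus the two emotion-recognition outputs.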
speech = gr.Interface(
    classify_continuous,
    gr.Audio(sources=["microphone"]),
    [
        gr.Text(label="语音识别结果"),  # speech recognition result
        gr.Text(label="音频情感识别1"),  # audio emotion recognition 1
        gr.Text(label="音频情感识别2"),  # audio emotion recognition 2
    ],
)
with gr.Blocks() as app:
    with gr.Tab("语音"):  # "Speech"
        speech.render()
    with gr.Tab("视频"):  # "Video"
        video.render()

app.launch()