Spaces:

Liusuthu
/

SpeechDepression

Runtime error

App Files Files Community

SpeechDepression / audio_DD.py

Liusuthu

Upload folder using huggingface_hub

1464d1f verified 9 months ago

raw

history blame

4.17 kB

	import csv
	import json
	import math
	import os
	import struct
	import time

	import numpy as np
	import pyaudio
	import scipy.io.wavfile as wav
	import sounddevice as sd
	import soundfile as sf
	import torch
	# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
	from pydub import AudioSegment
	from speechbrain.pretrained.interfaces import foreign_class
	from tqdm.contrib import tqdm

	from paraformer import AudioReader, CttPunctuator, FSMNVad, ParaformerOffline

	# Buffer shared with luyin(): its stream callback appends captured blocks here
	# and luyin() empties it once a recording has been written out.
	recorded_audio: list = []
	# Module-level flag. NOTE(review): luyin() assigns a function-local flag
	# without a ``global`` statement, so this name is never rebound by it.
	recording: bool = False

	def check_prefix(string, prefix):
	    """Return True when ``string`` begins with ``prefix``.

	    The check is done by slicing, so it works for any sliceable sequence
	    (``str``, ``bytes``, ``list`` ...) and an empty ``prefix`` always matches.

	    Args:
	        string: Sequence to inspect.
	        prefix: Leading sequence to look for.

	    Returns:
	        The result of comparing the first ``len(prefix)`` items of
	        ``string`` with ``prefix``.
	    """
	    return string[:len(prefix)] == prefix

	def luyin(self):
	    """Record microphone audio between two console prompts and return it.

	    The user enters ``1`` to start capturing and ``2`` to stop. Captured
	    blocks are collected in the module-level ``recorded_audio`` list, stacked,
	    written to ``out.wav`` in the working directory and returned as a tensor.

	    Args:
	        self: Object exposing a ``sample_rate`` attribute
	            (``Recorder.record`` passes its own instance).

	    Returns:
	        The string ``"none"`` when nothing was recorded (the start prompt was
	        not answered with ``1``, or no audio block arrived); otherwise a
	        float ``torch`` tensor built from the squeezed recording.
	    """

	    def read_choice(prompt):
	        # Non-numeric input used to raise ValueError out of the recorder;
	        # report it as "no valid choice" instead.
	        try:
	            return int(input(prompt))
	        except ValueError:
	            return None

	    # Function-local flag read by the callback through its closure.
	    capturing = False

	    def callback(indata, frames, time_info, status):
	        # The third parameter is named ``time_info`` so that it does not hide
	        # the ``time`` module inside the callback.
	        if status:
	            print('录音错误:', status)
	        if capturing:
	            # Store a copy of the block, as the original code did
	            # (presumably the buffer is reused between callbacks).
	            recorded_audio.append(indata.copy())

	    if read_choice('请输入数字1开始:') != 1:
	        # This path used to fall off the end and return None; use the same
	        # sentinel as the "nothing captured" case so callers can test for it.
	        return "none"

	    # Discard blocks left behind by an earlier call that never reached clear().
	    recorded_audio.clear()
	    capturing = True
	    # As a context manager the stream is started on entry and stopped *and
	    # closed* on exit, so the input device is released even if a prompt raises.
	    with sd.InputStream(callback=callback, channels=1,
	                        samplerate=self.sample_rate, blocksize=4096):
	        begin = time.time()
	        # Keep recording until the user really enters 2; any other answer used
	        # to return None while leaving the stream running.
	        while read_choice('请输入数字2停止:') != 2:
	            pass
	        capturing = False
	        print("Stop recording")
	    t = time.time() - begin
	    print('录音时间为%ds' % t)

	    if not recorded_audio:
	        return "none"

	    signal = np.vstack(recorded_audio)
	    recorded_audio.clear()
	    sf.write("out.wav", signal, self.sample_rate)
	    return torch.from_numpy(np.squeeze(signal)).float()


	class Recorder:
	    """Records audio from the microphone and returns the signal tensor."""

	    def __init__(self):
	        self.sample_rate = 16000  # sample rate for recording
	        # Number of audio channels. NOTE(review): luyin() in this file opens
	        # its stream with a literal channels=1 and does not read this value.
	        self.channels = 1

	    def record(self, path):
	        """Run one interactive recording through ``luyin`` and return its result.

	        Args:
	            path: Not used by this method; kept so existing callers that pass
	                a placeholder argument continue to work.

	        Returns:
	            Whatever ``luyin(self)`` returns.
	        """
	        return luyin(self)


	class ContinuousInferencer:
	    """Records one utterance from the microphone and classifies it.

	    ``classify_continuous`` also transcribes the utterance. For that it looks
	    up the module-level names ``vad``, ``ASR_model`` and ``punc``, which this
	    file creates only inside its ``__main__`` block.
	    """

	    def __init__(self):
	        self.recorder = Recorder()  # create an instance of the Recorder class
	        # Build the model directory with os.path.join: on Windows this gives
	        # the same backslash-separated string as before, and on other systems
	        # it gives a usable relative path instead of one odd file name.
	        model_dir = os.path.join(
	            "pretrained_models",
	            "speechbrain",
	            "emotion-recognition-wav2vec2-IEMOCAP",
	        )
	        self.classifier = foreign_class(
	            source=model_dir,
	            pymodule_file="custom_interface.py",
	            classname="CustomEncoderWav2vec2Classifier",
	            savedir=model_dir,
	        )

	    def classify_continuous(self):
	        """Record one utterance, transcribe it and classify it.

	        Returns:
	            The string ``"none"`` when the recorder produced no audio.
	            Otherwise a tuple ``(probabilities, label)`` where
	            ``probabilities`` is ``out_prob.squeeze(0).numpy()`` and ``label``
	            is the last entry of the labels returned by the classifier.
	        """
	        # Recorder.record ignores its argument; 0 is only a placeholder.
	        signal = self.recorder.record(0)

	        # Test for the sentinel *before* touching out.wav: when nothing was
	        # recorded the file is either missing or left over from an earlier
	        # run. isinstance avoids comparing a tensor with a string.
	        if signal is None or (isinstance(signal, str) and signal == "none"):
	            return "none"

	        speech, _sample_rate = AudioReader.read_wav_file("out.wav")
	        segments = vad.segments_offline(speech)
	        # NOTE(review): the factor 16 presumably converts VAD boundaries in
	        # milliseconds to sample indices at 16 kHz - confirm against FSMNVad.
	        # NOTE(review): the hot_words value looks like a placeholder - confirm.
	        text_results = "".join(
	            punc.punctuate(
	                ASR_model.infer_offline(
	                    speech[part[0] * 16 : part[1] * 16],
	                    hot_words="任意热词空格分开",
	                )
	            )[0]
	            for part in segments
	        )

	        out_prob, score, index, text_lab = self.classifier.classify_batch(signal)
	        probabilities = out_prob.squeeze(0).numpy()
	        print(probabilities, text_lab[-1])
	        print("文本内容：", text_results)
	        return probabilities, text_lab[-1]


	if __name__ == "__main__":
	    print("inference start")
	    inferencer = ContinuousInferencer()
	    # ContinuousInferencer.classify_continuous() reads these three names as
	    # module globals, so they must exist before the loop starts.
	    ASR_model = ParaformerOffline()
	    vad = FSMNVad()
	    punc = CttPunctuator()

	    try:
	        # Record / classify repeatedly; results are printed by the inferencer.
	        while True:
	            inferencer.classify_continuous()
	    except (KeyboardInterrupt, EOFError):
	        # Ctrl+C or a closed stdin is the only way out of the loop; exit
	        # without a traceback.
	        pass