Spaces:

hhz520
/

webchat

Configuration error

App Files Files Community

webchat / voice /audio_convert.py

hhz520

Upload 170 files

61517de 8 months ago

raw

history blame contribute delete

No virus

4.01 kB

	import shutil
	import wave

	from common.log import logger

	try:
	    # pysilk is an optional dependency: only the silk encode/decode helpers
	    # in this module use it, so a missing install is reported, not fatal.
	    import pysilk
	except ImportError:
	    # Logger.warn() is a deprecated alias of Logger.warning().
	    # NOTE(review): assumes common.log.logger follows the standard logging
	    # Logger API - confirm against common/log.py.
	    logger.warning("import pysilk failed, wechaty voice message will not be supported.")

	from pydub import AudioSegment

	sil_supports = [8000, 12000, 16000, 24000, 32000, 44100, 48000]  # sample rates supported when converting slk to wav


	def find_closest_sil_supports(sample_rate):
	    """
	    Return the supported sample rate closest to ``sample_rate``.

	    An exact match is returned unchanged. When two supported rates are equally
	    close, the one listed first in ``sil_supports`` (the lower one) wins.

	    :param sample_rate: requested sample rate
	    :returns: the nearest entry of ``sil_supports``
	    """
	    if sample_rate in sil_supports:
	        return sample_rate
	    # min() keeps the first of several equally close candidates and, unlike a
	    # fixed "large enough" starting distance, also works for very large inputs.
	    return min(sil_supports, key=lambda rate: abs(rate - sample_rate))


	def get_pcm_from_wav(wav_path):
	    """
	    Read the PCM data from a wav file.

	    :param wav_path: path of the wav file
	    :returns: all audio frames of the file as bytes
	    """
	    # The context manager closes the file handle even if reading fails;
	    # a bare wave.open() would leave it open until garbage collection.
	    with wave.open(wav_path, "rb") as wav:
	        return wav.readframes(wav.getnframes())


	def any_to_mp3(any_path, mp3_path):
	    """
	    Convert an audio file of any format into an mp3 file.

	    mp3 input is copied as is. Silk input (``.sil``/``.silk``/``.slk``) is
	    decoded to a temporary wav file first and converted from there, so the
	    source file is left untouched. Every other input is loaded directly.

	    :param any_path: path of the input audio file
	    :param mp3_path: path of the mp3 file to write
	    """
	    if any_path.endswith(".mp3"):
	        shutil.copy2(any_path, mp3_path)
	        return
	    if any_path.endswith((".sil", ".silk", ".slk")):
	        import os
	        import tempfile

	        # Decode into a scratch wav rather than over the silk input, and load
	        # that decoded file (not the destination, which does not exist yet).
	        fd, tmp_wav_path = tempfile.mkstemp(suffix=".wav")
	        os.close(fd)
	        try:
	            sil_to_wav(any_path, tmp_wav_path)
	            AudioSegment.from_file(tmp_wav_path).export(mp3_path, format="mp3")
	        finally:
	            os.remove(tmp_wav_path)
	        return
	    AudioSegment.from_file(any_path).export(mp3_path, format="mp3")


	def any_to_wav(any_path, wav_path):
	    """
	    Convert an audio file of any format into a wav file.

	    wav input is copied as is, silk input (``.sil``/``.silk``/``.slk``) is
	    handed to ``sil_to_wav``, and everything else is loaded and exported
	    as wav.

	    :param any_path: path of the input audio file
	    :param wav_path: path of the wav file to write
	    """
	    silk_suffixes = (".sil", ".silk", ".slk")
	    if any_path.endswith(".wav"):
	        shutil.copy2(any_path, wav_path)
	    elif any_path.endswith(silk_suffixes):
	        return sil_to_wav(any_path, wav_path)
	    else:
	        AudioSegment.from_file(any_path).export(wav_path, format="wav")


	def any_to_sil(any_path, sil_path):
	    """
	    Convert an audio file of any format into a sil (silk) file.

	    :param any_path: path of the input audio file
	    :param sil_path: path of the silk file to write
	    :returns: the constant 10000 when the input is already silk (it is copied
	        without being measured), otherwise the duration of the source audio
	        in milliseconds
	    """
	    if any_path.endswith((".sil", ".silk", ".slk")):
	        shutil.copy2(any_path, sil_path)
	        return 10000

	    source = AudioSegment.from_file(any_path)
	    target_rate = find_closest_sil_supports(source.frame_rate)
	    # Re-sample to 16-bit samples at the chosen rate before encoding.
	    # NOTE(review): the channel count is left unchanged here - confirm that
	    # pysilk.encode accepts multi-channel PCM.
	    pcm_bytes = source.set_sample_width(2).set_frame_rate(target_rate).raw_data
	    encoded = pysilk.encode(pcm_bytes, data_rate=target_rate, sample_rate=target_rate)
	    with open(sil_path, "wb") as out:
	        out.write(encoded)
	    return source.duration_seconds * 1000


	def any_to_amr(any_path, amr_path):
	    """
	    Convert an audio file of any format into an amr file.

	    :param any_path: path of the input audio file
	    :param amr_path: path of the amr file to write
	    :returns: ``None`` when the input is already amr (it is copied as is),
	        otherwise the duration of the audio in milliseconds
	    :raises NotImplementedError: if the input is a silk file
	        (``.sil``/``.silk``/``.slk``)
	    """
	    if any_path.endswith(".amr"):
	        shutil.copy2(any_path, amr_path)
	        return
	    is_silk = any_path.endswith((".sil", ".silk", ".slk"))
	    if is_silk:
	        raise NotImplementedError("Not support file type: {}".format(any_path))
	    # The export below is done at 8000 Hz only.
	    resampled = AudioSegment.from_file(any_path).set_frame_rate(8000)
	    resampled.export(amr_path, format="amr")
	    return resampled.duration_seconds * 1000


	def sil_to_wav(silk_path, wav_path, rate: int = 24000):
	    """
	    Decode a silk file and write the result to a wav file.

	    :param silk_path: path of the silk file to decode
	    :param wav_path: path of the wav file to write
	    :param rate: sample rate passed to the decoder
	    """
	    decoded = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate)
	    with open(wav_path, "wb") as wav_file:
	        wav_file.write(decoded)


	def split_audio(file_path, max_segment_length_ms=60000):
	    """
	    Split an audio file into consecutive segments of bounded length.

	    Segment ``i`` (counting from 1) is written next to the input as
	    ``<name>_<i>.<ext>``, using the input's extension as the export format.

	    :param file_path: path of the audio file to split
	    :param max_segment_length_ms: maximum length of one segment in milliseconds
	    :returns: ``(total length in ms, list of file paths)``; when the audio
	        already fits into one segment nothing is written and the list holds
	        only ``file_path``
	    :raises ValueError: if the audio has to be split but
	        ``max_segment_length_ms`` is not positive or the file name has no
	        extension
	    """
	    import os

	    audio = AudioSegment.from_file(file_path)
	    audio_length_ms = len(audio)
	    if audio_length_ms <= max_segment_length_ms:
	        return audio_length_ms, [file_path]
	    if max_segment_length_ms <= 0:
	        # A non-positive step cannot produce any usable segment.
	        raise ValueError("max_segment_length_ms must be positive, got {}".format(max_segment_length_ms))

	    # Look for the extension inside the file name only, so that a dot in a
	    # directory name is never mistaken for the extension separator.
	    name_start = len(file_path) - len(os.path.basename(file_path))
	    dot_index = file_path.rfind(".", name_start)
	    if dot_index == -1:
	        raise ValueError("cannot determine audio format, file has no extension: {}".format(file_path))
	    file_prefix = file_path[:dot_index]
	    audio_format = file_path[dot_index + 1:]

	    files = []
	    for i, start_ms in enumerate(range(0, audio_length_ms, max_segment_length_ms), start=1):
	        end_ms = min(audio_length_ms, start_ms + max_segment_length_ms)
	        path = f"{file_prefix}_{i}.{audio_format}"
	        audio[start_ms:end_ms].export(path, format=audio_format)
	        files.append(path)
	    return audio_length_ms, files