voice_conversion_service

Runtime error

App Files Files Community

voice_conversion_service / vc_service_request.py

NSect

Duplicate from balacoon/voice_conversion_service

d673948 10 months ago

raw history blame contribute delete

No virus

3.21 kB

	"""
	Copyright 2023 Balacoon

	contains implementation
	for voice conversion request
	"""

	import os
	import asyncio
	import base64
	import hashlib
	import json
	import ssl
	import time
	from typing import Tuple

	import numpy as np
	import resampy
	import websockets


	def _clip_to_int16(samples: np.ndarray) -> np.ndarray:
	    """Saturate *samples* to the int16 range and cast, so peaks clip instead of wrapping."""
	    return np.clip(samples, -32768, 32767).astype(np.int16)


	def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
	    """
	    ensures that audio is in int16 format, 16khz mono

	    audio is a (sampling_rate, samples) pair. int32 and floating point
	    samples are converted to int16, two-channel audio is averaged
	    to mono and anything that is not sampled at 16khz is resampled.
	    Samples of other dtypes are passed through unchanged unless
	    they get downmixed or resampled.

	    Returns the prepared 1-D array, or None if the samples
	    can't be reduced to a single channel.
	    """
	    sr, wav = audio
	    # ensure proper type
	    if wav.dtype == np.int32:
	        # work in float64: np.abs on int32 overflows for the most negative value
	        wide = wav.astype(np.float64)
	        max_val = np.max(np.abs(wide)) if wide.size else 0.0
	        # full-range int32 is scaled down, int16-range values stored as int32 are kept
	        mult = (32767.0 / 2**31) if max_val > 32768 else 1.0
	        wav = _clip_to_int16(wide * mult)
	    elif np.issubdtype(wav.dtype, np.floating):
	        # np.max raises on an empty array, so treat empty input as silence
	        max_val = np.max(np.abs(wav)) if wav.size else 0.0
	        # floats within [-1, 1] are normalized audio, anything else is already int16-scaled
	        mult = 32767.0 if max_val <= 1.0 else 1.0
	        wav = _clip_to_int16(wav * mult)

	    if wav.ndim == 2:
	        # average channels, accepting both (channels, samples) and (samples, channels).
	        # elif is required: after the first mean the array is 1-D and has no shape[1].
	        # np.mean returns float64, so cast back to keep the int16 contract.
	        if wav.shape[0] == 2:
	            wav = _clip_to_int16(np.round(np.mean(wav, axis=0, keepdims=False)))
	        elif wav.shape[1] == 2:
	            wav = _clip_to_int16(np.round(np.mean(wav, axis=1, keepdims=False)))

	    if wav.ndim != 1:
	        return None

	    # ensure proper sampling rate
	    if sr != 16000:
	        # np.float was removed from numpy, np.float64 is what the alias meant
	        wav = (wav / 32768.0).astype(np.float64)
	        wav = resampy.resample(wav, sr, 16000)
	        # resampling can overshoot full scale, so saturate instead of wrapping
	        wav = _clip_to_int16(wav * 32768.0)
	    return wav


	def create_signature() -> str:
	    """
	    builds the signature that authenticates a request:
	    hex sha256 digest of the api secret (taken from the
	    "api_secret" environment variable) followed by the
	    current unix time divided by 1000 and truncated
	    """
	    time_window = int(time.time() / 1000)
	    secret = os.environ["api_secret"]
	    payload = f"{secret}{time_window}".encode()
	    return hashlib.sha256(payload).hexdigest()


	async def async_service_request(source: np.ndarray, target: np.ndarray) -> np.ndarray:
	    """
	    sends source and target audio to the voice conversion service
	    and collects the audio that comes back

	    Connects to the websocket given by the "endpoint" environment
	    variable and sends a single json request with both utterances
	    (base64 of their raw bytes), the "api_key" environment variable
	    and a fresh signature. Binary frames are then read until
	    the connection is closed and interpreted as int16 samples.

	    Returns the received samples as one array, or None if the
	    connection was closed before any audio arrived.
	    Raises TypeError if a text frame is received instead of audio.
	    """
	    ssl_context = ssl.create_default_context()

	    async with websockets.connect(
	        os.environ["endpoint"], close_timeout=1024, ssl=ssl_context
	    ) as websocket:
	        request_dict = {
	            "source": base64.b64encode(source.tobytes()).decode("utf-8"),
	            "target": base64.b64encode(target.tobytes()).decode("utf-8"),
	            "api_key": os.environ["api_key"],
	            "signature": create_signature(),
	        }
	        request = json.dumps(request_dict)
	        await websocket.send(request)

	        # read reply: a closed connection is the only end-of-stream signal,
	        # recv() never returns None
	        chunks = []
	        try:
	            while True:
	                data = await websocket.recv()
	                if isinstance(data, str):
	                    # a text frame can't be decoded as samples; fail with
	                    # a readable message instead of a numpy buffer error
	                    raise TypeError(
	                        f"expected binary audio frame, got text frame: {data[:200]!r}"
	                    )
	                chunks.append(data)
	        except websockets.exceptions.ConnectionClosed:
	            pass

	    if not chunks:
	        return None
	    # decode once over the joined bytes so frames don't have to be aligned
	    # to sample boundaries; copy to hand back a writable array
	    return np.frombuffer(b"".join(chunks), dtype=np.int16).copy()


	def vc_service_request(
	    source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray]
	) -> Tuple[int, np.ndarray]:
	    """
	    prepares audio (has to be 16khz mono)
	    and runs request to a voice conversion service

	    Both arguments are (sampling_rate, samples) pairs.

	    Returns (16000, converted_samples), or None when there is nothing
	    to return: an input is missing or can't be prepared, an input is
	    too long (source of 60 seconds or more, target of 30 seconds or more),
	    or the service did not send any audio back.
	    """
	    if source_audio is None or target_audio is None:
	        # nothing to convert, avoid failing on tuple unpacking
	        return None
	    src = prepare_audio(source_audio)
	    tgt = prepare_audio(target_audio)
	    if src is None or tgt is None:
	        return None
	    if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
	        # input is way too long, dont return anything
	        return None

	    res = asyncio.run(async_service_request(src, tgt))
	    if res is None:
	        # empty reply: stay consistent with the other failure paths
	        # rather than returning a (rate, None) pair
	        return None
	    return 16000, res