import time, logging, json, sys, struct
import numpy as np
from scipy.io.wavfile import WAVE_FORMAT


def write_numpy_to_wave(filename, rate, data, add_header=False):
    """
    Write a NumPy array as a WAV file (adapted from scipy.io.wavfile.write).
    When add_header is False, the RIFF/fmt header is skipped and only the
    'data' chunk is written, which is what the realtime PCM path below needs.
    """
    def _array_tofile(fid, data):
        # ravel gives a C-contiguous buffer
        fid.write(data.ravel().view('b').data)

    if hasattr(filename, 'write'):
        fid = filename
    else:
        fid = open(filename, 'wb')

    fs = rate

    try:
        dkind = data.dtype.kind
        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
                                                 data.dtype.itemsize == 1)):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        header_data = b''
        header_data += b'RIFF'
        header_data += b'\x00\x00\x00\x00'  # placeholder, patched with the real size below
        header_data += b'WAVE'

        # fmt chunk
        header_data += b'fmt '
        if dkind == 'f':
            format_tag = WAVE_FORMAT.IEEE_FLOAT
        else:
            format_tag = WAVE_FORMAT.PCM

        if data.ndim == 1:
            channels = 1
        else:
            channels = data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if not (dkind == 'i' or dkind == 'u'):
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_data += struct.pack('<I', len(fmt_chunk_data))
        header_data += fmt_chunk_data

        # fact chunk (non-PCM files)
        if not (dkind == 'i' or dkind == 'u'):
            header_data += b'fact'
            header_data += struct.pack('<II', 4, data.shape[0])

        # check data size (needs to be immediately before the data chunk)
        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")
        if add_header:
            fid.write(header_data)

        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        _array_tofile(fid, data)

        if add_header:
            # Determine the file size and write it into the
            # size field at the start of the file.
            size = fid.tell()
            fid.seek(4)
            fid.write(struct.pack('<I', size-8))
    finally:
        if not hasattr(filename, 'write'):
            fid.close()
        else:
            fid.seek(0)
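

# Illustrative usage sketch (an addition, not part of the original module):
# writing a one-second 440 Hz tone as 16-bit PCM. With add_header=True a full
# playable RIFF/WAVE file is produced; with add_header=False only the 'data'
# chunk is written, as done by the realtime ASR path further below.
def _demo_write_numpy_to_wave(path='demo_tone.wav', rate=16000):
    t = np.arange(rate) / rate                                   # 1 second of sample times
    tone = (0.5 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
    write_numpy_to_wave(path, rate, tone, add_header=True)       # full WAV header + data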


def is_speaker_speaking(vad, data, sample_rate):
    # Detect whether the speaker is speaking.
    # The WebRTC VAD only accepts 16-bit mono PCM audio,
    # sampled at 8000, 16000, 32000 or 48000 Hz.
    # A frame must be either 10, 20, or 30 ms in duration:
    frame_duration = 30
    n_bit_each = int(sample_rate * frame_duration / 1000) * 2  # x2 because audio is 16 bit (2 bytes per sample)
    res_list = []
    for t in range(n_bit_each, len(data), n_bit_each):
        res_list.append(vad.is_speech(data[t-n_bit_each:t], sample_rate))

    info = ''.join(['^' if r else '.' for r in res_list])
    info = info[:10]
    if any(res_list):
        return True, info
    else:
        return False, info
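

# Illustrative sketch (an addition, not in the original code): how
# is_speaker_speaking() is meant to be fed -- raw little-endian 16-bit mono PCM
# bytes at one of the supported rates. Vad(1) matches the mode set in
# audio_convertion_thread below; pure silence should typically yield False.
def _demo_vad_on_silence(sample_rate=16000):
    import webrtcvad
    vad = webrtcvad.Vad(1)
    silence = b'\x00\x00' * sample_rate            # 1 second of zero-valued 16-bit samples
    is_speaking, info = is_speaker_speaking(vad, silence, sample_rate)
    return is_speaking, info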


class AliyunASR():

    def test_on_sentence_begin(self, message, *args):
        # print("test_on_sentence_begin:{}".format(message))
        pass

    def test_on_sentence_end(self, message, *args):
        # print("test_on_sentence_end:{}".format(message))
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        self.event_on_entence_end.set()
        # print(self.parsed_sentence)

    def test_on_start(self, message, *args):
        # print("test_on_start:{}".format(message))
        pass

    def test_on_error(self, message, *args):
        logging.error("on_error args=>{}".format(args))

    def test_on_close(self, *args):
        self.aliyun_service_ok = False

    def test_on_result_chg(self, message, *args):
        # print("test_on_chg:{}".format(message))
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # print("on_completed:args=>{} message=>{}".format(args, message))
        pass
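
    # Consumer-side sketch (an assumption about the surrounding project: the
    # threading.Event objects and flags used by the callbacks above are expected
    # to be created elsewhere in the class). A reader thread could do:
    #
    #   if asr.event_on_result_chg.wait(timeout=0.5):
    #       asr.event_on_result_chg.clear()
    #       print(asr.parsed_text)           # latest intermediate transcript
    #   if asr.event_on_entence_end.is_set():
    #       asr.event_on_entence_end.clear()
    #       print(asr.parsed_sentence)       # finalized sentence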

    def audio_convertion_thread(self, uuid):
        # Capture and stream audio in a background thread
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL = "wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
            url=URL,
            token=TOKEN,
            appkey=APPKEY,
            on_sentence_begin=self.test_on_sentence_begin,
            on_sentence_end=self.test_on_sentence_end,
            on_start=self.test_on_start,
            on_result_changed=self.test_on_result_chg,
            on_completed=self.test_on_completed,
            on_error=self.test_on_error,
            on_close=self.test_on_close,
            callback_args=[uuid.hex]
        )

        timeout_limit_second = 20
        r = sr.start(aformat="pcm",
                     timeout=timeout_limit_second,
                     enable_intermediate_result=True,
                     enable_punctuation_prediction=True,
                     enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)

        is_previous_frame_transmitted = False   # whether the previous frame contained speech
        previous_frame_data = None
        echo_cnt = 0        # after speech stops, keep sending audio to the server for n more chunks
        echo_cnt_max = 4    # after speech stops, keep sending audio to the server for n more chunks
        keep_alive_last_send_time = time.time()
        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to a pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm'
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE)  # 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)
                if is_speaking or echo_cnt > 0:
                    # the microphone is active, or we are in the trailing "echo" phase
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:   # the previous frame had no speech, but send it along as well
                        if previous_frame_data is not None: data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    slices = zip(*(iter(data),) * 640)      # 640-byte groups (20 ms of 16 kHz 16-bit mono)
                    for i in slices: sr.send_audio(bytes(i))
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    previous_frame_data = data  # remember this silent frame so it can be prepended when speech resumes
                    # keep the connection alive: even without speech, periodically send some audio to the server
                    if time.time() - keep_alive_last_send_time > timeout_limit_second / 2:
                        slices = zip(*(iter(data),) * 640)  # 640-byte groups
                        for i in slices: sr.send_audio(bytes(i))
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

            if not self.aliyun_service_ok:
                self.stop = True
                self.stop_msg = 'Aliyun audio service error: please check whether ALIYUN_TOKEN and ALIYUN_APPKEY have expired.'
        r = sr.stop()

    def get_token(self):
        from toolbox import get_conf
        import json
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # Create an AcsClient instance
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # Create the request and set its parameters.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        token = None  # stays None if the request fails, instead of raising NameError below
        try:
            response = client.do_action_with_exception(request)
            print(response)
            jss = json.loads(response)
            if 'Token' in jss and 'Id' in jss['Token']:
                token = jss['Token']['Id']
                expireTime = jss['Token']['ExpireTime']
                print("token = " + token)
                print("expireTime = " + str(expireTime))
        except Exception as e:
            print(e)

        return token
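

# Driver sketch (illustrative; in the real project these attributes are wired up
# elsewhere). AliyunASR as written expects `stop`, `event_on_result_chg` and
# `event_on_entence_end` to exist before audio_convertion_thread() runs, plus a
# RealtimeAudioDistribution producer feeding audio under the same uuid.
def _demo_run_asr(uuid_obj):
    import threading
    asr = AliyunASR()
    asr.stop = False
    asr.event_on_result_chg = threading.Event()
    asr.event_on_entence_end = threading.Event()
    worker = threading.Thread(target=asr.audio_convertion_thread,
                              args=(uuid_obj,), daemon=True)
    worker.start()
    return asr, worker     # e.g. uuid_obj = uuid.uuid4(); stop later via asr.stop = True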