Spaces:
Runtime error
Runtime error
# 对 sambert 训练的数据标注处理 | |
import os | |
import shutil | |
import uuid | |
import librosa | |
import gradio as gr | |
from scipy.io import wavfile | |
import numpy as np | |
import whisper | |
from modelscope.tools import run_auto_label | |
from utils_base import ensure_empty_dir, datasets_dir, get_dataset_list | |
# 绝对路径获取方法 | |
curPath = os.path.dirname(os.path.abspath(__file__)) | |
def getAbsPath (relativePath): | |
joinPath = os.path.join(curPath, relativePath) | |
return os.path.normpath( | |
os.path.abspath(joinPath) | |
) | |
# 初始化 whisper 模型的加载 | |
model_path = getAbsPath('../../models/whisper/medium.pt') | |
whisper_model = None | |
if shutil.os.path.exists(model_path): | |
whisper_model = whisper.load_model(model_path) | |
else: | |
whisper_model = whisper.load_model('medium') | |
# whisper 音频分割方法 ---------------------------------------------- | |
def split_long_audio(model, filepaths, save_path, out_sr=44100): | |
# 格式化输入的音频路径(兼容单个音频和多个音频) | |
if isinstance(filepaths, str): | |
filepaths = [filepaths] | |
# 对音频依次做拆分并存放到临时路径 | |
for file_idx, filepath in enumerate(filepaths): | |
print(f"Transcribing file {file_idx}: '{filepath}' to segments...") | |
result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5) | |
segments = result['segments'] | |
# 采用 librosa 配合 scipy 做音频数据分割 | |
wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True) | |
wav, _ = librosa.effects.trim(wav, top_db=20) | |
peak = np.abs(wav).max() | |
if peak > 1.0: | |
wav = 0.98 * wav / peak | |
wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr) | |
wav2 /= max(wav2.max(), -wav2.min()) | |
# 将长音频文件分割成一条条的短音频并放入指定的目录 | |
for i, seg in enumerate(segments): | |
start_time = seg['start'] | |
end_time = seg['end'] | |
wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)] | |
wav_seg_name = f"{file_idx}_{i}.wav" | |
out_fpath = os.path.join(save_path, wav_seg_name) | |
wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16)) | |
# 自动标注与标注后的文件打包 -------------------------------------------- | |
def auto_label(audio, name): | |
if not audio or not name: | |
return '', gr.update(choices=get_dataset_list()) | |
# 创建临时目录用于存放分割后的音频与再次标注的信息 | |
input_wav = getAbsPath(f'./temp/input-{ uuid.uuid4() }') | |
ensure_empty_dir(input_wav) | |
work_dir = os.path.join(datasets_dir, name) | |
ensure_empty_dir(work_dir) | |
# 音频分割 | |
split_long_audio(whisper_model, audio, input_wav) | |
# 音频自动标注 | |
# 第一次会自动下载对应的模型 | |
run_auto_label( | |
input_wav=input_wav, | |
work_dir=work_dir, | |
resource_revision='v1.0.7' | |
) | |
# 移除目录 | |
shutil.rmtree(input_wav) | |
# 返回结果 | |
return '打标成功', gr.update(choices=get_dataset_list()) | |
# 删除数据集 ---------------------------------------------------- | |
# name - 删除的数据集名称 | |
def delete_dataset(name): | |
try: | |
if not name: | |
return gr.update(choices=get_dataset_list()) | |
target_dir = os.path.join(datasets_dir, name) | |
shutil.rmtree(target_dir) | |
return gr.update(choices=get_dataset_list(), value=None) | |
except Exception: | |
return gr.update(choices=get_dataset_list(), value=None) | |