# Laronix_ASR_TTS_VC/local/PAL_dataset.py
# KevinGeng's picture
# Update ASR engine to whisper-based
# f5460b4
## Add dataset appending
from datasets import load_dataset, Dataset, DatasetDict, concatenate_datasets
import pdb
import numpy as np
# to_dataset = load_dataset("KevinGeng/testdataset")

# Load the two local corpora; only their "train" split is used below.
base_dataset = load_dataset("../laronix_automos/data/Patient_sil_trim_16k_normed_5_snr_40")
base_extra_dataset = load_dataset("../laronix_automos/data/John_p326_large")

# Keep both corpora addressable as separate named splits.
PAL_dataset = DatasetDict({"base": base_dataset['train'], "base_extra": base_extra_dataset['train']})
# PAL_dataset.push_to_hub("KevinGeng/PAL_dataset")

# concatenate_datasets() takes a *list* of datasets as its first argument;
# passing the datasets as separate positional arguments puts the second one
# in the `info` parameter. The result is also bound to a name so the merged
# dataset is actually usable afterwards.
combined_dataset = concatenate_datasets([base_dataset['train'], base_extra_dataset['train']])
# Hand-built sample record with an "audio" entry (path, waveform array,
# sampling rate) and a "transcription" string.
# NOTE(review): presumably mirrors the row layout of the datasets loaded
# above so it can be appended to them -- confirm against the dataset features.
new_record = {
    "audio": {
        'path': 'Arthur_set1_001_noisy.wav',
        'array': np.array([0.02526855, 0.04602051, 0.04873657, 0.00045776, 0.00201416, 0.00167847]),
        'sampling_rate': 16000,
    },
    "transcription": "TOD",
}
import requests
import os


def _build_auth_headers(token):
    """Return HTTP headers that carry *token* as a bearer credential.

    Args:
        token: Hugging Face access token, or ``None``/empty when no token is
            configured.

    Returns:
        ``{"Authorization": "Bearer <token>"}`` when a token is given,
        otherwise an empty dict so the request is sent unauthenticated.
    """
    if not token:
        return {}
    return {"Authorization": f"Bearer {token}"}


# SECURITY: access tokens used to be hard-coded on these lines. Any token that
# was committed must be treated as leaked and revoked. The token is now read
# from the HF_TOKEN environment variable and sent with the "Bearer" scheme.
headers = _build_auth_headers(os.environ.get("HF_TOKEN"))
API_URL = "https://datasets-server.huggingface.co/is-valid?dataset=KevinGeng/testdataset"
def query(timeout=30):
    """Send a GET request to ``API_URL`` and return the decoded JSON body.

    The request uses the module-level ``API_URL`` and ``headers``.

    Args:
        timeout: Seconds to wait for the server. Without a timeout the
            underlying ``requests`` call can block indefinitely.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout elapses.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.request("GET", API_URL, headers=headers, timeout=timeout)
    return response.json()
# Run the query once and show the server's answer. Printing replaces the
# former debugger breakpoint so the script also works when run
# non-interactively.
data = query()
print(data)