File size: 1,283 Bytes
f5460b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
## ADD dataset appendning
from datasets import load_dataset, Dataset, DatasetDict, concatenate_datasets
import pdb

import numpy as np
# to_dataset = load_dataset("KevinGeng/testdataset")
base_dataset = load_dataset("../laronix_automos/data/Patient_sil_trim_16k_normed_5_snr_40")
base_extra_dataset = load_dataset("../laronix_automos/data/John_p326_large")

PAL_dataset = DatasetDict({"base": base_dataset['train'], "base_extra": base_extra_dataset['train']})
# PAL_dataset.push_to_hub("KevinGeng/PAL_dataset")
concatenate_datasets(base_dataset['train'], base_extra_dataset['train'])
pdb.set_trace()

# Example record in the datasets Audio + transcription schema, presumably
# intended to be appended to the merged dataset.
# NOTE(review): 'array' is a 6-sample placeholder, not real 16 kHz audio —
# confirm the real waveform is loaded before appending.
new_record = {
    "audio": {
        'path': 'Arthur_set1_001_noisy.wav',
        'array': np.array([0.02526855, 0.04602051, 0.04873657, 0.00045776, 0.00201416, 0.00167847]),
        'sampling_rate': 16000,
    },
    "transcription": "TOD",
}
# Removed leftover pdb.set_trace() debug breakpoint.

import requests
import os

# SECURITY FIX: a live Hugging Face access token was hard-coded here (and a
# second user's token in the commented line below — both should be revoked).
# Read the token from the environment instead so it is never committed.
# The HF API also expects the "Bearer <token>" scheme, not "<username> <token>".
HF_TOKEN = os.environ.get("HF_TOKEN", "")
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# headers = {"Authorization": "Haopeng <revoked - never commit tokens>"}
# pdb.set_trace()
API_URL = "https://datasets-server.huggingface.co/is-valid?dataset=KevinGeng/testdataset"

def query():
    """GET the datasets-server ``is-valid`` endpoint and return the parsed JSON.

    Uses the module-level ``API_URL`` and ``headers``. Returns the decoded
    JSON response body (a dict).
    """
    # requests.get is the idiomatic shorthand for requests.request("GET", ...).
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(API_URL, headers=headers, timeout=30)
    # pdb.set_trace()
    return response.json()
# Fetch the dataset-validity report. Replaced the trailing pdb.set_trace()
# debug breakpoint (which would otherwise leave `data` unused) with a print.
data = query()
print(data)