File size: 2,834 Bytes
5ae57bc
 
 
 
7fce61a
 
4875545
c74c9ba
7fce61a
 
 
 
 
 
71736ce
 
7fce61a
 
4875545
7fce61a
 
 
fe1bf49
 
7fce61a
838bef0
 
 
 
 
 
 
 
 
7fce61a
 
 
 
 
 
5ae57bc
 
 
c74c9ba
 
7fce61a
 
 
 
 
 
c74c9ba
 
 
7fce61a
c74c9ba
 
 
 
 
7fce61a
c74c9ba
 
7fce61a
c74c9ba
 
7fce61a
c74c9ba
 
 
 
efa7621
7fce61a
 
5ae57bc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time

# Import your model and anything else you want
# You can even install other packages included in your repo
# However, during the evaluation the container will not have access to the internet. 
# So you must include everything you need in your model repo. Common python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt
# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile 
# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags 

from models import Model
from preprocess import preprocess


# Load the evaluation dataset. During the official run the data is mounted
# at /tmp/data, so load_dataset reads it locally (no network needed).
# Streaming avoids materialising the entire split in memory.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)

# Sanity check: the evaluation container has no network access, so this
# request must raise. If it ever succeeds, you are likely running locally
# with connectivity (or the sandbox is misconfigured).
try:
    import requests

    # timeout= prevents hanging forever if the network blackholes the
    # connection instead of failing fast with no route/DNS.
    r = requests.get("https://stresearch.github.io/SAFE/logo.jpg", timeout=10)
    print(r.text)
except Exception as e:
    print("download test failed as expected")
    print(e)
    
# Load your model onto the GPU.
device = "cuda:0"
model = Model().to(device)
# Switch to inference mode: eval() disables train-time behaviour such as
# dropout and batch-norm statistic updates, which would otherwise make
# scores nondeterministic. (Assumes Model is a torch nn.Module — confirm
# against models.py.)
model.eval()


# Iterate over the streamed dataset and score each example.
out = []
for el in tqdm.tqdm(dataset_remote):

    start_time = time.time()

    # Each element is a dict:
    #   el["id"]             - example id
    #   el["audio"]["bytes"] - raw bytes of the audio file
    #   el["audio"]["path"]  - original filename, for reference only
    #                          (the path is not actually loadable here)

    try:
        # Libraries that expect a file can read from a BytesIO object.
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)

        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            #   positive score -> synthetic prediction
            #   negative score -> pristine prediction
            score = model(tensor.to(device)).cpu().item()

            # A hard decision must be submitted, so threshold the score.
            pred = "generated" if score > model.threshold else "pristine"

        # "id" and "pred" are required. "score" is not used in scoring but
        # is encouraged; it will be used for analysis of the results.
        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        # Best-effort: still emit a record so every example appears in the
        # submission even when preprocessing/inference fails.
        print(e)
        print("failed", el["id"])
        # Include "time" here too so the DataFrame columns stay consistent
        # with the success branch (otherwise the column is NaN-padded).
        out.append(dict(id=el["id"], pred="none", score=None, time=time.time() - start_time))

# Persist the predictions — submission.csv is the required deliverable.
submission = pd.DataFrame(out)
submission.to_csv("submission.csv", index=False)