import pandas as pd
from datasets import load_dataset
import numpy as np
import tqdm.auto as tqdm
import os
import io
import torch
import time

# Import your model and anything else you want.
# You can even install other packages included in your repo.
# However, during the evaluation the container will not have access to the internet,
# so you must include everything you need in your model repo. Common Python libraries will be installed.
# Feel free to contact us to add dependencies to the requirements.txt.
# For testing, this is the Docker image that will be used: https://github.com/huggingface/competitions/blob/main/Dockerfile
# It can be pulled here: https://hub.docker.com/r/huggingface/competitions/tags
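# For example, to try this script locally against that image (the tag and paths below are
# illustrative assumptions; pick an actual tag from the link above and adjust paths to your repo):
#   docker pull huggingface/competitions:<tag>
#   docker run --rm --gpus all --network none -v "$(pwd)":/workspace \
#       huggingface/competitions:<tag> python /workspace/script.py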

from models import Model
from preprocess import preprocess
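
# The interface this script assumes from those two files (illustrative only; models.py and
# preprocess.py are your own code, so the exact names and signatures are up to you):
#
#   class Model(torch.nn.Module):
#       threshold: float                                   # decision threshold you calibrate yourself
#       def forward(self, x: torch.Tensor) -> torch.Tensor:
#           ...                                            # returns one score per input example
#
#   def preprocess(file_like) -> torch.Tensor:
#       ...                                                # decodes raw audio bytes into a model-ready tensor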


# load the dataset; it will be automatically downloaded to /tmp/data during evaluation
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
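# (streaming=True returns an IterableDataset, so examples are read lazily one at a time rather
#  than loading the whole split into memory)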

# # this should fail since there is no network access during the model run
# try:
#     import requests
#     r = requests.get("https://stresearch.github.io/SAFE/logo.jpg")
#     print(r.text)
# except Exception as e:
#     print("download test failed as expected")
#     print(e)
    
# load your model
device = "cuda:0"
model = Model().to(device)
model.eval()  # make sure layers such as dropout and batchnorm run in inference mode
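# For local debugging without a GPU you could fall back to CPU (assuming your model supports it):
# device = "cuda:0" if torch.cuda.is_available() else "cpu"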


# iterate over the dataset
out = []
for el in tqdm.tqdm(dataset_remote):

    start_time = time.time()

    # each element is a dict:
    # el["id"] is the id of the example and el["audio"] contains the audio file
    # el["audio"]["bytes"] contains the bytes from reading the raw audio
    # el["audio"]["path"] contains the filename; this is just for reference and you can't actually load it

    # if you are using libraries that expect a file, you can wrap the bytes in a BytesIO object
    try:
        file_like = io.BytesIO(el["audio"]["bytes"])
        tensor = preprocess(file_like)

        with torch.no_grad():
            # soft decision (such as a log-likelihood score)
            # a positive score corresponds to a synthetic prediction
            # a negative score corresponds to a pristine prediction
            score = model(tensor.to(device)).cpu().item()

            # we require a hard decision to be submitted, so you need to pick a threshold
            pred = "generated" if score > model.threshold else "pristine"

        # append your prediction
        # "id" and "pred" are required; "score" will not be used in scoring, but we encourage you
        # to include it since we'll use it for analysis of the results

        out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
    except Exception as e:
        print(e)
        print("failed", el["id"])
        out.append(dict(id = el["id"], pred = "none", score = None))

# save the final result and that's it
pd.DataFrame(out).to_csv("submission.csv", index=False)
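
# For reference, submission.csv has one row per example; the values below are illustrative only:
#   id,pred,score,time
#   abc123,generated,2.31,0.042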