kt-test-account committed on
Commit
7fce61a
·
1 Parent(s): 5ae57bc

Update script.py

Browse files
Files changed (1) hide show
  1. script.py +48 -3
script.py CHANGED
@@ -2,10 +2,55 @@ import pandas as pd
2
  from datasets import load_dataset
3
  import numpy as np
4
  import tqdm.auto as tqdm
5
- dataset_remote = load_dataset("/tmp/data",split = "test",streaming = True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  out = []
7
  for el in tqdm.tqdm(dataset_remote):
8
- print(el["id"], len(el["audio"]["bytes"]))
9
- out.append(dict(id = el["id"], pred = np.random.choice(["generated","pristine"])))
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  pd.DataFrame(out).to_csv("submission.csv",index = False)
 
2
  from datasets import load_dataset
3
  import numpy as np
4
  import tqdm.auto as tqdm
5
+ import os
6
+ import io
7
+ import torch
8
+ # Import your model and anything else you want
9
+ # You can even install other packages included in your repo
10
+ # However, during the evaluation the container will not have access to the internet.
11
+ # So you must include everything you need in your model repo. Common python libraries will be installed.
12
+ # Feel free to contact us to add dependencies to the requirements.txt
13
+
14
+ from models import Model
15
+ from preprocess import preproccess
16
+
17
+
18
+ # load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
19
+ SAFE_DATASET = os.environ.get("SAFE_DATASET","/tmp/data")
20
+ dataset_remote = load_dataset(SAFE_DATASET,split = "test",streaming = True)
21
+
22
+
23
+ # load your model
24
+ device = "cuda:0"
25
+ model = Model().to(device)
26
+
27
+
28
+ # iterate over the dataset
29
  out = []
30
  for el in tqdm.tqdm(dataset_remote):
 
 
31
 
32
+ # each element is a dict
33
+ # el["id"] is the id of the example and el["audio"] contains the audio file
34
+ # el["audio"]["bytes"] contains bytes from reading the raw audio
35
+ # el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
36
+
37
+ # if you are using libraries that expect a file, you can use a BytesIO object
38
+ file_like = io.BytesIO(el["audio"]["bytes"])
39
+ tensor = preproces(file_like)
40
+
41
+ with torch.no_grad():
42
+ # soft decision (such as log likelihood score)
43
+ # a positive score corresponds to a synthetic prediction
44
+ # a negative score corresponds to a pristine prediction
45
+ score = model(tensor.to(device)).cpu().item()
46
+
47
+ # we require a hard decision to be submitted, so you need to pick a threshold
48
+ pred = "generated" if score > model.threshold else "pristine"
49
+
50
+ # append your prediction
51
+ # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
52
+
53
+ out.append(dict(id = el["id"], pred = pred, score = score))  # one record per example: id, hard decision, soft score
54
+
55
+ # save the final result and that's it
56
  pd.DataFrame(out).to_csv("submission.csv",index = False)