kt-test-account committed
Commit db4aeca · 1 Parent(s): f2b5113
Files changed (12)
  1. .dockerignore +1 -0
  2. .gitignore +2 -0
  3. Dockerfile +8 -0
  4. README.md +4 -0
  5. _script.py +67 -0
  6. docker-compose.yml +14 -0
  7. make_custom_env.sh +9 -0
  8. models.py +11 -0
  9. preprocess.py +10 -0
  10. requirements-custom.txt +125 -0
  11. run.sh +6 -0
  12. script.py +4 -0
.dockerignore ADDED
@@ -0,0 +1 @@
+ *.gz
.gitignore ADDED
@@ -0,0 +1,2 @@
+ __pycache__
+ submission.csv
Dockerfile ADDED
@@ -0,0 +1,8 @@
+ FROM huggingface/competitions:latest
+ WORKDIR /app/tmp
+ # COPY requirements.txt .
+ # RUN pip install -r requirements.txt
+ COPY * .
+ # CMD bash debug.sh
+ # RUN bash make_custom_env.sh
+ # CMD bash make_custom_env.sh
README.md ADDED
@@ -0,0 +1,4 @@
+ # SAFE Example Submission
+
+ The key requirement is to have a `script.py` file in the top-level directory of the repo.
+
_script.py ADDED
@@ -0,0 +1,67 @@
+ import pandas as pd
+ from datasets import load_dataset
+ import numpy as np
+ import tqdm.auto as tqdm
+ import os
+ import io
+ import torch
+ import time
+
+ # Import your model and anything else you want
+ # You can even install other packages included in your repo
+ # However, during the evaluation the container will not have access to the internet,
+ # so you must include everything you need in your model repo. Common python libraries will be installed.
+ # Feel free to contact us to add dependencies to the requirements.txt
+ # For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
+ # It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
+
+ from models import Model
+ from preprocess import preprocess
+
+
+ # load the dataset. The dataset will be automatically downloaded to /tmp/data during evaluation
+ DATASET_PATH = "/tmp/data"
+ dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)
+
+
+ # load your model
+ device = "cuda:0"
+ model = Model().to(device)
+
+
+ # iterate over the dataset
+ out = []
+ for el in tqdm.tqdm(dataset_remote):
+
+     start_time = time.time()
+
+     # each element is a dict
+     # el["id"] is the id of the example and el["audio"] contains the audio file
+     # el["audio"]["bytes"] contains the bytes from reading the raw audio
+     # el["audio"]["path"] contains the filename. This is just for reference and you can't actually load it
+
+     # if you are using libraries that expect a file, you can use a BytesIO object
+     try:
+         file_like = io.BytesIO(el["audio"]["bytes"])
+         tensor = preprocess(file_like)
+
+         with torch.no_grad():
+             # soft decision (such as a log-likelihood score)
+             # a positive score corresponds to a synthetic prediction
+             # a negative score corresponds to a pristine prediction
+             score = model(tensor.to(device)).cpu().item()
+
+         # we require a hard decision to be submitted, so you need to pick a threshold
+         pred = "generated" if score > model.threshold else "pristine"
+
+         # append your prediction
+         # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+
+         out.append(dict(id=el["id"], pred=pred, score=score, time=time.time() - start_time))
+     except Exception as e:
+         print(e)
+         print("failed", el["id"])
+         out.append(dict(id=el["id"]))
+
+ # save the final result and that's it
+ pd.DataFrame(out).to_csv("submission.csv", index=False)
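The comments in `_script.py` describe the required output: a `submission.csv` with `id` and `pred` columns, where `pred` is either "generated" or "pristine", plus an optional `score`. A minimal sketch (not part of the commit) for sanity-checking that file locally before uploading; the file path and the specific checks here are assumptions, not competition tooling:

import pandas as pd

# hypothetical local check of the submission file written by _script.py
df = pd.read_csv("submission.csv")

# "id" and "pred" are required; "score" and "time" are optional extras
assert {"id", "pred"}.issubset(df.columns), "missing required columns"

# rows that hit the except-branch carry only an "id"; count them
missing = int(df["pred"].isna().sum())
print(f"{len(df)} rows, {missing} without a prediction")

# hard decisions must be one of the two allowed labels
print("labels valid:", bool(df["pred"].dropna().isin(["generated", "pristine"]).all()))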
docker-compose.yml ADDED
@@ -0,0 +1,14 @@
+ services:
+   make_env:
+     image: safe-make-env:latest
+     build: .
+     command: bash make_custom_env.sh
+     volumes:
+       - /disk1/kirill/custom-env:/app/custom-env
+     deploy:
+       resources:
+         reservations:
+           devices:
+             - driver: nvidia
+               device_ids: ['0', '1', '2', '3']
+               capabilities: [gpu]
make_custom_env.sh ADDED
@@ -0,0 +1,9 @@
+ conda init
+ source ~/.bashrc
+ conda create -n custom python=3.10 -y
+ conda activate custom
+ pip install -r requirements-custom.txt
+ export OUTPUT=/app/custom-env/custom.tar.gz
+ conda deactivate
+ pip install conda-pack
+ conda-pack -n custom -o $OUTPUT
models.py ADDED
@@ -0,0 +1,11 @@
+ import torch
+
+ class Model(torch.nn.Module):
+     def __init__(self):
+         super(Model, self).__init__()
+         self.fc1 = torch.nn.Linear(10, 5)
+         self.threshold = 0.
+
+     def forward(self, x):
+         # placeholder: returns one random score per example in the batch
+         return torch.randn(x.shape[0]).to(x.device)
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import librosa
3
+ import torch
4
+
5
+ def preprocess(audio_file):
6
+ # Load the audio file
7
+ y, sr = librosa.load(audio_file, sr=None)
8
+ mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
9
+ tensor = torch.from_numpy(mfccs)[None]
10
+ return tensor
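With `models.py` and `preprocess.py` in place, the whole chain can be smoke-tested locally without the evaluation dataset. A sketch under the assumption that `soundfile` (pinned in `requirements-custom.txt`) is installed, using a synthetic sine wave in place of a real `el["audio"]["bytes"]` payload and running on CPU:

import io

import numpy as np
import soundfile as sf
import torch

from models import Model
from preprocess import preprocess

# fabricate one second of 16 kHz audio and encode it as WAV bytes,
# mimicking el["audio"]["bytes"] from the evaluation dataset
sr = 16000
wave = 0.1 * np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)
buf = io.BytesIO()
sf.write(buf, wave, sr, format="WAV")
buf.seek(0)

# same path as _script.py, but without a GPU
tensor = preprocess(buf)  # shape (1, 13, time)
model = Model()
with torch.no_grad():
    score = model(tensor).item()
pred = "generated" if score > model.threshold else "pristine"
print(score, pred)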
requirements-custom.txt ADDED
@@ -0,0 +1,125 @@
+ --extra-index-url https://download.pytorch.org/whl/cu121
+ absl-py==2.2.2
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.16
+ aiosignal==1.3.2
+ astunparse==1.6.3
+ async-timeout==5.0.1
+ attrs==25.3.0
+ audioread==3.0.1
+ Automat==24.8.1
+ av==14.2.0
+ avro==1.12.0
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ configobj==5.0.9
+ constantly==23.10.4
+ datasets==3.5.0
+ decorator==5.2.1
+ dill==0.3.8
+ filelock==3.18.0
+ flatbuffers==25.2.10
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ gast==0.6.0
+ gitdb==4.0.12
+ GitPython==3.1.44
+ google-pasta==0.2.0
+ grpcio==1.71.0
+ h5py==3.13.0
+ huggingface-hub==0.30.1
+ hyperlink==21.0.0
+ idna==3.10
+ imageio==2.37.0
+ incremental==24.7.2
+ Jinja2==3.1.6
+ joblib==1.4.2
+ keras==3.9.2
+ lazy_loader==0.4
+ libclang==18.1.1
+ librosa==0.11.0
+ lightning-utilities==0.14.3
+ llvmlite==0.44.0
+ Markdown==3.7
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ ml-dtypes==0.3.2
+ mpmath==1.3.0
+ msgpack==1.1.0
+ multidict==6.2.0
+ multiprocess==0.70.16
+ munch==4.0.0
+ namex==0.0.8
+ networkx==3.4.2
+ numba==0.61.0
+ numpy==1.26.4
+ nvidia-cublas-cu12==12.3.4.1
+ nvidia-cuda-cupti-cu12==12.3.101
+ nvidia-cuda-nvcc-cu12==12.3.107
+ nvidia-cuda-nvrtc-cu12==12.3.107
+ nvidia-cuda-runtime-cu12==12.3.101
+ nvidia-cudnn-cu12==8.9.7.29
+ nvidia-cufft-cu12==11.0.12.1
+ nvidia-curand-cu12==10.3.4.107
+ nvidia-cusolver-cu12==11.5.4.101
+ nvidia-cusparse-cu12==12.2.0.103
+ nvidia-nccl-cu12==2.19.3
+ nvidia-nvjitlink-cu12==12.3.101
+ opt_einsum==3.4.0
+ optree==0.14.1
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.1.0
+ platformdirs==4.3.7
+ pooch==1.8.2
+ propcache==0.3.1
+ protobuf==4.25.6
+ pyarrow==19.0.1
+ pyasn1==0.6.1
+ pycparser==2.22
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytorch-lightning==2.5.1
+ pytz==2025.2
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.3
+ rich==14.0.0
+ rsa==4.9
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.2
+ six==1.17.0
+ Smap==2.0.524
+ smmap==5.0.2
+ soundfile==0.13.1
+ soxr==0.5.0.post1
+ sympy==1.13.3
+ tensorboard==2.16.2
+ tensorboard-data-server==0.7.2
+ tensorflow==2.16.2
+ tensorflow-io-gcs-filesystem==0.37.1
+ termcolor==3.0.1
+ threadpoolctl==3.6.0
+ timm==1.0.15
+ tokenizers==0.21.1
+ tomli==2.2.1
+ torch==2.1.2+cu121
+ torchaudio==2.1.2+cu121
+ torchmetrics==1.7.0
+ torchvision==0.16.2+cu121
+ tqdm==4.67.1
+ transformers==4.50.3
+ triton==2.1.0
+ Twisted==24.11.0
+ typing_extensions==4.13.0
+ tzdata==2025.2
+ urllib3==2.3.0
+ webrtcvad==2.0.10
+ Werkzeug==3.1.3
+ wrapt==1.17.2
+ xxhash==3.5.0
+ yarl==1.18.3
+ zope.interface==7.2
run.sh ADDED
@@ -0,0 +1,6 @@
+ echo "uncompressing environment"
+ mkdir -p custom
+ tar -xzf custom.tar.gz -C custom
+ source custom/bin/activate
+ conda-unpack
+ python _script.py
script.py ADDED
@@ -0,0 +1,4 @@
+ import subprocess
+
+ # Runs the shell entry point (replace 'run.sh' with your own launcher's filename)
+ subprocess.run(["bash", "run.sh"])