first

Browse files

Files changed (12) hide show

.dockerignore +1 -0
.gitignore +2 -0
Dockerfile +8 -0
README.md +4 -0
_script.py +67 -0
docker-compose.yml +14 -0
make_custom_env.sh +9 -0
models.py +11 -0
preprocess.py +10 -0
requirements-custom.txt +125 -0
run.sh +6 -0
script.py +4 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.gz

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __pycache__
2	+ submission.csv

Dockerfile ADDED Viewed

	@@ -0,0 +1,8 @@

+# Image for this submission, built on top of the huggingface/competitions image.
+FROM huggingface/competitions:latest
+WORKDIR /app/tmp
+# COPY requirements.txt .
+# RUN pip install -r requirements.txt
+# Copy the files matched by * in the build context into the working directory
+# (.dockerignore keeps *.gz archives out of the build context).
+COPY * .
+# CMD bash debug.sh
+# RUN bash make_custom_env.sh
+# CMD bash make_custom_env.sh

README.md ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ # SAFE Example Submission
2	+
3	+ The key requirement is to have a `script.py` file in the top-level directory of the repo.
4	+

_script.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pandas as pd
+from datasets import load_dataset
+import numpy as np
+import tqdm.auto as tqdm
+import os
+import io
+import torch
+import time
+# Import your model and anything else you want
+# You can even install other packages included in your repo
+# However, during the evaluation the container will not have access to the internet.
+# So you must include everything you need in your model repo. Common python libraries will be installed.
+# Feel free to contact us to add dependencies to the requirements.txt
+# For testing, this is the docker image that will be used https://github.com/huggingface/competitions/blob/main/Dockerfile
+# It can be pulled here https://hub.docker.com/r/huggingface/competitions/tags
+from models import Model
+from preprocess import preprocess
+# load the dataset. dataset will be automatically downloaded to /tmp/data during evaluation
+DATASET_PATH = "/tmp/data"
+dataset_remote = load_dataset(DATASET_PATH,split = "test",streaming = True)
+# load your model
+device = "cuda:0"
+model = Model().to(device)
+# iterate over the dataset
+out = []
+for el in tqdm.tqdm(dataset_remote):
+    start_time = time.time()
+    # each element is a dict
+    # el["id"] id of example and el["audio"] contains the audio file
+    # el["audio"]["bytes"] contains bytes from reading the raw audio
+    # el["audio"]["path"] containts the filename. This is just for reference and you cant actually load it
+    # if you are using libraries that expect a file. You can use BytesIO object
+    try:
+        file_like = io.BytesIO(el["audio"]["bytes"])
+        tensor = preprocess(file_like)
+        with torch.no_grad():
+            # soft decision (such as log likelihood score)
+            # positive score correspond to synthetic prediction
+            # negative score correspond to pristine prediction
+            score = model(tensor.to(device)).cpu().item()
+            # we require a hard decision to be submited. so you need to pick a threshold
+            pred = "generated" if score > model.threshold else "pristine"
+        # append your prediction
+        # "id" and "pred" are required. "score" will not be used in scoring but we encourage you to include it. We'll use it for analysis of the results
+        out.append(dict(id = el["id"], pred = pred, score = score, time = time.time() - start_time))
+    except Exception as e:
+        print(e)
+        print("failed", el["id"])
+        out.append(dict(id = el["id"]))
+# save the final result and that's it
+pd.DataFrame(out).to_csv("submission.csv",index = False)

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,14 @@

+# Compose service that builds the image from this directory and runs
+# make_custom_env.sh inside it.
+services:
+  make_env:
+    image: safe-make-env:latest
+    build: .
+    command: bash make_custom_env.sh
+    volumes:
+      # Host directory (left of the colon) mounted at /app/custom-env in the
+      # container; make_custom_env.sh writes custom.tar.gz there.
+      # Adjust the host path for your machine.
+      - /disk1/kirill/custom-env:/app/custom-env
+    deploy:
+      resources:
+        reservations:
+          devices:
+            # Reserve NVIDIA GPUs with ids 0-3 for the container.
+            - driver: nvidia
+              device_ids: [ '0','1','2','3']
+              capabilities: [ gpu ]

make_custom_env.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+conda init
+source ~/.bashrc
+conda create -n custom python=3.10 -y
+conda activate custom
+pip install -r requirements-custom.txt
+export OUTPUT=/app/custom-env/custom.tar.gz
+conda deactivate
+pip install conda-pack
+conda-pack -n custom -o $OUTPUT

models.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import torch
+class Model(torch.nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+        self.fc1 = torch.nn.Linear(10, 5)
+        self.threshold = 0.
+    def forward(self, x):
+       ## generates a random float the same size as x
+       return torch.randn(x.shape[0]).to(x.device)

preprocess.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import librosa
+import torch
+def preprocess(audio_file):
+    # Load the audio file
+    y, sr = librosa.load(audio_file, sr=None)
+    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+    tensor  = torch.from_numpy(mfccs)[None]
+    return tensor

requirements-custom.txt ADDED Viewed

	@@ -0,0 +1,125 @@

+--extra-index-url https://download.pytorch.org/whl/cu121
+absl-py==2.2.2
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.16
+aiosignal==1.3.2
+astunparse==1.6.3
+async-timeout==5.0.1
+attrs==25.3.0
+audioread==3.0.1
+Automat==24.8.1
+av==14.2.0
+avro==1.12.0
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+configobj==5.0.9
+constantly==23.10.4
+datasets==3.5.0
+decorator==5.2.1
+dill==0.3.8
+filelock==3.18.0
+flatbuffers==25.2.10
+frozenlist==1.5.0
+fsspec==2024.12.0
+gast==0.6.0
+gitdb==4.0.12
+GitPython==3.1.44
+google-pasta==0.2.0
+grpcio==1.71.0
+h5py==3.13.0
+huggingface-hub==0.30.1
+hyperlink==21.0.0
+idna==3.10
+imageio==2.37.0
+incremental==24.7.2
+Jinja2==3.1.6
+joblib==1.4.2
+keras==3.9.2
+lazy_loader==0.4
+libclang==18.1.1
+librosa==0.11.0
+lightning-utilities==0.14.3
+llvmlite==0.44.0
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+ml-dtypes==0.3.2
+mpmath==1.3.0
+msgpack==1.1.0
+multidict==6.2.0
+multiprocess==0.70.16
+munch==4.0.0
+namex==0.0.8
+networkx==3.4.2
+numba==0.61.0
+numpy==1.26.4
+nvidia-cublas-cu12==12.3.4.1
+nvidia-cuda-cupti-cu12==12.3.101
+nvidia-cuda-nvcc-cu12==12.3.107
+nvidia-cuda-nvrtc-cu12==12.3.107
+nvidia-cuda-runtime-cu12==12.3.101
+nvidia-cudnn-cu12==8.9.7.29
+nvidia-cufft-cu12==11.0.12.1
+nvidia-curand-cu12==10.3.4.107
+nvidia-cusolver-cu12==11.5.4.101
+nvidia-cusparse-cu12==12.2.0.103
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+opt_einsum==3.4.0
+optree==0.14.1
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+platformdirs==4.3.7
+pooch==1.8.2
+propcache==0.3.1
+protobuf==4.25.6
+pyarrow==19.0.1
+pyasn1==0.6.1
+pycparser==2.22
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+pytorch-lightning==2.5.1
+pytz==2025.2
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==14.0.0
+rsa==4.9
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.2
+six==1.17.0
+Smap==2.0.524
+smmap==5.0.2
+soundfile==0.13.1
+soxr==0.5.0.post1
+sympy==1.13.3
+tensorboard==2.16.2
+tensorboard-data-server==0.7.2
+tensorflow==2.16.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.0.1
+threadpoolctl==3.6.0
+timm==1.0.15
+tokenizers==0.21.1
+tomli==2.2.1
+torch==2.1.2+cu121
+torchaudio==2.1.2+cu121
+torchmetrics==1.7.0
+torchvision==0.16.2+cu121
+tqdm==4.67.1
+transformers==4.50.3
+triton==2.1.0
+Twisted==24.11.0
+typing_extensions==4.13.0
+tzdata==2025.2
+urllib3==2.3.0
+webrtcvad==2.0.10
+Werkzeug==3.1.3
+wrapt==1.17.2
+xxhash==3.5.0
+yarl==1.18.3
+zope.interface==7.2

run.sh ADDED Viewed

	@@ -0,0 +1,6 @@

+echo "uncompressing environment"
+mkdir -p custom
+tar -xzf custom.tar.gz -C custom
+source custom/bin/activate
+conda-unpack
+python _script.py

script.py ADDED Viewed

	@@ -0,0 +1,4 @@

+import subprocess
+# Runs another script (replace 'script.py' with your script's filename)
+subprocess.run(["bash", "run.sh"])