caw2rng committed on
Commit
f96d2a2
·
1 Parent(s): 759168b

use full competition code

Browse files
Dockerfile CHANGED
@@ -1,2 +1,82 @@
1
- FROM huggingface/competitions:latest
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  CMD uvicorn competitions.app:app --host 0.0.0.0 --port 7860 --workers 1
 
1
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive \
4
+ TZ=UTC \
5
+ HF_HUB_ENABLE_HF_TRANSFER=1
6
+
7
+ ENV PATH="${HOME}/miniconda3/bin:${PATH}"
8
+ ARG PATH="${HOME}/miniconda3/bin:${PATH}"
9
+
10
+ RUN mkdir -p /tmp/model && \
11
+ chown -R 1000:1000 /tmp/model && \
12
+ mkdir -p /tmp/data && \
13
+ chown -R 1000:1000 /tmp/data
14
+
15
+ RUN apt-get update && \
16
+ apt-get upgrade -y && \
17
+ apt-get install -y \
18
+ build-essential \
19
+ cmake \
20
+ curl \
21
+ ca-certificates \
22
+ gcc \
23
+ git \
24
+ locales \
25
+ net-tools \
26
+ wget \
27
+ libpq-dev \
28
+ libsndfile1-dev \
29
+ git \
30
+ git-lfs \
31
+ libgl1 \
32
+ unzip \
33
+ openjdk-11-jre-headless \
34
+ libseccomp-dev \
35
+ && rm -rf /var/lib/apt/lists/* && \
36
+ apt-get clean
37
+
38
+ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash && \
39
+ git lfs install
40
+
41
+ WORKDIR /app
42
+
43
+ RUN mkdir -p /app/.cache
44
+ ENV HF_HOME="/app/.cache"
45
+ RUN chown -R 1000:1000 /app
46
+ USER 1000
47
+ ENV HOME=/app
48
+
49
+ ENV PYTHONPATH=$HOME/app \
50
+ PYTHONUNBUFFERED=1 \
51
+ GRADIO_ALLOW_FLAGGING=never \
52
+ GRADIO_NUM_PORTS=1 \
53
+ GRADIO_SERVER_NAME=0.0.0.0 \
54
+ GRADIO_THEME=huggingface \
55
+ SYSTEM=spaces
56
+
57
+ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
58
+ && sh Miniconda3-latest-Linux-x86_64.sh -b -p /app/miniconda \
59
+ && rm -f Miniconda3-latest-Linux-x86_64.sh
60
+ ENV PATH /app/miniconda/bin:$PATH
61
+
62
+ RUN conda create -p /app/env -y python=3.10
63
+
64
+ SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
65
+
66
+ RUN conda install pytorch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 pytorch-cuda=12.1 -c pytorch -c nvidia && \
67
+ conda clean -ya && \
68
+ conda install -c "nvidia/label/cuda-12.1.0" cuda-nvcc && conda clean -ya
69
+
70
+ COPY --chown=1000:1000 . /app/
71
+ RUN make sandbox
72
+
73
+ # give permissions to run sandbox
74
+ RUN chmod +x /app/sandbox
75
+
76
+ ENV PATH="/app:${PATH}"
77
+
78
+ RUN pip install -U pip
79
+ RUN pip install -e .
80
+ RUN pip install -r requirements_docker.txt
81
+
82
  CMD uvicorn competitions.app:app --host 0.0.0.0 --port 7860 --workers 1
LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [2022] [Hugging Face Inc.]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
Makefile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CFLAGS += -std=c99 -Wall -O2
2
+ LDFLAGS += -lseccomp
3
+ .PHONY: quality style test
4
+
5
+ quality:
6
+ python -m black --check --line-length 119 --target-version py38 .
7
+ python -m isort --check-only .
8
+ python -m flake8 --max-line-length 119
9
+
10
+ style:
11
+ python -m black --line-length 119 --target-version py38 .
12
+ python -m isort .
13
+
14
+ docker:
15
+ docker build -t competitions:latest .
16
+ docker tag competitions:latest huggingface/competitions:latest
17
+ docker push huggingface/competitions:latest
18
+
19
+ test:
20
+ pytest -sv .
21
+
22
+ sandbox: sandbox.c
23
+ gcc $(CFLAGS) $^ -o $@ $(LDFLAGS)
24
+
25
+ clean:
26
+ rm *.so sandbox
27
+
28
+ pip:
29
+ rm -rf build/
30
+ rm -rf dist/
31
+ make style && make quality
32
+ python setup.py sdist bdist_wheel
33
+ twine upload dist/* --verbose
README_competitions.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Competitions
2
+
3
+ Create a machine learning competition for your organization, friends or the world!
4
+
5
+ ## Getting Started
6
+
7
+ There are two types of competitions you can create:
8
+
9
+ - generic: a competition where you provide the data and the participants provide the predictions as a CSV file. all the test data is always available to the participants.
10
+
11
+ - script: a competition where you provide the data and the participants provide the code that generates the predictions. test data can be hidden from the participants.
12
+
13
+ ## Costs
14
+
15
+ Creating a competition is free. However, you will need to pay for the compute resources used to run the competition. The cost of the compute resources depends on the type of competition you create.
16
+
17
+ - generic: generic competitions are free to run. you can, however, upgrade the compute to cpu-basic to speed up the metric calculation and reduce the waiting time for the participants.
18
+
19
+ - script: script competitions require a compute resource to run the participant's code. you can choose between a variety of cpu and gpu instances (T4, A10g and even A100). the cost of the compute resource is charged per hour.
20
+
21
+ For information on the cost of the compute resources, please see the [pricing page](https://huggingface.co/docs/hub/spaces-overview#hardware-resources).
22
+
23
+ ## Visibility
24
+
25
+ You can choose to make your competition public or private. Public competitions are visible to everyone and anyone can participate. Private competitions are only visible to the people you invite!
26
+
27
+ ## How to create a competition?
28
+
29
+ Please read the [docs](https://huggingface.co/docs/competitions) to learn how to create a competition.
competitions ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 447fff164c586ae3d42617c7726d8563514c3f17
docs/source/_toctree.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - sections:
2
+ - local: index
3
+ title: 🤗 Competitions
4
+ - local: pricing
5
+ title: Pricing
6
+ title: Get started
7
+ - sections:
8
+ - local: create_competition
9
+ title: Create competition
10
+ - local: competition_repo
11
+ title: Competition repo
12
+ - local: competition_space
13
+ title: Competition space
14
+ - local: custom_metric
15
+ title: Custom metric
16
+ title: Organizing
17
+ - sections:
18
+ - local: submit
19
+ title: Making a submission
20
+ - local: leaderboard
21
+ title: Leaderboard
22
+ - local: teams
23
+ title: Teams
24
+ title: Participating
docs/source/competition_repo.mdx ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Competition Repo
2
+
3
+ NOTE: Competition repo must always be kept private. Do NOT make it public!
4
+
5
+ The competition repo consists of the following files:
6
+
7
+ ```
8
+ ├── COMPETITION_DESC.md
9
+ ├── conf.json
10
+ ├── DATASET_DESC.md
11
+ ├── solution.csv
12
+ ├── SUBMISSION_DESC.md
13
+ ├── submission_info
14
+ │   └── *.json
15
+ ├── submissions
16
+ │   └── *.csv
17
+ ├── teams.json
18
+ └── user_team.json
19
+ ```
20
+
21
+ ### COMPETITION_DESC.md
22
+
23
+ This file contains the description of the competition. It is a markdown file.
24
+ You can use the markdown syntax to format the text and modify the file according to your needs.
25
+ Competition description is shown on the front page of the competition.
26
+
27
+ ### DATASET_DESC.md
28
+
29
+ This file contains the description of the dataset. It is again a markdown file.
30
+ This file is used to describe the dataset and is shown on the dataset page.
31
+ In this file you can mention which columns are present in the dataset, what is the meaning of each column, what is the format of the dataset, etc.
32
+
33
+ ### conf.json
34
+
35
+ conf.json is the configuration file for the competition. An example conf.json is shown below:
36
+
37
+ ```
38
+ {
39
+ "COMPETITION_TYPE":"generic",
40
+ "SUBMISSION_LIMIT":5,
41
+ "TIME_LIMIT": 10,
42
+ "HARDWARE":"cpu-basic",
43
+ "SELECTION_LIMIT":10,
44
+ "END_DATE":"2024-05-25",
45
+ "EVAL_HIGHER_IS_BETTER":1,
46
+ "SUBMISSION_ID_COLUMN":"id",
47
+ "SUBMISSION_COLUMNS":"id,pred",
48
+ "SUBMISSION_ROWS":10000,
49
+ "EVAL_METRIC":"roc_auc_score",
50
+ "LOGO":"https://github.com/abhishekkrthakur/public_images/blob/main/song.png?raw=true",
51
+ "DATASET": "",
52
+ "SUBMISSION_FILENAMES": ["submission.csv"],
53
+ "SCORING_METRIC": "roc_auc_score"
54
+ }
55
+ ```
56
+
57
+ This file is created when you create a new competition. You can modify this file according to your needs.
58
+ However, we do not recommend changing the evaluation metric field once the competition has started
59
+ as it would require you to re-evaluate all the submissions.
60
+
61
+ - COMPETITION_TYPE: This field is used to specify the type of competition. Currently, we support two types of competitions: `generic` and `script`.
62
+ - `generic` competition is a competition where the users can submit a csv file (or a different format) and the submissions are evaluated using a metric.
63
+ - `script` competition is a competition where the users can submit a huggingface model repo containing a script.py. The script.py is run to generate submission.csv which is then evaluated using a metric.
64
+ - SUBMISSION_LIMIT: This field is used to specify the number of submissions a user can make in a day.
65
+ - TIME_LIMIT: This field is used to specify the time limit for each submission in seconds. (used only for `script` competitions)
66
+ - HARDWARE: This field is used to specify the hardware on which the submissions will be evaluated.
67
+ - SELECTION_LIMIT: This field is used to specify the number of submissions that will be selected for the leaderboard. (used only for `script` competitions)
68
+ - END_DATE: This field is used to specify the end date of the competition. The competition will be automatically closed on the end date. Private leaderboard will be made available on the end date.
69
+ - EVAL_HIGHER_IS_BETTER: This field is used to specify whether a higher or a lower value of the evaluation metric is better. If the value is 1, then higher is better. If the value is 0, then lower is better.
70
+ - SUBMISSION_ID_COLUMN: This field is used to specify the name of the id column in the submission file.
71
+ - SUBMISSION_COLUMNS: This field is used to specify the names of the columns in the submission file. The names must be comma separated without any spaces.
72
+ - SUBMISSION_ROWS: This field is used to specify the number of rows in the submission file without the header.
73
+ - EVAL_METRIC: This field is used to specify the evaluation metric. We support all the scikit-learn metrics and even custom metrics.
74
+ - LOGO: This field is used to specify the logo of the competition. The logo must be a png file. The logo is shown on all the pages of the competition.
75
+ - DATASET: This field is used to specify the PRIVATE dataset used in the competition. The dataset is available to the users only during the script run. This is only used for script competitions.
76
+ - SUBMISSION_FILENAMES: This field is used to specify the name of the submission file. This is only used for script competitions with custom metrics and must not be changed for generic competitions.
77
+ - SCORING_METRIC: When using a custom metric / multiple metrics, this field is used to specify the metric name that will be used for scoring the submissions.
78
+
79
+ ### solution.csv
80
+
81
+ This file contains the solution for the competition. It is a csv file. A sample is shown below:
82
+
83
+ ```
84
+ id,pred,split
85
+ 0,1,public
86
+ 1,0,private
87
+ 2,0,private
88
+ 3,1,private
89
+ 4,0,public
90
+ 5,1,private
91
+ 6,1,public
92
+ 7,1,private
93
+ 8,0,public
94
+ 9,0,private
95
+ 10,0,private
96
+ 11,0,private
97
+ 12,1,private
98
+ 13,0,private
99
+ 14,1,public
100
+ ```
101
+
102
+ The solution file is used to evaluate the submissions. The solution file must always have an id column and a split column.
103
+ The split column is used to split the solution into public and private parts. The split column can have two values: `public` and `private`.
104
+ You can have multiple columns in the solution file. However, the evaluation metric must support multiple columns.
105
+
106
+ For example, if the evaluation metric is `roc_auc_score` then the solution file must have two columns in addition to the `split` column: `id` and `pred`.
107
+ The names of id and pred columns can be anything. The names will be grabbed from the `conf.json` file.
108
+ Please make sure you have appropriate column names in the `conf.json` file and that you have both public and private splits in the solution file.
109
+
110
+ ### SUBMISSION_DESC.md
111
+
112
+ This file contains the description of the submission. It is a markdown file.
113
+ You can use the markdown syntax to format the text and modify the file according to your needs.
114
+ Submission description is shown on the submission page.
115
+
116
+ Here you can mention the format of the submission file, what columns are required in the submission file, etc.
117
+
118
+ For the example solution file shown above, the submission file must have two columns: `id` and `pred`.
119
+ An example of sample_submission.csv is shown below:
120
+
121
+ ```
122
+ id,pred
123
+ 0,0.6
124
+ 1,0.1
125
+ 2,0.5
126
+ 3,1.6
127
+ 4,0.8
128
+ 5,1
129
+ 6,1
130
+ 7,1
131
+ 8,0
132
+ 9,0
133
+ 10,0.1
134
+ 11,0.4
135
+ 12,1.9
136
+ 13,0.01
137
+ 14,1.1
138
+ ```
139
+
140
+ When a user submits a submission file, the system will check if the submission file has the required columns.
141
+ If the submission file does not have the required columns, the submission will be rejected.
142
+
143
+ It is the responsibility of the organizer to make sure they provide a sample submission file in the correct format and a submission description file.
144
+
145
+ ### submission_info
146
+
147
+ This folder contains the submission info files. Each submission info file contains the information about a submission.
148
+ This folder is created when a first submission is made. The submission info files are json files.
149
+
150
+ ### submissions
151
+
152
+ This folder contains the submissions made by the users. Each submission is a csv file (or a different format).
153
+ This folder is created when a first submission is made.
154
+
155
+ ### other files
156
+
157
+ The other files teams.json and user_team.json are used to store the information about the teams.
docs/source/competition_space.mdx ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Competition Space
2
+
3
+ A competition space is a Hugging Face Space where the actual competition takes place. It is a space where you can submit your model and get a score. It is also a space where competitors can see the leaderboard, discuss, and make submissions.
4
+
5
+ Check out an example competition space below:
6
+
7
+ ![competition space](https://github.com/abhishekkrthakur/public_images/blob/main/competition_space.png?raw=true)
8
+
9
+ The competition space consists of the following:
10
+
11
+ - Competition description (content is fetched from private competition repo)
12
+ - Dataset description (content is fetched from private competition repo)
13
+ - Leaderboard (content is fetched from private competition repo)
14
+ - Public (available to everyone, all the time)
15
+ - Private (available to everyone, but only after the competition ends)
16
+ - Submissions
17
+ - Submission guidelines (content is fetched from private competition repo)
18
+ - My submissions (users can see their own submissions)
19
+ - New submission (users can make new submissions)
20
+ - Discussions (accessible via community tab)
21
+
22
+ ### Secrets
23
+
24
+ The competition space requires two secrets:
25
+
26
+ - `HF_TOKEN`: this is the Hugging Face write token of the user who created the competition space. This token must be kept alive for the duration of the competition. In case the token expires, the competition space will stop working. If you change/refresh/delete this token, you will need to update this secret.
27
+ - `COMPETITION_ID`: this is the path of private competition repo. e.g. `abhishek/private-competition-data`. If you change the name of the private competition repo, you will need to update this secret.
28
+
29
+ Note: The above two secrets are crucial for the competition space to work!
30
+
31
+ ### Public & private competition spaces
32
+
33
+ A competition space can be public or private. A public competition space is available to everyone, all the time.
34
+ A private competition space is only available to the members of the organization the competition space is created in.
35
+
36
+ You can at any point make the competition public.
37
+
38
+ Generally, we recommend testing every aspect of the competition space in a private competition space before making it public.
docs/source/create_competition.mdx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Create a competition
2
+
3
+ Creating a competition is super easy and you have full control over the data, evaluation metric and the hardware used.
4
+
5
+ To create a competition, you need to have a [Hugging Face](https://huggingface.co) account. You will also need a write token which will be used throughout the competition to upload data and submissions.
6
+ Please note that the write token is private and should not be shared with anyone and must not be refreshed during the course of the competition.
7
+ In case you decide to refresh the token, you will need to update the token in the competition space's settings otherwise the competitors will not be able to upload submissions and competition will stop working.
8
+ You can find/generate a write token [here](https://huggingface.co/settings/tokens).
9
+
10
+ To create a competition, you also need an organization. You can either create a new organization or use an existing one that you are already a member of.
11
+ You can create a new organization [here](https://huggingface.co/organizations/new).
12
+
13
+ To create a competition, click [here](https://huggingface.co/spaces/competitions/create).
14
+
15
+ ### Competition creator
16
+
17
+ ![competition](https://github.com/abhishekkrthakur/public_images/blob/main/competition_creator.png?raw=true)
18
+
19
+ Please note: you will be able to change almost every setting later on. However, we don't recommend changing the evaluation metric once the competition has started,
20
+ as it will require you to re-evaluate all the submissions.
21
+
22
+
23
+ ### Types of competitions
24
+
25
+ - generic: generic competitions are competitions where the participants submit a CSV file (or any other file) containing the predictions for the whole test set.
26
+ The predictions are then evaluated against solution.csv (or a solution file) using the evaluation metric provided by the competition creator.
27
+ These competitions are easy to set up and free to host (if you use cpu-basic). You can improve the evaluation runtime by upgrading generic competitions to cpu-upgrade.
28
+ For generic competition, all the test data (without labels) is available to the participants all the time.
29
+
30
+ - script: script competitions are competitions where the participants submit a python script that takes in the test set and outputs the predictions.
31
+ The predictions are then evaluated against solution.csv (or a solution file) using the evaluation metric provided by the competition creator.
32
+ These competitions are only free to host if you use cpu-basic as the backend for evaluation, and this is not recommended!
33
+ In a script competition, the test data can be kept private. The participants won't be able to see the test data at all.
34
+ The participants submit a huggingface model repo containing `script.py` which is run to generate predictions on hidden test data.
35
+
docs/source/custom_metric.mdx ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Custom metric
2
+
3
+ In case you don't settle for the default scikit-learn metrics, you can define your own metric.
4
+
5
+ Here, we expect the organizer to know python.
6
+
7
+ ### How to define a custom metric
8
+
9
+ To define a custom metric, change `EVAL_METRIC` in `conf.json` to `custom`. You must also make sure that `EVAL_HIGHER_IS_BETTER` is set to `1` or `0` depending on whether a higher value of the metric is better or not.
10
+
11
+ The second step is to create a file `metric.py` in the private competition repo.
12
+ The file should contain a `compute` function that takes competition params as input.
13
+
14
+ Here is the part where we check if metric is custom and calculate the metric value:
15
+
16
+ ```python
17
+ def compute_metrics(params):
18
+ if params.metric == "custom":
19
+ metric_file = hf_hub_download(
20
+ repo_id=params.competition_id,
21
+ filename="metric.py",
22
+ token=params.token,
23
+ repo_type="dataset",
24
+ )
25
+ sys.path.append(os.path.dirname(metric_file))
26
+ metric = importlib.import_module("metric")
27
+ evaluation = metric.compute(params)
28
+ .
29
+ .
30
+ .
31
+ ```
32
+
33
+ You can find the above code in `compute_metrics.py` in the competitions GitHub repo.
34
+
35
+ `params` is defined as:
36
+
37
+ ```python
38
+ class EvalParams(BaseModel):
39
+ competition_id: str
40
+ competition_type: str
41
+ metric: str
42
+ token: str
43
+ team_id: str
44
+ submission_id: str
45
+ submission_id_col: str
46
+ submission_cols: List[str]
47
+ submission_rows: int
48
+ output_path: str
49
+ submission_repo: str
50
+ time_limit: int
51
+ dataset: str # private test dataset, used only for script competitions
52
+ ```
53
+
54
+ You are free to do whatever you want to in the `compute` function.
55
+ In the end it must return a dictionary with the following keys:
56
+
57
+ ```python
58
+ {
59
+ "public_score": {
60
+ "metric1": metric_value,
61
+ },
62
+ "private_score": {
63
+ "metric1": metric_value,
64
+ },
65
+ }
66
+ ```
67
+
68
+ public and private scores must be dictionaries! You can also use multiple metrics.
69
+ Example for multiple metrics:
70
+
71
+ ```python
72
+ {
73
+ "public_score": {
74
+ "metric1": metric_value,
75
+ "metric2": metric_value,
76
+ },
77
+ "private_score": {
78
+ "metric1": metric_value,
79
+ "metric2": metric_value,
80
+ },
81
+ }
82
+ ```
83
+
84
+ Note: When using multiple metrics, conf.json must have `SCORING_METRIC` specified to rank the participants in the competition.
85
+
86
+ For example, if I want to use metric2 to rank the participants, I will set `SCORING_METRIC` to `metric2` in `conf.json`.
87
+
88
+ ### Example of a custom metric
89
+
90
+ ```python
91
+ import pandas as pd
92
+ from huggingface_hub import hf_hub_download
93
+
94
+
95
+ def compute(params):
96
+ solution_file = hf_hub_download(
97
+ repo_id=params.competition_id,
98
+ filename="solution.csv",
99
+ token=params.token,
100
+ repo_type="dataset",
101
+ )
102
+
103
+ solution_df = pd.read_csv(solution_file)
104
+
105
+ submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
106
+ submission_file = hf_hub_download(
107
+ repo_id=params.competition_id,
108
+ filename=submission_filename,
109
+ token=params.token,
110
+ repo_type="dataset",
111
+ )
112
+ submission_df = pd.read_csv(submission_file)
113
+
114
+ public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
115
+ private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
116
+
117
+ public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
118
+ public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
119
+
120
+ private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
121
+ private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
122
+
123
+ public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
124
+ public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
125
+
126
+ private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
127
+ private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
128
+
129
+ # CALCULATE METRICS HERE.......
130
+ # _metric = SOME METRIC FUNCTION
131
+ target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
132
+ public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
133
+ private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
134
+
135
+ evaluation = {
136
+ "public_score": {
137
+ "metric1": public_score,
138
+ },
139
+ "private_score": {
140
+ "metric1": private_score,
141
+ }
142
+ }
143
+ return evaluation
144
+ ```
145
+
146
+ Take a careful look at the above code.
147
+ You can see that we are downloading the solution file and the submission file from the dataset repo.
148
+ We are then calculating the metric on the public and private splits of the solution and submission files.
149
+ Finally, we are returning the metric values in a dictionary.
docs/source/index.mdx ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Competitions
2
+
3
+ Create a machine learning competition for your organization, friends or the world!
4
+
5
+ ![competition](https://github.com/abhishekkrthakur/public_images/blob/main/competition_page.png?raw=true)
6
+
7
+ ## Supported competition types
8
+
9
+ There are two types of competitions you can create:
10
+
11
+ - generic: a competition where you provide the data and the participants provide the predictions as a CSV file. all the test data is always available to the participants.
12
+
13
+ - script: a competition where you provide the data and the participants provide the code that generates the predictions. test data can be hidden from the participants.
14
+
15
+ You can choose to make your competition public or private. Public competitions are visible to everyone and anyone can participate. Private competitions are only visible to the people you invite!
16
+
17
+ ## Why choose Hugging Face Competitions?
18
+
19
+ - you can create totally private competitions that are only visible to the people you invite.
20
+
21
+ - generic competitions can be hosted for free.
22
+
23
+ - script competitions have a variety of compute options to choose from: CPU, T4, A10g & even A100.
24
+
25
+ - you have full control over the data you want to use for your competition.
26
+
27
+ - it's open source!
28
+
29
+
30
+ ## Issues / feature requests
31
+
32
+ Something missing? Found a bug? Please open an issue on this [GitHub repository](https://github.com/huggingface/competitions) and we'll fix it as soon as possible!
33
+
34
+ To host a competition on hf.co/competitions, please contact us at autotrain [at] huggingface [dot] co
docs/source/leaderboard.mdx ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Understanding the leaderboard
2
+
3
+ There are two types of leaderboards for all competitions:
4
+
5
+ - Public Leaderboard: This leaderboard is calculated on X% of the test dataset, and is what you see on the competition page all the time.
6
+ The value of X will be mentioned in the problem statement by the organizers.
7
+
8
+ - Private Leaderboard: This leaderboard is calculated on the remaining (100-X)% of the test dataset, and is made public only after the competition is over.
9
+ Your final ranking is based on the private leaderboard.
10
+
11
+ When you make a submission, both the public and private leaderboard scores are calculated but only the public leaderboard score is displayed.
12
+ When a competition is over, the private leaderboard score is revealed and the final rankings are calculated based on that.
13
+ You can select N submissions to be considered for the private leaderboard score calculation.
14
+ The value of N will be mentioned in the problem statement by the organizers.
docs/source/pricing.mdx ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pricing
2
+
3
+ Creating a competition is free. However, you will need to pay for the compute resources used to run the competition. The cost of the compute resources depends on the type of competition you create.
4
+
5
+ - generic: generic competitions are free to run. you can, however, upgrade the compute to cpu-upgrade to speed up the metric calculation and reduce the waiting time for the participants.
6
+
7
+ - script: script competitions require a compute resource to run the participant's code. you can choose between a variety of cpu and gpu instances (T4, A10g and even A100). the cost of the compute resource is charged per minute.
8
+
9
+ For script competitions, the script submitted by the participants will run without internet for a time limit provided by the organizer.
10
+ You will be charged for the compute resources used by the participants. The cost of the compute resources depends on the type of compute resource you choose.
11
+
12
+ For information on the cost of the compute resources, please see the [pricing page](https://huggingface.co/docs/hub/spaces-overview#hardware-resources).
docs/source/submit.mdx ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Making a submission
2
+
3
+ The submission format and example submissions are usually provided by the competition organizer.
4
+ This page describes how to make submissions using the competition UI.
5
+
6
+ To make a submission, you must provide a read-only token that allows the competition to access user info.
7
+
8
+ ### `generic` competition: submitting a file
9
+
10
+ - provide your Hugging Face read-only token
11
+ - upload a file
12
+ - add a description
13
+ - click "Submit"
14
+
15
+ ![submit_generic](https://github.com/abhishekkrthakur/public_images/blob/main/submission_generic.png?raw=true)
16
+
17
+ ### `script` competition: submitting a model repo with `script.py`
18
+
19
+ - provide your Hugging Face read-only token
20
+ - provide a link to your model repo (e.g. abhishek/submission-model-1). The model repo must contain a `script.py` file that generates submission in the required format.
21
+ - add a description
22
+ - click "Submit"
23
+
24
+ ![submit_script](https://github.com/abhishekkrthakur/public_images/blob/main/submission_script.png?raw=true)
docs/source/teams.mdx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Teaming up
2
+
3
+ Coming soon!
examples/generate_fake_submissions.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import json
3
+ import os
4
+ import random
5
+ from datetime import datetime
6
+
7
+ from huggingface_hub import HfApi
8
+ from tqdm import tqdm
9
+
10
+
11
+ NUM_USERS = 1000
12
+ NUM_SUBMISSIONS = 10
13
+ MIN_SCORE = 0.0
14
+ MAX_SCORE = 1.0
15
+ START_DATE = datetime(2022, 1, 1, 0, 0, 0)
16
+ END_DATE = datetime(2022, 11, 23, 0, 0, 0)
17
+ COMPETITION_ID = os.environ.get("COMPETITION_ID")
18
+ AUTOTRAIN_TOKEN = os.environ.get("AUTOTRAIN_TOKEN")
19
+
20
+ if __name__ == "__main__":
21
+ # example submission:
22
+ # {"name": "abhishek", "id": "5fa19f4ba13e063b8b2b5e11", "submissions": [{"date": "2022-11-09", "time": "12:54:55", "submission_id": "c0eed646-838f-482f-bf4d-2c651f8de43b", "submission_comment": "", "status": "pending", "selected": true, "public_score": -1, "private_score": -1}, {"date": "2022-11-09", "time": "14:02:21", "submission_id": "bc6b08c2-c684-4ee1-9be2-35cf717ce618", "submission_comment": "", "status": "done", "selected": true, "public_score": 0.3333333333333333, "private_score": 0.3333333333333333}, {"date": "2022-11-17", "time": "21:31:04", "submission_id": "4a2984b1-de10-411d-be0f-9aa7be16245f", "submission_comment": "", "status": "done", "selected": false, "public_score": 0.3333333333333333, "private_score": 0.3333333333333333}]}
23
+
24
+ for i in tqdm(range(NUM_USERS)):
25
+ name = f"test_{i}"
26
+ # generate random id
27
+ id = "".join(random.choices("0123456789abcdef", k=24))
28
+ submissions = []
29
+ for j in range(NUM_SUBMISSIONS):
30
+ date = START_DATE + (END_DATE - START_DATE) * random.random()
31
+ time = date.strftime("%H:%M:%S")
32
+ date = date.strftime("%Y-%m-%d")
33
+ submission_id = "".join(random.choices("0123456789abcdef", k=36))
34
+ submission_comment = ""
35
+ status = "done"
36
+ selected = False
37
+ public_score = MIN_SCORE + (MAX_SCORE - MIN_SCORE) * random.random()
38
+ private_score = MIN_SCORE + (MAX_SCORE - MIN_SCORE) * random.random()
39
+ submission = {
40
+ "date": date,
41
+ "time": time,
42
+ "submission_id": submission_id,
43
+ "submission_comment": submission_comment,
44
+ "status": status,
45
+ "selected": selected,
46
+ "public_score": public_score,
47
+ "private_score": private_score,
48
+ }
49
+ submissions.append(submission)
50
+
51
+ submission = {
52
+ "name": name,
53
+ "id": id,
54
+ "submissions": submissions,
55
+ }
56
+ fname = f"{id}.json"
57
+ user_submission_info_json = json.dumps(submission)
58
+ user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
59
+ user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
60
+ api = HfApi()
61
+ api.upload_file(
62
+ path_or_fileobj=user_submission_info_json_buffer,
63
+ path_in_repo=fname,
64
+ repo_id=COMPETITION_ID,
65
+ repo_type="dataset",
66
+ token=AUTOTRAIN_TOKEN,
67
+ )
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # essentials
2
+ fastapi==0.111.0
3
+ joblib==1.4.2
4
+ loguru==0.7.2
5
+ pandas==2.2.2
6
+ huggingface_hub==0.24.6
7
+ tabulate==0.9.0
8
+ markdown==3.6
9
+ psutil==6.0.0
10
+ python-multipart==0.0.9
11
+ uvicorn==0.30.1
12
+ py7zr==0.21.1
13
+ pydantic==2.8.2
14
+ gradio==4.37.2
15
+ authlib==1.3.1
16
+ itsdangerous==2.2.0
17
+ hf-transfer
requirements_docker.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # machine learning libs
2
+ scikit-learn==1.5.1
3
+ jiwer==3.0.4
4
+ sacremoses==0.1.1
5
+ sentencepiece==0.2.0
6
+ xgboost==2.1.0
7
+ nltk==3.8.1
8
+ optuna==3.6.1
9
+ einops==0.8.0
10
+ peft==0.12.0
11
+ trl==0.9.6
12
+ tiktoken==0.7.0
13
+ transformers==4.44.1
14
+ accelerate==0.33.0
15
+ diffusers==0.30.0
16
+ bitsandbytes==0.43.3
17
+ tqdm==4.66.4
sandbox.c ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <errno.h>
2
+ #include <seccomp.h>
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <unistd.h>
6
+
7
+ int main(int argc, char* argv[]) {
8
+ if (argc < 2) {
9
+ fprintf(stderr, "Usage: %s <command> [args...]\n", argv[0]);
10
+ return EXIT_FAILURE;
11
+ }
12
+
13
+ scmp_filter_ctx ctx;
14
+
15
+ // Initialize the seccomp filter in blocklist mode
16
+ ctx = seccomp_init(SCMP_ACT_ALLOW);
17
+ if (ctx == NULL) {
18
+ perror("seccomp_init");
19
+ return EXIT_FAILURE;
20
+ }
21
+
22
+ // Block relevant network-related syscalls, so as to block egress internet access
23
+
24
+ // We cannot deny these calls as they are needed by cuda
25
+ // This should not be a big deal for our use case if what we want is to block egress network access
26
+ // (just blocking connect should actually be enough)
27
+
28
+ // seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(socket), 0);
29
+ // seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(bind), 0);
30
+ // seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(listen), 0);
31
+
32
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(connect), 0);
33
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(accept), 0);
34
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(send), 0);
35
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(sendto), 0);
36
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(sendmsg), 0);
37
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(recv), 0);
38
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(recvfrom), 0);
39
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(recvmsg), 0);
40
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(setsockopt), 0);
41
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(getsockopt), 0);
42
+
43
+ // Load the filter into the kernel
44
+ if (seccomp_load(ctx) < 0) {
45
+ perror("seccomp_load");
46
+ seccomp_release(ctx);
47
+ return EXIT_FAILURE;
48
+ }
49
+
50
+ #ifdef DEBUG
51
+ printf("seccomp filter installed. Network access is blocked.\n");
52
+ #endif
53
+
54
+ // Execute the target program
55
+ execvp(argv[1], argv + 1);
56
+
57
+ seccomp_release(ctx);
58
+ return EXIT_SUCCESS;
59
+ }
setup.cfg ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [metadata]
2
+ license_file = LICENSE
3
+ version = attr: competitions.__version__
4
+
5
+ [isort]
6
+ ensure_newline_before_comments = True
7
+ force_grid_wrap = 0
8
+ include_trailing_comma = True
9
+ line_length = 119
10
+ lines_after_imports = 2
11
+ multi_line_output = 3
12
+ use_parentheses = True
13
+
14
+ [flake8]
15
+ ignore = E203, E501, W503
16
+ max-line-length = 119
setup.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022 Hugging Face Inc
3
+ #
4
+ # Lint as: python3
5
+ # pylint: enable=line-too-long
6
+ """Hugging Face Competitions
7
+ """
8
+ import os
9
+
10
+ from setuptools import find_packages, setup
11
+
12
+
13
+ this_directory = os.path.abspath(os.path.dirname(__file__))
14
+ with open(os.path.join(this_directory, "README.md"), encoding="utf-8") as f:
15
+ LONG_DESCRIPTION = f.read()
16
+
17
+ QUALITY_REQUIRE = [
18
+ "black~=23.0",
19
+ "isort==5.13.2",
20
+ "flake8==7.0.0",
21
+ "mypy==1.8.0",
22
+ ]
23
+
24
+ TEST_REQUIRE = ["pytest", "pytest-cov"]
25
+
26
+ EXTRAS_REQUIRE = {
27
+ "dev": QUALITY_REQUIRE,
28
+ "quality": QUALITY_REQUIRE,
29
+ "test": QUALITY_REQUIRE + TEST_REQUIRE,
30
+ "docs": QUALITY_REQUIRE + TEST_REQUIRE + ["hf-doc-builder"],
31
+ }
32
+
33
+ with open("requirements.txt", encoding="utf-8") as f:
34
+ INSTALL_REQUIRES = f.read().splitlines()
35
+
36
+ setup(
37
+ name="competitions",
38
+ description="Hugging Face Competitions",
39
+ long_description=LONG_DESCRIPTION,
40
+ author="HuggingFace Inc.",
41
+ url="https://github.com/huggingface/competitions",
42
+ download_url="https://github.com/huggingface/competitions/tags",
43
+ packages=find_packages("."),
44
+ entry_points={"console_scripts": ["competitions=competitions.cli.competitions:main"]},
45
+ install_requires=INSTALL_REQUIRES,
46
+ extras_require=EXTRAS_REQUIRE,
47
+ python_requires=">=3.10",
48
+ license="Apache 2.0",
49
+ classifiers=[
50
+ "Intended Audience :: Developers",
51
+ "Intended Audience :: Education",
52
+ "Intended Audience :: Science/Research",
53
+ "License :: OSI Approved :: Apache Software License",
54
+ "Operating System :: OS Independent",
55
+ "Programming Language :: Python :: 3.10",
56
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
57
+ ],
58
+ keywords="huggingface competitions machine learning ai nlp tabular",
59
+ data_files=[
60
+ (
61
+ "templates",
62
+ [
63
+ "competitions/templates/index.html",
64
+ ],
65
+ ),
66
+ ],
67
+ include_package_data=True,
68
+ )