robot-bengali-2 commited on
Commit
92a6755
1 Parent(s): 2068f16

Write "Make Your Own" section

Browse files
Files changed (6) hide show
  1. app.py +4 -4
  2. charts.py +1 -1
  3. st_helpers.py +1 -1
  4. static/content_style.css +2 -2
  5. static/header_style.css +2 -2
  6. static/tabs.html +128 -34
app.py CHANGED
@@ -20,10 +20,10 @@ content_text(f"""
20
  There was a time when you could comfortably train state-of-the-art vision and language models at home on your workstation.
21
  The first convolutional neural net to beat ImageNet
22
  (<a target="_blank" href="https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf">AlexNet</a>)
23
- was trained for 5-6 days on two gamer-grade GPUs. Today's TOP-1 ImageNet model
24
  (<a target="_blank" href="https://arxiv.org/abs/2106.04803">CoAtNet</a>)
25
  takes 20,000 TPU-v3 days. And things are even worse in the NLP world: training
26
- <a target="_blank" href="https://arxiv.org/abs/2005.14165">GPT-3</a> on a top-tier server
27
  with 8x A100 would take decades.""")
28
 
29
  content_text(f"""
@@ -47,7 +47,7 @@ content_title("How do I join?")
47
  content_text("""
48
  That's easy. First, make sure you're logged in at Hugging Face. If you don't have an account, create one <b>TODO</b>.<br>
49
 
50
- <ul style="text-align: left; list-style-position: inside; margin-top: 12px; margin-left: -32px;">
51
  <li style="margin-top: 4px;">
52
  Join our organization on Hugging Face here: <b>TODO</b>. </li>
53
  <li style="margin-top: 4px;">
@@ -62,7 +62,7 @@ Please note that we currently limit the number of colab participants to <b>TODO<
62
  with other users. If there are too many active peers, take a look at alternative starter kits here <b>TODO</b>
63
  """)
64
 
65
- content_title("But how does it work?")
66
  content_text("<b> TODO </b> General Story That Weaves Together Three Tabs Below . Lorem ipsum dolor sit amet, "
67
  "consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim"
68
  " ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
 
20
  There was a time when you could comfortably train state-of-the-art vision and language models at home on your workstation.
21
  The first convolutional neural net to beat ImageNet
22
  (<a target="_blank" href="https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf">AlexNet</a>)
23
+ was trained for 5-6 days on two gamer-grade GPUs. In contrast, today's TOP-1 ImageNet model
24
  (<a target="_blank" href="https://arxiv.org/abs/2106.04803">CoAtNet</a>)
25
  takes 20,000 TPU-v3 days. And things are even worse in the NLP world: training
26
+ <a target="_blank" href="https://arxiv.org/abs/2005.14165">GPT&#8209;3</a> on a top-tier server
27
  with 8x A100 would take decades.""")
28
 
29
  content_text(f"""
 
47
  content_text("""
48
  That's easy. First, make sure you're logged in at Hugging Face. If you don't have an account, create one <b>TODO</b>.<br>
49
 
50
+ <ul style="text-align: left; list-style-position: inside; margin-top: 12px; margin-left: -24px;">
51
  <li style="margin-top: 4px;">
52
  Join our organization on Hugging Face here: <b>TODO</b>. </li>
53
  <li style="margin-top: 4px;">
 
62
  with other users. If there are too many active peers, take a look at alternative starter kits here <b>TODO</b>
63
  """)
64
 
65
+ content_title("How does it work?")
66
  content_text("<b> TODO </b> General Story That Weaves Together Three Tabs Below . Lorem ipsum dolor sit amet, "
67
  "consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim"
68
  " ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
charts.py CHANGED
@@ -12,7 +12,7 @@ def draw_current_progress():
12
  source, {
13
  "height": 200,
14
  "title": {
15
- "text": "Training DALLE with volunteers. Updated every few minutes during NeurIPS.",
16
  "dy": 6,
17
  },
18
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
 
12
  source, {
13
  "height": 200,
14
  "title": {
15
+ "text": "Training DALL-E with volunteers (updated every few minutes during NeurIPS 2021)",
16
  "dy": 6,
17
  },
18
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
st_helpers.py CHANGED
@@ -30,7 +30,7 @@ def make_header():
30
 
31
 
32
  def make_tabs():
33
- components.html(f"{tabs_html}", height=400, scrolling=True)
34
 
35
 
36
  def make_footer():
 
30
 
31
 
32
  def make_tabs():
33
+ components.html(f"{tabs_html}", height=1000, scrolling=True)
34
 
35
 
36
  def make_footer():
static/content_style.css CHANGED
@@ -60,11 +60,11 @@
60
  border-color:rgba(27,31,35,.35);
61
  background-image:linear-gradient(180deg, #f0f3f6, #e6ebf1 90%)
62
  }
63
- a:link {
64
  color: #00194a;
65
  text-decoration: none;
66
  }
67
  a:visited {
68
  color: #3f004a;
69
  text-decoration: none;
70
- }
 
60
  border-color:rgba(27,31,35,.35);
61
  background-image:linear-gradient(180deg, #f0f3f6, #e6ebf1 90%)
62
  }
63
+ /* a:link {
64
  color: #00194a;
65
  text-decoration: none;
66
  }
67
  a:visited {
68
  color: #3f004a;
69
  text-decoration: none;
70
+ } */
static/header_style.css CHANGED
@@ -100,14 +100,14 @@ canvas {
100
  border-color:rgba(27,31,35,.35);
101
  background-image:linear-gradient(180deg, #f0f3f6, #e6ebf1 90%)
102
  }
103
- a:link {
104
  color: #00194a;
105
  text-decoration: none;
106
  }
107
  a:visited {
108
  color: #3f004a;
109
  text-decoration: none;
110
- }
111
  .tooltip {
112
  position: relative;
113
  display: inline-block;
 
100
  border-color:rgba(27,31,35,.35);
101
  background-image:linear-gradient(180deg, #f0f3f6, #e6ebf1 90%)
102
  }
103
+ /* a:link {
104
  color: #00194a;
105
  text-decoration: none;
106
  }
107
  a:visited {
108
  color: #3f004a;
109
  text-decoration: none;
110
+ } */
111
  .tooltip {
112
  position: relative;
113
  display: inline-block;
static/tabs.html CHANGED
@@ -32,25 +32,37 @@
32
  font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,
33
  sans-serif,Apple Color Emoji,Segoe UI Emoji;
34
  }
35
- a:link {
 
 
 
 
 
 
 
 
 
 
 
 
36
  color: #00194a;
37
  text-decoration: none;
38
  }
39
  a:visited {
40
  color: #3f004a;
41
  text-decoration: none;
42
- }
43
  </style>
44
  </head>
45
  <body>
46
 
47
- <div id="tabs" style="width: 100%; margin:0 auto;">
48
  <div>
49
  <!-- Nav tabs -->
50
  <ul class="nav nav-tabs" role="tablist">
51
  <li role="presentation" class="active"><a href="#tab1" aria-controls="tab1" role="tab" data-toggle="tab">"Efficient Training"</a></li>
52
  <li role="presentation"><a href="#tab2" aria-controls="tab2" role="tab" data-toggle="tab">Security</a></li>
53
- <li role="presentation"><a href="#tab3" aria-controls="tab3" role="tab" data-toggle="tab">Make Your Own (TBU)</a></li>
54
  </ul>
55
 
56
  <!-- Tab panes -->
@@ -86,44 +98,126 @@ a:visited {
86
  The same can happen due to broken hardware or misconfiguration.
87
  </p>
88
 
89
- <p>
90
- One possible defense is using <b>authentication</b> combined with <b>model checkpointing</b>.
91
- In this case, participants should log in (e.g. with their Hugging Face account) to interact with the rest of the collaboration.
92
- In turn, moderators can screen potential participants and add them to an allowlist.
93
- If something goes wrong (e.g. if a participant sends invalid gradients and the model diverges),
94
- the moderators remove them from the list and revert the model to the latest checkpoint unaffected by the attack.
95
- </p>
 
 
96
 
97
- <p><b>Spoiler: How to implement authentication in a decentralized system efficiently?</b></p>
98
 
 
 
 
 
 
99
  <p>
100
- Nice bonus: using this data, the moderators can acknowledge the personal contribution of each participant.
101
- </p>
 
 
102
 
103
- <p>
104
- Another defense is replacing the naive averaging of the peers' gradients with an <b>aggregation technique robust to outliers</b>.
105
- <a href="https://arxiv.org/abs/2012.10333">Karimireddy et al. (2020)</a>
106
- suggested such a technique (named CenteredClip) and proved that it does not significantly affect the model's convergence.
107
- </p>
108
 
109
- <p><b>Spoiler: How does CenteredClip protect from outliers? (Interactive Demo)</b></p>
 
 
 
 
110
 
111
- <p>
112
- In our case, CenteredClip is useful but not enough to protect from malicious participants,
113
- since it implies that the CenteredClip procedure itself is performed by a trusted server.
114
- In contrast, in our decentralized system, all participants can aggregate a part of the gradients and we cannot assume all of them to be trusted.
115
- </p>
116
-
117
- <p>
118
- Recently, <a href="https://arxiv.org/abs/2106.11257">Gorbunov et al. (2021)</a>
119
- proposed a robust aggregation protocol for decentralized systems that does not require this assumption.
120
- This protocol uses CenteredClip as a subroutine but is able to detect and ban participants who performed it incorrectly.
121
- </p>
122
  </div>
123
  <div role="tabpanel" class="tab-pane" id="tab3">
124
- <span class="padded faded text">
125
- <b> TODO 123</b> Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
126
- </span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  </div>
128
  </div>
129
 
 
32
  font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,
33
  sans-serif,Apple Color Emoji,Segoe UI Emoji;
34
  }
35
+ .tab-group {
36
+ font-size: 16px;
37
+ }
38
+ .tab-content {
39
+ margin-top: 16px;
40
+ }
41
+ ul > li {
42
+ margin: 3px 0;
43
+ }
44
+ ol > li {
45
+ margin: 5px 0;
46
+ }
47
+ /* a:link {
48
  color: #00194a;
49
  text-decoration: none;
50
  }
51
  a:visited {
52
  color: #3f004a;
53
  text-decoration: none;
54
+ } */
55
  </style>
56
  </head>
57
  <body>
58
 
59
+ <div class="tab-group" style="width: 100%; margin:0 auto;">
60
  <div>
61
  <!-- Nav tabs -->
62
  <ul class="nav nav-tabs" role="tablist">
63
  <li role="presentation" class="active"><a href="#tab1" aria-controls="tab1" role="tab" data-toggle="tab">"Efficient Training"</a></li>
64
  <li role="presentation"><a href="#tab2" aria-controls="tab2" role="tab" data-toggle="tab">Security</a></li>
65
+ <li role="presentation"><a href="#tab3" aria-controls="tab3" role="tab" data-toggle="tab">Make Your Own</a></li>
66
  </ul>
67
 
68
  <!-- Tab panes -->
 
98
  The same can happen due to broken hardware or misconfiguration.
99
  </p>
100
 
101
+ <ul>
102
+ <li>
103
+ <p>
104
+ One possible defense is using <b>authentication</b> combined with <b>model checkpointing</b>.
105
+ In this case, participants should log in (e.g. with their Hugging Face account) to interact with the rest of the collaboration.
106
+ In turn, moderators can screen potential participants and add them to an allowlist.
107
+ If something goes wrong (e.g. if a participant sends invalid gradients and the model diverges),
108
+ the moderators remove them from the list and revert the model to the latest checkpoint unaffected by the attack.
109
+ </p>
110
 
111
+ <p><b>Spoiler: How to implement authentication in a decentralized system efficiently?</b></p>
112
 
113
+ <p>
114
+ Nice bonus: using this data, the moderators can acknowledge the personal contribution of each participant.
115
+ </p>
116
+ </li>
117
+ <li>
118
  <p>
119
+ Another defense is replacing the naive averaging of the peers' gradients with an <b>aggregation technique robust to outliers</b>.
120
+ <a href="https://arxiv.org/abs/2012.10333">Karimireddy et al. (2020)</a>
121
+ suggested such a technique (named CenteredClip) and proved that it does not significantly affect the model's convergence.
122
+ </p>
123
 
124
+ <p><b>Spoiler: How does CenteredClip protect from outliers? (Interactive Demo)</b></p>
 
 
 
 
125
 
126
+ <p>
127
+ In our case, CenteredClip is useful but not enough to protect from malicious participants,
128
+ since it implies that the CenteredClip procedure itself is performed by a trusted server.
129
+ In contrast, in our decentralized system, all participants can aggregate a part of the gradients and we cannot assume all of them to be trusted.
130
+ </p>
131
 
132
+ <p>
133
+ Recently, <a href="https://arxiv.org/abs/2106.11257">Gorbunov et al. (2021)</a>
134
+ proposed a robust aggregation protocol for decentralized systems that does not require this assumption.
135
+ This protocol uses CenteredClip as a subroutine but is able to detect and ban participants who performed it incorrectly.
136
+ </p>
137
+ </li>
138
+ </ul>
 
 
 
 
139
  </div>
140
  <div role="tabpanel" class="tab-pane" id="tab3">
141
+ <ol>
142
+ <li>
143
+ Set up dataset streaming:
144
+ <ul>
145
+ <li>
146
+ <a href="https://huggingface.co/docs/datasets/share_dataset.html">Upload</a> your dataset to Hugging Face Hub
147
+ in a streaming-friendly format (<a href="https://huggingface.co/datasets/laion/laion_100m_vqgan_f8">example</a>).
148
+ </li>
149
+ <li>Stream the data from the Hub during training (see the "Efficient Training" section).</li>
150
+ </ul>
151
+ </li>
152
+ <li>
153
+ Write the code for training peers (<a href="https://github.com/learning-at-home/dalle-hivemind/blob/main/run_trainer.py">example</a>):
154
+ <ul>
155
+ <li>Implement your model, set up dataset streaming, and write the training loop.</li>
156
+ <li>
157
+ Get familiar with the hivemind library
158
+ (e.g., via the <a href="https://learning-at-home.readthedocs.io/en/latest/user/quickstart.html">quickstart</a>).
159
+ </li>
160
+ <li>
161
+ In the training loop, wrap up your PyTorch optimizer with
162
+ <a href="https://learning-at-home.readthedocs.io/en/latest/modules/optim.html#hivemind.optim.experimental.optimizer.Optimizer">hivemind.Optimizer</a>
163
+ (<a href="https://github.com/learning-at-home/dalle-hivemind/blob/main/task.py#L121">example</a>).
164
+ </li>
165
+ </ul>
166
+ </li>
167
+ <li>
168
+ <b>(optional)</b> Write the code for auxiliary peers (<a href="https://github.com/learning-at-home/dalle-hivemind/blob/main/run_aux_peer.py">example</a>):
169
+ <ul>
170
+ <li>
171
+ It is convenient to create a special kind of peers responsible for
172
+ logging loss values and metrics (e.g. to <a href="https://wandb.ai/">Weights &amp; Biases</a>)
173
+ and uploading model checkpoints (e.g. to Hugging Face Hub).
174
+ </li>
175
+ <li>
176
+ Such peers don't need to calculate gradients and may be run on cheap machines without GPUs.
177
+ </li>
178
+ <li>
179
+ They can serve as a convenient entry point to
180
+ <a href="https://learning-at-home.readthedocs.io/en/latest/modules/dht.html">hivemind.DHT</a>
181
+ (i.e., their address can be specified as <code>initial_peers</code>).
182
+ </li>
183
+ <li>
184
+ It is useful to fix their address by providing <code>host_maddrs</code> and <code>identity_path</code>
185
+ arguments to <code>hivemind.DHT</code>
186
+ (these are forwarded to the underlying <a href="https://libp2p.io/">libp2p</a> daemon).
187
+ </li>
188
+ </ul>
189
+ </li>
190
+ <li>
191
+ <b>(optional)</b> Make it easier for other people to join:
192
+ <ul>
193
+ <li>
194
+ Create notebooks for free GPU providers (Google Colab, Kaggle, AWS SageMaker, etc.).
195
+ People may run them online and/or download and run them on their own hardware.
196
+ </li>
197
+ <li>
198
+ <a href="https://huggingface.co/organizations/new">Create</a> a Hugging Face organization
199
+ with all resources related to the training
200
+ (dataset, model, inference demo, links to a dashboard with loss values and metrics, etc.).
201
+ Look at <a href="https://huggingface.co/training-transformers-together">ours</a> as an example.
202
+ </li>
203
+ <li>
204
+ Set up an authentication system (see the "Security" section).
205
+ For example, you can ask people to join your organization with their Hugging Face accounts
206
+ (Hugging Face allows you to share an invite link or manually approve new participants).
207
+ This allows you to screen participants,
208
+ acknowledge their contributions (e.g., make a leaderboard), and
209
+ ban accounts that behave maliciously.
210
+ </li>
211
+ <li>
212
+ Set up an inference demo for your model (e.g., using <a href="https://huggingface.co/spaces">Spaces</a>) or
213
+ a script that periodically uploads the inference results to show the training progress.
214
+ </li>
215
+ </ul>
216
+ </li>
217
+ </ol>
218
+ <p>
219
+ <b>Got confused?</b> Feel free to ask any questions in our <a href="https://discord.gg/uGugx9zYvN">Discord</a>!
220
+ </p>
221
  </div>
222
  </div>
223