All nits
Browse files- Accelerate.ipynb +11 -12
- CLI.gif +0 -0
- index.html +6 -3
Accelerate.ipynb
CHANGED
@@ -15,15 +15,6 @@
|
|
15 |
"---"
|
16 |
]
|
17 |
},
|
18 |
-
{
|
19 |
-
"attachments": {},
|
20 |
-
"cell_type": "markdown",
|
21 |
-
"id": "f2333422",
|
22 |
-
"metadata": {},
|
23 |
-
"source": [
|
24 |
-
"## Test Gradio {background-iframe=\"https://muellerzr-accelerate-presentation.hf.space\"}"
|
25 |
-
]
|
26 |
-
},
|
27 |
{
|
28 |
"cell_type": "markdown",
|
29 |
"id": "45e61402-f734-4500-8eb6-fcdd6f17a0d4",
|
@@ -366,7 +357,7 @@
|
|
366 |
"accelerator = Accelerator(gradient_accumulation_steps=4)\n",
|
367 |
"...\n",
|
368 |
"for batch in dataloader:\n",
|
369 |
-
" with accelerator.accumulate(model)
|
370 |
" optimizer.zero_grad()\n",
|
371 |
" inputs, targets = batch\n",
|
372 |
" outputs = model(inputs)\n",
|
@@ -522,7 +513,7 @@
|
|
522 |
" my_model = ModelClass(...)\n",
|
523 |
"\n",
|
524 |
"my_model = load_checkpoint_and_dispatch(\n",
|
525 |
-
"
|
526 |
")\n",
|
527 |
"```\n",
|
528 |
"`device_map=\"auto\"` will tell 🤗 Accelerate that it should determine where to put each layer of the model:\n",
|
@@ -546,7 +537,7 @@
|
|
546 |
" my_model = ModelClass(...)\n",
|
547 |
"\n",
|
548 |
"my_model = load_checkpoint_and_dispatch(\n",
|
549 |
-
"
|
550 |
")\n",
|
551 |
"my_model.eval()\n",
|
552 |
"\n",
|
@@ -555,6 +546,14 @@
|
|
555 |
"```"
|
556 |
]
|
557 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
{
|
559 |
"cell_type": "markdown",
|
560 |
"id": "6f5122b2-f4fe-4237-aff2-d2a69f85b692",
|
|
|
15 |
"---"
|
16 |
]
|
17 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
{
|
19 |
"cell_type": "markdown",
|
20 |
"id": "45e61402-f734-4500-8eb6-fcdd6f17a0d4",
|
|
|
357 |
"accelerator = Accelerator(gradient_accumulation_steps=4)\n",
|
358 |
"...\n",
|
359 |
"for batch in dataloader:\n",
|
360 |
+
" with accelerator.accumulate(model):\n",
|
361 |
" optimizer.zero_grad()\n",
|
362 |
" inputs, targets = batch\n",
|
363 |
" outputs = model(inputs)\n",
|
|
|
513 |
" my_model = ModelClass(...)\n",
|
514 |
"\n",
|
515 |
"my_model = load_checkpoint_and_dispatch(\n",
|
516 |
+
" my_model, \"sharded-weights\", device_map=\"auto\"\n",
|
517 |
")\n",
|
518 |
"```\n",
|
519 |
"`device_map=\"auto\"` will tell 🤗 Accelerate that it should determine where to put each layer of the model:\n",
|
|
|
537 |
" my_model = ModelClass(...)\n",
|
538 |
"\n",
|
539 |
"my_model = load_checkpoint_and_dispatch(\n",
|
540 |
+
" my_model, \"sharded-weights\", device_map=\"auto\"\n",
|
541 |
")\n",
|
542 |
"my_model.eval()\n",
|
543 |
"\n",
|
|
|
546 |
"```"
|
547 |
]
|
548 |
},
|
549 |
+
{
|
550 |
+
"cell_type": "markdown",
|
551 |
+
"id": "23911045-44a7-4e1d-aebc-db46be856234",
|
552 |
+
"metadata": {},
|
553 |
+
"source": [
|
554 |
+
"## Demo!"
|
555 |
+
]
|
556 |
+
},
|
557 |
{
|
558 |
"cell_type": "markdown",
|
559 |
"id": "6f5122b2-f4fe-4237-aff2-d2a69f85b692",
|
CLI.gif
ADDED
index.html
CHANGED
@@ -581,7 +581,7 @@ Zachary Mueller
|
|
581 |
<span id="cb13-2"><a href="#cb13-2"></a>accelerator <span class="op">=</span> Accelerator(gradient_accumulation_steps<span class="op">=</span><span class="dv">4</span>)</span>
|
582 |
<span id="cb13-3"><a href="#cb13-3"></a>...</span>
|
583 |
<span id="cb13-4"><a href="#cb13-4"></a><span class="cf">for</span> batch <span class="kw">in</span> dataloader:</span>
|
584 |
-
<span id="cb13-5"><a href="#cb13-5"></a> <span class="cf">with</span> accelerator.accumulate(model)
|
585 |
<span id="cb13-6"><a href="#cb13-6"></a> optimizer.zero_grad()</span>
|
586 |
<span id="cb13-7"><a href="#cb13-7"></a> inputs, targets <span class="op">=</span> batch</span>
|
587 |
<span id="cb13-8"><a href="#cb13-8"></a> outputs <span class="op">=</span> model(inputs)</span>
|
@@ -682,7 +682,7 @@ Zachary Mueller
|
|
682 |
<span id="cb17-4"><a href="#cb17-4"></a> my_model <span class="op">=</span> ModelClass(...)</span>
|
683 |
<span id="cb17-5"><a href="#cb17-5"></a></span>
|
684 |
<span id="cb17-6"><a href="#cb17-6"></a>my_model <span class="op">=</span> load_checkpoint_and_dispatch(</span>
|
685 |
-
<span id="cb17-7"><a href="#cb17-7"></a>
|
686 |
<span id="cb17-8"><a href="#cb17-8"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
687 |
<p><code>device_map="auto"</code> will tell 🤗 Accelerate that it should determine where to put each layer of the model:</p>
|
688 |
<ol type="1">
|
@@ -699,12 +699,15 @@ Zachary Mueller
|
|
699 |
<span id="cb18-4"><a href="#cb18-4"></a> my_model <span class="op">=</span> ModelClass(...)</span>
|
700 |
<span id="cb18-5"><a href="#cb18-5"></a></span>
|
701 |
<span id="cb18-6"><a href="#cb18-6"></a>my_model <span class="op">=</span> load_checkpoint_and_dispatch(</span>
|
702 |
-
<span id="cb18-7"><a href="#cb18-7"></a>
|
703 |
<span id="cb18-8"><a href="#cb18-8"></a>)</span>
|
704 |
<span id="cb18-9"><a href="#cb18-9"></a>my_model.<span class="bu">eval</span>()</span>
|
705 |
<span id="cb18-10"><a href="#cb18-10"></a></span>
|
706 |
<span id="cb18-11"><a href="#cb18-11"></a><span class="cf">for</span> batch <span class="kw">in</span> dataloader:</span>
|
707 |
<span id="cb18-12"><a href="#cb18-12"></a> output <span class="op">=</span> my_model(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
|
|
|
|
|
|
708 |
</section></section>
|
709 |
<section>
|
710 |
<section id="thanks-for-listening" class="title-slide slide level1 center">
|
|
|
581 |
<span id="cb13-2"><a href="#cb13-2"></a>accelerator <span class="op">=</span> Accelerator(gradient_accumulation_steps<span class="op">=</span><span class="dv">4</span>)</span>
|
582 |
<span id="cb13-3"><a href="#cb13-3"></a>...</span>
|
583 |
<span id="cb13-4"><a href="#cb13-4"></a><span class="cf">for</span> batch <span class="kw">in</span> dataloader:</span>
|
584 |
+
<span id="cb13-5"><a href="#cb13-5"></a> <span class="cf">with</span> accelerator.accumulate(model):</span>
|
585 |
<span id="cb13-6"><a href="#cb13-6"></a> optimizer.zero_grad()</span>
|
586 |
<span id="cb13-7"><a href="#cb13-7"></a> inputs, targets <span class="op">=</span> batch</span>
|
587 |
<span id="cb13-8"><a href="#cb13-8"></a> outputs <span class="op">=</span> model(inputs)</span>
|
|
|
682 |
<span id="cb17-4"><a href="#cb17-4"></a> my_model <span class="op">=</span> ModelClass(...)</span>
|
683 |
<span id="cb17-5"><a href="#cb17-5"></a></span>
|
684 |
<span id="cb17-6"><a href="#cb17-6"></a>my_model <span class="op">=</span> load_checkpoint_and_dispatch(</span>
|
685 |
+
<span id="cb17-7"><a href="#cb17-7"></a> my_model, <span class="st">"sharded-weights"</span>, device_map<span class="op">=</span><span class="st">"auto"</span></span>
|
686 |
<span id="cb17-8"><a href="#cb17-8"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
687 |
<p><code>device_map="auto"</code> will tell 🤗 Accelerate that it should determine where to put each layer of the model:</p>
|
688 |
<ol type="1">
|
|
|
699 |
<span id="cb18-4"><a href="#cb18-4"></a> my_model <span class="op">=</span> ModelClass(...)</span>
|
700 |
<span id="cb18-5"><a href="#cb18-5"></a></span>
|
701 |
<span id="cb18-6"><a href="#cb18-6"></a>my_model <span class="op">=</span> load_checkpoint_and_dispatch(</span>
|
702 |
+
<span id="cb18-7"><a href="#cb18-7"></a> my_model, <span class="st">"sharded-weights"</span>, device_map<span class="op">=</span><span class="st">"auto"</span></span>
|
703 |
<span id="cb18-8"><a href="#cb18-8"></a>)</span>
|
704 |
<span id="cb18-9"><a href="#cb18-9"></a>my_model.<span class="bu">eval</span>()</span>
|
705 |
<span id="cb18-10"><a href="#cb18-10"></a></span>
|
706 |
<span id="cb18-11"><a href="#cb18-11"></a><span class="cf">for</span> batch <span class="kw">in</span> dataloader:</span>
|
707 |
<span id="cb18-12"><a href="#cb18-12"></a> output <span class="op">=</span> my_model(batch)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
708 |
+
</section>
|
709 |
+
<section id="demo" class="slide level2">
|
710 |
+
<h2>Demo!</h2>
|
711 |
</section></section>
|
712 |
<section>
|
713 |
<section id="thanks-for-listening" class="title-slide slide level1 center">
|