Adding tabs for different set of Accelerate's features and content for large scale training features

#2
by smangrul - opened
code_samples/base/accelerate ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pre>
2
+ from accelerate import Accelerator
3
+ accelerator = Accelerator()
4
+ train_dataloader, model, optimizer, scheduler = accelerator.prepare(
5
+ dataloader, model, optimizer, scheduler
6
+ )
7
+
8
+ model.train()
9
+ for batch in train_dataloader:
10
+ optimizer.zero_grad()
11
+ inputs, targets = batch
12
+ outputs = model(inputs)
13
+ loss = loss_function(outputs, targets)
14
+ accelerator.backward(loss)
15
+ optimizer.step()
16
+ scheduler.step()
17
+ </pre>
code_samples/base/basic ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ <pre>
3
+ +from accelerate import Accelerator
4
+ +accelerator = Accelerator()
5
+ +dataloader, model, optimizer, scheduler = accelerator.prepare(
6
+ + dataloader, model, optimizer, scheduler
7
+ +)
8
+
9
+ for batch in dataloader:
10
+ optimizer.zero_grad()
11
+ inputs, targets = batch
12
+ - inputs = inputs.to(device)
13
+ - targets = targets.to(device)
14
+ outputs = model(inputs)
15
+ loss = loss_function(outputs, targets)
16
+ - loss.backward()
17
+ + accelerator.backward(loss)
18
+ optimizer.step()
19
+ scheduler.step()</pre>
20
+ ##
21
+ Everything around `accelerate` occurs with the `Accelerator` class. To use it, first make an object.
22
+ Then call `.prepare` passing in the PyTorch objects that you would normally train with. This will
23
+ return the same objects, but they will be on the correct device and distributed if needed. Then
24
+ you can train as normal, but instead of calling `loss.backward()` you call `accelerator.backward(loss)`.
25
+ Also note that you don't need to call `model.to(device)` or `inputs.to(device)` anymore, as this
26
+ is done automatically by `accelerator.prepare()`.
27
+
28
+ ##
29
+ To learn more, check out the related documentation:
30
+ - <a href="https://huggingface.co/docs/accelerate/basic_tutorials/migration" target="_blank">Migrating to 🤗 Accelerate</a>
31
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator" target="_blank">The Accelerator</a>
code_samples/base/calculating_metrics ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ <pre>
3
+ import evaluate
4
+ +from accelerate import Accelerator
5
+ +accelerator = Accelerator()
6
+ +train_dataloader, eval_dataloader, model, optimizer, scheduler = (
7
+ + accelerator.prepare(
8
+ + train_dataloader, eval_dataloader,
9
+ + model, optimizer, scheduler
10
+ + )
11
+ +)
12
+ metric = evaluate.load("accuracy")
13
+ for batch in train_dataloader:
14
+ optimizer.zero_grad()
15
+ inputs, targets = batch
16
+ - inputs = inputs.to(device)
17
+ - targets = targets.to(device)
18
+ outputs = model(inputs)
19
+ loss = loss_function(outputs, targets)
20
+ loss.backward()
21
+ optimizer.step()
22
+ scheduler.step()
23
+
24
+ model.eval()
25
+ for batch in eval_dataloader:
26
+ inputs, targets = batch
27
+ - inputs = inputs.to(device)
28
+ - targets = targets.to(device)
29
+ with torch.no_grad():
30
+ outputs = model(inputs)
31
+ predictions = outputs.argmax(dim=-1)
32
+ + predictions, references = accelerator.gather_for_metrics(
33
+ + (predictions, references)
34
+ + )
35
+ metric.add_batch(
36
+ predictions = predictions,
37
+ references = references
38
+ )
39
+ print(metric.compute())</pre>
40
+
41
+ ##
42
+ When calculating metrics on a validation set, you can use the `Accelerator.gather_for_metrics`
43
+ method to gather the predictions and references from all devices and then calculate the metric on the gathered values.
44
+ This will also *automatically* drop the padded values from the gathered tensors that were added to ensure
45
+ that all tensors have the same length. This ensures that the metric is calculated on the correct values.
46
+ ##
47
+ To learn more, check out the related documentation:
48
+
49
+ - <a href="https://huggingface.co/docs/accelerate/en/quicktour#distributed-evaluation" target="_blank">Quicktour - Calculating metrics</a>
50
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.gather_for_metrics" target="_blank">API reference</a>
51
+ - <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/multi_process_metrics.py" target="_blank">Example script</a>
code_samples/base/checkpointing ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ <pre>
3
+ from accelerate import Accelerator
4
+ accelerator = Accelerator()
5
+ dataloader, model, optimizer, scheduler = accelerator.prepare(
6
+ dataloader, model, optimizer, scheduler
7
+ )
8
+
9
+ for batch in dataloader:
10
+ optimizer.zero_grad()
11
+ inputs, targets = batch
12
+ outputs = model(inputs)
13
+ loss = loss_function(outputs, targets)
14
+ accelerator.backward(loss)
15
+ optimizer.step()
16
+ scheduler.step()
17
+ +accelerator.save_state("checkpoint_dir")
18
+ +accelerator.load_state("checkpoint_dir")</pre>
19
+ ##
20
+ To save or load a checkpoint, `Accelerator` provides the `save_state` and `load_state` methods.
21
+ These methods will save or load the state of the model, optimizer, scheduler, as well as random states and
22
+ any custom registered objects from the main process on each device to a passed in folder.
23
+ **This API is designed to save and resume training states only from within the same python script or training setup.**
24
+ ##
25
+ To learn more, check out the related documentation:
26
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/checkpoint" target="_blank">Saving and loading training states</a>
27
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.save_state" target="_blank">`save_state` API reference</a>
28
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.load_state" target="_blank">`load_state` API reference</a>
29
+ - <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/checkpointing.py" target="_blank">Example script</a>
code_samples/base/experiment_tracking ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ <pre>
3
+ from accelerate import Accelerator
4
+ -accelerator = Accelerator()
5
+ +accelerator = Accelerator(log_with="wandb")
6
+ train_dataloader, model, optimizer, scheduler = accelerator.prepare(
7
+ dataloader, model, optimizer, scheduler
8
+ )
9
+ +accelerator.init_trackers()
10
+ model.train()
11
+ for batch in train_dataloader:
12
+ optimizer.zero_grad()
13
+ inputs, targets = batch
14
+ outputs = model(inputs)
15
+ loss = loss_function(outputs, targets)
16
+ + accelerator.log({"loss":loss})
17
+ accelerator.backward(loss)
18
+ optimizer.step()
19
+ scheduler.step()
20
+ +accelerator.end_training()
21
+ </pre>
22
+ ##
23
+ To use experiment trackers with `accelerate`, simply pass the desired tracker to the `log_with` parameter
24
+ when building the `Accelerator` object. Then initialize the tracker(s) by running `Accelerator.init_trackers()`
25
+ passing in any configurations they may need. Afterwards call `Accelerator.log` to log a particular value to your tracker.
26
+ At the end of training call `accelerator.end_training()` to call any finalization functions a tracking library
27
+ may need automatically.
28
+ ##
29
+ To learn more, check out the related documentation:
30
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/tracking" target="_blank">Using experiment trackers</a>
31
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.log" target="_blank">Accelerator API Reference</a>
32
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/tracking" target="_blank">Tracking API Reference</a>
code_samples/base/gradient_accumulation ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ <pre>
3
+ from accelerate import Accelerator
4
+ accelerator = Accelerator(
5
+ + gradient_accumulation_steps=2,
6
+ )
7
+ dataloader, model, optimizer, scheduler = accelerator.prepare(
8
+ dataloader, model, optimizer, scheduler
9
+ )
10
+
11
+ for batch in dataloader:
12
+ + with accelerator.accumulate(model):
13
+ optimizer.zero_grad()
14
+ inputs, targets = batch
15
+ outputs = model(inputs)
16
+ loss = loss_function(outputs, targets)
17
+ accelerator.backward(loss)
18
+ optimizer.step()
19
+ scheduler.step()</pre>
20
+
21
+ ##
22
+ When performing gradient accumulation in a distributed setup, there are many opportunities for efficiency mistakes
23
+ to occur. `Accelerator` provides a context manager that will take care of the details for you and ensure that the
24
+ model is training correctly. Simply wrap the training loop in the `Accelerator.accumulate` context manager
25
+ while passing in the model you are training on and during training the gradients will accumulate and synchronize
26
+ automatically when needed.
27
+
28
+ ##
29
+ To learn more, check out the related documentation:
30
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/gradient_accumulation" target="_blank">Performing gradient accumulation</a>
31
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.accumulate" target="_blank">API reference</a>
32
+ - <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/gradient_accumulation.py" target="_blank">Example script</a>
33
+ - <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/automatic_gradient_accumulation.py" target="_blank">Performing automatic gradient accumulation example script</a>
code_samples/base/initial ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pre>
2
+ for batch in dataloader:
3
+ optimizer.zero_grad()
4
+ inputs, targets = batch
5
+ inputs = inputs.to(device)
6
+ targets = targets.to(device)
7
+ outputs = model(inputs)
8
+ loss = loss_function(outputs, targets)
9
+ loss.backward()
10
+ optimizer.step()
11
+ scheduler.step()</pre>
code_samples/base/initial_with_metrics ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pre>
2
+ import evaluate
3
+ metric = evaluate.load("accuracy")
4
+ for batch in train_dataloader:
5
+ optimizer.zero_grad()
6
+ inputs, targets = batch
7
+ inputs = inputs.to(device)
8
+ targets = targets.to(device)
9
+ outputs = model(inputs)
10
+ loss = loss_function(outputs, targets)
11
+ loss.backward()
12
+ optimizer.step()
13
+ scheduler.step()
14
+
15
+ model.eval()
16
+ for batch in eval_dataloader:
17
+ inputs, targets = batch
18
+ inputs = inputs.to(device)
19
+ targets = targets.to(device)
20
+ with torch.no_grad():
21
+ outputs = model(inputs)
22
+ predictions = outputs.argmax(dim=-1)
23
+ metric.add_batch(
24
+ predictions = predictions,
25
+ references = references
26
+ )
27
+ print(metric.compute())</pre>
code_samples/large_scale_training/aws_sagemaker ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for running code remotely on AWS SageMaker. Replace placeholder `xxxxx` with
4
+ appropriate values.
5
+
6
+ <pre>
7
+ base_job_name: accelerate-sagemaker-1
8
+ compute_environment: AMAZON_SAGEMAKER
9
+ distributed_type: 'NO'
10
+ dynamo_backend: 'NO'
11
+ ec2_instance_type: ml.p3.2xlarge
12
+ gpu_ids: all
13
+ iam_role_name: xxxxx
14
+ mixed_precision: 'no'
15
+ num_machines: 1
16
+ profile: xxxxx
17
+ py_version: py38
18
+ pytorch_version: 1.10.2
19
+ region: us-east-1
20
+ transformers_version: 4.17.0
21
+ use_cpu: false
22
+ </pre>
23
+ ##
24
+ <pre>
25
+ from accelerate import Accelerator
26
+
27
+ def parse_args():
28
+ parser = argparse.ArgumentParser(description="sample task")
29
+
30
+ parser.add_argument(
31
+ "--pad_to_max_length",
32
+ - action="store_true",
33
+ + type=bool,
34
+ + default=False,
35
+ help="If passed, pad all samples to `max_length`. Otherwise, dynamic padding is used.",
36
+ )
37
+
38
+ ...
39
+
40
+
41
+ + def main():
42
+ accelerator = Accelerator()
43
+
44
+ model, optimizer, training_dataloader, scheduler = accelerator.prepare(
45
+ model, optimizer, training_dataloader, scheduler
46
+ )
47
+
48
+ for batch in training_dataloader:
49
+ optimizer.zero_grad()
50
+ inputs, targets = batch
51
+ outputs = model(inputs)
52
+ loss = loss_function(outputs, targets)
53
+ accelerator.backward(loss)
54
+ optimizer.step()
55
+ scheduler.step()
56
+
57
+ - torch.save('/opt/ml/model')
58
+ + accelerator.save('/opt/ml/model')
59
+
60
+ + if __name__ == "__main__":
61
+ + main()
62
+ </pre>
63
+ Launching a script using default accelerate config file looks like the following:
64
+ ```
65
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
66
+ ```
67
+ ##
68
+ SageMaker doesn’t support argparse actions. If you want to use, for example, boolean hyperparameters, you need to specify type as bool in your script and provide an explicit True or False value for this hyperparameter. An example of this is shown above for the `pad_to_max_length` argument. Another important point is to save all the output artifacts to `/opt/ml/model` or use `os.environ["SM_MODEL_DIR"]` as your save directory. After training, artifacts in this directory are uploaded to S3; an example is shown in the code snippet above.
69
+
70
+ You can provide custom docker image, input channels pointing to S3 data locations and use SageMaker metrics logging
71
+ as part of advanced features. Please refer to <a href="https://github.com/huggingface/notebooks/tree/main/sagemaker/22_accelerate_sagemaker_examples" target="_blank">Examples showcasing AWS SageMaker integration of 🤗 Accelerate</a>
72
+
73
+ ##
74
+ To learn more, check out the related documentation:
75
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/sagemaker" target="_blank">How to use 🤗 Accelerate with SageMaker</a>
76
+ - <a href="https://github.com/huggingface/notebooks/tree/main/sagemaker/22_accelerate_sagemaker_examples" target="_blank">Examples showcasing AWS SageMaker integration of 🤗 Accelerate</a>
77
+ - <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Accelerate CLI</a>
code_samples/large_scale_training/deepspeed ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for mixed-precision training using DeepSpeed ZeRO Stage-3 with CPU offloading on 8 GPUs.
4
+ <pre>
5
+ compute_environment: LOCAL_MACHINE
6
+ deepspeed_config:
7
+ gradient_accumulation_steps: 1
8
+ gradient_clipping: 1.0
9
+ offload_optimizer_device: cpu
10
+ offload_param_device: cpu
11
+ zero3_init_flag: true
12
+ zero3_save_16bit_model: true
13
+ zero_stage: 3
14
+ distributed_type: DEEPSPEED
15
+ downcast_bf16: 'no'
16
+ dynamo_backend: 'NO'
17
+ fsdp_config: {}
18
+ machine_rank: 0
19
+ main_training_function: main
20
+ megatron_lm_config: {}
21
+ mixed_precision: fp16
22
+ num_machines: 1
23
+ num_processes: 8
24
+ rdzv_backend: static
25
+ same_network: true
26
+ use_cpu: false
27
+ </pre>
28
+ ##
29
+ <pre>
30
+ from accelerate import Accelerator
31
+
32
+ + def main():
33
+ accelerator = Accelerator()
34
+
35
+ model, optimizer, training_dataloader, scheduler = accelerator.prepare(
36
+ model, optimizer, training_dataloader, scheduler
37
+ )
38
+
39
+ for batch in training_dataloader:
40
+ optimizer.zero_grad()
41
+ inputs, targets = batch
42
+ outputs = model(inputs)
43
+ loss = loss_function(outputs, targets)
44
+ accelerator.backward(loss)
45
+ optimizer.step()
46
+ scheduler.step()
47
+
48
+ ...
49
+
50
+ generated_tokens = accelerator.unwrap_model(model).generate(
51
+ batch["input_ids"],
52
+ attention_mask=batch["attention_mask"],
53
+ **gen_kwargs,
54
+ + synced_gpus=True #required for ZeRO Stage 3
55
+ )
56
+ ...
57
+
58
+ accelerator.unwrap_model(model).save_pretrained(
59
+ args.output_dir,
60
+ is_main_process=accelerator.is_main_process,
61
+ save_function=accelerator.save,
62
+ + state_dict=accelerator.get_state_dict(model), #required for ZeRO Stage 3
63
+ )
64
+
65
+ ...
66
+
67
+ + if __name__ == "__main__":
68
+ + main()
69
+ </pre>
70
+
71
+ Launching a script using default accelerate config file looks like the following:
72
+ ```
73
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
74
+ ```
75
+
76
+ Alternatively, you can use `accelerate launch` with right config params for multi-gpu training as shown below
77
+ ```
78
+ accelerate launch \
79
+ --use_deepspeed \
80
+ --num_processes=8 \
81
+ --mixed_precision=fp16 \
82
+ --zero_stage=3 \
83
+ --gradient_accumulation_steps=1 \
84
+ --gradient_clipping=1 \
85
+ --zero3_init_flag=True \
86
+ --zero3_save_16bit_model=True \
87
+ --offload_optimizer_device=cpu \
88
+ --offload_param_device=cpu \
89
+ {script_name.py} {--arg1} {--arg2} ...
90
+ ```
91
+
92
+ ##
93
+ For core DeepSpeed features supported via accelerate config file, no changes are required for ZeRO Stages 1 and 2. For ZeRO Stage-3, transformers' `generate` function requires `synced_gpus=True` and `save_pretrained` requires the `state_dict` param due to the fact that model parameters are sharded across the GPUs.
94
+
95
+ For advanced users who like granular control via a DeepSpeed config file, it is supported: you can pass its location when running the `accelerate config` command. You can also specify values of most of the fields in the DeepSpeed config file as `auto` and they are filled automatically via the arguments of the `accelerate launch` command and the `accelerator.prepare` call, thereby making life simple for users. Please refer to the docs on <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed#deepspeed-config-file" target="_blank">DeepSpeed Config File</a>
96
+
97
+ ##
98
+ To learn more, check out the related documentation:
99
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed" target="_blank">How to use DeepSpeed</a>
100
+ - <a href="https://huggingface.co/blog/accelerate-deepspeed" target="_blank">Accelerate Large Model Training using DeepSpeed</a>
101
+ - <a href="https://huggingface.co/docs/accelerate/package_reference/deepspeed" target="_blank">DeepSpeed Utilities</a>
code_samples/large_scale_training/megatron-lm ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for BF16 mixed-precision training using Megatron-LM with DPxTPxPP=2x2x2 degrees on 8 GPUs. (DP-Data Parallelism, PP-Pipeline Parallelism, TP-Tensor Parallelism). It is also using Sequence Parallelism and selective activation checkpointing along with sharded optimizer.
4
+ <pre>
5
+ compute_environment: LOCAL_MACHINE
6
+ deepspeed_config: {}
7
+ distributed_type: MEGATRON_LM
8
+ downcast_bf16: 'no'
9
+ dynamo_backend: 'NO'
10
+ fsdp_config: {}
11
+ machine_rank: 0
12
+ main_training_function: main
13
+ megatron_lm_config:
14
+ megatron_lm_gradient_clipping: 1.0
15
+ megatron_lm_num_micro_batches: 2
16
+ megatron_lm_pp_degree: 2
17
+ megatron_lm_recompute_activations: true
18
+ megatron_lm_sequence_parallelism: true
19
+ megatron_lm_tp_degree: 2
20
+ megatron_lm_use_distributed_optimizer: true
21
+ mixed_precision: bf16
22
+ num_machines: 1
23
+ num_processes: 8
24
+ rdzv_backend: static
25
+ same_network: true
26
+ use_cpu: false
27
+ </pre>
28
+ ##
29
+ <pre>
30
+ from accelerate import Accelerator
31
+
32
+ + def main():
33
+ accelerator = Accelerator()
34
+
35
+ ...
36
+
37
+ - lr_scheduler = get_scheduler(
38
+ - name=args.lr_scheduler_type,
39
+ + lr_scheduler = accelerate.utils.MegatronLMDummyScheduler(
40
+ optimizer=optimizer,
41
+ num_warmup_steps=args.num_warmup_steps * args.gradient_accumulation_steps,
42
+ num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
43
+ )
44
+
45
+
46
+ model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
47
+ model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
48
+ )
49
+
50
+ total_batch_size = (
51
+ - args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
52
+ + accelerator.state.megatron_lm_plugin.global_batch_size
53
+ )
54
+
55
+ for batch in train_dataloader:
56
+ optimizer.zero_grad()
57
+ inputs, targets = batch
58
+ outputs = model(inputs)
59
+ loss = loss_function(outputs, targets)
60
+ accelerator.backward(loss)
61
+ optimizer.step()
62
+ lr_scheduler.step()
63
+
64
+ ...
65
+
66
+ # in eval loop
67
+ for step, batch in enumerate(eval_dataloader):
68
+ with torch.no_grad():
69
+ outputs = model(**batch)
70
+ loss = outputs.loss
71
+ - losses.append(accelerator.gather_for_metrics(loss.repeat(args.per_device_eval_batch_size)))
72
+ + losses.append(loss) # For Megatron-LM, the losses are already averaged across the data parallel group
73
+ - losses = torch.cat(losses)
74
+ + losses = torch.tensor(losses)
75
+ eval_loss = torch.mean(losses)
76
+ perplexity = math.exp(eval_loss)
77
+ logger.info(f"epoch {epoch}: perplexity: {perplexity} eval_loss: {eval_loss}")
78
+
79
+ + accelerator.save_state(output_dir)
80
+
81
+ + if __name__ == "__main__":
82
+ + main()
83
+ </pre>
84
+
85
+ Launching a script using default accelerate config file looks like the following:
86
+ ```
87
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
88
+ ```
89
+
90
+ Alternatively, you can use `accelerate launch` with right config params for multi-gpu training as shown below
91
+ ```
92
+ accelerate launch \
93
+ --use_megatron_lm \
94
+ --num_processes=8 \
95
+ --mixed_precision=bf16 \
96
+ --megatron_lm_tp_degree=2 \
97
+ --megatron_lm_pp_degree=2 \
98
+ --megatron_lm_num_micro_batches=2 \
99
+ --megatron_lm_sequence_parallelism=true \
100
+ --megatron_lm_recompute_activations=true \
101
+ --megatron_lm_use_distributed_optimizer=true \
102
+ {script_name.py} {--arg1} {--arg2} ...
103
+ ```
104
+
105
+ ##
106
+ For Megatron-LM, the supported models are Transformers GPT2, Megatron-BERT and T5, covering the decoder-only, encoder-only and encoder-decoder model classes. Given the complexity of the features of Megatron-LM, the 4 changes required to get started are:
107
+ 1. Use `accelerate.utils.MegatronLMDummyScheduler`: Megatron-LM uses its own implementation of the optimizer, so the corresponding scheduler compatible with it needs to be used.
108
+ 2. Getting the details of the total batch size now needs to be cognizant of tensor and pipeline parallel sizes.
109
+ 3. Losses are already averaged across the data parallel group
110
+ 4. Save the model using `accelerator.save_state` instead of transformers `save_pretrained`
111
+
112
+ These changes have been highlighted in the code snippet above.
113
+
114
+ Megatron-LM integration supports many advanced features such as the ability to leverage a custom train step, using Megatron-LM indexed datasets, checkpoint reshaping and interoperability utilities, the `megatron_generate` function for text generation using Tensor and Pipeline Parallelism, and support for RoPE/ALiBi positional embeddings and Multi-Query Attention. However, these require more changes owing to the complexity; they are worth it for getting the highest performance.
115
+
116
+ ##
117
+ To learn more, check out the related documentation:
118
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/megatron_lm" target="_blank">How to use Megatron-LM</a>
119
+ - <a href="https://github.com/pacman100/accelerate-megatron-test" target="_blank">Examples showcasing the Megatron-LM integration of Accelerate</a>
code_samples/large_scale_training/multi_gpu ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for using multi-gpu training with 4 GPUs.
4
+ <pre>
5
+ compute_environment: LOCAL_MACHINE
6
+ deepspeed_config: {}
7
+ distributed_type: MULTI_GPU
8
+ downcast_bf16: 'no'
9
+ dynamo_backend: 'NO'
10
+ fsdp_config: {}
11
+ gpu_ids: all
12
+ machine_rank: 0
13
+ main_training_function: main
14
+ megatron_lm_config: {}
15
+ mixed_precision: 'no'
16
+ num_machines: 1
17
+ num_processes: 4
18
+ rdzv_backend: static
19
+ same_network: true
20
+ use_cpu: false</pre>
21
+ ##
22
+ <pre>
23
+ from accelerate import Accelerator
24
+
25
+ + def main():
26
+ accelerator = Accelerator()
27
+
28
+ model, optimizer, training_dataloader, scheduler = accelerator.prepare(
29
+ model, optimizer, training_dataloader, scheduler
30
+ )
31
+
32
+ for batch in training_dataloader:
33
+ optimizer.zero_grad()
34
+ inputs, targets = batch
35
+ outputs = model(inputs)
36
+ loss = loss_function(outputs, targets)
37
+ accelerator.backward(loss)
38
+ optimizer.step()
39
+ scheduler.step()
40
+
41
+ + if __name__ == "__main__":
42
+ + main()
43
+ </pre>
44
+
45
+ Launching a script using default accelerate config file looks like the following:
46
+ ```
47
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
48
+ ```
49
+
50
+ Alternatively, you can use `accelerate launch` with right config params for multi-gpu training as shown below
51
+ ```
52
+ accelerate launch --multi_gpu --num_processes=4 {script_name.py} {--arg1} {--arg2} ...
53
+ ```
54
+
55
+ ##
56
+ Using this feature involves no changes to the code apart from the ones mentioned in the tab `Simplify your code and improve efficieny`.
57
+ ##
58
+ To learn more, check out the related documentation:
59
+ - <a href="https://huggingface.co/docs/accelerate/main/en/basic_tutorials/launch" target="_blank">Launching distributed code</a>
60
+ - <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Accelerate CLI</a>
code_samples/large_scale_training/multi_node_multi_gpu ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for using multi-gpu training with 4 GPUs on 2 nodes/machines.
4
+
5
+ On Node/Machine 1:
6
+ <pre>
7
+ compute_environment: LOCAL_MACHINE
8
+ deepspeed_config: {}
9
+ distributed_type: MULTI_GPU
10
+ downcast_bf16: 'no'
11
+ dynamo_backend: 'NO'
12
+ fsdp_config: {}
13
+ gpu_ids: all
14
+ machine_rank: 0
15
+ main_process_ip: 192.168.20.1
16
+ main_process_port: 8080
17
+ main_training_function: main
18
+ megatron_lm_config: {}
19
+ mixed_precision: 'no'
20
+ num_machines: 2
21
+ num_processes: 8
22
+ rdzv_backend: static
23
+ same_network: true
24
+ use_cpu: false
25
+ </pre>
26
+
27
+ On Node/Machine 2:
28
+ <pre>
29
+ compute_environment: LOCAL_MACHINE
30
+ deepspeed_config: {}
31
+ distributed_type: MULTI_GPU
32
+ downcast_bf16: 'no'
33
+ dynamo_backend: 'NO'
34
+ fsdp_config: {}
35
+ gpu_ids: all
36
+ -machine_rank: 0
37
+ +machine_rank: 1
38
+ main_process_ip: 192.168.20.1
39
+ main_process_port: 8080
40
+ main_training_function: main
41
+ megatron_lm_config: {}
42
+ mixed_precision: 'no'
43
+ num_machines: 2
44
+ num_processes: 8
45
+ rdzv_backend: static
46
+ same_network: true
47
+ use_cpu: false
48
+ </pre>
49
+ ##
50
+ <pre>
51
+ from accelerate import Accelerator
52
+
53
+ + def main():
54
+ accelerator = Accelerator()
55
+
56
+ model, optimizer, training_dataloader, scheduler = accelerator.prepare(
57
+ model, optimizer, training_dataloader, scheduler
58
+ )
59
+
60
+ for batch in training_dataloader:
61
+ optimizer.zero_grad()
62
+ inputs, targets = batch
63
+ outputs = model(inputs)
64
+ loss = loss_function(outputs, targets)
65
+ accelerator.backward(loss)
66
+ optimizer.step()
67
+ scheduler.step()
68
+
69
+ + if __name__ == "__main__":
70
+ + main()
71
+ </pre>
72
+
73
+ Launching a script using default accelerate config file looks like the following:
74
+ ```
75
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
76
+ ```
77
+
78
+ Alternatively, you can use `accelerate launch` with right config params for multi-gpu training as shown below. Replace `{node_number}` with appropriate number.
79
+ ```
80
+ accelerate launch --multi_gpu --num_machines=2 --num_processes=8 --main_process_ip="192.168.20.1" --main_process_port=8080 \
81
+ --machine_rank={node_number} {script_name.py} {--arg1} {--arg2} ...
82
+ ```
83
+
84
+ ##
85
+ Using this feature involves no changes to the code apart from the ones mentioned in the tab `Simplify your code and improve efficieny`.
86
+ ##
87
+ To learn more, check out the related documentation:
88
+ - <a href="https://huggingface.co/docs/accelerate/main/en/basic_tutorials/launch" target="_blank">Launching distributed code</a>
89
+ - <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Accelerate CLI</a>
code_samples/large_scale_training/pytorch_fsdp ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##
2
+ Run `accelerate config` and answer the questionnaire accordingly.
3
+ Below is an example yaml for BF16 mixed-precision training using PyTorch FSDP with CPU offloading on 8 GPUs.
4
+ <pre>
5
+ compute_environment: LOCAL_MACHINE
6
+ deepspeed_config: {}
7
+ distributed_type: FSDP
8
+ downcast_bf16: 'no'
9
+ dynamo_backend: 'NO'
10
+ fsdp_config:
11
+ fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
12
+ fsdp_backward_prefetch_policy: BACKWARD_PRE
13
+ fsdp_offload_params: true
14
+ fsdp_sharding_strategy: 1
15
+ fsdp_state_dict_type: FULL_STATE_DICT
16
+ fsdp_transformer_layer_cls_to_wrap: T5Block
17
+ machine_rank: 0
18
+ main_training_function: main
19
+ megatron_lm_config: {}
20
+ mixed_precision: bf16
21
+ num_machines: 1
22
+ num_processes: 8
23
+ rdzv_backend: static
24
+ same_network: true
25
+ use_cpu: false
26
+ </pre>
27
+ ##
28
+ <pre>
29
+ from accelerate import Accelerator
30
+
31
+ + def main():
32
+ accelerator = Accelerator()
33
+
34
+ model = accelerator.prepare(model)
35
+
36
+ optimizer, training_dataloader, scheduler = accelerator.prepare(
37
+ optimizer, training_dataloader, scheduler
38
+ )
39
+
40
+ for batch in training_dataloader:
41
+ optimizer.zero_grad()
42
+ inputs, targets = batch
43
+ outputs = model(inputs)
44
+ loss = loss_function(outputs, targets)
45
+ accelerator.backward(loss)
46
+ optimizer.step()
47
+ scheduler.step()
48
+
49
+ ...
50
+
51
+ + if __name__ == "__main__":
52
+ + main()
53
+ </pre>
54
+
55
+ Launching a script using default accelerate config file looks like the following:
56
+ ```
57
+ accelerate launch {script_name.py} {--arg1} {--arg2} ...
58
+ ```
59
+
60
+ Alternatively, you can use `accelerate launch` with right config params for multi-gpu training as shown below
61
+ ```
62
+ accelerate launch \
63
+ --use_fsdp \
64
+ --num_processes=8 \
65
+ --mixed_precision=bf16 \
66
+ --fsdp_sharding_strategy=1 \
67
+ --fsdp_auto_wrap_policy=TRANSFORMER_BASED_WRAP \
68
+ --fsdp_transformer_layer_cls_to_wrap=T5Block \
69
+ --fsdp_offload_params=true \
70
+ {script_name.py} {--arg1} {--arg2} ...
71
+ ```
72
+
73
+ ##
74
+ For PyTorch FSDP, you need to prepare the model first before preparing the optimizer since FSDP will shard parameters in-place and this will break any previously initialized optimizers. The same is outlined in the above code snippet. For transformer models, please use the `TRANSFORMER_BASED_WRAP` auto wrap policy as shown in the config above.
75
+
76
+
77
+ ##
78
+ To learn more, check out the related documentation:
79
+ - <a href="https://huggingface.co/docs/accelerate/usage_guides/fsdp" target="_blank">How to use FSDP</a>
80
+ - <a href="https://huggingface.co/blog/pytorch-fsdp" target="_blank">Accelerate Large Model Training using PyTorch Fully Sharded Data Parallel</a>
src/app.py CHANGED
@@ -4,47 +4,140 @@ from template import get_templates
4
 
5
  templates = get_templates()
6
 
7
- def change(inp):
 
8
  """Based on an `inp`, render and highlight the appropriate code sample.
9
 
10
  Args:
11
  inp (`str`):
12
  The input button from the interface.
 
 
13
 
14
  Returns:
15
  `tuple`: A tuple of the highlighted code diff, and the title for the section.
16
  """
17
- code, explanation, docs = get_text(inp)
18
- if inp == "Basic":
19
- return (highlight(code), "## Accelerate Code (Base Integration)", explanation, docs)
20
- elif inp == "Calculating Metrics":
21
- return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
22
- else:
23
- return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
 
 
 
 
24
 
25
- default = change("Basic")
26
 
27
- with gr.Blocks() as demo:
 
 
 
 
28
  inp = gr.Radio(
29
- ["Basic", "Calculating Metrics", "Checkpointing", "Experiment Tracking", "Gradient Accumulation"],
30
  label="Select a feature you would like to integrate",
31
- value="Basic"
32
  )
33
  with gr.Row():
34
  with gr.Column():
35
  feature = gr.Markdown("## Accelerate Code")
36
  out = gr.Markdown(default[0])
37
  with gr.Row():
38
- with gr.Column():
39
  gr.Markdown("## Explanation")
40
  explanation = gr.Markdown(default[2])
41
  with gr.Row():
42
- with gr.Column():
43
  gr.Markdown("## Documentation Links")
44
  docs = gr.Markdown(default[3])
45
- inp.change(
46
- fn=change,
47
- inputs=inp,
48
- outputs=[out, feature, explanation, docs]
 
 
 
 
 
49
  )
50
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  templates = get_templates()
6
 
7
+
8
+ def change(inp, textbox):
9
  """Based on an `inp`, render and highlight the appropriate code sample.
10
 
11
  Args:
12
  inp (`str`):
13
  The input button from the interface.
14
+ textbox (`str`):
15
+ The textbox specifying the tab name from the interface.
16
 
17
  Returns:
18
  `tuple`: A tuple of the highlighted code diff, and the title for the section.
19
  """
20
+ if textbox == "base":
21
+ code, explanation, docs = get_text(inp, textbox)
22
+ if inp == "Basic":
23
+ return (highlight(code), "## Accelerate Code (Base Integration)", explanation, docs)
24
+ elif inp == "Calculating Metrics":
25
+ return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
26
+ else:
27
+ return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
28
+ elif textbox == "large_scale_training":
29
+ config, code, explanation, docs = get_text(inp, textbox)
30
+ return (highlight(config), highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
31
 
 
32
 
33
+ default = change("Basic", "base")
34
+
35
+
36
+ def base_features(textbox):
37
+ # textbox.value = "base"
38
  inp = gr.Radio(
39
+ ["Basic", "Calculating Metrics", "Checkpointing", "Experiment Tracking", "Gradient Accumulation"],
40
  label="Select a feature you would like to integrate",
41
+ value="Basic",
42
  )
43
  with gr.Row():
44
  with gr.Column():
45
  feature = gr.Markdown("## Accelerate Code")
46
  out = gr.Markdown(default[0])
47
  with gr.Row():
48
+ with gr.Column():
49
  gr.Markdown("## Explanation")
50
  explanation = gr.Markdown(default[2])
51
  with gr.Row():
52
+ with gr.Column():
53
  gr.Markdown("## Documentation Links")
54
  docs = gr.Markdown(default[3])
55
+ inp.change(fn=change, inputs=[inp, textbox], outputs=[out, feature, explanation, docs])
56
+
57
+
58
+ def large_scale_training(textbox):
59
+ # textbox.value = "large_scale_training"
60
+ inp = gr.Radio(
61
+ ["Multi GPU", "Multi Node Multi GPU", "AWS SageMaker", "DeepSpeed", "PyTorch FSDP", "Megatron-LM"],
62
+ label="Select a feature you would like to integrate",
63
+ value="Basic",
64
  )
65
+ with gr.Row():
66
+ with gr.Column():
67
+ feature = gr.Markdown("## Accelerate Config")
68
+ config = gr.Markdown("")
69
+ with gr.Row():
70
+ with gr.Column():
71
+ feature = gr.Markdown("## Accelerate Code")
72
+ out = gr.Markdown("")
73
+ with gr.Row():
74
+ with gr.Column():
75
+ gr.Markdown("## Explanation")
76
+ explanation = gr.Markdown("")
77
+ with gr.Row():
78
+ with gr.Column():
79
+ gr.Markdown("## Documentation Links")
80
+ docs = gr.Markdown("")
81
+ inp.change(fn=change, inputs=[inp, textbox], outputs=[config, out, feature, explanation, docs])
82
+
83
+
84
+ # def big_model_inference():
85
+ # inp = gr.Radio(
86
+ # ["Accelerate's Big Model Inference",], # "DeepSpeed ZeRO Stage-3 Offload"
87
+ # label="Select a feature you would like to integrate",
88
+ # value="Basic",
89
+ # )
90
+ # with gr.Row():
91
+ # with gr.Column():
92
+ # feature = gr.Markdown("## Accelerate Code")
93
+ # out = gr.Markdown(default[0])
94
+ # with gr.Row():
95
+ # with gr.Column():
96
+ # gr.Markdown(default[1])
97
+ # explanation = gr.Markdown(default[2])
98
+ # with gr.Row():
99
+ # with gr.Column():
100
+ # gr.Markdown("## Documentation Links")
101
+ # docs = gr.Markdown(default[3])
102
+ # inp.change(fn=change, inputs=[inp, "big_model_inference"], outputs=[out, feature, explanation, docs])
103
+
104
+
105
+ # def notebook_launcher():
106
+ # inp = gr.Radio(
107
+ # ["Colab GPU", "Colab TPU", "Kaggle GPU", "Kaggle Multi GPU", "Kaggle TPU", "Multi GPU VMs"],
108
+ # label="Select a feature you would like to integrate",
109
+ # value="Basic",
110
+ # )
111
+ # with gr.Row():
112
+ # with gr.Column():
113
+ # feature = gr.Markdown("## Accelerate Code")
114
+ # out = gr.Markdown(default[0])
115
+ # with gr.Row():
116
+ # with gr.Column():
117
+ # gr.Markdown(default[1])
118
+ # explanation = gr.Markdown(default[2])
119
+ # with gr.Row():
120
+ # with gr.Column():
121
+ # gr.Markdown("## Documentation Links")
122
+ # docs = gr.Markdown(default[3])
123
+ # inp.change(fn=change, inputs=[inp, "notebook_launcher"], outputs=[out, feature, explanation, docs])
124
+
125
+
126
+ with gr.Blocks() as demo:
127
+
128
+ with gr.Tabs():
129
+ with gr.TabItem("Simplify your code and improve efficieny"):
130
+ textbox = gr.Textbox(label="tab_name", visible=False, value="base")
131
+ base_features(textbox)
132
+ with gr.TabItem("Large Scale Training"):
133
+ textbox = gr.Textbox(label="tab_name", visible=False, value="large_scale_training")
134
+ large_scale_training(textbox)
135
+ with gr.TabItem("Big Model Inference"):
136
+ # big_model_inference()
137
+ pass
138
+ with gr.TabItem("Notebook Launcher Intergation"):
139
+ # notebook_launcher()
140
+ pass
141
+
142
+
143
+ demo.launch()
src/markup.py CHANGED
@@ -17,6 +17,7 @@ from template import get_filename
17
  _remove_color = "rgb(103,6,12)"
18
  _addition_color = "rgb(6,103,12)"
19
 
 
20
  def mark_text(text, add=True):
21
  """Marks text with a highlight color for addition or removal.
22
 
@@ -35,7 +36,8 @@ def mark_text(text, add=True):
35
  color = _remove_color
36
  return f'<mark style="background-color:{color}!important;color:white!important">{text}</mark>'
37
 
38
- def highlight(code:str):
 
39
  """Takes in code and returns the respective highlighted code sample.
40
 
41
  Args:
@@ -43,7 +45,7 @@ def highlight(code:str):
43
  Code from a file.
44
  """
45
  lines = code.split("\n")
46
- for i,line in enumerate(lines):
47
  if line.startswith("-"):
48
  lines[i] = "- " + line[1:]
49
  lines[i] = mark_text(lines[i], False)
@@ -54,12 +56,12 @@ def highlight(code:str):
54
  lines[i] = " " + line
55
  return "\n".join(lines).rstrip()
56
 
57
- def get_text(option):
 
58
  """
59
  Reads in an option and returns the code, explanation, and documentation links
60
  """
61
- filename = option.lower().replace(' ', '_')
62
- with open(get_filename(filename)) as f:
63
  output = f.read()
64
- code, explanation, doclink = output.split("##\n")[1:]
65
- return code, explanation, doclink
 
17
  _remove_color = "rgb(103,6,12)"
18
  _addition_color = "rgb(6,103,12)"
19
 
20
+
21
  def mark_text(text, add=True):
22
  """Marks text with a highlight color for addition or removal.
23
 
 
36
  color = _remove_color
37
  return f'<mark style="background-color:{color}!important;color:white!important">{text}</mark>'
38
 
39
+
40
+ def highlight(code: str):
41
  """Takes in code and returns the respective highlighted code sample.
42
 
43
  Args:
 
45
  Code from a file.
46
  """
47
  lines = code.split("\n")
48
+ for i, line in enumerate(lines):
49
  if line.startswith("-"):
50
  lines[i] = "- " + line[1:]
51
  lines[i] = mark_text(lines[i], False)
 
56
  lines[i] = " " + line
57
  return "\n".join(lines).rstrip()
58
 
59
+
60
+ def get_text(option, tab):
61
  """
62
  Reads in an option and returns the code, explanation, and documentation links
63
  """
64
+ filename = option.lower().replace(" ", "_")
65
+ with open(get_filename(tab, filename)) as f:
66
  output = f.read()
67
+ return output.split("##\n")[1:]
 
src/template.py CHANGED
@@ -15,17 +15,16 @@ import os
15
 
16
  TEMPLATES = ["initial", "initial_with_metrics", "accelerate"]
17
 
18
- def get_filename(template: str) -> str:
 
19
  """
20
  Takes an template and returns the respective filename relative to the cwd.
21
  """
22
- return os.path.join(os.getcwd(), "code_samples", template)
 
23
 
24
  def get_templates() -> dict:
25
  """
26
  Returns a dictionary of template type to code content
27
  """
28
- return {
29
- template: open(get_filename(template)).read()
30
- for template in TEMPLATES
31
- }
 
15
 
16
  TEMPLATES = ["initial", "initial_with_metrics", "accelerate"]
17
 
18
+
19
+ def get_filename(tab: str, template: str) -> str:
20
  """
21
  Takes an template and returns the respective filename relative to the cwd.
22
  """
23
+ return os.path.join(os.getcwd(), "code_samples", tab, template)
24
+
25
 
26
  def get_templates() -> dict:
27
  """
28
  Returns a dictionary of template type to code content
29
  """
30
+ return {template: open(get_filename("base", template)).read() for template in TEMPLATES}