Jinchen committed
Commit 79fcabb
1 Parent(s): 91a58cf

Update README.md

Files changed (1)
  1. README.md +22 -14
README.md CHANGED
@@ -50,27 +50,25 @@ Trained on 16 Graphcore Mk2 IPUs using [optimum-graphcore](https://github.com/hu
  Command line:
 
  ```
- python examples/language-modeling/run_clm.py \
- --model_name_or_path gpt2 \
- --ipu_config_name Graphcore/gpt2-small-ipu \
- --dataset_name wikitext \
- --dataset_config_name wikitext-103-raw-v1 \
+ python examples/question-answering/run_vqa.py \
+ --model_name_or_path unc-nlp/lxmert-base-uncased \
+ --ipu_config_name Graphcore/lxmert-base-ipu \
+ --dataset_name Graphcore/vqa-lxmert \
  --do_train \
  --do_eval \
+ --max_seq_length 512 \
- --num_train_epochs 10 \
- --dataloader_num_workers 64 \
  --per_device_train_batch_size 1 \
+ --num_train_epochs 4 \
+ --dataloader_num_workers 64 \
- --per_device_eval_batch_size 1 \
- --gradient_accumulation_steps 128 \
- --output_dir /tmp/clm_output \
  --logging_steps 5 \
- --learning_rate 1e-5 \
+ --learning_rate 5e-5 \
  --lr_scheduler_type linear \
  --loss_scaling 16384 \
  --weight_decay 0.01 \
  --warmup_ratio 0.1 \
- --ipu_config_overrides="embedding_serialization_factor=4,optimizer_state_offchip=true,inference_device_iterations=5" \
+ --output_dir /tmp/vqa/ \
  --dataloader_drop_last \
+ --replace_qa_head \
  --pod_type pod16
  ```
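For readers who would rather drive this from Python than through `examples/question-answering/run_vqa.py`, the sketch below shows roughly the same fine-tuning using optimum-graphcore's Trainer classes. It is illustrative only: the VQA preprocessing that `run_vqa.py` performs is elided, and the `loss_scaling`/`pod_type` argument names and the dataset split names are assumptions, not taken from this commit.

```python
# Minimal sketch (not from this commit): roughly the fine-tuning that the
# run_vqa.py command above performs, via optimum-graphcore's Trainer API.
from datasets import load_dataset
from transformers import LxmertForQuestionAnswering
from optimum.graphcore import IPUConfig, IPUTrainer, IPUTrainingArguments

model = LxmertForQuestionAnswering.from_pretrained("unc-nlp/lxmert-base-uncased")
ipu_config = IPUConfig.from_pretrained("Graphcore/lxmert-base-ipu")

# Graphcore/vqa-lxmert provides pre-extracted visual features; run_vqa.py
# additionally tokenizes questions and builds label tensors (elided here).
dataset = load_dataset("Graphcore/vqa-lxmert")

args = IPUTrainingArguments(
    output_dir="/tmp/vqa/",
    do_train=True,
    do_eval=True,
    num_train_epochs=4,
    per_device_train_batch_size=1,
    learning_rate=5e-5,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_steps=5,
    dataloader_num_workers=64,
    dataloader_drop_last=True,
    loss_scaling=16384,  # assumed to map onto the --loss_scaling flag
    pod_type="pod16",    # assumed to map onto the --pod_type flag
)

trainer = IPUTrainer(
    model=model,
    ipu_config=ipu_config,
    args=args,
    train_dataset=dataset["train"],      # assumes run_vqa.py-style preprocessing
    eval_dataset=dataset["validation"],  # split name is an assumption
)
trainer.train()
trainer.evaluate()
```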
 
@@ -91,8 +89,18 @@ The following hyperparameters were used during training:
  - training precision: Mixed Precision
 
  ### Training results
-
-
+ ***** train metrics *****
+ "epoch": 4.0,
+ "train_loss": 0.0060005393999575125,
+ "train_runtime": 13854.802,
+ "train_samples": 443757,
+ "train_samples_per_second": 128.116,
+ "train_steps_per_second": 2.002
+
+ ***** eval metrics *****
+ "eval_accuracy": 0.7242196202278137,
+ "eval_loss": 0.0008745193481445312,
+ "eval_samples": 214354,
 
  ### Framework versions
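As a quick sanity check on the numbers above (plain arithmetic, no IPU required): throughput should equal the total number of samples seen divided by wall-clock time, and the ratio of samples/s to steps/s implies the effective batch per optimizer step.

```python
# Pure-arithmetic consistency check of the reported train metrics.
train_samples = 443_757
epochs = 4.0
train_runtime_s = 13_854.802

samples_per_s = train_samples * epochs / train_runtime_s
print(round(samples_per_s, 3))       # 128.116 -- matches the logged value

# samples/s divided by steps/s = samples consumed per optimizer step,
# i.e. the effective batch size implied by the IPU configuration.
print(round(samples_per_s / 2.002))  # 64
```

The implied ~64 samples per optimizer step presumably comes from the replication and gradient-accumulation factors baked into `Graphcore/lxmert-base-ipu`, since `per_device_train_batch_size` is 1.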
 
 
106