m-biriuchinskii
committed on
Commit
•
663ba60
1
Parent(s):
638ee1d
Update README.md
Browse files
README.md
CHANGED
@@ -2,6 +2,66 @@
|
|
2 |
library_name: transformers
|
3 |
tags: []
|
4 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Model Card for Model ID
|
7 |
|
|
|
2 |
library_name: transformers
|
3 |
tags: []
|
4 |
---
|
5 |
+
```
|
6 |
+
wandb: - 0.003 MB of 0.003 MB uploaded
|
7 |
+
wandb: \ 0.003 MB of 0.003 MB uploaded
|
8 |
+
wandb:
|
9 |
+
wandb:
|
10 |
+
wandb: Run history:
|
11 |
+
wandb: eval/loss ββ
β
ββββββββββββββββββββββββ
|
12 |
+
wandb: eval/runtime ββββββ
ββ
βββββββ
βββββ
βββββββ
|
13 |
+
wandb: eval/samples_per_second βββββββββ
β
β
β
β
β
ββ
ββ
ββββ
βββββ
|
14 |
+
wandb: eval/steps_per_second βββββββββββ
β
ββ
ββ
ββββββ
βββββ
|
15 |
+
wandb: train/epoch βββββββββββββββββββββ
β
β
β
β
β
β
βββββββββββββ
|
16 |
+
wandb: train/global_step βββββββββββββββββββββββ
β
β
β
β
β
ββββββββββββ
|
17 |
+
wandb: train/grad_norm ββββββββββββββββββββββββββββββββββββββ
ββ
|
18 |
+
wandb: train/learning_rate βββββββββββββββ
β
β
β
β
β
ββββββββββββββββββββ
|
19 |
+
wandb: train/loss ββββββββββββββββββββββββββββββββββββββββ
|
20 |
+
wandb:
|
21 |
+
wandb: Run summary:
|
22 |
+
wandb: eval/loss 0.92221
|
23 |
+
wandb: eval/runtime 93.6611
|
24 |
+
wandb: eval/samples_per_second 3.587
|
25 |
+
wandb: eval/steps_per_second 1.196
|
26 |
+
wandb: total_flos 2.952274602780672e+16
|
27 |
+
wandb: train/epoch 2.46201
|
28 |
+
wandb: train/global_step 810
|
29 |
+
wandb: train/grad_norm 0.81067
|
30 |
+
wandb: train/learning_rate 3e-05
|
31 |
+
wandb: train/loss 0.7747
|
32 |
+
wandb: train_loss 1.05936
|
33 |
+
wandb: train_runtime 8326.639
|
34 |
+
wandb: train_samples_per_second 1.58
|
35 |
+
wandb: train_steps_per_second 0.198
|
36 |
+
|
37 |
+
training_arguments = SFTConfig(
|
38 |
+
output_dir=new_model,
|
39 |
+
run_name="fine_tune_ocr_correction",
|
40 |
+
per_device_train_batch_size=2,
|
41 |
+
per_device_eval_batch_size=3,
|
42 |
+
gradient_accumulation_steps=4,
|
43 |
+
optim="paged_adamw_32bit",
|
44 |
+
num_train_epochs=5,
|
45 |
+
eval_strategy="steps",
|
46 |
+
eval_steps=30, # normally 10 steps, but our dataset is small
|
47 |
+
save_steps=30,
|
48 |
+
logging_steps=20, # Log progress every 20 steps
|
49 |
+
warmup_steps=10,
|
50 |
+
logging_strategy="steps",
|
51 |
+
learning_rate=5e-5,
|
52 |
+
fp16=use_fp16,
|
53 |
+
bf16=use_bf16,
|
54 |
+
group_by_length=True,
|
55 |
+
report_to="wandb",
|
56 |
+
max_seq_length=1220,
|
57 |
+
save_strategy="steps",
|
58 |
+
dataset_text_field="text",
|
59 |
+
load_best_model_at_end = True
|
60 |
+
)
|
61 |
+
|
62 |
+
Dataset complet
|
63 |
+
```
|
64 |
+
|
65 |
|
66 |
# Model Card for Model ID
|
67 |
|