File size: 413 Bytes
43000e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99cd2a1
43000e1
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Training data selection.
# NOTE(review): the DATASET value reads as a free-text description rather
# than a path or dataset id; confirm how the consumer interprets it.
DATASET = "task-focus + sample from remain datasets"
DATASET_FORMAT = 'input-output'

# Batch size and learning-rate settings.
# NOTE(review): if the trainer multiplies the two values below, each device
# contributes 2 * 4 = 8 samples per optimizer step; confirm against the
# training script.
PER_DEVICE_TRAIN_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 3e-4
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.03

# LoRA adapter settings.
# NOTE(review): LORA_ALPHA (64) is smaller than LORA_R (192). Under the
# common alpha / r scaling convention that gives a factor of 1/3; confirm
# this is intended.
LORA_R = 192
LORA_ALPHA = 64
LORA_DROPOUT = 0.1

# Source/target sequence handling.
# TOML booleans must be lowercase; a capitalized `False` is a parse error.
TRAIN_ON_SOURCE = false
# NOTE(review): lengths are presumably measured in tokens; confirm.
SOURCE_MAX_LENGTH = 1024
TARGET_MAX_LENGTH = 1024

# Logging and checkpoint cadence.
# NOTE(review): intervals are presumably counted in optimizer steps; confirm.
LOGGING_STEPS = 20
SAVE_STEPS = 100
SAVE_TOTAL_LIMIT = 4