Iker committed on
Commit
1c3cd06
1 Parent(s): 30cf63d

Upload Tower13B.yml

Browse files
Files changed (1) hide show
  1. Tower13B.yml +84 -0
Tower13B.yml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: Unbabel/TowerInstruct-13B-v0.1
2
+ model_type: LlamaForCausalLM
3
+ tokenizer_type: LlamaTokenizer
4
+ is_llama_derived_model: true
5
+
6
+ load_in_8bit: false
7
+ load_in_4bit: false
8
+ strict: false
9
+
10
+ device_map: null
11
+
12
+ datasets:
13
+ - path: /ikerlariak/igarcia945/Mortadelo-Filemon/translation_data/translation_instruction_axolotl.jsonl
14
+ type: sharegpt
15
+ conversation: chatml
16
+ field: conversations
17
+ dataset_prepared_path:
18
+ val_set_size: 0.05
19
+
20
+ output_dir: /ikerlariak/igarcia945/Mortadelo-Filemon/Tower13B-EN2ES
21
+
22
+ adapter: lora
23
+ lora_model_dir:
24
+
25
+ sequence_len: 8096
26
+ sample_packing: false
27
+ eval_sample_packing: false
28
+ pad_to_sequence_len: false
29
+
30
+ overrides_of_model_config:
31
+ # RoPE Scaling https://github.com/huggingface/transformers/pull/24653
32
+ rope_scaling:
33
+ type: dynamic
34
+ factor: 2.0
35
+
36
+
37
+ lora_r: 128
38
+ lora_alpha: 256
39
+ lora_dropout: 0.05
40
+ lora_target_modules:
41
+ lora_target_linear: true
42
+ lora_fan_in_fan_out:
43
+ lora_modules_to_save:
44
+ - embed_tokens
45
+ - lm_head
46
+
47
+ wandb_project: Mortadelo&Filemon
48
+ wandb_entity: igarciaf
49
+ wandb_watch:
50
+ wandb_name: Tower13B-EN2ES
51
+ wandb_log_model:
52
+
53
+ gradient_accumulation_steps: 8
54
+ micro_batch_size: 2
55
+ eval_batch_size: 2
56
+ num_epochs: 3
57
+ optimizer: paged_adamw_32bit
58
+ lr_scheduler: cosine
59
+ learning_rate: 0.0002
60
+
61
+ train_on_inputs: false
62
+ group_by_length: false
63
+ bf16: true
64
+ fp16: false
65
+ tf32: false
66
+
67
+ gradient_checkpointing: true
68
+ early_stopping_patience:
69
+ resume_from_checkpoint:
70
+ local_rank:
71
+ logging_steps: 1
72
+ xformers_attention:
73
+ flash_attention: true
74
+
75
+ warmup_ratio: 0.03
76
+ evals_per_epoch: 2
77
+ eval_table_size:
78
+ save_strategy: "no"
79
+ debug:
80
+ deepspeed: /ikerlariak/igarcia945/Mortadelo-Filemon/train_configs/deepspeed_zero3.json
81
+ weight_decay: 0.0
82
+ fsdp:
83
+ fsdp_config:
84
+ special_tokens: