theprint committed on
Commit
74e644d
1 Parent(s): fb6e046

Upload cfg.yaml

Browse files
Files changed (1) hide show
  1. cfg.yaml +117 -0
cfg.yaml ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ architecture:
2
+ backbone_dtype: int4
3
+ gradient_checkpointing: true
4
+ intermediate_dropout: 0.0
5
+ pretrained: true
6
+ pretrained_weights: ''
7
+ augmentation:
8
+ neftune_noise_alpha: 0.0
9
+ random_parent_probability: 0.0
10
+ skip_parent_probability: 0.0
11
+ token_mask_probability: 0.0
12
+ dataset:
13
+ add_eos_token_to_answer: true
14
+ add_eos_token_to_prompt: true
15
+ add_eos_token_to_system: true
16
+ answer_column: output
17
+ chatbot_author: H2O.ai
18
+ chatbot_name: h2oGPT
19
+ data_sample: 1.0
20
+ data_sample_choice:
21
+ - Train
22
+ - Validation
23
+ limit_chained_samples: false
24
+ mask_prompt_labels: true
25
+ only_last_answer: false
26
+ parent_id_column: None
27
+ personalize: false
28
+ prompt_column:
29
+ - instruction
30
+ - input
31
+ prompt_column_separator: \n\n
32
+ system_column: None
33
+ text_answer_separator: <|answer|>
34
+ text_prompt_start: <|prompt|>
35
+ text_system_start: <|system|>
36
+ train_dataframe: /home/llmstudio/mount/data/user/MysteryWriter_train/MysteryWriter_train.pq
37
+ validation_dataframe: None
38
+ validation_size: 0.01
39
+ validation_strategy: automatic
40
+ environment:
41
+ compile_model: false
42
+ deepspeed_allgather_bucket_size: 1000000
43
+ deepspeed_method: ZeRO2
44
+ deepspeed_reduce_bucket_size: 1000000
45
+ deepspeed_stage3_param_persistence_threshold: 1000000
46
+ deepspeed_stage3_prefetch_bucket_size: 1000000
47
+ find_unused_parameters: false
48
+ gpus:
49
+ - '0'
50
+ huggingface_branch: main
51
+ mixed_precision: true
52
+ mixed_precision_dtype: bfloat16
53
+ number_of_workers: 8
54
+ seed: -1
55
+ trust_remote_code: true
56
+ use_deepspeed: false
57
+ experiment_name: Boptruth-Agatha
58
+ llm_backbone: theprint/Boptruth-NeuralMonarch-7B
59
+ logging:
60
+ log_all_ranks: false
61
+ log_step_size: absolute
62
+ logger: None
63
+ neptune_project: ''
64
+ wandb_entity: ''
65
+ wandb_project: ''
66
+ output_directory: /home/llmstudio/mount/output/user/Boptruth-Agatha/
67
+ prediction:
68
+ batch_size_inference: 0
69
+ do_sample: false
70
+ max_length_inference: 256
71
+ max_time: 0.0
72
+ metric: BLEU
73
+ metric_gpt_model: gpt-3.5-turbo-0301
74
+ metric_gpt_template: general
75
+ min_length_inference: 2
76
+ num_beams: 1
77
+ num_history: 4
78
+ repetition_penalty: 1.0
79
+ stop_tokens: ''
80
+ temperature: 0.0
81
+ top_k: 0
82
+ top_p: 1.0
83
+ problem_type: text_causal_language_modeling
84
+ tokenizer:
85
+ add_prompt_answer_tokens: false
86
+ max_length: 1024
87
+ padding_quantile: 1.0
88
+ tokenizer_kwargs: '{"use_fast": true, "add_prefix_space": false}'
89
+ training:
90
+ attention_implementation: auto
91
+ batch_size: 3
92
+ differential_learning_rate: 1.0e-05
93
+ differential_learning_rate_layers: []
94
+ drop_last_batch: true
95
+ epochs: 1
96
+ evaluate_before_training: false
97
+ evaluation_epochs: 1.0
98
+ freeze_layers: []
99
+ grad_accumulation: 1
100
+ gradient_clip: 0.0
101
+ learning_rate: 0.0001
102
+ lora: true
103
+ lora_alpha: 16
104
+ lora_dropout: 0.05
105
+ lora_r: 4
106
+ lora_target_modules: ''
107
+ lora_unfreeze_layers: []
108
+ loss_function: TokenAveragedCrossEntropy
109
+ min_learning_rate_ratio: 0.0
110
+ optimizer: AdamW
111
+ save_checkpoint: last
112
+ schedule: Cosine
113
+ train_validation_data: false
114
+ use_dora: false
115
+ use_rslora: false
116
+ warmup_epochs: 0.0
117
+ weight_decay: 0.0