maryprecious committed on
Commit
01005b1
·
verified ·
1 Parent(s): 8813c4c

Upload folder using huggingface_hub

Browse files
.locks/models--psd401--llama3-2-1b-Solutions/ef85b799c596b5809377eb3268cd9f789c037ce4.lock ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/README.md ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/adapter_config.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/adapter_model.safetensors ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/config.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/generation_config.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/special_tokens_map.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/tokenizer.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/tokenizer_config.json ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/.no_exist/8813c4cef407330136f13ab6ed5b83fd989f093e/training_args.bin ADDED
File without changes
models--psd401--llama3-2-1b-Solutions/blobs/ef85b799c596b5809377eb3268cd9f789c037ce4 ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.2-1B-Instruct",
3
+ "project_name": "llama3-2-1b-Solutions",
4
+ "data_path": "psd401/PSD401SolutionsData",
5
+ "train_split": "train",
6
+ "valid_split": null,
7
+ "add_eos_token": true,
8
+ "block_size": -1,
9
+ "model_max_length": 2048,
10
+ "padding": "right",
11
+ "trainer": "sft",
12
+ "use_flash_attention_2": false,
13
+ "log": "tensorboard",
14
+ "disable_gradient_checkpointing": false,
15
+ "logging_steps": -1,
16
+ "eval_strategy": "epoch",
17
+ "save_total_limit": 1,
18
+ "auto_find_batch_size": false,
19
+ "mixed_precision": "bf16",
20
+ "lr": 0.00001,
21
+ "epochs": 3,
22
+ "batch_size": 1,
23
+ "warmup_ratio": 0.1,
24
+ "gradient_accumulation": 8,
25
+ "optimizer": "paged_adamw_8bit",
26
+ "scheduler": "cosine",
27
+ "weight_decay": 0.0,
28
+ "max_grad_norm": 1.0,
29
+ "seed": 42,
30
+ "chat_template": "tokenizer",
31
+ "quantization": "int8",
32
+ "target_modules": "all-linear",
33
+ "merge_adapter": true,
34
+ "peft": true,
35
+ "lora_r": 16,
36
+ "lora_alpha": 32,
37
+ "lora_dropout": 0.05,
38
+ "model_ref": null,
39
+ "dpo_beta": 0.1,
40
+ "max_prompt_length": 128,
41
+ "max_completion_length": null,
42
+ "prompt_text_column": null,
43
+ "text_column": "messages",
44
+ "rejected_text_column": null,
45
+ "push_to_hub": false,
46
+ "username": null,
47
+ "token": null,
48
+ "unsloth": false,
49
+ "distributed_backend": null
50
+ }
models--psd401--llama3-2-1b-Solutions/refs/main CHANGED
@@ -1 +1 @@
1
- 94115ce35647c6da6fdd605765875b74579ba563
 
1
+ 8813c4cef407330136f13ab6ed5b83fd989f093e
models--psd401--llama3-2-1b-Solutions/snapshots/8813c4cef407330136f13ab6ed5b83fd989f093e/training_params.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "meta-llama/Llama-3.2-1B-Instruct",
3
+ "project_name": "llama3-2-1b-Solutions",
4
+ "data_path": "psd401/PSD401SolutionsData",
5
+ "train_split": "train",
6
+ "valid_split": null,
7
+ "add_eos_token": true,
8
+ "block_size": -1,
9
+ "model_max_length": 2048,
10
+ "padding": "right",
11
+ "trainer": "sft",
12
+ "use_flash_attention_2": false,
13
+ "log": "tensorboard",
14
+ "disable_gradient_checkpointing": false,
15
+ "logging_steps": -1,
16
+ "eval_strategy": "epoch",
17
+ "save_total_limit": 1,
18
+ "auto_find_batch_size": false,
19
+ "mixed_precision": "bf16",
20
+ "lr": 0.00001,
21
+ "epochs": 3,
22
+ "batch_size": 1,
23
+ "warmup_ratio": 0.1,
24
+ "gradient_accumulation": 8,
25
+ "optimizer": "paged_adamw_8bit",
26
+ "scheduler": "cosine",
27
+ "weight_decay": 0.0,
28
+ "max_grad_norm": 1.0,
29
+ "seed": 42,
30
+ "chat_template": "tokenizer",
31
+ "quantization": "int8",
32
+ "target_modules": "all-linear",
33
+ "merge_adapter": true,
34
+ "peft": true,
35
+ "lora_r": 16,
36
+ "lora_alpha": 32,
37
+ "lora_dropout": 0.05,
38
+ "model_ref": null,
39
+ "dpo_beta": 0.1,
40
+ "max_prompt_length": 128,
41
+ "max_completion_length": null,
42
+ "prompt_text_column": null,
43
+ "text_column": "messages",
44
+ "rejected_text_column": null,
45
+ "push_to_hub": false,
46
+ "username": null,
47
+ "token": null,
48
+ "unsloth": false,
49
+ "distributed_backend": null
50
+ }