netcat420 committed
Commit a9b307a · verified · 1 Parent(s): 2011502

Upload 6 files
hierarchos.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f34ba416829b300faab486d51659a40a12a907d8fcdefbf14d2ac7d90dabc524
+ size 935386573
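
This file is a Git LFS pointer, not the checkpoint itself; the real ~935 MB weights file is fetched with `git lfs pull`. A minimal Python sketch for verifying a downloaded copy against the pointer's oid and size (the local path is an assumption, not part of the commit):

```python
import hashlib
from pathlib import Path

# Assumed local path to the downloaded checkpoint.
ckpt = Path("hierarchos.pt")

# Values copied from the LFS pointer above.
EXPECTED_OID = "f34ba416829b300faab486d51659a40a12a907d8fcdefbf14d2ac7d90dabc524"
EXPECTED_SIZE = 935386573

assert ckpt.stat().st_size == EXPECTED_SIZE, "size mismatch vs. LFS pointer"

h = hashlib.sha256()
with ckpt.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch vs. LFS pointer"
print("checkpoint matches LFS pointer")
```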
hierarchos_config.json ADDED
@@ -0,0 +1,87 @@
+ {
+ "mode": "train",
+ "train": true,
+ "hf_dataset": "netcat420/Experiment_0.1",
+ "hf_dataset_config": null,
+ "hf_dataset_split": "train",
+ "text_column": null,
+ "prompt_column": "instruction",
+ "completion_column": "output",
+ "model_path": null,
+ "out_dir": "./chatHRM",
+ "tokenizer_path": null,
+ "resume_from_ckpt": null,
+ "shadow_model_path": null,
+ "pre_chunked_dataset": false,
+ "pre_pt_dataset": false,
+ "context_dim": 450,
+ "persistent_dim": 128,
+ "ltm_slots": 1024,
+ "ltm_key_dim": 128,
+ "ltm_val_dim": 128,
+ "h_hidden": 450,
+ "l_hidden": 450,
+ "h_stride": 4,
+ "max_h_steps": 5,
+ "max_l_steps": 5,
+ "ltm_topk": 4,
+ "max_length": 10016,
+ "auto_max_length": true,
+ "epochs": 10,
+ "batch_size": 8,
+ "accumulation_steps": 1,
+ "starting_lr": 0.0001,
+ "min_lr": 9e-08,
+ "disable_lr_schedule": false,
+ "ltm_lr": 0.001,
+ "kayla": false,
+ "lora_r": 8,
+ "lora_alpha": 16,
+ "grad_clip": 1.0,
+ "ponder_loss_weight": 0.001,
+ "commitment_loss_weight": 0.5,
+ "commitment_threshold": 0.05,
+ "l_conv_atol": 0.0001,
+ "detach_every_n_steps": 32,
+ "h_halt_thresh": 0.9,
+ "encourage_thinking": false,
+ "adaptive_ponder": false,
+ "ponder_target_scale": 0.5,
+ "reset_halt_bias": null,
+ "override_scheduling": false,
+ "persist_state": false,
+ "training_chunk_size": 128,
+ "save_steps": 0,
+ "num_workers": 0,
+ "amp": true,
+ "compile": true,
+ "force_compile": true,
+ "eval_tasks": null,
+ "eval_every_epoch": 1,
+ "eval_batch_size": 1,
+ "eval_limit": null,
+ "eval_steps": null,
+ "temperature": 0.7,
+ "top_k": 40,
+ "top_p": 0.9,
+ "repetition_penalty": 1.2,
+ "max_new_tokens": 512,
+ "device": null,
+ "threads": 24,
+ "enable_quantized_learning": false,
+ "ltm_lora_path": null,
+ "static_ltm_lr": false,
+ "ltm_schedule_steps": 100,
+ "ltm_schedule_min_lr": 1e-05,
+ "finetune_unlock_percent": null,
+ "gradient_checkpointing": false,
+ "passive_learning": true,
+ "passive_lr": 5e-06,
+ "surprise_threshold": 1.0,
+ "ckpt_input": null,
+ "inf_output": null,
+ "ckpt_tok_path": null,
+ "amp_dtype": "bfloat16",
+ "vocab_size": 50257,
+ "completed_epoch": 28
+ }
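
The config records both the architecture (context_dim, ltm_slots, h_hidden/l_hidden) and the state of the training run ("completed_epoch": 28). A minimal sketch for loading it and cross-checking two fields against the tokenizer files in the same commit (the file paths assume a local clone of this repo):

```python
import json

# Assumed paths within a local clone of this repo.
with open("hierarchos_config.json") as f:
    cfg = json.load(f)
with open("tokenizer_config.json") as f:
    tok_cfg = json.load(f)

# vocab_size matches the GPT-2 vocabulary shipped in vocab.json / tokenizer.json.
assert cfg["vocab_size"] == 50257

# max_length in the model config mirrors model_max_length in the tokenizer config.
assert cfg["max_length"] == tok_cfg["model_max_length"] == 10016

print(f"resuming from epoch {cfg['completed_epoch']}, context_dim={cfg['context_dim']}")
```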
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "add_prefix_space": false,
+ "backend": "tokenizers",
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "is_local": false,
+ "model_max_length": 10016,
+ "pad_token": "<|endoftext|>",
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
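
With tokenizer.json, vocab.json, and these configs in place, the fast GPT-2 tokenizer loads directly from the repo directory. A minimal sketch (the local directory name is an assumption based on the config's out_dir; a Hub repo id would work the same way):

```python
from transformers import AutoTokenizer

# Assumed local clone directory (out_dir in hierarchos_config.json).
tok = AutoTokenizer.from_pretrained("./chatHRM")

# bos/eos/unk/pad are all "<|endoftext|>", per special_tokens_map.json above.
enc = tok("Hello, world!", truncation=True, max_length=10016)
print(enc["input_ids"])
print(tok.decode(enc["input_ids"]))
```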
vocab.json ADDED
The diff for this file is too large to render. See raw diff