lomahony committed
Commit fd7e976
1 Parent(s): cf1d221

add pythia-70m config

Files changed (1)
  1. config.json +21 -48
config.json CHANGED
@@ -1,51 +1,24 @@
 {
-  "seed": 0,
-  "exp_name": "pythia_SFT_ga4",
-  "batch_size": 64,
-  "eval_batch_size": 32,
-  "debug": false,
-  "fsdp_port": 12355,
-  "datasets": [
-    "hh"
+  "architectures": [
+    "GPTNeoXForCausalLM"
   ],
-  "wandb": {
-    "enabled": true,
-    "entity": "pythia_dpo",
-    "project": "Pythia_LOM"
-  },
-  "local_dirs": [
-    "/scr-ssd",
-    "/scr",
-    ".cache"
-  ],
-  "sample_during_eval": false,
-  "n_eval_model_samples": 16,
-  "do_first_eval": true,
-  "local_run_dir": ".cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945",
-  "lr": 0.000001,
-  "gradient_accumulation_steps": 4,
-  "max_grad_norm": 10,
-  "max_length": 512,
-  "max_prompt_length": 256,
-  "n_epochs": 1,
-  "n_examples": null,
-  "n_eval_examples": 256,
-  "trainer": "FSDPTrainer",
-  "optimizer": "RMSprop",
-  "warmup_steps": 150,
-  "activation_checkpointing": false,
-  "eval_every": 19968,
-  "minimum_log_interval_secs": 1,
-  "model": {
-    "name_or_path": "EleutherAI/pythia-70m",
-    "tokenizer_name_or_path": null,
-    "archive": null,
-    "block_name": "GPTNeoXLayer",
-    "policy_dtype": "float32",
-    "fsdp_policy_mp": "bfloat16",
-    "reference_dtype": "float16"
-  },
-  "loss": {
-    "name": "sft"
-  }
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "hidden_act": "gelu",
+  "hidden_size": 512,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.24.0",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
 }
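
For reference, a minimal sketch (an illustration for this note, not part of the commit) of how the committed config.json maps onto the transformers GPT-NeoX classes: it rebuilds the new values with GPTNeoXConfig, instantiates the GPTNeoXForCausalLM architecture the file names, and sanity-checks the parameter count. Assumes transformers >= 4.24.0, the version recorded in the config.

# Rebuild the committed config.json values as a GPTNeoXConfig.
from transformers import GPTNeoXConfig, GPTNeoXForCausalLM

config = GPTNeoXConfig(
    vocab_size=50304,
    hidden_size=512,
    intermediate_size=2048,
    num_hidden_layers=6,
    num_attention_heads=8,
    hidden_act="gelu",
    rotary_pct=0.25,
    rotary_emb_base=10000,
    max_position_embeddings=2048,
    initializer_range=0.02,
    layer_norm_eps=1e-05,
    bos_token_id=0,
    eos_token_id=0,
    tie_word_embeddings=False,
    use_cache=True,
    use_parallel_residual=True,
)

# Instantiate the class listed under "architectures" and count parameters;
# the values above should describe a roughly 70M-parameter model.
model = GPTNeoXForCausalLM(config)
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.1f}M parameters")

AutoModelForCausalLM.from_pretrained reads these same fields back from config.json, which is presumably why this commit swaps the leftover training-run config for the standard pythia-70m model config: without it the checkpoint cannot be loaded directly through transformers.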