acecalisto3 committed on
Commit
b3de3de
1 Parent(s): 96dff05

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +0 -39
config.json CHANGED
@@ -26,43 +26,4 @@
26
  "transformers_version": "4.36.0.dev0",
27
  "use_cache": true,
28
  "vocab_size": 32000
29
- }
30
- {
31
- "_name_or_path": "/fsx/bigcode/experiments/pretraining/conversions/starcoderpy/large-model",
32
- "activation_function": "gelu",
33
- "architectures": [
34
- "GPTBigCodeForCausalLM"
35
- ],
36
- "attention_softmax_in_fp32": true,
37
- "multi_query": true,
38
- "attn_pdrop": 0.1,
39
- "bos_token_id": 0,
40
- "embd_pdrop": 0.1,
41
- "eos_token_id": 0,
42
- "inference_runner": 0,
43
- "initializer_range": 0.02,
44
- "layer_norm_epsilon": 1e-05,
45
- "max_batch_size": null,
46
- "max_sequence_length": null,
47
- "model_type": "gpt_bigcode",
48
- "n_embd": 6144,
49
- "n_head": 48,
50
- "n_inner": 24576,
51
- "n_layer": 40,
52
- "n_positions": 8192,
53
- "pad_key_length": true,
54
- "pre_allocate_kv_cache": false,
55
- "resid_pdrop": 0.1,
56
- "scale_attention_softmax_in_fp32": true,
57
- "scale_attn_weights": true,
58
- "summary_activation": null,
59
- "summary_first_dropout": 0.1,
60
- "summary_proj_to_labels": true,
61
- "summary_type": "cls_index",
62
- "summary_use_proj": true,
63
- "torch_dtype": "float32",
64
- "transformers_version": "4.28.1",
65
- "use_cache": true,
66
- "validate_runner_input": true,
67
- "vocab_size": 49152
68
  }
 
26
  "transformers_version": "4.36.0.dev0",
27
  "use_cache": true,
28
  "vocab_size": 32000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }