Recag committed
Commit: bcc760c
Parent: 4debd9d

Upload config

Files changed (2):
  1. config.json  +3 -4
  2. config.py    +5 -5
config.json CHANGED

@@ -1,9 +1,9 @@
 {
-  "architectures": [
-    "BharataiForCausalLM"
-  ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "config.BharataiConfig"
+  },
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -20,7 +20,6 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
   "transformers_version": "4.36.0.dev0",
   "use_cache": true,
   "vocab_size": 5000
config.py CHANGED

@@ -83,11 +83,11 @@ class BharataiConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=32000,
-        hidden_size=4096,
+        vocab_size=5000,
+        hidden_size=512,
         intermediate_size=11008,
-        num_hidden_layers=32,
-        num_attention_heads=32,
+        num_hidden_layers=8,
+        num_attention_heads=8,
         num_key_value_heads=None,
         hidden_act="silu",
         max_position_embeddings=16384,
@@ -155,4 +155,4 @@ class BharataiConfig(PretrainedConfig):
                 f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
             )
         if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
-            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
+            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
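The config.py change rescales the constructor defaults (5000-token vocabulary, hidden size 512, 8 layers, 8 heads) to match the small model that config.json now describes; the final raise line is removed and re-added unchanged, which typically reflects a trailing-newline fix rather than a code change. A short sketch of the new defaults and the rope_scaling check, assuming the repo's config.py is importable from the working directory and that, as in the Llama-style configs this class mirrors, the validation runs during __init__:

from config import BharataiConfig  # assumes config.py is on sys.path

# The new defaults describe a much smaller model than the previous
# 32000-vocab / 4096-hidden / 32-layer / 32-head configuration.
cfg = BharataiConfig()
assert cfg.vocab_size == 5000 and cfg.hidden_size == 512
assert cfg.num_hidden_layers == 8 and cfg.num_attention_heads == 8

# The validator at the end of the diff rejects any scaling factor <= 1.0,
# even when the "type" field is one of the accepted values.
try:
    BharataiConfig(rope_scaling={"type": "linear", "factor": 1.0})
except ValueError as err:
    print(err)  # `rope_scaling`'s factor field must be a float > 1, got 1.0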