bhaveshgoel07 committed on
Commit
eee5a53
·
1 Parent(s): f0d6c24

v2 of model

Browse files
Files changed (5) hide show
  1. config.json +9 -9
  2. metadata.json +2 -2
  3. model.weights.h5 +2 -2
  4. preprocessor.json +1 -1
  5. task.json +10 -10
config.json CHANGED
@@ -5,21 +5,21 @@
5
  "name": "gemma_backbone",
6
  "trainable": true,
7
  "vocabulary_size": 256000,
8
- "num_layers": 18,
9
  "num_query_heads": 8,
10
- "num_key_value_heads": 1,
11
- "hidden_dim": 2048,
12
- "intermediate_dim": 32768,
13
  "head_dim": 256,
14
  "layer_norm_epsilon": 1e-06,
15
  "dropout": 0,
16
  "query_head_dim_normalize": true,
17
- "use_post_ffw_norm": false,
18
- "use_post_attention_norm": false,
19
- "final_logit_soft_cap": null,
20
- "attention_logit_soft_cap": null,
21
  "sliding_window_size": 4096,
22
- "use_sliding_window_attention": false
23
  },
24
  "registered_name": "keras_hub>GemmaBackbone"
25
  }
 
5
  "name": "gemma_backbone",
6
  "trainable": true,
7
  "vocabulary_size": 256000,
8
+ "num_layers": 26,
9
  "num_query_heads": 8,
10
+ "num_key_value_heads": 4,
11
+ "hidden_dim": 2304,
12
+ "intermediate_dim": 18432,
13
  "head_dim": 256,
14
  "layer_norm_epsilon": 1e-06,
15
  "dropout": 0,
16
  "query_head_dim_normalize": true,
17
+ "use_post_ffw_norm": true,
18
+ "use_post_attention_norm": true,
19
+ "final_logit_soft_cap": 30.0,
20
+ "attention_logit_soft_cap": 50.0,
21
  "sliding_window_size": 4096,
22
+ "use_sliding_window_attention": true
23
  },
24
  "registered_name": "keras_hub>GemmaBackbone"
25
  }
metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "keras_version": "3.6.0",
3
  "keras_hub_version": "0.17.0",
4
- "parameter_count": 2507536384,
5
- "date_saved": "2024-11-20@00:28:07"
6
  }
 
1
  {
2
  "keras_version": "3.6.0",
3
  "keras_hub_version": "0.17.0",
4
+ "parameter_count": 2617270528,
5
+ "date_saved": "2024-11-21@00:48:49"
6
  }
model.weights.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2d2ee8f20e0f098cb7b96363da64e9a56baf27544ad6525e75337a3ce7ffa39
3
- size 10025273304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcd0d3ef03bcf2e7d810c328ccc6944ee0d08af98ad37b0b7fffa41607581d3
3
+ size 10458345856
preprocessor.json CHANGED
@@ -35,7 +35,7 @@
35
  "registered_name": "keras_hub>GemmaTokenizer"
36
  },
37
  "config_file": "preprocessor.json",
38
- "sequence_length": 512,
39
  "add_start_token": true,
40
  "add_end_token": true
41
  },
 
35
  "registered_name": "keras_hub>GemmaTokenizer"
36
  },
37
  "config_file": "preprocessor.json",
38
+ "sequence_length": 64,
39
  "add_start_token": true,
40
  "add_end_token": true
41
  },
task.json CHANGED
@@ -9,21 +9,21 @@
9
  "name": "gemma_backbone",
10
  "trainable": true,
11
  "vocabulary_size": 256000,
12
- "num_layers": 18,
13
  "num_query_heads": 8,
14
- "num_key_value_heads": 1,
15
- "hidden_dim": 2048,
16
- "intermediate_dim": 32768,
17
  "head_dim": 256,
18
  "layer_norm_epsilon": 1e-06,
19
  "dropout": 0,
20
  "query_head_dim_normalize": true,
21
- "use_post_ffw_norm": false,
22
- "use_post_attention_norm": false,
23
- "final_logit_soft_cap": null,
24
- "attention_logit_soft_cap": null,
25
  "sliding_window_size": 4096,
26
- "use_sliding_window_attention": false
27
  },
28
  "registered_name": "keras_hub>GemmaBackbone"
29
  },
@@ -64,7 +64,7 @@
64
  "registered_name": "keras_hub>GemmaTokenizer"
65
  },
66
  "config_file": "preprocessor.json",
67
- "sequence_length": 512,
68
  "add_start_token": true,
69
  "add_end_token": true
70
  },
 
9
  "name": "gemma_backbone",
10
  "trainable": true,
11
  "vocabulary_size": 256000,
12
+ "num_layers": 26,
13
  "num_query_heads": 8,
14
+ "num_key_value_heads": 4,
15
+ "hidden_dim": 2304,
16
+ "intermediate_dim": 18432,
17
  "head_dim": 256,
18
  "layer_norm_epsilon": 1e-06,
19
  "dropout": 0,
20
  "query_head_dim_normalize": true,
21
+ "use_post_ffw_norm": true,
22
+ "use_post_attention_norm": true,
23
+ "final_logit_soft_cap": 30.0,
24
+ "attention_logit_soft_cap": 50.0,
25
  "sliding_window_size": 4096,
26
+ "use_sliding_window_attention": true
27
  },
28
  "registered_name": "keras_hub>GemmaBackbone"
29
  },
 
64
  "registered_name": "keras_hub>GemmaTokenizer"
65
  },
66
  "config_file": "preprocessor.json",
67
+ "sequence_length": 64,
68
  "add_start_token": true,
69
  "add_end_token": true
70
  },