Divyasreepat committed on
Commit
633902f
1 Parent(s): e3e7d4e

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-hub
3
+ ---
4
+ This is a [`Llama3` model](https://keras.io/api/keras_hub/models/llama3) that was uploaded using the KerasHub library. It can be used with the JAX, TensorFlow, and PyTorch backends.
5
+ Model config:
6
+ * **name:** llama_backbone
7
+ * **trainable:** True
8
+ * **dtype:** {'module': 'keras.dtype_policies', 'class_name': 'DTypePolicyMap', 'config': {'default_policy': None, 'policy_map': {'token_embedding': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_0/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': 
{'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_1/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': 
None}, 'registered_name': None}, 'transformer_layer_2/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_2/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_3/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_4/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_4/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_5/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_5/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_6/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/feedforward_output_dense': 
{'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_7/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/self_attention/attention_output': {'module': 
'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_8/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_9/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 
'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_10/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 
'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_11/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_12/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 
'source_name': None}, 'registered_name': None}, 'transformer_layer_12/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_13/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 
'registered_name': None}, 'transformer_layer_14/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_14/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': 
None}, 'transformer_layer_15/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_15/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_16/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_17/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_17/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_18/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_18/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_19/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_20/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_20/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_21/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_21/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_22/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_22/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_23/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_24/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_24/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_25/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_25/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_26/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_27/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_27/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_28/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_28/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_29/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_30/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_30/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/feedforward_output_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/feedforward_gate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/feedforward_intermediate_dense': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 
'transformer_layer_31/self_attention/attention_output': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/self_attention/value': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/self_attention/key': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}, 'transformer_layer_31/self_attention/query': {'module': 'keras.dtype_policies', 'class_name': 'QuantizedDTypePolicy', 'config': {'mode': 'int8', 'source_name': None}, 'registered_name': None}}}, 'registered_name': None}
9
+ * **vocabulary_size:** 128256
10
+ * **num_layers:** 32
11
+ * **num_query_heads:** 32
12
+ * **hidden_dim:** 4096
13
+ * **intermediate_dim:** 14336
14
+ * **rope_max_wavelength:** 500000.0
15
+ * **rope_scaling_factor:** 1.0
16
+ * **num_key_value_heads:** 8
17
+ * **layer_norm_epsilon:** 1e-05
18
+ * **dropout:** 0
19
+
20
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,2054 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.llama3.llama3_backbone",
3
+ "class_name": "Llama3Backbone",
4
+ "config": {
5
+ "name": "llama_backbone",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras.dtype_policies",
9
+ "class_name": "DTypePolicyMap",
10
+ "config": {
11
+ "default_policy": null,
12
+ "policy_map": {
13
+ "token_embedding": {
14
+ "module": "keras.dtype_policies",
15
+ "class_name": "QuantizedDTypePolicy",
16
+ "config": {
17
+ "mode": "int8",
18
+ "source_name": null
19
+ },
20
+ "registered_name": null
21
+ },
22
+ "transformer_layer_0/feedforward_output_dense": {
23
+ "module": "keras.dtype_policies",
24
+ "class_name": "QuantizedDTypePolicy",
25
+ "config": {
26
+ "mode": "int8",
27
+ "source_name": null
28
+ },
29
+ "registered_name": null
30
+ },
31
+ "transformer_layer_0/feedforward_gate_dense": {
32
+ "module": "keras.dtype_policies",
33
+ "class_name": "QuantizedDTypePolicy",
34
+ "config": {
35
+ "mode": "int8",
36
+ "source_name": null
37
+ },
38
+ "registered_name": null
39
+ },
40
+ "transformer_layer_0/feedforward_intermediate_dense": {
41
+ "module": "keras.dtype_policies",
42
+ "class_name": "QuantizedDTypePolicy",
43
+ "config": {
44
+ "mode": "int8",
45
+ "source_name": null
46
+ },
47
+ "registered_name": null
48
+ },
49
+ "transformer_layer_0/self_attention/attention_output": {
50
+ "module": "keras.dtype_policies",
51
+ "class_name": "QuantizedDTypePolicy",
52
+ "config": {
53
+ "mode": "int8",
54
+ "source_name": null
55
+ },
56
+ "registered_name": null
57
+ },
58
+ "transformer_layer_0/self_attention/value": {
59
+ "module": "keras.dtype_policies",
60
+ "class_name": "QuantizedDTypePolicy",
61
+ "config": {
62
+ "mode": "int8",
63
+ "source_name": null
64
+ },
65
+ "registered_name": null
66
+ },
67
+ "transformer_layer_0/self_attention/key": {
68
+ "module": "keras.dtype_policies",
69
+ "class_name": "QuantizedDTypePolicy",
70
+ "config": {
71
+ "mode": "int8",
72
+ "source_name": null
73
+ },
74
+ "registered_name": null
75
+ },
76
+ "transformer_layer_0/self_attention/query": {
77
+ "module": "keras.dtype_policies",
78
+ "class_name": "QuantizedDTypePolicy",
79
+ "config": {
80
+ "mode": "int8",
81
+ "source_name": null
82
+ },
83
+ "registered_name": null
84
+ },
85
+ "transformer_layer_1/feedforward_output_dense": {
86
+ "module": "keras.dtype_policies",
87
+ "class_name": "QuantizedDTypePolicy",
88
+ "config": {
89
+ "mode": "int8",
90
+ "source_name": null
91
+ },
92
+ "registered_name": null
93
+ },
94
+ "transformer_layer_1/feedforward_gate_dense": {
95
+ "module": "keras.dtype_policies",
96
+ "class_name": "QuantizedDTypePolicy",
97
+ "config": {
98
+ "mode": "int8",
99
+ "source_name": null
100
+ },
101
+ "registered_name": null
102
+ },
103
+ "transformer_layer_1/feedforward_intermediate_dense": {
104
+ "module": "keras.dtype_policies",
105
+ "class_name": "QuantizedDTypePolicy",
106
+ "config": {
107
+ "mode": "int8",
108
+ "source_name": null
109
+ },
110
+ "registered_name": null
111
+ },
112
+ "transformer_layer_1/self_attention/attention_output": {
113
+ "module": "keras.dtype_policies",
114
+ "class_name": "QuantizedDTypePolicy",
115
+ "config": {
116
+ "mode": "int8",
117
+ "source_name": null
118
+ },
119
+ "registered_name": null
120
+ },
121
+ "transformer_layer_1/self_attention/value": {
122
+ "module": "keras.dtype_policies",
123
+ "class_name": "QuantizedDTypePolicy",
124
+ "config": {
125
+ "mode": "int8",
126
+ "source_name": null
127
+ },
128
+ "registered_name": null
129
+ },
130
+ "transformer_layer_1/self_attention/key": {
131
+ "module": "keras.dtype_policies",
132
+ "class_name": "QuantizedDTypePolicy",
133
+ "config": {
134
+ "mode": "int8",
135
+ "source_name": null
136
+ },
137
+ "registered_name": null
138
+ },
139
+ "transformer_layer_1/self_attention/query": {
140
+ "module": "keras.dtype_policies",
141
+ "class_name": "QuantizedDTypePolicy",
142
+ "config": {
143
+ "mode": "int8",
144
+ "source_name": null
145
+ },
146
+ "registered_name": null
147
+ },
148
+ "transformer_layer_2/feedforward_output_dense": {
149
+ "module": "keras.dtype_policies",
150
+ "class_name": "QuantizedDTypePolicy",
151
+ "config": {
152
+ "mode": "int8",
153
+ "source_name": null
154
+ },
155
+ "registered_name": null
156
+ },
157
+ "transformer_layer_2/feedforward_gate_dense": {
158
+ "module": "keras.dtype_policies",
159
+ "class_name": "QuantizedDTypePolicy",
160
+ "config": {
161
+ "mode": "int8",
162
+ "source_name": null
163
+ },
164
+ "registered_name": null
165
+ },
166
+ "transformer_layer_2/feedforward_intermediate_dense": {
167
+ "module": "keras.dtype_policies",
168
+ "class_name": "QuantizedDTypePolicy",
169
+ "config": {
170
+ "mode": "int8",
171
+ "source_name": null
172
+ },
173
+ "registered_name": null
174
+ },
175
+ "transformer_layer_2/self_attention/attention_output": {
176
+ "module": "keras.dtype_policies",
177
+ "class_name": "QuantizedDTypePolicy",
178
+ "config": {
179
+ "mode": "int8",
180
+ "source_name": null
181
+ },
182
+ "registered_name": null
183
+ },
184
+ "transformer_layer_2/self_attention/value": {
185
+ "module": "keras.dtype_policies",
186
+ "class_name": "QuantizedDTypePolicy",
187
+ "config": {
188
+ "mode": "int8",
189
+ "source_name": null
190
+ },
191
+ "registered_name": null
192
+ },
193
+ "transformer_layer_2/self_attention/key": {
194
+ "module": "keras.dtype_policies",
195
+ "class_name": "QuantizedDTypePolicy",
196
+ "config": {
197
+ "mode": "int8",
198
+ "source_name": null
199
+ },
200
+ "registered_name": null
201
+ },
202
+ "transformer_layer_2/self_attention/query": {
203
+ "module": "keras.dtype_policies",
204
+ "class_name": "QuantizedDTypePolicy",
205
+ "config": {
206
+ "mode": "int8",
207
+ "source_name": null
208
+ },
209
+ "registered_name": null
210
+ },
211
+ "transformer_layer_3/feedforward_output_dense": {
212
+ "module": "keras.dtype_policies",
213
+ "class_name": "QuantizedDTypePolicy",
214
+ "config": {
215
+ "mode": "int8",
216
+ "source_name": null
217
+ },
218
+ "registered_name": null
219
+ },
220
+ "transformer_layer_3/feedforward_gate_dense": {
221
+ "module": "keras.dtype_policies",
222
+ "class_name": "QuantizedDTypePolicy",
223
+ "config": {
224
+ "mode": "int8",
225
+ "source_name": null
226
+ },
227
+ "registered_name": null
228
+ },
229
+ "transformer_layer_3/feedforward_intermediate_dense": {
230
+ "module": "keras.dtype_policies",
231
+ "class_name": "QuantizedDTypePolicy",
232
+ "config": {
233
+ "mode": "int8",
234
+ "source_name": null
235
+ },
236
+ "registered_name": null
237
+ },
238
+ "transformer_layer_3/self_attention/attention_output": {
239
+ "module": "keras.dtype_policies",
240
+ "class_name": "QuantizedDTypePolicy",
241
+ "config": {
242
+ "mode": "int8",
243
+ "source_name": null
244
+ },
245
+ "registered_name": null
246
+ },
247
+ "transformer_layer_3/self_attention/value": {
248
+ "module": "keras.dtype_policies",
249
+ "class_name": "QuantizedDTypePolicy",
250
+ "config": {
251
+ "mode": "int8",
252
+ "source_name": null
253
+ },
254
+ "registered_name": null
255
+ },
256
+ "transformer_layer_3/self_attention/key": {
257
+ "module": "keras.dtype_policies",
258
+ "class_name": "QuantizedDTypePolicy",
259
+ "config": {
260
+ "mode": "int8",
261
+ "source_name": null
262
+ },
263
+ "registered_name": null
264
+ },
265
+ "transformer_layer_3/self_attention/query": {
266
+ "module": "keras.dtype_policies",
267
+ "class_name": "QuantizedDTypePolicy",
268
+ "config": {
269
+ "mode": "int8",
270
+ "source_name": null
271
+ },
272
+ "registered_name": null
273
+ },
274
+ "transformer_layer_4/feedforward_output_dense": {
275
+ "module": "keras.dtype_policies",
276
+ "class_name": "QuantizedDTypePolicy",
277
+ "config": {
278
+ "mode": "int8",
279
+ "source_name": null
280
+ },
281
+ "registered_name": null
282
+ },
283
+ "transformer_layer_4/feedforward_gate_dense": {
284
+ "module": "keras.dtype_policies",
285
+ "class_name": "QuantizedDTypePolicy",
286
+ "config": {
287
+ "mode": "int8",
288
+ "source_name": null
289
+ },
290
+ "registered_name": null
291
+ },
292
+ "transformer_layer_4/feedforward_intermediate_dense": {
293
+ "module": "keras.dtype_policies",
294
+ "class_name": "QuantizedDTypePolicy",
295
+ "config": {
296
+ "mode": "int8",
297
+ "source_name": null
298
+ },
299
+ "registered_name": null
300
+ },
301
+ "transformer_layer_4/self_attention/attention_output": {
302
+ "module": "keras.dtype_policies",
303
+ "class_name": "QuantizedDTypePolicy",
304
+ "config": {
305
+ "mode": "int8",
306
+ "source_name": null
307
+ },
308
+ "registered_name": null
309
+ },
310
+ "transformer_layer_4/self_attention/value": {
311
+ "module": "keras.dtype_policies",
312
+ "class_name": "QuantizedDTypePolicy",
313
+ "config": {
314
+ "mode": "int8",
315
+ "source_name": null
316
+ },
317
+ "registered_name": null
318
+ },
319
+ "transformer_layer_4/self_attention/key": {
320
+ "module": "keras.dtype_policies",
321
+ "class_name": "QuantizedDTypePolicy",
322
+ "config": {
323
+ "mode": "int8",
324
+ "source_name": null
325
+ },
326
+ "registered_name": null
327
+ },
328
+ "transformer_layer_4/self_attention/query": {
329
+ "module": "keras.dtype_policies",
330
+ "class_name": "QuantizedDTypePolicy",
331
+ "config": {
332
+ "mode": "int8",
333
+ "source_name": null
334
+ },
335
+ "registered_name": null
336
+ },
337
+ "transformer_layer_5/feedforward_output_dense": {
338
+ "module": "keras.dtype_policies",
339
+ "class_name": "QuantizedDTypePolicy",
340
+ "config": {
341
+ "mode": "int8",
342
+ "source_name": null
343
+ },
344
+ "registered_name": null
345
+ },
346
+ "transformer_layer_5/feedforward_gate_dense": {
347
+ "module": "keras.dtype_policies",
348
+ "class_name": "QuantizedDTypePolicy",
349
+ "config": {
350
+ "mode": "int8",
351
+ "source_name": null
352
+ },
353
+ "registered_name": null
354
+ },
355
+ "transformer_layer_5/feedforward_intermediate_dense": {
356
+ "module": "keras.dtype_policies",
357
+ "class_name": "QuantizedDTypePolicy",
358
+ "config": {
359
+ "mode": "int8",
360
+ "source_name": null
361
+ },
362
+ "registered_name": null
363
+ },
364
+ "transformer_layer_5/self_attention/attention_output": {
365
+ "module": "keras.dtype_policies",
366
+ "class_name": "QuantizedDTypePolicy",
367
+ "config": {
368
+ "mode": "int8",
369
+ "source_name": null
370
+ },
371
+ "registered_name": null
372
+ },
373
+ "transformer_layer_5/self_attention/value": {
374
+ "module": "keras.dtype_policies",
375
+ "class_name": "QuantizedDTypePolicy",
376
+ "config": {
377
+ "mode": "int8",
378
+ "source_name": null
379
+ },
380
+ "registered_name": null
381
+ },
382
+ "transformer_layer_5/self_attention/key": {
383
+ "module": "keras.dtype_policies",
384
+ "class_name": "QuantizedDTypePolicy",
385
+ "config": {
386
+ "mode": "int8",
387
+ "source_name": null
388
+ },
389
+ "registered_name": null
390
+ },
391
+ "transformer_layer_5/self_attention/query": {
392
+ "module": "keras.dtype_policies",
393
+ "class_name": "QuantizedDTypePolicy",
394
+ "config": {
395
+ "mode": "int8",
396
+ "source_name": null
397
+ },
398
+ "registered_name": null
399
+ },
400
+ "transformer_layer_6/feedforward_output_dense": {
401
+ "module": "keras.dtype_policies",
402
+ "class_name": "QuantizedDTypePolicy",
403
+ "config": {
404
+ "mode": "int8",
405
+ "source_name": null
406
+ },
407
+ "registered_name": null
408
+ },
409
+ "transformer_layer_6/feedforward_gate_dense": {
410
+ "module": "keras.dtype_policies",
411
+ "class_name": "QuantizedDTypePolicy",
412
+ "config": {
413
+ "mode": "int8",
414
+ "source_name": null
415
+ },
416
+ "registered_name": null
417
+ },
418
+ "transformer_layer_6/feedforward_intermediate_dense": {
419
+ "module": "keras.dtype_policies",
420
+ "class_name": "QuantizedDTypePolicy",
421
+ "config": {
422
+ "mode": "int8",
423
+ "source_name": null
424
+ },
425
+ "registered_name": null
426
+ },
427
+ "transformer_layer_6/self_attention/attention_output": {
428
+ "module": "keras.dtype_policies",
429
+ "class_name": "QuantizedDTypePolicy",
430
+ "config": {
431
+ "mode": "int8",
432
+ "source_name": null
433
+ },
434
+ "registered_name": null
435
+ },
436
+ "transformer_layer_6/self_attention/value": {
437
+ "module": "keras.dtype_policies",
438
+ "class_name": "QuantizedDTypePolicy",
439
+ "config": {
440
+ "mode": "int8",
441
+ "source_name": null
442
+ },
443
+ "registered_name": null
444
+ },
445
+ "transformer_layer_6/self_attention/key": {
446
+ "module": "keras.dtype_policies",
447
+ "class_name": "QuantizedDTypePolicy",
448
+ "config": {
449
+ "mode": "int8",
450
+ "source_name": null
451
+ },
452
+ "registered_name": null
453
+ },
454
+ "transformer_layer_6/self_attention/query": {
455
+ "module": "keras.dtype_policies",
456
+ "class_name": "QuantizedDTypePolicy",
457
+ "config": {
458
+ "mode": "int8",
459
+ "source_name": null
460
+ },
461
+ "registered_name": null
462
+ },
463
+ "transformer_layer_7/feedforward_output_dense": {
464
+ "module": "keras.dtype_policies",
465
+ "class_name": "QuantizedDTypePolicy",
466
+ "config": {
467
+ "mode": "int8",
468
+ "source_name": null
469
+ },
470
+ "registered_name": null
471
+ },
472
+ "transformer_layer_7/feedforward_gate_dense": {
473
+ "module": "keras.dtype_policies",
474
+ "class_name": "QuantizedDTypePolicy",
475
+ "config": {
476
+ "mode": "int8",
477
+ "source_name": null
478
+ },
479
+ "registered_name": null
480
+ },
481
+ "transformer_layer_7/feedforward_intermediate_dense": {
482
+ "module": "keras.dtype_policies",
483
+ "class_name": "QuantizedDTypePolicy",
484
+ "config": {
485
+ "mode": "int8",
486
+ "source_name": null
487
+ },
488
+ "registered_name": null
489
+ },
490
+ "transformer_layer_7/self_attention/attention_output": {
491
+ "module": "keras.dtype_policies",
492
+ "class_name": "QuantizedDTypePolicy",
493
+ "config": {
494
+ "mode": "int8",
495
+ "source_name": null
496
+ },
497
+ "registered_name": null
498
+ },
499
+ "transformer_layer_7/self_attention/value": {
500
+ "module": "keras.dtype_policies",
501
+ "class_name": "QuantizedDTypePolicy",
502
+ "config": {
503
+ "mode": "int8",
504
+ "source_name": null
505
+ },
506
+ "registered_name": null
507
+ },
508
+ "transformer_layer_7/self_attention/key": {
509
+ "module": "keras.dtype_policies",
510
+ "class_name": "QuantizedDTypePolicy",
511
+ "config": {
512
+ "mode": "int8",
513
+ "source_name": null
514
+ },
515
+ "registered_name": null
516
+ },
517
+ "transformer_layer_7/self_attention/query": {
518
+ "module": "keras.dtype_policies",
519
+ "class_name": "QuantizedDTypePolicy",
520
+ "config": {
521
+ "mode": "int8",
522
+ "source_name": null
523
+ },
524
+ "registered_name": null
525
+ },
526
+ "transformer_layer_8/feedforward_output_dense": {
527
+ "module": "keras.dtype_policies",
528
+ "class_name": "QuantizedDTypePolicy",
529
+ "config": {
530
+ "mode": "int8",
531
+ "source_name": null
532
+ },
533
+ "registered_name": null
534
+ },
535
+ "transformer_layer_8/feedforward_gate_dense": {
536
+ "module": "keras.dtype_policies",
537
+ "class_name": "QuantizedDTypePolicy",
538
+ "config": {
539
+ "mode": "int8",
540
+ "source_name": null
541
+ },
542
+ "registered_name": null
543
+ },
544
+ "transformer_layer_8/feedforward_intermediate_dense": {
545
+ "module": "keras.dtype_policies",
546
+ "class_name": "QuantizedDTypePolicy",
547
+ "config": {
548
+ "mode": "int8",
549
+ "source_name": null
550
+ },
551
+ "registered_name": null
552
+ },
553
+ "transformer_layer_8/self_attention/attention_output": {
554
+ "module": "keras.dtype_policies",
555
+ "class_name": "QuantizedDTypePolicy",
556
+ "config": {
557
+ "mode": "int8",
558
+ "source_name": null
559
+ },
560
+ "registered_name": null
561
+ },
562
+ "transformer_layer_8/self_attention/value": {
563
+ "module": "keras.dtype_policies",
564
+ "class_name": "QuantizedDTypePolicy",
565
+ "config": {
566
+ "mode": "int8",
567
+ "source_name": null
568
+ },
569
+ "registered_name": null
570
+ },
571
+ "transformer_layer_8/self_attention/key": {
572
+ "module": "keras.dtype_policies",
573
+ "class_name": "QuantizedDTypePolicy",
574
+ "config": {
575
+ "mode": "int8",
576
+ "source_name": null
577
+ },
578
+ "registered_name": null
579
+ },
580
+ "transformer_layer_8/self_attention/query": {
581
+ "module": "keras.dtype_policies",
582
+ "class_name": "QuantizedDTypePolicy",
583
+ "config": {
584
+ "mode": "int8",
585
+ "source_name": null
586
+ },
587
+ "registered_name": null
588
+ },
589
+ "transformer_layer_9/feedforward_output_dense": {
590
+ "module": "keras.dtype_policies",
591
+ "class_name": "QuantizedDTypePolicy",
592
+ "config": {
593
+ "mode": "int8",
594
+ "source_name": null
595
+ },
596
+ "registered_name": null
597
+ },
598
+ "transformer_layer_9/feedforward_gate_dense": {
599
+ "module": "keras.dtype_policies",
600
+ "class_name": "QuantizedDTypePolicy",
601
+ "config": {
602
+ "mode": "int8",
603
+ "source_name": null
604
+ },
605
+ "registered_name": null
606
+ },
607
+ "transformer_layer_9/feedforward_intermediate_dense": {
608
+ "module": "keras.dtype_policies",
609
+ "class_name": "QuantizedDTypePolicy",
610
+ "config": {
611
+ "mode": "int8",
612
+ "source_name": null
613
+ },
614
+ "registered_name": null
615
+ },
616
+ "transformer_layer_9/self_attention/attention_output": {
617
+ "module": "keras.dtype_policies",
618
+ "class_name": "QuantizedDTypePolicy",
619
+ "config": {
620
+ "mode": "int8",
621
+ "source_name": null
622
+ },
623
+ "registered_name": null
624
+ },
625
+ "transformer_layer_9/self_attention/value": {
626
+ "module": "keras.dtype_policies",
627
+ "class_name": "QuantizedDTypePolicy",
628
+ "config": {
629
+ "mode": "int8",
630
+ "source_name": null
631
+ },
632
+ "registered_name": null
633
+ },
634
+ "transformer_layer_9/self_attention/key": {
635
+ "module": "keras.dtype_policies",
636
+ "class_name": "QuantizedDTypePolicy",
637
+ "config": {
638
+ "mode": "int8",
639
+ "source_name": null
640
+ },
641
+ "registered_name": null
642
+ },
643
+ "transformer_layer_9/self_attention/query": {
644
+ "module": "keras.dtype_policies",
645
+ "class_name": "QuantizedDTypePolicy",
646
+ "config": {
647
+ "mode": "int8",
648
+ "source_name": null
649
+ },
650
+ "registered_name": null
651
+ },
652
+ "transformer_layer_10/feedforward_output_dense": {
653
+ "module": "keras.dtype_policies",
654
+ "class_name": "QuantizedDTypePolicy",
655
+ "config": {
656
+ "mode": "int8",
657
+ "source_name": null
658
+ },
659
+ "registered_name": null
660
+ },
661
+ "transformer_layer_10/feedforward_gate_dense": {
662
+ "module": "keras.dtype_policies",
663
+ "class_name": "QuantizedDTypePolicy",
664
+ "config": {
665
+ "mode": "int8",
666
+ "source_name": null
667
+ },
668
+ "registered_name": null
669
+ },
670
+ "transformer_layer_10/feedforward_intermediate_dense": {
671
+ "module": "keras.dtype_policies",
672
+ "class_name": "QuantizedDTypePolicy",
673
+ "config": {
674
+ "mode": "int8",
675
+ "source_name": null
676
+ },
677
+ "registered_name": null
678
+ },
679
+ "transformer_layer_10/self_attention/attention_output": {
680
+ "module": "keras.dtype_policies",
681
+ "class_name": "QuantizedDTypePolicy",
682
+ "config": {
683
+ "mode": "int8",
684
+ "source_name": null
685
+ },
686
+ "registered_name": null
687
+ },
688
+ "transformer_layer_10/self_attention/value": {
689
+ "module": "keras.dtype_policies",
690
+ "class_name": "QuantizedDTypePolicy",
691
+ "config": {
692
+ "mode": "int8",
693
+ "source_name": null
694
+ },
695
+ "registered_name": null
696
+ },
697
+ "transformer_layer_10/self_attention/key": {
698
+ "module": "keras.dtype_policies",
699
+ "class_name": "QuantizedDTypePolicy",
700
+ "config": {
701
+ "mode": "int8",
702
+ "source_name": null
703
+ },
704
+ "registered_name": null
705
+ },
706
+ "transformer_layer_10/self_attention/query": {
707
+ "module": "keras.dtype_policies",
708
+ "class_name": "QuantizedDTypePolicy",
709
+ "config": {
710
+ "mode": "int8",
711
+ "source_name": null
712
+ },
713
+ "registered_name": null
714
+ },
715
+ "transformer_layer_11/feedforward_output_dense": {
716
+ "module": "keras.dtype_policies",
717
+ "class_name": "QuantizedDTypePolicy",
718
+ "config": {
719
+ "mode": "int8",
720
+ "source_name": null
721
+ },
722
+ "registered_name": null
723
+ },
724
+ "transformer_layer_11/feedforward_gate_dense": {
725
+ "module": "keras.dtype_policies",
726
+ "class_name": "QuantizedDTypePolicy",
727
+ "config": {
728
+ "mode": "int8",
729
+ "source_name": null
730
+ },
731
+ "registered_name": null
732
+ },
733
+ "transformer_layer_11/feedforward_intermediate_dense": {
734
+ "module": "keras.dtype_policies",
735
+ "class_name": "QuantizedDTypePolicy",
736
+ "config": {
737
+ "mode": "int8",
738
+ "source_name": null
739
+ },
740
+ "registered_name": null
741
+ },
742
+ "transformer_layer_11/self_attention/attention_output": {
743
+ "module": "keras.dtype_policies",
744
+ "class_name": "QuantizedDTypePolicy",
745
+ "config": {
746
+ "mode": "int8",
747
+ "source_name": null
748
+ },
749
+ "registered_name": null
750
+ },
751
+ "transformer_layer_11/self_attention/value": {
752
+ "module": "keras.dtype_policies",
753
+ "class_name": "QuantizedDTypePolicy",
754
+ "config": {
755
+ "mode": "int8",
756
+ "source_name": null
757
+ },
758
+ "registered_name": null
759
+ },
760
+ "transformer_layer_11/self_attention/key": {
761
+ "module": "keras.dtype_policies",
762
+ "class_name": "QuantizedDTypePolicy",
763
+ "config": {
764
+ "mode": "int8",
765
+ "source_name": null
766
+ },
767
+ "registered_name": null
768
+ },
769
+ "transformer_layer_11/self_attention/query": {
770
+ "module": "keras.dtype_policies",
771
+ "class_name": "QuantizedDTypePolicy",
772
+ "config": {
773
+ "mode": "int8",
774
+ "source_name": null
775
+ },
776
+ "registered_name": null
777
+ },
778
+ "transformer_layer_12/feedforward_output_dense": {
779
+ "module": "keras.dtype_policies",
780
+ "class_name": "QuantizedDTypePolicy",
781
+ "config": {
782
+ "mode": "int8",
783
+ "source_name": null
784
+ },
785
+ "registered_name": null
786
+ },
787
+ "transformer_layer_12/feedforward_gate_dense": {
788
+ "module": "keras.dtype_policies",
789
+ "class_name": "QuantizedDTypePolicy",
790
+ "config": {
791
+ "mode": "int8",
792
+ "source_name": null
793
+ },
794
+ "registered_name": null
795
+ },
796
+ "transformer_layer_12/feedforward_intermediate_dense": {
797
+ "module": "keras.dtype_policies",
798
+ "class_name": "QuantizedDTypePolicy",
799
+ "config": {
800
+ "mode": "int8",
801
+ "source_name": null
802
+ },
803
+ "registered_name": null
804
+ },
805
+ "transformer_layer_12/self_attention/attention_output": {
806
+ "module": "keras.dtype_policies",
807
+ "class_name": "QuantizedDTypePolicy",
808
+ "config": {
809
+ "mode": "int8",
810
+ "source_name": null
811
+ },
812
+ "registered_name": null
813
+ },
814
+ "transformer_layer_12/self_attention/value": {
815
+ "module": "keras.dtype_policies",
816
+ "class_name": "QuantizedDTypePolicy",
817
+ "config": {
818
+ "mode": "int8",
819
+ "source_name": null
820
+ },
821
+ "registered_name": null
822
+ },
823
+ "transformer_layer_12/self_attention/key": {
824
+ "module": "keras.dtype_policies",
825
+ "class_name": "QuantizedDTypePolicy",
826
+ "config": {
827
+ "mode": "int8",
828
+ "source_name": null
829
+ },
830
+ "registered_name": null
831
+ },
832
+ "transformer_layer_12/self_attention/query": {
833
+ "module": "keras.dtype_policies",
834
+ "class_name": "QuantizedDTypePolicy",
835
+ "config": {
836
+ "mode": "int8",
837
+ "source_name": null
838
+ },
839
+ "registered_name": null
840
+ },
841
+ "transformer_layer_13/feedforward_output_dense": {
842
+ "module": "keras.dtype_policies",
843
+ "class_name": "QuantizedDTypePolicy",
844
+ "config": {
845
+ "mode": "int8",
846
+ "source_name": null
847
+ },
848
+ "registered_name": null
849
+ },
850
+ "transformer_layer_13/feedforward_gate_dense": {
851
+ "module": "keras.dtype_policies",
852
+ "class_name": "QuantizedDTypePolicy",
853
+ "config": {
854
+ "mode": "int8",
855
+ "source_name": null
856
+ },
857
+ "registered_name": null
858
+ },
859
+ "transformer_layer_13/feedforward_intermediate_dense": {
860
+ "module": "keras.dtype_policies",
861
+ "class_name": "QuantizedDTypePolicy",
862
+ "config": {
863
+ "mode": "int8",
864
+ "source_name": null
865
+ },
866
+ "registered_name": null
867
+ },
868
+ "transformer_layer_13/self_attention/attention_output": {
869
+ "module": "keras.dtype_policies",
870
+ "class_name": "QuantizedDTypePolicy",
871
+ "config": {
872
+ "mode": "int8",
873
+ "source_name": null
874
+ },
875
+ "registered_name": null
876
+ },
877
+ "transformer_layer_13/self_attention/value": {
878
+ "module": "keras.dtype_policies",
879
+ "class_name": "QuantizedDTypePolicy",
880
+ "config": {
881
+ "mode": "int8",
882
+ "source_name": null
883
+ },
884
+ "registered_name": null
885
+ },
886
+ "transformer_layer_13/self_attention/key": {
887
+ "module": "keras.dtype_policies",
888
+ "class_name": "QuantizedDTypePolicy",
889
+ "config": {
890
+ "mode": "int8",
891
+ "source_name": null
892
+ },
893
+ "registered_name": null
894
+ },
895
+ "transformer_layer_13/self_attention/query": {
896
+ "module": "keras.dtype_policies",
897
+ "class_name": "QuantizedDTypePolicy",
898
+ "config": {
899
+ "mode": "int8",
900
+ "source_name": null
901
+ },
902
+ "registered_name": null
903
+ },
904
+ "transformer_layer_14/feedforward_output_dense": {
905
+ "module": "keras.dtype_policies",
906
+ "class_name": "QuantizedDTypePolicy",
907
+ "config": {
908
+ "mode": "int8",
909
+ "source_name": null
910
+ },
911
+ "registered_name": null
912
+ },
913
+ "transformer_layer_14/feedforward_gate_dense": {
914
+ "module": "keras.dtype_policies",
915
+ "class_name": "QuantizedDTypePolicy",
916
+ "config": {
917
+ "mode": "int8",
918
+ "source_name": null
919
+ },
920
+ "registered_name": null
921
+ },
922
+ "transformer_layer_14/feedforward_intermediate_dense": {
923
+ "module": "keras.dtype_policies",
924
+ "class_name": "QuantizedDTypePolicy",
925
+ "config": {
926
+ "mode": "int8",
927
+ "source_name": null
928
+ },
929
+ "registered_name": null
930
+ },
931
+ "transformer_layer_14/self_attention/attention_output": {
932
+ "module": "keras.dtype_policies",
933
+ "class_name": "QuantizedDTypePolicy",
934
+ "config": {
935
+ "mode": "int8",
936
+ "source_name": null
937
+ },
938
+ "registered_name": null
939
+ },
940
+ "transformer_layer_14/self_attention/value": {
941
+ "module": "keras.dtype_policies",
942
+ "class_name": "QuantizedDTypePolicy",
943
+ "config": {
944
+ "mode": "int8",
945
+ "source_name": null
946
+ },
947
+ "registered_name": null
948
+ },
949
+ "transformer_layer_14/self_attention/key": {
950
+ "module": "keras.dtype_policies",
951
+ "class_name": "QuantizedDTypePolicy",
952
+ "config": {
953
+ "mode": "int8",
954
+ "source_name": null
955
+ },
956
+ "registered_name": null
957
+ },
958
+ "transformer_layer_14/self_attention/query": {
959
+ "module": "keras.dtype_policies",
960
+ "class_name": "QuantizedDTypePolicy",
961
+ "config": {
962
+ "mode": "int8",
963
+ "source_name": null
964
+ },
965
+ "registered_name": null
966
+ },
967
+ "transformer_layer_15/feedforward_output_dense": {
968
+ "module": "keras.dtype_policies",
969
+ "class_name": "QuantizedDTypePolicy",
970
+ "config": {
971
+ "mode": "int8",
972
+ "source_name": null
973
+ },
974
+ "registered_name": null
975
+ },
976
+ "transformer_layer_15/feedforward_gate_dense": {
977
+ "module": "keras.dtype_policies",
978
+ "class_name": "QuantizedDTypePolicy",
979
+ "config": {
980
+ "mode": "int8",
981
+ "source_name": null
982
+ },
983
+ "registered_name": null
984
+ },
985
+ "transformer_layer_15/feedforward_intermediate_dense": {
986
+ "module": "keras.dtype_policies",
987
+ "class_name": "QuantizedDTypePolicy",
988
+ "config": {
989
+ "mode": "int8",
990
+ "source_name": null
991
+ },
992
+ "registered_name": null
993
+ },
994
+ "transformer_layer_15/self_attention/attention_output": {
995
+ "module": "keras.dtype_policies",
996
+ "class_name": "QuantizedDTypePolicy",
997
+ "config": {
998
+ "mode": "int8",
999
+ "source_name": null
1000
+ },
1001
+ "registered_name": null
1002
+ },
1003
+ "transformer_layer_15/self_attention/value": {
1004
+ "module": "keras.dtype_policies",
1005
+ "class_name": "QuantizedDTypePolicy",
1006
+ "config": {
1007
+ "mode": "int8",
1008
+ "source_name": null
1009
+ },
1010
+ "registered_name": null
1011
+ },
1012
+ "transformer_layer_15/self_attention/key": {
1013
+ "module": "keras.dtype_policies",
1014
+ "class_name": "QuantizedDTypePolicy",
1015
+ "config": {
1016
+ "mode": "int8",
1017
+ "source_name": null
1018
+ },
1019
+ "registered_name": null
1020
+ },
1021
+ "transformer_layer_15/self_attention/query": {
1022
+ "module": "keras.dtype_policies",
1023
+ "class_name": "QuantizedDTypePolicy",
1024
+ "config": {
1025
+ "mode": "int8",
1026
+ "source_name": null
1027
+ },
1028
+ "registered_name": null
1029
+ },
1030
+ "transformer_layer_16/feedforward_output_dense": {
1031
+ "module": "keras.dtype_policies",
1032
+ "class_name": "QuantizedDTypePolicy",
1033
+ "config": {
1034
+ "mode": "int8",
1035
+ "source_name": null
1036
+ },
1037
+ "registered_name": null
1038
+ },
1039
+ "transformer_layer_16/feedforward_gate_dense": {
1040
+ "module": "keras.dtype_policies",
1041
+ "class_name": "QuantizedDTypePolicy",
1042
+ "config": {
1043
+ "mode": "int8",
1044
+ "source_name": null
1045
+ },
1046
+ "registered_name": null
1047
+ },
1048
+ "transformer_layer_16/feedforward_intermediate_dense": {
1049
+ "module": "keras.dtype_policies",
1050
+ "class_name": "QuantizedDTypePolicy",
1051
+ "config": {
1052
+ "mode": "int8",
1053
+ "source_name": null
1054
+ },
1055
+ "registered_name": null
1056
+ },
1057
+ "transformer_layer_16/self_attention/attention_output": {
1058
+ "module": "keras.dtype_policies",
1059
+ "class_name": "QuantizedDTypePolicy",
1060
+ "config": {
1061
+ "mode": "int8",
1062
+ "source_name": null
1063
+ },
1064
+ "registered_name": null
1065
+ },
1066
+ "transformer_layer_16/self_attention/value": {
1067
+ "module": "keras.dtype_policies",
1068
+ "class_name": "QuantizedDTypePolicy",
1069
+ "config": {
1070
+ "mode": "int8",
1071
+ "source_name": null
1072
+ },
1073
+ "registered_name": null
1074
+ },
1075
+ "transformer_layer_16/self_attention/key": {
1076
+ "module": "keras.dtype_policies",
1077
+ "class_name": "QuantizedDTypePolicy",
1078
+ "config": {
1079
+ "mode": "int8",
1080
+ "source_name": null
1081
+ },
1082
+ "registered_name": null
1083
+ },
1084
+ "transformer_layer_16/self_attention/query": {
1085
+ "module": "keras.dtype_policies",
1086
+ "class_name": "QuantizedDTypePolicy",
1087
+ "config": {
1088
+ "mode": "int8",
1089
+ "source_name": null
1090
+ },
1091
+ "registered_name": null
1092
+ },
1093
+ "transformer_layer_17/feedforward_output_dense": {
1094
+ "module": "keras.dtype_policies",
1095
+ "class_name": "QuantizedDTypePolicy",
1096
+ "config": {
1097
+ "mode": "int8",
1098
+ "source_name": null
1099
+ },
1100
+ "registered_name": null
1101
+ },
1102
+ "transformer_layer_17/feedforward_gate_dense": {
1103
+ "module": "keras.dtype_policies",
1104
+ "class_name": "QuantizedDTypePolicy",
1105
+ "config": {
1106
+ "mode": "int8",
1107
+ "source_name": null
1108
+ },
1109
+ "registered_name": null
1110
+ },
1111
+ "transformer_layer_17/feedforward_intermediate_dense": {
1112
+ "module": "keras.dtype_policies",
1113
+ "class_name": "QuantizedDTypePolicy",
1114
+ "config": {
1115
+ "mode": "int8",
1116
+ "source_name": null
1117
+ },
1118
+ "registered_name": null
1119
+ },
1120
+ "transformer_layer_17/self_attention/attention_output": {
1121
+ "module": "keras.dtype_policies",
1122
+ "class_name": "QuantizedDTypePolicy",
1123
+ "config": {
1124
+ "mode": "int8",
1125
+ "source_name": null
1126
+ },
1127
+ "registered_name": null
1128
+ },
1129
+ "transformer_layer_17/self_attention/value": {
1130
+ "module": "keras.dtype_policies",
1131
+ "class_name": "QuantizedDTypePolicy",
1132
+ "config": {
1133
+ "mode": "int8",
1134
+ "source_name": null
1135
+ },
1136
+ "registered_name": null
1137
+ },
1138
+ "transformer_layer_17/self_attention/key": {
1139
+ "module": "keras.dtype_policies",
1140
+ "class_name": "QuantizedDTypePolicy",
1141
+ "config": {
1142
+ "mode": "int8",
1143
+ "source_name": null
1144
+ },
1145
+ "registered_name": null
1146
+ },
1147
+ "transformer_layer_17/self_attention/query": {
1148
+ "module": "keras.dtype_policies",
1149
+ "class_name": "QuantizedDTypePolicy",
1150
+ "config": {
1151
+ "mode": "int8",
1152
+ "source_name": null
1153
+ },
1154
+ "registered_name": null
1155
+ },
1156
+ "transformer_layer_18/feedforward_output_dense": {
1157
+ "module": "keras.dtype_policies",
1158
+ "class_name": "QuantizedDTypePolicy",
1159
+ "config": {
1160
+ "mode": "int8",
1161
+ "source_name": null
1162
+ },
1163
+ "registered_name": null
1164
+ },
1165
+ "transformer_layer_18/feedforward_gate_dense": {
1166
+ "module": "keras.dtype_policies",
1167
+ "class_name": "QuantizedDTypePolicy",
1168
+ "config": {
1169
+ "mode": "int8",
1170
+ "source_name": null
1171
+ },
1172
+ "registered_name": null
1173
+ },
1174
+ "transformer_layer_18/feedforward_intermediate_dense": {
1175
+ "module": "keras.dtype_policies",
1176
+ "class_name": "QuantizedDTypePolicy",
1177
+ "config": {
1178
+ "mode": "int8",
1179
+ "source_name": null
1180
+ },
1181
+ "registered_name": null
1182
+ },
1183
+ "transformer_layer_18/self_attention/attention_output": {
1184
+ "module": "keras.dtype_policies",
1185
+ "class_name": "QuantizedDTypePolicy",
1186
+ "config": {
1187
+ "mode": "int8",
1188
+ "source_name": null
1189
+ },
1190
+ "registered_name": null
1191
+ },
1192
+ "transformer_layer_18/self_attention/value": {
1193
+ "module": "keras.dtype_policies",
1194
+ "class_name": "QuantizedDTypePolicy",
1195
+ "config": {
1196
+ "mode": "int8",
1197
+ "source_name": null
1198
+ },
1199
+ "registered_name": null
1200
+ },
1201
+ "transformer_layer_18/self_attention/key": {
1202
+ "module": "keras.dtype_policies",
1203
+ "class_name": "QuantizedDTypePolicy",
1204
+ "config": {
1205
+ "mode": "int8",
1206
+ "source_name": null
1207
+ },
1208
+ "registered_name": null
1209
+ },
1210
+ "transformer_layer_18/self_attention/query": {
1211
+ "module": "keras.dtype_policies",
1212
+ "class_name": "QuantizedDTypePolicy",
1213
+ "config": {
1214
+ "mode": "int8",
1215
+ "source_name": null
1216
+ },
1217
+ "registered_name": null
1218
+ },
1219
+ "transformer_layer_19/feedforward_output_dense": {
1220
+ "module": "keras.dtype_policies",
1221
+ "class_name": "QuantizedDTypePolicy",
1222
+ "config": {
1223
+ "mode": "int8",
1224
+ "source_name": null
1225
+ },
1226
+ "registered_name": null
1227
+ },
1228
+ "transformer_layer_19/feedforward_gate_dense": {
1229
+ "module": "keras.dtype_policies",
1230
+ "class_name": "QuantizedDTypePolicy",
1231
+ "config": {
1232
+ "mode": "int8",
1233
+ "source_name": null
1234
+ },
1235
+ "registered_name": null
1236
+ },
1237
+ "transformer_layer_19/feedforward_intermediate_dense": {
1238
+ "module": "keras.dtype_policies",
1239
+ "class_name": "QuantizedDTypePolicy",
1240
+ "config": {
1241
+ "mode": "int8",
1242
+ "source_name": null
1243
+ },
1244
+ "registered_name": null
1245
+ },
1246
+ "transformer_layer_19/self_attention/attention_output": {
1247
+ "module": "keras.dtype_policies",
1248
+ "class_name": "QuantizedDTypePolicy",
1249
+ "config": {
1250
+ "mode": "int8",
1251
+ "source_name": null
1252
+ },
1253
+ "registered_name": null
1254
+ },
1255
+ "transformer_layer_19/self_attention/value": {
1256
+ "module": "keras.dtype_policies",
1257
+ "class_name": "QuantizedDTypePolicy",
1258
+ "config": {
1259
+ "mode": "int8",
1260
+ "source_name": null
1261
+ },
1262
+ "registered_name": null
1263
+ },
1264
+ "transformer_layer_19/self_attention/key": {
1265
+ "module": "keras.dtype_policies",
1266
+ "class_name": "QuantizedDTypePolicy",
1267
+ "config": {
1268
+ "mode": "int8",
1269
+ "source_name": null
1270
+ },
1271
+ "registered_name": null
1272
+ },
1273
+ "transformer_layer_19/self_attention/query": {
1274
+ "module": "keras.dtype_policies",
1275
+ "class_name": "QuantizedDTypePolicy",
1276
+ "config": {
1277
+ "mode": "int8",
1278
+ "source_name": null
1279
+ },
1280
+ "registered_name": null
1281
+ },
1282
+ "transformer_layer_20/feedforward_output_dense": {
1283
+ "module": "keras.dtype_policies",
1284
+ "class_name": "QuantizedDTypePolicy",
1285
+ "config": {
1286
+ "mode": "int8",
1287
+ "source_name": null
1288
+ },
1289
+ "registered_name": null
1290
+ },
1291
+ "transformer_layer_20/feedforward_gate_dense": {
1292
+ "module": "keras.dtype_policies",
1293
+ "class_name": "QuantizedDTypePolicy",
1294
+ "config": {
1295
+ "mode": "int8",
1296
+ "source_name": null
1297
+ },
1298
+ "registered_name": null
1299
+ },
1300
+ "transformer_layer_20/feedforward_intermediate_dense": {
1301
+ "module": "keras.dtype_policies",
1302
+ "class_name": "QuantizedDTypePolicy",
1303
+ "config": {
1304
+ "mode": "int8",
1305
+ "source_name": null
1306
+ },
1307
+ "registered_name": null
1308
+ },
1309
+ "transformer_layer_20/self_attention/attention_output": {
1310
+ "module": "keras.dtype_policies",
1311
+ "class_name": "QuantizedDTypePolicy",
1312
+ "config": {
1313
+ "mode": "int8",
1314
+ "source_name": null
1315
+ },
1316
+ "registered_name": null
1317
+ },
1318
+ "transformer_layer_20/self_attention/value": {
1319
+ "module": "keras.dtype_policies",
1320
+ "class_name": "QuantizedDTypePolicy",
1321
+ "config": {
1322
+ "mode": "int8",
1323
+ "source_name": null
1324
+ },
1325
+ "registered_name": null
1326
+ },
1327
+ "transformer_layer_20/self_attention/key": {
1328
+ "module": "keras.dtype_policies",
1329
+ "class_name": "QuantizedDTypePolicy",
1330
+ "config": {
1331
+ "mode": "int8",
1332
+ "source_name": null
1333
+ },
1334
+ "registered_name": null
1335
+ },
1336
+ "transformer_layer_20/self_attention/query": {
1337
+ "module": "keras.dtype_policies",
1338
+ "class_name": "QuantizedDTypePolicy",
1339
+ "config": {
1340
+ "mode": "int8",
1341
+ "source_name": null
1342
+ },
1343
+ "registered_name": null
1344
+ },
1345
+ "transformer_layer_21/feedforward_output_dense": {
1346
+ "module": "keras.dtype_policies",
1347
+ "class_name": "QuantizedDTypePolicy",
1348
+ "config": {
1349
+ "mode": "int8",
1350
+ "source_name": null
1351
+ },
1352
+ "registered_name": null
1353
+ },
1354
+ "transformer_layer_21/feedforward_gate_dense": {
1355
+ "module": "keras.dtype_policies",
1356
+ "class_name": "QuantizedDTypePolicy",
1357
+ "config": {
1358
+ "mode": "int8",
1359
+ "source_name": null
1360
+ },
1361
+ "registered_name": null
1362
+ },
1363
+ "transformer_layer_21/feedforward_intermediate_dense": {
1364
+ "module": "keras.dtype_policies",
1365
+ "class_name": "QuantizedDTypePolicy",
1366
+ "config": {
1367
+ "mode": "int8",
1368
+ "source_name": null
1369
+ },
1370
+ "registered_name": null
1371
+ },
1372
+ "transformer_layer_21/self_attention/attention_output": {
1373
+ "module": "keras.dtype_policies",
1374
+ "class_name": "QuantizedDTypePolicy",
1375
+ "config": {
1376
+ "mode": "int8",
1377
+ "source_name": null
1378
+ },
1379
+ "registered_name": null
1380
+ },
1381
+ "transformer_layer_21/self_attention/value": {
1382
+ "module": "keras.dtype_policies",
1383
+ "class_name": "QuantizedDTypePolicy",
1384
+ "config": {
1385
+ "mode": "int8",
1386
+ "source_name": null
1387
+ },
1388
+ "registered_name": null
1389
+ },
1390
+ "transformer_layer_21/self_attention/key": {
1391
+ "module": "keras.dtype_policies",
1392
+ "class_name": "QuantizedDTypePolicy",
1393
+ "config": {
1394
+ "mode": "int8",
1395
+ "source_name": null
1396
+ },
1397
+ "registered_name": null
1398
+ },
1399
+ "transformer_layer_21/self_attention/query": {
1400
+ "module": "keras.dtype_policies",
1401
+ "class_name": "QuantizedDTypePolicy",
1402
+ "config": {
1403
+ "mode": "int8",
1404
+ "source_name": null
1405
+ },
1406
+ "registered_name": null
1407
+ },
1408
+ "transformer_layer_22/feedforward_output_dense": {
1409
+ "module": "keras.dtype_policies",
1410
+ "class_name": "QuantizedDTypePolicy",
1411
+ "config": {
1412
+ "mode": "int8",
1413
+ "source_name": null
1414
+ },
1415
+ "registered_name": null
1416
+ },
1417
+ "transformer_layer_22/feedforward_gate_dense": {
1418
+ "module": "keras.dtype_policies",
1419
+ "class_name": "QuantizedDTypePolicy",
1420
+ "config": {
1421
+ "mode": "int8",
1422
+ "source_name": null
1423
+ },
1424
+ "registered_name": null
1425
+ },
1426
+ "transformer_layer_22/feedforward_intermediate_dense": {
1427
+ "module": "keras.dtype_policies",
1428
+ "class_name": "QuantizedDTypePolicy",
1429
+ "config": {
1430
+ "mode": "int8",
1431
+ "source_name": null
1432
+ },
1433
+ "registered_name": null
1434
+ },
1435
+ "transformer_layer_22/self_attention/attention_output": {
1436
+ "module": "keras.dtype_policies",
1437
+ "class_name": "QuantizedDTypePolicy",
1438
+ "config": {
1439
+ "mode": "int8",
1440
+ "source_name": null
1441
+ },
1442
+ "registered_name": null
1443
+ },
1444
+ "transformer_layer_22/self_attention/value": {
1445
+ "module": "keras.dtype_policies",
1446
+ "class_name": "QuantizedDTypePolicy",
1447
+ "config": {
1448
+ "mode": "int8",
1449
+ "source_name": null
1450
+ },
1451
+ "registered_name": null
1452
+ },
1453
+ "transformer_layer_22/self_attention/key": {
1454
+ "module": "keras.dtype_policies",
1455
+ "class_name": "QuantizedDTypePolicy",
1456
+ "config": {
1457
+ "mode": "int8",
1458
+ "source_name": null
1459
+ },
1460
+ "registered_name": null
1461
+ },
1462
+ "transformer_layer_22/self_attention/query": {
1463
+ "module": "keras.dtype_policies",
1464
+ "class_name": "QuantizedDTypePolicy",
1465
+ "config": {
1466
+ "mode": "int8",
1467
+ "source_name": null
1468
+ },
1469
+ "registered_name": null
1470
+ },
1471
+ "transformer_layer_23/feedforward_output_dense": {
1472
+ "module": "keras.dtype_policies",
1473
+ "class_name": "QuantizedDTypePolicy",
1474
+ "config": {
1475
+ "mode": "int8",
1476
+ "source_name": null
1477
+ },
1478
+ "registered_name": null
1479
+ },
1480
+ "transformer_layer_23/feedforward_gate_dense": {
1481
+ "module": "keras.dtype_policies",
1482
+ "class_name": "QuantizedDTypePolicy",
1483
+ "config": {
1484
+ "mode": "int8",
1485
+ "source_name": null
1486
+ },
1487
+ "registered_name": null
1488
+ },
1489
+ "transformer_layer_23/feedforward_intermediate_dense": {
1490
+ "module": "keras.dtype_policies",
1491
+ "class_name": "QuantizedDTypePolicy",
1492
+ "config": {
1493
+ "mode": "int8",
1494
+ "source_name": null
1495
+ },
1496
+ "registered_name": null
1497
+ },
1498
+ "transformer_layer_23/self_attention/attention_output": {
1499
+ "module": "keras.dtype_policies",
1500
+ "class_name": "QuantizedDTypePolicy",
1501
+ "config": {
1502
+ "mode": "int8",
1503
+ "source_name": null
1504
+ },
1505
+ "registered_name": null
1506
+ },
1507
+ "transformer_layer_23/self_attention/value": {
1508
+ "module": "keras.dtype_policies",
1509
+ "class_name": "QuantizedDTypePolicy",
1510
+ "config": {
1511
+ "mode": "int8",
1512
+ "source_name": null
1513
+ },
1514
+ "registered_name": null
1515
+ },
1516
+ "transformer_layer_23/self_attention/key": {
1517
+ "module": "keras.dtype_policies",
1518
+ "class_name": "QuantizedDTypePolicy",
1519
+ "config": {
1520
+ "mode": "int8",
1521
+ "source_name": null
1522
+ },
1523
+ "registered_name": null
1524
+ },
1525
+ "transformer_layer_23/self_attention/query": {
1526
+ "module": "keras.dtype_policies",
1527
+ "class_name": "QuantizedDTypePolicy",
1528
+ "config": {
1529
+ "mode": "int8",
1530
+ "source_name": null
1531
+ },
1532
+ "registered_name": null
1533
+ },
1534
+ "transformer_layer_24/feedforward_output_dense": {
1535
+ "module": "keras.dtype_policies",
1536
+ "class_name": "QuantizedDTypePolicy",
1537
+ "config": {
1538
+ "mode": "int8",
1539
+ "source_name": null
1540
+ },
1541
+ "registered_name": null
1542
+ },
1543
+ "transformer_layer_24/feedforward_gate_dense": {
1544
+ "module": "keras.dtype_policies",
1545
+ "class_name": "QuantizedDTypePolicy",
1546
+ "config": {
1547
+ "mode": "int8",
1548
+ "source_name": null
1549
+ },
1550
+ "registered_name": null
1551
+ },
1552
+ "transformer_layer_24/feedforward_intermediate_dense": {
1553
+ "module": "keras.dtype_policies",
1554
+ "class_name": "QuantizedDTypePolicy",
1555
+ "config": {
1556
+ "mode": "int8",
1557
+ "source_name": null
1558
+ },
1559
+ "registered_name": null
1560
+ },
1561
+ "transformer_layer_24/self_attention/attention_output": {
1562
+ "module": "keras.dtype_policies",
1563
+ "class_name": "QuantizedDTypePolicy",
1564
+ "config": {
1565
+ "mode": "int8",
1566
+ "source_name": null
1567
+ },
1568
+ "registered_name": null
1569
+ },
1570
+ "transformer_layer_24/self_attention/value": {
1571
+ "module": "keras.dtype_policies",
1572
+ "class_name": "QuantizedDTypePolicy",
1573
+ "config": {
1574
+ "mode": "int8",
1575
+ "source_name": null
1576
+ },
1577
+ "registered_name": null
1578
+ },
1579
+ "transformer_layer_24/self_attention/key": {
1580
+ "module": "keras.dtype_policies",
1581
+ "class_name": "QuantizedDTypePolicy",
1582
+ "config": {
1583
+ "mode": "int8",
1584
+ "source_name": null
1585
+ },
1586
+ "registered_name": null
1587
+ },
1588
+ "transformer_layer_24/self_attention/query": {
1589
+ "module": "keras.dtype_policies",
1590
+ "class_name": "QuantizedDTypePolicy",
1591
+ "config": {
1592
+ "mode": "int8",
1593
+ "source_name": null
1594
+ },
1595
+ "registered_name": null
1596
+ },
1597
+ "transformer_layer_25/feedforward_output_dense": {
1598
+ "module": "keras.dtype_policies",
1599
+ "class_name": "QuantizedDTypePolicy",
1600
+ "config": {
1601
+ "mode": "int8",
1602
+ "source_name": null
1603
+ },
1604
+ "registered_name": null
1605
+ },
1606
+ "transformer_layer_25/feedforward_gate_dense": {
1607
+ "module": "keras.dtype_policies",
1608
+ "class_name": "QuantizedDTypePolicy",
1609
+ "config": {
1610
+ "mode": "int8",
1611
+ "source_name": null
1612
+ },
1613
+ "registered_name": null
1614
+ },
1615
+ "transformer_layer_25/feedforward_intermediate_dense": {
1616
+ "module": "keras.dtype_policies",
1617
+ "class_name": "QuantizedDTypePolicy",
1618
+ "config": {
1619
+ "mode": "int8",
1620
+ "source_name": null
1621
+ },
1622
+ "registered_name": null
1623
+ },
1624
+ "transformer_layer_25/self_attention/attention_output": {
1625
+ "module": "keras.dtype_policies",
1626
+ "class_name": "QuantizedDTypePolicy",
1627
+ "config": {
1628
+ "mode": "int8",
1629
+ "source_name": null
1630
+ },
1631
+ "registered_name": null
1632
+ },
1633
+ "transformer_layer_25/self_attention/value": {
1634
+ "module": "keras.dtype_policies",
1635
+ "class_name": "QuantizedDTypePolicy",
1636
+ "config": {
1637
+ "mode": "int8",
1638
+ "source_name": null
1639
+ },
1640
+ "registered_name": null
1641
+ },
1642
+ "transformer_layer_25/self_attention/key": {
1643
+ "module": "keras.dtype_policies",
1644
+ "class_name": "QuantizedDTypePolicy",
1645
+ "config": {
1646
+ "mode": "int8",
1647
+ "source_name": null
1648
+ },
1649
+ "registered_name": null
1650
+ },
1651
+ "transformer_layer_25/self_attention/query": {
1652
+ "module": "keras.dtype_policies",
1653
+ "class_name": "QuantizedDTypePolicy",
1654
+ "config": {
1655
+ "mode": "int8",
1656
+ "source_name": null
1657
+ },
1658
+ "registered_name": null
1659
+ },
1660
+ "transformer_layer_26/feedforward_output_dense": {
1661
+ "module": "keras.dtype_policies",
1662
+ "class_name": "QuantizedDTypePolicy",
1663
+ "config": {
1664
+ "mode": "int8",
1665
+ "source_name": null
1666
+ },
1667
+ "registered_name": null
1668
+ },
1669
+ "transformer_layer_26/feedforward_gate_dense": {
1670
+ "module": "keras.dtype_policies",
1671
+ "class_name": "QuantizedDTypePolicy",
1672
+ "config": {
1673
+ "mode": "int8",
1674
+ "source_name": null
1675
+ },
1676
+ "registered_name": null
1677
+ },
1678
+ "transformer_layer_26/feedforward_intermediate_dense": {
1679
+ "module": "keras.dtype_policies",
1680
+ "class_name": "QuantizedDTypePolicy",
1681
+ "config": {
1682
+ "mode": "int8",
1683
+ "source_name": null
1684
+ },
1685
+ "registered_name": null
1686
+ },
1687
+ "transformer_layer_26/self_attention/attention_output": {
1688
+ "module": "keras.dtype_policies",
1689
+ "class_name": "QuantizedDTypePolicy",
1690
+ "config": {
1691
+ "mode": "int8",
1692
+ "source_name": null
1693
+ },
1694
+ "registered_name": null
1695
+ },
1696
+ "transformer_layer_26/self_attention/value": {
1697
+ "module": "keras.dtype_policies",
1698
+ "class_name": "QuantizedDTypePolicy",
1699
+ "config": {
1700
+ "mode": "int8",
1701
+ "source_name": null
1702
+ },
1703
+ "registered_name": null
1704
+ },
1705
+ "transformer_layer_26/self_attention/key": {
1706
+ "module": "keras.dtype_policies",
1707
+ "class_name": "QuantizedDTypePolicy",
1708
+ "config": {
1709
+ "mode": "int8",
1710
+ "source_name": null
1711
+ },
1712
+ "registered_name": null
1713
+ },
1714
+ "transformer_layer_26/self_attention/query": {
1715
+ "module": "keras.dtype_policies",
1716
+ "class_name": "QuantizedDTypePolicy",
1717
+ "config": {
1718
+ "mode": "int8",
1719
+ "source_name": null
1720
+ },
1721
+ "registered_name": null
1722
+ },
1723
+ "transformer_layer_27/feedforward_output_dense": {
1724
+ "module": "keras.dtype_policies",
1725
+ "class_name": "QuantizedDTypePolicy",
1726
+ "config": {
1727
+ "mode": "int8",
1728
+ "source_name": null
1729
+ },
1730
+ "registered_name": null
1731
+ },
1732
+ "transformer_layer_27/feedforward_gate_dense": {
1733
+ "module": "keras.dtype_policies",
1734
+ "class_name": "QuantizedDTypePolicy",
1735
+ "config": {
1736
+ "mode": "int8",
1737
+ "source_name": null
1738
+ },
1739
+ "registered_name": null
1740
+ },
1741
+ "transformer_layer_27/feedforward_intermediate_dense": {
1742
+ "module": "keras.dtype_policies",
1743
+ "class_name": "QuantizedDTypePolicy",
1744
+ "config": {
1745
+ "mode": "int8",
1746
+ "source_name": null
1747
+ },
1748
+ "registered_name": null
1749
+ },
1750
+ "transformer_layer_27/self_attention/attention_output": {
1751
+ "module": "keras.dtype_policies",
1752
+ "class_name": "QuantizedDTypePolicy",
1753
+ "config": {
1754
+ "mode": "int8",
1755
+ "source_name": null
1756
+ },
1757
+ "registered_name": null
1758
+ },
1759
+ "transformer_layer_27/self_attention/value": {
1760
+ "module": "keras.dtype_policies",
1761
+ "class_name": "QuantizedDTypePolicy",
1762
+ "config": {
1763
+ "mode": "int8",
1764
+ "source_name": null
1765
+ },
1766
+ "registered_name": null
1767
+ },
1768
+ "transformer_layer_27/self_attention/key": {
1769
+ "module": "keras.dtype_policies",
1770
+ "class_name": "QuantizedDTypePolicy",
1771
+ "config": {
1772
+ "mode": "int8",
1773
+ "source_name": null
1774
+ },
1775
+ "registered_name": null
1776
+ },
1777
+ "transformer_layer_27/self_attention/query": {
1778
+ "module": "keras.dtype_policies",
1779
+ "class_name": "QuantizedDTypePolicy",
1780
+ "config": {
1781
+ "mode": "int8",
1782
+ "source_name": null
1783
+ },
1784
+ "registered_name": null
1785
+ },
1786
+ "transformer_layer_28/feedforward_output_dense": {
1787
+ "module": "keras.dtype_policies",
1788
+ "class_name": "QuantizedDTypePolicy",
1789
+ "config": {
1790
+ "mode": "int8",
1791
+ "source_name": null
1792
+ },
1793
+ "registered_name": null
1794
+ },
1795
+ "transformer_layer_28/feedforward_gate_dense": {
1796
+ "module": "keras.dtype_policies",
1797
+ "class_name": "QuantizedDTypePolicy",
1798
+ "config": {
1799
+ "mode": "int8",
1800
+ "source_name": null
1801
+ },
1802
+ "registered_name": null
1803
+ },
1804
+ "transformer_layer_28/feedforward_intermediate_dense": {
1805
+ "module": "keras.dtype_policies",
1806
+ "class_name": "QuantizedDTypePolicy",
1807
+ "config": {
1808
+ "mode": "int8",
1809
+ "source_name": null
1810
+ },
1811
+ "registered_name": null
1812
+ },
1813
+ "transformer_layer_28/self_attention/attention_output": {
1814
+ "module": "keras.dtype_policies",
1815
+ "class_name": "QuantizedDTypePolicy",
1816
+ "config": {
1817
+ "mode": "int8",
1818
+ "source_name": null
1819
+ },
1820
+ "registered_name": null
1821
+ },
1822
+ "transformer_layer_28/self_attention/value": {
1823
+ "module": "keras.dtype_policies",
1824
+ "class_name": "QuantizedDTypePolicy",
1825
+ "config": {
1826
+ "mode": "int8",
1827
+ "source_name": null
1828
+ },
1829
+ "registered_name": null
1830
+ },
1831
+ "transformer_layer_28/self_attention/key": {
1832
+ "module": "keras.dtype_policies",
1833
+ "class_name": "QuantizedDTypePolicy",
1834
+ "config": {
1835
+ "mode": "int8",
1836
+ "source_name": null
1837
+ },
1838
+ "registered_name": null
1839
+ },
1840
+ "transformer_layer_28/self_attention/query": {
1841
+ "module": "keras.dtype_policies",
1842
+ "class_name": "QuantizedDTypePolicy",
1843
+ "config": {
1844
+ "mode": "int8",
1845
+ "source_name": null
1846
+ },
1847
+ "registered_name": null
1848
+ },
1849
+ "transformer_layer_29/feedforward_output_dense": {
1850
+ "module": "keras.dtype_policies",
1851
+ "class_name": "QuantizedDTypePolicy",
1852
+ "config": {
1853
+ "mode": "int8",
1854
+ "source_name": null
1855
+ },
1856
+ "registered_name": null
1857
+ },
1858
+ "transformer_layer_29/feedforward_gate_dense": {
1859
+ "module": "keras.dtype_policies",
1860
+ "class_name": "QuantizedDTypePolicy",
1861
+ "config": {
1862
+ "mode": "int8",
1863
+ "source_name": null
1864
+ },
1865
+ "registered_name": null
1866
+ },
1867
+ "transformer_layer_29/feedforward_intermediate_dense": {
1868
+ "module": "keras.dtype_policies",
1869
+ "class_name": "QuantizedDTypePolicy",
1870
+ "config": {
1871
+ "mode": "int8",
1872
+ "source_name": null
1873
+ },
1874
+ "registered_name": null
1875
+ },
1876
+ "transformer_layer_29/self_attention/attention_output": {
1877
+ "module": "keras.dtype_policies",
1878
+ "class_name": "QuantizedDTypePolicy",
1879
+ "config": {
1880
+ "mode": "int8",
1881
+ "source_name": null
1882
+ },
1883
+ "registered_name": null
1884
+ },
1885
+ "transformer_layer_29/self_attention/value": {
1886
+ "module": "keras.dtype_policies",
1887
+ "class_name": "QuantizedDTypePolicy",
1888
+ "config": {
1889
+ "mode": "int8",
1890
+ "source_name": null
1891
+ },
1892
+ "registered_name": null
1893
+ },
1894
+ "transformer_layer_29/self_attention/key": {
1895
+ "module": "keras.dtype_policies",
1896
+ "class_name": "QuantizedDTypePolicy",
1897
+ "config": {
1898
+ "mode": "int8",
1899
+ "source_name": null
1900
+ },
1901
+ "registered_name": null
1902
+ },
1903
+ "transformer_layer_29/self_attention/query": {
1904
+ "module": "keras.dtype_policies",
1905
+ "class_name": "QuantizedDTypePolicy",
1906
+ "config": {
1907
+ "mode": "int8",
1908
+ "source_name": null
1909
+ },
1910
+ "registered_name": null
1911
+ },
1912
+ "transformer_layer_30/feedforward_output_dense": {
1913
+ "module": "keras.dtype_policies",
1914
+ "class_name": "QuantizedDTypePolicy",
1915
+ "config": {
1916
+ "mode": "int8",
1917
+ "source_name": null
1918
+ },
1919
+ "registered_name": null
1920
+ },
1921
+ "transformer_layer_30/feedforward_gate_dense": {
1922
+ "module": "keras.dtype_policies",
1923
+ "class_name": "QuantizedDTypePolicy",
1924
+ "config": {
1925
+ "mode": "int8",
1926
+ "source_name": null
1927
+ },
1928
+ "registered_name": null
1929
+ },
1930
+ "transformer_layer_30/feedforward_intermediate_dense": {
1931
+ "module": "keras.dtype_policies",
1932
+ "class_name": "QuantizedDTypePolicy",
1933
+ "config": {
1934
+ "mode": "int8",
1935
+ "source_name": null
1936
+ },
1937
+ "registered_name": null
1938
+ },
1939
+ "transformer_layer_30/self_attention/attention_output": {
1940
+ "module": "keras.dtype_policies",
1941
+ "class_name": "QuantizedDTypePolicy",
1942
+ "config": {
1943
+ "mode": "int8",
1944
+ "source_name": null
1945
+ },
1946
+ "registered_name": null
1947
+ },
1948
+ "transformer_layer_30/self_attention/value": {
1949
+ "module": "keras.dtype_policies",
1950
+ "class_name": "QuantizedDTypePolicy",
1951
+ "config": {
1952
+ "mode": "int8",
1953
+ "source_name": null
1954
+ },
1955
+ "registered_name": null
1956
+ },
1957
+ "transformer_layer_30/self_attention/key": {
1958
+ "module": "keras.dtype_policies",
1959
+ "class_name": "QuantizedDTypePolicy",
1960
+ "config": {
1961
+ "mode": "int8",
1962
+ "source_name": null
1963
+ },
1964
+ "registered_name": null
1965
+ },
1966
+ "transformer_layer_30/self_attention/query": {
1967
+ "module": "keras.dtype_policies",
1968
+ "class_name": "QuantizedDTypePolicy",
1969
+ "config": {
1970
+ "mode": "int8",
1971
+ "source_name": null
1972
+ },
1973
+ "registered_name": null
1974
+ },
1975
+ "transformer_layer_31/feedforward_output_dense": {
1976
+ "module": "keras.dtype_policies",
1977
+ "class_name": "QuantizedDTypePolicy",
1978
+ "config": {
1979
+ "mode": "int8",
1980
+ "source_name": null
1981
+ },
1982
+ "registered_name": null
1983
+ },
1984
+ "transformer_layer_31/feedforward_gate_dense": {
1985
+ "module": "keras.dtype_policies",
1986
+ "class_name": "QuantizedDTypePolicy",
1987
+ "config": {
1988
+ "mode": "int8",
1989
+ "source_name": null
1990
+ },
1991
+ "registered_name": null
1992
+ },
1993
+ "transformer_layer_31/feedforward_intermediate_dense": {
1994
+ "module": "keras.dtype_policies",
1995
+ "class_name": "QuantizedDTypePolicy",
1996
+ "config": {
1997
+ "mode": "int8",
1998
+ "source_name": null
1999
+ },
2000
+ "registered_name": null
2001
+ },
2002
+ "transformer_layer_31/self_attention/attention_output": {
2003
+ "module": "keras.dtype_policies",
2004
+ "class_name": "QuantizedDTypePolicy",
2005
+ "config": {
2006
+ "mode": "int8",
2007
+ "source_name": null
2008
+ },
2009
+ "registered_name": null
2010
+ },
2011
+ "transformer_layer_31/self_attention/value": {
2012
+ "module": "keras.dtype_policies",
2013
+ "class_name": "QuantizedDTypePolicy",
2014
+ "config": {
2015
+ "mode": "int8",
2016
+ "source_name": null
2017
+ },
2018
+ "registered_name": null
2019
+ },
2020
+ "transformer_layer_31/self_attention/key": {
2021
+ "module": "keras.dtype_policies",
2022
+ "class_name": "QuantizedDTypePolicy",
2023
+ "config": {
2024
+ "mode": "int8",
2025
+ "source_name": null
2026
+ },
2027
+ "registered_name": null
2028
+ },
2029
+ "transformer_layer_31/self_attention/query": {
2030
+ "module": "keras.dtype_policies",
2031
+ "class_name": "QuantizedDTypePolicy",
2032
+ "config": {
2033
+ "mode": "int8",
2034
+ "source_name": null
2035
+ },
2036
+ "registered_name": null
2037
+ }
2038
+ }
2039
+ },
2040
+ "registered_name": null
2041
+ },
2042
+ "vocabulary_size": 128256,
2043
+ "num_layers": 32,
2044
+ "num_query_heads": 32,
2045
+ "hidden_dim": 4096,
2046
+ "intermediate_dim": 14336,
2047
+ "rope_max_wavelength": 500000.0,
2048
+ "rope_scaling_factor": 1.0,
2049
+ "num_key_value_heads": 8,
2050
+ "layer_norm_epsilon": 1e-05,
2051
+ "dropout": 0
2052
+ },
2053
+ "registered_name": "keras_nlp>Llama3Backbone"
2054
+ }
metadata.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.4.1",
3
+ "keras_nlp_version": "0.13.0",
4
+ "parameter_count": 8031894016,
5
+ "date_saved": "2024-08-02@12:09:09"
6
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5a8f242678f18e5cbb77703cee76384cd0f7e73d649e0838e648d9c5286687
3
+ size 8034899912
tokenizer.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_nlp.src.models.llama3.llama3_tokenizer",
3
+ "class_name": "Llama3Tokenizer",
4
+ "config": {
5
+ "name": "llama3_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "sequence_length": null,
16
+ "add_prefix_space": false
17
+ },
18
+ "registered_name": "keras_nlp>Llama3Tokenizer"
19
+ }