martin-gorner (HF staff) committed
Commit a2b7758
Parent: b637f0b

bug fixes: logging of model loading

Files changed (2)
  1. app.py +3 -3
  2. models.py +15 -14
app.py CHANGED
@@ -17,7 +17,7 @@ from models import (
 
 model_labels_list = list(model_labels)
 
-# lod a warm up (compile) all the models
+# load and warm up (compile) all the models
 models = []
 for preset in model_presets:
     model = load_model(preset)
@@ -32,7 +32,7 @@ for preset in model_presets:
 # model = keras_hub.models.Llama3CausalLM.from_preset(
 #     "hf://meta-llama/Llama-3.2-1B-Instruct", dtype="bfloat16"
 # )
-# models = [model, model]
+# models = [model, model, model, model, model]
 
 
 def chat_turn_assistant_1(
@@ -170,7 +170,7 @@ with gr.Blocks(fill_width=True, title="Keras demo") as demo:
     gr.HTML(
         "<H2> Battle of the Keras chatbots on TPU</H2>"
         + "All the models are loaded into the TPU memory. "
-        + "You can call them at will and compare their answers. <br/>"
+        + "You can call any of them and compare their answers. <br/>"
         + "The entire chat history is fed to the models at every submission."
         + "This demno is runnig on a Google TPU v5e 2x4 (8 cores).",
     )
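Context for the comment fix above: "warm up (compile)" refers to running one throwaway generation per model at startup so that XLA compiles the generation function before any user interacts with the demo. A minimal sketch of that pattern, assuming a keras_hub preset like the one commented out above (the prompt string and max_length value are illustrative, not taken from this repo):

import keras_hub

# Illustrative preset list; app.py builds the real one from model_presets.
presets = ["hf://meta-llama/Llama-3.2-1B-Instruct"]

models = []
for preset in presets:
    model = keras_hub.models.Llama3CausalLM.from_preset(preset, dtype="bfloat16")
    # One throwaway call triggers XLA compilation of the generation graph,
    # so the first real chat turn does not pay the compile cost.
    model.generate("warm-up prompt", max_length=8)
    models.append(model)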
models.py CHANGED
@@ -39,24 +39,25 @@ def get_default_layout_map(preset_name, device_mesh):
 
 
 def log_applied_layout_map(model):
-    if "Gemma" in type(model):
+    if "Gemma" in type(model).__name__:
         transformer_decoder_block_name = "decoder_block_1"
-    elif "Llama3" in type(model) or "Mistral" in type(model):
+    elif "Llama3" in type(model).__name__ or "Mistral" in type(model).__name__:
         transformer_decoder_block_name = "transformer_layer_1"
     else:
         assert (0, "Model type not recognized. Cannot display model layout.")
-    # See how layer sharding was applied
-    embedding_layer = model.backbone.get_layer("token_embedding")
-    print(embedding_layer)
-    decoder_block = model.backbone.get_layer(transformer_decoder_block_name)
-    print(type(decoder_block))
-    for variable in embedding_layer.weights + decoder_block.weights:
-        print(
-            f"{variable.path:<58} \
-            {str(variable.shape):<16} \
-            {str(variable.value.sharding.spec):<35} \
-            {str(variable.dtype)}"
-        )
+
+    # See how layer sharding was applied
+    embedding_layer = model.backbone.get_layer("token_embedding")
+    print(embedding_layer)
+    decoder_block = model.backbone.get_layer(transformer_decoder_block_name)
+    print(type(decoder_block))
+    for variable in embedding_layer.weights + decoder_block.weights:
+        print(
+            f"{variable.path:<58} \
+            {str(variable.shape):<16} \
+            {str(variable.value.sharding.spec):<35} \
+            {str(variable.dtype)}"
+        )
 
 
 def load_model(preset):
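The substantive fix in this file: type(model) returns the class object itself, so "Gemma" in type(model) raises TypeError: argument of type 'type' is not iterable, which broke the layout logging this commit repairs. type(model).__name__ returns the class name as a string, which does support substring tests. A minimal sketch with a stand-in class:

class GemmaCausalLM:
    """Stand-in for the real keras_hub model class, for illustration only."""

model = GemmaCausalLM()

# Before the fix, the check raised:
# TypeError: argument of type 'type' is not iterable
#   "Gemma" in type(model)

# After the fix: type(model).__name__ is the string "GemmaCausalLM",
# so the substring membership test works as intended.
assert "Gemma" in type(model).__name__

Note that the unchanged else branch keeps a separate latent bug: assert (0, "...") asserts a non-empty tuple, which is always truthy, so it can never fire; the intended form is assert 0, "..." without the parentheses (CPython even emits a SyntaxWarning for the tuple form).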