0.6 defining chat template for pharia
app.py
CHANGED
@@ -25,8 +25,34 @@ tokenizer_b, model_b = None, None
 torch_dtype = torch.bfloat16
 attn_implementation = "flash_attention_2"
 
+def apply_chat_template(messages, add_generation_prompt=False):
+    """
+    Apply the chat template manually for each message in a list.
+    messages: list of dictionaries, each containing a 'role' and 'content'.
+    """
+    pharia_template = """<|begin_of_text|>"""
+    role_map = {
+        "system": "<|start_header_id|>system<|end_header_id|>\n",
+        "user": "<|start_header_id|>user<|end_header_id|>\n",
+        "assistant": "<|start_header_id|>assistant<|end_header_id|>\n",
+    }
+
+    # Iterate through the messages and apply the template for each role
+    for message in messages:
+        role = message["role"]
+        content = message["content"]
+        pharia_template += role_map.get(role, "") + content + "<|eot_id|>\n"
+
+    # Add the assistant generation prompt if required
+    if add_generation_prompt:
+        pharia_template += "<|start_header_id|>assistant<|end_header_id|>\n"
+
+    return pharia_template
+
+
 def load_model_a(model_id):
-    global tokenizer_a, model_a
+    global tokenizer_a, model_a, model_id_a
+    model_id_a = model_id  # need to access model_id with tokenizer
     tokenizer_a = AutoTokenizer.from_pretrained(model_id)
     logging.debug(f"model A: {tokenizer_a.eos_token}")
     try:
@@ -50,7 +76,8 @@ def load_model_a(model_id):
     return gr.update(label=model_id)
 
 def load_model_b(model_id):
-    global tokenizer_b, model_b
+    global tokenizer_b, model_b, model_id_b
+    model_id_b = model_id
     tokenizer_b = AutoTokenizer.from_pretrained(model_id)
     logging.debug(f"model B: {tokenizer_b.eos_token}")
     try:
@@ -92,20 +119,30 @@ def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_token
     chat_history_b.append({"role": "user", "content": user})
     chat_history_b.append({"role": "assistant", "content": assistant})
 
-
-
-
-
-
-        add_generation_prompt=True
-        return_tensors="pt"
-
-
-
-
-
-
-
+    new_messages_a = system_prompt_list + chat_history_a + input_text_list
+    new_messages_b = system_prompt_list + chat_history_b + input_text_list
+
+    if "pharia" in model_id_a:
+        logging.debug("model a is pharia based, applying own template")
+        formatted_message_a = apply_chat_template(new_messages_a, add_generation_prompt=True)
+        input_ids_a = tokenizer_a(formatted_message_a, return_tensors="pt").input_ids.to(model_a.device)
+    else:
+        input_ids_a = tokenizer_a.apply_chat_template(
+            new_messages_a,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        ).to(model_a.device)
+
+    if "pharia" in model_id_b:
+        logging.debug("model b is pharia based, applying own template")
+        formatted_message_b = apply_chat_template(new_messages_b, add_generation_prompt=True)
+        input_ids_b = tokenizer_b(formatted_message_b, return_tensors="pt").input_ids.to(model_b.device)
+    else:
+        input_ids_b = tokenizer_b.apply_chat_template(
+            new_messages_b,
+            add_generation_prompt=True,
+            return_tensors="pt"
+        ).to(model_b.device)
 
     generation_kwargs_a = dict(
         input_ids=input_ids_a,
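
For reference, a minimal usage sketch (not part of the commit) of the apply_chat_template helper added above; the example messages are hypothetical and only illustrate the prompt string the function builds for a Pharia-style checkpoint.

# Hypothetical example, assuming apply_chat_template() from the diff above is in scope.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

prompt = apply_chat_template(messages, add_generation_prompt=True)
print(prompt)
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>
# You are a helpful assistant.<|eot_id|>
# <|start_header_id|>user<|end_header_id|>
# What is the capital of France?<|eot_id|>
# <|start_header_id|>assistant<|end_header_id|>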
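
The new branch in generate_both keys off the checkpoint id: when it contains "pharia", the manually rendered string is tokenized as plain text and its input_ids are moved to the model's device; otherwise the tokenizer's built-in apply_chat_template returns the tensor directly. A condensed sketch of that dispatch for a single model follows; build_input_ids, model_id, tokenizer, model, and messages are placeholder names introduced here, not identifiers from the commit.

# Condensed, hypothetical sketch of the per-model dispatch added in generate_both().
def build_input_ids(model_id, tokenizer, model, messages):
    if "pharia" in model_id:
        # Pharia path: render the prompt with the manual template above,
        # then tokenize the resulting string like ordinary text.
        prompt = apply_chat_template(messages, add_generation_prompt=True)
        return tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    # Default path: let the tokenizer's own chat template build the tensor.
    return tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)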