Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.gitattributes +1 -0
chat_template.jinja +351 -0
config.json +193 -0
generation_config.json +15 -0
model.safetensors +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +3 -0
tokenizer_config.json +293 -0
trainer_state.json +1716 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,351 @@

+{#- format_parameters: render a mapping of parameter schemas as comma-separated name:(schema) entries.
+    properties  : mapping of parameter name to its schema mapping; walked in dictsort order.
+    required    : accepted from callers but never read inside this macro body.
+    filter_keys : when true, keys listed in standard_keys are skipped.
+    Fields inside an entry are written in a fixed order when present: description, then enum (STRING)
+    or items (ARRAY), nullable, properties and required (OBJECT), and finally type, which is always written. -#}
+{%- macro format_parameters(properties, required, filter_keys=false) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {#- found_first is kept in a namespace so the flag set inside the loop is visible to later iterations. -#}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {#- add_comma records whether a field was already written for this entry, so the next field is preceded by a separator. -#}
+        {%- set add_comma = false -%}
+        {%- if not filter_keys or key not in standard_keys -%}
+            {#- Separator between entries: written before every entry except the first one rendered. -#}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {#- Type-specific details; the type comparison is done on the upper-cased value. -#}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {#- Keys whose value is none are omitted from the items object. -#}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {#- Nested object schema: recurse with the item schema's own required list. -#}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {#- The item type may be a single string or a list of strings; both are upper-cased. -#}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {#- Any other key is passed through format_argument unchanged. -#}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'OBJECT' -%}
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                    }
+                {%- elif value is mapping -%}
+                    {#- No properties mapping: the schema mapping itself is walked, with the standard keys filtered out. -#}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value, value['required'] | default([]), filter_keys=true) -}}
+                    }
+                {%- endif -%}
+                {%- if value['required'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- endif -%}
+            {#- The type field is always written last and closes the entry. -#}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{#- format_function_declaration: render one tool definition in the form
+    declaration:NAME, followed by a braced body holding description, optional parameters and optional response.
+    tool_data is read as a mapping with a 'function' entry providing 'name', 'description' and,
+    optionally, 'parameters' and 'response'.
+    The closing brace of the parameters object and of the response object is written unconditionally,
+    so the rendered text stays balanced when the parameters schema has no 'type' or the response type
+    is not OBJECT (a trailing comma may precede the brace in that case). Output is unchanged whenever
+    the type is present. -#}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>
+        {%- endif -%}
+        {#- Close the parameters object even when no type was written. -#}
+        }
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {#- Only an OBJECT response type is rendered; other types are left out. -#}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>
+        {%- endif -%}
+        {#- Close the response object regardless of its type. -#}
+        }
+    {%- endif -%}
+    {#- Close the declaration body opened right after the function name. -#}
+    }
+{%- endmacro -%}
+{#- format_argument: serialise a single value.
+    argument    : the value to render.
+    escape_keys : when true (the default), mapping keys are wrapped in the <|"|> quote token;
+                  the flag is passed on unchanged to every recursive call.
+    Strings are wrapped in <|"|>, booleans become true/false, mappings become a braced,
+    comma-separated key:value list in dictsort order, other sequences become a bracketed
+    comma-separated list, and anything else is printed as-is. -#}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {#- The string test comes before the sequence test so that strings are quoted rather than iterated. -#}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {#- found_first drives the separator between key:value pairs. -#}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {#- Fallback for every remaining type: the value is emitted with its default string form. -#}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{#- strip_thinking: return text with thinking-channel spans removed and the result trimmed.
+    The text is split on the closing marker <channel|>; in each piece that contains the opening
+    marker <|channel>, only the part before that marker is kept. Pieces without an opening marker
+    are kept whole. -#}
+{%- macro strip_thinking(text) -%}
+    {#- The accumulator is a namespace attribute so that appends made inside the loop persist. -#}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+{#- format_tool_response_block: wrap one tool result between <|tool_response> and <tool_response|>.
+    tool_name : name written after the response: prefix; it is concatenated with string literals.
+    response  : a mapping is rendered as comma-separated key:value pairs in dictsort order with
+                unquoted keys; any other value is rendered as a single value: field. -#}
+{%- macro format_tool_response_block(tool_name, response) -%}
+    {{- '<|tool_response>' -}}
+    {%- if response is mapping -%}
+        {{- 'response:' + tool_name + '{' -}}
+        {%- for key, value in response | dictsort -%}
+            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- else -%}
+        {#- Non-mapping results are placed under a single value key. -#}
+        {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
+    {%- endif -%}
+    {{- '<tool_response|>' -}}
+{%- endmacro -%}
+{#- Template entry point. ns.prev_message_type records the kind of item written most recently;
+    it is read after the message loop to decide whether a generation prompt is added.
+    loop_messages is the list the message loop iterates; it starts as the full message list. -#}
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{- bos_token -}}
+{#- Handle System/Tool Definitions Block: a system turn is opened when thinking is enabled,
+    when tools are supplied, or when the first message has the system or developer role. -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>\n' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {#- Content may be a plain string or a list of parts; each part's text is trimmed and followed by one space. -#}
+        {%- if messages[0]['content'] is string -%}
+            {{- messages[0]['content'] | trim -}}
+        {%- elif messages[0]['content'] is sequence -%}
+            {%- for item in messages[0]['content'] -%}
+                {{- item['text'] | trim + ' '-}}
+            {%- endfor -%}
+        {%- endif -%}
+        {#- The leading system/developer message has been consumed; the loop below starts at the second message. -#}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+    {%- if tools -%}
+        {#- Each tool declaration is wrapped between <|tool> and <tool|>. -#}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+    {{- '<turn|>\n' -}}
+{%- endif %}
+{#- Pre-scan: find last user message index for reasoning guard.
+    ns_turn.last_user_idx ends up as the index within loop_messages of the final message whose
+    role is user, or stays at -1 when there is none. The message loop compares it with loop.index0
+    before rendering reasoning text. A namespace is used so the assignment made inside the loop
+    is still visible afterwards. -#}
+{%- set ns_turn = namespace(last_user_idx=-1) -%}
+{%- for i in range(loop_messages | length) -%}
+    {%- if loop_messages[i]['role'] == 'user' -%}
+        {%- set ns_turn.last_user_idx = i -%}
+    {%- endif -%}
+{%- endfor -%}
+{#- Loop through messages.
+    Messages with the tool role are not rendered on their own: they are folded into the preceding
+    assistant message that carries tool_calls (see the forward scan below). For every other message
+    the loop writes, in order: the turn header (unless it continues a model turn), the thinking
+    channel, tool calls, tool responses, the message content, and finally the turn terminator.
+    Fallback names and texts use default(..., true) so that a None returned by .get() is replaced
+    too; without the boolean flag only undefined values would be replaced and the later string
+    concatenation would fail on None. -#}
+{%- for message in loop_messages -%}
+    {%- if message['role'] != 'tool' -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+    {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
+    {%- set prev_nt = namespace(role=None, found=false) -%}
+    {%- if loop.index0 > 0 -%}
+        {#- Walk backwards from the previous message; the found flag stands in for a loop break. -#}
+        {%- for j in range(loop.index0 - 1, -1, -1) -%}
+            {%- if not prev_nt.found -%}
+                {%- if loop_messages[j]['role'] != 'tool' -%}
+                    {%- set prev_nt.role = loop_messages[j]['role'] -%}
+                    {%- set prev_nt.found = true -%}
+                {%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
+    {%- if not continue_same_model_turn -%}
+        {{- '<|turn>' + role + '\n' }}
+    {%- endif -%}
+    {#- Render reasoning/reasoning_content as thinking channel; only for messages after the last user message that also carry tool calls. -#}
+    {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
+    {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
+        {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
+    {%- endif -%}
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {#- Mapping arguments are rendered key by key with unquoted keys; string arguments are emitted verbatim. -#}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+            {#- ns_tr_out.flag becomes true once at least one tool response was written for this message. -#}
+            {%- set ns_tr_out = namespace(flag=false) -%}
+            {%- if message.get('tool_responses') -%}
+                {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- format_tool_response_block(tool_response['name'] | default('unknown', true), tool_response['response']) -}}
+                    {%- set ns_tr_out.flag = true -%}
+                    {%- set ns.prev_message_type = 'tool_response' -%}
+                {%- endfor -%}
+            {%- elif message.get('tool_calls') -%}
+                {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
+                {%- set ns_tool_scan = namespace(stopped=false) -%}
+                {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
+                    {%- if ns_tool_scan.stopped -%}
+                    {%- elif loop_messages[k]['role'] != 'tool' -%}
+                        {%- set ns_tool_scan.stopped = true -%}
+                    {%- else -%}
+                        {%- set follow = loop_messages[k] -%}
+                        {#- Resolve tool_call_id to function name; a missing or None name falls back to 'unknown'. -#}
+                        {%- set ns_tname = namespace(name=follow.get('name') | default('unknown', true)) -%}
+                        {%- for tc in message['tool_calls'] -%}
+                            {%- if tc.get('id') == follow.get('tool_call_id') -%}
+                                {%- set ns_tname.name = tc['function']['name'] -%}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {#- Handle content as string or content-parts array -#}
+                        {%- set tool_body = follow.get('content') -%}
+                        {%- if tool_body is string -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- elif tool_body is sequence and tool_body is not string -%}
+                            {#- Concatenate the text of all text parts; a part whose text is missing or None contributes nothing. -#}
+                            {%- set ns_txt = namespace(s='') -%}
+                            {%- for part in tool_body -%}
+                                {%- if part.get('type') == 'text' -%}
+                                    {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('', true)) -%}
+                                {%- endif -%}
+                            {%- endfor -%}
+                            {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
+                        {%- else -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- endif -%}
+                        {%- set ns_tr_out.flag = true -%}
+                        {%- set ns.prev_message_type = 'tool_response' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+            {#- The content is captured first so that its emptiness can be tested after it has been written. -#}
+            {%- set captured_content -%}
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '<|image|>' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '<|video|>' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+            {%- endset -%}
+            {{- captured_content -}}
+            {%- set has_content = captured_content | trim | length > 0 -%}
+        {#- Turn terminator: tool calls without any response open a tool_response slot instead of closing the turn;
+            a turn that wrote tool responses but no content is left open; every other turn is closed. -#}
+        {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
+            {{- '<|tool_response>' -}}
+        {%- elif not (ns_tr_out.flag and not has_content) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+{#- Generation prompt: open a new model turn when requested, except when the last item written was a
+    tool response or a tool call. In those two cases the message loop did not write a turn
+    terminator, so no new turn header is added here. -#}
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
+        {{- '<|turn>model\n' -}}
+    {%- endif -%}
+{%- endif -%}

config.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+  "architectures": [
+    "Gemma4ForConditionalGeneration"
+  ],
+  "audio_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_chunk_size": 12,
+    "attention_context_left": 13,
+    "attention_context_right": 0,
+    "attention_invalid_logits_value": -1000000000.0,
+    "attention_logit_cap": 50.0,
+    "chunk_size_feed_forward": 0,
+    "conv_kernel_size": 5,
+    "dtype": "bfloat16",
+    "gradient_clipping": 10000000000.0,
+    "hidden_act": "silu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "model_type": "gemma4_audio",
+    "num_attention_heads": 8,
+    "num_hidden_layers": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_proj_dims": 1536,
+    "problem_type": null,
+    "residual_weight": 0.5,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "subsampling_conv_channels": [
+      128,
+      32
+    ],
+    "use_clipped_linears": true
+  },
+  "audio_token_id": 258881,
+  "boa_token_id": 256000,
+  "boi_token_id": 255999,
+  "bos_token_id": 2,
+  "dtype": "bfloat16",
+  "eoa_token_id": 258883,
+  "eoa_token_index": 258883,
+  "eoi_token_id": 258882,
+  "eos_token_id": 1,
+  "image_token_id": 258880,
+  "initializer_range": 0.02,
+  "model_name": "jq/gemma4-e2b-fft-asr-uga",
+  "model_type": "gemma4",
+  "pad_token_id": 0,
+  "text_config": {
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attention_k_eq_v": false,
+    "bos_token_id": 2,
+    "dtype": "bfloat16",
+    "enable_moe_block": false,
+    "eos_token_id": 1,
+    "expert_intermediate_size": null,
+    "final_logit_softcapping": 30.0,
+    "global_head_dim": 512,
+    "head_dim": 256,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 1536,
+    "hidden_size_per_layer_input": 256,
+    "initializer_range": 0.02,
+    "intermediate_size": 6144,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_text",
+    "moe_intermediate_size": null,
+    "num_attention_heads": 8,
+    "num_experts": null,
+    "num_global_key_value_heads": null,
+    "num_hidden_layers": 35,
+    "num_key_value_heads": 1,
+    "num_kv_shared_layers": 20,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "full_attention": {
+        "partial_rotary_factor": 0.25,
+        "rope_theta": 1000000.0,
+        "rope_type": "proportional"
+      },
+      "sliding_attention": {
+        "rope_theta": 10000.0,
+        "rope_type": "default"
+      }
+    },
+    "sliding_window": 512,
+    "tie_word_embeddings": true,
+    "top_k_experts": null,
+    "use_bidirectional_attention": null,
+    "use_cache": true,
+    "use_double_wide_mlp": true,
+    "vocab_size": 262144,
+    "vocab_size_per_layer_input": 262144
+  },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.8.0",
+  "unsloth_version": "2026.5.2",
+  "use_cache": false,
+  "video_token_id": 258884,
+  "vision_config": {
+    "_name_or_path": "",
+    "architectures": null,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "chunk_size_feed_forward": 0,
+    "default_output_length": 280,
+    "dtype": "bfloat16",
+    "global_head_dim": 64,
+    "head_dim": 64,
+    "hidden_activation": "gelu_pytorch_tanh",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "max_position_embeddings": 131072,
+    "model_type": "gemma4_vision",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 16,
+    "num_key_value_heads": 12,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "patch_size": 16,
+    "pooling_kernel_size": 3,
+    "position_embedding_size": 10240,
+    "problem_type": null,
+    "return_dict": true,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+      "rope_theta": 100.0,
+      "rope_type": "default"
+    },
+    "standardize": false,
+    "use_clipped_linears": true
+  },
+  "vision_soft_tokens_per_image": 280
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token_id": 2,
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    1,
+    106,
+    50
+  ],
+  "pad_token_id": 0,
+  "temperature": 1.0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "5.8.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7be21e063d3e5d24f86d47f552cb5cdb4103527ab04df1900738205dd866d863
+size 10208852910

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87ee2f2ae8de49581bfeb04ce113841200a5b6fdd727c9383b4d28dadbf204e2
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73f5c7141f58e850dfe7bff58a726c04f15c33b6b0cb030aec4e9b1cdc5b34ec
+size 1465

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1d8bced9d66859cd2c3f4dcd8ab427197d4d46af3d9598b72af9fcf80b8392e
+size 32169781

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,293 @@

+{
+  "audio_token": "<|audio|>",
+  "backend": "tokenizers",
+  "boa_token": "<|audio>",
+  "boi_token": "<|image>",
+  "bos_token": "<bos>",
+  "eoa_token": "<audio|>",
+  "eoc_token": "<channel|>",
+  "eoi_token": "<image|>",
+  "eos_token": "<eos>",
+  "eot_token": "<turn|>",
+  "escape_token": "<|\"|>",
+  "etc_token": "<tool_call|>",
+  "etd_token": "<tool|>",
+  "etr_token": "<tool_response|>",
+  "extra_special_tokens": [
+    "<|video|>"
+  ],
+  "image_token": "<|image|>",
+  "is_local": false,
+  "local_files_only": false,
+  "mask_token": "<mask>",
+  "max_length": null,
+  "model_max_length": 1000000000000000019884624838656,
+  "model_specific_special_tokens": {
+    "audio_token": "<|audio|>",
+    "boa_token": "<|audio>",
+    "boi_token": "<|image>",
+    "eoa_token": "<audio|>",
+    "eoc_token": "<channel|>",
+    "eoi_token": "<image|>",
+    "eot_token": "<turn|>",
+    "escape_token": "<|\"|>",
+    "etc_token": "<tool_call|>",
+    "etd_token": "<tool|>",
+    "etr_token": "<tool_response|>",
+    "image_token": "<|image|>",
+    "soc_token": "<|channel>",
+    "sot_token": "<|turn>",
+    "stc_token": "<|tool_call>",
+    "std_token": "<|tool>",
+    "str_token": "<|tool_response>",
+    "think_token": "<|think|>"
+  },
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "Gemma4Processor",
+  "response_schema": {
+    "properties": {
+      "content": {
+        "type": "string"
+      },
+      "role": {
+        "const": "assistant"
+      },
+      "thinking": {
+        "type": "string"
+      },
+      "tool_calls": {
+        "items": {
+          "properties": {
+            "function": {
+              "properties": {
+                "arguments": {
+                  "additionalProperties": {},
+                  "type": "object",
+                  "x-parser": "gemma4-tool-call"
+                },
+                "name": {
+                  "type": "string"
+                }
+              },
+              "type": "object",
+              "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
+            },
+            "type": {
+              "const": "function"
+            }
+          },
+          "type": "object"
+        },
+        "type": "array",
+        "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
+      }
+    },
+    "type": "object",
+    "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
+  },
+  "soc_token": "<|channel>",
+  "sot_token": "<|turn>",
+  "stc_token": "<|tool_call>",
+  "std_token": "<|tool>",
+  "str_token": "<|tool_response>",
+  "think_token": "<|think|>",
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "46": {
+      "content": "<|tool>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "47": {
+      "content": "<tool|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "48": {
+      "content": "<|tool_call>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "49": {
+      "content": "<tool_call|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "50": {
+      "content": "<|tool_response>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "51": {
+      "content": "<tool_response|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "52": {
+      "content": "<|\"|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "98": {
+      "content": "<|think|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "100": {
+      "content": "<|channel>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "101": {
+      "content": "<channel|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "105": {
+      "content": "<|turn>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "106": {
+      "content": "<turn|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "255999": {
+      "content": "<|image>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "256000": {
+      "content": "<|audio>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "258880": {
+      "content": "<|image|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "258881": {
+      "content": "<|audio|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "258882": {
+      "content": "<image|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "258883": {
+      "content": "<audio|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    "258884": {
+      "content": "<|video|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  }
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1716 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 578,
+  "global_step": 2309,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0043308791684712,
+      "grad_norm": 0.9375,
+      "learning_rate": 4.5e-06,
+      "loss": 0.44086565971374514,
+      "step": 10
+    },
+    {
+      "epoch": 0.0086617583369424,
+      "grad_norm": 1.7265625,
+      "learning_rate": 9.5e-06,
+      "loss": 0.38978335857391355,
+      "step": 20
+    },
+    {
+      "epoch": 0.0129926375054136,
+      "grad_norm": 1.1171875,
+      "learning_rate": 1.45e-05,
+      "loss": 0.3998324632644653,
+      "step": 30
+    },
+    {
+      "epoch": 0.0173235166738848,
+      "grad_norm": 0.8359375,
+      "learning_rate": 1.9500000000000003e-05,
+      "loss": 0.36765055656433104,
+      "step": 40
+    },
+    {
+      "epoch": 0.021654395842355997,
+      "grad_norm": 0.9140625,
+      "learning_rate": 2.45e-05,
+      "loss": 0.38308374881744384,
+      "step": 50
+    },
+    {
+      "epoch": 0.0259852750108272,
+      "grad_norm": 1.3125,
+      "learning_rate": 2.95e-05,
+      "loss": 0.37063548564910886,
+      "step": 60
+    },
+    {
+      "epoch": 0.030316154179298397,
+      "grad_norm": 0.921875,
+      "learning_rate": 3.45e-05,
+      "loss": 0.3595526456832886,
+      "step": 70
+    },
+    {
+      "epoch": 0.0346470333477696,
+      "grad_norm": 1.25,
+      "learning_rate": 3.9500000000000005e-05,
+      "loss": 0.3661433935165405,
+      "step": 80
+    },
+    {
+      "epoch": 0.0389779125162408,
+      "grad_norm": 0.8671875,
+      "learning_rate": 4.4500000000000004e-05,
+      "loss": 0.37132949829101564,
+      "step": 90
+    },
+    {
+      "epoch": 0.043308791684711995,
+      "grad_norm": 1.1015625,
+      "learning_rate": 4.9500000000000004e-05,
+      "loss": 0.3292886257171631,
+      "step": 100
+    },
+    {
+      "epoch": 0.04763967085318319,
+      "grad_norm": 1.03125,
+      "learning_rate": 4.999795215342401e-05,
+      "loss": 0.3398285865783691,
+      "step": 110
+    },
+    {
+      "epoch": 0.0519705500216544,
+      "grad_norm": 0.94921875,
+      "learning_rate": 4.999087360832838e-05,
+      "loss": 0.3274256229400635,
+      "step": 120
+    },
+    {
+      "epoch": 0.056301429190125596,
+      "grad_norm": 1.0078125,
+      "learning_rate": 4.9978740513992555e-05,
+      "loss": 0.32653069496154785,
+      "step": 130
+    },
+    {
+      "epoch": 0.060632308358596794,
+      "grad_norm": 0.92578125,
+      "learning_rate": 4.996155532440529e-05,
+      "loss": 0.34501988887786866,
+      "step": 140
+    },
+    {
+      "epoch": 0.06496318752706799,
+      "grad_norm": 0.58984375,
+      "learning_rate": 4.9939321515370904e-05,
+      "loss": 0.33666629791259767,
+      "step": 150
+    },
+    {
+      "epoch": 0.0692940666955392,
+      "grad_norm": 0.875,
+      "learning_rate": 4.991204358380634e-05,
+      "loss": 0.34445130825042725,
+      "step": 160
+    },
+    {
+      "epoch": 0.07362494586401039,
+      "grad_norm": 0.7109375,
+      "learning_rate": 4.9879727046831594e-05,
+      "loss": 0.31984710693359375,
+      "step": 170
+    },
+    {
+      "epoch": 0.0779558250324816,
+      "grad_norm": 0.65625,
+      "learning_rate": 4.984237844065383e-05,
+      "loss": 0.2977360486984253,
+      "step": 180
+    },
+    {
+      "epoch": 0.0822867042009528,
+      "grad_norm": 1.0234375,
+      "learning_rate": 4.980000531924542e-05,
+      "loss": 0.33499305248260497,
+      "step": 190
+    },
+    {
+      "epoch": 0.08661758336942399,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.975261625281614e-05,
+      "loss": 0.3221716403961182,
+      "step": 200
+    },
+    {
+      "epoch": 0.0909484625378952,
+      "grad_norm": 0.8828125,
+      "learning_rate": 4.97002208260797e-05,
+      "loss": 0.3402083396911621,
+      "step": 210
+    },
+    {
+      "epoch": 0.09527934170636639,
+      "grad_norm": 0.875,
+      "learning_rate": 4.964282963631529e-05,
+      "loss": 0.32864036560058596,
+      "step": 220
+    },
+    {
+      "epoch": 0.09961022087483759,
+      "grad_norm": 0.8046875,
+      "learning_rate": 4.9580454291224156e-05,
+      "loss": 0.319922137260437,
+      "step": 230
+    },
+    {
+      "epoch": 0.1039411000433088,
+      "grad_norm": 0.88671875,
+      "learning_rate": 4.951310740658187e-05,
+      "loss": 0.34775371551513673,
+      "step": 240
+    },
+    {
+      "epoch": 0.10827197921177999,
+      "grad_norm": 0.703125,
+      "learning_rate": 4.944080260368675e-05,
+      "loss": 0.3269499778747559,
+      "step": 250
+    },
+    {
+      "epoch": 0.11260285838025119,
+      "grad_norm": 1.0078125,
+      "learning_rate": 4.936355450660487e-05,
+      "loss": 0.3431516170501709,
+      "step": 260
+    },
+    {
+      "epoch": 0.1169337375487224,
+      "grad_norm": 0.86328125,
+      "learning_rate": 4.9281378739212225e-05,
+      "loss": 0.33036587238311765,
+      "step": 270
+    },
+    {
+      "epoch": 0.12126461671719359,
+      "grad_norm": 0.87109375,
+      "learning_rate": 4.919429192203473e-05,
+      "loss": 0.31238555908203125,
+      "step": 280
+    },
+    {
+      "epoch": 0.1255954958856648,
+      "grad_norm": 0.828125,
+      "learning_rate": 4.9102311668886634e-05,
+      "loss": 0.34010791778564453,
+      "step": 290
+    },
+    {
+      "epoch": 0.12992637505413598,
+      "grad_norm": 1.0390625,
+      "learning_rate": 4.9005456583308016e-05,
+      "loss": 0.37534093856811523,
+      "step": 300
+    },
+    {
+      "epoch": 0.1342572542226072,
+      "grad_norm": 0.90625,
+      "learning_rate": 4.8903746254802096e-05,
+      "loss": 0.33760197162628175,
+      "step": 310
+    },
+    {
+      "epoch": 0.1385881333910784,
+      "grad_norm": 1.1328125,
+      "learning_rate": 4.879720125487317e-05,
+      "loss": 0.3542941093444824,
+      "step": 320
+    },
+    {
+      "epoch": 0.14291901255954959,
+      "grad_norm": 0.875,
+      "learning_rate": 4.868584313286589e-05,
+      "loss": 0.3184266805648804,
+      "step": 330
+    },
+    {
+      "epoch": 0.14724989172802078,
+      "grad_norm": 1.0625,
+      "learning_rate": 4.8569694411606784e-05,
+      "loss": 0.3295663595199585,
+      "step": 340
+    },
+    {
+      "epoch": 0.151580770896492,
+      "grad_norm": 0.9609375,
+      "learning_rate": 4.844877858284886e-05,
+      "loss": 0.3455744504928589,
+      "step": 350
+    },
+    {
+      "epoch": 0.1559116500649632,
+      "grad_norm": 0.8203125,
+      "learning_rate": 4.8323120102520334e-05,
+      "loss": 0.32991776466369627,
+      "step": 360
+    },
+    {
+      "epoch": 0.16024252923343438,
+      "grad_norm": 0.6953125,
+      "learning_rate": 4.8192744385778185e-05,
+      "loss": 0.33624818325042727,
+      "step": 370
+    },
+    {
+      "epoch": 0.1645734084019056,
+      "grad_norm": 0.75390625,
+      "learning_rate": 4.805767780186786e-05,
+      "loss": 0.29813156127929685,
+      "step": 380
+    },
+    {
+      "epoch": 0.1689042875703768,
+      "grad_norm": 39.5,
+      "learning_rate": 4.7917947668789926e-05,
+      "loss": 0.3126053810119629,
+      "step": 390
+    },
+    {
+      "epoch": 0.17323516673884798,
+      "grad_norm": 0.94921875,
+      "learning_rate": 4.7773582247774806e-05,
+      "loss": 0.30437936782836916,
+      "step": 400
+    },
+    {
+      "epoch": 0.1775660459073192,
+      "grad_norm": 0.609375,
+      "learning_rate": 4.7624610737566846e-05,
+      "loss": 0.313838791847229,
+      "step": 410
+    },
+    {
+      "epoch": 0.1818969250757904,
+      "grad_norm": 1.015625,
+      "learning_rate": 4.747106326851864e-05,
+      "loss": 0.3429024457931519,
+      "step": 420
+    },
+    {
+      "epoch": 0.18622780424426158,
+      "grad_norm": 0.76953125,
+      "learning_rate": 4.731297089649703e-05,
+      "loss": 0.3326719284057617,
+      "step": 430
+    },
+    {
+      "epoch": 0.19055868341273277,
+      "grad_norm": 0.65234375,
+      "learning_rate": 4.7150365596601876e-05,
+      "loss": 0.3007481336593628,
+      "step": 440
+    },
+    {
+      "epoch": 0.194889562581204,
+      "grad_norm": 0.86328125,
+      "learning_rate": 4.6983280256698864e-05,
+      "loss": 0.3022452354431152,
+      "step": 450
+    },
+    {
+      "epoch": 0.19922044174967518,
+      "grad_norm": 0.8046875,
+      "learning_rate": 4.68117486707678e-05,
+      "loss": 0.31449906826019286,
+      "step": 460
+    },
+    {
+      "epoch": 0.20355132091814637,
+      "grad_norm": 1.015625,
+      "learning_rate": 4.663580553206755e-05,
+      "loss": 0.316056227684021,
+      "step": 470
+    },
+    {
+      "epoch": 0.2078822000866176,
+      "grad_norm": 0.8671875,
+      "learning_rate": 4.645548642611913e-05,
+      "loss": 0.31620476245880125,
+      "step": 480
+    },
+    {
+      "epoch": 0.21221307925508878,
+      "grad_norm": 0.765625,
+      "learning_rate": 4.627082782350833e-05,
+      "loss": 0.3331968069076538,
+      "step": 490
+    },
+    {
+      "epoch": 0.21654395842355997,
+      "grad_norm": 0.67578125,
+      "learning_rate": 4.6081867072509334e-05,
+      "loss": 0.32191483974456786,
+      "step": 500
+    },
+    {
+      "epoch": 0.2208748375920312,
+      "grad_norm": 0.91015625,
+      "learning_rate": 4.58886423915308e-05,
+      "loss": 0.3320103406906128,
+      "step": 510
+    },
+    {
+      "epoch": 0.22520571676050238,
+      "grad_norm": 0.91015625,
+      "learning_rate": 4.569119286138598e-05,
+      "loss": 0.3349184036254883,
+      "step": 520
+    },
+    {
+      "epoch": 0.22953659592897357,
+      "grad_norm": 1.0703125,
+      "learning_rate": 4.548955841738839e-05,
+      "loss": 0.3133854389190674,
+      "step": 530
+    },
+    {
+      "epoch": 0.2338674750974448,
+      "grad_norm": 1.1328125,
+      "learning_rate": 4.528377984127466e-05,
+      "loss": 0.31101830005645753,
+      "step": 540
+    },
+    {
+      "epoch": 0.23819835426591598,
+      "grad_norm": 0.87890625,
+      "learning_rate": 4.50738987529562e-05,
+      "loss": 0.32517337799072266,
+      "step": 550
+    },
+    {
+      "epoch": 0.24252923343438718,
+      "grad_norm": 0.6328125,
+      "learning_rate": 4.485995760210132e-05,
+      "loss": 0.31145153045654295,
+      "step": 560
+    },
+    {
+      "epoch": 0.24686011260285837,
+      "grad_norm": 0.9609375,
+      "learning_rate": 4.464199965954954e-05,
+      "loss": 0.33439595699310304,
+      "step": 570
+    },
+    {
+      "epoch": 0.25032481593763534,
+      "eval_text_loss": 1.6730986833572388,
+      "eval_text_model_preparation_time": 0.0199,
+      "eval_text_runtime": 23.6907,
+      "eval_text_samples_per_second": 13.93,
+      "eval_text_steps_per_second": 13.93,
+      "step": 578
+    },
+    {
+      "epoch": 0.25032481593763534,
+      "eval_audio_loss": 0.8745406866073608,
+      "eval_audio_model_preparation_time": 0.0199,
+      "eval_audio_runtime": 45.6232,
+      "eval_audio_samples_per_second": 5.611,
+      "eval_audio_steps_per_second": 5.611,
+      "step": 578
+    },
+    {
+      "epoch": 0.2511909917713296,
+      "grad_norm": 0.6015625,
+      "learning_rate": 4.442006900855983e-05,
+      "loss": 0.3008127689361572,
+      "step": 580
+    },
+    {
+      "epoch": 0.2555218709398008,
+      "grad_norm": 0.984375,
+      "learning_rate": 4.4194210535894475e-05,
+      "loss": 0.3019113063812256,
+      "step": 590
+    },
+    {
+      "epoch": 0.25985275010827197,
+      "grad_norm": 0.890625,
+      "learning_rate": 4.3964469922740526e-05,
+      "loss": 0.33276543617248533,
+      "step": 600
+    },
+    {
+      "epoch": 0.26418362927674316,
+      "grad_norm": 0.83984375,
+      "learning_rate": 4.3730893635470456e-05,
+      "loss": 0.32658073902130125,
+      "step": 610
+    },
+    {
+      "epoch": 0.2685145084452144,
+      "grad_norm": 0.71875,
+      "learning_rate": 4.3493528916244094e-05,
+      "loss": 0.2932913303375244,
+      "step": 620
+    },
+    {
+      "epoch": 0.2728453876136856,
+      "grad_norm": 1.0390625,
+      "learning_rate": 4.3252423773453623e-05,
+      "loss": 0.28568110466003416,
+      "step": 630
+    },
+    {
+      "epoch": 0.2771762667821568,
+      "grad_norm": 0.765625,
+      "learning_rate": 4.3007626972013596e-05,
+      "loss": 0.31102354526519777,
+      "step": 640
+    },
+    {
+      "epoch": 0.281507145950628,
+      "grad_norm": 0.82421875,
+      "learning_rate": 4.2759188023497984e-05,
+      "loss": 0.3104224681854248,
+      "step": 650
+    },
+    {
+      "epoch": 0.28583802511909917,
+      "grad_norm": 0.8671875,
+      "learning_rate": 4.250715717612611e-05,
+      "loss": 0.30080304145812986,
+      "step": 660
+    },
+    {
+      "epoch": 0.29016890428757036,
+      "grad_norm": 0.78515625,
+      "learning_rate": 4.22515854045997e-05,
+      "loss": 0.319173264503479,
+      "step": 670
+    },
+    {
+      "epoch": 0.29449978345604155,
+      "grad_norm": 0.96875,
+      "learning_rate": 4.1992524399792945e-05,
+      "loss": 0.33753151893615724,
+      "step": 680
+    },
+    {
+      "epoch": 0.2988306626245128,
+      "grad_norm": 0.75,
+      "learning_rate": 4.173002655829771e-05,
+      "loss": 0.30184378623962405,
+      "step": 690
+    },
+    {
+      "epoch": 0.303161541792984,
+      "grad_norm": 0.921875,
+      "learning_rate": 4.1464144971826056e-05,
+      "loss": 0.2979575157165527,
+      "step": 700
+    },
+    {
+      "epoch": 0.3074924209614552,
+      "grad_norm": 0.625,
+      "learning_rate": 4.119493341647208e-05,
+      "loss": 0.3160278558731079,
+      "step": 710
+    },
+    {
+      "epoch": 0.3118233001299264,
+      "grad_norm": 0.92578125,
+      "learning_rate": 4.0922446341835405e-05,
+      "loss": 0.3350363254547119,
+      "step": 720
+    },
+    {
+      "epoch": 0.31615417929839756,
+      "grad_norm": 0.734375,
+      "learning_rate": 4.064673886000847e-05,
+      "loss": 0.2890045642852783,
+      "step": 730
+    },
+    {
+      "epoch": 0.32048505846686876,
+      "grad_norm": 0.83203125,
+      "learning_rate": 4.036786673442974e-05,
+      "loss": 0.3226451396942139,
+      "step": 740
+    },
+    {
+      "epoch": 0.32481593763533995,
+      "grad_norm": 0.67578125,
+      "learning_rate": 4.0085886368605256e-05,
+      "loss": 0.3058380842208862,
+      "step": 750
+    },
+    {
+      "epoch": 0.3291468168038112,
+      "grad_norm": 1.109375,
+      "learning_rate": 3.9800854794700685e-05,
+      "loss": 0.3117943286895752,
+      "step": 760
+    },
+    {
+      "epoch": 0.3334776959722824,
+      "grad_norm": 1.3125,
+      "learning_rate": 3.951282966200624e-05,
+      "loss": 0.3291532039642334,
+      "step": 770
+    },
+    {
+      "epoch": 0.3378085751407536,
+      "grad_norm": 0.8671875,
+      "learning_rate": 3.922186922527677e-05,
+      "loss": 0.32024178504943845,
+      "step": 780
+    },
+    {
+      "epoch": 0.34213945430922477,
+      "grad_norm": 0.78515625,
+      "learning_rate": 3.892803233294942e-05,
+      "loss": 0.32744786739349363,
+      "step": 790
+    },
+    {
+      "epoch": 0.34647033347769596,
+      "grad_norm": 0.796875,
+      "learning_rate": 3.8631378415241135e-05,
+      "loss": 0.26966466903686526,
+      "step": 800
+    },
+    {
+      "epoch": 0.35080121264616715,
+      "grad_norm": 0.89453125,
+      "learning_rate": 3.833196747212865e-05,
+      "loss": 0.33243422508239745,
+      "step": 810
+    },
+    {
+      "epoch": 0.3551320918146384,
+      "grad_norm": 0.95703125,
+      "learning_rate": 3.802986006121304e-05,
+      "loss": 0.3162668228149414,
+      "step": 820
+    },
+    {
+      "epoch": 0.3594629709831096,
+      "grad_norm": 0.498046875,
+      "learning_rate": 3.772511728547168e-05,
+      "loss": 0.2786050319671631,
+      "step": 830
+    },
+    {
+      "epoch": 0.3637938501515808,
+      "grad_norm": 0.703125,
+      "learning_rate": 3.741780078089975e-05,
+      "loss": 0.3166247844696045,
+      "step": 840
+    },
+    {
+      "epoch": 0.36812472932005197,
+      "grad_norm": 0.84765625,
+      "learning_rate": 3.710797270404405e-05,
+      "loss": 0.3263724327087402,
+      "step": 850
+    },
+    {
+      "epoch": 0.37245560848852316,
+      "grad_norm": 0.498046875,
+      "learning_rate": 3.6795695719431436e-05,
+      "loss": 0.3051194667816162,
+      "step": 860
+    },
+    {
+      "epoch": 0.37678648765699435,
+      "grad_norm": 0.9375,
+      "learning_rate": 3.6481032986894566e-05,
+      "loss": 0.3087512254714966,
+      "step": 870
+    },
+    {
+      "epoch": 0.38111736682546554,
+      "grad_norm": 1.0859375,
+      "learning_rate": 3.616404814879748e-05,
+      "loss": 0.31677100658416746,
+      "step": 880
+    },
+    {
+      "epoch": 0.3854482459939368,
+      "grad_norm": 0.59375,
+      "learning_rate": 3.5844805317163525e-05,
+      "loss": 0.3010247707366943,
+      "step": 890
+    },
+    {
+      "epoch": 0.389779125162408,
+      "grad_norm": 0.86328125,
+      "learning_rate": 3.552336906070838e-05,
+      "loss": 0.2823305606842041,
+      "step": 900
+    },
+    {
+      "epoch": 0.39411000433087917,
+      "grad_norm": 1.046875,
+      "learning_rate": 3.5199804391780594e-05,
+      "loss": 0.3044945240020752,
+      "step": 910
+    },
+    {
+      "epoch": 0.39844088349935036,
+      "grad_norm": 1.265625,
+      "learning_rate": 3.48741767532125e-05,
+      "loss": 0.31587924957275393,
+      "step": 920
+    },
+    {
+      "epoch": 0.40277176266782155,
+      "grad_norm": 1.203125,
+      "learning_rate": 3.454655200508402e-05,
+      "loss": 0.32800912857055664,
+      "step": 930
+    },
+    {
+      "epoch": 0.40710264183629274,
+      "grad_norm": 0.8125,
+      "learning_rate": 3.4216996411402077e-05,
+      "loss": 0.30738139152526855,
+      "step": 940
+    },
+    {
+      "epoch": 0.411433521004764,
+      "grad_norm": 0.63671875,
+      "learning_rate": 3.388557662669831e-05,
+      "loss": 0.2966631889343262,
+      "step": 950
+    },
+    {
+      "epoch": 0.4157644001732352,
+      "grad_norm": 0.83984375,
+      "learning_rate": 3.355235968254782e-05,
+      "loss": 0.30721249580383303,
+      "step": 960
+    },
+    {
+      "epoch": 0.4200952793417064,
+      "grad_norm": 0.90234375,
+      "learning_rate": 3.321741297401162e-05,
+      "loss": 0.29646241664886475,
+      "step": 970
+    },
+    {
+      "epoch": 0.42442615851017756,
+      "grad_norm": 1.1875,
+      "learning_rate": 3.288080424600563e-05,
+      "loss": 0.30686221122741697,
+      "step": 980
+    },
+    {
+      "epoch": 0.42875703767864876,
+      "grad_norm": 0.76953125,
+      "learning_rate": 3.254260157959884e-05,
+      "loss": 0.2886993408203125,
+      "step": 990
+    },
+    {
+      "epoch": 0.43308791684711995,
+      "grad_norm": 0.6328125,
+      "learning_rate": 3.220287337824355e-05,
+      "loss": 0.2889398097991943,
+      "step": 1000
+    },
+    {
+      "epoch": 0.43741879601559114,
+      "grad_norm": 0.80859375,
+      "learning_rate": 3.186168835394032e-05,
+      "loss": 0.28464765548706056,
+      "step": 1010
+    },
+    {
+      "epoch": 0.4417496751840624,
+      "grad_norm": 0.87890625,
+      "learning_rate": 3.151911551334066e-05,
+      "loss": 0.2970520734786987,
+      "step": 1020
+    },
+    {
+      "epoch": 0.4460805543525336,
+      "grad_norm": 1.046875,
+      "learning_rate": 3.1175224143789946e-05,
+      "loss": 0.27363173961639403,
+      "step": 1030
+    },
+    {
+      "epoch": 0.45041143352100477,
+      "grad_norm": 1.0546875,
+      "learning_rate": 3.083008379931369e-05,
+      "loss": 0.3226848840713501,
+      "step": 1040
+    },
+    {
+      "epoch": 0.45474231268947596,
+      "grad_norm": 0.9921875,
+      "learning_rate": 3.0483764286549843e-05,
+      "loss": 0.3007674217224121,
+      "step": 1050
+    },
+    {
+      "epoch": 0.45907319185794715,
+      "grad_norm": 1.0859375,
+      "learning_rate": 3.013633565062999e-05,
+      "loss": 0.2832650661468506,
+      "step": 1060
+    },
+    {
+      "epoch": 0.46340407102641834,
+      "grad_norm": 0.60546875,
+      "learning_rate": 2.978786816101229e-05,
+      "loss": 0.30285255908966063,
+      "step": 1070
+    },
+    {
+      "epoch": 0.4677349501948896,
+      "grad_norm": 0.98828125,
+      "learning_rate": 2.9438432297269113e-05,
+      "loss": 0.3115823268890381,
+      "step": 1080
+    },
+    {
+      "epoch": 0.4720658293633608,
+      "grad_norm": 0.5625,
+      "learning_rate": 2.9088098734832105e-05,
+      "loss": 0.31290392875671386,
+      "step": 1090
+    },
+    {
+      "epoch": 0.47639670853183197,
+      "grad_norm": 0.95703125,
+      "learning_rate": 2.873693833069769e-05,
+      "loss": 0.32068135738372805,
+      "step": 1100
+    },
+    {
+      "epoch": 0.48072758770030316,
+      "grad_norm": 1.1640625,
+      "learning_rate": 2.8385022109095828e-05,
+      "loss": 0.29875409603118896,
+      "step": 1110
+    },
+    {
+      "epoch": 0.48505846686877435,
+      "grad_norm": 0.86328125,
+      "learning_rate": 2.803242124712493e-05,
+      "loss": 0.30289008617401125,
+      "step": 1120
+    },
+    {
+      "epoch": 0.48938934603724554,
+      "grad_norm": 0.8828125,
+      "learning_rate": 2.7679207060355912e-05,
+      "loss": 0.3129941463470459,
+      "step": 1130
+    },
+    {
+      "epoch": 0.49372022520571673,
+      "grad_norm": 0.8203125,
+      "learning_rate": 2.7325450988408185e-05,
+      "loss": 0.28847951889038087,
+      "step": 1140
+    },
+    {
+      "epoch": 0.498051104374188,
+      "grad_norm": 0.64453125,
+      "learning_rate": 2.6971224580500592e-05,
+      "loss": 0.3122821092605591,
+      "step": 1150
+    },
+    {
+      "epoch": 0.5006496318752707,
+      "eval_text_loss": 1.580957055091858,
+      "eval_text_model_preparation_time": 0.0199,
+      "eval_text_runtime": 23.7145,
+      "eval_text_samples_per_second": 13.916,
+      "eval_text_steps_per_second": 13.916,
+      "step": 1156
+    },
+    {
+      "epoch": 0.5006496318752707,
+      "eval_audio_loss": 0.8451715111732483,
+      "eval_audio_model_preparation_time": 0.0199,
+      "eval_audio_runtime": 46.4407,
+      "eval_audio_samples_per_second": 5.512,
+      "eval_audio_steps_per_second": 5.512,
+      "step": 1156
+    },
+    {
+      "epoch": 0.5023819835426592,
+      "grad_norm": 0.66796875,
+      "learning_rate": 2.6616599480980143e-05,
+      "loss": 0.32015998363494874,
+      "step": 1160
+    },
+    {
+      "epoch": 0.5067128627111304,
+      "grad_norm": 0.8828125,
+      "learning_rate": 2.626164741483154e-05,
+      "loss": 0.30613036155700685,
+      "step": 1170
+    },
+    {
+      "epoch": 0.5110437418796016,
+      "grad_norm": 0.84375,
+      "learning_rate": 2.5906440173170386e-05,
+      "loss": 0.30779805183410647,
+      "step": 1180
+    },
+    {
+      "epoch": 0.5153746210480727,
+      "grad_norm": 0.890625,
+      "learning_rate": 2.5551049598723027e-05,
+      "loss": 0.29211466312408446,
+      "step": 1190
+    },
+    {
+      "epoch": 0.5197055002165439,
+      "grad_norm": 1.0703125,
+      "learning_rate": 2.5195547571295898e-05,
+      "loss": 0.29266557693481443,
+      "step": 1200
+    },
+    {
+      "epoch": 0.5240363793850151,
+      "grad_norm": 0.5703125,
+      "learning_rate": 2.484000599323747e-05,
+      "loss": 0.2896392583847046,
+      "step": 1210
+    },
+    {
+      "epoch": 0.5283672585534863,
+      "grad_norm": 0.81640625,
+      "learning_rate": 2.448449677489555e-05,
+      "loss": 0.298609185218811,
+      "step": 1220
+    },
+    {
+      "epoch": 0.5326981377219575,
+      "grad_norm": 0.54296875,
+      "learning_rate": 2.4129091820073015e-05,
+      "loss": 0.29924740791320803,
+      "step": 1230
+    },
+    {
+      "epoch": 0.5370290168904288,
+      "grad_norm": 1.0078125,
+      "learning_rate": 2.377386301148482e-05,
+      "loss": 0.2744253635406494,
+      "step": 1240
+    },
+    {
+      "epoch": 0.5413598960589,
+      "grad_norm": 0.6796875,
+      "learning_rate": 2.341888219621934e-05,
+      "loss": 0.278640079498291,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5456907752273712,
+      "grad_norm": 0.859375,
+      "learning_rate": 2.3064221171206856e-05,
+      "loss": 0.28020195960998534,
+      "step": 1260
+    },
+    {
+      "epoch": 0.5500216543958424,
+      "grad_norm": 0.625,
+      "learning_rate": 2.270995166869821e-05,
+      "loss": 0.30299355983734133,
+      "step": 1270
+    },
+    {
+      "epoch": 0.5543525335643136,
+      "grad_norm": 0.84375,
+      "learning_rate": 2.2356145341756548e-05,
+      "loss": 0.3237317562103271,
+      "step": 1280
+    },
+    {
+      "epoch": 0.5586834127327848,
+      "grad_norm": 0.76171875,
+      "learning_rate": 2.2002873749765076e-05,
+      "loss": 0.3258683681488037,
+      "step": 1290
+    },
+    {
+      "epoch": 0.563014291901256,
+      "grad_norm": 0.90625,
+      "learning_rate": 2.1650208343953747e-05,
+      "loss": 0.32569427490234376,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5673451710697271,
+      "grad_norm": 0.76171875,
+      "learning_rate": 2.1298220452947826e-05,
+      "loss": 0.28685925006866453,
+      "step": 1310
+    },
+    {
+      "epoch": 0.5716760502381983,
+      "grad_norm": 0.765625,
+      "learning_rate": 2.0946981268341297e-05,
+      "loss": 0.27852678298950195,
+      "step": 1320
+    },
+    {
+      "epoch": 0.5760069294066695,
+      "grad_norm": 0.6640625,
+      "learning_rate": 2.059656183029792e-05,
+      "loss": 0.30152413845062254,
+      "step": 1330
+    },
+    {
+      "epoch": 0.5803378085751407,
+      "grad_norm": 0.984375,
+      "learning_rate": 2.0247033013182955e-05,
+      "loss": 0.30128428936004636,
+      "step": 1340
+    },
+    {
+      "epoch": 0.5846686877436119,
+      "grad_norm": 1.15625,
+      "learning_rate": 1.9898465511228416e-05,
+      "loss": 0.31016106605529786,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5889995669120831,
+      "grad_norm": 0.7109375,
+      "learning_rate": 1.9550929824234736e-05,
+      "loss": 0.2842914342880249,
+      "step": 1360
+    },
+    {
+      "epoch": 0.5933304460805544,
+      "grad_norm": 0.8046875,
+      "learning_rate": 1.920449624331179e-05,
+      "loss": 0.2784018278121948,
+      "step": 1370
+    },
+    {
+      "epoch": 0.5976613252490256,
+      "grad_norm": 0.8359375,
+      "learning_rate": 1.8859234836662117e-05,
+      "loss": 0.28922812938690184,
+      "step": 1380
+    },
+    {
+      "epoch": 0.6019922044174968,
+      "grad_norm": 0.80859375,
+      "learning_rate": 1.851521543540916e-05,
+      "loss": 0.31638059616088865,
+      "step": 1390
+    },
+    {
+      "epoch": 0.606323083585968,
+      "grad_norm": 1.09375,
+      "learning_rate": 1.8172507619473614e-05,
+      "loss": 0.30915663242340086,
+      "step": 1400
+    },
+    {
+      "epoch": 0.6106539627544392,
+      "grad_norm": 0.68359375,
+      "learning_rate": 1.78311807035004e-05,
+      "loss": 0.31648037433624265,
+      "step": 1410
+    },
+    {
+      "epoch": 0.6149848419229104,
+      "grad_norm": 0.56640625,
+      "learning_rate": 1.749130372283942e-05,
+      "loss": 0.3143639326095581,
+      "step": 1420
+    },
+    {
+      "epoch": 0.6193157210913816,
+      "grad_norm": 0.9375,
+      "learning_rate": 1.715294541958274e-05,
+      "loss": 0.2913862943649292,
+      "step": 1430
+    },
+    {
+      "epoch": 0.6236466002598527,
+      "grad_norm": 0.703125,
+      "learning_rate": 1.6816174228661097e-05,
+      "loss": 0.28313374519348145,
+      "step": 1440
+    },
+    {
+      "epoch": 0.6279774794283239,
+      "grad_norm": 0.90234375,
+      "learning_rate": 1.648105826400256e-05,
+      "loss": 0.28623785972595217,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6323083585967951,
+      "grad_norm": 0.9765625,
+      "learning_rate": 1.6147665304756084e-05,
+      "loss": 0.2705575227737427,
+      "step": 1460
+    },
+    {
+      "epoch": 0.6366392377652663,
+      "grad_norm": 0.78515625,
+      "learning_rate": 1.581606278158274e-05,
+      "loss": 0.30232059955596924,
+      "step": 1470
+    },
+    {
+      "epoch": 0.6409701169337375,
+      "grad_norm": 0.734375,
+      "learning_rate": 1.548631776301756e-05,
+      "loss": 0.29305553436279297,
+      "step": 1480
+    },
+    {
+      "epoch": 0.6453009961022087,
+      "grad_norm": 0.8515625,
+      "learning_rate": 1.5158496941904462e-05,
+      "loss": 0.2765927314758301,
+      "step": 1490
+    },
+    {
+      "epoch": 0.6496318752706799,
+      "grad_norm": 0.91015625,
+      "learning_rate": 1.4832666621907265e-05,
+      "loss": 0.2911043882369995,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6539627544391512,
+      "grad_norm": 0.87109375,
+      "learning_rate": 1.4508892704099392e-05,
+      "loss": 0.2826990604400635,
+      "step": 1510
+    },
+    {
+      "epoch": 0.6582936336076224,
+      "grad_norm": 0.91796875,
+      "learning_rate": 1.4187240673634964e-05,
+      "loss": 0.2976381778717041,
+      "step": 1520
+    },
+    {
+      "epoch": 0.6626245127760936,
+      "grad_norm": 0.8359375,
+      "learning_rate": 1.3867775586504094e-05,
+      "loss": 0.28958122730255126,
+      "step": 1530
+    },
+    {
+      "epoch": 0.6669553919445648,
+      "grad_norm": 1.0625,
+      "learning_rate": 1.3550562056374908e-05,
+      "loss": 0.34233570098876953,
+      "step": 1540
+    },
+    {
+      "epoch": 0.671286271113036,
+      "grad_norm": 0.80859375,
+      "learning_rate": 1.3235664241525052e-05,
+      "loss": 0.30748577117919923,
+      "step": 1550
+    },
+    {
+      "epoch": 0.6756171502815072,
+      "grad_norm": 0.7265625,
+      "learning_rate": 1.2923145831865325e-05,
+      "loss": 0.279698920249939,
+      "step": 1560
+    },
+    {
+      "epoch": 0.6799480294499783,
+      "grad_norm": 0.78515625,
+      "learning_rate": 1.2613070036058005e-05,
+      "loss": 0.2836109399795532,
+      "step": 1570
+    },
+    {
+      "epoch": 0.6842789086184495,
+      "grad_norm": 0.80078125,
+      "learning_rate": 1.2305499568732554e-05,
+      "loss": 0.30521063804626464,
+      "step": 1580
+    },
+    {
+      "epoch": 0.6886097877869207,
+      "grad_norm": 0.7265625,
+      "learning_rate": 1.2000496637801195e-05,
+      "loss": 0.2962735891342163,
+      "step": 1590
+    },
+    {
+      "epoch": 0.6929406669553919,
+      "grad_norm": 0.9140625,
+      "learning_rate": 1.1698122931877018e-05,
+      "loss": 0.2767336845397949,
+      "step": 1600
+    },
+    {
+      "epoch": 0.6972715461238631,
+      "grad_norm": 0.78515625,
+      "learning_rate": 1.1398439607797098e-05,
+      "loss": 0.2979475975036621,
+      "step": 1610
+    },
+    {
+      "epoch": 0.7016024252923343,
+      "grad_norm": 0.8828125,
+      "learning_rate": 1.1101507278253167e-05,
+      "loss": 0.26526603698730467,
+      "step": 1620
+    },
+    {
+      "epoch": 0.7059333044608055,
+      "grad_norm": 0.9609375,
+      "learning_rate": 1.080738599953233e-05,
+      "loss": 0.28164918422698976,
+      "step": 1630
+    },
+    {
+      "epoch": 0.7102641836292768,
+      "grad_norm": 0.72265625,
+      "learning_rate": 1.0516135259370355e-05,
+      "loss": 0.3067033767700195,
+      "step": 1640
+    },
+    {
+      "epoch": 0.714595062797748,
+      "grad_norm": 0.91015625,
+      "learning_rate": 1.0227813964919938e-05,
+      "loss": 0.299686861038208,
+      "step": 1650
+    },
+    {
+      "epoch": 0.7189259419662192,
+      "grad_norm": 0.74609375,
+      "learning_rate": 9.94248043083636e-06,
+      "loss": 0.27935218811035156,
+      "step": 1660
+    },
+    {
+      "epoch": 0.7232568211346904,
+      "grad_norm": 1.1015625,
+      "learning_rate": 9.660192367483038e-06,
+      "loss": 0.2881230115890503,
+      "step": 1670
+    },
+    {
+      "epoch": 0.7275877003031616,
+      "grad_norm": 0.765625,
+      "learning_rate": 9.381006869259243e-06,
+      "loss": 0.29773364067077634,
+      "step": 1680
+    },
+    {
+      "epoch": 0.7319185794716327,
+      "grad_norm": 1.0390625,
+      "learning_rate": 9.104980403052458e-06,
+      "loss": 0.31337528228759765,
+      "step": 1690
+    },
+    {
+      "epoch": 0.7362494586401039,
+      "grad_norm": 0.58984375,
+      "learning_rate": 8.8321687968176e-06,
+      "loss": 0.29222004413604735,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7405803378085751,
+      "grad_norm": 0.94140625,
+      "learning_rate": 8.562627228285478e-06,
+      "loss": 0.2752734661102295,
+      "step": 1710
+    },
+    {
+      "epoch": 0.7449112169770463,
+      "grad_norm": 0.76171875,
+      "learning_rate": 8.296410213802813e-06,
+      "loss": 0.2992286443710327,
+      "step": 1720
+    },
+    {
+      "epoch": 0.7492420961455175,
+      "grad_norm": 1.09375,
+      "learning_rate": 8.033571597305977e-06,
+      "loss": 0.3042688608169556,
+      "step": 1730
+    },
+    {
+      "epoch": 0.750974447812906,
+      "eval_text_loss": 1.555040955543518,
+      "eval_text_model_preparation_time": 0.0199,
+      "eval_text_runtime": 24.438,
+      "eval_text_samples_per_second": 13.504,
+      "eval_text_steps_per_second": 13.504,
+      "step": 1734
+    },
+    {
+      "epoch": 0.750974447812906,
+      "eval_audio_loss": 0.8402940034866333,
+      "eval_audio_model_preparation_time": 0.0199,
+      "eval_audio_runtime": 45.5183,
+      "eval_audio_samples_per_second": 5.624,
+      "eval_audio_steps_per_second": 5.624,
+      "step": 1734
+    },
+    {
+      "epoch": 0.7535729753139887,
+      "grad_norm": 0.8984375,
+      "learning_rate": 7.774164539430734e-06,
+      "loss": 0.300627326965332,
+      "step": 1740
+    },
+    {
+      "epoch": 0.7579038544824599,
+      "grad_norm": 0.87109375,
+      "learning_rate": 7.518241506760196e-06,
+      "loss": 0.2855875253677368,
+      "step": 1750
+    },
+    {
+      "epoch": 0.7622347336509311,
+      "grad_norm": 1.5,
+      "learning_rate": 7.265854261213156e-06,
+      "loss": 0.29245338439941404,
+      "step": 1760
+    },
+    {
+      "epoch": 0.7665656128194024,
+      "grad_norm": 0.56640625,
+      "learning_rate": 7.017053849574945e-06,
+      "loss": 0.2890751361846924,
+      "step": 1770
+    },
+    {
+      "epoch": 0.7708964919878736,
+      "grad_norm": 0.70703125,
+      "learning_rate": 6.771890593172911e-06,
+      "loss": 0.2851340055465698,
+      "step": 1780
+    },
+    {
+      "epoch": 0.7752273711563448,
+      "grad_norm": 1.0078125,
+      "learning_rate": 6.530414077698649e-06,
+      "loss": 0.2948427677154541,
+      "step": 1790
+    },
+    {
+      "epoch": 0.779558250324816,
+      "grad_norm": 0.8359375,
+      "learning_rate": 6.2926731431789954e-06,
+      "loss": 0.3046332597732544,
+      "step": 1800
+    },
+    {
+      "epoch": 0.7838891294932872,
+      "grad_norm": 0.7734375,
+      "learning_rate": 6.058715874097864e-06,
+      "loss": 0.31103029251098635,
+      "step": 1810
+    },
+    {
+      "epoch": 0.7882200086617583,
+      "grad_norm": 0.625,
+      "learning_rate": 5.828589589670871e-06,
+      "loss": 0.29731192588806155,
+      "step": 1820
+    },
+    {
+      "epoch": 0.7925508878302295,
+      "grad_norm": 0.87109375,
+      "learning_rate": 5.60234083427475e-06,
+      "loss": 0.3095412254333496,
+      "step": 1830
+    },
+    {
+      "epoch": 0.7968817669987007,
+      "grad_norm": 0.8125,
+      "learning_rate": 5.380015368033476e-06,
+      "loss": 0.29828534126281736,
+      "step": 1840
+    },
+    {
+      "epoch": 0.8012126461671719,
+      "grad_norm": 0.578125,
+      "learning_rate": 5.161658157563026e-06,
+      "loss": 0.2998827934265137,
+      "step": 1850
+    },
+    {
+      "epoch": 0.8055435253356431,
+      "grad_norm": 0.546875,
+      "learning_rate": 4.947313366876619e-06,
+      "loss": 0.3060743808746338,
+      "step": 1860
+    },
+    {
+      "epoch": 0.8098744045041143,
+      "grad_norm": 0.76953125,
+      "learning_rate": 4.737024348452282e-06,
+      "loss": 0.28541877269744875,
+      "step": 1870
+    },
+    {
+      "epoch": 0.8142052836725855,
+      "grad_norm": 0.77734375,
+      "learning_rate": 4.530833634464548e-06,
+      "loss": 0.3088233947753906,
+      "step": 1880
+    },
+    {
+      "epoch": 0.8185361628410567,
+      "grad_norm": 1.25,
+      "learning_rate": 4.328782928182104e-06,
+      "loss": 0.3078837156295776,
+      "step": 1890
+    },
+    {
+      "epoch": 0.822867042009528,
+      "grad_norm": 0.83984375,
+      "learning_rate": 4.130913095533046e-06,
+      "loss": 0.2788443088531494,
+      "step": 1900
+    },
+    {
+      "epoch": 0.8271979211779992,
+      "grad_norm": 1.1796875,
+      "learning_rate": 3.9372641568395125e-06,
+      "loss": 0.2981949090957642,
+      "step": 1910
+    },
+    {
+      "epoch": 0.8315288003464704,
+      "grad_norm": 0.8984375,
+      "learning_rate": 3.747875278723334e-06,
+      "loss": 0.29841878414154055,
+      "step": 1920
+    },
+    {
+      "epoch": 0.8358596795149416,
+      "grad_norm": 0.87890625,
+      "learning_rate": 3.562784766184371e-06,
+      "loss": 0.3009059190750122,
+      "step": 1930
+    },
+    {
+      "epoch": 0.8401905586834127,
+      "grad_norm": 0.7578125,
+      "learning_rate": 3.3820300548530943e-06,
+      "loss": 0.29599244594573976,
+      "step": 1940
+    },
+    {
+      "epoch": 0.8445214378518839,
+      "grad_norm": 0.8203125,
+      "learning_rate": 3.205647703419015e-06,
+      "loss": 0.2934562683105469,
+      "step": 1950
+    },
+    {
+      "epoch": 0.8488523170203551,
+      "grad_norm": 0.80859375,
+      "learning_rate": 3.0336733862364684e-06,
+      "loss": 0.29344346523284914,
+      "step": 1960
+    },
+    {
+      "epoch": 0.8531831961888263,
+      "grad_norm": 0.96875,
+      "learning_rate": 2.866141886109286e-06,
+      "loss": 0.2922006845474243,
+      "step": 1970
+    },
+    {
+      "epoch": 0.8575140753572975,
+      "grad_norm": 0.62109375,
+      "learning_rate": 2.7030870872557593e-06,
+      "loss": 0.2850653648376465,
+      "step": 1980
+    },
+    {
+      "epoch": 0.8618449545257687,
+      "grad_norm": 0.90625,
+      "learning_rate": 2.544541968455372e-06,
+      "loss": 0.2891335725784302,
+      "step": 1990
+    },
+    {
+      "epoch": 0.8661758336942399,
+      "grad_norm": 0.7109375,
+      "learning_rate": 2.39053859637863e-06,
+      "loss": 0.28892719745635986,
+      "step": 2000
+    },
+    {
+      "epoch": 0.8705067128627111,
+      "grad_norm": 0.72265625,
+      "learning_rate": 2.2411081191014093e-06,
+      "loss": 0.29591898918151854,
+      "step": 2010
+    },
+    {
+      "epoch": 0.8748375920311823,
+      "grad_norm": 1.046875,
+      "learning_rate": 2.096280759805069e-06,
+      "loss": 0.2645926237106323,
+      "step": 2020
+    },
+    {
+      "epoch": 0.8791684711996536,
+      "grad_norm": 1.1640625,
+      "learning_rate": 1.9560858106636408e-06,
+      "loss": 0.2921916007995605,
+      "step": 2030
+    },
+    {
+      "epoch": 0.8834993503681248,
+      "grad_norm": 0.9375,
+      "learning_rate": 1.8205516269193063e-06,
+      "loss": 0.2916860580444336,
+      "step": 2040
+    },
+    {
+      "epoch": 0.887830229536596,
+      "grad_norm": 0.75390625,
+      "learning_rate": 1.6897056211474054e-06,
+      "loss": 0.28798465728759765,
+      "step": 2050
+    },
+    {
+      "epoch": 0.8921611087050672,
+      "grad_norm": 0.84765625,
+      "learning_rate": 1.5635742577120638e-06,
+      "loss": 0.28080263137817385,
+      "step": 2060
+    },
+    {
+      "epoch": 0.8964919878735383,
+      "grad_norm": 0.59765625,
+      "learning_rate": 1.4421830474136339e-06,
+      "loss": 0.2958329677581787,
+      "step": 2070
+    },
+    {
+      "epoch": 0.9008228670420095,
+      "grad_norm": 0.859375,
+      "learning_rate": 1.3255565423289818e-06,
+      "loss": 0.29818201065063477,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9051537462104807,
+      "grad_norm": 0.9453125,
+      "learning_rate": 1.2137183308456867e-06,
+      "loss": 0.30882911682128905,
+      "step": 2090
+    },
+    {
+      "epoch": 0.9094846253789519,
+      "grad_norm": 0.796875,
+      "learning_rate": 1.1066910328911579e-06,
+      "loss": 0.29765470027923585,
+      "step": 2100
+    },
+    {
+      "epoch": 0.9138155045474231,
+      "grad_norm": 0.8125,
+      "learning_rate": 1.0044962953576238e-06,
+      "loss": 0.28506147861480713,
+      "step": 2110
+    },
+    {
+      "epoch": 0.9181463837158943,
+      "grad_norm": 0.77734375,
+      "learning_rate": 9.071547877239017e-07,
+      "loss": 0.2811078310012817,
+      "step": 2120
+    },
+    {
+      "epoch": 0.9224772628843655,
+      "grad_norm": 0.73046875,
+      "learning_rate": 8.146861978749115e-07,
+      "loss": 0.2883314847946167,
+      "step": 2130
+    },
+    {
+      "epoch": 0.9268081420528367,
+      "grad_norm": 0.5234375,
+      "learning_rate": 7.271092281196573e-07,
+      "loss": 0.27544965744018557,
+      "step": 2140
+    },
+    {
+      "epoch": 0.9311390212213079,
+      "grad_norm": 0.97265625,
+      "learning_rate": 6.4444159140859e-07,
+      "loss": 0.3128631353378296,
+      "step": 2150
+    },
+    {
+      "epoch": 0.9354699003897792,
+      "grad_norm": 0.92578125,
+      "learning_rate": 5.667000077510559e-07,
+      "loss": 0.2900621652603149,
+      "step": 2160
+    },
+    {
+      "epoch": 0.9398007795582504,
+      "grad_norm": 0.63671875,
+      "learning_rate": 4.939002008335802e-07,
+      "loss": 0.3027785062789917,
+      "step": 2170
+    },
+    {
+      "epoch": 0.9441316587267216,
+      "grad_norm": 1.140625,
+      "learning_rate": 4.2605689483966037e-07,
+      "loss": 0.3042114734649658,
+      "step": 2180
+    },
+    {
+      "epoch": 0.9484625378951927,
+      "grad_norm": 0.91796875,
+      "learning_rate": 3.6318381147171234e-07,
+      "loss": 0.2658639669418335,
+      "step": 2190
+    },
+    {
+      "epoch": 0.9527934170636639,
+      "grad_norm": 0.75390625,
+      "learning_rate": 3.052936671757739e-07,
+      "loss": 0.28366703987121583,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9571242962321351,
+      "grad_norm": 1.1796875,
+      "learning_rate": 2.523981705695427e-07,
+      "loss": 0.31629841327667235,
+      "step": 2210
+    },
+    {
+      "epoch": 0.9614551754006063,
+      "grad_norm": 1.171875,
+      "learning_rate": 2.0450802007422053e-07,
+      "loss": 0.3071019172668457,
+      "step": 2220
+    },
+    {
+      "epoch": 0.9657860545690775,
+      "grad_norm": 1.0078125,
+      "learning_rate": 1.6163290175071343e-07,
+      "loss": 0.30133790969848634,
+      "step": 2230
+    },
+    {
+      "epoch": 0.9701169337375487,
+      "grad_norm": 0.61328125,
+      "learning_rate": 1.2378148734056017e-07,
+      "loss": 0.2814929962158203,
+      "step": 2240
+    },
+    {
+      "epoch": 0.9744478129060199,
+      "grad_norm": 0.796875,
+      "learning_rate": 9.096143251202115e-08,
+      "loss": 0.28168976306915283,
+      "step": 2250
+    },
+    {
+      "epoch": 0.9787786920744911,
+      "grad_norm": 0.86328125,
+      "learning_rate": 6.317937531168106e-08,
+      "loss": 0.31104719638824463,
+      "step": 2260
+    },
+    {
+      "epoch": 0.9831095712429623,
+      "grad_norm": 0.7890625,
+      "learning_rate": 4.0440934821864286e-08,
+      "loss": 0.2767423868179321,
+      "step": 2270
+    },
+    {
+      "epoch": 0.9874404504114335,
+      "grad_norm": 0.87890625,
+      "learning_rate": 2.2750710024138646e-08,
+      "loss": 0.29538898468017577,
+      "step": 2280
+    },
+    {
+      "epoch": 0.9917713295799048,
+      "grad_norm": 0.71484375,
+      "learning_rate": 1.0112278869145453e-08,
+      "loss": 0.275164270401001,
+      "step": 2290
+    },
+    {
+      "epoch": 0.996102208748376,
+      "grad_norm": 1.1484375,
+      "learning_rate": 2.528197552942313e-09,
+      "loss": 0.30729990005493163,
+      "step": 2300
+    },
+    {
+      "epoch": 1.0,
+      "eval_text_loss": 1.5541865825653076,
+      "eval_text_model_preparation_time": 0.0199,
+      "eval_text_runtime": 24.8156,
+      "eval_text_samples_per_second": 13.298,
+      "eval_text_steps_per_second": 13.298,
+      "step": 2309
+    },
+    {
+      "epoch": 1.0,
+      "eval_audio_loss": 0.8393442034721375,
+      "eval_audio_model_preparation_time": 0.0199,
+      "eval_audio_runtime": 44.203,
+      "eval_audio_samples_per_second": 5.791,
+      "eval_audio_steps_per_second": 5.791,
+      "step": 2309
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2309,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 47,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.2190042187360576e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5393638fc96a51ffc096bc09eb48144285f49379afa915db3fe6586e5211b890
+size 5905