Model save

Browse files

Files changed (9) hide show

README.md +70 -0
config.json +42 -0
generation_config.json +7 -0
model.safetensors +3 -0
special_tokens_map.json +48 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +196 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: ai21labs/Jamba-tiny-dev
+tags:
+- trl
+- sft
+- generated_from_trainer
+datasets:
+- generator
+model-index:
+- name: BT-Jamba-1.5-Mini-customer-support-3
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# BT-Jamba-1.5-Mini-customer-support-3
+This model is a fine-tuned version of [ai21labs/Jamba-tiny-dev](https://huggingface.co/ai21labs/Jamba-tiny-dev) on the generator dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.5598
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.01
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.6095        | 1.0   | 150  | 0.6130          |
+| 0.5353        | 2.0   | 300  | 0.5674          |
+| 0.4782        | 3.0   | 450  | 0.5598          |
+### Framework versions
+- Transformers 4.45.0.dev0
+- Pytorch 2.1.0
+- Datasets 2.18.0
+- Tokenizers 0.19.1

config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "_name_or_path": "ai21labs/Jamba-tiny-dev",
+  "architectures": [
+    "JambaForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attn_layer_offset": 4,
+  "attn_layer_period": 8,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "expert_layer_offset": 1,
+  "expert_layer_period": 2,
+  "hidden_act": "silu",
+  "hidden_size": 512,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "mamba_conv_bias": true,
+  "mamba_d_conv": 4,
+  "mamba_d_state": 16,
+  "mamba_dt_rank": 32,
+  "mamba_expand": 2,
+  "mamba_proj_bias": false,
+  "max_position_embeddings": 262144,
+  "model_type": "jamba",
+  "num_attention_heads": 8,
+  "num_experts": 8,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 2,
+  "num_logits_to_keep": 1,
+  "output_router_logits": false,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-06,
+  "router_aux_loss_coef": 0.001,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.45.0.dev0",
+  "use_cache": true,
+  "use_mamba_kernels": true,
+  "vocab_size": 65536
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.45.0.dev0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:770d42376902418c015360cf6769782affbb3d9cfee9b5eef037f5d6e3628eab
+size 1274805832

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "additional_special_tokens": [
+    "<|eom|>",
+    "<|bom|>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|tool|>",
+    "<documents>",
+    "</documents>",
+    "<tool_definitions>",
+    "</tool_definitions>",
+    "<active_output_modes>",
+    "</active_output_modes>",
+    "<citations>",
+    "</citations>",
+    "<tool_calls>",
+    "</tool_calls>"
+  ],
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|pad|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|unk|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b0df4fb43262c452ef37061951a06df4c63ca191d02a60ea08f14428af24376
+size 1124714

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,196 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<|unk|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "518": {
+      "content": "<|eom|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "519": {
+      "content": "<|bom|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "520": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "521": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "522": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "523": {
+      "content": "<|tool|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "524": {
+      "content": "<documents>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "525": {
+      "content": "</documents>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "526": {
+      "content": "<tool_definitions>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "527": {
+      "content": "</tool_definitions>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "528": {
+      "content": "<active_output_modes>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "529": {
+      "content": "</active_output_modes>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "530": {
+      "content": "<citations>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "531": {
+      "content": "</citations>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "532": {
+      "content": "<tool_calls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "533": {
+      "content": "</tool_calls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|eom|>",
+    "<|bom|>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|tool|>",
+    "<documents>",
+    "</documents>",
+    "<tool_definitions>",
+    "</tool_definitions>",
+    "<active_output_modes>",
+    "</active_output_modes>",
+    "<citations>",
+    "</citations>",
+    "<tool_calls>",
+    "</tool_calls>"
+  ],
+  "bos_token": "<|startoftext|>",
+  "chat_template": "{# Variables #}\n{% set ns = namespace(message_count=0, is_last_checked_defined=False) %}\n{##}\n{% set bom_str = bom_str or \"<|bom|>\" %}\n{% set eom_str = eom_str or \"<|eom|>\" %}\n{% set default_system_message = \"\" %}\n{##}\n{% set documents_prefix = \"<documents>\" %}\n{% set documents_suffix = \"</documents>\" %}\n{% set tool_definitions_prefix = \"<tool_definitions>\" %}\n{% set tool_definitions_suffix = \"</tool_definitions>\" %}\n{% set active_modes_prefix = \"<active_output_modes>\" %}\n{% set active_modes_suffix = \"</active_output_modes>\" %}\n{##}\n{% set tool_calls_prefix = \"<tool_calls>\" %}\n{% set tool_calls_suffix = \"</tool_calls>\" %}\n{% set citations_prefix = \"<citations>\" %}\n{% set citations_suffix = \"</citations>\" %}\n{##}\n{% if add_generation_prompt is not defined %}\n  {% set add_generation_prompt = True %}\n{% endif %}\n{% set role_to_predict = role_to_predict or \"assistant\" %}\n{% if messages|length > 0 and messages[0].role == \"system\" %}\n  {% set system_message = messages[0].content %}\n  {% set loop_messages = messages[1:] %}\n{% else %}\n  {% set system_message = default_system_message %}\n  {% set loop_messages = messages %}\n{% endif %}\n{##}\n{##}\n{# Macros #}\n{% macro handle_tool_definitions(tools) %}\n  {{- tool_definitions_prefix -}}\n  {{- \"\\n# Tools\" -}}\n  {{- \"\\n\\n## Functions\" -}}\n  {% for tool in tools %}\n    {% set _ = is_param_set(tool, field=\"type\") %}\n    {% set is_tool_type_set = ns.is_last_checked_defined %}\n    {% if is_tool_type_set %}\n      {% if tool.type == \"function\" %}\n        {% set tool = tool.function %}\n      {% else %}\n        {{ raise_exception(\"Currently, the only supported tool type is `function`\") }}\n      {% endif %}\n    {% endif %}\n    {{- \"\\n\\n\" + (tool|tojson(indent=2)) -}}\n  {% endfor %}\n  {{- \"\\n\" + tool_definitions_suffix -}}\n{% endmacro %}\n{##}\n{% macro handle_first_system_message(system_message, tools) %}\n  {{- bom_str + handle_role(\"system\") -}}\n  {% set _ = is_param_set(system_message) %}\n  {% set is_system_message_set = ns.is_last_checked_defined %}\n  {% if is_system_message_set %}\n    {{- system_message -}}\n  {% endif %}\n  {% set _ = is_param_set(tools, is_list=True) %}\n  {% set is_tools_set = ns.is_last_checked_defined %}\n  {% if is_tools_set %}\n    {% if system_message %}\n      {{- \"\\n\\n\" -}}\n    {% endif %}\n    {{- handle_tool_definitions(tools) -}}\n  {% endif %}\n  {% set ns.message_count = ns.message_count + 1 %}\n{% endmacro %}\n{##}\n{% macro handle_tool_calls(tool_calls) %}\n  {{- tool_calls_prefix + \"[\\n\" -}}\n  {% for tool_call in tool_calls %}\n    {% set _ = is_param_set(tool_call, field=\"function\") %}\n    {% set is_tool_call_function_set = ns.is_last_checked_defined %}\n    {% if is_tool_call_function_set %}\n      {%- set tool_call = tool_call.function %}\n    {%- endif %}\n    {% set arguments = tool_call.arguments %}\n    {% if arguments is not string %}\n      {%- set arguments = arguments|tojson -%}\n    {%- endif %}\n    {{ \"{\\\"name\\\": \\\"\" + tool_call.name + \"\\\", \\\"arguments\\\": \" + arguments + \"}\" -}}\n    {% if not loop.last %}\n      {{- \",\" }}\n    {% endif %}\n  {% endfor %}\n  {{- \"\\n]\" + tool_calls_suffix -}}\n{% endmacro %}\n{##}\n{% macro handle_documents(documents) %}\n  {{- documents_prefix -}}\n  {{- \"\\n# Documents\" -}}\n  {{- \"\\n\\nYou can use the following documents for reference:\" -}}\n  {% for doc in documents %}\n    {{- \"\\n\\n## Document ID: \" + loop.index0|string -}}\n    {% set _ = is_param_set(doc, field=\"title\") %}\n    {% set is_doc_title_set = ns.is_last_checked_defined %}\n    {% if is_doc_title_set %}\n      {{- \"\\nTitle: \" + doc.title -}}\n    {% endif %}\n    {% for key, value in doc.items() %}\n      {% if key not in [\"title\", \"text\"] %}\n        {{- \"\\n\" + key|title + \": \" + value|string -}}\n      {% endif %}\n    {% endfor %}\n    {{- \"\\nText: \" + doc.text -}}\n  {% endfor %}\n  {{- \"\\n\" + documents_suffix -}}\n{% endmacro %}\n{##}\n{% macro handle_knobs(knobs) %}\n  {{- active_modes_prefix -}}\n  {{- \"\\n# Active Modes\" -}}\n  {{ \"\\n\\nThe following modes configure the format or style of your responses. You should adhere to all currently\" -}}\n  {{ \" active modes simultaneously.\" -}}\n  {% if knobs.citation_mode == \"fast\" %}\n    {{- \"\\n\\n## Citation Mode\" -}}\n    {{- \"\\n\\nProvide a list of references only for the documents you base your response on. Format your response\" -}}\n    {{ \" with the original answer followed by a citation section. Use this template:\" -}}\n    {{ \" `{answer}\" + citations_prefix + \"DOCUMENT_IDS\" + citations_suffix + \"`, where DOCUMENT_IDS are the relevant document numbers\" -}}\n    {{ \" (e.g. [2, 5, 9]), or [] if the answer cannot be supported by the provided documents.\" -}}\n  {% endif %}\n  {% if knobs.response_format == \"json_object\" %}\n    {{- \"\\n\\n## JSON Mode\" -}}\n    {{ \"\\n\\nProvide your response in JSON format. Adhere strictly to any schema given by the user.\" -}}\n    {{ \" If an appropriate JSON format exists, use it without modification.\" -}}\n  {% endif %}\n  {{- \"\\n\" + active_modes_suffix -}}\n{% endmacro %}\n{##}\n{% macro get_last_user_index(messages) %}\n  {% set ns.last_user_index = 0 %}\n  {% for message in messages %}\n    {% if message.role == 'user' %}\n      {% set ns.last_user_index = loop.index0 %}\n    {% endif %}\n  {% endfor %}\n  {{- ns.last_user_index -}}\n{% endmacro %}\n{##}\n{% macro handle_last_system_message(documents, knobs, use_documents, use_knobs) %}\n  {{- bom_str + handle_role(\"system\") -}}\n  {% set macros_to_call = [] %}\n  {% set params_for_macros = [] %}\n  {% if use_documents %}\n    {% set macros_to_call = macros_to_call + [handle_documents] %}\n    {% set params_for_macros = params_for_macros + [[documents]] %}\n  {% endif %}\n  {% if use_knobs %}\n    {% set macros_to_call = macros_to_call + [handle_knobs] %}\n    {% set params_for_macros = params_for_macros + [[knobs]] %}\n  {% endif %}\n  {% for i in range(macros_to_call|length) %}\n    {% if i > 0 %}\n      {{- \"\\n\\n\" -}}\n    {% endif %}\n    {{- macros_to_call[i](*params_for_macros[i]) -}}\n  {% endfor %}\n  {% set ns.message_count = ns.message_count + 1 %}\n{% endmacro %}\n{##}\n{% macro handle_role(role, add_space=True) %}\n  {{- \"<|\" + role + \"|>\" -}}\n  {% if add_space %}\n    {{- \" \" -}}\n  {% endif %}\n{% endmacro %}\n{##}\n{% macro is_param_set(param, field=none, is_list=False) %}\n  {% if field is not none %}\n    {% if field in param %}\n      {% set param = param[field] %}\n    {% else %}\n      {% set param = none %}\n    {% endif %}\n  {% endif %}\n  {% set is_defined = param is defined and param is not none %}\n  {% if is_list %}\n    {% set ns.is_last_checked_defined = is_defined and param|length > 0 %}\n  {% else %}\n    {% set ns.is_last_checked_defined = is_defined %}\n  {% endif %}\n{% endmacro %}\n{##}\n{##}\n{# Template #}\n{{- \"<|startoftext|>\" -}}\n{% set _ = is_param_set(system_message) %}\n{% set is_system_message_set = ns.is_last_checked_defined %}\n{% set _ = is_param_set(tools, is_list=True) %}\n{% set is_tools_set = ns.is_last_checked_defined %}\n{% set has_system_message = (is_system_message_set or is_tools_set) %}\n{% if has_system_message %}\n  {{- handle_first_system_message(system_message, tools) -}}\n{% endif %}\n{% set last_user_index = get_last_user_index(loop_messages)|int %}\n{% for message in loop_messages %}\n  {% if loop.index0 == last_user_index %}\n    {% set _ = is_param_set(documents, is_list=True) %}\n    {% set use_documents = ns.is_last_checked_defined %}\n    {% set _ = is_param_set(knobs) %}\n    {% set use_knobs = ns.is_last_checked_defined and knobs.is_set %}\n    {% set add_last_system_message = use_documents or use_knobs %}\n    {% if add_last_system_message %}\n      {% if ns.message_count > 0 %}\n        {{- eom_str -}}\n      {% endif %}\n      {{- handle_last_system_message(documents, knobs, use_documents, use_knobs) -}}\n    {% endif %}\n  {% endif %}\n  {% set role = message.role %}\n  {% set _ = is_param_set(message, field=\"name\") %}\n  {% set is_message_name_set = ns.is_last_checked_defined %}\n  {% if is_message_name_set %}\n    {% set message_prefix = handle_role(role) + \"(\" + message.name + \")\" %}\n  {% else %}\n    {% set message_prefix = handle_role(role) %}\n  {% endif %}\n  {% set content = (message.content or \"\") %}\n  {% if content is not string %}\n    {% set content = content|tojson %}\n  {% endif %}\n  {% if ns.message_count > 0 %}\n    {{- eom_str -}}\n  {% endif %}\n  {{- bom_str + message_prefix + content -}}\n  {% set _ = is_param_set(message, field=\"tool_calls\", is_list=True) %}\n  {% set is_tool_calls_set = ns.is_last_checked_defined %}\n  {% if role == \"assistant\" and is_tool_calls_set %}\n    {{- handle_tool_calls(message.tool_calls) -}}\n  {% endif %}\n  {% set _ = is_param_set(message, field=\"citations\", is_list=True) %}\n  {% set is_citations_set = ns.is_last_checked_defined %}\n  {% if role == \"assistant\" and is_citations_set %}\n    {{- citations_prefix + message.citations|map(attribute=\"document_id\")|list|string + citations_suffix -}}\n  {% endif %}\n  {% set ns.message_count = ns.message_count + 1 %}\n{% endfor %}\n{% if add_generation_prompt %}\n  {% if ns.message_count > 0 %}\n    {{- eom_str -}}\n  {% endif %}\n  {{- bom_str + handle_role(role_to_predict, add_space=False) -}}\n  {% set _ = is_param_set(generation_preamble) %}\n  {% set is_generation_preamble_set = ns.is_last_checked_defined %}\n  {% if is_generation_preamble_set and generation_preamble.strip() != \"\" %}\n    {{- \" \" + generation_preamble -}}\n  {% endif %}\n  {% set ns.message_count = ns.message_count + 1 %}\n{% else %}\n  {% if ns.message_count > 0 %}\n    {{- eom_str -}}\n  {% endif %}\n{% endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "<|pad|>",
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<|unk|>",
+  "use_default_system_prompt": false
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b20504e82a805ce60a3cde55454acdf6776d66ecae166599a9a406412aa9a84
+size 6904