Upload initial model
Browse files- .gitattributes +1 -0
 - README.md +185 -1
 - added_tokens.json +28 -0
 - cfg.yaml +117 -0
 - config.json +31 -0
 - generation_config.json +12 -0
 - merges.txt +0 -0
 - model.safetensors +3 -0
 - special_tokens_map.json +33 -0
 - tokenizer.json +3 -0
 - tokenizer_config.json +241 -0
 - vocab.json +0 -0
 
    	
        .gitattributes
    CHANGED
    
    | 
         @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 
     | 
|
| 33 | 
         
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         
     | 
| 34 | 
         
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         
     | 
| 35 | 
         
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         
     | 
| 
         | 
| 
         | 
|
| 33 | 
         
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         
     | 
| 34 | 
         
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         
     | 
| 35 | 
         
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         
     | 
| 36 | 
         
            +
            tokenizer.json filter=lfs diff=lfs merge=lfs -text
         
     | 
    	
        README.md
    CHANGED
    
    | 
         @@ -1,3 +1,187 @@ 
     | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 3 | 
         
             
            ---
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
             
            ---
         
     | 
| 2 | 
         
            +
            language:
         
     | 
| 3 | 
         
            +
            - en
         
     | 
| 4 | 
         
            +
            library_name: transformers
         
     | 
| 5 | 
         
            +
            inference: false
         
     | 
| 6 | 
         
            +
            thumbnail: https://h2o.ai/etc.clientlibs/h2o/clientlibs/clientlib-site/resources/images/favicon.ico
         
     | 
| 7 | 
         
            +
            tags:
         
     | 
| 8 | 
         
            +
            - gpt
         
     | 
| 9 | 
         
            +
            - llm
         
     | 
| 10 | 
         
            +
            - large language model
         
     | 
| 11 | 
         
            +
            - h2o-llmstudio
         
     | 
| 12 | 
         
             
            ---
         
     | 
| 13 | 
         
            +
            # Model Card
         
     | 
| 14 | 
         
            +
            ## Summary
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            This model was trained using [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio).
         
     | 
| 17 | 
         
            +
            - Base model: [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B)
         
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            ## Usage
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
            To use the model with the `transformers` library on a machine with GPUs, first make sure you have the `transformers` library installed.
         
     | 
| 23 | 
         
            +
             
     | 
| 24 | 
         
            +
            ```bash
         
     | 
| 25 | 
         
            +
            pip install transformers==4.51.3
         
     | 
| 26 | 
         
            +
            ```
         
     | 
| 27 | 
         
            +
             
     | 
| 28 | 
         
            +
            Also make sure you provide your Hugging Face token to the pipeline if the model is hosted in a private repo.
         
     | 
| 29 | 
         
            +
             
     | 
| 30 | 
         
            +
            - Either leave `token=True` in the `pipeline` and log in to `huggingface_hub` by running
         
     | 
| 31 | 
         
            +
             
     | 
| 32 | 
         
            +
            ```python
         
     | 
| 33 | 
         
            +
            import huggingface_hub
         
     | 
| 34 | 
         
            +
            huggingface_hub.login(<ACCESS_TOKEN>)
         
     | 
| 35 | 
         
            +
            ```
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            - Or directly pass your <ACCESS_TOKEN> to `token` in the `pipeline`
         
     | 
| 38 | 
         
            +
             
     | 
| 39 | 
         
            +
            ```python
         
     | 
| 40 | 
         
            +
            from transformers import pipeline
         
     | 
| 41 | 
         
            +
             
     | 
| 42 | 
         
            +
            generate_text = pipeline(
         
     | 
| 43 | 
         
            +
                model="SNUMPR/Terran-c",
         
     | 
| 44 | 
         
            +
                torch_dtype="auto",
         
     | 
| 45 | 
         
            +
                trust_remote_code=True,
         
     | 
| 46 | 
         
            +
                device_map={"": "cuda:0"},
         
     | 
| 47 | 
         
            +
                token=True,
         
     | 
| 48 | 
         
            +
            )
         
     | 
| 49 | 
         
            +
             
     | 
| 50 | 
         
            +
            # generate configuration can be modified to your needs
         
     | 
| 51 | 
         
            +
            # generate_text.model.generation_config.min_new_tokens = 2
         
     | 
| 52 | 
         
            +
            # generate_text.model.generation_config.max_new_tokens = 4096
         
     | 
| 53 | 
         
            +
            # generate_text.model.generation_config.do_sample = False
         
     | 
| 54 | 
         
            +
            # generate_text.model.generation_config.num_beams = 1
         
     | 
| 55 | 
         
            +
            # generate_text.model.generation_config.temperature = float(0.0)
         
     | 
| 56 | 
         
            +
            # generate_text.model.generation_config.repetition_penalty = float(1.0)
         
     | 
| 57 | 
         
            +
             
     | 
| 58 | 
         
            +
            messages = [
         
     | 
| 59 | 
         
            +
                {"role": "user", "content": "Hi, how are you?"},
         
     | 
| 60 | 
         
            +
                {"role": "assistant", "content": "I'm doing great, how about you?"},
         
     | 
| 61 | 
         
            +
                {"role": "user", "content": "Why is drinking water so healthy?"},
         
     | 
| 62 | 
         
            +
            ]
         
     | 
| 63 | 
         
            +
             
     | 
| 64 | 
         
            +
            res = generate_text(
         
     | 
| 65 | 
         
            +
                messages,
         
     | 
| 66 | 
         
            +
                renormalize_logits=True
         
     | 
| 67 | 
         
            +
            )
         
     | 
| 68 | 
         
            +
            print(res[0]["generated_text"][-1]['content'])
         
     | 
| 69 | 
         
            +
            ```
         
     | 
| 70 | 
         
            +
             
     | 
| 71 | 
         
            +
            You can print a sample prompt after applying the chat template to see how it is fed to the tokenizer:
         
     | 
| 72 | 
         
            +
             
     | 
| 73 | 
         
            +
            ```python
         
     | 
| 74 | 
         
            +
            print(generate_text.tokenizer.apply_chat_template(
         
     | 
| 75 | 
         
            +
                messages,
         
     | 
| 76 | 
         
            +
                tokenize=False,
         
     | 
| 77 | 
         
            +
                add_generation_prompt=True,
         
     | 
| 78 | 
         
            +
            ))
         
     | 
| 79 | 
         
            +
            ```
         
     | 
| 80 | 
         
            +
             
     | 
| 81 | 
         
            +
            You may also construct the pipeline from the loaded model and tokenizer yourself and consider the preprocessing steps:
         
     | 
| 82 | 
         
            +
             
     | 
| 83 | 
         
            +
            ```python
         
     | 
| 84 | 
         
            +
            from transformers import AutoModelForCausalLM, AutoTokenizer
         
     | 
| 85 | 
         
            +
             
     | 
| 86 | 
         
            +
            model_name = "SNUMPR/Terran-c"  # either local folder or Hugging Face model name
         
     | 
| 87 | 
         
            +
            # Important: The prompt needs to be in the same format the model was trained with.
         
     | 
| 88 | 
         
            +
            # You can find an example prompt in the experiment logs.
         
     | 
| 89 | 
         
            +
            messages = [
         
     | 
| 90 | 
         
            +
                {"role": "user", "content": "Hi, how are you?"},
         
     | 
| 91 | 
         
            +
                {"role": "assistant", "content": "I'm doing great, how about you?"},
         
     | 
| 92 | 
         
            +
                {"role": "user", "content": "Why is drinking water so healthy?"},
         
     | 
| 93 | 
         
            +
            ]
         
     | 
| 94 | 
         
            +
             
     | 
| 95 | 
         
            +
            tokenizer = AutoTokenizer.from_pretrained(
         
     | 
| 96 | 
         
            +
                model_name,
         
     | 
| 97 | 
         
            +
                trust_remote_code=True,
         
     | 
| 98 | 
         
            +
            )
         
     | 
| 99 | 
         
            +
            model = AutoModelForCausalLM.from_pretrained(
         
     | 
| 100 | 
         
            +
                model_name,
         
     | 
| 101 | 
         
            +
                torch_dtype="auto",
         
     | 
| 102 | 
         
            +
                device_map={"": "cuda:0"},
         
     | 
| 103 | 
         
            +
                trust_remote_code=True,
         
     | 
| 104 | 
         
            +
            )
         
     | 
| 105 | 
         
            +
            model.cuda().eval()
         
     | 
| 106 | 
         
            +
             
     | 
| 107 | 
         
            +
            # generate configuration can be modified to your needs
         
     | 
| 108 | 
         
            +
            # model.generation_config.min_new_tokens = 2
         
     | 
| 109 | 
         
            +
            # model.generation_config.max_new_tokens = 4096
         
     | 
| 110 | 
         
            +
            # model.generation_config.do_sample = False
         
     | 
| 111 | 
         
            +
            # model.generation_config.num_beams = 1
         
     | 
| 112 | 
         
            +
            # model.generation_config.temperature = float(0.0)
         
     | 
| 113 | 
         
            +
            # model.generation_config.repetition_penalty = float(1.0)
         
     | 
| 114 | 
         
            +
             
     | 
| 115 | 
         
            +
            inputs = tokenizer.apply_chat_template(
         
     | 
| 116 | 
         
            +
                messages,
         
     | 
| 117 | 
         
            +
                tokenize=True,
         
     | 
| 118 | 
         
            +
                add_generation_prompt=True,
         
     | 
| 119 | 
         
            +
                return_tensors="pt",
         
     | 
| 120 | 
         
            +
                return_dict=True,
         
     | 
| 121 | 
         
            +
            ).to("cuda")
         
     | 
| 122 | 
         
            +
             
     | 
| 123 | 
         
            +
            tokens = model.generate(
         
     | 
| 124 | 
         
            +
                input_ids=inputs["input_ids"],
         
     | 
| 125 | 
         
            +
                attention_mask=inputs["attention_mask"],
         
     | 
| 126 | 
         
            +
                renormalize_logits=True
         
     | 
| 127 | 
         
            +
            )[0]
         
     | 
| 128 | 
         
            +
             
     | 
| 129 | 
         
            +
            tokens = tokens[inputs["input_ids"].shape[1]:]
         
     | 
| 130 | 
         
            +
            answer = tokenizer.decode(tokens, skip_special_tokens=True)
         
     | 
| 131 | 
         
            +
            print(answer)
         
     | 
| 132 | 
         
            +
            ```
         
     | 
| 133 | 
         
            +
             
     | 
| 134 | 
         
            +
            ## Quantization and sharding
         
     | 
| 135 | 
         
            +
             
     | 
| 136 | 
         
            +
            You can load the model using quantization by specifying ```load_in_8bit=True``` or ```load_in_4bit=True```. Also, sharding on multiple GPUs is possible by setting ```device_map="auto"```.
         
     | 
| 137 | 
         
            +
             
     | 
| 138 | 
         
            +
            ## Model Architecture
         
     | 
| 139 | 
         
            +
             
     | 
| 140 | 
         
            +
            ```
         
     | 
| 141 | 
         
            +
            Qwen3ForCausalLM(
         
     | 
| 142 | 
         
            +
              (model): Qwen3Model(
         
     | 
| 143 | 
         
            +
                (embed_tokens): Embedding(151936, 2048, padding_idx=151643)
         
     | 
| 144 | 
         
            +
                (layers): ModuleList(
         
     | 
| 145 | 
         
            +
                  (0-27): 28 x Qwen3DecoderLayer(
         
     | 
| 146 | 
         
            +
                    (self_attn): Qwen3Attention(
         
     | 
| 147 | 
         
            +
                      (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
         
     | 
| 148 | 
         
            +
                      (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
         
     | 
| 149 | 
         
            +
                      (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
         
     | 
| 150 | 
         
            +
                      (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
         
     | 
| 151 | 
         
            +
                      (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
         
     | 
| 152 | 
         
            +
                      (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
         
     | 
| 153 | 
         
            +
                    )
         
     | 
| 154 | 
         
            +
                    (mlp): Qwen3MLP(
         
     | 
| 155 | 
         
            +
                      (gate_proj): Linear(in_features=2048, out_features=6144, bias=False)
         
     | 
| 156 | 
         
            +
                      (up_proj): Linear(in_features=2048, out_features=6144, bias=False)
         
     | 
| 157 | 
         
            +
                      (down_proj): Linear(in_features=6144, out_features=2048, bias=False)
         
     | 
| 158 | 
         
            +
                      (act_fn): SiLU()
         
     | 
| 159 | 
         
            +
                    )
         
     | 
| 160 | 
         
            +
                    (input_layernorm): Qwen3RMSNorm((2048,), eps=1e-06)
         
     | 
| 161 | 
         
            +
                    (post_attention_layernorm): Qwen3RMSNorm((2048,), eps=1e-06)
         
     | 
| 162 | 
         
            +
                  )
         
     | 
| 163 | 
         
            +
                )
         
     | 
| 164 | 
         
            +
                (norm): Qwen3RMSNorm((2048,), eps=1e-06)
         
     | 
| 165 | 
         
            +
                (rotary_emb): Qwen3RotaryEmbedding()
         
     | 
| 166 | 
         
            +
              )
         
     | 
| 167 | 
         
            +
              (lm_head): Linear(in_features=2048, out_features=151936, bias=False)
         
     | 
| 168 | 
         
            +
            )
         
     | 
| 169 | 
         
            +
            ```
         
     | 
| 170 | 
         
            +
             
     | 
| 171 | 
         
            +
            ## Model Configuration
         
     | 
| 172 | 
         
            +
             
     | 
| 173 | 
         
            +
            This model was trained using H2O LLM Studio and with the configuration in [cfg.yaml](cfg.yaml). Visit [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio) to learn how to train your own large language models.
         
     | 
| 174 | 
         
            +
             
     | 
| 175 | 
         
            +
             
     | 
| 176 | 
         
            +
            ## Disclaimer
         
     | 
| 177 | 
         
            +
             
     | 
| 178 | 
         
            +
            Please read this disclaimer carefully before using the large language model provided in this repository. Your use of the model signifies your agreement to the following terms and conditions.
         
     | 
| 179 | 
         
            +
             
     | 
| 180 | 
         
            +
            - Biases and Offensiveness: The large language model is trained on a diverse range of internet text data, which may contain biased, racist, offensive, or otherwise inappropriate content. By using this model, you acknowledge and accept that the generated content may sometimes exhibit biases or produce content that is offensive or inappropriate. The developers of this repository do not endorse, support, or promote any such content or viewpoints.
         
     | 
| 181 | 
         
            +
            - Limitations: The large language model is an AI-based tool and not a human. It may produce incorrect, nonsensical, or irrelevant responses. It is the user's responsibility to critically evaluate the generated content and use it at their discretion.
         
     | 
| 182 | 
         
            +
            - Use at Your Own Risk: Users of this large language model must assume full responsibility for any consequences that may arise from their use of the tool. The developers and contributors of this repository shall not be held liable for any damages, losses, or harm resulting from the use or misuse of the provided model.
         
     | 
| 183 | 
         
            +
            - Ethical Considerations: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.
         
     | 
| 184 | 
         
            +
            - Reporting Issues: If you encounter any biased, offensive, or otherwise inappropriate content generated by the large language model, please report it to the repository maintainers through the provided channels. Your feedback will help improve the model and mitigate potential issues.
         
     | 
| 185 | 
         
            +
            - Changes to this Disclaimer: The developers of this repository reserve the right to modify or update this disclaimer at any time without prior notice. It is the user's responsibility to periodically review the disclaimer to stay informed about any changes.
         
     | 
| 186 | 
         
            +
             
     | 
| 187 | 
         
            +
            By using the large language model provided in this repository, you agree to accept and comply with the terms and conditions outlined in this disclaimer. If you do not agree with any part of this disclaimer, you should refrain from using the model and any content generated by it.
         
     | 
    	
        added_tokens.json
    ADDED
    
    | 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "</think>": 151668,
         
     | 
| 3 | 
         
            +
              "</tool_call>": 151658,
         
     | 
| 4 | 
         
            +
              "</tool_response>": 151666,
         
     | 
| 5 | 
         
            +
              "<think>": 151667,
         
     | 
| 6 | 
         
            +
              "<tool_call>": 151657,
         
     | 
| 7 | 
         
            +
              "<tool_response>": 151665,
         
     | 
| 8 | 
         
            +
              "<|box_end|>": 151649,
         
     | 
| 9 | 
         
            +
              "<|box_start|>": 151648,
         
     | 
| 10 | 
         
            +
              "<|endoftext|>": 151643,
         
     | 
| 11 | 
         
            +
              "<|file_sep|>": 151664,
         
     | 
| 12 | 
         
            +
              "<|fim_middle|>": 151660,
         
     | 
| 13 | 
         
            +
              "<|fim_pad|>": 151662,
         
     | 
| 14 | 
         
            +
              "<|fim_prefix|>": 151659,
         
     | 
| 15 | 
         
            +
              "<|fim_suffix|>": 151661,
         
     | 
| 16 | 
         
            +
              "<|im_end|>": 151645,
         
     | 
| 17 | 
         
            +
              "<|im_start|>": 151644,
         
     | 
| 18 | 
         
            +
              "<|image_pad|>": 151655,
         
     | 
| 19 | 
         
            +
              "<|object_ref_end|>": 151647,
         
     | 
| 20 | 
         
            +
              "<|object_ref_start|>": 151646,
         
     | 
| 21 | 
         
            +
              "<|quad_end|>": 151651,
         
     | 
| 22 | 
         
            +
              "<|quad_start|>": 151650,
         
     | 
| 23 | 
         
            +
              "<|repo_name|>": 151663,
         
     | 
| 24 | 
         
            +
              "<|video_pad|>": 151656,
         
     | 
| 25 | 
         
            +
              "<|vision_end|>": 151653,
         
     | 
| 26 | 
         
            +
              "<|vision_pad|>": 151654,
         
     | 
| 27 | 
         
            +
              "<|vision_start|>": 151652
         
     | 
| 28 | 
         
            +
            }
         
     | 
    	
        cfg.yaml
    ADDED
    
    | 
         @@ -0,0 +1,117 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            architecture:
         
     | 
| 2 | 
         
            +
                backbone_dtype: float16
         
     | 
| 3 | 
         
            +
                gradient_checkpointing: true
         
     | 
| 4 | 
         
            +
                intermediate_dropout: 0.0
         
     | 
| 5 | 
         
            +
                pretrained: true
         
     | 
| 6 | 
         
            +
                pretrained_weights: ''
         
     | 
| 7 | 
         
            +
            augmentation:
         
     | 
| 8 | 
         
            +
                neftune_noise_alpha: 0.0
         
     | 
| 9 | 
         
            +
                random_parent_probability: 0.0
         
     | 
| 10 | 
         
            +
                skip_parent_probability: 0.0
         
     | 
| 11 | 
         
            +
                token_mask_probability: 0.0
         
     | 
| 12 | 
         
            +
            dataset:
         
     | 
| 13 | 
         
            +
                add_eos_token_to_answer: true
         
     | 
| 14 | 
         
            +
                add_eos_token_to_prompt: true
         
     | 
| 15 | 
         
            +
                add_eos_token_to_system: true
         
     | 
| 16 | 
         
            +
                answer_column: target
         
     | 
| 17 | 
         
            +
                chatbot_author: H2O.ai
         
     | 
| 18 | 
         
            +
                chatbot_name: h2oGPT
         
     | 
| 19 | 
         
            +
                data_sample: 1.0
         
     | 
| 20 | 
         
            +
                data_sample_choice:
         
     | 
| 21 | 
         
            +
                - Train
         
     | 
| 22 | 
         
            +
                - Validation
         
     | 
| 23 | 
         
            +
                id_column: None
         
     | 
| 24 | 
         
            +
                limit_chained_samples: false
         
     | 
| 25 | 
         
            +
                mask_prompt_labels: true
         
     | 
| 26 | 
         
            +
                only_last_answer: false
         
     | 
| 27 | 
         
            +
                parent_id_column: None
         
     | 
| 28 | 
         
            +
                personalize: false
         
     | 
| 29 | 
         
            +
                prompt_column:
         
     | 
| 30 | 
         
            +
                - input
         
     | 
| 31 | 
         
            +
                prompt_column_separator: \n\n
         
     | 
| 32 | 
         
            +
                system_column: None
         
     | 
| 33 | 
         
            +
                text_answer_separator: ''
         
     | 
| 34 | 
         
            +
                text_prompt_start: ''
         
     | 
| 35 | 
         
            +
                text_system_start: <|system|>
         
     | 
| 36 | 
         
            +
                train_dataframe: /data/skim/starcraft/server/h2o-llmstudio/data/user/tank_c/tank_c.csv
         
     | 
| 37 | 
         
            +
                validation_dataframe: /data/skim/starcraft/server/h2o-llmstudio/data/user/tank_c/tank_test.csv
         
     | 
| 38 | 
         
            +
                validation_size: 0.01
         
     | 
| 39 | 
         
            +
                validation_strategy: custom
         
     | 
| 40 | 
         
            +
            environment:
         
     | 
| 41 | 
         
            +
                compile_model: false
         
     | 
| 42 | 
         
            +
                deepspeed_allgather_bucket_size: 1000000
         
     | 
| 43 | 
         
            +
                deepspeed_method: ZeRO2
         
     | 
| 44 | 
         
            +
                deepspeed_reduce_bucket_size: 1000000
         
     | 
| 45 | 
         
            +
                deepspeed_stage3_param_persistence_threshold: 1000000
         
     | 
| 46 | 
         
            +
                deepspeed_stage3_prefetch_bucket_size: 1000000
         
     | 
| 47 | 
         
            +
                find_unused_parameters: false
         
     | 
| 48 | 
         
            +
                gpus:
         
     | 
| 49 | 
         
            +
                - '5'
         
     | 
| 50 | 
         
            +
                huggingface_branch: main
         
     | 
| 51 | 
         
            +
                mixed_precision: true
         
     | 
| 52 | 
         
            +
                mixed_precision_dtype: bfloat16
         
     | 
| 53 | 
         
            +
                number_of_workers: 8
         
     | 
| 54 | 
         
            +
                seed: -1
         
     | 
| 55 | 
         
            +
                trust_remote_code: true
         
     | 
| 56 | 
         
            +
                use_deepspeed: false
         
     | 
| 57 | 
         
            +
            experiment_name: qwen3_tank_c
         
     | 
| 58 | 
         
            +
            llm_backbone: Qwen/Qwen3-1.7B
         
     | 
| 59 | 
         
            +
            logging:
         
     | 
| 60 | 
         
            +
                log_all_ranks: false
         
     | 
| 61 | 
         
            +
                log_step_size: absolute
         
     | 
| 62 | 
         
            +
                logger: None
         
     | 
| 63 | 
         
            +
                neptune_project: ''
         
     | 
| 64 | 
         
            +
                wandb_entity: ''
         
     | 
| 65 | 
         
            +
                wandb_project: ''
         
     | 
| 66 | 
         
            +
            output_directory: /data/skim/starcraft/server/h2o-llmstudio/output/user/qwen3_tank_c/
         
     | 
| 67 | 
         
            +
            prediction:
         
     | 
| 68 | 
         
            +
                batch_size_inference: 0
         
     | 
| 69 | 
         
            +
                do_sample: false
         
     | 
| 70 | 
         
            +
                max_length_inference: 4096
         
     | 
| 71 | 
         
            +
                max_time: 0.0
         
     | 
| 72 | 
         
            +
                metric: BLEU
         
     | 
| 73 | 
         
            +
                metric_gpt_model: gpt-3.5-turbo-0301
         
     | 
| 74 | 
         
            +
                metric_gpt_template: general
         
     | 
| 75 | 
         
            +
                min_length_inference: 2
         
     | 
| 76 | 
         
            +
                num_beams: 1
         
     | 
| 77 | 
         
            +
                num_history: 4
         
     | 
| 78 | 
         
            +
                repetition_penalty: 1.0
         
     | 
| 79 | 
         
            +
                stop_tokens: ''
         
     | 
| 80 | 
         
            +
                temperature: 0.0
         
     | 
| 81 | 
         
            +
                top_k: 0
         
     | 
| 82 | 
         
            +
                top_p: 1.0
         
     | 
| 83 | 
         
            +
            problem_type: text_causal_language_modeling
         
     | 
| 84 | 
         
            +
            tokenizer:
         
     | 
| 85 | 
         
            +
                add_prompt_answer_tokens: false
         
     | 
| 86 | 
         
            +
                max_length: 4096
         
     | 
| 87 | 
         
            +
                padding_quantile: 1.0
         
     | 
| 88 | 
         
            +
                tokenizer_kwargs: '{"use_fast": true, "add_prefix_space": false}'
         
     | 
| 89 | 
         
            +
            training:
         
     | 
| 90 | 
         
            +
                attention_implementation: auto
         
     | 
| 91 | 
         
            +
                batch_size: 2
         
     | 
| 92 | 
         
            +
                differential_learning_rate: 1.0e-05
         
     | 
| 93 | 
         
            +
                differential_learning_rate_layers: []
         
     | 
| 94 | 
         
            +
                drop_last_batch: true
         
     | 
| 95 | 
         
            +
                epochs: 5
         
     | 
| 96 | 
         
            +
                evaluate_before_training: false
         
     | 
| 97 | 
         
            +
                evaluation_epochs: 1.0
         
     | 
| 98 | 
         
            +
                freeze_layers: []
         
     | 
| 99 | 
         
            +
                grad_accumulation: 1
         
     | 
| 100 | 
         
            +
                gradient_clip: 0.0
         
     | 
| 101 | 
         
            +
                learning_rate: 0.0001
         
     | 
| 102 | 
         
            +
                lora: true
         
     | 
| 103 | 
         
            +
                lora_alpha: 16
         
     | 
| 104 | 
         
            +
                lora_dropout: 0.05
         
     | 
| 105 | 
         
            +
                lora_r: 4
         
     | 
| 106 | 
         
            +
                lora_target_modules: ''
         
     | 
| 107 | 
         
            +
                lora_unfreeze_layers: []
         
     | 
| 108 | 
         
            +
                loss_function: TokenAveragedCrossEntropy
         
     | 
| 109 | 
         
            +
                min_learning_rate_ratio: 0.0
         
     | 
| 110 | 
         
            +
                optimizer: AdamW
         
     | 
| 111 | 
         
            +
                save_checkpoint: last
         
     | 
| 112 | 
         
            +
                schedule: Constant
         
     | 
| 113 | 
         
            +
                train_validation_data: false
         
     | 
| 114 | 
         
            +
                use_dora: false
         
     | 
| 115 | 
         
            +
                use_rslora: false
         
     | 
| 116 | 
         
            +
                warmup_epochs: 0.0
         
     | 
| 117 | 
         
            +
                weight_decay: 0.0
         
     | 
    	
        config.json
    ADDED
    
    | 
         @@ -0,0 +1,31 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "architectures": [
         
     | 
| 3 | 
         
            +
                "Qwen3ForCausalLM"
         
     | 
| 4 | 
         
            +
              ],
         
     | 
| 5 | 
         
            +
              "attention_bias": false,
         
     | 
| 6 | 
         
            +
              "attention_dropout": 0.0,
         
     | 
| 7 | 
         
            +
              "bos_token_id": 151645,
         
     | 
| 8 | 
         
            +
              "eos_token_id": 151645,
         
     | 
| 9 | 
         
            +
              "head_dim": 128,
         
     | 
| 10 | 
         
            +
              "hidden_act": "silu",
         
     | 
| 11 | 
         
            +
              "hidden_size": 2048,
         
     | 
| 12 | 
         
            +
              "initializer_range": 0.02,
         
     | 
| 13 | 
         
            +
              "intermediate_size": 6144,
         
     | 
| 14 | 
         
            +
              "max_position_embeddings": 40960,
         
     | 
| 15 | 
         
            +
              "max_window_layers": 28,
         
     | 
| 16 | 
         
            +
              "model_type": "qwen3",
         
     | 
| 17 | 
         
            +
              "num_attention_heads": 16,
         
     | 
| 18 | 
         
            +
              "num_hidden_layers": 28,
         
     | 
| 19 | 
         
            +
              "num_key_value_heads": 8,
         
     | 
| 20 | 
         
            +
              "pad_token_id": 151643,
         
     | 
| 21 | 
         
            +
              "rms_norm_eps": 1e-06,
         
     | 
| 22 | 
         
            +
              "rope_scaling": null,
         
     | 
| 23 | 
         
            +
              "rope_theta": 1000000,
         
     | 
| 24 | 
         
            +
              "sliding_window": null,
         
     | 
| 25 | 
         
            +
              "tie_word_embeddings": true,
         
     | 
| 26 | 
         
            +
              "torch_dtype": "float16",
         
     | 
| 27 | 
         
            +
              "transformers_version": "4.51.3",
         
     | 
| 28 | 
         
            +
              "use_cache": true,
         
     | 
| 29 | 
         
            +
              "use_sliding_window": false,
         
     | 
| 30 | 
         
            +
              "vocab_size": 151936
         
     | 
| 31 | 
         
            +
            }
         
     | 
    	
        generation_config.json
    ADDED
    
    | 
         @@ -0,0 +1,12 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_from_model_config": true,
         
     | 
| 3 | 
         
            +
              "bos_token_id": 151645,
         
     | 
| 4 | 
         
            +
              "eos_token_id": 151645,
         
     | 
| 5 | 
         
            +
              "max_new_tokens": 4096,
         
     | 
| 6 | 
         
            +
              "min_new_tokens": 2,
         
     | 
| 7 | 
         
            +
              "pad_token_id": 151643,
         
     | 
| 8 | 
         
            +
              "temperature": null,
         
     | 
| 9 | 
         
            +
              "top_k": null,
         
     | 
| 10 | 
         
            +
              "top_p": null,
         
     | 
| 11 | 
         
            +
              "transformers_version": "4.51.3"
         
     | 
| 12 | 
         
            +
            }
         
     | 
    	
        merges.txt
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         | 
    	
        model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9caa75727643fa8538978f990688d8df4454ffd60405464f6c942e35b5809b6b
         
     | 
| 3 | 
         
            +
            size 3441185296
         
     | 
    	
        special_tokens_map.json
    ADDED
    
    | 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "additional_special_tokens": [
         
     | 
| 3 | 
         
            +
                "<|im_start|>",
         
     | 
| 4 | 
         
            +
                "<|im_end|>",
         
     | 
| 5 | 
         
            +
                "<|object_ref_start|>",
         
     | 
| 6 | 
         
            +
                "<|object_ref_end|>",
         
     | 
| 7 | 
         
            +
                "<|box_start|>",
         
     | 
| 8 | 
         
            +
                "<|box_end|>",
         
     | 
| 9 | 
         
            +
                "<|quad_start|>",
         
     | 
| 10 | 
         
            +
                "<|quad_end|>",
         
     | 
| 11 | 
         
            +
                "<|vision_start|>",
         
     | 
| 12 | 
         
            +
                "<|vision_end|>",
         
     | 
| 13 | 
         
            +
                "<|vision_pad|>",
         
     | 
| 14 | 
         
            +
                "<|image_pad|>",
         
     | 
| 15 | 
         
            +
                "<|video_pad|>"
         
     | 
| 16 | 
         
            +
              ],
         
     | 
| 17 | 
         
            +
              "bos_token": "<|im_end|>",
         
     | 
| 18 | 
         
            +
              "cls_token": "<|im_end|>",
         
     | 
| 19 | 
         
            +
              "eos_token": {
         
     | 
| 20 | 
         
            +
                "content": "<|im_end|>",
         
     | 
| 21 | 
         
            +
                "lstrip": false,
         
     | 
| 22 | 
         
            +
                "normalized": false,
         
     | 
| 23 | 
         
            +
                "rstrip": false,
         
     | 
| 24 | 
         
            +
                "single_word": false
         
     | 
| 25 | 
         
            +
              },
         
     | 
| 26 | 
         
            +
              "pad_token": {
         
     | 
| 27 | 
         
            +
                "content": "<|endoftext|>",
         
     | 
| 28 | 
         
            +
                "lstrip": false,
         
     | 
| 29 | 
         
            +
                "normalized": false,
         
     | 
| 30 | 
         
            +
                "rstrip": false,
         
     | 
| 31 | 
         
            +
                "single_word": false
         
     | 
| 32 | 
         
            +
              }
         
     | 
| 33 | 
         
            +
            }
         
     | 
    	
        tokenizer.json
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
         
     | 
| 3 | 
         
            +
            size 11422654
         
     | 
    	
        tokenizer_config.json
    ADDED
    
    | 
         @@ -0,0 +1,241 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "add_bos_token": false,
         
     | 
| 3 | 
         
            +
              "add_prefix_space": false,
         
     | 
| 4 | 
         
            +
              "added_tokens_decoder": {
         
     | 
| 5 | 
         
            +
                "151643": {
         
     | 
| 6 | 
         
            +
                  "content": "<|endoftext|>",
         
     | 
| 7 | 
         
            +
                  "lstrip": false,
         
     | 
| 8 | 
         
            +
                  "normalized": false,
         
     | 
| 9 | 
         
            +
                  "rstrip": false,
         
     | 
| 10 | 
         
            +
                  "single_word": false,
         
     | 
| 11 | 
         
            +
                  "special": true
         
     | 
| 12 | 
         
            +
                },
         
     | 
| 13 | 
         
            +
                "151644": {
         
     | 
| 14 | 
         
            +
                  "content": "<|im_start|>",
         
     | 
| 15 | 
         
            +
                  "lstrip": false,
         
     | 
| 16 | 
         
            +
                  "normalized": false,
         
     | 
| 17 | 
         
            +
                  "rstrip": false,
         
     | 
| 18 | 
         
            +
                  "single_word": false,
         
     | 
| 19 | 
         
            +
                  "special": true
         
     | 
| 20 | 
         
            +
                },
         
     | 
| 21 | 
         
            +
                "151645": {
         
     | 
| 22 | 
         
            +
                  "content": "<|im_end|>",
         
     | 
| 23 | 
         
            +
                  "lstrip": false,
         
     | 
| 24 | 
         
            +
                  "normalized": false,
         
     | 
| 25 | 
         
            +
                  "rstrip": false,
         
     | 
| 26 | 
         
            +
                  "single_word": false,
         
     | 
| 27 | 
         
            +
                  "special": true
         
     | 
| 28 | 
         
            +
                },
         
     | 
| 29 | 
         
            +
                "151646": {
         
     | 
| 30 | 
         
            +
                  "content": "<|object_ref_start|>",
         
     | 
| 31 | 
         
            +
                  "lstrip": false,
         
     | 
| 32 | 
         
            +
                  "normalized": false,
         
     | 
| 33 | 
         
            +
                  "rstrip": false,
         
     | 
| 34 | 
         
            +
                  "single_word": false,
         
     | 
| 35 | 
         
            +
                  "special": true
         
     | 
| 36 | 
         
            +
                },
         
     | 
| 37 | 
         
            +
                "151647": {
         
     | 
| 38 | 
         
            +
                  "content": "<|object_ref_end|>",
         
     | 
| 39 | 
         
            +
                  "lstrip": false,
         
     | 
| 40 | 
         
            +
                  "normalized": false,
         
     | 
| 41 | 
         
            +
                  "rstrip": false,
         
     | 
| 42 | 
         
            +
                  "single_word": false,
         
     | 
| 43 | 
         
            +
                  "special": true
         
     | 
| 44 | 
         
            +
                },
         
     | 
| 45 | 
         
            +
                "151648": {
         
     | 
| 46 | 
         
            +
                  "content": "<|box_start|>",
         
     | 
| 47 | 
         
            +
                  "lstrip": false,
         
     | 
| 48 | 
         
            +
                  "normalized": false,
         
     | 
| 49 | 
         
            +
                  "rstrip": false,
         
     | 
| 50 | 
         
            +
                  "single_word": false,
         
     | 
| 51 | 
         
            +
                  "special": true
         
     | 
| 52 | 
         
            +
                },
         
     | 
| 53 | 
         
            +
                "151649": {
         
     | 
| 54 | 
         
            +
                  "content": "<|box_end|>",
         
     | 
| 55 | 
         
            +
                  "lstrip": false,
         
     | 
| 56 | 
         
            +
                  "normalized": false,
         
     | 
| 57 | 
         
            +
                  "rstrip": false,
         
     | 
| 58 | 
         
            +
                  "single_word": false,
         
     | 
| 59 | 
         
            +
                  "special": true
         
     | 
| 60 | 
         
            +
                },
         
     | 
| 61 | 
         
            +
                "151650": {
         
     | 
| 62 | 
         
            +
                  "content": "<|quad_start|>",
         
     | 
| 63 | 
         
            +
                  "lstrip": false,
         
     | 
| 64 | 
         
            +
                  "normalized": false,
         
     | 
| 65 | 
         
            +
                  "rstrip": false,
         
     | 
| 66 | 
         
            +
                  "single_word": false,
         
     | 
| 67 | 
         
            +
                  "special": true
         
     | 
| 68 | 
         
            +
                },
         
     | 
| 69 | 
         
            +
                "151651": {
         
     | 
| 70 | 
         
            +
                  "content": "<|quad_end|>",
         
     | 
| 71 | 
         
            +
                  "lstrip": false,
         
     | 
| 72 | 
         
            +
                  "normalized": false,
         
     | 
| 73 | 
         
            +
                  "rstrip": false,
         
     | 
| 74 | 
         
            +
                  "single_word": false,
         
     | 
| 75 | 
         
            +
                  "special": true
         
     | 
| 76 | 
         
            +
                },
         
     | 
| 77 | 
         
            +
                "151652": {
         
     | 
| 78 | 
         
            +
                  "content": "<|vision_start|>",
         
     | 
| 79 | 
         
            +
                  "lstrip": false,
         
     | 
| 80 | 
         
            +
                  "normalized": false,
         
     | 
| 81 | 
         
            +
                  "rstrip": false,
         
     | 
| 82 | 
         
            +
                  "single_word": false,
         
     | 
| 83 | 
         
            +
                  "special": true
         
     | 
| 84 | 
         
            +
                },
         
     | 
| 85 | 
         
            +
                "151653": {
         
     | 
| 86 | 
         
            +
                  "content": "<|vision_end|>",
         
     | 
| 87 | 
         
            +
                  "lstrip": false,
         
     | 
| 88 | 
         
            +
                  "normalized": false,
         
     | 
| 89 | 
         
            +
                  "rstrip": false,
         
     | 
| 90 | 
         
            +
                  "single_word": false,
         
     | 
| 91 | 
         
            +
                  "special": true
         
     | 
| 92 | 
         
            +
                },
         
     | 
| 93 | 
         
            +
                "151654": {
         
     | 
| 94 | 
         
            +
                  "content": "<|vision_pad|>",
         
     | 
| 95 | 
         
            +
                  "lstrip": false,
         
     | 
| 96 | 
         
            +
                  "normalized": false,
         
     | 
| 97 | 
         
            +
                  "rstrip": false,
         
     | 
| 98 | 
         
            +
                  "single_word": false,
         
     | 
| 99 | 
         
            +
                  "special": true
         
     | 
| 100 | 
         
            +
                },
         
     | 
| 101 | 
         
            +
                "151655": {
         
     | 
| 102 | 
         
            +
                  "content": "<|image_pad|>",
         
     | 
| 103 | 
         
            +
                  "lstrip": false,
         
     | 
| 104 | 
         
            +
                  "normalized": false,
         
     | 
| 105 | 
         
            +
                  "rstrip": false,
         
     | 
| 106 | 
         
            +
                  "single_word": false,
         
     | 
| 107 | 
         
            +
                  "special": true
         
     | 
| 108 | 
         
            +
                },
         
     | 
| 109 | 
         
            +
                "151656": {
         
     | 
| 110 | 
         
            +
                  "content": "<|video_pad|>",
         
     | 
| 111 | 
         
            +
                  "lstrip": false,
         
     | 
| 112 | 
         
            +
                  "normalized": false,
         
     | 
| 113 | 
         
            +
                  "rstrip": false,
         
     | 
| 114 | 
         
            +
                  "single_word": false,
         
     | 
| 115 | 
         
            +
                  "special": true
         
     | 
| 116 | 
         
            +
                },
         
     | 
| 117 | 
         
            +
                "151657": {
         
     | 
| 118 | 
         
            +
                  "content": "<tool_call>",
         
     | 
| 119 | 
         
            +
                  "lstrip": false,
         
     | 
| 120 | 
         
            +
                  "normalized": false,
         
     | 
| 121 | 
         
            +
                  "rstrip": false,
         
     | 
| 122 | 
         
            +
                  "single_word": false,
         
     | 
| 123 | 
         
            +
                  "special": false
         
     | 
| 124 | 
         
            +
                },
         
     | 
| 125 | 
         
            +
                "151658": {
         
     | 
| 126 | 
         
            +
                  "content": "</tool_call>",
         
     | 
| 127 | 
         
            +
                  "lstrip": false,
         
     | 
| 128 | 
         
            +
                  "normalized": false,
         
     | 
| 129 | 
         
            +
                  "rstrip": false,
         
     | 
| 130 | 
         
            +
                  "single_word": false,
         
     | 
| 131 | 
         
            +
                  "special": false
         
     | 
| 132 | 
         
            +
                },
         
     | 
| 133 | 
         
            +
                "151659": {
         
     | 
| 134 | 
         
            +
                  "content": "<|fim_prefix|>",
         
     | 
| 135 | 
         
            +
                  "lstrip": false,
         
     | 
| 136 | 
         
            +
                  "normalized": false,
         
     | 
| 137 | 
         
            +
                  "rstrip": false,
         
     | 
| 138 | 
         
            +
                  "single_word": false,
         
     | 
| 139 | 
         
            +
                  "special": false
         
     | 
| 140 | 
         
            +
                },
         
     | 
| 141 | 
         
            +
                "151660": {
         
     | 
| 142 | 
         
            +
                  "content": "<|fim_middle|>",
         
     | 
| 143 | 
         
            +
                  "lstrip": false,
         
     | 
| 144 | 
         
            +
                  "normalized": false,
         
     | 
| 145 | 
         
            +
                  "rstrip": false,
         
     | 
| 146 | 
         
            +
                  "single_word": false,
         
     | 
| 147 | 
         
            +
                  "special": false
         
     | 
| 148 | 
         
            +
                },
         
     | 
| 149 | 
         
            +
                "151661": {
         
     | 
| 150 | 
         
            +
                  "content": "<|fim_suffix|>",
         
     | 
| 151 | 
         
            +
                  "lstrip": false,
         
     | 
| 152 | 
         
            +
                  "normalized": false,
         
     | 
| 153 | 
         
            +
                  "rstrip": false,
         
     | 
| 154 | 
         
            +
                  "single_word": false,
         
     | 
| 155 | 
         
            +
                  "special": false
         
     | 
| 156 | 
         
            +
                },
         
     | 
| 157 | 
         
            +
                "151662": {
         
     | 
| 158 | 
         
            +
                  "content": "<|fim_pad|>",
         
     | 
| 159 | 
         
            +
                  "lstrip": false,
         
     | 
| 160 | 
         
            +
                  "normalized": false,
         
     | 
| 161 | 
         
            +
                  "rstrip": false,
         
     | 
| 162 | 
         
            +
                  "single_word": false,
         
     | 
| 163 | 
         
            +
                  "special": false
         
     | 
| 164 | 
         
            +
                },
         
     | 
| 165 | 
         
            +
                "151663": {
         
     | 
| 166 | 
         
            +
                  "content": "<|repo_name|>",
         
     | 
| 167 | 
         
            +
                  "lstrip": false,
         
     | 
| 168 | 
         
            +
                  "normalized": false,
         
     | 
| 169 | 
         
            +
                  "rstrip": false,
         
     | 
| 170 | 
         
            +
                  "single_word": false,
         
     | 
| 171 | 
         
            +
                  "special": false
         
     | 
| 172 | 
         
            +
                },
         
     | 
| 173 | 
         
            +
                "151664": {
         
     | 
| 174 | 
         
            +
                  "content": "<|file_sep|>",
         
     | 
| 175 | 
         
            +
                  "lstrip": false,
         
     | 
| 176 | 
         
            +
                  "normalized": false,
         
     | 
| 177 | 
         
            +
                  "rstrip": false,
         
     | 
| 178 | 
         
            +
                  "single_word": false,
         
     | 
| 179 | 
         
            +
                  "special": false
         
     | 
| 180 | 
         
            +
                },
         
     | 
| 181 | 
         
            +
                "151665": {
         
     | 
| 182 | 
         
            +
                  "content": "<tool_response>",
         
     | 
| 183 | 
         
            +
                  "lstrip": false,
         
     | 
| 184 | 
         
            +
                  "normalized": false,
         
     | 
| 185 | 
         
            +
                  "rstrip": false,
         
     | 
| 186 | 
         
            +
                  "single_word": false,
         
     | 
| 187 | 
         
            +
                  "special": false
         
     | 
| 188 | 
         
            +
                },
         
     | 
| 189 | 
         
            +
                "151666": {
         
     | 
| 190 | 
         
            +
                  "content": "</tool_response>",
         
     | 
| 191 | 
         
            +
                  "lstrip": false,
         
     | 
| 192 | 
         
            +
                  "normalized": false,
         
     | 
| 193 | 
         
            +
                  "rstrip": false,
         
     | 
| 194 | 
         
            +
                  "single_word": false,
         
     | 
| 195 | 
         
            +
                  "special": false
         
     | 
| 196 | 
         
            +
                },
         
     | 
| 197 | 
         
            +
                "151667": {
         
     | 
| 198 | 
         
            +
                  "content": "<think>",
         
     | 
| 199 | 
         
            +
                  "lstrip": false,
         
     | 
| 200 | 
         
            +
                  "normalized": false,
         
     | 
| 201 | 
         
            +
                  "rstrip": false,
         
     | 
| 202 | 
         
            +
                  "single_word": false,
         
     | 
| 203 | 
         
            +
                  "special": false
         
     | 
| 204 | 
         
            +
                },
         
     | 
| 205 | 
         
            +
                "151668": {
         
     | 
| 206 | 
         
            +
                  "content": "</think>",
         
     | 
| 207 | 
         
            +
                  "lstrip": false,
         
     | 
| 208 | 
         
            +
                  "normalized": false,
         
     | 
| 209 | 
         
            +
                  "rstrip": false,
         
     | 
| 210 | 
         
            +
                  "single_word": false,
         
     | 
| 211 | 
         
            +
                  "special": false
         
     | 
| 212 | 
         
            +
                }
         
     | 
| 213 | 
         
            +
              },
         
     | 
| 214 | 
         
            +
              "additional_special_tokens": [
         
     | 
| 215 | 
         
            +
                "<|im_start|>",
         
     | 
| 216 | 
         
            +
                "<|im_end|>",
         
     | 
| 217 | 
         
            +
                "<|object_ref_start|>",
         
     | 
| 218 | 
         
            +
                "<|object_ref_end|>",
         
     | 
| 219 | 
         
            +
                "<|box_start|>",
         
     | 
| 220 | 
         
            +
                "<|box_end|>",
         
     | 
| 221 | 
         
            +
                "<|quad_start|>",
         
     | 
| 222 | 
         
            +
                "<|quad_end|>",
         
     | 
| 223 | 
         
            +
                "<|vision_start|>",
         
     | 
| 224 | 
         
            +
                "<|vision_end|>",
         
     | 
| 225 | 
         
            +
                "<|vision_pad|>",
         
     | 
| 226 | 
         
            +
                "<|image_pad|>",
         
     | 
| 227 | 
         
            +
                "<|video_pad|>"
         
     | 
| 228 | 
         
            +
              ],
         
     | 
| 229 | 
         
            +
              "bos_token": "<|im_end|>",
         
     | 
| 230 | 
         
            +
              "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% if ((message['role'] == 'user') != (loop.index0 % 2 == 0)) or ((message['role'] == 'assistant') != (loop.index0 % 2 == 1)) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '' + message['content'].strip() + eos_token }}{% elif message['role'] == 'assistant' %}{{ '' + message['content'].strip() + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '' }}{% endif %}",
         
     | 
| 231 | 
         
            +
              "clean_up_tokenization_spaces": false,
         
     | 
| 232 | 
         
            +
              "cls_token": "<|im_end|>",
         
     | 
| 233 | 
         
            +
              "eos_token": "<|im_end|>",
         
     | 
| 234 | 
         
            +
              "errors": "replace",
         
     | 
| 235 | 
         
            +
              "extra_special_tokens": {},
         
     | 
| 236 | 
         
            +
              "model_max_length": 131072,
         
     | 
| 237 | 
         
            +
              "pad_token": "<|endoftext|>",
         
     | 
| 238 | 
         
            +
              "split_special_tokens": false,
         
     | 
| 239 | 
         
            +
              "tokenizer_class": "Qwen2Tokenizer",
         
     | 
| 240 | 
         
            +
              "unk_token": null
         
     | 
| 241 | 
         
            +
            }
         
     | 
    	
        vocab.json
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         |