Chickaboo committed
Commit dc002d6
1 Parent(s): 7c19a34

Upload folder using huggingface_hub

README.md CHANGED
````diff
@@ -8,9 +8,6 @@ tags:
 - merge
 
 ---
-# Models in the ChickaQ family
-- **ChickaQ (0.6B)**
-- **ChickaQ-Large (1.8B)**
 # mergedmodel
 
 This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
@@ -18,12 +15,12 @@ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
 ## Merge Details
 ### Merge Method
 
-This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method using [vilm/Quyen-SE-v0.1](https://huggingface.co/vilm/Quyen-SE-v0.1) as a base.
+This model was merged using the [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) merge method using [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) as a base.
 
 ### Models Merged
 
 The following models were included in the merge:
-* [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat)
+* [vilm/Quyen-SE-v0.1](https://huggingface.co/vilm/Quyen-SE-v0.1)
 
 ### Configuration
 
@@ -31,16 +28,15 @@ The following YAML configuration was used to produce this model:
 
 ```yaml
 models:
-  - model: vilm/Quyen-SE-v0.1
-    # no parameters necessary for base model
   - model: Qwen/Qwen1.5-0.5B-Chat
+    # no parameters necessary for base model
+  - model: vilm/Quyen-SE-v0.1
     parameters:
-      density: 0.5
-      weight: 0.5
-merge_method: ties
-base_model: vilm/Quyen-SE-v0.1
+      density: 1
+      weight: 1
+merge_method: dare_ties
+base_model: Qwen/Qwen1.5-0.5B-Chat
 parameters:
   normalize: true
 dtype: float16
-
 ```
````
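For context on the switch from plain TIES to DARE-TIES: DARE ([arXiv:2311.03099](https://arxiv.org/abs/2311.03099)) randomly drops a fraction of each model's parameter delta against the base and rescales the survivors before TIES resolves sign conflicts. A minimal sketch of the drop-and-rescale step, illustrative only and not mergekit's actual implementation:

```python
import torch

def dare_delta(delta: torch.Tensor, density: float) -> torch.Tensor:
    """Drop-And-REscale sketch: keep each delta entry with probability
    `density`, then divide by `density` so the expected delta is unchanged."""
    if density >= 1.0:
        return delta  # density: 1 (as in this commit) keeps every entry
    mask = torch.bernoulli(torch.full_like(delta, density))
    return delta * mask / density

# Toy usage: base weights plus a (possibly sparsified) fine-tuning delta.
base = torch.randn(8)
tuned = base + 0.1 * torch.randn(8)
merged = base + 1.0 * dare_delta(tuned - base, density=1.0)  # weight: 1
```

Note that with `density: 1` and `weight: 1` the full delta of vilm/Quyen-SE-v0.1 over the Qwen base is applied, so the DARE sparsification step is effectively a pass-through here.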
config.json CHANGED
```diff
@@ -1,9 +1,10 @@
 {
-  "_name_or_path": "vilm/Quyen-SE-v0.1",
+  "_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
   "architectures": [
     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
+  "bos_token_id": 151643,
   "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 1024,
@@ -17,11 +18,11 @@
   "num_key_value_heads": 16,
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
-  "sliding_window": 4096,
-  "tie_word_embeddings": false,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
   "torch_dtype": "float16",
   "transformers_version": "4.38.2",
-  "use_cache": false,
+  "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
 }
```
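Swapping the base brings the config in line with Qwen/Qwen1.5-0.5B-Chat's defaults: tied input/output embeddings, a 32768-token sliding window (still disabled via `use_sliding_window`), and the KV cache re-enabled. A quick sanity check after download, assuming a hypothetical repo id of `Chickaboo/mergedmodel`:

```python
from transformers import AutoConfig

# "Chickaboo/mergedmodel" is a placeholder; substitute the real repo id.
cfg = AutoConfig.from_pretrained("Chickaboo/mergedmodel")

assert cfg.tie_word_embeddings is True  # was false before this commit
assert cfg.sliding_window == 32768      # was 4096
assert cfg.use_cache is True            # was false
assert cfg.bos_token_id == 151643       # newly added
```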
mergekit_config.yml CHANGED
```diff
@@ -1,12 +1,12 @@
 models:
-  - model: vilm/Quyen-SE-v0.1
-    # no parameters necessary for base model
   - model: Qwen/Qwen1.5-0.5B-Chat
+    # no parameters necessary for base model
+  - model: vilm/Quyen-SE-v0.1
     parameters:
-      density: 0.5
-      weight: 0.5
-merge_method: ties
-base_model: vilm/Quyen-SE-v0.1
+      density: 1
+      weight: 1
+merge_method: dare_ties
+base_model: Qwen/Qwen1.5-0.5B-Chat
 parameters:
   normalize: true
-dtype: float16
+dtype: float16
```
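Since this file is the exact configuration mergekit consumed, the merge should be reproducible from it. A sketch, assuming mergekit is installed (`pip install mergekit`) and the config is saved locally as `mergekit_config.yml`:

```python
import subprocess

# Invokes mergekit's CLI on the committed config; output lands in ./merged-model.
# Append "--cuda" to the argument list to merge on GPU instead of CPU.
subprocess.run(
    ["mergekit-yaml", "mergekit_config.yml", "./merged-model"],
    check=True,
)
```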
model-00001-of-00001.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eece92273f353f4b9563464043b2653ab2ef2f7222ee6cff0a3165c1aefbc95a
+oid sha256:0de001be263c6e862f27a4645edb9276c1c9731173448d92a48bb26697c96877
 size 1239173056
```
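Only the Git LFS pointer changes here; the byte size is identical (1239173056), as expected when re-merging the same architecture at the same dtype. A sketch for verifying a downloaded copy against the new pointer's SHA-256:

```python
import hashlib

EXPECTED = "0de001be263c6e862f27a4645edb9276c1c9731173448d92a48bb26697c96877"

h = hashlib.sha256()
with open("model-00001-of-00001.safetensors", "rb") as f:
    # Stream in 1 MiB chunks so the ~1.2 GB file never sits fully in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch: corrupt or stale download"
```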
special_tokens_map.json CHANGED
```diff
@@ -3,13 +3,6 @@
     "<|im_start|>",
     "<|im_end|>"
   ],
-  "bos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
   "eos_token": {
     "content": "<|im_end|>",
     "lstrip": false,
```
tokenizer_config.json CHANGED
```diff
@@ -30,8 +30,8 @@
     "<|im_start|>",
     "<|im_end|>"
   ],
-  "bos_token": "<|im_end|>",
-  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
```