liuylhf committed on
Commit
090ac17
1 Parent(s): 91073c8

Model save

README.md CHANGED
@@ -2,7 +2,6 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
@@ -25,13 +24,13 @@ chat_template: inst
 dataset_prepared_path: last_run_prepared
 datasets:
 - conversation: mistral
-  path: 4d024e8e9a7d4c788853a531226680a8/./data/with_function_response/original_clean/function_used_training.jsonl
+  path: 722c5ac4af9b4ac09cdd250a281284f5/./data/with_function_response/original_clean/function_used_training.jsonl
   type: sharegpt
 - conversation: mistral
-  path: 4d024e8e9a7d4c788853a531226680a8/./data/with_function_response/original_clean/function_not_used_training.jsonl
+  path: 722c5ac4af9b4ac09cdd250a281284f5/./data/with_function_response/original_clean/function_not_used_training.jsonl
   type: sharegpt
 - conversation: mistral
-  path: 4d024e8e9a7d4c788853a531226680a8/./data/with_function_response/parallel_call/parallel_data_training.jsonl
+  path: 722c5ac4af9b4ac09cdd250a281284f5/./data/with_function_response/parallel_call/parallel_data_training.jsonl
   type: sharegpt
 debug: null
 eval_max_new_tokens: 256
@@ -52,6 +51,9 @@ logging_steps: 1
 lora_alpha: 64
 lora_dropout: 0.05
 lora_model_dir: null
+lora_modules_to_save:
+- embed_tokens
+- lm_head
 lora_r: 32
 lora_target_linear: true
 loss_watchdog_patience: 3
@@ -61,22 +63,22 @@ micro_batch_size: 2
 model_config:
   output_router_logits: true
 model_type: AutoModelForCausalLM
-num_epochs: 4
+num_epochs: 1
 optimizer: paged_adamw_8bit
-output_dir: 4d024e8e9a7d4c788853a531226680a8/model
+output_dir: 722c5ac4af9b4ac09cdd250a281284f5/model
 pad_to_sequence_len: true
 sample_packing: true
 save_steps: 0.1
 sequence_len: 4096
-special_tokens:
-  conversation: '[c]'
-  function: '[f]'
 strict: false
 tf32: false
 tokenizer_type: AutoTokenizer
+tokens:
+- '[f]'
+- '[c]'
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.025
+val_set_size: 0.05
 wandb_log_model: end
 wandb_name: more-tools
 wandb_project: function-call
@@ -89,9 +91,7 @@ weight_decay: 0.0
 
 # special-token-all-linear
 
-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.0801
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
 
 ## Model description
 
@@ -122,18 +122,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 4
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 2.1829        | 0.01  | 1    | 2.1038          |
-| 0.091         | 0.8   | 151  | 0.0832          |
-| 0.0741        | 1.58  | 302  | 0.0801          |
-| 0.0687        | 2.36  | 453  | 0.0801          |
-| 0.0654        | 3.14  | 604  | 0.0801          |
-
+- num_epochs: 1
 
 ### Framework versions
 
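The configuration in the README above replaces the old `special_tokens` mapping with two plain added tokens (`tokens: ['[f]', '[c]']`) and stores `embed_tokens` and `lm_head` with the adapter (`lora_modules_to_save`), so the resized vocabulary ships with the LoRA weights. A minimal loading sketch under those assumptions; the adapter repo id is a placeholder, not taken from this commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
adapter_id = "<user>/<adapter-repo>"  # placeholder, not the real repo id

# The tokenizer saved with the adapter already contains the added [f] and [c] tokens.
tokenizer = AutoTokenizer.from_pretrained(adapter_id)

model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
# Grow the base embeddings from 32000 to 32002 so the embed_tokens / lm_head
# weights saved alongside the LoRA adapter fit when the adapter is attached.
model.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(model, adapter_id)
```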
 
adapter_config.json CHANGED
@@ -13,20 +13,23 @@
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": null,
+  "modules_to_save": [
+    "embed_tokens",
+    "lm_head"
+  ],
   "peft_type": "LORA",
   "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "w1",
-    "o_proj",
-    "q_proj",
+    "w2",
     "k_proj",
+    "q_proj",
+    "w1",
     "v_proj",
+    "gate",
     "w3",
-    "w2",
-    "gate"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddc484b9de71050d31c0251f3f04ec4c219f9cf7c78f2be25946331265237faa
-size 1938077512
+oid sha256:752f1017a27bd38e1d2b3f9d6687df0af638719a10af88e6dc48d059a5d71d14
+size 3511040576
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+{
+  "[c]": 32001,
+  "[f]": 32000
+}
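These are ordinary (non-special) added tokens, which is what raises `vocab_size` from 32000 to 32002 in config.json below and why the adapter now carries full `embed_tokens`/`lm_head` copies. A sketch of how such tokens are typically registered before training; this is illustrative, not the commit's training script:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Plain (non-special) tokens, matching added_tokens.json: [f] -> 32000, [c] -> 32001.
num_added = tokenizer.add_tokens(["[f]", "[c]"])
print(num_added, len(tokenizer))  # 2 32002

# The model's embedding matrix must then be resized to the new vocab size
# (model.resize_token_embeddings(len(tokenizer))) before training starts.
```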
config.json CHANGED
@@ -40,5 +40,5 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.38.2",
   "use_cache": false,
-  "vocab_size": 32000
+  "vocab_size": 32002
 }
tokenizer.json CHANGED
@@ -29,6 +29,24 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 32000,
+      "content": "[f]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 32001,
+      "content": "[c]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": {
tokenizer_config.json CHANGED
@@ -25,6 +25,22 @@
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "32000": {
+      "content": "[f]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32001": {
+      "content": "[c]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e2bf1b5b988d4cfe0c4c0460a349291c90deef362876a889aaa6c018d31b193
+oid sha256:3943d56b3a1e58cef6a9201d4b1b47c36798809881f5ae91a6c377c0528808e3
 size 5688