Felladrin committed on
Commit fd33b3b
1 Parent(s): 321787e

Update model (trained for one more epoch)

adapter_config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "alpha_pattern": {},
   "auto_mapping": null,
   "base_model_name_or_path": "Locutusque/TinyMistral-248M",
   "bias": "none",
@@ -13,7 +12,6 @@
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 16,
-  "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf4afe44da097b7e5503ecdf06c785e87db2e054f5d7996d4bfae76d298e60f
+size 2572670
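
The three added lines are a Git LFS pointer, not the weights themselves: the binary is stored out-of-band and identified by its SHA-256 digest and byte size. A quick way to check that a locally downloaded adapter_model.bin matches this pointer (local file path assumed):

# Verify a downloaded LFS object against the pointer's oid and size.
import hashlib
import os

path = "adapter_model.bin"  # assumed local path
expected_oid = "ebf4afe44da097b7e5503ecdf06c785e87db2e054f5d7996d4bfae76d298e60f"

assert os.path.getsize(path) == 2572670, "size mismatch"
with open(path, "rb") as f:
    assert hashlib.sha256(f.read()).hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches file")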
config.json CHANGED
@@ -19,7 +19,7 @@
   "sliding_window": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.35.2",
+  "transformers_version": "4.34.1",
   "use_cache": true,
   "vocab_size": 32003
 }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.35.2"
+  "transformers_version": "4.34.1"
 }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2167bd8b9ea40693c70afbc71422c46e75dcad1b616f2cac3c7c157c5ce157b
+size 496075846
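
At 496,075,846 bytes, this checkpoint is consistent with full float16 weights for a ~248M-parameter model (248M parameters x 2 bytes per parameter is roughly 496 MB), which fits the switch to "merge_adapter": true in the updated training_params.json below: the commit now ships merged weights rather than only the adapter. A hedged loading sketch, with the repo id assumed as above:

# Sketch: load the merged checkpoint directly; no PEFT wrapper needed.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Felladrin/TinyMistral-248M-Evol-Instruct",  # assumed repo id
    torch_dtype=torch.float16,
)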
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1536,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
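
Removing the baked-in truncation rule (right side, max_length 1536) means the fast tokenizer no longer truncates by default; callers must opt in per call. A minimal sketch, assuming the repo id above:

# With "truncation": null in tokenizer.json, truncation is opt-in per call.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Felladrin/TinyMistral-248M-Evol-Instruct")  # assumed repo id
long_text = "some very long prompt " * 200
ids = tok(long_text, truncation=True, max_length=1024).input_ids  # explicit opt-in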
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -54,15 +56,11 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "legacy": true,
-  "max_length": 1536,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
-  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "<unk>",
   "use_default_system_prompt": true
 }
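
Besides dropping the stale truncation keys (max_length, stride, truncation_side, truncation_strategy), the commit makes BOS/EOS handling explicit: encodes now prepend the BOS token but do not append EOS, matching "add_eos_token": false in the new training parameters. A quick check, repo id assumed as above:

# Check the BOS/EOS behaviour implied by the new tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Felladrin/TinyMistral-248M-Evol-Instruct")  # assumed repo id
ids = tok("hello").input_ids
print(ids[0] == tok.bos_token_id)   # expected True: "add_bos_token": true
print(ids[-1] == tok.eos_token_id)  # expected False: "add_eos_token": false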
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d24b4b32e33ca71ba9e658a2871c9230e51b45f3c94190439d92c65ec9b48270
-size 4600
+oid sha256:21eae65f4c26b0c16e3e46446fd425ca78580db9e0a0f97b3315795b3d2a65fd
+size 4536
training_params.json CHANGED
@@ -1 +1,47 @@
-{"model": "Locutusque/TinyMistral-248M", "data_path": "Felladrin/autotrain-data-TinyMistral-248M", "project_name": "/tmp/model", "train_split": "train", "valid_split": null, "text_column": "autotrain_text", "rejected_text_column": null, "lr": 0.0002, "epochs": 1, "batch_size": 2, "warmup_ratio": 0.1, "gradient_accumulation": 1, "optimizer": "adamw_torch", "scheduler": "linear", "weight_decay": 0.01, "max_grad_norm": 1.0, "seed": 42, "add_eos_token": true, "block_size": 1024, "use_peft": true, "lora_r": 16, "lora_alpha": 32, "lora_dropout": 0.1, "logging_steps": -1, "evaluation_strategy": "epoch", "save_total_limit": 1, "save_strategy": "epoch", "auto_find_batch_size": false, "fp16": true, "push_to_hub": true, "use_int8": false, "model_max_length": 2048, "repo_id": "Felladrin/TinyMistral-248M-1", "use_int4": true, "trainer": "sft", "target_modules": null, "merge_adapter": false, "username": "Felladrin", "use_flash_attention_2": false, "log": "none", "disable_gradient_checkpointing": false, "model_ref": null, "dpo_beta": 0.1, "prompt_text_column": null}
+{
+  "model": "Felladrin/TinyMistral-248M-Evol-Instruct",
+  "data_path": "data/",
+  "project_name": "TinyMistral-248M-Evol-Instruct",
+  "train_split": "train",
+  "valid_split": null,
+  "text_column": "text",
+  "rejected_text_column": "rejected",
+  "token": null,
+  "lr": 0.0002,
+  "epochs": 1,
+  "batch_size": 12,
+  "warmup_ratio": 0.1,
+  "gradient_accumulation": 4,
+  "optimizer": "adamw_torch",
+  "scheduler": "linear",
+  "weight_decay": 0.01,
+  "max_grad_norm": 1.0,
+  "seed": 42,
+  "add_eos_token": false,
+  "block_size": 1024,
+  "use_peft": false,
+  "lora_r": 16,
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "logging_steps": -1,
+  "evaluation_strategy": "epoch",
+  "save_total_limit": 1,
+  "save_strategy": "epoch",
+  "auto_find_batch_size": false,
+  "fp16": false,
+  "push_to_hub": false,
+  "use_int8": true,
+  "model_max_length": 1024,
+  "repo_id": null,
+  "use_int4": false,
+  "trainer": "sft",
+  "target_modules": null,
+  "merge_adapter": true,
+  "username": null,
+  "use_flash_attention_2": false,
+  "log": "none",
+  "disable_gradient_checkpointing": false,
+  "model_ref": null,
+  "dpo_beta": 0.1,
+  "prompt_text_column": "prompt"
+}
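
The rewritten parameters describe the follow-up run: SFT continues from Felladrin/TinyMistral-248M-Evol-Instruct itself rather than the base model, on a local data/ folder, with an effective batch size of 48 (batch_size 12 x gradient_accumulation 4). As a rough orientation only, the key hyperparameters map onto transformers TrainingArguments as sketched below; autotrain-advanced's internals may differ:

# Rough sketch: the main hyperparameters above, expressed as TrainingArguments.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="TinyMistral-248M-Evol-Instruct",
    learning_rate=2e-4,              # "lr": 0.0002
    num_train_epochs=1,              # "epochs": 1
    per_device_train_batch_size=12,  # "batch_size": 12
    gradient_accumulation_steps=4,   # "gradient_accumulation": 4
    warmup_ratio=0.1,
    weight_decay=0.01,
    max_grad_norm=1.0,
    seed=42,
    optim="adamw_torch",
    lr_scheduler_type="linear",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    fp16=False,
)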