Text Generation
Russian
IlyaGusev committed on
Commit
10a2af2
1 Parent(s): ff656e6

New version: 2k context

Browse files
adapter_config.json CHANGED
@@ -8,10 +8,12 @@
8
  "lora_dropout": 0.05,
9
  "modules_to_save": null,
10
  "peft_type": "LORA",
11
- "r": 8,
12
  "target_modules": [
13
  "q_proj",
14
- "v_proj"
 
 
15
  ],
16
  "task_type": "CAUSAL_LM"
17
- }
 
8
  "lora_dropout": 0.05,
9
  "modules_to_save": null,
10
  "peft_type": "LORA",
11
+ "r": 16,
12
  "target_modules": [
13
  "q_proj",
14
+ "v_proj",
15
+ "k_proj",
16
+ "o_proj"
17
  ],
18
  "task_type": "CAUSAL_LM"
19
+ }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4100276c846c3be67ff42bd8ba8a5ee485bfceba0ee398e47685feaeb5bd08a2
3
- size 16822989
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1929a4ec5b10b9a4fb61f34d18882205b89df8270444a035a19e00ce3673e19f
3
+ size 67201357
training_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "trainer": {
3
  "evaluation_strategy": "steps",
4
- "per_device_train_batch_size": 12,
5
- "per_device_eval_batch_size": 12,
6
- "gradient_accumulation_steps": 10,
7
  "eval_steps": 50,
8
  "save_steps": 50,
9
  "logging_steps": 5,
10
  "learning_rate": 0.0003,
11
- "num_train_epochs": 3,
12
  "lr_scheduler_type": "cosine",
13
  "warmup_steps": 30,
14
  "fp16": true,
@@ -17,19 +17,19 @@
17
  "optim": "adamw_torch"
18
  },
19
  "lora": {
20
- "r": 8,
21
  "lora_alpha": 16,
22
  "lora_dropout": 0.05,
23
  "bias": "none",
24
- "target_modules": ["q_proj", "v_proj"],
25
  "task_type": "CAUSAL_LM"
26
  },
27
  "load_in_8bit": true,
28
  "only_target_loss": true,
29
- "model": "chat",
30
  "templates_path": "ru_saiga_template.json",
31
- "model_name": "models/llama-7b-hf",
32
  "model_type": "causal",
33
- "max_tokens_count": 1024
34
  }
35
 
 
1
  {
2
  "trainer": {
3
  "evaluation_strategy": "steps",
4
+ "per_device_train_batch_size": 4,
5
+ "per_device_eval_batch_size": 4,
6
+ "gradient_accumulation_steps": 32,
7
  "eval_steps": 50,
8
  "save_steps": 50,
9
  "logging_steps": 5,
10
  "learning_rate": 0.0003,
11
+ "num_train_epochs": 5,
12
  "lr_scheduler_type": "cosine",
13
  "warmup_steps": 30,
14
  "fp16": true,
 
17
  "optim": "adamw_torch"
18
  },
19
  "lora": {
20
+ "r": 16,
21
  "lora_alpha": 16,
22
  "lora_dropout": 0.05,
23
  "bias": "none",
24
+ "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
25
  "task_type": "CAUSAL_LM"
26
  },
27
  "load_in_8bit": true,
28
  "only_target_loss": true,
29
+ "mode": "chat",
30
  "templates_path": "ru_saiga_template.json",
31
+ "model_name": "models/llama-7b",
32
  "model_type": "causal",
33
+ "max_tokens_count": 2000
34
  }
35