robkaandorp
committed on
Commit
•
cfcbee0
1
Parent(s):
1d59dc1
Redo chat training
Browse files
- results_phi-2-super/adapter_config.json +2 -2
- results_phi-2-super/adapter_model.safetensors +2 -2
- results_phi-2-super/checkpoint-100/adapter_config.json +2 -2
- results_phi-2-super/checkpoint-100/adapter_model.safetensors +2 -2
- results_phi-2-super/checkpoint-100/optimizer.pt +1 -1
- results_phi-2-super/checkpoint-100/rng_state.pth +1 -1
- results_phi-2-super/checkpoint-100/tokenizer.json +2 -2
- results_phi-2-super/checkpoint-100/tokenizer_config.json +2 -2
- results_phi-2-super/checkpoint-100/trainer_state.json +2 -2
- results_phi-2-super/checkpoint-100/training_args.bin +2 -2
- results_phi-2-super/checkpoint-99/adapter_config.json +2 -2
- results_phi-2-super/checkpoint-99/adapter_model.safetensors +2 -2
- results_phi-2-super/checkpoint-99/optimizer.pt +1 -1
- results_phi-2-super/checkpoint-99/rng_state.pth +1 -1
- results_phi-2-super/checkpoint-99/tokenizer.json +2 -2
- results_phi-2-super/checkpoint-99/tokenizer_config.json +2 -2
- results_phi-2-super/checkpoint-99/trainer_state.json +2 -2
- results_phi-2-super/checkpoint-99/training_args.bin +2 -2
- results_phi-2-super/tokenizer.json +2 -2
- results_phi-2-super/tokenizer_config.json +2 -2
- results_phi-2-super/training_args.bin +2 -2
- train_csv_dataset_phi-2-super.py +10 -17
results_phi-2-super/adapter_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13eef94a821e1664305a246a380cf594f1c9e3dd4759e01b41a1f614201f072f
|
3 |
+
size 690
|
results_phi-2-super/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6231c5a9050bbfb4f34b9ae2190441edc2d4aebdc10f05cfedcb5fe287a6c5b5
|
3 |
+
size 377538512
|
results_phi-2-super/checkpoint-100/adapter_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13eef94a821e1664305a246a380cf594f1c9e3dd4759e01b41a1f614201f072f
|
3 |
+
size 690
|
results_phi-2-super/checkpoint-100/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:920ad6c5c63efda14d9475569cb990363c103e3992e36ddb7eacccd7c6616330
|
3 |
+
size 377538512
|
results_phi-2-super/checkpoint-100/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189572052
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71e207546bc91c8d641672563ea18c2da86c1a1fb15f8f76ae17a73e71e9322d
|
3 |
size 189572052
|
results_phi-2-super/checkpoint-100/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a91f983c9a6853f984e81a0938214444ef6e4cd928fa241e1da3c50ab92c316
|
3 |
size 14244
|
results_phi-2-super/checkpoint-100/tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:972811be42a0dd9f0ffcdd9563d2234c47fda2bb248d1e2934f3c20ceed07cb6
|
3 |
+
size 2114952
|
results_phi-2-super/checkpoint-100/tokenizer_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06369f8433146a04712326910a5c8a8866aae246601dbe5c836b1d5d96ee080f
|
3 |
+
size 7905
|
results_phi-2-super/checkpoint-100/trainer_state.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:411447059825b1e539c8cc827f50228b166c55c5aecd9a7d63d8ea48367c8569
|
3 |
+
size 25920
|
results_phi-2-super/checkpoint-100/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a710d3d12fbd08f305d743e550415b25672249d2c48fd4005249a2ff55e65a91
|
3 |
+
size 4920
|
results_phi-2-super/checkpoint-99/adapter_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13eef94a821e1664305a246a380cf594f1c9e3dd4759e01b41a1f614201f072f
|
3 |
+
size 690
|
results_phi-2-super/checkpoint-99/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6231c5a9050bbfb4f34b9ae2190441edc2d4aebdc10f05cfedcb5fe287a6c5b5
|
3 |
+
size 377538512
|
results_phi-2-super/checkpoint-99/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189572052
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd0ddabda4b864a08403a4d2bed400745bb845376cd8fa702e3d9d4514ac1777
|
3 |
size 189572052
|
results_phi-2-super/checkpoint-99/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5379830b2ed121780d736ed09a12ec61c30e2d65980f203ccd2003bdd8823a3
|
3 |
size 14244
|
results_phi-2-super/checkpoint-99/tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:972811be42a0dd9f0ffcdd9563d2234c47fda2bb248d1e2934f3c20ceed07cb6
|
3 |
+
size 2114952
|
results_phi-2-super/checkpoint-99/tokenizer_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06369f8433146a04712326910a5c8a8866aae246601dbe5c836b1d5d96ee080f
|
3 |
+
size 7905
|
results_phi-2-super/checkpoint-99/trainer_state.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:986a54386f80b5d1477999e23379aed157c8ddafdfe12ec362a065d73f1ae50b
|
3 |
+
size 25540
|
results_phi-2-super/checkpoint-99/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a710d3d12fbd08f305d743e550415b25672249d2c48fd4005249a2ff55e65a91
|
3 |
+
size 4920
|
results_phi-2-super/tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:972811be42a0dd9f0ffcdd9563d2234c47fda2bb248d1e2934f3c20ceed07cb6
|
3 |
+
size 2114952
|
results_phi-2-super/tokenizer_config.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06369f8433146a04712326910a5c8a8866aae246601dbe5c836b1d5d96ee080f
|
3 |
+
size 7905
|
results_phi-2-super/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a710d3d12fbd08f305d743e550415b25672249d2c48fd4005249a2ff55e65a91
|
3 |
+
size 4920
|
train_csv_dataset_phi-2-super.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import time
|
2 |
import torch
|
3 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
|
4 |
from datasets import load_dataset
|
5 |
from trl import SFTTrainer
|
6 |
-
from peft import LoraConfig, prepare_model_for_kbit_training
|
7 |
|
8 |
dataset = load_dataset("csv", data_files="nowhere_training_input.csv", delimiter=";", split="train")
|
9 |
|
@@ -11,6 +11,7 @@ if torch.cuda.is_available():
|
|
11 |
print("Cuda is available")
|
12 |
|
13 |
base_model_id = "abacaj/phi-2-super"
|
|
|
14 |
output_dir = "./results_phi-2-super"
|
15 |
|
16 |
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
|
@@ -25,23 +26,15 @@ bnb_config = BitsAndBytesConfig(load_in_4bit=True,
|
|
25 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
26 |
bnb_4bit_use_double_quant=False)
|
27 |
|
28 |
-
|
|
|
29 |
print(model)
|
30 |
|
31 |
# Gradient checkpointing to save memory
|
32 |
-
model.gradient_checkpointing_enable()
|
33 |
|
34 |
# Freeze base model layers and cast layernorm in fp32
|
35 |
-
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
|
36 |
-
|
37 |
-
peft_config = LoraConfig(
|
38 |
-
r=64,
|
39 |
-
lora_alpha=64,
|
40 |
-
target_modules= ["q_proj","k_proj","v_proj","dense","fc2","fc1"],
|
41 |
-
bias="none",
|
42 |
-
lora_dropout=0.05,
|
43 |
-
task_type="CAUSAL_LM",
|
44 |
-
)
|
45 |
|
46 |
training_args = TrainingArguments(
|
47 |
output_dir=output_dir, # Output directory for checkpoints and predictions
|
@@ -79,7 +72,7 @@ def formatting_func(data):
|
|
79 |
]
|
80 |
|
81 |
text = tokenizer.apply_chat_template(chat, tokenize=False)
|
82 |
-
print(text)
|
83 |
data['text'] = text
|
84 |
|
85 |
return data
|
@@ -90,11 +83,11 @@ trainer = SFTTrainer(
|
|
90 |
model=model,
|
91 |
train_dataset=dataset,
|
92 |
eval_dataset=dataset,
|
93 |
-
peft_config=peft_config,
|
94 |
args=training_args,
|
95 |
max_seq_length=1024,
|
96 |
packing=True,
|
97 |
-
dataset_text_field="text"
|
|
|
98 |
)
|
99 |
|
100 |
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
|
|
|
1 |
import time
|
2 |
import torch
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
|
4 |
from datasets import load_dataset
|
5 |
from trl import SFTTrainer
|
6 |
+
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, PeftConfig
|
7 |
|
8 |
dataset = load_dataset("csv", data_files="nowhere_training_input.csv", delimiter=";", split="train")
|
9 |
|
|
|
11 |
print("Cuda is available")
|
12 |
|
13 |
base_model_id = "abacaj/phi-2-super"
|
14 |
+
base_peft_id = "./results"
|
15 |
output_dir = "./results_phi-2-super"
|
16 |
|
17 |
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
|
|
|
26 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
27 |
bnb_4bit_use_double_quant=False)
|
28 |
|
29 |
+
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, attn_implementation="flash_attention_2", quantization_config=bnb_config, torch_dtype="auto")
|
30 |
+
model = PeftModel.from_pretrained(base_model, base_peft_id, is_trainable=True)
|
31 |
print(model)
|
32 |
|
33 |
# Gradient checkpointing to save memory
|
34 |
+
# model.gradient_checkpointing_enable()
|
35 |
|
36 |
# Freeze base model layers and cast layernorm in fp32
|
37 |
+
# model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
training_args = TrainingArguments(
|
40 |
output_dir=output_dir, # Output directory for checkpoints and predictions
|
|
|
72 |
]
|
73 |
|
74 |
text = tokenizer.apply_chat_template(chat, tokenize=False)
|
75 |
+
# print(text)
|
76 |
data['text'] = text
|
77 |
|
78 |
return data
|
|
|
83 |
model=model,
|
84 |
train_dataset=dataset,
|
85 |
eval_dataset=dataset,
|
|
|
86 |
args=training_args,
|
87 |
max_seq_length=1024,
|
88 |
packing=True,
|
89 |
+
dataset_text_field="text",
|
90 |
+
neftune_noise_alpha=5,
|
91 |
)
|
92 |
|
93 |
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
|