Commit a2f8007
Parent(s): cb2c072

Commit config to generate training data

Files changed: promethean-config.py (+59 -0)
promethean-config.py
ADDED
@@ -0,0 +1,59 @@
+from datasets import load_dataset
+from promethean.datasets import hub_prompts, HubSplit, Dataset, Prompts
+from promethean.extract import Extractor, ClientOpts
+from promethean.lora import LoraSettings
+import os
+
+output_dir="output"
+uncensor_ds_name = "Guilherme34/uncensor"
+uncensor_ds = load_dataset(uncensor_ds_name, split="train")
+def uncensor_items():
+    for row in uncensor_ds:
+        for message in row["messages"]:
+            if message["role"] == "user":
+                yield message["content"]
+                break
+
+extractor = Extractor(
+    teacher="hf:mlabonne/Llama-3.1-70B-Instruct-lorablated",
+    max_concurrent=8,
+    output_dir=output_dir,
+    client_opts=ClientOpts(
+        base_url="https://glhf.chat/api/openai/v1",
+        api_key=os.environ["GLHF_API_KEY"],
+    ),
+    dataset=Dataset(
+        train=[
+            Prompts(
+                output_path=f"hub/{uncensor_ds_name}.jsonl",
+                count=lambda: len(uncensor_ds),
+                items=uncensor_items,
+            ),
+            hub_prompts(
+                name="mlabonne/harmful_behaviors",
+                text_field="text",
+                split=HubSplit(name="train"),
+            ),
+        ],
+        eval=[
+            hub_prompts(
+                name="mlabonne/harmful_behaviors",
+                text_field="text",
+                split=HubSplit(name="test"),
+            ),
+        ],
+    ),
+)
+
+lora_settings = LoraSettings(
+    lora_r=32,
+    lora_alpha=16,
+    lora_dropout=0.01,
+    num_epochs=2,
+    learning_rate=4e-4,
+    warmup_steps=10,
+)
+axolotl_config = lora_settings.llama_70b_axolotl(extractor.output_dataset())
+
+extractor.run()
+axolotl_config.save(output_dir)
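For context, uncensor_items() in the committed file assumes each row of Guilherme34/uncensor carries a "messages" list of {"role", "content"} dicts and yields only the first user turn of each conversation. A minimal, self-contained sketch of that behavior, using a hypothetical example row in place of the real dataset:

# Hypothetical row mirroring the chat schema uncensor_items() relies on:
# a "messages" list of {"role", "content"} dicts per conversation.
example_rows = [
    {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "First user prompt of the conversation."},
            {"role": "assistant", "content": "Some reply."},
            {"role": "user", "content": "A follow-up turn that is never yielded."},
        ]
    },
]

def first_user_turns(rows):
    # Same logic as uncensor_items(): emit the first user message of each
    # conversation, then break out of the inner loop and move to the next row.
    for row in rows:
        for message in row["messages"]:
            if message["role"] == "user":
                yield message["content"]
                break

print(list(first_user_turns(example_rows)))
# ['First user prompt of the conversation.']

Because the committed script calls extractor.run() and axolotl_config.save(output_dir) at module level, it is meant to be executed directly as a script, with GLHF_API_KEY set in the environment for the ClientOpts it constructs.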
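The LoraSettings block pairs lora_alpha=16 with lora_r=32. Under the conventional LoRA formulation (a general fact about LoRA, not a promethean-specific API), the low-rank update is scaled by alpha / r, so these values give a scaling factor of 0.5. A quick illustrative calculation, with hypothetical layer sizes:

# Conventional LoRA update: W' = W + (alpha / r) * B @ A, with B (d x r) and A (r x k).
lora_r, lora_alpha = 32, 16
scaling = lora_alpha / lora_r
print(scaling)  # 0.5

# Trainable parameters added per adapted d x k weight matrix:
d, k = 8192, 8192  # hypothetical hidden sizes, for illustration only
trainable = lora_r * (d + k)
print(trainable)  # 524288 adapter weights vs. d * k = 67108864 frozen weights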