reissbaker committed on
Commit
a2f8007
·
1 Parent(s): cb2c072

Commit config to generate training data

Browse files
Files changed (1) hide show
  1. promethean-config.py +59 -0
promethean-config.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from promethean.datasets import hub_prompts, HubSplit, Dataset, Prompts
3
+ from promethean.extract import Extractor, ClientOpts
4
+ from promethean.lora import LoraSettings
5
+ import os
6
+
7
# Hub identifier of the conversation dataset; reused below both to load the
# data and to derive the output path of the prompts extracted from it.
uncensor_ds_name = "Guilherme34/uncensor"

# Directory handed to the extractor and to the saved axolotl config.
output_dir = "output"

# Only the "train" split is loaded; this happens eagerly at import time.
uncensor_ds = load_dataset(uncensor_ds_name, split="train")
10
def uncensor_items():
    """Yield one prompt per row of ``uncensor_ds``.

    For each row, the ``"content"`` of the first entry in ``row["messages"]``
    whose ``"role"`` is ``"user"`` is yielded. Rows that contain no user
    message contribute nothing.
    """
    for conversation in uncensor_ds:
        # Stop scanning at the first user turn; later turns are ignored.
        first_user_turn = next(
            (turn for turn in conversation["messages"] if turn["role"] == "user"),
            None,
        )
        if first_user_turn is not None:
            yield first_user_turn["content"]
16
+
17
def _harmful_behaviors_prompts(split_name):
    """Build hub prompts for one split of ``mlabonne/harmful_behaviors``.

    The prompt text is read from the dataset's ``"text"`` field.
    """
    return hub_prompts(
        name="mlabonne/harmful_behaviors",
        text_field="text",
        split=HubSplit(name=split_name),
    )


# The API key is read from the environment when this module is executed; a
# missing GLHF_API_KEY raises KeyError before any prompts are built.
_client_opts = ClientOpts(
    base_url="https://glhf.chat/api/openai/v1",
    api_key=os.environ["GLHF_API_KEY"],
)

# Training prompts: the locally filtered conversation dataset, followed by the
# "train" split of the hub dataset.
# NOTE(review): `count` reports the row count of uncensor_ds, while
# uncensor_items skips rows without a user message, so the two can differ.
# Confirm that Prompts tolerates this.
_train_prompts = [
    Prompts(
        output_path=f"hub/{uncensor_ds_name}.jsonl",
        count=lambda: len(uncensor_ds),
        items=uncensor_items,
    ),
    _harmful_behaviors_prompts("train"),
]

# Evaluation prompts: the "test" split of the same hub dataset.
_eval_prompts = [_harmful_behaviors_prompts("test")]

extractor = Extractor(
    teacher="hf:mlabonne/Llama-3.1-70B-Instruct-lorablated",
    max_concurrent=8,
    output_dir=output_dir,
    client_opts=_client_opts,
    dataset=Dataset(train=_train_prompts, eval=_eval_prompts),
)

lora_settings = LoraSettings(
    lora_r=32,
    lora_alpha=16,
    lora_dropout=0.01,
    num_epochs=2,
    learning_rate=4e-4,
    warmup_steps=10,
)

# The axolotl config is derived from the extractor's output dataset *before*
# the extraction itself runs; this ordering is kept as-is.
axolotl_config = lora_settings.llama_70b_axolotl(extractor.output_dataset())

extractor.run()
axolotl_config.save(output_dir)