nbeerbower committed on
Commit
012651a
1 Parent(s): 849a008

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -10
README.md CHANGED
@@ -29,17 +29,14 @@ Dataset preparation:
29
 
30
  ```python
31
  def chatml_format(example):
32
- # Initialize formatted system message
33
  system = ""
34
-
35
- # Check if 'system' field exists and is not None
36
- if example.get('system'):
37
- message = {"role": "system", "content": example['system']}
38
- system = tokenizer.apply_chat_template([message], tokenize=False)
39
 
40
  # Format instruction
41
- message = {"role": "user", "content": example['prompt']}
42
- prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)
43
 
44
  # Format chosen answer
45
  chosen = example['chosen'] + "<|im_end|>\n"
@@ -56,9 +53,8 @@ def chatml_format(example):
56
  # Array of datasets to concat
57
  ds = [
58
  "jondurbin/truthy-dpo-v0.1",
59
- "ResplendentAI/NSFW_RP_Format_DPO",
60
  "jondurbin/gutenberg-dpo-v0.1",
61
- "flammenai/Date-DPO-v1"
62
  ]
63
 
64
  # load_dataset and combine all
 
29
 
30
  ```python
31
  def chatml_format(example):
32
+ # Format system
33
  system = ""
34
+ if example.get('system') and len(example['system']) > 0:
35
+ systemMessage = example['system']
36
+ system = "<|im_start|>system\n" + systemMessage + "<|im_end|>\n"
 
 
37
 
38
  # Format instruction
39
+ prompt = "<|im_start|>user\n" + example['prompt'] + "<|im_end|>\n<|im_start|>assistant\n"
 
40
 
41
  # Format chosen answer
42
  chosen = example['chosen'] + "<|im_end|>\n"
 
53
  # Array of datasets to concat
54
  ds = [
55
  "jondurbin/truthy-dpo-v0.1",
 
56
  "jondurbin/gutenberg-dpo-v0.1",
57
+ "flammenai/FlameMix-DPO-v1"
58
  ]
59
 
60
  # load_dataset and combine all