nbeerbower
/

llama-3-sauce-v2-8B

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

nbeerbower committed on May 13

Commit

012651a

•

1 Parent(s): 849a008

Update README.md

Files changed (1) hide show

README.md +6 -10

README.md CHANGED Viewed

@@ -29,17 +29,14 @@ Dataset preparation:
 ```python
 def chatml_format(example):
-    # Initialize formatted system message
     system = ""
-    # Check if 'system' field exists and is not None
-    if example.get('system'):
-        message = {"role": "system", "content": example['system']}
-        system = tokenizer.apply_chat_template([message], tokenize=False)
     # Format instruction
-    message = {"role": "user", "content": example['prompt']}
-    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)
     # Format chosen answer
     chosen = example['chosen'] + "<|im_end|>\n"
@@ -56,9 +53,8 @@ def chatml_format(example):
 # Array of datasets to concat
 ds = [
     "jondurbin/truthy-dpo-v0.1",
-    "ResplendentAI/NSFW_RP_Format_DPO",
     "jondurbin/gutenberg-dpo-v0.1",
-    "flammenai/Date-DPO-v1"
 ]
 # load_dataset and combine all

 ```python
 def chatml_format(example):
+    # Format system
     system = ""
+    if example.get('system') and len(example['system']) > 0:
+        systemMessage = example['system']
+        system = "<|im_start|>system\n" + systemMessage + "<|im_end|>\n"
     # Format instruction
+    prompt = "<|im_start|>user\n" + example['prompt'] + "<|im_end|>\n<|im_start|>assistant\n"
     # Format chosen answer
     chosen = example['chosen'] + "<|im_end|>\n"
 # Array of datasets to concat
 ds = [
     "jondurbin/truthy-dpo-v0.1",
     "jondurbin/gutenberg-dpo-v0.1",
+    "flammenai/FlameMix-DPO-v1"
 ]
 # load_dataset and combine all