# miqu-control-vectors / patches/repeng/emotion_prompts.py
# Patches for llama.cpp and repeng to run this stuff.
import json
from transformers import AutoTokenizer
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token_id = 0
user_tag, asst_tag = "[INST]", "[/INST]"
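# [INST] / [/INST] are the Mistral instruct-format tags; each training prompt
# is rendered as "[INST] <persona instruction> [/INST] <assistant text>".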
with open('notebooks/data/all_truncated_outputs.json') as f:
    suffixes = json.load(f)
truncated_suffixes = []
truncated_suffixes_dedup = set()
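# Build every token-level prefix of each suffix, skipping prefixes that
# render to the same string as one already kept.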
for suffix in suffixes:
    tokens = tokenizer.tokenize(suffix)
    for i in range(1, len(tokens)):
        truncated = tokenizer.convert_tokens_to_string(tokens[:i])
        if truncated in truncated_suffixes_dedup:
            continue
        truncated_suffixes.append(truncated)
        truncated_suffixes_dedup.add(truncated)
persona_pairs = [
    ('incredibly charismatic, captivating everyone with your presence and words',
     'unassuming, rarely drawing attention or swaying others'),
    ('persuasive, easily influencing others with your charm and eloquence',
     'reticent, struggling to engage or influence those around you'),
]
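# Each tuple pairs a positive persona with its negative counterpart; prompts
# built from the two sides of a pair differ only in the persona text, which is
# the contrast the control vector is trained on.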
def template(persona: str, suffix: str) -> str:
    return f"{user_tag} Act as if you are {persona}. {asst_tag} {suffix}"
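# Example (with a hypothetical suffix "I think"):
#   template(persona_pairs[0][1], "I think")
#   -> "[INST] Act as if you are unassuming, rarely drawing attention or swaying others. [/INST] I think"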
OUT_FILE = 'control_vector_prompts.txt'
f = open(OUT_FILE, 'w')
# Use '\n' as delimiter between prompts. If you want to use a different
# delimiter, change this string and also change PROMPT_DELIMITER_TOKEN in
# llama.cpp/examples/repeng/repeng.cpp.
PROMPT_DELIMITER = '\n'
print('prompt delimiter string: %r' % PROMPT_DELIMITER)
print('prompt delimiter token id: %s' % (
    tokenizer.encode(PROMPT_DELIMITER, add_special_tokens=False),))
count = 0
for suffix in truncated_suffixes:
    for positive_persona, negative_persona in persona_pairs:
        positive = template(positive_persona, suffix)
        negative = template(negative_persona, suffix)
        f.write(positive)
        f.write(PROMPT_DELIMITER)
        f.write(negative)
        f.write(PROMPT_DELIMITER)
        count += 2
f.close()
print('wrote %d prompts to %s' % (count, OUT_FILE))