|
from datasets import load_dataset |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import re |
|
|
|
from llmcompressor.modifiers.quantization import GPTQModifier |
|
from llmcompressor.transformers import oneshot |
|
|
|
|
|
# --- Run configuration -------------------------------------------------------
# Checkpoint to quantize; its tokenizer is loaded from the same id.
MODEL_ID = "ArliAI/Llama-3.1-70B-ArliAI-RPMax-v1.3"

# Calibration corpus and the split read from it.
DATASET_ID = "openerotica/erotiquant3"
DATASET_SPLIT = "train"

# Number of rows kept for calibration, and the token limit used both for the
# row filter and for truncation when tokenizing.
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 4096

# --- Loading -----------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

ds = load_dataset(path=DATASET_ID, split=DATASET_SPLIT)
|
|
|
def preprocess(example):
    """Turn a ``SYSTEM:``/``USER:``/``ASSISTANT:`` transcript into chat form.

    Args:
        example: Dataset row whose ``"text"`` field holds the raw transcript,
            each turn introduced by ``SYSTEM:``, ``USER:`` or ``ASSISTANT:``.

    Returns:
        A dict with three keys:
            ``"chat"``: list of ``{"role", "content"}`` messages, with the
                role lower-cased and the content stripped of surrounding
                whitespace;
            ``"text"``: the messages rendered by
                ``tokenizer.apply_chat_template`` with ``tokenize=False``;
            ``"tokens"``: the same rendering with ``tokenize=True``.
    """
    # A turn's content runs up to the next "<ROLE>:" marker. The role names
    # must be grouped inside the lookahead so the colon applies to all three;
    # ungrouped, a bare "SYSTEM" or "USER" inside a message ended the turn
    # early and the remainder of that message was silently dropped.
    # re.DOTALL already lets "." match newlines, so no explicit "\n" branch
    # is needed.
    matches = re.findall(
        r'(SYSTEM|USER|ASSISTANT):\s*((?:(?!(?:SYSTEM|USER|ASSISTANT):).)+)',
        example['text'],
        re.DOTALL,
    )

    result = [
        {"role": role.lower(), "content": content.strip()}
        for role, content in matches
    ]

    # Render the conversation twice: once as a string, once as token ids.
    text = tokenizer.apply_chat_template(result, tokenize=False, add_generation_prompt=False)
    tokens = tokenizer.apply_chat_template(result, tokenize=True, add_generation_prompt=False)

    return {
        "chat": result,
        "text": text,
        "tokens": tokens,
    }
|
|
|
# Add the "chat", "text" and "tokens" fields returned by preprocess to each row.
ds = ds.map(function=preprocess)
|
|
|
def filter_short_rows(example):
    """Tell whether a row has more than ``MAX_SEQUENCE_LENGTH`` tokens.

    Args:
        example: Row with a ``"tokens"`` sequence.

    Returns:
        ``True`` when ``len(example["tokens"])`` exceeds
        ``MAX_SEQUENCE_LENGTH``; otherwise ``False``, after printing the
        row's token count as ``length: N``.
    """
    token_count = len(example['tokens'])
    if token_count > MAX_SEQUENCE_LENGTH:
        return True

    # Report the size of every row that does not exceed the limit.
    print(f"length: {token_count}")
    return False
|
|
|
# Keep only the rows accepted by filter_short_rows, shuffle them with a fixed
# seed, then take the first NUM_CALIBRATION_SAMPLES rows.
# NOTE(review): assumes at least NUM_CALIBRATION_SAMPLES rows survive the
# filter -- confirm against the dataset.
ds = (
    ds.filter(filter_short_rows)
    .shuffle(seed=42)
    .select(range(NUM_CALIBRATION_SAMPLES))
)
|
|
|
|
|
|
|
def tokenize(sample):
    """Encode ``sample["text"]`` with the module-level tokenizer.

    The text is truncated to ``MAX_SEQUENCE_LENGTH`` tokens, is not padded,
    and no special tokens are added by this call.

    Args:
        sample: Row with a ``"text"`` field.

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {
        "padding": False,
        "max_length": MAX_SEQUENCE_LENGTH,
        "truncation": True,
        # presumably the chat-template rendering already carries the special
        # tokens -- verify against the tokenizer's template
        "add_special_tokens": False,
    }
    return tokenizer(sample["text"], **encode_options)
|
|
|
# Tokenize every sampled row; all columns present before the call are removed.
ds = ds.map(tokenize, remove_columns=ds.column_names)

# GPTQ recipe: apply the "W4A16" scheme to "Linear" targets, ignoring lm_head.
recipe = GPTQModifier(
    targets="Linear",
    scheme="W4A16",
    ignore=["lm_head"],
)

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", torch_dtype="auto")

# Run the one-shot pass over the calibration rows.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
|
|
|
print('SAVING')

# Output directory: the repository-name part of MODEL_ID plus a suffix.
SAVE_DIR = f"{MODEL_ID.split('/')[1]}-W4A16-G128"

# Write the model and its tokenizer side by side in SAVE_DIR.
model.save_pretrained(
    SAVE_DIR,
    save_compressed=True,
    skip_compression_stats=True,
)
tokenizer.save_pretrained(SAVE_DIR)

print('Saved')
|
|