File size: 6,763 Bytes
bba84bb
 
 
 
e540b2c
bba84bb
1cb7bed
bba84bb
 
 
 
 
 
 
e540b2c
bba84bb
 
e540b2c
bba84bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4ffdb
 
 
 
 
 
 
 
 
 
 
 
bba84bb
cf4ffdb
bba84bb
 
 
 
 
 
e138b53
cf4ffdb
 
 
 
 
 
 
bba84bb
 
adef9e5
 
e540b2c
 
adef9e5
 
cf4ffdb
bba84bb
cf4ffdb
 
 
adef9e5
 
e540b2c
 
adef9e5
 
 
bba84bb
cf4ffdb
 
1cb7bed
 
 
 
 
cf4ffdb
1cb7bed
cf4ffdb
1cb7bed
cf4ffdb
 
e540b2c
 
 
 
 
 
cf4ffdb
 
 
 
 
bba84bb
 
 
 
 
 
cf4ffdb
 
 
 
 
a3cd329
 
 
bba84bb
 
 
 
 
cf4ffdb
 
 
8359026
 
c485faf
8359026
ac2b840
 
88f3e59
 
 
 
 
 
 
 
cf4ffdb
 
 
bba84bb
 
 
 
 
86b9502
bba84bb
 
 
1e513f3
 
bba84bb
 
921975c
bba84bb
c485faf
 
 
 
 
 
 
 
 
921975c
0799cad
1e513f3
 
bba84bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e138b53
bba84bb
 
1e513f3
c485faf
1e513f3
e138b53
bba84bb
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path

import gc
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig 
)


CONFIGS = []

# ───── Load all configs ─────
# Open every YAML file with a context manager: the previous
# yaml.safe_load(open(...)) form leaked file handles on every load.
_adapters_file = Path("adapters.yaml")
if _adapters_file.exists():
    with _adapters_file.open() as fh:
        # adapters.yaml holds a list under the top-level "adapters" key.
        CONFIGS.extend(yaml.safe_load(fh)["adapters"])

# Each manifests/*.yaml file is a single adapter config document.
for yml in Path("manifests").glob("*.yaml"):
    with yml.open() as fh:
        CONFIGS.append(yaml.safe_load(fh))

if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")

# ───── Hugging Face auth ─────
hf_token = os.getenv("HF_TOKEN")
# "***" is what a masked CI secret expands to — treat it the same as unset.
if hf_token in (None, "", "***"):
    raise RuntimeError("HF_TOKEN secret is missing.")
login(hf_token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()

# One dict per (adapter, task, metric) triple, collected across all runs.
all_rows = []

# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    """Load the tokenizer for *model_id*, preferring the fast (Rust) variant.

    Falls back to the slow Python tokenizer when the fast one fails, and
    raises RuntimeError if neither can be loaded.
    """
    last_error = None
    for use_fast in (True, False):
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=use_fast)
        except Exception as err:
            if use_fast:
                # Only the fast-path failure is worth logging; the slow
                # failure is surfaced in the final RuntimeError below.
                print(f"Fast tokenizer failed for {model_id}: {err}")
            last_error = err
    raise RuntimeError(f"Failed to load tokenizer for {model_id}: {last_error}") from last_error

# ───── Evaluate each adapter ─────
# The commit SHA is invariant for the whole run; resolve it once instead of
# shelling out to git on every loop iteration.
GIT_SHA = subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode()

for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    # Validate the adapter repo *before* paying the cost of loading the
    # (potentially huge) base model.
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue

    # Reset the CUDA peak counter so peak_gpu_mem_mb reflects THIS adapter
    # only. Without the reset the stat accumulates for the whole process and
    # every row after the first reports the global maximum.
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)

    # Sanity-check the tokenizer before any heavy model work (previously this
    # ran only after the merged weights had already been written to disk).
    if not hasattr(tokenizer, "vocab_size"):
        print("Invalid tokenizer loaded. Skipping.")
        continue

    if "llama" in base_model_id.lower():
        try:
            tokenizer.legacy = False  # opt out of legacy LLaMA tokenizer behaviour
        except Exception:
            pass  # older tokenizer classes may reject the attribute; best-effort

    # Try decoder-style first, fall back to an encoder classification head.
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")

    try:
        peft_model = PeftModel.from_pretrained(
            base_model,
            adapter_repo,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        # Fold the adapter weights into the base model so lm-eval can load a
        # plain checkpoint from disk.
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        # Free the base model before moving on; otherwise every failed
        # adapter leaks a full model's worth of (GPU) memory.
        del base_model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        continue

    merged_model.eval()

    res = None
    with tempfile.TemporaryDirectory() as td:
        # lm-eval re-loads the model from disk, so persist the merged
        # weights and tokenizer into a throwaway directory.
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        hf_lm = HFLM(
            pretrained=td,
            # Encoder models are lighter per sample, so they get the larger batch.
            batch_size=16 if is_encoder else 8,
            device=device,
        )

        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
            print(f"Raw results for {adapter_repo}: {res}")
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            res = None
        finally:
            # Release everything model-shaped regardless of outcome so a
            # failed evaluation cannot starve the next adapter of memory.
            del merged_model, peft_model, base_model, tokenizer, hf_lm
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

    if res is None:
        continue  # failure already logged above
    if not res.get("results"):
        print(f"Empty results — likely a task or model compatibility issue for: {adapter_repo}")
        continue

    print(f"\nEvaluation raw result for {adapter_repo}:")
    print(res["results"])

    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        # Per-adapter peak thanks to the reset at the top of the loop;
        # empty_cache() does not affect max_memory_allocated().
        "peak_gpu_mem_mb": torch.cuda.max_memory_allocated() // 1024**2 if torch.cuda.is_available() else None,
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the ISO string now carries an explicit "+00:00" offset.
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": GIT_SHA,
    }

    count_before = len(all_rows)
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            if value is None:
                continue
            # lm-eval metric keys look like "acc,none": name + aggregation.
            metric_name, _, aggregation = metric.partition(",")
            all_rows.append({
                **meta,
                "task": task,
                "metric": metric_name,
                "aggregation": aggregation or None,
                "value": value,
            })

    print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")


# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)

if df_new.empty:
    # Nothing evaluated successfully: selecting columns from an empty frame
    # would raise, and an empty upload would be pointless anyway.
    print("No new results to upload.")
else:
    with tempfile.TemporaryDirectory() as tmp:
        # Fetch the existing dataset; on the very first run (or a transient
        # hub error) there is nothing to download, so start from scratch
        # instead of crashing.
        try:
            current_path = hf_hub_download(
                repo_id=DATASET_REPO,
                filename="data/peft_bench.parquet",
                repo_type="dataset",
                cache_dir=tmp,
                local_dir=tmp,
                local_dir_use_symlinks=False,
            )
            df_existing = pd.read_parquet(current_path)
        except Exception as e:
            print(f"Could not fetch existing parquet ({e}); starting fresh.")
            df_existing = pd.DataFrame()

        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        df_combined = df_combined.sort_values("run_date")
        # Normalize the value column: anything non-numeric becomes NaN.
        df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

        print("\nFinal new results:")
        print(df_new[["model_id", "task", "metric", "aggregation", "value"]])

        out = Path("peft_bench.parquet")
        df_combined.to_parquet(out, index=False)

        api.upload_file(
            path_or_fileobj=out,
            path_in_repo="data/peft_bench.parquet",
            repo_id=DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
        )