File size: 6,763 Bytes
bba84bb
 
 
 
e540b2c
bba84bb
1cb7bed
bba84bb
 
 
 
 
 
 
e540b2c
bba84bb
 
e540b2c
bba84bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf4ffdb
 
 
 
 
 
 
 
 
 
 
 
bba84bb
cf4ffdb
bba84bb
 
 
 
 
 
e138b53
cf4ffdb
 
 
 
 
 
 
bba84bb
 
adef9e5
 
e540b2c
 
adef9e5
 
cf4ffdb
bba84bb
cf4ffdb
 
 
adef9e5
 
e540b2c
 
adef9e5
 
 
bba84bb
cf4ffdb
 
1cb7bed
 
 
 
 
cf4ffdb
1cb7bed
cf4ffdb
1cb7bed
cf4ffdb
 
e540b2c
 
 
 
 
 
cf4ffdb
 
 
 
 
bba84bb
 
 
 
 
 
cf4ffdb
 
 
 
 
a3cd329
 
 
bba84bb
 
 
 
 
cf4ffdb
 
 
8359026
 
c485faf
8359026
ac2b840
 
88f3e59
 
 
 
 
 
 
 
cf4ffdb
 
 
bba84bb
 
 
 
 
86b9502
bba84bb
 
 
1e513f3
 
bba84bb
 
921975c
bba84bb
c485faf
 
 
 
 
 
 
 
 
921975c
0799cad
1e513f3
 
bba84bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e138b53
bba84bb
 
1e513f3
c485faf
1e513f3
e138b53
bba84bb
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python3
import datetime, os, subprocess, tempfile
from pathlib import Path

import gc
import pandas as pd, yaml, torch
from huggingface_hub import HfApi, login, hf_hub_download, model_info
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig 
)


CONFIGS = []

# ───── Load all configs ─────
# Open every YAML file with a context manager: the previous
# yaml.safe_load(open(...)) form leaked file handles on every load.
_adapters_file = Path("adapters.yaml")
if _adapters_file.exists():
    with _adapters_file.open() as fh:
        # adapters.yaml holds a list under the top-level "adapters" key.
        CONFIGS.extend(yaml.safe_load(fh)["adapters"])

# Each manifests/*.yaml file is a single adapter config document.
for yml in Path("manifests").glob("*.yaml"):
    with yml.open() as fh:
        CONFIGS.append(yaml.safe_load(fh))

if not CONFIGS:
    raise RuntimeError("No adapter configs found in adapters.yaml or manifests/")

# ───── Hugging Face auth ─────
hf_token = os.getenv("HF_TOKEN")
# "***" is what a masked CI secret expands to — treat it the same as unset.
if hf_token in (None, "", "***"):
    raise RuntimeError("HF_TOKEN secret is missing.")
login(hf_token)

DATASET_REPO = os.environ["HF_DATASET_REPO"]
api = HfApi()

# One dict per (adapter, task, metric) triple, collected across all runs.
all_rows = []

# ───── Safe tokenizer loading ─────
def load_tokenizer(model_id: str):
    """Load the tokenizer for *model_id*, preferring the fast (Rust) variant.

    Falls back to the slow Python tokenizer when the fast one fails, and
    raises RuntimeError if neither can be loaded.
    """
    last_error = None
    for use_fast in (True, False):
        try:
            return AutoTokenizer.from_pretrained(model_id, use_fast=use_fast)
        except Exception as err:
            if use_fast:
                # Only the fast-path failure is worth logging; the slow
                # failure is surfaced in the final RuntimeError below.
                print(f"Fast tokenizer failed for {model_id}: {err}")
            last_error = err
    raise RuntimeError(f"Failed to load tokenizer for {model_id}: {last_error}") from last_error

# ───── Evaluate each adapter ─────
# The commit SHA is invariant for the whole run; resolve it once instead of
# shelling out to git on every loop iteration.
GIT_SHA = subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode()

for cfg in CONFIGS:
    base_model_id = cfg["base_model"]
    adapter_repo = cfg["adapter_repo"]
    adapter_type = cfg.get("adapter_type", "LoRA")
    tasks = cfg["tasks"]

    # Validate the adapter repo *before* paying the cost of loading the
    # (potentially huge) base model.
    try:
        info = model_info(adapter_repo)
        files = [f.rfilename for f in info.siblings]
        if "adapter_config.json" not in files:
            print(f"{adapter_repo} is not a valid PEFT adapter (missing adapter_config.json)")
            continue
    except Exception as e:
        print(f"Failed to inspect adapter {adapter_repo}: {e}")
        continue

    # Reset the CUDA peak counter so peak_gpu_mem_mb reflects THIS adapter
    # only. Without the reset the stat accumulates for the whole process and
    # every row after the first reports the global maximum.
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()

    print(f"\nLoading base model: {base_model_id}")
    tokenizer = load_tokenizer(base_model_id)

    # Sanity-check the tokenizer before any heavy model work (previously this
    # ran only after the merged weights had already been written to disk).
    if not hasattr(tokenizer, "vocab_size"):
        print("Invalid tokenizer loaded. Skipping.")
        continue

    if "llama" in base_model_id.lower():
        try:
            tokenizer.legacy = False  # opt out of legacy LLaMA tokenizer behaviour
        except Exception:
            pass  # older tokenizer classes may reject the attribute; best-effort

    # Try decoder-style first, fall back to an encoder classification head.
    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = False
        print("Loaded as Causal LM")
    except Exception as e:
        print(f"⚠️ Failed to load causal LM: {e}")
        base_model = AutoModelForSequenceClassification.from_pretrained(
            base_model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
            use_safetensors=True,
        )
        is_encoder = True
        print("Loaded as Sequence Classification model")

    try:
        peft_model = PeftModel.from_pretrained(
            base_model,
            adapter_repo,
            device_map="auto",
            torch_dtype=torch.float16,
        )
        # Fold the adapter weights into the base model so lm-eval can load a
        # plain checkpoint from disk.
        merged_model = peft_model.merge_and_unload()
    except Exception as e:
        print(f"Failed to apply adapter {adapter_repo}: {e}")
        # Free the base model before moving on; otherwise every failed
        # adapter leaks a full model's worth of (GPU) memory.
        del base_model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        continue

    merged_model.eval()

    res = None
    with tempfile.TemporaryDirectory() as td:
        # lm-eval re-loads the model from disk, so persist the merged
        # weights and tokenizer into a throwaway directory.
        merged_model.save_pretrained(td)
        tokenizer.save_pretrained(td)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        hf_lm = HFLM(
            pretrained=td,
            # Encoder models are lighter per sample, so they get the larger batch.
            batch_size=16 if is_encoder else 8,
            device=device,
        )

        try:
            res = evaluator.simple_evaluate(model=hf_lm, tasks=tasks)
            print(f"Raw results for {adapter_repo}: {res}")
        except Exception as e:
            print(f"Evaluation failed for {adapter_repo}: {e}")
            res = None
        finally:
            # Release everything model-shaped regardless of outcome so a
            # failed evaluation cannot starve the next adapter of memory.
            del merged_model, peft_model, base_model, tokenizer, hf_lm
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

    if res is None:
        continue  # failure already logged above
    if not res.get("results"):
        print(f"Empty results — likely a task or model compatibility issue for: {adapter_repo}")
        continue

    print(f"\nEvaluation raw result for {adapter_repo}:")
    print(res["results"])

    meta = {
        "model_id": adapter_repo,
        "adapter_type": adapter_type,
        "trainable_params": cfg.get("trainable_params"),
        # Per-adapter peak thanks to the reset at the top of the loop;
        # empty_cache() does not affect max_memory_allocated().
        "peak_gpu_mem_mb": torch.cuda.max_memory_allocated() // 1024**2 if torch.cuda.is_available() else None,
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the ISO string now carries an explicit "+00:00" offset.
        "run_date": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
        "commit_sha": GIT_SHA,
    }

    count_before = len(all_rows)
    for task, scores in res["results"].items():
        for metric, value in scores.items():
            if value is None:
                continue
            # lm-eval metric keys look like "acc,none": name + aggregation.
            metric_name, _, aggregation = metric.partition(",")
            all_rows.append({
                **meta,
                "task": task,
                "metric": metric_name,
                "aggregation": aggregation or None,
                "value": value,
            })

    print(f"{len(all_rows) - count_before} rows added for {adapter_repo}")


# ───── Merge and upload results ─────
df_new = pd.DataFrame(all_rows)

if df_new.empty:
    # Nothing evaluated successfully: selecting columns from an empty frame
    # would raise, and an empty upload would be pointless anyway.
    print("No new results to upload.")
else:
    with tempfile.TemporaryDirectory() as tmp:
        # Fetch the existing dataset; on the very first run (or a transient
        # hub error) there is nothing to download, so start from scratch
        # instead of crashing.
        try:
            current_path = hf_hub_download(
                repo_id=DATASET_REPO,
                filename="data/peft_bench.parquet",
                repo_type="dataset",
                cache_dir=tmp,
                local_dir=tmp,
                local_dir_use_symlinks=False,
            )
            df_existing = pd.read_parquet(current_path)
        except Exception as e:
            print(f"Could not fetch existing parquet ({e}); starting fresh.")
            df_existing = pd.DataFrame()

        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        df_combined = df_combined.sort_values("run_date")
        # Normalize the value column: anything non-numeric becomes NaN.
        df_combined["value"] = pd.to_numeric(df_combined["value"], errors="coerce")

        print("\nFinal new results:")
        print(df_new[["model_id", "task", "metric", "aggregation", "value"]])

        out = Path("peft_bench.parquet")
        df_combined.to_parquet(out, index=False)

        api.upload_file(
            path_or_fileobj=out,
            path_in_repo="data/peft_bench.parquet",
            repo_id=DATASET_REPO,
            repo_type="dataset",
            commit_message=f"Add {len(CONFIGS)} new adapter run(s)",
        )