BlackSamorez commited on
Commit
a29dc33
·
verified ·
1 Parent(s): b0fd683

Upload folder using huggingface_hub

Browse files
lm_eval/.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
lm_eval/.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
lm_eval/README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Environment Setup
2
+
3
+ Download this directory to a local machine and set up [`uv`](https://docs.astral.sh/uv/).
4
+
5
+ 1. **Install `uv`** (if you haven't already):
6
+ ```bash
7
+ curl -LsSf https://astral.sh/uv/install.sh | sh
8
+ ```
9
+
10
+ 2. **Sync the environment:**
11
+ ```bash
12
+ uv sync
13
+ ```
14
+ *(This automatically creates a virtual environment at `.venv` and strictly installs the dependencies locked in `uv.lock`.)*
15
+
16
+ 3. **Activate the environment:**
17
+ ```bash
18
+ source .venv/bin/activate
19
+ ```
20
+
21
+ ### Evaluation Script
22
+
23
+ Run:
24
+
25
+ ```bash
26
+ accelerate launch eval.py \
27
+ --model cloverlm \
28
+ --model_args "pretrained=/path/to/CloverLM,dtype=bfloat16,quartet_2_impl=quartet2,attn_backend=pytorch" \
29
+ --tasks "arc_easy_mi,arc_challenge_mi,hellaswag,piqa" \
30
+ --num_fewshot 0 \
31
+ --include_path ./ \
32
+ --trust_remote_code \
33
+ --confirm_run_unsafe_code \
34
+ --batch_size auto
35
+ ```
36
+
37
+ ### Expected Evaluation Results
38
+
39
+ ```
40
+ | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
41
+ |----------------|------:|------|-----:|---------------|---|-----:|---|-----:|
42
+ |arc_challenge_mi| 1|none | 0|acc |↑ |0.4625|± |0.0146|
43
+ | | |none | 0|acc_mutual_info|↑ |0.5094|± |0.0146|
44
+ | | |none | 0|acc_norm |↑ |0.4923|± |0.0146|
45
+ |arc_easy_mi | 1|none | 0|acc |↑ |0.7997|± |0.0082|
46
+ | | |none | 0|acc_mutual_info|↑ |0.7239|± |0.0092|
47
+ | | |none | 0|acc_norm |↑ |0.7731|± |0.0086|
48
+ |hellaswag | 1|none | 0|acc |↑ |0.5392|± |0.0050|
49
+ | | |none | 0|acc_norm |↑ |0.7167|± |0.0045|
50
+ |piqa | 1|none | 0|acc |↑ |0.7922|± |0.0095|
51
+ | | |none | 0|acc_norm |↑ |0.8058|± |0.0092|
52
+ ```
53
+
54
+ ### Alternative Backends
55
+
56
+ Replace `quartet_2_impl=quartet2` with `quartet_2_impl=pseudoquant` on non-Blackwell GPUs.
57
+ You can try `attn_backend=pytorch/flash2/flash3/flash4` if you have the corresponding libs installed.
lm_eval/arc_challenge.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tag:
2
+ - ai2_arc
3
+ task: arc_challenge_mi
4
+ dataset_path: allenai/ai2_arc
5
+ dataset_name: ARC-Challenge
6
+ output_type: multiple_choice
7
+ training_split: train
8
+ validation_split: validation
9
+ test_split: test
10
+ doc_to_text: "Question: {{question}}\nAnswer:"
11
+ doc_to_target: "{{choices.label.index(answerKey)}}"
12
+ doc_to_choice: "{{choices.text}}"
13
+ should_decontaminate: true
14
+ doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
15
+ metric_list:
16
+ - metric: acc
17
+ aggregation: mean
18
+ higher_is_better: true
19
+ - metric: acc_mutual_info
20
+ aggregation: mean
21
+ higher_is_better: true
22
+ - metric: acc_norm
23
+ aggregation: mean
24
+ higher_is_better: true
25
+ metadata:
26
+ version: 1.0
lm_eval/arc_easy_mi.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tag:
2
+ - ai2_arc
3
+ task: arc_easy_mi
4
+ dataset_path: allenai/ai2_arc
5
+ dataset_name: ARC-Easy
6
+ output_type: multiple_choice
7
+ training_split: train
8
+ validation_split: validation
9
+ test_split: test
10
+ doc_to_text: "Question: {{question}}\nAnswer:"
11
+ doc_to_target: "{{choices.label.index(answerKey)}}"
12
+ doc_to_choice: "{{choices.text}}"
13
+ should_decontaminate: true
14
+ doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
15
+ metric_list:
16
+ - metric: acc
17
+ aggregation: mean
18
+ higher_is_better: true
19
+ - metric: acc_mutual_info
20
+ aggregation: mean
21
+ higher_is_better: true
22
+ - metric: acc_norm
23
+ aggregation: mean
24
+ higher_is_better: true
25
+ metadata:
26
+ version: 1.0
lm_eval/eval.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
import torch.nn.functional as F

from lm_eval.api.registry import register_model
from lm_eval.models.huggingface import HFLM


@register_model("cloverlm")
class CloverLMHFLM(HFLM):
    """lm-eval HFLM wrapper for CloverLM that pads sequence lengths.

    The CloverLM kernels appear to require the sequence dimension to be a
    multiple of ``pad_multiple`` (128 by default) — TODO confirm against the
    model implementation. Every forward/generate call therefore pads the
    token ids (and attention mask) up to that multiple and strips the
    padding from the outputs again.
    """

    def __init__(self, pad_multiple: int = 128, **kwargs):
        """Forward all standard HFLM kwargs; remember the padding multiple."""
        super().__init__(**kwargs)
        self.pad_multiple = pad_multiple

    def _pad_id(self) -> int:
        """Token id used as padding filler.

        Falls back to the EOS id when the tokenizer defines no pad token
        (the original ``F.pad(..., value=None)`` would raise in that case).
        NOTE: compare against ``None`` explicitly — a pad id of 0 is valid.
        """
        pad_id = self.tokenizer.pad_token_id
        return pad_id if pad_id is not None else self.tokenizer.eos_token_id

    def _encode_pair(self, context, continuation):
        """Encode (context, continuation), guaranteeing a non-empty continuation.

        Some tokenizers merge the continuation into the context tokens so the
        default split yields an empty continuation encoding; in that case
        re-encode the concatenation and peel off at least the final token.
        """
        context_enc, continuation_enc = super()._encode_pair(context, continuation)

        if not continuation_enc and continuation:
            whole_enc = self.tok_encode(context + continuation)
            if len(whole_enc) > 1:
                # Treat the last token of the joint encoding as the continuation.
                context_enc = whole_enc[:-1]
                continuation_enc = whole_enc[-1:]
            elif whole_enc:
                # Single-token joint encoding: the whole thing is the continuation.
                context_enc = [self.prefix_token_id]
                continuation_enc = whole_enc
            else:
                # Degenerate empty encoding; fall back to prefix tokens only.
                context_enc = [self.prefix_token_id]
                continuation_enc = [self.prefix_token_id]

        return context_enc, continuation_enc

    def _model_call(self, inps: torch.Tensor, attn_mask: torch.Tensor | None = None, **kwargs):
        """Right-pad ``inps`` to a multiple of ``pad_multiple``, run the model,
        and trim the returned logits back to the original sequence length.

        Right padding is safe for scoring: the extra positions are masked out
        and their logits are discarded before returning.
        """
        orig_len = inps.shape[1]
        remainder = orig_len % self.pad_multiple

        if remainder != 0:
            pad_len = self.pad_multiple - remainder
            inps = F.pad(inps, (0, pad_len), value=self._pad_id())
            if attn_mask is not None:
                attn_mask = F.pad(attn_mask, (0, pad_len), value=0)

        logits = super()._model_call(inps, attn_mask=attn_mask, **kwargs)
        if remainder != 0:
            logits = logits[:, :orig_len, :]
        return logits

    def _model_generate(self, context, max_length, **kwargs):
        """Left-pad ``context`` to a multiple of ``pad_multiple`` before
        generation and strip the padding columns from the returned sequence.

        Padding goes on the *left* so the generated tokens stay adjacent to
        the real context; the padded prefix is sliced off the output.
        """
        orig_len = context.shape[1]
        remainder = orig_len % self.pad_multiple

        if remainder != 0:
            pad_len = self.pad_multiple - remainder
            context = F.pad(context, (pad_len, 0), value=self._pad_id())
            attention_mask = kwargs.get("attention_mask")
            if attention_mask is not None:
                kwargs["attention_mask"] = F.pad(attention_mask, (pad_len, 0), value=0)

        out = super()._model_generate(context, max_length, **kwargs)
        if remainder != 0:
            out = out[:, pad_len:]

        return out


if __name__ == "__main__":
    from lm_eval.__main__ import cli_evaluate

    cli_evaluate()
lm_eval/lambada_openai_norm.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tag:
2
+ - lambada
3
+ task: lambada_openai_norm
4
+ dataset_path: EleutherAI/lambada_openai
5
+ dataset_name: default
6
+ output_type: loglikelihood
7
+ test_split: test
8
+ doc_to_text: "{{text.split(' ')[:-1]|join(' ')}}"
9
+ doc_to_target: "{{' '+text.split(' ')[-1]}}"
10
+ should_decontaminate: true
11
+ doc_to_decontamination_query: "{{text}}"
12
+ metric_list:
13
+ - metric: perplexity
14
+ aggregation: perplexity
15
+ higher_is_better: false
16
+ - metric: acc
17
+ aggregation: mean
18
+ higher_is_better: true
19
+ - metric: acc_norm
20
+ aggregation: mean
21
+ higher_is_better: true
22
+ metadata:
23
+ version: 1.0
lm_eval/main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
def main() -> None:
    """Print the lm-eval helper package greeting."""
    greeting = "Hello from lm-eval!"
    print(greeting)


if __name__ == "__main__":
    main()
lm_eval/pyproject.toml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "cloverlm-eval"
3
+ version = "0.1.0"
4
+ description = "Evaluation setup for CloverLM using lm-eval and Quartet-II kernels"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "accelerate>=1.13.0",
9
+ "apache-tvm-ffi==0.1.9",
10
+ "certifi==2026.2.25",
11
+ "charset-normalizer==3.4.6",
12
+ "click==8.3.1",
13
+ "cuda-bindings==13.0.3",
14
+ "cuda-pathfinder==1.4.3",
15
+ "cuda-python==13.0.3",
16
+ "einops==0.8.2",
17
+ "filelock==3.25.2",
18
+ "flashinfer-python==0.6.6",
19
+ "fsspec==2026.2.0",
20
+ "idna==3.11",
21
+ "jinja2==3.1.6",
22
+ "lm-eval>=0.4.11",
23
+ "markupsafe==3.0.3",
24
+ "mpmath==1.3.0",
25
+ "networkx==3.6.1",
26
+ "ninja==1.13.0",
27
+ "numpy==2.4.3",
28
+ "nvidia-cublas==13.1.0.3",
29
+ "nvidia-cuda-cupti==13.0.85",
30
+ "nvidia-cuda-nvrtc==13.0.88",
31
+ "nvidia-cuda-runtime==13.0.96",
32
+ "nvidia-cudnn-cu13==9.15.1.9",
33
+ "nvidia-cudnn-frontend==1.19.0",
34
+ "nvidia-cufft==12.0.0.61",
35
+ "nvidia-cufile==1.15.1.6",
36
+ "nvidia-curand==10.4.0.35",
37
+ "nvidia-cusolver==12.0.4.66",
38
+ "nvidia-cusparse==12.6.3.3",
39
+ "nvidia-cusparselt-cu13==0.8.0",
40
+ "nvidia-cutlass-dsl==4.4.2",
41
+ "nvidia-cutlass-dsl-libs-base==4.4.2",
42
+ "nvidia-ml-py==13.590.48",
43
+ "nvidia-nccl-cu13==2.28.9",
44
+ "nvidia-nvjitlink==13.0.88",
45
+ "nvidia-nvshmem-cu13==3.4.5",
46
+ "nvidia-nvtx==13.0.85",
47
+ "nvtx==0.2.15",
48
+ "packaging==26.0",
49
+ "quartet2",
50
+ "requests==2.32.5",
51
+ "scipy==1.17.1",
52
+ "sympy==1.14.0",
53
+ "tabulate==0.10.0",
54
+ "tokenmonster>=1.1.12",
55
+ "torch==2.10.0+cu130",
56
+ "tqdm==4.67.3",
57
+ "transformers>=5.3.0",
58
+ "triton==3.6.0",
59
+ "typing-extensions==4.15.0",
60
+ "urllib3==2.6.3",
61
+ ]
62
+
63
+ [tool.uv.sources]
64
+ quartet2 = { git = "https://github.com/IST-DASLab/Quartet-II.git", subdirectory = "kernels", rev = "0a0d60c51602a78ae530944047e9e4973485bfef" }
lm_eval/requirements.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apache-tvm-ffi==0.1.9
2
+ certifi==2026.2.25
3
+ charset-normalizer==3.4.6
4
+ click==8.3.1
5
+ cuda-bindings==13.0.3
6
+ cuda-pathfinder==1.4.3
7
+ cuda-python==13.0.3
8
+ einops==0.8.2
9
+ filelock==3.25.2
10
+ flashinfer-python==0.6.6
11
+ fsspec==2026.2.0
12
+ idna==3.11
13
+ jinja2==3.1.6
14
+ markupsafe==3.0.3
15
+ mpmath==1.3.0
16
+ networkx==3.6.1
17
+ ninja==1.13.0
18
+ numpy==2.4.3
19
+ nvidia-cublas==13.1.0.3
20
+ nvidia-cuda-cupti==13.0.85
21
+ nvidia-cuda-nvrtc==13.0.88
22
+ nvidia-cuda-runtime==13.0.96
23
+ nvidia-cudnn-cu13==9.15.1.9
24
+ nvidia-cudnn-frontend==1.20.0
25
+ nvidia-cufft==12.0.0.61
26
+ nvidia-cufile==1.15.1.6
27
+ nvidia-curand==10.4.0.35
28
+ nvidia-cusolver==12.0.4.66
29
+ nvidia-cusparse==12.6.3.3
30
+ nvidia-cusparselt-cu13==0.8.0
31
+ nvidia-cutlass-dsl==4.4.2
32
+ nvidia-cutlass-dsl-libs-base==4.4.2
33
+ nvidia-ml-py==13.590.48
34
+ nvidia-nccl-cu13==2.28.9
35
+ nvidia-nvjitlink==13.0.88
36
+ nvidia-nvshmem-cu13==3.4.5
37
+ nvidia-nvtx==13.0.85
38
+ nvtx==0.2.15
39
+ packaging==26.0
40
+ quartet2 @ git+https://github.com/IST-DASLab/Quartet-II.git@0a0d60c51602a78ae530944047e9e4973485bfef#subdirectory=kernels
41
+ requests==2.32.5
42
+ scipy==1.17.1
43
+ sympy==1.14.0
44
+ tabulate==0.10.0
45
+ torch==2.10.0+cu130
46
+ tqdm==4.67.3
47
+ triton==3.6.0
48
+ typing-extensions==4.15.0
49
+ urllib3==2.6.3
lm_eval/uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
modeling_cloverlm.py CHANGED
@@ -111,15 +111,27 @@ class MHSA(nn.Module):
111
 
112
  dtype = Q.dtype if Q.dtype in (torch.bfloat16, torch.float16) else torch.bfloat16
113
  if attn_backend == "flash2":
114
- import flash_attn
 
 
 
 
115
  Y = flash_attn.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)
116
  elif attn_backend == "flash3":
117
  import importlib
118
- _fa3 = importlib.import_module("flash_attn_interface")
 
 
 
 
119
  Y = _fa3.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)
120
  elif attn_backend == "flash4":
121
  import importlib
122
- _fa4 = importlib.import_module("flash_attn.cute")
 
 
 
 
123
  Y = _fa4.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)[0]
124
  Y = Y.to(Q.dtype).flatten(-2, -1)
125
 
 
111
 
112
  dtype = Q.dtype if Q.dtype in (torch.bfloat16, torch.float16) else torch.bfloat16
113
  if attn_backend == "flash2":
114
+ try:
115
+ import flash_attn
116
+ except ImportError as e:
117
+ e.add_note(f"Can't run `attn_backend=flash2` because can't import flash_attn")
118
+ raise e
119
  Y = flash_attn.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)
120
  elif attn_backend == "flash3":
121
  import importlib
122
+ try:
123
+ _fa3 = importlib.import_module("flash_attn_interface")
124
+ except ImportError as e:
125
+ e.add_note(f"Can't run `attn_backend=flash3` because can't import flash_attn_interface")
126
+ raise e
127
  Y = _fa3.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)
128
  elif attn_backend == "flash4":
129
  import importlib
130
+ try:
131
+ _fa4 = importlib.import_module("flash_attn.cute")
132
+ except ImportError as e:
133
+ e.add_note(f"Can't run `attn_backend=flash4` because can't import flash_attn.cute")
134
+ raise e
135
  Y = _fa4.flash_attn_func(Q.to(dtype), K.to(dtype), V.to(dtype), causal=True, softmax_scale=1.0)[0]
136
  Y = Y.to(Q.dtype).flatten(-2, -1)
137