Spaces:

mlabonne
/

AutoMerger

Paused

File size: 15,724 Bytes

ba6030d
373dfeb
fc17cd4
ba6030d
 
 
 
373dfeb
ba6030d
 
f7e5eef
373dfeb
bbfe612
ba6030d
 
 
 
 
 
 
 
 
 
 
2b919a2
78c51c6
ba6030d
 
fc17cd4
 
 
 
 
 
 
7e9582a
 
fc17cd4
 
 
 
 
 
7e9582a
fc17cd4
 
05ca648
7e9582a
 
 
05ca648
 
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e4ec7d
ba6030d
7e9582a
 
 
 
 
 
5e4ec7d
ba6030d
 
 
 
 
 
 
 
a66d9b2
ba6030d
 
a66d9b2
 
 
 
 
 
 
 
 
ba6030d
 
 
a66d9b2
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6075314
ba6030d
6075314
ba6030d
 
 
 
 
e815731
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6075314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba6030d
6075314
 
 
 
 
 
ba6030d
6075314
ba6030d
6075314
 
 
 
 
 
380a2f5
6075314
 
 
380a2f5
6075314
b43b73b
6075314
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7e5eef
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
2b919a2
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b43b73b
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f892546
ba6030d
 
 
f7e5eef
 
 
ba6030d
 
 
 
 
 
 
358b5c8
ba6030d
 
 
 
f7e5eef
 
 
ba6030d
 
 
 
 
 
 
 
 
 
 
 
 
 
9822f81
ba6030d
c385bf7
79883c8
ba6030d
 
 
 
29f310f
ba6030d
 
 
 
 
 
 
 
 
 
373dfeb
 
f7e5eef
 
 
79883c8
373dfeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7e5eef
 
 
373dfeb
 
 
 
 
f7e5eef
 
 
 
e815731
 
 
 
 
 
 
 
 
f7e5eef
 
7bf890c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba6030d
f7e5eef
 
 
ba6030d
 
 
 
 
 
 
 
 
76f067b
ba6030d
 
358b5c8
ba6030d
 
 
 
 
a173027
 
ba6030d
 
 
a173027
ba6030d
 
 
db81026
acc384c
ba6030d
 
 
4858507
ba6030d
 
598bf51
 
ba6030d
 
598bf51
 
ba6030d
f7e5eef
58cfbcb
f7e5eef
 
ba6030d
58cfbcb
598bf51
acc384c
66a64db
 
 
 
 
 
 
fc17cd4
ba6030d
 
 
 
 
 
 
 
 
 
 
 
fc17cd4
 
ba6030d
 
 
72e66d3
db81026
7c6a09f
ba6030d
 
d89fc74
8a5e743
 
 
587842d
8a5e743
 
d9adf9e
5f0417a
8a5e743
ba6030d
 
8a5e743
1d5f0b3
8a5e743

import os
import re
import sys
import time
import random
import yaml
import subprocess
from io import StringIO

import runpod
import shutil
import requests
import gradio as gr
import pandas as pd
from jinja2 import Template
from huggingface_hub import ModelCard, ModelCardData, HfApi, repo_info
from huggingface_hub.utils import RepositoryNotFoundError

# Read credentials from the environment (os.environ.get yields None when unset)
HF_TOKEN = os.environ.get("HF_TOKEN")  # handed to HfApi(token=...) in merge_loop
runpod.api_key = os.environ.get("RUNPOD_TOKEN")

# Parameters
USERNAME = 'automerger'  # Hub namespace used to build "<username>/<model_name>" repo ids
N_ROWS = 15  # number of top leaderboard rows kept by make_df
WAIT_TIME = 10800  # seconds passed to time.sleep between merge_loop runs (180 minutes)


# Logger from https://github.com/gradio-app/gradio/issues/2362
class Logger:
    """File-like object that duplicates every write to stdout and a log file."""

    def __init__(self, filename):
        # Keep a handle on whatever sys.stdout is right now, so console output
        # survives once an instance of this class is installed as sys.stdout.
        self.terminal = sys.stdout
        # The log file is truncated on creation ("w" mode).
        self.log = open(filename, "w")

    def write(self, message):
        """Write *message* to the console stream, then to the log file."""
        for sink in (self.terminal, self.log):
            sink.write(message)

    def flush(self):
        """Flush the console stream, then the log file."""
        for sink in (self.terminal, self.log):
            sink.flush()

    def isatty(self):
        """Always report a non-interactive stream."""
        return False


def read_logs():
    """
    Return the full text of output.log.

    stdout is flushed first so pending output is written before the read.
    """
    sys.stdout.flush()
    with open("output.log", "r") as log_file:
        contents = log_file.read()
    return contents

        
def create_dataset() -> bool:
    """
    Run the Open LLM Leaderboard scraper with its -csv flag.

    Returns:
        True when the scraper exits with status 0 (its stdout is printed),
        False when it exits with a non-zero status (its stderr is printed).
    """
    scraper_cmd = ["python3", "scrape-open-llm-leaderboard/main.py", "-csv"]

    try:
        completed = subprocess.run(scraper_cmd, check=True,
                                   capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f"scrape-open-llm-leaderboard: {err.stderr}")
        return False

    print(f"scrape-open-llm-leaderboard: {completed.stdout}")
    return True


def merge_models() -> None:
    """
    Use mergekit to create a merge.

    Runs ``mergekit-yaml`` on ``config.yaml`` with ``/data/merge`` as output
    directory. The tool's stdout and stderr are appended to ``output.log``.
    A non-zero exit status is reported with print() and is not re-raised.
    """
    command = ["mergekit-yaml", "config.yaml", "/data/merge", "--copy-tokenizer", "--transformers_cache", "/data"]

    with open("output.log", "a") as log_file:
        try:
            subprocess.run(command, check=True, stdout=log_file,
                           stderr=log_file, text=True)
        except subprocess.CalledProcessError as e:
            # Both streams were redirected to the log file, so e.stdout and
            # e.stderr are None here; the exit code is the useful detail.
            print(f"Error: mergekit {command} failed with exit code {e.returncode} (see output.log)")
        else:
            # Nothing is captured in the CompletedProcess for the same reason,
            # so report completion instead of printing a None stdout.
            print("mergekit: merge completed")


def make_df(file_path: str, n_rows: int) -> pd.DataFrame:
    """
    Create a filtered dataset from the Open LLM Leaderboard.

    Args:
        file_path: Path of the leaderboard CSV to read.
        n_rows: Maximum number of rows to return.

    Returns:
        The rows that are 8B ``LlamaForCausalLM`` models, available on the
        hub, not flagged, not MoE, with "Original" weight type and whose name
        does not contain one of the excluded substrings. Bookkeeping columns
        are dropped, duplicate model names removed, and the result is sorted
        by MMLU (descending) and truncated to ``n_rows``.
    """
    columns = ["Available on the hub", "Model sha", "T", "Type", "Precision",
               "Architecture", "Weight type", "Hub ❤️", "Flagged", "MoE"]
    # Read the file the caller asked for instead of a hard-coded name.
    ds = pd.read_csv(file_path, encoding='utf-8')
    model_names = ds["Model"].str.lower()
    keep = (
        (ds["#Params (B)"] == 8)
        & (ds["Architecture"] == "LlamaForCausalLM")
        & (ds["Available on the hub"] == True)
        # Flagged and MoE entries must be excluded: the merge configs assume
        # dense Llama models, and flagged models should not be merge sources.
        & (ds["Flagged"] == False)
        & (~model_names.str.contains("yi"))
        & (~model_names.str.contains("9b"))
        & (~model_names.str.contains("8xqmff94/slm"))
        & (ds["MoE"] == False)
        & (ds["Weight type"] == "Original")
    )
    df = (
        ds[keep]
        .drop(columns=columns)
        .drop_duplicates(subset=["Model"])
        .sort_values(by="MMLU", ascending=False)
        .iloc[:n_rows]
    )
    return df


def repo_exists(repo_id: str) -> bool:
    """
    Tell whether repo_info() can resolve ``repo_id``.

    Returns False only when the lookup raises RepositoryNotFoundError; any
    other exception propagates to the caller.
    """
    try:
        repo_info(repo_id)
    except RepositoryNotFoundError:
        return False
    else:
        return True


def get_name(models: list[pd.Series], username: str, version=0) -> str:
    """
    Build a repository name for the merge that is not taken yet.

    The name concatenates the capitalized first dash-separated token of each
    of the two model names and appends "-8B" (or "-v<version>-8B" when
    ``version`` is positive).

    Args:
        models: The two selected leaderboard rows; each needs a "Model" field.
        username: Hub namespace the name is checked against.
        version: First version number to try; 0 means "no version suffix".

    Returns:
        The first candidate name for which ``<username>/<name>`` does not
        exist, trying increasing version numbers.
    """
    stem = models[0]["Model"].split("/")[-1].split("-")[0].capitalize() \
           + models[1]["Model"].split("/")[-1].split("-")[0].capitalize()

    # Iterate instead of recursing: the recursive form dropped the result of
    # the nested call and therefore returned a name that was already taken.
    while True:
        if version > 0:
            model_name = f"{stem}-v{version}-8B"
        else:
            model_name = f"{stem}-8B"

        if not repo_exists(f"{username}/{model_name}"):
            return model_name
        version += 1
    

def get_license(models: list[pd.Series]) -> str:
    """
    Pick the license for a merge from the "Hub License" of its two sources.

    Precedence: "cc-by-nc-4.0" if either source uses it, then "apache-2.0" if
    either source uses it, then "MIT" if both sources use it. Any other
    combination falls back to "cc-by-nc-4.0".
    """
    source_licenses = (models[0]["Hub License"], models[1]["Hub License"])

    if "cc-by-nc-4.0" in source_licenses:
        return "cc-by-nc-4.0"
    if "apache-2.0" in source_licenses:
        return "apache-2.0"
    if all(item == "MIT" for item in source_licenses):
        return "MIT"
    return "cc-by-nc-4.0"


def create_config(models: list[pd.Series]) -> str:
    """
    Build a mergekit YAML config for the two models and save it.

    One of three templates (slerp, dare_ties, model_stock) is drawn at random
    with weights 0.3 / 0.6 / 0.1. The chosen text is written to
    ``config.yaml`` in the current directory.

    Args:
        models: The two selected leaderboard rows; each needs a "Model" field.

    Returns:
        The YAML text that was written.
    """
    slerp_config = f"""
slices:
  - sources:
      - model: {models[0]["Model"]}
        layer_range: [0, 32]
      - model: {models[1]["Model"]}
        layer_range: [0, 32]
merge_method: slerp
base_model: {models[0]["Model"]}
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
random_seed: 0
    """
    # int8_mask must be nested under the global "parameters" key; left at
    # column 0 it made "parameters" null and added a stray top-level key.
    dare_config = f"""
models:
- model: {models[0]["Model"]}
  # No parameters necessary for base model
- model: {models[1]["Model"]}
  parameters:
    density: 0.53
    weight: 0.6
merge_method: dare_ties
base_model: {models[0]["Model"]}
parameters:
  int8_mask: true
dtype: bfloat16
random_seed: 0
"""
    stock_config = f"""
models:
  - model: meta-llama/Meta-Llama-3-8B
  - model: {models[0]["Model"]}
  - model: {models[1]["Model"]}
merge_method: model_stock
base_model: meta-llama/Meta-Llama-3-8B
dtype: bfloat16    
"""
    yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.3, 0.6, 0.1], k=1)[0]

    with open('config.yaml', 'w', encoding="utf-8") as f:
        f.write(yaml_config)

    return yaml_config


def create_model_card(yaml_config: str, model_name: str, username: str, license: str) -> None:
    """
    Render the model card for a merge and save it as /data/merge/README.md.

    Args:
        yaml_config: The mergekit YAML used for the merge; embedded verbatim
            in the card and parsed to find the source models.
        model_name: Name of the merged model (without namespace).
        username: Hub namespace used in the usage snippet.
        license: License identifier written to the card metadata.

    Raises:
        Exception: if the config has neither a "models", "parameters" nor
            "slices" top-level key.
    """
    template_text = """
---
license: {{ license }}
base_model:
{%- for model in models %}
  - {{ model }}
{%- endfor %}
tags:
- merge
- mergekit
- lazymergekit
- automerger
---

# {{ model_name }}

{{ model_name }} is an automated merge created by [Maxime Labonne](https://huggingface.co/mlabonne) using the following configuration.

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }})
{%- endfor %}

## 🧩 Configuration

```yaml
{{- yaml_config -}}

```

## 💻 Usage

```python
!pip install -qU transformers accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "{{ username }}/{{ model_name }}"
messages = [{"role": "user", "content": "What is a large language model?"}]

tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```
"""

    # Create a Jinja template object
    jinja_template = Template(template_text.strip())

    # Get list of models from config
    data = yaml.safe_load(yaml_config)
    if "models" in data:
        entries = data["models"]
        # Entries that carry their own parameters are the merged-in models.
        models = [entry["model"] for entry in entries if "parameters" in entry]
        if not models:
            # Configs without per-model parameters (e.g. model_stock) would
            # otherwise produce a card listing no source models at all; list
            # every entry except the base model instead.
            base_model = data.get("base_model")
            models = [entry["model"] for entry in entries if entry["model"] != base_model]
    elif "parameters" in data:
        models = [source["model"] for source in data["slices"][0]["sources"]]
    elif "slices" in data:
        models = [piece["sources"][0]["model"] for piece in data["slices"]]
    else:
        raise Exception("No models or slices found in yaml config")

    # Fill the template
    content = jinja_template.render(
        model_name=model_name,
        models=models,
        yaml_config=yaml_config,
        username=username,
        license=license
    )

    # Save the model card
    card = ModelCard(content)
    card.save('/data/merge/README.md')


def upload_model(api: HfApi, username: str, model_name: str) -> None:
    """
    Push the contents of /data/merge to ``<username>/<model_name>``.

    The model repository is created first; an already existing repository is
    accepted (exist_ok=True).
    """
    target_repo = f"{username}/{model_name}"
    api.create_repo(repo_id=target_repo, repo_type="model", exist_ok=True)
    api.upload_folder(repo_id=target_repo, folder_path="/data/merge")


def create_pod(model_name: str, username: str, n=10, wait_seconds=10):
    """
    Start a RunPod pod that evaluates ``<username>/<model_name>``.

    Args:
        model_name: Name of the merged model (without namespace).
        username: Hub namespace of the merged model.
        n: Maximum number of creation attempts.
        wait_seconds: Pause between two attempts.

    Returns:
        Whatever runpod.create_pod returns for the first successful attempt.

    Raises:
        Exception: the error of the last attempt once all ``n`` have failed.
    """
    for attempt_no in range(1, n + 1):
        try:
            # The settings are built inside the try block so that a missing
            # GITHUB_TOKEN goes through the same retry path as an API error.
            pod = runpod.create_pod(
                name=f"Automerge {model_name} on Nous",
                image_name="runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel-ubuntu22.04",
                gpu_type_id="NVIDIA GeForce RTX 3090",
                cloud_type="COMMUNITY",
                gpu_count=1,
                volume_in_gb=0,
                container_disk_in_gb=50,
                template_id="au6nz6emhk",
                env={
                    "BENCHMARK": "nous",
                    "MODEL_ID": f"{username}/{model_name}",
                    "REPO": "https://github.com/mlabonne/llm-autoeval.git",
                    "TRUST_REMOTE_CODE": False,
                    "PRIVATE_GIST": False,
                    "YALL_GIST_ID": "56ebbd012d942a6b749db5243de5740f",
                    "DEBUG": False,
                    "GITHUB_API_TOKEN": os.environ["GITHUB_TOKEN"],
                },
            )
            print("Evaluation started.")
            return pod
        except Exception as err:
            print(f"Attempt {attempt_no} failed with error: {err}")
            if attempt_no >= n:
                print("All attempts failed. Giving up.")
                raise
            print(f"Waiting {wait_seconds} seconds before retrying...")
            time.sleep(wait_seconds)


def download_leaderboard():
    """
    Download the gist that contains the leaderboard.

    Returns:
        The raw gist body decoded as UTF-8 text.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    url = "https://gist.githubusercontent.com/automerger/56ebbd012d942a6b749db5243de5740f/raw"
    # requests never times out by default; bound the wait so a stalled
    # connection cannot block the caller indefinitely.
    response = requests.get(url, timeout=30)
    # Fail here rather than passing an HTTP error page on as table content.
    response.raise_for_status()
    return response.content.decode('utf-8')


def convert_markdown_table_to_dataframe(md_content):
    """
    Converts markdown table to Pandas DataFrame.

    Args:
        md_content: Text of a pipe-delimited markdown table whose second line
            is the header separator row.

    Returns:
        A DataFrame with stripped column names and without the separator row.
        Cell values are left as parsed (surrounding whitespace is kept) and
        the remaining rows keep their original index labels.
    """
    # Remove leading and trailing | characters
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # Create DataFrame from cleaned content. The separator is a regex, so it
    # is written as a raw string: "\|" in a plain literal is an invalid
    # escape sequence.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # Remove the first row after the header
    df = df.drop(0, axis=0)

    # Strip whitespace from column names
    df.columns = df.columns.str.strip()

    return df


def get_dataframe():
    """
    Fetch the leaderboard gist and return it parsed as a DataFrame.

    Used as the value callback of the Gradio dataframe.
    """
    return convert_markdown_table_to_dataframe(download_leaderboard())


def clear_data():
    """
    Delete the files that sit directly in /data.

    Any OSError is reported with print() and not re-raised.

    NOTE(review): only top-level entries that are files are unlinked;
    sub-directories of /data and their contents are left in place. Confirm
    this is enough to keep the Space from running out of storage.
    """
    target_dir = "/data"
    try:
        with os.scandir(target_dir) as listing:
            for item in listing:
                if not item.is_file():
                    continue
                os.unlink(item.path)
        print("All files deleted successfully.")
    except OSError:
        print("Error occurred while deleting files.")


def get_size(start_path):
    """
    Return the total size in bytes of all files found below ``start_path``.

    The tree is traversed with os.walk; files that are symbolic links are
    not counted.
    """
    return sum(
        os.path.getsize(candidate)
        for folder, _subdirs, names in os.walk(start_path)
        for candidate in (os.path.join(folder, name) for name in names)
        if not os.path.islink(candidate)
    )


def human_readable_size(size, decimal_places=2):
    """
    Format a byte count with a binary unit suffix (B, KB, ... PB).

    Args:
        size: Number of bytes.
        decimal_places: Digits shown after the decimal point.

    Returns:
        A string such as "1.50 KB". Values of 1024 PB and above stay
        expressed in PB.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    # Only step up while a larger unit exists; dividing again after the last
    # unit would report e.g. 1024 PB as "1.00 PB".
    for unit in units[:-1]:
        if size < 1024.0:
            return f"{size:.{decimal_places}f} {unit}"
        size /= 1024.0
    return f"{size:.{decimal_places}f} {units[-1]}"


def merge_loop():
    """
    Main function that orchestrates the merge.

    Steps, in order: scrape the leaderboard, pick two models at random from
    the filtered top N_ROWS, derive a name and a license, write the mergekit
    config, run the merge, write the model card, upload the result, clear
    the files in /data and start the evaluation pod.

    Returns early (after printing a message) when the leaderboard dataset
    cannot be created.

    Raises:
        ValueError: if fewer than two candidate models survive the filter.
    """
    # Start HF API
    api = HfApi(token=HF_TOKEN)

    # Create dataset (proceed only if successful)
    if not create_dataset():
        print("Failed to create dataset. Skipping merge loop.")
        return

    df = make_df("open-llm-leaderboard.csv", N_ROWS)
    # Explicit check instead of assert: asserts are stripped under -O, and
    # sampling below needs two rows, not just a non-empty frame.
    if len(df) < 2:
        raise ValueError(
            f"Need at least two candidate models to merge, found {len(df)}. "
            "Cannot proceed with merge loop."
        )

    # Sample two models
    dir_path = "/data"
    sample = df.sample(n=2)
    models = [sample.iloc[i] for i in range(2)]

    # Get model name
    model_name = get_name(models, USERNAME, version=0)
    print("="*60)
    print(f"Model name: {model_name}")

    # Get model license
    license = get_license(models)
    print(f"License: {license}")

    # Merge configs
    yaml_config = create_config(models)
    print(f"YAML config:{yaml_config}")
    print(f"Data size: {human_readable_size(get_size(dir_path))}")

    # Merge models
    merge_models()
    print("Model merged!")

    # Create model card
    print("Create model card")
    create_model_card(yaml_config, model_name, USERNAME, license)

    # Upload model
    print("Upload model")
    upload_model(api, USERNAME, model_name)

    # Clear data
    print("Clear data")
    clear_data()

    # Evaluate model on Runpod
    print("Start evaluation")
    create_pod(model_name, USERNAME)
    print(f"Waiting for {WAIT_TIME/60} minutes...")

# Point HF_DATASETS_CACHE at a directory under /data and echo the value back
os.environ['HF_DATASETS_CACHE'] = "/data/hfcache/"
print(os.environ['HF_DATASETS_CACHE'])

# Install scrape-open-llm-leaderboard and mergekit. The commands run in this
# order and the first failure aborts start-up (check=True).
for command in (
    ["git", "clone", "-q", "https://github.com/Weyaxi/scrape-open-llm-leaderboard"],
    ["pip", "install", "-r", "scrape-open-llm-leaderboard/requirements.txt"],
    ["git", "clone", "https://github.com/arcee-ai/mergekit.git"],
    ["pip", "install", "-e", "mergekit"],
):
    subprocess.run(command, check=True, capture_output=True, text=True)

# From here on everything printed is also written to output.log
sys.stdout = Logger("output.log")

# Gradio interface
# HTML header rendered at the top of the page.
title = """
<div align="center">
  <p style="font-size: 44px;">♾️ AutoMerger</p>
  <p style="font-size: 20px;">📃 <a href="https://huggingface.co/automerger">Merged models</a> • 🏆 <a href="https://huggingface.co/spaces/automerger/Yet_Another_LLM_Leaderboard">Leaderboard</a> • 📝 <a href="https://huggingface.co/blog/mlabonne/merge-models">Article</a> • 🐦 <a href="https://twitter.com/maximelabonne">Follow me on X</a></p>
  <p><em>AutoMerger selects two Llama 3 8B models on top of the Open LLM Leaderboard, combine them with a merge technique, and evaluate the resulting model.</em></p>
</div>
"""
# HTML credits rendered at the bottom of the page.
footer = '<div align="center"><p><em>Special thanks to <a href="https://huggingface.co/Weyaxi">Weyaxi</a> for the <a href="https://github.com/Weyaxi/scrape-open-llm-leaderboard">Open LLM Leaderboard Scraper</a>, <a href="https://github.com/cg123">Charles Goddard</a> for <a href="https://github.com/arcee-ai/mergekit">mergekit</a>, and <a href="https://huggingface.co/MaziyarPanahi">Maziyar Panahi</a> for making <a href="https://huggingface.co/collections/MaziyarPanahi/gguf-65afc99c3997c4b6d2d9e1d5">GGUF versions</a> of these automerges.</em></p></div>'
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown(title)
    logs = gr.Textbox(label="Logs")
    # Fill the textbox from read_logs on page load and re-run it with every=10
    # (presumably seconds — confirm against the Gradio version in use).
    demo.load(read_logs, None, logs, every=10)
    # The table content comes from get_dataframe, refreshed with every=3600.
    leaderboard = gr.Dataframe(value=get_dataframe, datatype=["markdown", "number", "number", "number", "number", "number"], every=3600)
    gr.Markdown(footer)
# NOTE(review): prevent_thread_lock=True is expected to make launch() return
# instead of blocking, so the statements after it can run — confirm.
demo.queue(default_concurrency_limit=50).launch(server_name="0.0.0.0", show_error=True, prevent_thread_lock=True)

print("Start AutoMerger...")

# Main loop
# Runs forever: one merge/upload/evaluation cycle, then a pause of WAIT_TIME
# seconds. An exception escaping merge_loop() ends the loop.
while True:
    merge_loop()
    time.sleep(WAIT_TIME)