chan030609 committed
Commit 6a190a4 · 1 Parent(s): 07e5361

First commit
.gitattributes CHANGED
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+default_language_version:
+  python: python3
+
+ci:
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: quarterly
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: check-yaml
+      - id: check-case-conflict
+      - id: detect-private-key
+      - id: check-added-large-files
+        args: ['--maxkb=1000']
+      - id: requirements-txt-fixer
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Format imports
+
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        name: Format code
+        additional_dependencies: ['click==8.0.2']
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: 'v0.0.267'
+    hooks:
+      - id: ruff
README.md CHANGED
@@ -1,13 +1,44 @@
 ---
 title: Muse Leaderboard
-emoji: 👁
-colorFrom: pink
-colorTo: gray
+emoji: 🥇
+colorFrom: green
+colorTo: indigo
 sdk: gradio
-sdk_version: 4.37.2
 app_file: app.py
-pinned: false
+pinned: true
 license: cc-by-4.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Start the configuration
+
+Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
+
+Results files should have the following format and be stored as JSON files:
+```json
+{
+    "config": {
+        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
+        "model_name": "path of the model on the hub: org/model",
+        "model_sha": "revision on the hub",
+    },
+    "results": {
+        "task_name": {
+            "metric_name": score,
+        },
+        "task_name2": {
+            "metric_name": score,
+        }
+    }
+}
+```
+
+Request files are created automatically by this tool.
+
+If you encounter a problem on the Space, don't hesitate to restart it to remove the eval-queue, eval-queue-bk, eval-results and eval-results-bk folders it created.
+
+# Code logic for more complex edits
+
+You'll find
+- the main table's column names and properties in `src/display/utils.py`
+- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
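The README above only fixes the schema of a results file, not any concrete values. As a minimal sketch of writing one such file, assuming placeholder model, task, and metric names that do not come from this repository:

```python
import json

# Hypothetical values; only the nesting (config / results -> task -> metric) follows the README.
results = {
    "config": {
        "model_dtype": "torch.float16",
        "model_name": "my-org/my-model",  # path of the model on the Hub: org/model
        "model_sha": "abc123",            # revision on the Hub
    },
    "results": {
        "task_name": {"metric_name": 0.5},
        "task_name2": {"metric_name": 0.7},
    },
}

# One JSON file per evaluated model.
with open("results_example.json", "w") as f:
    json.dump(results, f, indent=2)
```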
app.py ADDED
@@ -0,0 +1,87 @@
+import gradio as gr
+import pandas as pd
+import os
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import HfApi
+from uploads import add_new_eval
+from utils import LEADERBOARD_PATH, CORPORA, load_data, DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS
+
+
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
+
+
+api = HfApi()
+TOKEN = os.environ.get("TOKEN", None)
+
+
+def restart_space():
+    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
+
+
+demo = gr.Blocks()
+
+
+with demo:
+    with open("asset/p1.md", 'r') as f:
+        gr.Markdown(f.read())
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            with open("asset/citation_button_text.txt", 'r') as f:
+                citation_button = gr.Textbox(
+                    value=f.read(),
+                    label="Copy the following snippet to cite these results:",
+                    elem_id="citation-button",
+                    show_copy_button=True,
+                )
+
+    with gr.Tabs():
+        with gr.TabItem("Leaderboard"):
+            with gr.Row():
+                corpus_dropdown = gr.Dropdown(
+                    choices=CORPORA,
+                    label="🔄 Select corpus",
+                    value=CORPORA[0],
+                )
+
+            leaderboard_table = gr.components.Dataframe(
+                value=load_data(CORPORA[0]),
+                interactive=True,
+                visible=True,
+            )
+
+            corpus_dropdown.change(
+                load_data,
+                inputs=[corpus_dropdown],
+                outputs=leaderboard_table
+            )
+
+    with gr.Accordion("Submit a new model for evaluation"):
+        with gr.Row():
+            with gr.Column():
+                corpus_radio = gr.Radio(['news', 'books'], value="news", label="Corpus")
+                organization_textbox = gr.Textbox(label="Organization")
+                mail_textbox = gr.Textbox(label="Contact email")
+            with gr.Column():
+                file_output = gr.File()
+
+        submit_button = gr.Button("Submit Eval")
+        submission_result = gr.Markdown()
+        submit_button.click(
+            add_new_eval,
+            [
+                corpus_radio,
+                organization_textbox,
+                mail_textbox,
+                file_output
+            ],
+            submission_result,
+        )
+
+    with open("asset/p2.md", 'r') as f:
+        gr.Markdown(f.read())
+
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=3600)
+scheduler.start()
+demo.launch(debug=True)
asset/citation_button_text.txt ADDED
@@ -0,0 +1 @@
+Citation is going to go here.
asset/p1.md ADDED
@@ -0,0 +1,3 @@
+## 🥇 MUSE Leaderboard
+
+MUSE is a comprehensive machine unlearning evaluation benchmark that assesses six desirable properties for unlearned models: (1) no verbatim memorization, (2) no knowledge memorization, (3) no privacy leakage, (4) utility preservation for non-removed data, (5) scalability with respect to removal requests, and (6) sustainability over sequential unlearning requests.
asset/p2.md ADDED
@@ -0,0 +1,18 @@
+## Expected File Format
+
+We expect your submitted file to be in CSV format and to contain the following columns:
+- `name`: Name of the evaluated unlearning method. Must be unique for each row.
+- `verbmem_f`, `privleak`, `knowmem_f`, `knowmem_r`: Evaluation scores.
+
+Following the instructions for evaluation in our [GitHub Repository](https://github.com/jaechan-repo/muse_bench) yields an output file in precisely this format.
+
+## Quick Links
+
+- [Website](https://muse-bench.github.io): Landing page for MUSE.
+- [arXiv Paper](): Detailed information about MUSE and analysis of the baseline unlearning methods.
+- [GitHub Repository](https://github.com/jaechan-repo/muse_bench): Evaluation scripts and implementations of the baseline unlearning methods.
+- [News Dataset](https://huggingface.co/datasets/muse-bench/MUSE-News), [Books Dataset](https://huggingface.co/datasets/muse-bench/MUSE-Books): The two evaluation corpora used by MUSE.
+- [Leaderboard](https://huggingface.co/spaces/muse-bench/muse_leaderboard) (You are here): Current rankings of unlearning algorithms on MUSE.
+- [Summary on Twitter](): A concise summary and key takeaways from the project.
+
+
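To make the expected submission format in asset/p2.md concrete, here is a minimal sketch that writes a valid submission CSV with pandas; the method name and score values are made-up placeholders, not real results:

```python
import pandas as pd

# Hypothetical scores for a single unlearning method; only the column names
# (name, verbmem_f, privleak, knowmem_f, knowmem_r) are required by the leaderboard.
submission = pd.DataFrame([{
    "name": "my_unlearning_method (epoch=1, lr=1e-5)",  # must be unique per row
    "verbmem_f": 12.3,
    "privleak": -5.0,
    "knowmem_f": 30.1,
    "knowmem_r": 60.2,
}])

submission.to_csv("my_submission.csv", index=False)
```

The `organization` and `id` columns are filled in automatically on submission (see `uploads.py` below), so they do not need to appear in the uploaded file.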
requirements.txt ADDED
@@ -0,0 +1,16 @@
+APScheduler
+black
+datasets
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.9
+gradio_client
+huggingface-hub>=0.18.0
+matplotlib
+numpy
+pandas
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
submission/books.csv ADDED
@@ -0,0 +1,11 @@
+name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
+target,Baseline,99.8,59.4,-57.5,66.9,"-"
+retrain,Baseline,14.3,28.9,0.0,74.5,"-"
+"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"
+"ga_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-26.5,10.7,"-"
+"ga_klr (epoch=5, lr=1e-5, bs=32)",Baseline,16.0,21.9,-40.2,37.2,"-"
+"npo (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-24.3,0.0,"-"
+"npo_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-30.8,22.8,"-"
+"npo_klr (epoch=4, lr=1e-5, bs=32)",Baseline,17.0,25.0,-43.5,44.6,"-"
+"tv (alpha=512, lr=1e-5, bs=32)",Baseline,99.7,52.4,-57.5,64.7,"-"
+"whp (alpha=256, lr=1e-5, bs=32)",Baseline,18.0,55.7,56.5,63.6,"-"
submission/news.csv ADDED
@@ -0,0 +1,11 @@
+name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
+target,Baseline,58.4,63.9,-99.8,55.2,"-"
+retrain,Baseline,20.8,33.1,0.0,55.0,"-"
+"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"
+"ga_gdr (epoch=7, lr=1e-5, bs=32)",Baseline,4.9,31.0,108.1,27.3,"-"
+"ga_klr (epoch=10, lr=1e-5, bs=32)",Baseline,27.4,50.2,-96.1,44.8,"-"
+"npo (epoch=10, lr=1e-5, bs=32)",Baseline,0.0,0.0,24.4,0.0,"-"
+"npo_gdr (epoch=10, lr=1e-5, bs=32)",Baseline,1.2,54.6,105.8,40.5,"-"
+"npo_klr (epoch=10, lr=1e-5, bs=32)",Baseline,26.9,49.0,-95.8,45.4,"-"
+"tv (alpha=512, lr=1e-5, bs=32)",Baseline,57.2,66.2,-99.8,55.8,"-"
+"whp (alpha=4, lr=1e-5, bs=32)",Baseline,19.7,21.2,109.6,28.3,"-"
uploads.py ADDED
@@ -0,0 +1,77 @@
+from email.utils import parseaddr
+from huggingface_hub import HfApi
+import io
+import os
+import base64
+import pandas as pd
+from utils import DEFAULT_COLUMNS, DEFAULT_METRICS, LEADERBOARD_PATH
+
+
+api = HfApi()
+TOKEN = os.environ.get("TOKEN", None)
+YEAR_VERSION = "2024"
+
+
+def format_error(msg):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def format_warning(msg):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def format_log(msg):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def add_new_eval(
+    corpus: str,
+    organization: str,
+    mail: str,
+    fpath: str,
+):
+    for input in [corpus, organization, mail, fpath]:
+        if not input:
+            return format_warning("Please fill all the fields.")
+
+    if organization == 'Baseline':
+        return format_warning("Your organization name cannot be Baseline.")
+
+    _, parsed_mail = parseaddr(mail)
+    if '@' not in parsed_mail:
+        return format_warning("Please provide a valid email address.")
+
+    # Load the existing leaderboard file and the submitted file
+    io_path = f"submission/{corpus}.csv"
+    df = pd.read_csv(io_path)
+    df_new = pd.read_csv(fpath)
+
+    for col in DEFAULT_METRICS:
+        if col not in df_new.columns:
+            return format_warning(f"Missing column in the submitted file: {col}")
+
+    df_new['organization'] = organization
+    df_new['id'] = base64.b64encode(os.urandom(6)).decode('ascii')
+    df_new = df_new[DEFAULT_COLUMNS]
+
+    df = pd.concat([df, df_new]).reset_index(drop=True)
+    buffer = io.BytesIO()
+    df.to_csv(buffer, index=False)  # Write the DataFrame to a buffer in CSV format
+    buffer.seek(0)  # Rewind the buffer to the beginning
+
+    api.delete_file(
+        repo_id=LEADERBOARD_PATH,
+        path_in_repo=io_path,
+        token=TOKEN,
+        repo_type='space',
+    )
+
+    api.upload_file(
+        repo_id=LEADERBOARD_PATH,
+        path_in_repo=io_path,
+        path_or_fileobj=buffer,
+        token=TOKEN,
+        repo_type='space',
+    )
+
+    return format_log(f"Submitted to {corpus} by {organization} successfully.\nPlease refresh the leaderboard, and wait a bit to see the score displayed.")
utils.py ADDED
@@ -0,0 +1,34 @@
+import pandas as pd
+
+LEADERBOARD_PATH = "chan0369/muse_leaderboard"
+
+DEFAULT_METRICS = [
+    'verbmem_f',
+    'knowmem_f',
+    'privleak',
+    'knowmem_r',
+]
+
+DEFAULT_COLUMNS = ['name', 'organization'] + DEFAULT_METRICS + ['id']
+
+DEFAULT_COLUMN_LABELS = [
+    'Method Name',
+    'Submitted By',
+    'VerbMem ⬇️',
+    'KnowMem Forget ⬇️',
+    'PrivLeak',
+    'KnowMem Retain (Utility) ⬆',
+    'Submission Id'
+]
+
+CORPORA = ['news', 'books']
+
+
+def load_data(corpus):
+    assert corpus in CORPORA
+    df = pd.read_csv(f"submission/{corpus}.csv")
+    df = df[DEFAULT_COLUMNS].rename(columns={
+        k: v for k, v in zip(DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS)
+    })
+    return df
+