chan030609 committed
Commit 6a190a4 · 1 Parent(s): 07e5361

First commit
.gitattributes CHANGED
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+default_language_version:
+  python: python3
+
+ci:
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: quarterly
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: check-yaml
+      - id: check-case-conflict
+      - id: detect-private-key
+      - id: check-added-large-files
+        args: ['--maxkb=1000']
+      - id: requirements-txt-fixer
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Format imports
+
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        name: Format code
+        additional_dependencies: ['click==8.0.2']
+
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    # Ruff version.
+    rev: 'v0.0.267'
+    hooks:
+      - id: ruff
README.md CHANGED
@@ -1,13 +1,44 @@
 ---
 title: Muse Leaderboard
-emoji: 👁
-colorFrom: pink
-colorTo: gray
+emoji: 🥇
+colorFrom: green
+colorTo: indigo
 sdk: gradio
-sdk_version: 4.37.2
 app_file: app.py
-pinned: false
+pinned: true
 license: cc-by-4.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Start the configuration
+
+Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
+
+Results files should have the following format and be stored as JSON files:
+```json
+{
+    "config": {
+        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
+        "model_name": "path of the model on the hub: org/model",
+        "model_sha": "revision on the hub",
+    },
+    "results": {
+        "task_name": {
+            "metric_name": score,
+        },
+        "task_name2": {
+            "metric_name": score,
+        }
+    }
+}
+```
+
+Request files are created automatically by this tool.
+
+If you encounter a problem on the Space, don't hesitate to restart it to remove the eval-queue, eval-queue-bk, eval-results and eval-results-bk folders it created.
+
+# Code logic for more complex edits
+
+You'll find
+- the main table's column names and properties in `src/display/utils.py`
+- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
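The README above only fixes the schema of a results file, not any concrete values. As a minimal sketch of writing one such file, assuming placeholder model, task, and metric names that do not come from this repository:

```python
import json

# Hypothetical values; only the nesting (config / results -> task -> metric) follows the README.
results = {
    "config": {
        "model_dtype": "torch.float16",
        "model_name": "my-org/my-model",  # path of the model on the Hub: org/model
        "model_sha": "abc123",            # revision on the Hub
    },
    "results": {
        "task_name": {"metric_name": 0.5},
        "task_name2": {"metric_name": 0.7},
    },
}

# One JSON file per evaluated model.
with open("results_example.json", "w") as f:
    json.dump(results, f, indent=2)
```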
app.py ADDED
@@ -0,0 +1,87 @@
+import gradio as gr
+import pandas as pd
+import os
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import HfApi
+from uploads import add_new_eval
+from utils import LEADERBOARD_PATH, CORPORA, load_data, DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS
+
+
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
+
+
+api = HfApi()
+TOKEN = os.environ.get("TOKEN", None)
+
+
+def restart_space():
+    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
+
+
+demo = gr.Blocks()
+
+
+with demo:
+    with open("asset/p1.md", 'r') as f:
+        gr.Markdown(f.read())
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            with open("asset/citation_button_text.txt", 'r') as f:
+                citation_button = gr.Textbox(
+                    value=f.read(),
+                    label="Copy the following snippet to cite these results:",
+                    elem_id="citation-button",
+                    show_copy_button=True,
+                )
+
+    with gr.Tabs():
+        with gr.TabItem("Leaderboard"):
+            with gr.Row():
+                corpus_dropdown = gr.Dropdown(
+                    choices=CORPORA,
+                    label="🔄 Select corpus",
+                    value=CORPORA[0],
+                )
+
+            leaderboard_table = gr.components.Dataframe(
+                value=load_data(CORPORA[0]),
+                interactive=True,
+                visible=True,
+            )
+
+            corpus_dropdown.change(
+                load_data,
+                inputs=[corpus_dropdown],
+                outputs=leaderboard_table
+            )
+
+    with gr.Accordion("Submit a new model for evaluation"):
+        with gr.Row():
+            with gr.Column():
+                corpus_radio = gr.Radio(['news', 'books'], value="news", label="Corpus")
+                organization_textbox = gr.Textbox(label="Organization")
+                mail_textbox = gr.Textbox(label="Contact email")
+            with gr.Column():
+                file_output = gr.File()
+
+        submit_button = gr.Button("Submit Eval")
+        submission_result = gr.Markdown()
+        submit_button.click(
+            add_new_eval,
+            [
+                corpus_radio,
+                organization_textbox,
+                mail_textbox,
+                file_output
+            ],
+            submission_result,
+        )
+
+    with open("asset/p2.md", 'r') as f:
+        gr.Markdown(f.read())
+
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=3600)
+scheduler.start()
+demo.launch(debug=True)
asset/citation_button_text.txt ADDED
@@ -0,0 +1 @@
+Citation is going to go here.
asset/p1.md ADDED
@@ -0,0 +1,3 @@
+## 🥇 MUSE Leaderboard
+
+MUSE is a comprehensive machine unlearning evaluation benchmark that assesses six desirable properties for unlearned models: (1) no verbatim memorization, (2) no knowledge memorization, (3) no privacy leakage, (4) utility preservation for non-removed data, (5) scalability with respect to removal requests, and (6) sustainability over sequential unlearning requests.
asset/p2.md ADDED
@@ -0,0 +1,18 @@
+## Expected File Format
+
+We expect your submitted file to be in CSV format and to contain the following columns:
+- `name`: Name of the evaluated unlearning method. Must be unique for each row.
+- `verbmem_f`, `privleak`, `knowmem_f`, `knowmem_r`: Evaluation scores.
+
+Following the instructions for evaluation in our [GitHub Repository](https://github.com/jaechan-repo/muse_bench) yields an output file in precisely this format.
+
+## Quick Links
+
+- [Website](https://muse-bench.github.io): Landing page for MUSE.
+- [arXiv Paper](): Detailed information about MUSE and analysis of the baseline unlearning methods.
+- [GitHub Repository](https://github.com/jaechan-repo/muse_bench): Evaluation scripts and implementations of the baseline unlearning methods.
+- [News Dataset](https://huggingface.co/datasets/muse-bench/MUSE-News), [Books Dataset](https://huggingface.co/datasets/muse-bench/MUSE-Books): The two evaluation corpora used by MUSE.
+- [Leaderboard](https://huggingface.co/spaces/muse-bench/muse_leaderboard) (You are here): Current rankings of unlearning algorithms on MUSE.
+- [Summary on Twitter](): A concise summary and key takeaways from the project.
+
+
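To make the expected submission format in asset/p2.md concrete, here is a minimal sketch that writes a valid submission CSV with pandas; the method name and score values are made-up placeholders, not real results:

```python
import pandas as pd

# Hypothetical scores for a single unlearning method; only the column names
# (name, verbmem_f, privleak, knowmem_f, knowmem_r) are required by the leaderboard.
submission = pd.DataFrame([{
    "name": "my_unlearning_method (epoch=1, lr=1e-5)",  # must be unique per row
    "verbmem_f": 12.3,
    "privleak": -5.0,
    "knowmem_f": 30.1,
    "knowmem_r": 60.2,
}])

submission.to_csv("my_submission.csv", index=False)
```

The `organization` and `id` columns are filled in automatically on submission (see `uploads.py` below), so they do not need to appear in the uploaded file.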
requirements.txt ADDED
@@ -0,0 +1,16 @@
+APScheduler
+black
+datasets
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.9
+gradio_client
+huggingface-hub>=0.18.0
+matplotlib
+numpy
+pandas
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
submission/books.csv ADDED
@@ -0,0 +1,11 @@
+name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
+target,Baseline,99.8,59.4,-57.5,66.9,"-"
+retrain,Baseline,14.3,28.9,0.0,74.5,"-"
+"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"
+"ga_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-26.5,10.7,"-"
+"ga_klr (epoch=5, lr=1e-5, bs=32)",Baseline,16.0,21.9,-40.2,37.2,"-"
+"npo (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-24.3,0.0,"-"
+"npo_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-30.8,22.8,"-"
+"npo_klr (epoch=4, lr=1e-5, bs=32)",Baseline,17.0,25.0,-43.5,44.6,"-"
+"tv (alpha=512, lr=1e-5, bs=32)",Baseline,99.7,52.4,-57.5,64.7,"-"
+"whp (alpha=256, lr=1e-5, bs=32)",Baseline,18.0,55.7,56.5,63.6,"-"
submission/news.csv ADDED
@@ -0,0 +1,11 @@
+name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
+target,Baseline,58.4,63.9,-99.8,55.2,"-"
+retrain,Baseline,20.8,33.1,0.0,55.0,"-"
+"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"
+"ga_gdr (epoch=7, lr=1e-5, bs=32)",Baseline,4.9,31.0,108.1,27.3,"-"
+"ga_klr (epoch=10, lr=1e-5, bs=32)",Baseline,27.4,50.2,-96.1,44.8,"-"
+"npo (epoch=10, lr=1e-5, bs=32)",Baseline,0.0,0.0,24.4,0.0,"-"
+"npo_gdr (epoch=10, lr=1e-5, bs=32)",Baseline,1.2,54.6,105.8,40.5,"-"
+"npo_klr (epoch=10, lr=1e-5, bs=32)",Baseline,26.9,49.0,-95.8,45.4,"-"
+"tv (alpha=512, lr=1e-5, bs=32)",Baseline,57.2,66.2,-99.8,55.8,"-"
+"whp (alpha=4, lr=1e-5, bs=32)",Baseline,19.7,21.2,109.6,28.3,"-"
uploads.py ADDED
@@ -0,0 +1,77 @@
+from email.utils import parseaddr
+from huggingface_hub import HfApi
+import io
+import os
+import base64
+import pandas as pd
+from utils import DEFAULT_COLUMNS, DEFAULT_METRICS, LEADERBOARD_PATH
+
+
+api = HfApi()
+TOKEN = os.environ.get("TOKEN", None)
+YEAR_VERSION = "2024"
+
+
+def format_error(msg):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def format_warning(msg):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def format_log(msg):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"
+
+
+def add_new_eval(
+    corpus: str,
+    organization: str,
+    mail: str,
+    fpath: str,
+):
+    for input in [corpus, organization, mail, fpath]:
+        if not input:
+            return format_warning("Please fill all the fields.")
+
+    if organization == 'Baseline':
+        return format_warning("Your organization name cannot be Baseline.")
+
+    _, parsed_mail = parseaddr(mail)
+    if '@' not in parsed_mail:
+        return format_warning("Please provide a valid email address.")
+
+    # Load the existing leaderboard file and the submitted file
+    io_path = f"submission/{corpus}.csv"
+    df = pd.read_csv(io_path)
+    df_new = pd.read_csv(fpath)
+
+    for col in DEFAULT_METRICS:
+        if col not in df_new.columns:
+            return format_warning(f"Missing column in the submitted file: {col}")
+
+    df_new['organization'] = organization
+    df_new['id'] = base64.b64encode(os.urandom(6)).decode('ascii')
+    df_new = df_new[DEFAULT_COLUMNS]
+
+    df = pd.concat([df, df_new]).reset_index(drop=True)
+    buffer = io.BytesIO()
+    df.to_csv(buffer, index=False)  # Write the DataFrame to a buffer in CSV format
+    buffer.seek(0)  # Rewind the buffer to the beginning
+
+    api.delete_file(
+        repo_id=LEADERBOARD_PATH,
+        path_in_repo=io_path,
+        token=TOKEN,
+        repo_type='space',
+    )
+
+    api.upload_file(
+        repo_id=LEADERBOARD_PATH,
+        path_in_repo=io_path,
+        path_or_fileobj=buffer,
+        token=TOKEN,
+        repo_type='space',
+    )
+
+    return format_log(f"Submitted to {corpus} by {organization} successfully.\nPlease refresh the leaderboard, and wait a bit to see the score displayed.")
utils.py ADDED
@@ -0,0 +1,34 @@
+import pandas as pd
+
+LEADERBOARD_PATH = "chan0369/muse_leaderboard"
+
+DEFAULT_METRICS = [
+    'verbmem_f',
+    'knowmem_f',
+    'privleak',
+    'knowmem_r',
+]
+
+DEFAULT_COLUMNS = ['name', 'organization'] + DEFAULT_METRICS + ['id']
+
+DEFAULT_COLUMN_LABELS = [
+    'Method Name',
+    'Submitted By',
+    'VerbMem ⬇️',
+    'KnowMem Forget ⬇️',
+    'PrivLeak',
+    'KnowMem Retain (Utility) ⬆',
+    'Submission Id'
+]
+
+CORPORA = ['news', 'books']
+
+
+def load_data(corpus):
+    assert corpus in CORPORA
+    df = pd.read_csv(f"submission/{corpus}.csv")
+    df = df[DEFAULT_COLUMNS].rename(columns={
+        k: v for k, v in zip(DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS)
+    })
+    return df
+