Commit 6a190a4
Parent(s): 07e5361
First commit

Changed files:
- .gitattributes                  +1  -1
- .gitignore                      +1  -0
- .pre-commit-config.yaml         +53 -0
- README.md                       +37 -6
- app.py                          +87 -0
- asset/citation_button_text.txt  +1  -0
- asset/p1.md                     +3  -0
- asset/p2.md                     +18 -0
- requirements.txt                +16 -0
- submission/books.csv            +11 -0
- submission/news.csv             +11 -0
- uploads.py                      +77 -0
- utils.py                        +34 -0
.gitattributes
CHANGED
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1 @@
+
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,53 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

default_language_version:
  python: python3

ci:
  autofix_prs: true
  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
  autoupdate_schedule: quarterly

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.3.0
    hooks:
      - id: check-yaml
      - id: check-case-conflict
      - id: detect-private-key
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: requirements-txt-fixer
      - id: end-of-file-fixer
      - id: trailing-whitespace

  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
        name: Format imports

  - repo: https://github.com/psf/black
    rev: 22.12.0
    hooks:
      - id: black
        name: Format code
        additional_dependencies: ['click==8.0.2']

  - repo: https://github.com/charliermarsh/ruff-pre-commit
    # Ruff version.
    rev: 'v0.0.267'
    hooks:
      - id: ruff
README.md
CHANGED
@@ -1,13 +1,44 @@
 ---
 title: Muse Leaderboard
-emoji:
-colorFrom:
-colorTo:
+emoji: 🥇
+colorFrom: green
+colorTo: indigo
 sdk: gradio
-sdk_version: 4.37.2
 app_file: app.py
-pinned:
+pinned: true
 license: cc-by-4.0
 ---
 
-
+# Start the configuration
+
+Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
+
+Results files should have the following format and be stored as json files:
+```json
+{
+    "config": {
+        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
+        "model_name": "path of the model on the hub: org/model",
+        "model_sha": "revision on the hub",
+    },
+    "results": {
+        "task_name": {
+            "metric_name": score,
+        },
+        "task_name2": {
+            "metric_name": score,
+        }
+    }
+}
+```
+
+Request files are created automatically by this tool.
+
+If you encounter a problem on the Space, don't hesitate to restart it to remove the created eval-queue, eval-queue-bk, eval-results and eval-results-bk folders.
+
+# Code logic for more complex edits
+
+You'll find:
+- the main table's column names and properties in `src/display/utils.py`
+- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
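For reference, a minimal sketch of producing a results file in the format described above. The model name, revision, tasks, and scores here are placeholders, not values from this repository:

```python
import json

# Hypothetical example values; only the overall structure follows the
# results-file format documented in the README above.
results = {
    "config": {
        "model_dtype": "torch.float16",
        "model_name": "my-org/my-model",  # path of the model on the hub (placeholder)
        "model_sha": "main",              # revision on the hub (placeholder)
    },
    "results": {
        "task_name": {"metric_name": 0.42},
        "task_name2": {"metric_name": 0.17},
    },
}

# Write the results as a JSON file for the leaderboard to pick up.
with open("results_my-org_my-model.json", "w") as f:
    json.dump(results, f, indent=2)
```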
app.py
ADDED
@@ -0,0 +1,87 @@
import gradio as gr
import pandas as pd
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi
from uploads import add_new_eval
from utils import LEADERBOARD_PATH, CORPORA, load_data, DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS


CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."


api = HfApi()
TOKEN = os.environ.get("TOKEN", None)


def restart_space():
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)


demo = gr.Blocks()


with demo:
    with open("asset/p1.md", 'r') as f:
        gr.Markdown(f.read())

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            with open("asset/citation_button_text.txt", 'r') as f:
                citation_button = gr.Textbox(
                    value=f.read(),
                    label="Copy the following snippet to cite these results:",
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            with gr.Row():
                corpus_dropdown = gr.Dropdown(
                    choices=CORPORA,
                    label="🔄 Select corpus",
                    value=CORPORA[0],
                )

            leaderboard_table = gr.components.Dataframe(
                value=load_data(CORPORA[0]),
                interactive=True,
                visible=True,
            )

            corpus_dropdown.change(
                load_data,
                inputs=[corpus_dropdown],
                outputs=leaderboard_table,
            )

    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                corpus_radio = gr.Radio(['news', 'books'], value="news", label="Corpus")
                organization_textbox = gr.Textbox(label="Organization")
                mail_textbox = gr.Textbox(label="Contact email")
            with gr.Column():
                file_output = gr.File()

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                corpus_radio,
                organization_textbox,
                mail_textbox,
                file_output,
            ],
            submission_result,
        )

    with open("asset/p2.md", 'r') as f:
        gr.Markdown(f.read())

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch(debug=True)
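app.py wires the corpus dropdown to `load_data` so that changing the selection reloads the leaderboard table in place. A stripped-down sketch of that pattern, using a toy in-memory loader instead of the repository's submission CSVs (all names here are illustrative only):

```python
import gradio as gr
import pandas as pd


# Toy stand-in for utils.load_data: returns a small DataFrame per corpus.
def toy_load_data(corpus: str) -> pd.DataFrame:
    return pd.DataFrame({"Method Name": ["retrain"], "Corpus": [corpus]})


with gr.Blocks() as demo:
    corpus = gr.Dropdown(choices=["news", "books"], value="news", label="Select corpus")
    table = gr.Dataframe(value=toy_load_data("news"))
    # Re-run the loader whenever the dropdown value changes and replace the table.
    corpus.change(toy_load_data, inputs=[corpus], outputs=table)

if __name__ == "__main__":
    demo.launch()
```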
asset/citation_button_text.txt
ADDED
@@ -0,0 +1 @@
Citation is going to go here.
asset/p1.md
ADDED
@@ -0,0 +1,3 @@
## 🥇 MUSE Leaderboard

MUSE is a comprehensive machine unlearning evaluation benchmark that assesses six desirable properties for unlearned models: (1) no verbatim memorization, (2) no knowledge memorization, (3) no privacy leakage, (4) utility preservation for non-removed data, (5) scalability with respect to removal requests, and (6) sustainability over sequential unlearning requests.
asset/p2.md
ADDED
@@ -0,0 +1,18 @@
## Expected File Format

We expect your submitted file to be in CSV format and to contain the following columns:
- `name`: Name of the evaluated unlearning method. Must be unique for each row.
- `verbmem_f`, `privleak`, `knowmem_f`, `knowmem_r`: Evaluation scores.

Following the evaluation instructions in our [GitHub Repository](https://github.com/jaechan-repo/muse_bench) yields an output file precisely of this format.

## Quick Links

- [Website](https://muse-bench.github.io): Landing page for MUSE.
- [arXiv Paper](): Detailed information about MUSE and analysis of the baseline unlearning methods.
- [GitHub Repository](https://github.com/jaechan-repo/muse_bench): Evaluation scripts and implementations of the baseline unlearning methods.
- [News Dataset](https://huggingface.co/datasets/muse-bench/MUSE-News), [Books Dataset](https://huggingface.co/datasets/muse-bench/MUSE-Books): The two evaluation corpora used by MUSE.
- [Leaderboard](https://huggingface.co/spaces/muse-bench/muse_leaderboard) (you are here): Current rankings of unlearning algorithms on MUSE.
- [Summary on Twitter](): A concise summary and key takeaways from the project.
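To illustrate the expected submission format described in asset/p2.md, a minimal sketch that writes a conforming CSV with pandas. The method name and scores are placeholders, not real evaluation results:

```python
import pandas as pd

# One row per unlearning method; column names follow asset/p2.md.
submission = pd.DataFrame([
    {
        "name": "my_unlearning_method",  # must be unique for each row (placeholder)
        "verbmem_f": 15.0,
        "privleak": -5.0,
        "knowmem_f": 30.0,
        "knowmem_r": 55.0,
    }
])

submission.to_csv("my_submission.csv", index=False)
```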
requirements.txt
ADDED
@@ -0,0 +1,16 @@
APScheduler
black
datasets
gradio
gradio[oauth]
gradio_leaderboard==0.0.9
gradio_client
huggingface-hub>=0.18.0
matplotlib
numpy
pandas
python-dateutil
tqdm
transformers
tokenizers>=0.15.0
sentencepiece
submission/books.csv
ADDED
@@ -0,0 +1,11 @@
name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
target,Baseline,99.8,59.4,-57.5,66.9,"-"
retrain,Baseline,14.3,28.9,0.0,74.5,"-"
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-25.0,0.0,"-"
"ga_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-26.5,10.7,"-"
"ga_klr (epoch=5, lr=1e-5, bs=32)",Baseline,16.0,21.9,-40.2,37.2,"-"
"npo (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-24.3,0.0,"-"
"npo_gdr (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,-30.8,22.8,"-"
"npo_klr (epoch=4, lr=1e-5, bs=32)",Baseline,17.0,25.0,-43.5,44.6,"-"
"tv (alpha=512, lr=1e-5, bs=32)",Baseline,99.7,52.4,-57.5,64.7,"-"
"whp (alpha=256, lr=1e-5, bs=32)",Baseline,18.0,55.7,56.5,63.6,"-"
submission/news.csv
ADDED
@@ -0,0 +1,11 @@
name,organization,verbmem_f,knowmem_f,privleak,knowmem_r,id
target,Baseline,58.4,63.9,-99.8,55.2,"-"
retrain,Baseline,20.8,33.1,0.0,55.0,"-"
"ga (epoch=1, lr=1e-5, bs=32)",Baseline,0.0,0.0,5.2,0.0,"-"
"ga_gdr (epoch=7, lr=1e-5, bs=32)",Baseline,4.9,31.0,108.1,27.3,"-"
"ga_klr (epoch=10, lr=1e-5, bs=32)",Baseline,27.4,50.2,-96.1,44.8,"-"
"npo (epoch=10, lr=1e-5, bs=32)",Baseline,0.0,0.0,24.4,0.0,"-"
"npo_gdr (epoch=10, lr=1e-5, bs=32)",Baseline,1.2,54.6,105.8,40.5,"-"
"npo_klr (epoch=10, lr=1e-5, bs=32)",Baseline,26.9,49.0,-95.8,45.4,"-"
"tv (alpha=512, lr=1e-5, bs=32)",Baseline,57.2,66.2,-99.8,55.8,"-"
"whp (alpha=4, lr=1e-5, bs=32)",Baseline,19.7,21.2,109.6,28.3,"-"
uploads.py
ADDED
@@ -0,0 +1,77 @@
from email.utils import parseaddr
from huggingface_hub import HfApi
import io
import os
import base64
import pandas as pd
from utils import DEFAULT_COLUMNS, DEFAULT_METRICS, LEADERBOARD_PATH


api = HfApi()
TOKEN = os.environ.get("TOKEN", None)
YEAR_VERSION = "2024"


def format_error(msg):
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"


def format_warning(msg):
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"


def format_log(msg):
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"


def add_new_eval(
    corpus: str,
    organization: str,
    mail: str,
    fpath: str,
):
    for field in [corpus, organization, mail, fpath]:
        if not field:
            return format_warning("Please fill all the fields.")

    if organization == 'Baseline':
        return format_warning("Your organization name cannot be Baseline.")

    _, parsed_mail = parseaddr(mail)
    if '@' not in parsed_mail:
        return format_warning("Please provide a valid email address.")

    # Load the existing leaderboard CSV and the submitted file
    io_path = f"submission/{corpus}.csv"
    df = pd.read_csv(io_path)
    df_new = pd.read_csv(fpath)

    for col in DEFAULT_METRICS:
        if col not in df_new.columns:
            return format_warning(f"Missing column in the submitted file: {col}")

    df_new['organization'] = organization
    df_new['id'] = base64.b64encode(os.urandom(6)).decode('ascii')
    df_new = df_new[DEFAULT_COLUMNS]

    df = pd.concat([df, df_new]).reset_index(drop=True)
    buffer = io.BytesIO()
    df.to_csv(buffer, index=False)  # Write the DataFrame to a buffer in CSV format
    buffer.seek(0)  # Rewind the buffer to the beginning

    api.delete_file(
        repo_id=LEADERBOARD_PATH,
        path_in_repo=io_path,
        token=TOKEN,
        repo_type='space',
    )

    api.upload_file(
        repo_id=LEADERBOARD_PATH,
        path_in_repo=io_path,
        path_or_fileobj=buffer,
        token=TOKEN,
        repo_type='space',
    )

    return format_log(f"Submitted to {corpus} by {organization} successfully.\nPlease refresh the leaderboard, and wait a bit to see the score displayed.")
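`add_new_eval` only reports missing metric columns after the file has already been uploaded through the form. A small local pre-flight check that mirrors (and slightly extends) that validation can save a round trip; a sketch, assuming `my_submission.csv` is the file you intend to submit:

```python
import pandas as pd

# Required columns, matching the metrics uploads.add_new_eval checks plus `name`.
REQUIRED = ["name", "verbmem_f", "knowmem_f", "privleak", "knowmem_r"]


def preflight_check(path: str) -> None:
    df = pd.read_csv(path)
    missing = [col for col in REQUIRED if col not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")
    if df["name"].duplicated().any():
        raise ValueError("Method names must be unique for each row.")
    print(f"{path}: looks good ({len(df)} row(s)).")


preflight_check("my_submission.csv")
```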
utils.py
ADDED
@@ -0,0 +1,34 @@
import pandas as pd

LEADERBOARD_PATH = "chan0369/muse_leaderboard"

DEFAULT_METRICS = [
    'verbmem_f',
    'knowmem_f',
    'privleak',
    'knowmem_r',
]

DEFAULT_COLUMNS = ['name', 'organization'] + DEFAULT_METRICS + ['id']

DEFAULT_COLUMN_LABELS = [
    'Method Name',
    'Submitted By',
    'VerbMem ⬇️',
    'KnowMem Forget ⬇️',
    'PrivLeak',
    'KnowMem Retain (Utility) ⬆',
    'Submission Id',
]

CORPORA = ['news', 'books']


def load_data(corpus):
    assert corpus in CORPORA
    df = pd.read_csv(f"submission/{corpus}.csv")
    df = df[DEFAULT_COLUMNS].rename(columns={
        k: v for k, v in zip(DEFAULT_COLUMNS, DEFAULT_COLUMN_LABELS)
    })
    return df
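A quick local usage example for `load_data`, run from the repository root so the `submission/` CSVs resolve:

```python
from utils import load_data

# Print the relabeled leaderboard table for each corpus.
for corpus in ["news", "books"]:
    print(load_data(corpus).head())
```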