Spaces:

guipenedo
/

lighteval-tasks-explorer

Runtime error

App Files Files Community

guipenedo HF staff committed on Jun 6, 2024

Commit

28d348e

unverified ·

1 Parent(s): b25c99e

initial upload

Browse files

Files changed (2) hide show

.gitignore +163 -0
app.py +83 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,163 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+.idea/
+# ruff code style
+.ruff_cache/
+.vscode/
+playground/

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import ast
+import json
+import urllib
+import urllib.request
+import gradio as gr
+import pandas as pd
+from datasets import load_dataset
+from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns, ColumnFilter
+# Task table fetched from the lighteval repository (JSON Lines). `explode` yields one row
+# per entry of the "suite" column, and the index is renumbered so that positions are contiguous.
+df = (
+    pd.read_json(
+        "https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_table.jsonl",
+        lines=True,
+    )
+    .explode("suite")
+    .reset_index(drop=True)
+)
+# Download the source of the module that defines the prompt-formatting functions.
+with urllib.request.urlopen("https://raw.githubusercontent.com/huggingface/lighteval/main/src/lighteval/tasks/tasks_prompt_formatting.py") as f:
+    tasks_prompt_functions_raw = f.read().decode('utf-8')
+tree = ast.parse(tasks_prompt_functions_raw)
+# Map each function name found anywhere in that module to the exact source text of the function.
+tasks_prompt_functions = {
+    node.name: ast.get_source_segment(tasks_prompt_functions_raw, node)
+    for node in ast.walk(tree)
+    if isinstance(node, ast.FunctionDef)
+}
+def load_task_metadata(task_id):
+    """Collect what the Task Inspector displays for one task.
+
+    Args:
+        task_id: Positional row index into the module-level ``df``.
+
+    Returns:
+        A 4-tuple: the task row as a dict, a Markdown header linking to the
+        task's ``hf_repo`` under huggingface.co/datasets, the source text of
+        the task's prompt function (``None`` when its name is not a key of
+        ``tasks_prompt_functions``), and the literal string ``"unknown"``.
+    """
+    row = df.iloc[task_id]
+    metadata = row.to_dict()
+    hf_repo = row['hf_repo']
+    header = f"""Examples from the HF repository ([{hf_repo}](https://huggingface.co/datasets/{hf_repo}))"""
+    prompt_source = tasks_prompt_functions.get(row["prompt_function"])
+    return (metadata, header, prompt_source, "unknown")
+def load_task_examples(task_id):
+    """Load a small sample of a task's evaluation data for display.
+
+    The dataset is opened with ``streaming=True`` and only the first batch
+    requested from ``dataset.iter(20)`` is read.
+
+    Args:
+        task_id: Positional row index into the module-level ``df``.
+
+    Returns:
+        A ``pandas.DataFrame`` with one row per sampled example. Cells whose
+        value is a ``dict`` or ``list`` are replaced by their JSON string so
+        that nested structures show up as readable text in the table.
+    """
+    task_row = df.iloc[task_id]
+    dataset = load_dataset(task_row["hf_repo"], task_row["hf_subset"], split="+".join(task_row["evaluation_splits"]),
+                           trust_remote_code=task_row["trust_dataset"], streaming=True)
+    # Only the first batch is needed; default to no columns so that a split
+    # yielding nothing produces an empty table instead of StopIteration.
+    sample_data = next(dataset.iter(20), {})
+    # Dictionary of lists to list of dictionaries. The dict/list check has to be
+    # made per cell: each `row` is a tuple, so testing the row never matches.
+    return pd.DataFrame([
+        {column: json.dumps(value) if isinstance(value, (dict, list)) else value
+         for column, value in zip(sample_data, row)}
+        for row in zip(*sample_data.values())
+    ])
+# Build the Gradio UI: a "Tasks List" tab showing the whole task table and a
+# "Task Inspector" tab showing the details of one selected task.
+with gr.Blocks() as demo:
+    gr.Markdown("""
+    # LightEval Tasks Explorer
+    """)
+    with gr.Tabs() as tabs:
+        with gr.TabItem("🗃️ Tasks List"):
+            # Table over the full task DataFrame with column selection, search and a suite filter.
+            Leaderboard(
+                value=df,
+                select_columns=SelectColumns(default_selection=["name", "suite", "prompt_function", "hf_repo", "hf_subset", "evaluation_splits", "metric"],
+                                            cant_deselect=["name", "suite"],
+                                            label="Columns to display"),
+                search_columns=SearchColumns(primary_column="name", secondary_columns=["suite", "prompt_function", "hf_repo", "metric"],
+                                             placeholder="Search for a task by name, suite, prompt_function, hf_repo or "
+                                                         "metric. To search by suite, for example, type 'suite:<query>'. Separate queries by \";\"",
+                                             label="Search"),
+                filter_columns=[
+                    ColumnFilter("suite", type="dropdown", label="Select suite"),
+                    # ColumnFilter("prompt_function", type="dropdown", label="Select prompt_function"),
+                    # ColumnFilter("metric", type="dropdown", label="Select metric")
+                ],
+                wrap=True
+            )
+        with gr.TabItem("🔎 Task Inspector"):
+            # Choices are ("suite|name", row position) pairs, sorted by label. The handlers
+            # below index df.iloc with the selected value, so the row position is presumably
+            # what Gradio passes as the dropdown value -- confirm against the Dropdown docs.
+            task_inspector_selector = gr.Dropdown(
+                choices=sorted(zip((df['suite'] + '|' + df['name']).tolist(), range(len(df)))),
+                label="Task",
+                info="Select a task"
+            )
+            with gr.Row():
+                with gr.Column():
+                    task_metadata = gr.Json(label="Task definition")
+                with gr.Column():
+                    task_prompt_function = gr.Code(label="Task prompt function", language="python", interactive=False)
+            task_dataset_header = gr.Markdown("Examples from the HF repository")
+            task_dataset = gr.Dataframe(wrap=True)
+            # Two separate handlers are registered on the same change event: one fills the
+            # metadata, header and prompt-function widgets, the other fills the examples table.
+            # NOTE(review): load_task_metadata returns four values but only three outputs are
+            # listed here; its trailing "unknown" has no target component.
+            gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector], outputs=[task_metadata, task_dataset_header, task_prompt_function], fn=load_task_metadata)
+            gr.on(triggers=[task_inspector_selector.change], inputs=[task_inspector_selector], outputs=[task_dataset], fn=load_task_examples)
+# Start the app only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()