Commit 570d85c · Parent(s): 25580aa
Alina Lozovskaya committed

Add resulted datasets [wip]

Files changed:
- yourbench_space/app.py +21 -0
- yourbench_space/config.py +1 -0
- yourbench_space/utils.py +21 -1
yourbench_space/app.py
CHANGED
@@ -12,6 +12,7 @@ from yourbench_space.utils import (
     UPLOAD_DIRECTORY,
     SubprocessManager,
     save_files,
+    update_dataset,
     STAGES,
 )
 
@@ -60,6 +61,8 @@ def generate_and_return(hf_org, hf_prefix):
         )
     )
 
+final_dataset = None
+
 def update_process_status():
     """Update process status and include exit details if process has terminated"""
     is_running = manager.is_running()
@@ -191,5 +194,23 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
 
     kill_button = gr.Button("Kill Task")
     kill_button.click(manager.kill_process)
+
+    with gr.Row():
+        with gr.Accordion("Ingestion"):
+            ingestion_df = gr.DataFrame()
+
+        with gr.Accordion("Summarization"):
+            summarization_df = gr.DataFrame()
+
+        with gr.Accordion("Single-Hop"):
+            single_hop = gr.DataFrame()
+
+        with gr.Accordion("Answer Generation"):
+            answers_df = gr.DataFrame()
+
+    stages_table.change(
+        update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_prefix], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
+    )
+
 
 app.launch(allowed_paths=["/app"])
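For reference, the new gr.Row block ties the four result tables to the existing stages_table component: whenever the stage list changes, Gradio re-runs update_dataset and routes its four return values into the DataFrame outputs. Below is a minimal standalone sketch of the same wiring pattern; fake_update, stages, org, and prefix are illustrative stand-ins, not names from the app.

import gradio as gr
import pandas as pd

# Hypothetical stand-in for update_dataset: returns one DataFrame per pipeline stage.
def fake_update(stages, org, prefix):
    rows = stages or []
    df = pd.DataFrame({"stage": rows, "dataset": [f"{org}/{prefix}"] * len(rows)})
    return df, df, df, df

with gr.Blocks() as demo:
    stages = gr.CheckboxGroup(choices=["ingestion", "summarization"], label="Stages")
    org = gr.Textbox(value="my-org", label="HF org")         # stands in for hf_org_dropdown
    prefix = gr.Textbox(value="my-dataset", label="Prefix")  # stands in for hf_dataset_prefix

    with gr.Row():
        with gr.Accordion("Ingestion"):
            ingestion_df = gr.DataFrame()
        with gr.Accordion("Summarization"):
            summarization_df = gr.DataFrame()
        with gr.Accordion("Single-Hop"):
            single_hop = gr.DataFrame()
        with gr.Accordion("Answer Generation"):
            answers_df = gr.DataFrame()

    # Same pattern as the commit: re-run the loader whenever the stage component changes.
    stages.change(
        fake_update,
        inputs=[stages, org, prefix],
        outputs=[ingestion_df, summarization_df, single_hop, answers_df],
    )

demo.launch()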
yourbench_space/config.py
CHANGED
@@ -98,3 +98,4 @@ def generate_and_save_config(hf_org, hf_prefix):
     file_path = save_yaml_file(config)
     logger.success(f"Config saved at: {file_path}")
     return file_path
+
yourbench_space/utils.py
CHANGED
@@ -3,8 +3,10 @@ import os
 import re
 import pathlib
 import shutil
-from loguru import logger
 import subprocess
+import pandas as pd
+from datasets import load_dataset, get_dataset_config_names
+from loguru import logger
 from typing import List
 
 UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
@@ -49,6 +51,24 @@ def save_files(files: List[pathlib.Path]) -> str:
         else "No files were saved"
     )
 
+def update_dataset(stages, hf_org, hf_prefix):
+    """
+    Updates the dataset based on the provided stages and dataset configuration.
+    """
+    # If not the final step, return empty dataframes
+    if stages[-1] != STAGES[-1]:
+        return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame())
+
+    # Construct dataset name from config
+    dataset_name = f"{hf_org}/{hf_prefix}"
+
+    # Load datasets and convert to DataFrame
+    ingestion_df = pd.DataFrame(load_dataset(dataset_name, name="ingested", split="train", streaming=True))
+    summarization_df = pd.DataFrame(load_dataset(dataset_name, name="summarization", split="train", streaming=True))
+    single_hop = pd.DataFrame(load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True))
+    answers_df = pd.DataFrame(load_dataset(dataset_name, name="answer_generation", split="train", streaming=True))
+
+    return (ingestion_df, summarization_df, single_hop, answers_df)
 
 class SubprocessManager:
     def __init__(self, command):
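As a usage note, load_dataset(..., streaming=True) returns an IterableDataset that yields plain dict rows, which is why wrapping it directly in pd.DataFrame(...) materializes the split. Below is a hedged sketch of calling the new helper outside the UI, assuming the pushed dataset actually exists on the Hub; the org and prefix values are placeholders, not real repo names.

import pandas as pd
from datasets import load_dataset

from yourbench_space.utils import update_dataset, STAGES

# Placeholder org/prefix; in the app these come from hf_org_dropdown / hf_dataset_prefix.
hf_org, hf_prefix = "my-org", "my-yourbench-run"

# Only when the last completed stage equals STAGES[-1] does the helper load data;
# otherwise it returns four empty DataFrames.
ingestion, summaries, single_hop, answers = update_dataset(list(STAGES), hf_org, hf_prefix)
print(ingestion.head(), answers.shape)

# Equivalent manual load of one config, showing why pd.DataFrame() accepts the result:
rows = load_dataset(f"{hf_org}/{hf_prefix}", name="ingested", split="train", streaming=True)
df = pd.DataFrame(rows)  # the IterableDataset yields dicts, so this consumes the whole split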