Upload app.py
Browse files
app.py
CHANGED
|
@@ -66,7 +66,20 @@ def validate_dataset(dataset_name: str, split: str, input_field: str, target_fie
|
|
| 66 |
print(f"Could not get split names: {e}. Will try to load anyway...")
|
| 67 |
|
| 68 |
# Load a small sample to check fields
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Get first example to check fields
|
| 72 |
first_example = next(iter(dataset))
|
|
@@ -136,7 +149,15 @@ def evaluate_prompt(prompt: str, dataset_name: str, split: str, num_samples: int
|
|
| 136 |
}
|
| 137 |
|
| 138 |
# Load dataset
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# Sample random examples
|
| 142 |
if len(dataset) > num_samples:
|
|
@@ -478,7 +499,15 @@ def evaluate(prompt: str) -> dict:
|
|
| 478 |
"""
|
| 479 |
try:
|
| 480 |
# Load dataset
|
| 481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
# Initialize OpenAI client
|
| 484 |
api_key = os.environ.get("OPENAI_API_KEY")
|
|
@@ -989,7 +1018,7 @@ with gr.Blocks(title="OpenEvolve Prompt Optimizer", theme=gr.themes.Soft()) as d
|
|
| 989 |
label="HuggingFace Dataset (Full Name)",
|
| 990 |
value="gsm8k",
|
| 991 |
placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
|
| 992 |
-
info="
|
| 993 |
)
|
| 994 |
|
| 995 |
dataset_split = gr.Textbox(
|
|
|
|
| 66 |
print(f"Could not get split names: {e}. Will try to load anyway...")
|
| 67 |
|
| 68 |
# Load a small sample to check fields
|
| 69 |
+
# Try loading with just dataset name first
|
| 70 |
+
try:
|
| 71 |
+
dataset = load_dataset(dataset_name, split=split, streaming=True)
|
| 72 |
+
except ValueError as e:
|
| 73 |
+
# If it fails with config error, try with "main" config (common for datasets like gsm8k)
|
| 74 |
+
if "config" in str(e).lower() or "Config name is missing" in str(e):
|
| 75 |
+
print(f"Dataset requires config, trying with 'main' config...")
|
| 76 |
+
try:
|
| 77 |
+
dataset = load_dataset(dataset_name, "main", split=split, streaming=True)
|
| 78 |
+
except:
|
| 79 |
+
# If "main" doesn't work, raise the original error
|
| 80 |
+
raise e
|
| 81 |
+
else:
|
| 82 |
+
raise
|
| 83 |
|
| 84 |
# Get first example to check fields
|
| 85 |
first_example = next(iter(dataset))
|
|
|
|
| 149 |
}
|
| 150 |
|
| 151 |
# Load dataset
|
| 152 |
+
# Try loading with just dataset name first
|
| 153 |
+
try:
|
| 154 |
+
dataset = load_dataset(dataset_name, split=split, streaming=False)
|
| 155 |
+
except ValueError as e:
|
| 156 |
+
# If it fails with config error, try with "main" config (common for datasets like gsm8k)
|
| 157 |
+
if "config" in str(e).lower() or "Config name is missing" in str(e):
|
| 158 |
+
dataset = load_dataset(dataset_name, "main", split=split, streaming=False)
|
| 159 |
+
else:
|
| 160 |
+
raise
|
| 161 |
|
| 162 |
# Sample random examples
|
| 163 |
if len(dataset) > num_samples:
|
|
|
|
| 499 |
"""
|
| 500 |
try:
|
| 501 |
# Load dataset
|
| 502 |
+
# Try loading with just dataset name first
|
| 503 |
+
try:
|
| 504 |
+
dataset = load_dataset("{dataset_name}", split="{split}", streaming=False)
|
| 505 |
+
except ValueError as e:
|
| 506 |
+
# If it fails with config error, try with "main" config (common for datasets like gsm8k)
|
| 507 |
+
if "config" in str(e).lower() or "Config name is missing" in str(e):
|
| 508 |
+
dataset = load_dataset("{dataset_name}", "main", split="{split}", streaming=False)
|
| 509 |
+
else:
|
| 510 |
+
raise
|
| 511 |
|
| 512 |
# Initialize OpenAI client
|
| 513 |
api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
| 1018 |
label="HuggingFace Dataset (Full Name)",
|
| 1019 |
value="gsm8k",
|
| 1020 |
placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
|
| 1021 |
+
info="Dataset name from HuggingFace Hub. Configs auto-detected (e.g., 'gsm8k' → 'gsm8k:main')"
|
| 1022 |
)
|
| 1023 |
|
| 1024 |
dataset_split = gr.Textbox(
|