Spaces:

algorithmicsuperintelligence
/

prompt-optimizer

Running

App Files Community

codelion committed 29 days ago

Commit

2f781ff

verified ·

1 Parent(s): bd94785

Upload app.py

Browse files

Files changed (1) hide show

app.py +33 -4

app.py CHANGED Viewed

@@ -66,7 +66,20 @@ def validate_dataset(dataset_name: str, split: str, input_field: str, target_fie
             print(f"Could not get split names: {e}. Will try to load anyway...")
         # Load a small sample to check fields
-        dataset = load_dataset(dataset_name, split=split, streaming=True)
         # Get first example to check fields
         first_example = next(iter(dataset))
@@ -136,7 +149,15 @@ def evaluate_prompt(prompt: str, dataset_name: str, split: str, num_samples: int
             }
         # Load dataset
-        dataset = load_dataset(dataset_name, split=split, streaming=False)
         # Sample random examples
         if len(dataset) > num_samples:
@@ -478,7 +499,15 @@ def evaluate(prompt: str) -> dict:
     """
     try:
         # Load dataset
-        dataset = load_dataset("{dataset_name}", split="{split}", streaming=False)
         # Initialize OpenAI client
         api_key = os.environ.get("OPENAI_API_KEY")
@@ -989,7 +1018,7 @@ with gr.Blocks(title="OpenEvolve Prompt Optimizer", theme=gr.themes.Soft()) as d
                 label="HuggingFace Dataset (Full Name)",
                 value="gsm8k",
                 placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
-                info="Full dataset name from HuggingFace Hub (org/dataset-name or dataset-name)"
             )
             dataset_split = gr.Textbox(

             print(f"Could not get split names: {e}. Will try to load anyway...")
         # Load a small sample to check fields
+        # Try loading with just dataset name first
+        try:
+            dataset = load_dataset(dataset_name, split=split, streaming=True)
+        except ValueError as e:
+            # If it fails with config error, try with "main" config (common for datasets like gsm8k)
+            if "config" in str(e).lower() or "Config name is missing" in str(e):
+                print(f"Dataset requires config, trying with 'main' config...")
+                try:
+                    dataset = load_dataset(dataset_name, "main", split=split, streaming=True)
+                except:
+                    # If "main" doesn't work, raise the original error
+                    raise e
+            else:
+                raise
         # Get first example to check fields
         first_example = next(iter(dataset))
             }
         # Load dataset
+        # Try loading with just dataset name first
+        try:
+            dataset = load_dataset(dataset_name, split=split, streaming=False)
+        except ValueError as e:
+            # If it fails with config error, try with "main" config (common for datasets like gsm8k)
+            if "config" in str(e).lower() or "Config name is missing" in str(e):
+                dataset = load_dataset(dataset_name, "main", split=split, streaming=False)
+            else:
+                raise
         # Sample random examples
         if len(dataset) > num_samples:
     """
     try:
         # Load dataset
+        # Try loading with just dataset name first
+        try:
+            dataset = load_dataset("{dataset_name}", split="{split}", streaming=False)
+        except ValueError as e:
+            # If it fails with config error, try with "main" config (common for datasets like gsm8k)
+            if "config" in str(e).lower() or "Config name is missing" in str(e):
+                dataset = load_dataset("{dataset_name}", "main", split="{split}", streaming=False)
+            else:
+                raise
         # Initialize OpenAI client
         api_key = os.environ.get("OPENAI_API_KEY")
                 label="HuggingFace Dataset (Full Name)",
                 value="gsm8k",
                 placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
+                info="Dataset name from HuggingFace Hub. Configs auto-detected (e.g., 'gsm8k' → 'gsm8k:main')"
             )
             dataset_split = gr.Textbox(