codelion committed on
Commit
2f781ff
·
verified ·
1 Parent(s): bd94785

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -4
app.py CHANGED
@@ -66,7 +66,20 @@ def validate_dataset(dataset_name: str, split: str, input_field: str, target_fie
66
  print(f"Could not get split names: {e}. Will try to load anyway...")
67
 
68
  # Load a small sample to check fields
69
- dataset = load_dataset(dataset_name, split=split, streaming=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  # Get first example to check fields
72
  first_example = next(iter(dataset))
@@ -136,7 +149,15 @@ def evaluate_prompt(prompt: str, dataset_name: str, split: str, num_samples: int
136
  }
137
 
138
  # Load dataset
139
- dataset = load_dataset(dataset_name, split=split, streaming=False)
 
 
 
 
 
 
 
 
140
 
141
  # Sample random examples
142
  if len(dataset) > num_samples:
@@ -478,7 +499,15 @@ def evaluate(prompt: str) -> dict:
478
  """
479
  try:
480
  # Load dataset
481
- dataset = load_dataset("{dataset_name}", split="{split}", streaming=False)
 
 
 
 
 
 
 
 
482
 
483
  # Initialize OpenAI client
484
  api_key = os.environ.get("OPENAI_API_KEY")
@@ -989,7 +1018,7 @@ with gr.Blocks(title="OpenEvolve Prompt Optimizer", theme=gr.themes.Soft()) as d
989
  label="HuggingFace Dataset (Full Name)",
990
  value="gsm8k",
991
  placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
992
- info="Full dataset name from HuggingFace Hub (org/dataset-name or dataset-name)"
993
  )
994
 
995
  dataset_split = gr.Textbox(
 
66
  print(f"Could not get split names: {e}. Will try to load anyway...")
67
 
68
  # Load a small sample to check fields
69
+ # Try loading with just dataset name first
70
+ try:
71
+ dataset = load_dataset(dataset_name, split=split, streaming=True)
72
+ except ValueError as e:
73
+ # If it fails with config error, try with "main" config (common for datasets like gsm8k)
74
+ if "config" in str(e).lower() or "Config name is missing" in str(e):
75
+ print(f"Dataset requires config, trying with 'main' config...")
76
+ try:
77
+ dataset = load_dataset(dataset_name, "main", split=split, streaming=True)
78
+ except:
79
+ # If "main" doesn't work, raise the original error
80
+ raise e
81
+ else:
82
+ raise
83
 
84
  # Get first example to check fields
85
  first_example = next(iter(dataset))
 
149
  }
150
 
151
  # Load dataset
152
+ # Try loading with just dataset name first
153
+ try:
154
+ dataset = load_dataset(dataset_name, split=split, streaming=False)
155
+ except ValueError as e:
156
+ # If it fails with config error, try with "main" config (common for datasets like gsm8k)
157
+ if "config" in str(e).lower() or "Config name is missing" in str(e):
158
+ dataset = load_dataset(dataset_name, "main", split=split, streaming=False)
159
+ else:
160
+ raise
161
 
162
  # Sample random examples
163
  if len(dataset) > num_samples:
 
499
  """
500
  try:
501
  # Load dataset
502
+ # Try loading with just dataset name first
503
+ try:
504
+ dataset = load_dataset("{dataset_name}", split="{split}", streaming=False)
505
+ except ValueError as e:
506
+ # If it fails with config error, try with "main" config (common for datasets like gsm8k)
507
+ if "config" in str(e).lower() or "Config name is missing" in str(e):
508
+ dataset = load_dataset("{dataset_name}", "main", split="{split}", streaming=False)
509
+ else:
510
+ raise
511
 
512
  # Initialize OpenAI client
513
  api_key = os.environ.get("OPENAI_API_KEY")
 
1018
  label="HuggingFace Dataset (Full Name)",
1019
  value="gsm8k",
1020
  placeholder="e.g., gsm8k, stanfordnlp/imdb, openai/gsm8k",
1021
+ info="Dataset name from HuggingFace Hub. Configs auto-detected (e.g., 'gsm8k' → 'gsm8k:main')"
1022
  )
1023
 
1024
  dataset_split = gr.Textbox(