Upload app.py
Browse files
app.py
CHANGED
|
@@ -246,6 +246,43 @@ def evaluate_prompt(prompt: str, dataset_name: str, split: str, num_samples: int
|
|
| 246 |
}
|
| 247 |
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
def parse_evolution_history(output_dir: str) -> str:
|
| 250 |
"""
|
| 251 |
Parse evolution history from OpenEvolve output directory.
|
|
@@ -522,6 +559,7 @@ def create_config_file(model: str, work_dir: str):
|
|
| 522 |
"elite_ratio": 0.1,
|
| 523 |
"explore_ratio": 0.3,
|
| 524 |
"exploit_ratio": 0.6,
|
|
|
|
| 525 |
},
|
| 526 |
"evaluation": {
|
| 527 |
"timeout": None, # Disable timeout to avoid signal handling issues
|
|
@@ -538,7 +576,7 @@ def create_config_file(model: str, work_dir: str):
|
|
| 538 |
|
| 539 |
def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
| 540 |
model: str, input_field: str, target_field: str,
|
| 541 |
-
progress=gr.Progress()) -> Tuple[str, str, str, str]:
|
| 542 |
"""Run OpenEvolve to optimize the prompt."""
|
| 543 |
|
| 544 |
progress(0, desc="Validating inputs...")
|
|
@@ -549,7 +587,7 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 549 |
)
|
| 550 |
|
| 551 |
if not is_valid:
|
| 552 |
-
return f"## Validation Failed\n\n{validation_message}", "", "", ""
|
| 553 |
|
| 554 |
progress(0.05, desc=f"Validation passed: {validation_message}")
|
| 555 |
|
|
@@ -579,10 +617,10 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 579 |
)
|
| 580 |
|
| 581 |
if "error" in initial_eval:
|
| 582 |
-
return f"## Error\n\n❌ Initial evaluation failed: {initial_eval['error']}", "", "", ""
|
| 583 |
|
| 584 |
if initial_eval["total"] == 0:
|
| 585 |
-
return f"## Error\n\n❌ Initial evaluation failed: No samples could be evaluated. This usually means:\n- API key is invalid or has no credits\n- Model is unavailable or rate-limited\n- Dataset fields are incorrect\n- Network connectivity issues\n\nPlease check your configuration and try again.", "", "", ""
|
| 586 |
|
| 587 |
initial_results = f"""
|
| 588 |
### Initial Prompt Evaluation
|
|
@@ -681,17 +719,25 @@ def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
|
| 681 |
|
| 682 |
progress(1.0, desc="Complete!")
|
| 683 |
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
except Exception as e:
|
| 687 |
-
return f"## Error During Evolution\n\n❌ {str(e)}", initial_results, "", ""
|
| 688 |
|
| 689 |
finally:
|
| 690 |
-
#
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
except:
|
| 694 |
-
pass
|
| 695 |
|
| 696 |
|
| 697 |
# Create Gradio interface
|
|
@@ -784,6 +830,30 @@ with gr.Blocks(title="OpenEvolve Prompt Optimizer", theme=gr.themes.Soft()) as d
|
|
| 784 |
with gr.Column():
|
| 785 |
evolution_progress = gr.Markdown("### Evolution Progress\nEvolution progress will appear here during optimization...", visible=True)
|
| 786 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 787 |
# Documentation section - in collapsible accordion
|
| 788 |
gr.Markdown("---")
|
| 789 |
with gr.Accordion("📚 Documentation & Examples", open=False):
|
|
@@ -825,11 +895,73 @@ with gr.Blocks(title="OpenEvolve Prompt Optimizer", theme=gr.themes.Soft()) as d
|
|
| 825 |
- [Documentation](https://github.com/algorithmicsuperintelligence/openevolve#readme)
|
| 826 |
""")
|
| 827 |
|
| 828 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
fn=optimize_prompt,
|
| 830 |
inputs=[initial_prompt, dataset_name, dataset_split, model,
|
| 831 |
input_field, target_field],
|
| 832 |
-
outputs=[summary, initial_results, evolution_progress, final_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
)
|
| 834 |
|
| 835 |
if __name__ == "__main__":
|
|
|
|
| 246 |
}
|
| 247 |
|
| 248 |
|
| 249 |
+
def collect_prompt_history(output_dir: str) -> List[Dict]:
    """
    Collect all prompts discovered during evolution.

    Reads every ``program_*.txt`` file located directly inside
    ``output_dir``, in lexicographically sorted path order.

    Args:
        output_dir: Directory to scan for ``program_*.txt`` files.

    Returns:
        A list of dicts, one per readable file, with the keys:
        ``prompt`` (the file contents), ``id`` (the file name with
        ``program_`` and ``.txt`` removed) and ``file`` (the path that was
        read). Files that cannot be read or decoded are skipped. An empty
        list is returned when nothing matches or the scan itself fails.
    """
    try:
        prompts = []

        # Look for all program files
        pattern = os.path.join(output_dir, "program_*.txt")

        for pfile in sorted(glob.glob(pattern)):
            try:
                # Explicit encoding so prompts decode the same way on every
                # platform instead of depending on the locale default.
                with open(pfile, "r", encoding="utf-8") as f:
                    prompt_content = f.read()
            except (OSError, UnicodeDecodeError):
                # Best effort: one unreadable entry must not hide the rest.
                continue

            # Extract program ID from filename
            prog_id = os.path.basename(pfile).replace("program_", "").replace(".txt", "")

            prompts.append({
                "prompt": prompt_content,
                "id": prog_id,
                "file": pfile,
            })

        return prompts
    except Exception as e:
        # Boundary handler: history browsing is optional, so report and
        # fall back to an empty history rather than failing the caller.
        print(f"Error collecting prompt history: {e}")
        return []
|
| 284 |
+
|
| 285 |
+
|
| 286 |
def parse_evolution_history(output_dir: str) -> str:
|
| 287 |
"""
|
| 288 |
Parse evolution history from OpenEvolve output directory.
|
|
|
|
| 559 |
"elite_ratio": 0.1,
|
| 560 |
"explore_ratio": 0.3,
|
| 561 |
"exploit_ratio": 0.6,
|
| 562 |
+
"parallel": False, # Disable parallel processing to avoid signal errors
|
| 563 |
},
|
| 564 |
"evaluation": {
|
| 565 |
"timeout": None, # Disable timeout to avoid signal handling issues
|
|
|
|
| 576 |
|
| 577 |
def optimize_prompt(initial_prompt: str, dataset_name: str, dataset_split: str,
|
| 578 |
model: str, input_field: str, target_field: str,
|
| 579 |
+
progress=gr.Progress()) -> Tuple[str, str, str, str, List[str], int, int]:
|
| 580 |
"""Run OpenEvolve to optimize the prompt."""
|
| 581 |
|
| 582 |
progress(0, desc="Validating inputs...")
|
|
|
|
| 587 |
)
|
| 588 |
|
| 589 |
if not is_valid:
|
| 590 |
+
return f"## Validation Failed\n\n{validation_message}", "", "", "", [], 0, 0
|
| 591 |
|
| 592 |
progress(0.05, desc=f"Validation passed: {validation_message}")
|
| 593 |
|
|
|
|
| 617 |
)
|
| 618 |
|
| 619 |
if "error" in initial_eval:
|
| 620 |
+
return f"## Error\n\n❌ Initial evaluation failed: {initial_eval['error']}", "", "", "", [initial_prompt], 0, 1
|
| 621 |
|
| 622 |
if initial_eval["total"] == 0:
|
| 623 |
+
return f"## Error\n\n❌ Initial evaluation failed: No samples could be evaluated. This usually means:\n- API key is invalid or has no credits\n- Model is unavailable or rate-limited\n- Dataset fields are incorrect\n- Network connectivity issues\n\nPlease check your configuration and try again.", "", "", "", [initial_prompt], 0, 1
|
| 624 |
|
| 625 |
initial_results = f"""
|
| 626 |
### Initial Prompt Evaluation
|
|
|
|
| 719 |
|
| 720 |
progress(1.0, desc="Complete!")
|
| 721 |
|
| 722 |
+
# Collect all discovered prompts for browsing
|
| 723 |
+
all_prompts = [initial_prompt] # Start with initial prompt
|
| 724 |
+
prompt_history = collect_prompt_history(output_dir)
|
| 725 |
+
for p in prompt_history:
|
| 726 |
+
all_prompts.append(p["prompt"])
|
| 727 |
+
|
| 728 |
+
# Ensure we have the best prompt at the end
|
| 729 |
+
if best_prompt not in all_prompts:
|
| 730 |
+
all_prompts.append(best_prompt)
|
| 731 |
+
|
| 732 |
+
return summary, initial_results, evolution_viz, final_results, all_prompts, 0, len(all_prompts)
|
| 733 |
|
| 734 |
except Exception as e:
|
| 735 |
+
return f"## Error During Evolution\n\n❌ {str(e)}", initial_results, "", "", [initial_prompt], 0, 1
|
| 736 |
|
| 737 |
finally:
|
| 738 |
+
# Don't clean up - keep prompts for browsing
|
| 739 |
+
# User can manually clean /tmp if needed
|
| 740 |
+
pass
|
|
|
|
|
|
|
| 741 |
|
| 742 |
|
| 743 |
# Create Gradio interface
|
|
|
|
| 830 |
with gr.Column():
|
| 831 |
evolution_progress = gr.Markdown("### Evolution Progress\nEvolution progress will appear here during optimization...", visible=True)
|
| 832 |
|
| 833 |
+
# Prompt History Browser
|
| 834 |
+
gr.Markdown("---")
|
| 835 |
+
gr.Markdown("## 📜 Prompt History Browser")
|
| 836 |
+
gr.Markdown("Browse through all prompts discovered during evolution (initial → intermediate → final)")
|
| 837 |
+
|
| 838 |
+
with gr.Row():
|
| 839 |
+
with gr.Column(scale=8):
|
| 840 |
+
prompt_display = gr.TextArea(
|
| 841 |
+
label="",
|
| 842 |
+
lines=10,
|
| 843 |
+
interactive=False,
|
| 844 |
+
placeholder="Prompts will appear here after optimization completes...",
|
| 845 |
+
show_label=False
|
| 846 |
+
)
|
| 847 |
+
with gr.Column(scale=2):
|
| 848 |
+
prompt_counter = gr.Markdown("**Prompt**: -/-")
|
| 849 |
+
prev_btn = gr.Button("⬅️ Previous", size="sm")
|
| 850 |
+
next_btn = gr.Button("Next ➡️", size="sm")
|
| 851 |
+
gr.Markdown("**Prompt Types:**\n- First = Initial\n- Middle = Intermediate\n- Last = Final Best")
|
| 852 |
+
|
| 853 |
+
# Hidden state to store prompt history and current index
|
| 854 |
+
prompt_history_state = gr.State([])
|
| 855 |
+
current_prompt_index = gr.State(0)
|
| 856 |
+
|
| 857 |
# Documentation section - in collapsible accordion
|
| 858 |
gr.Markdown("---")
|
| 859 |
with gr.Accordion("📚 Documentation & Examples", open=False):
|
|
|
|
| 895 |
- [Documentation](https://github.com/algorithmicsuperintelligence/openevolve#readme)
|
| 896 |
""")
|
| 897 |
|
| 898 |
+
# Navigation functions for prompt browser
|
| 899 |
+
def show_previous_prompt(prompts, current_idx):
    """Step the prompt browser back by one entry.

    Args:
        prompts: Sequence of prompt strings being browsed.
        current_idx: Index of the prompt currently displayed.

    Returns:
        A tuple ``(prompt_text, counter_markdown, new_index)``. When
        ``prompts`` is empty the result is ``("", "**Prompt**: -/-", 0)``.
    """
    if not prompts:
        return "", "**Prompt**: -/-", 0

    last_idx = len(prompts) - 1
    # Clamp at both ends: a stale index left over from a longer history
    # would otherwise land past the end of the list and raise IndexError.
    new_idx = min(last_idx, max(0, current_idx - 1))

    # The first entry wins the label when the history has a single prompt.
    if new_idx == 0:
        role = "Initial"
    elif new_idx == last_idx:
        role = "Final Best"
    else:
        role = "Intermediate"

    counter_text = f"**Prompt**: {new_idx + 1}/{len(prompts)} ({role})"
    return prompts[new_idx], counter_text, new_idx
|
| 911 |
+
|
| 912 |
+
def show_next_prompt(prompts, current_idx):
    """Step the prompt browser forward by one entry.

    Args:
        prompts: Sequence of prompt strings being browsed.
        current_idx: Index of the prompt currently displayed.

    Returns:
        A tuple ``(prompt_text, counter_markdown, new_index)``. When
        ``prompts`` is empty the result is ``("", "**Prompt**: -/-", 0)``.
    """
    if not prompts:
        return "", "**Prompt**: -/-", 0

    last_idx = len(prompts) - 1
    # Clamp at both ends: an out-of-range negative index would otherwise
    # fall off the front of the list and raise IndexError.
    new_idx = max(0, min(last_idx, current_idx + 1))

    # The first entry wins the label when the history has a single prompt.
    if new_idx == 0:
        role = "Initial"
    elif new_idx == last_idx:
        role = "Final Best"
    else:
        role = "Intermediate"

    counter_text = f"**Prompt**: {new_idx + 1}/{len(prompts)} ({role})"
    return prompts[new_idx], counter_text, new_idx
|
| 924 |
+
|
| 925 |
+
def update_prompt_display(prompts, idx, total=None):
    """Render the prompt at ``idx`` together with its counter label.

    Args:
        prompts: Sequence of prompt strings being browsed.
        idx: Index of the prompt to show; out-of-range values are clamped
            into ``[0, len(prompts) - 1]``.
        total: Unused. Accepted so existing callers that pass a third
            value keep working; the count is taken from ``prompts``.

    Returns:
        A tuple ``(prompt_text, counter_markdown)``. When ``prompts`` is
        empty the result is ``("", "**Prompt**: -/-")``.
    """
    if not prompts:
        return "", "**Prompt**: -/-"

    last_idx = len(prompts) - 1
    # Clamp at both ends so neither a stale large index nor a negative one
    # can raise IndexError or silently wrap around to the end of the list.
    idx = max(0, min(idx, last_idx))

    # The first entry wins the label when the history has a single prompt.
    if idx == 0:
        role = "Initial"
    elif idx == last_idx:
        role = "Final Best"
    else:
        role = "Intermediate"

    counter_text = f"**Prompt**: {idx + 1}/{len(prompts)} ({role})"
    return prompts[idx], counter_text
|
| 937 |
+
|
| 938 |
+
# Wire up the optimize button
|
| 939 |
+
optimize_result = optimize_btn.click(
|
| 940 |
fn=optimize_prompt,
|
| 941 |
inputs=[initial_prompt, dataset_name, dataset_split, model,
|
| 942 |
input_field, target_field],
|
| 943 |
+
outputs=[summary, initial_results, evolution_progress, final_results,
|
| 944 |
+
prompt_history_state, current_prompt_index, gr.State()] # dummy for total
|
| 945 |
+
)
|
| 946 |
+
|
| 947 |
+
# Update prompt display when optimization completes
|
| 948 |
+
optimize_result.then(
|
| 949 |
+
fn=update_prompt_display,
|
| 950 |
+
inputs=[prompt_history_state, current_prompt_index, gr.State()],
|
| 951 |
+
outputs=[prompt_display, prompt_counter]
|
| 952 |
+
)
|
| 953 |
+
|
| 954 |
+
# Wire up navigation buttons
|
| 955 |
+
prev_btn.click(
|
| 956 |
+
fn=show_previous_prompt,
|
| 957 |
+
inputs=[prompt_history_state, current_prompt_index],
|
| 958 |
+
outputs=[prompt_display, prompt_counter, current_prompt_index]
|
| 959 |
+
)
|
| 960 |
+
|
| 961 |
+
next_btn.click(
|
| 962 |
+
fn=show_next_prompt,
|
| 963 |
+
inputs=[prompt_history_state, current_prompt_index],
|
| 964 |
+
outputs=[prompt_display, prompt_counter, current_prompt_index]
|
| 965 |
)
|
| 966 |
|
| 967 |
if __name__ == "__main__":
|