import os

import anthropic
import openai
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize API clients
openai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))


# Load the sentence transformer once and cache it across Streamlit reruns
@st.cache_resource
def load_similarity_model():
    return SentenceTransformer("all-MiniLM-L6-v2")


similarity_model = load_similarity_model()


# ===== Helper Functions =====

def generate_response(prompt, model):
    """Generate a response from the selected model."""
    if model == "GPT-4":
        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content.strip()
    elif model == "Claude 3":
        response = anthropic_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text
    else:
        return "Model not supported."


def calculate_similarity(text1, text2):
    """Calculate cosine similarity between two texts."""
    embeddings = similarity_model.encode([text1, text2])
    return float(cosine_similarity([embeddings[0]], [embeddings[1]])[0][0])


def evaluate_response(response, golden_response):
    """Evaluate response quality against a reference answer."""
    similarity = calculate_similarity(response, golden_response)
    return {
        "Similarity Score": round(similarity, 3),
        "Length": len(response),
        "Toxicity": "Low",  # Placeholder for an actual toxicity classifier
    }


def optimize_prompt(prompt, goal):
    """Optimize a prompt using a simple mutation strategy."""
    # Placeholder for a genetic-algorithm-style optimizer; for now, just
    # annotate the prompt with the selected optimization goal.
    return prompt + f" [Optimized for {goal.lower()}]"


# ===== Streamlit UI =====

st.title("🚀 PromptCraft Studio")
st.write("The Ultimate Prompt Engineering Workbench")

# Sidebar for settings
with st.sidebar:
    st.header("⚙️ Settings")
    model_choice = st.selectbox("Choose Model", ["GPT-4", "Claude 3"])
    optimization_goal = st.selectbox("Optimization Goal", ["Clarity", "Brevity", "Engagement"])

# Main interface
st.markdown("### **Prompt Playground**")
user_prompt = st.text_area("Enter your prompt:", "Explain quantum computing in simple terms.")

if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_response(user_prompt, model_choice)
    st.markdown("### **Response**")
    st.write(response)

    # Evaluate the response against a reference answer
    st.markdown("### **Evaluation Metrics**")
    golden_response = (
        "Quantum computing uses qubits to perform complex calculations "
        "faster than classical computers."
    )
    metrics = evaluate_response(response, golden_response)
    st.write(metrics)

# Prompt Optimization
st.markdown("### **Prompt Optimization**")
if st.button("Optimize Prompt"):
    optimized_prompt = optimize_prompt(user_prompt, optimization_goal)
    st.markdown("### **Optimized Prompt**")
    st.write(optimized_prompt)

# Multi-Model Comparison
st.markdown("### **Multi-Model Comparison**")
if st.button("Compare Models"):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("#### **GPT-4 Response**")
        gpt_response = generate_response(user_prompt, "GPT-4")
        st.write(gpt_response)
    with col2:
        st.markdown("#### **Claude 3 Response**")
        claude_response = generate_response(user_prompt, "Claude 3")
        st.write(claude_response)

    # Compare how similar the two model outputs are
    similarity = calculate_similarity(gpt_response, claude_response)
    st.markdown(f"**Similarity Between Models:** {similarity:.2f}")

# Bulk Processing
st.markdown("### **Bulk Prompt Testing**")
uploaded_file = st.file_uploader("Upload a CSV with prompts", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "Prompt" in df.columns:
        results = []
        for prompt in df["Prompt"]:
            response = generate_response(prompt, model_choice)
            results.append(response)
        df["Response"] = results
        st.write(df)
        st.download_button(
            "Download Results",
            df.to_csv(index=False).encode("utf-8"),
            "results.csv",
            "text/csv",
        )
    else:
        st.error("CSV must contain a 'Prompt' column.")

# Feedback
st.markdown("### **Feedback**")
if st.button("👍 Good Response"):
    st.toast("Thanks for your feedback!")
if st.button("👎 Needs Improvement"):
    st.toast("We'll do better next time!")
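
# ----------------------------------------------------------------------
# Usage note: a minimal sketch of how to run the app locally. The script
# filename below is assumed, not taken from the project; the package names
# match the imports above.
#
#   pip install streamlit openai anthropic pandas scikit-learn sentence-transformers
#   export OPENAI_API_KEY=...      # required for the GPT-4 option
#   export ANTHROPIC_API_KEY=...   # required for the Claude 3 option
#   streamlit run promptcraft_studio.py
# ----------------------------------------------------------------------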