import streamlit as st
import openai
import anthropic
import pandas as pd
import os
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Initialize API clients
openai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Load the sentence transformer for similarity scoring once (cached so
# Streamlit does not reload the model on every rerun)
@st.cache_resource
def load_similarity_model():
    return SentenceTransformer("all-MiniLM-L6-v2")

similarity_model = load_similarity_model()

# ===== Helper Functions =====

def generate_response(prompt, model):
    """Generate a response to `prompt` from the selected model."""
    if model == "GPT-4":
        response = openai_client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content.strip()
    elif model == "Claude 3":
        response = anthropic_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text
    else:
        return "Model not supported."

def calculate_similarity(text1, text2):
    """Calculate cosine similarity between two texts."""
    embeddings = similarity_model.encode([text1, text2])
    # Cast the numpy scalar to a plain float for clean display
    return float(cosine_similarity([embeddings[0]], [embeddings[1]])[0][0])
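
# Usage note: with this model the score lands roughly in [-1.0, 1.0], and
# near-identical texts score close to 1.0, e.g.:
#
#   calculate_similarity("The sky is blue.", "The sky is blue.")    # ~1.0
#   calculate_similarity("The sky is blue.", "Stocks fell today.")  # much lower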

def evaluate_response(response, golden_response):
    """Evaluate response quality against a golden reference answer."""
    similarity = calculate_similarity(response, golden_response)
    return {
        "Similarity Score": similarity,
        "Length": len(response),
        "Toxicity": "Low",  # Placeholder for actual toxicity detection
    }
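
# A possible replacement for the "Toxicity" placeholder above, using the
# detoxify package (an assumed extra dependency, not installed by this app):
#
#   from detoxify import Detoxify
#   tox = Detoxify("original").predict(response)["toxicity"]  # 0.0 to 1.0
#   toxicity_label = "High" if tox > 0.5 else "Low"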

def optimize_prompt(prompt, target_metrics):
    """Optimize a prompt using a simple mutation strategy."""
    # Placeholder for a genetic-algorithm implementation
    optimized_prompt = prompt + " [Optimized for clarity and brevity]"
    return optimized_prompt
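
# A hedged sketch of what the mutation strategy could grow into: greedy
# hill-climbing over small instruction "mutations", scored by how closely the
# model's answer matches a golden reference. The mutation list, default model,
# and scoring are assumptions, not part of the original app.
def optimize_prompt_hill_climb(prompt, golden_response, model="GPT-4", rounds=2):
    """Greedily keep the mutation that most improves similarity to the golden answer."""
    mutations = ["Be concise.", "Use plain language.", "Give one concrete example."]
    best = prompt
    best_score = calculate_similarity(generate_response(best, model), golden_response)
    for _ in range(rounds):
        for mutation in mutations:
            candidate = f"{best} {mutation}"
            score = calculate_similarity(generate_response(candidate, model), golden_response)
            if score > best_score:  # keep a candidate only if it beats the incumbent
                best, best_score = candidate, score
    return best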

# ===== Streamlit UI =====
st.title("🚀 PromptCraft Studio")
st.write("The Ultimate Prompt Engineering Workbench")

# Sidebar for settings
with st.sidebar:
    st.header("⚙️ Settings")
    model_choice = st.selectbox("Choose Model", ["GPT-4", "Claude 3"])
    optimization_goal = st.selectbox("Optimization Goal", ["Clarity", "Brevity", "Engagement"])

# Main interface
st.markdown("### **Prompt Playground**")
user_prompt = st.text_area("Enter your prompt:", "Explain quantum computing in simple terms.")

if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_response(user_prompt, model_choice)
    st.markdown("### **Response**")
    st.write(response)

    # Evaluate the response against a fixed golden answer
    st.markdown("### **Evaluation Metrics**")
    golden_response = "Quantum computing uses qubits to perform complex calculations faster than classical computers."
    metrics = evaluate_response(response, golden_response)
    st.write(metrics)

# Prompt Optimization
st.markdown("### **Prompt Optimization**")
if st.button("Optimize Prompt"):
    optimized_prompt = optimize_prompt(user_prompt, optimization_goal)
    st.markdown("### **Optimized Prompt**")
    st.write(optimized_prompt)

# Multi-Model Comparison
st.markdown("### **Multi-Model Comparison**")
if st.button("Compare Models"):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("#### **GPT-4 Response**")
        gpt_response = generate_response(user_prompt, "GPT-4")
        st.write(gpt_response)
    with col2:
        st.markdown("#### **Claude 3 Response**")
        claude_response = generate_response(user_prompt, "Claude 3")
        st.write(claude_response)

    # Compare the two responses for semantic similarity
    similarity = calculate_similarity(gpt_response, claude_response)
    st.markdown(f"**Similarity Between Models:** {similarity:.2f}")

# Bulk Processing
st.markdown("### **Bulk Prompt Testing**")
uploaded_file = st.file_uploader("Upload a CSV with prompts", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "Prompt" in df.columns:
        results = []
        for prompt in df["Prompt"]:
            response = generate_response(prompt, model_choice)
            results.append(response)
        df["Response"] = results
        st.write(df)
        st.download_button("Download Results", df.to_csv(index=False).encode("utf-8"), "results.csv", "text/csv")
    else:
        st.error("CSV must contain a 'Prompt' column.")
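
# Expected upload format: a CSV with a "Prompt" header column (the rows below
# are illustrative only):
#
#   Prompt
#   Explain quantum computing in simple terms.
#   Summarize the plot of Hamlet in two sentences.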

# Feedback
st.markdown("### **Feedback**")
if st.button("👍 Good Response"):
    st.toast("Thanks for your feedback!")
if st.button("👎 Needs Improvement"):
    st.toast("We'll do better next time!")