import streamlit as st
import openai
import anthropic
import pandas as pd
import numpy as np
import time
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import os
# Initialize API clients from environment variables
openai.api_key = os.getenv("OPENAI_API_KEY")
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Load the sentence transformer used for similarity scoring; cached so the
# model is not reloaded on every Streamlit rerun
@st.cache_resource
def load_similarity_model():
    return SentenceTransformer('all-MiniLM-L6-v2')

similarity_model = load_similarity_model()
# ===== Helper Functions =====
def generate_response(prompt, model):
    """Generate responses from different models."""
    if model == "GPT-4":
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content.strip()
    elif model == "Claude 3":
        response = anthropic_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.content[0].text
    else:
        return "Model not supported."
def calculate_similarity(text1, text2):
    """Calculate cosine similarity between two texts."""
    embeddings = similarity_model.encode([text1, text2])
    return cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
def evaluate_response(response, golden_response):
    """Evaluate response quality."""
    similarity = calculate_similarity(response, golden_response)
    return {
        "Similarity Score": similarity,
        "Length": len(response),
        "Toxicity": "Low"  # Placeholder for actual toxicity detection
    }
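# --- Hedged sketch: replacing the "Toxicity" placeholder ---
# evaluate_response hard-codes Toxicity as "Low". If real scoring is wanted,
# one option (an assumption, not part of this app) is the `detoxify` package:
#
#     from detoxify import Detoxify
#     tox_model = Detoxify('original')                     # small pretrained model
#     tox_score = tox_model.predict(response)['toxicity']  # float in [0, 1]
#
# The float could then replace the placeholder string in the metrics dict.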
def optimize_prompt(prompt, target_metrics):
    """Optimize prompt using a simple mutation strategy."""
    # Placeholder for genetic algorithm implementation
    optimized_prompt = prompt + " [Optimized for clarity and brevity]"
    return optimized_prompt
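# --- Hedged sketch: one way to make the mutation strategy concrete ---
# The placeholder above only appends a tag. A minimal realization of the
# "mutation" idea (the candidate suffixes and function name below are
# illustrative assumptions, not the original design) generates prompt
# variants, scores each model response against a reference answer, and
# keeps the best-scoring variant:
def optimize_prompt_by_search(prompt, golden_response, model):
    candidates = [
        prompt,
        prompt + " Answer in one short paragraph.",
        prompt + " Use a concrete example.",
        prompt + " Avoid jargon.",
    ]
    best_prompt, best_score = prompt, -1.0
    for candidate in candidates:
        response = generate_response(candidate, model)
        score = calculate_similarity(response, golden_response)
        if score > best_score:
            best_prompt, best_score = candidate, score
    return best_prompt, best_score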
# ===== Streamlit UI =====
st.title("PromptCraft Studio")
st.write("The Ultimate Prompt Engineering Workbench")

# Sidebar for settings
with st.sidebar:
    st.header("⚙️ Settings")
    model_choice = st.selectbox("Choose Model", ["GPT-4", "Claude 3"])
    optimization_goal = st.selectbox("Optimization Goal", ["Clarity", "Brevity", "Engagement"])
# Main interface
st.markdown("### **Prompt Playground**")
user_prompt = st.text_area("Enter your prompt:", "Explain quantum computing in simple terms.")

if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_response(user_prompt, model_choice)
    st.markdown("### **Response**")
    st.write(response)

    # Evaluate response
    st.markdown("### **Evaluation Metrics**")
    golden_response = "Quantum computing uses qubits to perform complex calculations faster than classical computers."
    metrics = evaluate_response(response, golden_response)
    st.write(metrics)
# Prompt Optimization
st.markdown("### **Prompt Optimization**")
if st.button("Optimize Prompt"):
    optimized_prompt = optimize_prompt(user_prompt, optimization_goal)
    st.markdown("### **Optimized Prompt**")
    st.write(optimized_prompt)
# Multi-Model Comparison
st.markdown("### **Multi-Model Comparison**")
if st.button("Compare Models"):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("#### **GPT-4 Response**")
        gpt_response = generate_response(user_prompt, "GPT-4")
        st.write(gpt_response)
    with col2:
        st.markdown("#### **Claude 3 Response**")
        claude_response = generate_response(user_prompt, "Claude 3")
        st.write(claude_response)

    # Compare similarity
    similarity = calculate_similarity(gpt_response, claude_response)
    st.markdown(f"**Similarity Between Models:** {similarity:.2f}")
# Bulk Processing
st.markdown("### **Bulk Prompt Testing**")
uploaded_file = st.file_uploader("Upload a CSV with prompts", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "Prompt" in df.columns:
        results = []
        for prompt in df["Prompt"]:
            response = generate_response(prompt, model_choice)
            results.append(response)
        df["Response"] = results
        st.write(df)
        st.download_button("Download Results", df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
    else:
        st.error("CSV must contain a 'Prompt' column.")
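# --- Hedged note: rate limits in bulk mode ---
# Each CSV row triggers a live API call, so large files can hit provider rate
# limits. A minimal guard (an assumption, not provider-specific advice) is to
# pause between calls inside the loop above, e.g.:
#
#     time.sleep(1)  # uses the `time` import at the top of the file
#
# or to route the calls through a retry wrapper such as safe_generate above.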
# Feedback
st.markdown("### **Feedback**")
if st.button("👍 Good Response"):
    st.toast("Thanks for your feedback!")
if st.button("👎 Needs Improvement"):
    st.toast("We'll do better next time!")