File size: 4,649 Bytes
ad029d1
9d97744
195ad28
ad029d1
195ad28
 
 
 
9d97744
ad029d1
195ad28
 
 
 
 
 
ad029d1
d9df600
195ad28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9df600
195ad28
 
 
 
 
 
d9df600
 
195ad28
 
ad029d1
195ad28
d9df600
 
195ad28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d97744
 
195ad28
 
 
9d97744
195ad28
 
 
 
 
 
 
d9df600
 
195ad28
 
9d97744
 
 
195ad28
 
 
 
 
9d97744
195ad28
9d97744
195ad28
ad029d1
d9df600
 
 
 
 
195ad28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import streamlit as st
import openai
import anthropic
import pandas as pd
import numpy as np
import time
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import os

# Initialize clients
# NOTE(review): sets the module-level openai.api_key while the code below
# calls the v1-style openai.chat.completions.create — confirm the installed
# openai package version supports this combination.
openai.api_key = os.getenv("OPENAI_API_KEY")
anthropic_client = anthropic.Client(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Load sentence transformer for similarity scoring
# Downloads the model on first run; used by calculate_similarity() below.
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# ===== Helper Functions =====
def generate_response(prompt, model):
    """Generate a single-turn completion from the selected model.

    Args:
        prompt: The user prompt text to send as a single user message.
        model: Display name of the model ("GPT-4" or "Claude 3").

    Returns:
        The model's reply text with surrounding whitespace stripped, or the
        string "Model not supported." for an unrecognized model name.
    """
    if model == "GPT-4":
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content.strip()
    elif model == "Claude 3":
        response = anthropic_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}],
        )
        # Strip whitespace for consistency with the GPT-4 branch above.
        return response.content[0].text.strip()
    else:
        # Returned (not raised) so the Streamlit UI simply displays it.
        return "Model not supported."

def calculate_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are embedded in a single batch with the module-level
    sentence-transformer model, then compared pairwise.
    """
    vec_a, vec_b = similarity_model.encode([text1, text2])
    return cosine_similarity([vec_a], [vec_b])[0][0]

def evaluate_response(response, golden_response):
    """Score a model response against a reference ("golden") answer.

    Returns a dict containing the cosine-similarity score against the
    golden response, the raw character length, and a placeholder
    toxicity label.
    """
    score = calculate_similarity(response, golden_response)
    metrics = {
        "Similarity Score": score,
        "Length": len(response),
        "Toxicity": "Low",  # placeholder until real toxicity detection exists
    }
    return metrics

def optimize_prompt(prompt, target_metrics):
    """Return an "optimized" variant of the given prompt.

    Placeholder for a genetic-algorithm optimizer; currently it just
    appends a fixed tag. ``target_metrics`` is accepted for the future
    implementation but not yet used.
    """
    suffix = " [Optimized for clarity and brevity]"
    return prompt + suffix

# ===== Streamlit UI =====
# Flat Streamlit script: widgets render top-to-bottom on every rerun, and
# each button click triggers a full script rerun with that button True.
st.title("πŸš€ PromptCraft Studio")
st.write("The Ultimate Prompt Engineering Workbench")

# Sidebar for settings — model and optimization-goal selections are read
# by the handlers further down on each rerun.
with st.sidebar:
    st.header("βš™οΈ Settings")
    model_choice = st.selectbox("Choose Model", ["GPT-4", "Claude 3"])
    optimization_goal = st.selectbox("Optimization Goal", ["Clarity", "Brevity", "Engagement"])

# Main interface
st.markdown("### **Prompt Playground**")
user_prompt = st.text_area("Enter your prompt:", "Explain quantum computing in simple terms.")

if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_response(user_prompt, model_choice)
        st.markdown("### **Response**")
        st.write(response)

        # Evaluate response against a single hard-coded reference answer.
        # NOTE(review): the golden response only matches the default prompt;
        # metrics are meaningless for other prompts — confirm intent.
        st.markdown("### **Evaluation Metrics**")
        golden_response = "Quantum computing uses qubits to perform complex calculations faster than classical computers."
        metrics = evaluate_response(response, golden_response)
        st.write(metrics)

# Prompt Optimization — optimize_prompt is currently a placeholder that
# appends a fixed tag regardless of the selected goal.
st.markdown("### **Prompt Optimization**")
if st.button("Optimize Prompt"):
    optimized_prompt = optimize_prompt(user_prompt, optimization_goal)
    st.markdown("### **Optimized Prompt**")
    st.write(optimized_prompt)

# Multi-Model Comparison — runs the same prompt through both providers
# side by side (two sequential API calls), then scores their agreement.
st.markdown("### **Multi-Model Comparison**")
if st.button("Compare Models"):
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("#### **GPT-4 Response**")
        gpt_response = generate_response(user_prompt, "GPT-4")
        st.write(gpt_response)
    with col2:
        st.markdown("#### **Claude 3 Response**")
        claude_response = generate_response(user_prompt, "Claude 3")
        st.write(claude_response)

    # Compare similarity
    similarity = calculate_similarity(gpt_response, claude_response)
    st.markdown(f"**Similarity Between Models:** {similarity:.2f}")

# Bulk Processing — expects a CSV with a "Prompt" column; each prompt is
# sent sequentially to the model selected in the sidebar.
st.markdown("### **Bulk Prompt Testing**")
uploaded_file = st.file_uploader("Upload a CSV with prompts", type=["csv"])
if uploaded_file:
    df = pd.read_csv(uploaded_file)
    if "Prompt" in df.columns:
        results = []
        for prompt in df["Prompt"]:
            response = generate_response(prompt, model_choice)
            results.append(response)
        df["Response"] = results
        st.write(df)
        st.download_button("Download Results", df.to_csv(index=False).encode('utf-8'), "results.csv", "text/csv")
    else:
        st.error("CSV must contain a 'Prompt' column.")

# Feedback — toast-only acknowledgements; feedback is not persisted anywhere.
st.markdown("### **Feedback**")
if st.button("πŸ‘ Good Response"):
    st.toast("Thanks for your feedback!")
if st.button("πŸ‘Ž Needs Improvement"):
    st.toast("We'll do better next time!")