# Streamlit app: interactive Embedding Atlas view of AI companionship-behavior responses.
# (Web file-viewer residue - size header, blame hashes, line-number gutter - removed here.)
import streamlit as st
import pandas as pd
from embedding_atlas.streamlit import embedding_atlas
import os
from glob import glob
# Set page configuration (browser-tab title, favicon, layout, sidebar state).
# This is the first Streamlit call in the script; keep it ahead of any other
# st.* output.
st.set_page_config(
    page_title="AI Companionship Behavior Analysis - Embedding Atlas",
    # The icon arrived mis-encoded ("π€" is what a 4-byte UTF-8 emoji looks
    # like after being decoded with a Greek single-byte codec). Restored to
    # the robot emoji - NOTE(review): confirm this is the intended glyph.
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS for better styling.
# The stylesheet lives in a named constant so the injection call below stays
# a one-liner. It defines five classes: .main-header, .sub-header, .info-box,
# .warning-box and .tip-box.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 2rem;
}
.info-box {
background-color: #f0f2f6;
border-left: 4px solid #1f77b4;
padding: 1rem;
margin: 1rem 0;
border-radius: 0.5rem;
}
.warning-box {
background-color: #fff3cd;
border-left: 4px solid #ffc107;
padding: 1rem;
margin: 1rem 0;
border-radius: 0.5rem;
}
.tip-box {
background-color: #d1ecf1;
border-left: 4px solid #17a2b8;
padding: 1rem;
margin: 1rem 0;
border-radius: 0.5rem;
}
</style>
"""

# unsafe_allow_html is required, otherwise the <style> tag is not passed
# through as raw HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Main header, rendered as raw HTML so the `.main-header` CSS class applies.
# The leading emoji was mis-encoded ("π€"); restored to the robot emoji -
# NOTE(review): confirm this is the intended glyph.
st.markdown(
    '<div class="main-header">🤖 AI Companionship Behavior Analysis</div>',
    unsafe_allow_html=True,
)
# Load data with caching
@st.cache_data
def _read_embedding_frame(path):
    """Read the parquet file at *path*; the result is cached by Streamlit.

    Deliberately free of UI calls and error handling: a failure raises out of
    this function instead of returning ``None``, so only successfully loaded
    frames are returned from (and stored by) the cached function.
    """
    return pd.read_parquet(path)


def load_data(path='/app/src/data/embed_atlas_df.parquet'):
    """Load the embedding atlas dataframe.

    Args:
        path: Location of the parquet file. The default is the path this app
            has always read from, so ``load_data()`` behaves as before.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when loading failed. On
        failure an ``st.error`` message has already been rendered.
    """
    try:
        return _read_embedding_frame(path)
    except FileNotFoundError:
        # Report the path that was actually tried (the old message named a
        # different, relative path) plus the working directory contents to
        # help diagnose deployment layout problems.
        st.error(
            "\n"
            f"❌ Data file not found. Please ensure '{path}' exists.\n"
            f"Current directory: {os.getcwd()}\n"
            f"Files available: {glob('*')}\n"
        )
        return None
    except Exception as e:
        # App boundary: surface any other load failure to the user rather
        # than crashing the page.
        st.error(f"❌ Error loading data: {e}")
        return None


# `load_data` used to be the cached function itself; keep its cache-clearing
# handle available under the same name for any caller that relied on it.
load_data.clear = _read_embedding_frame.clear
# Load data once for the entire app.
# `df` is a module-level name holding whatever load_data() returned: the
# DataFrame, or None when the file could not be read. The visualization
# section checks for None before using it.
df = load_data()
# Sidebar with controls and information.
#
# The Markdown/HTML bodies are module-level constants so their text starts at
# column 0 regardless of how deeply the rendering call is nested.
#
# Blank lines separate each paragraph from the list before it. Without them
# Markdown treats the paragraph as a continuation of the last bullet, which
# is how "**Classifications:**" and the closing sentence were being rendered.
_ABOUT_MARKDOWN = """
This interactive visualization explores the landscape of AI model responses to prompts designed to evaluate **companionship behaviors**.

**Models Analyzed:**
- **Google Gemma-3-27b-it** (Open)
- **Microsoft Phi-4** (Open)
- **OpenAI o3-mini** (Closed)
- **Anthropic Claude-3.7 Sonnet** (Closed)

**Classifications:**
- **COMPANION+**: Responses that reinforce companionship behaviors
- **BOUNDARY+**: Responses that maintain appropriate boundaries
- **MIXED**: Responses with elements of both

The visualization uses **Qwen embeddings** projected into 2D space using **UMAP** to explore clusters of similar responses and behavioral patterns.
"""

_USAGE_TIPS_MARKDOWN = """
**Getting Started:**
1. Wait for the widget to fully load
2. Use the **"classification"** option in the color dropdown
3. Explore clusters by zooming and panning
4. Use the search bar to find specific terms
"""

# Raw HTML (no blank lines inside, so Markdown keeps it as one HTML block);
# styled by the `.tip-box` CSS class. The light-bulb emoji was mis-encoded
# ("π‘") and has been restored.
_RECOMMENDED_SETTINGS_HTML = """
<div class="tip-box">
<strong>💡 Recommended Settings:</strong>
<ul>
<li><strong>Color by Classification:</strong> Select "classification" from the color dropdown to see the different behavior categories</li>
<li><strong>Cluster Exploration:</strong> Shift+Click and drag to explore different regions of the embedding space</li>
<li><strong>Table and Charts:</strong> Use the table and charts on the right to explore the full data and your current selection</li>
</ul>
</div>
"""

with st.sidebar:
    # NOTE(review): the glyph before "About" is mis-encoded and cannot be
    # recovered from the remaining bytes (several emoji collapse to the same
    # residue). Left untouched - restore it from the original file.
    st.header("π About This Visualization")
    st.markdown(_ABOUT_MARKDOWN)

    # Wrench emoji restored from its mis-encoded form ("π§").
    st.header("🔧 Usage Tips")
    st.markdown(_USAGE_TIPS_MARKDOWN)

    # Usage recommendations
    # NOTE(review): indentation was lost in this copy of the file; the tip box
    # is assumed to belong to the sidebar (it follows "Usage Tips" and precedes
    # the main-visualization section) - confirm against the original layout.
    st.markdown(_RECOMMENDED_SETTINGS_HTML, unsafe_allow_html=True)
# Main visualization.
# Renders the Embedding Atlas widget when the dataframe loaded and has the
# columns the widget is wired to; otherwise shows an explanatory error.
# Mis-encoded emoji in the header and error strings ("πΊοΈ", "β") have been
# restored to the map and cross-mark emoji.
if df is None:
    st.error("Unable to load data. Please check the file path and try again.")
else:
    st.header("🗺️ Interactive Embedding Atlas")

    # Check required columns: x/y are the 2D coordinates and `snippet` is the
    # text column passed to the widget below.
    required_columns = ['x', 'y', 'snippet']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        st.error(f"❌ Missing required columns: {missing_columns}")
        st.write("Available columns:", list(df.columns))
    else:
        try:
            # Create the embedding atlas
            value = embedding_atlas(
                df,
                text="snippet",
                x="x",
                y="y",
                show_table=True,
            )

            # Display selection information if available. The widget value
            # may be empty, and "predicate" may be absent or falsy when
            # nothing is selected, so look it up once and test the result.
            predicate = value.get("predicate") if value else None
            if predicate:
                # NOTE(review): the glyph before "Current Selection" is
                # mis-encoded and not recoverable from the remaining bytes;
                # left untouched - restore it from the original file.
                st.markdown("### π Current Selection")
                st.info(f"Selection filter: `{predicate}`")

                # You could add DuckDB querying here if needed
                # (note this concatenates the predicate into the SQL text):
                # import duckdb
                # selection = duckdb.query_df(
                #     df, "dataframe", "SELECT * FROM dataframe WHERE " + value["predicate"]
                # )
                # st.dataframe(selection)
        except Exception as e:
            # App boundary: report widget/package failures in the page
            # instead of crashing it.
            st.error(f"❌ Error creating visualization: {e}")
            st.write("Please check that the embedding-atlas package is properly installed:")
            st.code("pip install embedding-atlas")
# Footer: currently only a horizontal rule (Markdown thematic break); no
# further footer content is rendered. The stray trailing "|" that followed
# this call was file-viewer residue and a syntax error, so it is dropped.
st.markdown("---")