Spaces:

AI-companionship
/

intima-responses-2D

Running

File size: 5,301 Bytes

import streamlit as st
import pandas as pd
from embedding_atlas.streamlit import embedding_atlas
import os
from glob import glob

# Browser-tab title/icon and overall layout; collected in one mapping so the
# settings can be read at a glance before being handed to Streamlit.
PAGE_SETTINGS = {
    "page_title": "AI Companionship Behavior Analysis - Embedding Atlas",
    "page_icon": "🤖",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Stylesheet for the page: a centered title/subtitle plus three call-out box
# variants (info, warning, tip) that share padding, margin and corner radius.
CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 1rem;
    }
    .sub-header {
        font-size: 1.2rem;
        color: #666;
        text-align: center;
        margin-bottom: 2rem;
    }
    .info-box {
        background-color: #f0f2f6;
        border-left: 4px solid #1f77b4;
        padding: 1rem;
        margin: 1rem 0;
        border-radius: 0.5rem;
    }
    .warning-box {
        background-color: #fff3cd;
        border-left: 4px solid #ffc107;
        padding: 1rem;
        margin: 1rem 0;
        border-radius: 0.5rem;
    }
    .tip-box {
        background-color: #d1ecf1;
        border-left: 4px solid #17a2b8;
        padding: 1rem;
        margin: 1rem 0;
        border-radius: 0.5rem;
    }
</style>
"""
# Raw HTML must be explicitly allowed, otherwise the <style> tag is escaped.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Page title, styled by the .main-header rule injected above.
MAIN_HEADER_HTML = '<div class="main-header">🤖 AI Companionship Behavior Analysis</div>'
st.markdown(MAIN_HEADER_HTML, unsafe_allow_html=True)

# Location of the pre-computed atlas dataframe. Kept in one place so the path
# that is read and the path reported in error messages cannot drift apart.
DATA_PATH = '/app/src/data/embed_atlas_df.parquet'


@st.cache_data
def _read_atlas_parquet(path):
    """Read the parquet file at *path* and return it as a DataFrame.

    Only this successful read is cached. Any exception propagates to the
    caller instead of being turned into a return value, so a failed attempt
    is not stored in the cache and the next rerun tries again.
    """
    return pd.read_parquet(path)


def load_data():
    """Load the embedding atlas dataframe.

    Returns:
        The DataFrame read from ``DATA_PATH``, or ``None`` when the file is
        missing or cannot be read. In the failure case an error message is
        shown in the app.
    """
    try:
        return _read_atlas_parquet(DATA_PATH)
    except FileNotFoundError:
        # Report the path that was actually attempted, plus the working
        # directory contents to help diagnose a wrong deployment layout.
        st.error(f"""
        ❌ Data file not found. Please ensure '{DATA_PATH}' exists.
        Current directory: {os.getcwd()}
        Files available: {glob("*")}
        """)
    except Exception as e:
        # Top-level UI boundary: surface any other read failure to the user
        # rather than crashing the whole page.
        st.error(f"❌ Error loading data: {str(e)}")
    return None

# Fetch the dataframe up front; the visualization section further down
# branches on whether `df` is None.
df = load_data()

# Sidebar: background on the study and a short how-to. Elements are added
# through the `st.sidebar` object rather than a `with` block.
sidebar = st.sidebar

sidebar.header("📚 About This Visualization")
sidebar.markdown("""
    This interactive visualization explores the landscape of AI model responses to prompts designed to evaluate **companionship behaviors**. 
    
    **Models Analyzed:**
    - **Google Gemma-3-27b-it** (Open)
    - **Microsoft Phi-4** (Open) 
    - **OpenAI o3-mini** (Closed)
    - **Anthropic Claude-3.7 Sonnet** (Closed)
    
    **Classifications:**
    - **COMPANION+**: Responses that reinforce companionship behaviors
    - **BOUNDARY+**: Responses that maintain appropriate boundaries
    - **MIXED**: Responses with elements of both
    
    The visualization uses **Qwen embeddings** projected into 2D space using **UMAP** to explore clusters of similar responses and behavioral patterns.
    """)

sidebar.header("🔧 Usage Tips")
sidebar.markdown("""
    **Getting Started:**
    1. Wait for the widget to fully load
    2. Use the **"classification"** option in the color dropdown
    3. Explore clusters by zooming and panning
    4. Use the search bar to find specific terms
    """)

# Call-out with recommended widget settings, rendered in the main column
# using the .tip-box class from the page stylesheet.
RECOMMENDED_SETTINGS_HTML = """
<div class="tip-box">
    <strong>💡 Recommended Settings:</strong>
    <ul>
        <li><strong>Color by Classification:</strong> Select "classification" from the color dropdown to see the different behavior categories</li>
        <li><strong>Cluster Exploration:</strong> Shift+Click and drag to explore different regions of the embedding space</li>
        <li><strong>Table and Charts:</strong> Use the table and charts on the right to explore the full data and your current selection</li>
    </ul>
</div>
"""
st.markdown(RECOMMENDED_SETTINGS_HTML, unsafe_allow_html=True)

# Main visualization: render the atlas when the dataframe loaded and has the
# columns the widget is configured to read; otherwise explain what is wrong.
if df is None:
    st.error("Unable to load data. Please check the file path and try again.")
else:
    st.header("🗺️ Interactive Embedding Atlas")

    # Columns passed to the widget below: 2D coordinates and the point text.
    required_columns = ['x', 'y', 'snippet']
    missing_columns = [name for name in required_columns if name not in df.columns]

    if not missing_columns:
        try:
            # Build the interactive widget; its return value describes the
            # user's current selection, if any.
            value = embedding_atlas(df, text="snippet", x="x", y="y", show_table=True)

            # Show the selection filter only when the widget reports one.
            predicate = value.get("predicate") if value else None
            if predicate:
                st.markdown("### 📋 Current Selection")
                st.info(f"Selection filter: `{predicate}`")

                # The predicate is a filter expression; the selected rows
                # could be materialized with DuckDB if needed, e.g.
                #   duckdb.query_df(df, "dataframe",
                #       "SELECT * FROM dataframe WHERE " + value["predicate"])
                # and then shown with st.dataframe(...).

        except Exception as exc:
            st.error(f"❌ Error creating visualization: {str(exc)}")
            st.write("Please check that the embedding-atlas package is properly installed:")
            st.code("pip install embedding-atlas")
    else:
        st.error(f"❌ Missing required columns: {missing_columns}")
        st.write("Available columns:", list(df.columns))

# Footer: a horizontal rule separating the visualization from what follows.
st.markdown("---")