File size: 10,245 Bytes
625572e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import os
import re

import google.generativeai as genai
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

# Configure the Gemini client from the environment so that no secret is kept
# in source control. Export GOOGLE_API_KEY before launching the app.
# NOTE(review): any key that was ever committed to this file should be
# treated as leaked and revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

def load_and_preprocess_data(file):
    """Read an emotion CSV and return only rows with usable emotion scores.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame in which the seven emotion columns are numeric. Rows whose
        emotion values are missing or non-numeric are removed; missing values
        in any other column do not cause a row to be dropped.

    Raises:
        KeyError: If one or more of the emotion columns is absent.
    """
    emotion_columns = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
    data = pd.read_csv(file)

    missing = [col for col in emotion_columns if col not in data.columns]
    if missing:
        raise KeyError(f"CSV is missing required emotion columns: {missing}")

    # Non-numeric cells become NaN so they can be filtered out below.
    data[emotion_columns] = data[emotion_columns].apply(pd.to_numeric, errors='coerce')

    # Only the emotion columns decide whether a row is usable; a blank cell in
    # an unrelated column must not discard otherwise valid measurements.
    return data.dropna(subset=emotion_columns)

def calculate_emotion_stats(data, emotion):
    """Summarise one emotion column.

    Args:
        data: DataFrame containing the ``emotion`` column.
        emotion: Name of the column to summarise.

    Returns:
        dict with keys ``'mean'``, ``'median'`` and ``'std'`` holding the
        corresponding pandas statistics of that column.
    """
    series = data[emotion]
    summary = {}
    for stat_name in ('mean', 'median', 'std'):
        summary[stat_name] = getattr(series, stat_name)()
    return summary

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

def calculate_emotion_percentages(data):
    """Return each emotion's share of the summed emotion scores, in percent.

    Args:
        data: DataFrame containing the seven emotion columns.

    Returns:
        Series indexed by emotion name whose values are percentages rounded
        to two decimals.
    """
    emotion_columns = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
    column_totals = data[emotion_columns].sum()
    grand_total = column_totals.sum()
    shares = column_totals / grand_total * 100
    return shares.round(2)

def visualize_all_emotions_pie_chart(data_dict, selected_movies):
    """Render one emotion-share pie chart per selected movie.

    Args:
        data_dict: Mapping of movie name to its emotion DataFrame.
        selected_movies: Movie names (keys of ``data_dict``) to chart.

    Returns:
        dict mapping each movie name to the percentage Series produced by
        ``calculate_emotion_percentages`` for that movie.
    """
    emotion_percentages = {}
    for movie in selected_movies:
        percentages = calculate_emotion_percentages(data_dict[movie])
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.pie(percentages, labels=percentages.index, autopct='%1.1f%%', startangle=90)
        ax.set_title(f"Distribution of Emotions in {movie}")
        st.pyplot(fig)
        # pyplot keeps every figure registered until it is closed; Streamlit
        # re-executes the script on each interaction, so unclosed figures
        # would pile up in memory.
        plt.close(fig)
        emotion_percentages[movie] = percentages
    return emotion_percentages

def visualize_comparison(data_dict, features, selected_movies):
    """Pick and draw the comparison chart that matches the requested features.

    Args:
        data_dict: Mapping of movie name to its emotion DataFrame.
        features: Requested emotion names, or a list containing ``'all'``.
        selected_movies: Movie names (keys of ``data_dict``) to compare.

    Returns:
        The per-movie percentage mapping when ``'all'`` was requested;
        otherwise ``None`` (including when ``features`` is empty).
    """
    if not features:
        st.write("No features selected for visualization.")
        return None

    wants_everything = 'all' in features
    if wants_everything:
        return visualize_all_emotions_pie_chart(data_dict, selected_movies)

    visualize_specific_emotions(data_dict, features, selected_movies)
    return None


import seaborn as sns
import matplotlib.pyplot as plt


def visualize_specific_emotions(data_dict, features, selected_movies):
    """Plot density curves of the chosen emotions for each selected movie.

    When exactly two movies are selected, a correlation heatmap of the chosen
    features across both movies is drawn as well.

    Args:
        data_dict: Mapping of movie name to its emotion DataFrame.
        features: Emotion column names to plot.
        selected_movies: Movie names (keys of ``data_dict``) to include.

    Returns:
        None. Charts and warnings are emitted through Streamlit.
    """
    fig, ax = plt.subplots()
    plotted_any = False

    for movie in selected_movies:
        movie_data = data_dict[movie]
        for feature in features:
            if feature not in movie_data:
                st.warning(f"'{feature}' data not found for {movie}. Skipping this feature.")
                continue
            # Label with both movie and feature so that several curves from
            # the same movie stay distinguishable in the legend. `fill`
            # replaces the deprecated `shade` keyword.
            sns.kdeplot(movie_data[feature], ax=ax, label=f"{movie} - {feature}", fill=True)
            plotted_any = True

    if plotted_any:
        # Use the explicit axes rather than pyplot's implicit "current" one.
        ax.legend()
    st.pyplot(fig)
    plt.close(fig)  # release the figure; Streamlit reruns would otherwise leak it

    if len(selected_movies) == 2:
        # Only correlate features that are actually present for each movie,
        # mirroring the skip logic used for the density plot above.
        per_movie = []
        for movie in selected_movies:
            movie_data = data_dict[movie]
            present = [feature for feature in features if feature in movie_data]
            per_movie.append(movie_data[present])

        correlation = pd.concat(per_movie, axis=1, keys=selected_movies).corr()
        if correlation.empty:
            return

        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(correlation, annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Heatmap of Selected Features Between Movies')
        st.pyplot(fig)
        plt.close(fig)

def visualize_single_candidate(data, emotions):
    """Draw the emotion chart for a single movie.

    Args:
        data: Emotion DataFrame for the movie.
        emotions: Emotion column names to plot, or a list containing
            ``'all'`` to request the overall pie chart.

    Returns:
        dict of ``{emotion: percentage}`` when ``'all'`` was requested;
        otherwise ``None``.
    """
    if 'all' in emotions:
        # Pie chart for all emotions
        percentages = calculate_emotion_percentages(data)
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.pie(percentages, labels=percentages.index, autopct='%1.1f%%', startangle=90)
        ax.set_title("Distribution of All Emotions")
        st.pyplot(fig)
        plt.close(fig)  # release the figure; Streamlit reruns would otherwise leak it
        return percentages.to_dict()

    # Distribution plot for specific emotions
    fig, ax = plt.subplots(figsize=(10, 6))
    for emotion in emotions:
        # `fill` replaces the deprecated `shade` keyword.
        sns.kdeplot(data[emotion], ax=ax, label=emotion, fill=True)
    ax.set_title("Distribution of Selected Emotions")
    ax.set_xlabel("Intensity")
    ax.set_ylabel("Density")
    ax.legend()
    st.pyplot(fig)
    plt.close(fig)
    return None
def format_emotion_prompt(emotions, data_dict, selected_movies, emotion_percentages=None):
    """Build the analysis prompt sent to the language model.

    Args:
        emotions: Requested emotion names, or a list containing ``'all'``.
        data_dict: Mapping of movie name to its emotion DataFrame; consulted
            only for movies that have no entry in ``emotion_percentages``.
        selected_movies: Movie names to describe, in output order.
        emotion_percentages: Optional mapping of movie name to an
            ``{emotion: percentage}`` mapping. When a movie is present here
            its percentages are listed instead of mean/median/std.

    Returns:
        The prompt template with the movie list, emotion list and per-movie
        statistics substituted in.
    """
    template = """
    You are an AI assistant specializing in movie emotion analysis. You have access to emotion data for the following movies: {MOVIES}, focusing on these emotions: {EMOTIONS}.

    {STATS}

    Based on this data:

    1. Compare the overall levels of the specified emotions across the selected movies. Which movies exhibit more intense or frequent emotions?
    2. Analyze the distribution of emotions in each selected movie. Are they evenly spread or concentrated at certain levels?
    3. Discuss any significant differences in how these emotions are expressed across the selected movies. What might these differences suggest about the movies' content or style?
    4. Consider the variability of emotions in each selected movie. Do some movies have more consistent levels of emotions, or do they fluctuate more?
    5. Based on this emotion data, hypothesize about potential scenes or themes in each selected movie that might contribute to the observed emotion patterns.
    6. How might the differences in emotions between these selected movies affect the overall viewer experience?

    Provide a detailed analysis addressing these points, using specific data references where relevant. Your analysis should offer insights into how these emotions are utilized in each selected movie and what this reveals about their emotional content and potential audience impact.
    """

    wants_all = 'all' in emotions
    every_emotion = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

    sections = []
    for title in selected_movies:
        lines = [f"\n{title}:\n"]
        if emotion_percentages and title in emotion_percentages:
            # Pre-computed percentages take priority over raw statistics.
            for name, share in emotion_percentages[title].items():
                lines.append(f"{name.capitalize()}: {share:.2f}%\n")
        else:
            targets = every_emotion if wants_all else emotions
            for name in targets:
                summary = calculate_emotion_stats(data_dict[title], name)
                lines.append(
                    f"{name.capitalize()} - Mean: {summary['mean']:.2f}, Median: {summary['median']:.2f}, Standard Deviation: {summary['std']:.2f}\n"
                )
        sections.append("".join(lines))

    return template.format(
        MOVIES=", ".join(selected_movies),
        EMOTIONS="all emotions" if wants_all else ", ".join(emotions),
        STATS="".join(sections),
    )

def generate_response(prompt, data_dict, emotions, selected_movies, emotion_percentages=None):
    """Ask the Gemini model to analyse the selected movies' emotion data.

    Args:
        prompt: The user's free-text question.
        data_dict: Mapping of movie name to its emotion DataFrame.
        emotions: Requested emotion names, or a list containing ``'all'``.
        selected_movies: Movie names to include in the analysis.
        emotion_percentages: Optional pre-computed per-movie percentages,
            forwarded to ``format_emotion_prompt``.

    Returns:
        The model's reply text, or a fixed error message when no text could
        be extracted from the response. Exceptions raised by the API call
        itself are not caught here.
    """
    model = genai.GenerativeModel('gemini-pro')

    analysis_prompt = format_emotion_prompt(emotions, data_dict, selected_movies, emotion_percentages)

    full_prompt = analysis_prompt + "\n\nUser query: " + prompt
    response = model.generate_content(full_prompt)

    candidates = getattr(response, 'candidates', None)
    if candidates:
        content = getattr(candidates[0], 'content', None)
        parts = getattr(content, 'parts', None) or []
        # A reply may be split over several parts; join them all instead of
        # returning only the first one, which would truncate the answer.
        text = "".join(getattr(part, 'text', '') or '' for part in parts)
        if text:
            return text

    return "Error: Unable to extract text from the response. Please check the API response structure."

def main():
    """Run the Streamlit multi-movie emotion analysis chat app.

    Flow: collect one CSV per movie, and once every file is loaded show the
    chat history, let the user pick movies, and for each chat prompt detect
    the requested emotions, draw the matching chart and display the model's
    analysis.
    """
    st.title("Multi-Movie Emotion Analysis Chat Interface")

    num_movies = st.number_input("How many movies would you like to compare?", min_value=1, max_value=10, value=1)

    data_dict = {}
    for i in range(num_movies):
        uploaded_file = st.file_uploader(f"Choose CSV file for Movie {i+1}", type="csv", key=f"movie_{i+1}")
        if uploaded_file is not None:
            try:
                data_dict[f"Movie {i+1}"] = load_and_preprocess_data(uploaded_file)
            except KeyError as exc:
                # A CSV without the expected emotion columns should produce a
                # readable message, not crash the whole app.
                st.error(f"Movie {i+1}: the uploaded CSV is missing an expected emotion column ({exc}).")

    # The chat only starts once every requested movie has usable data.
    if len(data_dict) != num_movies:
        return

    st.success("All files uploaded successfully. You can now start chatting!")

    st.subheader("Data Information")
    for movie, data in data_dict.items():
        st.write(f"{movie} Columns:", data.columns.tolist())

    emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
    st.write("Available Emotions:", emotions)

    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Movie selection for each query
    selected_movies = st.multiselect("Select movies to compare :", list(data_dict.keys()))
    if not selected_movies:
        selected_movies = list(data_dict.keys())

    if prompt := st.chat_input("What would you like to know about the movies' emotions?"):
        st.chat_message("user").markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        prompt_lower = prompt.lower()

        # Match "all" only as a whole word: a plain substring test also fires
        # on "overall", "small", "really", ... and would override a request
        # for a specific emotion. Emotion names stay substring matches so that
        # variants such as "sadness" or "fearful" are still recognised.
        if re.search(r"\ball\b", prompt_lower):
            selected_emotions = ['all']
        else:
            selected_emotions = [emotion for emotion in emotions if emotion in prompt_lower]

        if not selected_emotions:
            selected_emotions = ['all']

        emotion_percentages = None
        # Chart and text are rendered in one assistant bubble.
        with st.chat_message("assistant"):
            if len(selected_movies) == 1:
                # Single candidate scenario
                only_movie = selected_movies[0]
                percentages = visualize_single_candidate(data_dict[only_movie], selected_emotions)
                if percentages is not None:
                    # The prompt builder looks percentages up by movie name,
                    # so wrap the flat {emotion: pct} mapping accordingly.
                    emotion_percentages = {only_movie: percentages}
            elif any(keyword in prompt_lower for keyword in ["graph", "compare", "visualize", "show"]):
                # Multiple candidates scenario
                emotion_percentages = visualize_comparison(data_dict, selected_emotions, selected_movies)

            response = generate_response(prompt, data_dict, selected_emotions, selected_movies, emotion_percentages)
            st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})

    if st.checkbox("Show raw data"):
        for movie in selected_movies:
            st.subheader(f"{movie} Data")
            st.write(data_dict[movie])

if __name__ == "__main__":
    main()