Spaces:
Sleeping
Sleeping
File size: 6,640 Bytes
996aa19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import os
import streamlit as st
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from transformers import pipeline
import nltk
import numpy as np
from utils import read_poems_from_directory, emotion_labels_with_colors
# Fetch the NLTK "punkt" data used for sentence tokenization (nltk.sent_tokenize).
# NOTE(review): newer NLTK releases may look for "punkt_tab" instead — confirm
# against the NLTK version this app is deployed with.
nltk.download('punkt')

# Display name -> Hugging Face checkpoint for every emotion classifier compared.
_EMOTION_CHECKPOINTS = (
    ("Model 1", "j-hartmann/emotion-english-distilroberta-base"),
    ("Model 2", "cardiffnlp/twitter-roberta-base-emotion"),
)

# One classification pipeline per checkpoint, built in the order listed above.
# return_all_scores=True asks for a score for every label, not just the top one.
models = {
    display_name: pipeline('sentiment-analysis', model=checkpoint, return_all_scores=True)
    for display_name, checkpoint in _EMOTION_CHECKPOINTS
}

# Folder that holds the poem text files; created up front so listing it is safe.
poems_directory = "./poems"
os.makedirs(poems_directory, exist_ok=True)
def _save_uploaded_poem(uploaded_file, directory):
    """Store an uploaded file inside *directory* and return the name used.

    The file name comes from the client, so only its final path component is
    kept (both ``/`` and ``\\`` are treated as separators). Returns ``None``
    without writing anything when no usable file name remains.
    """
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return None
    with open(os.path.join(directory, safe_name), "wb") as f:
        f.write(uploaded_file.getbuffer())
    return safe_name


def _analyze_emotions(poem, model):
    """Run *model* on every sentence of *poem*; return the raw results in order."""
    return [model(sentence) for sentence in nltk.sent_tokenize(poem)]


def _process_emotions(emotions):
    """Turn raw per-sentence results into ``{label: score}`` dicts.

    Returns:
        A tuple ``(emotion_scores, all_labels)``: one dict per sentence, and the
        set of every label seen in any of them.
    """
    emotion_scores = [
        # Each raw result wraps its list of {'label', 'score'} entries in an
        # outer list, hence the [0].
        {emo['label']: emo['score'] for emo in sentence_result[0]}
        for sentence_result in emotions
    ]
    all_labels = set().union(*emotion_scores)
    return emotion_scores, all_labels


def _plot_emotional_arc(processed_emotions, labels, model_name):
    """Draw one line per (poem, label) showing the score across sentences."""
    st.subheader(model_name)
    # Use an explicit figure so it can be handed to Streamlit and then closed;
    # figures left open on pyplot's global state pile up across reruns.
    fig, ax = plt.subplots(figsize=(15, 10))
    for i, emotions in enumerate(processed_emotions):
        for emotion in labels:
            emotion_arc = [line_emotions.get(emotion, 0) for line_emotions in emotions]
            color = emotion_labels_with_colors.get(emotion, 'black')  # default to black if not found
            ax.plot(emotion_arc, label=f'Poem {i+1} - {emotion}', color=color)
    ax.set_title(f'Emotional Arc of Each Poem ({model_name})')
    ax.set_xlabel('Line Number')
    ax.set_ylabel('Emotion Score')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    st.pyplot(fig)
    plt.close(fig)


def _extract_features(emotion_data, labels):
    """Return, per poem, ``[mean, std]`` of each label's scores, concatenated.

    A poem with no sentences contributes zeros: the mean/std of an empty list
    is NaN, which the later scaling and clustering steps cannot handle.
    """
    features = []
    for emotions in emotion_data:
        poem_features = []
        for label in labels:
            scores = [line_emotions.get(label, 0) for line_emotions in emotions]
            if scores:
                poem_features.extend([np.mean(scores), np.std(scores)])
            else:
                poem_features.extend([0.0, 0.0])
        features.append(poem_features)
    return features


def _cluster_and_display(features, selected_labels, model_name):
    """Show the feature table for one model and, when possible, its clusters."""
    n_clusters = 2
    columns = [
        f'{label}{suffix}'
        for label in selected_labels
        for suffix in ('_mean', '_std')
    ]
    df = pd.DataFrame(features, columns=columns)

    # KMeans raises when there are fewer samples than clusters, so clustering
    # is only attempted with enough poems; the features are shown regardless.
    clustered = len(df) >= n_clusters
    if clustered:
        scaled_features = StandardScaler().fit_transform(df)
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        kmeans.fit(scaled_features)
        df['Cluster'] = kmeans.labels_
    else:
        st.info(f"At least {n_clusters} poems are needed for clustering; showing features only.")

    st.write(f"Poem Sentiment Features and Clusters ({model_name}):")
    st.dataframe(df)

    if clustered:
        # Scatter of the first label's mean against its std, coloured by cluster.
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(df.iloc[:, 0], df.iloc[:, 1], c=df['Cluster'], cmap='viridis', marker='o')
        ax.set_title(f'Clusters of Poem Emotional Arcs ({model_name})')
        ax.set_xlabel(f'{columns[0]}')
        ax.set_ylabel(f'{columns[1]}')
        st.pyplot(fig)
        plt.close(fig)


def analyze_poems_page():
    """Render the "Analyze Poems" Streamlit page.

    The sidebar lets the user upload a ``.txt`` poem into ``poems_directory``,
    lists the poems already there, and takes a comma-separated list of emotion
    labels. Pressing the "Analyze Poems" button reads every poem, scores each
    sentence with every pipeline in ``models``, plots the per-sentence scores,
    and clusters the poems on the mean/std of the selected labels.
    """
    st.header("Analyze Poems")

    # Sidebar for file upload and listing files
    st.sidebar.title("Upload New Poem")
    uploaded_file = st.sidebar.file_uploader("Choose a text file", type="txt")
    if uploaded_file is not None:
        stored_name = _save_uploaded_poem(uploaded_file, poems_directory)
        if stored_name is None:
            st.sidebar.error(f"Cannot store a file named {uploaded_file.name!r}")
        else:
            st.sidebar.success(f"Uploaded {stored_name}")

    st.sidebar.title("Available Poems")
    poem_files = [f for f in os.listdir(poems_directory) if f.endswith(".txt")]
    st.sidebar.write("\n".join(poem_files))

    # Sidebar input for user-specified labels
    user_labels_input = st.sidebar.text_input(
        "Enter emotion labels (comma-separated)",
        "happiness,sadness,anger,fear,disgust,surprise,love,joy,anxiety,contentment,frustration,loneliness,excitement,guilt,shame,envy,jealousy,pride,gratitude,empathy,compassion,boredom,relief,curiosity,awe,confusion,nostalgia,hope,despair,embarrassment",
    )
    # dict.fromkeys drops repeated labels while keeping the user's order, so a
    # label typed twice does not produce duplicate feature columns.
    user_labels = list(dict.fromkeys(label.strip() for label in user_labels_input.split(",")))

    if not st.button("Analyze Poems"):
        return

    if not os.path.isdir(poems_directory):
        st.error("The specified path is not a valid directory.")
        return

    # Read poems from the specified directory
    poems = read_poems_from_directory(poems_directory)
    if not poems:
        st.warning("No text files found in the specified directory.")
        return

    # Analyze and plot for each model
    for model_name, model in models.items():
        processed_emotions = []
        all_labels = set()
        for poem in poems:
            processed, labels = _process_emotions(_analyze_emotions(poem, model))
            processed_emotions.append(processed)
            all_labels.update(labels)

        # Keep only the requested labels this model actually produced.
        selected_labels = [label for label in user_labels if label in all_labels]
        if not selected_labels:
            # Without any label there are no feature columns to scale or plot.
            st.warning(f"None of the requested labels are produced by {model_name}; skipping it.")
            continue

        _plot_emotional_arc(processed_emotions, selected_labels, model_name)

        # Extract features for clustering
        features = _extract_features(processed_emotions, selected_labels)
        _cluster_and_display(features, selected_labels, model_name)
|