# TWIGMA / visualization.py
# Yiqun Chen
# fix typos
# c665342
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
import plotly.figure_factory as ff
import numpy as np
import pandas as pd
import requests
import plotly.express as px
from PIL import Image
#from streamlit_plotly_events import plotly_events
# Locations of the pre-computed artefacts rendered by this page.
_EMBEDDING_CSV = "./resources/csv_files/twigma_release_sampled.csv"
_CLUSTER_TIMELINE_PNG = './resources/4x/change_time.png'
_CLUSTER_SAMPLES_PNG = './resources/4x/cluster_change.png'

# Markdown bodies are kept at column 0 so no indentation ends up inside the
# rendered text.
_THEMES_TEXT = """
To investigate the themes and the corresponding longitudinal changes for images in the TWIGMA dataset, we applied k-means clustering to the TWIGMA
data with k = 10 and plotted the change of cluster membership over time (see Figure 2).
We note substantial changes in cluster memberships: clusters 1, 3, and 4 have experienced a
steady decline over time, while clusters 8 and 9 showed a consistent increase. Due to the lack of direct access to the prompts used for
generating every image in TWIGMA, as well as the distance between the input prompt and the resulting output,
we used BLIP (Li et al. 2022) to caption the images in TWIGMA. Prominent themes we observed include painting, woman, man, and hair, aligning with the known interest of users in generating detailed human portraits in
various styles using text-to-image models.
"""

_CLUSTER_CONCEPT_TEXT = """
We further visualized each cluster along with the most frequent topics derived using BLIP captions in Figure 3,
with emojis representing the relative trends over time. Our findings indicate a shift in preferences for image topics among Twitter users.
There is a growing interest in sharing artistically sophisticated or distinct content, such as intricate human portraits, while interest in simpler themes such as natural scenes has declined. In addition, clusters 5 and 8 notably contain a substantial number of images with increasing popularity but also significant amounts of NSFW, pornographic, and nude content.
This observation aligns with recent studies that highlight the rapid growth of online communities focused on generating these models, which are relatively less regulated.
"""

# Captions use implicit string concatenation (rather than backslash
# continuations inside the literal) so that no continuation-line whitespace
# can leak into the caption text.
_FIGURE_2_CAPTION = (
    'Figure 2: Composition of image clusters (estimated using k-means clustering with k = 10) '
    'over time. We observe notable changes in cluster membership and underlying themes of '
    'TWIGMA images from 2021 to 2023.'
)

_FIGURE_3_CAPTION = (
    'Figure 3: Randomly sampled images from the 10 clusters '
    'identified using k-means clustering. Clusters 5 and 8 predominantly contain NSFW '
    'photos and have been pixelated accordingly. Cluster annotations are '
    'provided using the most frequent words in associated BLIP-inferred captions, '
    'with up arrow, down arrow, and right arrow emojis indicating increasing, '
    'decreasing, or unchanged trends over time, respectively.'
)


def _show_png(path, caption):
    """Render the image file at ``path`` as a PNG with ``caption`` below it.

    The file is opened in a ``with`` block so its handle is released as soon
    as Streamlit has consumed the image, instead of staying open until
    garbage collection.
    """
    with Image.open(path) as picture:
        st.image(picture, caption=caption, output_format='png')


def app():
    """Render the "Themes of AI-generated images on Twitter" page.

    The page shows, in order:

    * an introductory paragraph;
    * Figure 1 (2D density of the UMAP image embedding) next to Figure 2
      (pre-rendered cluster-composition-over-time image);
    * a paragraph on cluster concepts followed by Figure 3 (pre-rendered
      sample images per cluster);
    * Figure 4 (per-cluster density contours of the UMAP embedding).

    All output goes to the active Streamlit page; nothing is returned.
    """
    st.markdown('### Themes of AI-generated images on Twitter')
    st.markdown(_THEMES_TEXT)

    # Work on a reproducible 10% sample of the released embedding table.
    img_2d_embed = pd.read_csv(_EMBEDDING_CSV)
    img_2d_embed = img_2d_embed.sample(frac=0.1, random_state=2023)

    density_col, timeline_col = st.columns([1, 1])
    with density_col:
        fig1 = ff.create_2d_density(
            x=img_2d_embed['UMAP_dim_1'],
            y=img_2d_embed['UMAP_dim_2'],
            colorscale='Greens',
            height=600,
            width=600,
            title='Figure 1: Image embedding visualized using 2D UMAP',
        )
        st.plotly_chart(fig1, use_container_width=True)
    with timeline_col:
        _show_png(_CLUSTER_TIMELINE_PNG, _FIGURE_2_CAPTION)

    st.markdown('#### Visualizing the concept of each cluster')
    st.markdown(_CLUSTER_CONCEPT_TEXT)

    # The [1, 6, 1] layout centres the figure with narrow margins either side.
    _, samples_col, _ = st.columns([1, 6, 1])
    with samples_col:
        _show_png(_CLUSTER_SAMPLES_PNG, _FIGURE_3_CAPTION)

    # Thin the sample further (30% of the 10% sample) for the faceted plot.
    img_2d_embed_small = img_2d_embed.sample(frac=0.3, random_state=2023)
    # Plotly Express orders facets and legend entries by first appearance in
    # the data frame, so sorting by label fixes the facet order.
    # NOTE(review): if the labels are stored as strings this sort is
    # lexicographic ('10' before '2'); in that case pass `category_orders`
    # to px.density_contour instead -- confirm the column dtype.
    img_2d_embed_small = img_2d_embed_small.sort_values(['label_10_clusters'])

    _, contour_col, _ = st.columns([1, 6, 1])
    with contour_col:
        fig3 = px.density_contour(
            img_2d_embed_small,
            x='UMAP_dim_1',
            y='UMAP_dim_2',
            facet_col_wrap=5,
            color='label_10_clusters',
            facet_col='label_10_clusters',
            color_discrete_sequence=px.colors.qualitative.Safe,
            title='Figure 4: Image embedding visualized in 2D UMAP by estimated clusters',
        )
        st.plotly_chart(fig3, use_container_width=True)