Spaces:
Running
Running
import base64 | |
import io | |
from collections import Counter | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
def flatten_list_column(data, column):
    """Count how often each item occurs across the list cells of a column.

    Args:
        data: Mapping-like object (e.g. a DataFrame) where ``data[column]``
            is iterable.
        column: Key of the column whose cells may hold lists.

    Returns:
        A ``pd.Series`` indexed by item, holding each item's occurrence
        count, in order of first appearance.
    """
    tally = Counter()
    for cell in data[column]:
        # Cells that are not lists (None, NaN, scalars, ...) contribute nothing.
        if isinstance(cell, list):
            tally.update(cell)
    return pd.Series(tally)
def create_distribution_plot(data, column):
    """Render the distribution of one column as a base64-encoded PNG.

    Three kinds of column are handled:

    * list columns (cells are Python lists): items are flattened and counted
      with ``flatten_list_column`` and drawn as a bar chart;
    * continuous columns (NumPy integer or float dtype of any width): drawn
      as a 30-bin histogram;
    * everything else: ``value_counts()`` drawn as a bar chart.

    Args:
        data: Object whose ``data[column]`` is a pandas Series.
        column: Name of the column to plot.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.

    Raises:
        Exception: Any error raised while building or exporting the figure
            is printed and then re-raised unchanged.
    """
    try:
        series = data[column]

        # Classify on the first non-null cell so that an empty column or a
        # leading None/NaN does not break (or mislead) the detection.
        non_null = series.dropna()
        is_list_column = not non_null.empty and isinstance(non_null.iloc[0], list)

        # Any NumPy signed/unsigned integer or float width counts as
        # continuous. Extension dtypes keep going down the bar-chart path.
        is_continuous = (
            not is_list_column
            and not pd.api.types.is_extension_array_dtype(series.dtype)
            and series.dtype.kind in "iuf"
        )

        if is_continuous:
            # Continuous data - use histogram
            fig = go.Figure()
            fig.add_trace(
                go.Histogram(
                    x=series,
                    name="Count",
                    nbinsx=30,
                    marker=dict(
                        color="rgba(110, 68, 255, 0.7)",
                        line=dict(color="rgba(184, 146, 255, 1)", width=1),
                    ),
                )
            )
        else:
            if is_list_column:
                print(f"Processing list column: {column}")
                value_counts = flatten_list_column(data, column)
            else:
                # Categorical data
                value_counts = series.value_counts()

            # Bars are coloured by their own height.
            fig = go.Figure(
                [
                    go.Bar(
                        x=value_counts.index,
                        y=value_counts.values,
                        marker=dict(
                            color=value_counts.values,
                            colorscale=px.colors.sequential.Plotly3,
                        ),
                    )
                ]
            )

        # Common layout updates
        fig.update_layout(
            title=f"Distribution of {column}",
            xaxis_title=column,
            yaxis_title="Count",
            template="plotly_white",
            margin=dict(t=50, l=50, r=50, b=50),
            width=1200,
            height=800,
            showlegend=False,
        )

        # Category labels can be long; tilt them so they do not overlap.
        if not is_continuous:
            fig.update_layout(xaxis_tickangle=-45)

        # Convert to PNG, then encode to base64
        img_bytes = fig.to_image(format="png", scale=2.0)
        return base64.b64encode(img_bytes).decode()
    except Exception as e:
        print(f"Error creating distribution plot for {column}: {str(e)}")
        raise
def create_wordcloud(data, column):
    """Render a word cloud of a column's contents as a base64-encoded PNG.

    For list columns (cells are Python lists) the items of every list cell
    are stringified and joined; non-list cells are skipped. For any other
    column every cell is converted with ``astype(str)`` and joined.

    Args:
        data: Object whose ``data[column]`` is a pandas Series.
        column: Name of the column to visualise.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.

    Raises:
        Exception: Any error raised while generating or rendering the word
            cloud is printed and then re-raised unchanged.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    try:
        series = data[column]

        # Classify on the first non-null cell so that an empty column or a
        # leading None/NaN does not break the list detection.
        non_null = series.dropna()
        if not non_null.empty and isinstance(non_null.iloc[0], list):
            # Handle list columns
            text = " ".join(
                " ".join(map(str, cell)) for cell in series if isinstance(cell, list)
            )
        else:
            # Handle regular columns
            text = " ".join(series.astype(str))

        # The 800x400 canvas has the same 2:1 aspect ratio as the figure below.
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
            colormap="plasma",
            max_words=100,
        ).generate(text)

        # Create matplotlib figure
        fig = plt.figure(figsize=(10, 5))
        try:
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis("off")
            plt.title(f"Word Cloud for {column}")

            # Save to bytes
            buf = io.BytesIO()
            plt.savefig(buf, format="png", bbox_inches="tight", dpi=300)
        finally:
            # Always release the figure, even when rendering fails.
            plt.close(fig)

        # Convert to base64
        return base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        print(f"Error creating word cloud for {column}: {str(e)}")
        raise