Spaces:
Running
Running
File size: 5,236 Bytes
eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 |
|
import base64
import io
from collections import Counter
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def flatten_list_column(data, column):
    """Count how often each item occurs across a column of lists.

    Args:
        data: Object indexable by column name (e.g. a pandas DataFrame)
            whose ``data[column]`` can be iterated cell by cell.
        column: Name of the column whose cells hold lists.

    Returns:
        A ``pd.Series`` mapping each distinct item to its count, ordered by
        first appearance. Cells that are not ``list`` instances (``None``,
        NaN, strings, ...) are skipped. When no list cell is found the
        result is an empty Series that still has an ``int64`` dtype.
    """
    counts = Counter(
        item
        for cell in data[column]
        if isinstance(cell, list)
        for item in cell
    )
    # Pin the dtype: an empty Counter would otherwise produce a Series
    # whose dtype is not an integer type, even though these are counts.
    return pd.Series(counts, dtype="int64")
def _first_valid_value(series):
    """Return the first non-null value of *series*, or None if it has none."""
    non_null = series.dropna()
    return None if non_null.empty else non_null.iloc[0]


def create_distribution_plot(data, column):
    """Render the distribution of one column as a base64-encoded PNG.

    The chart type depends on the column:

    * ``int64`` / ``float64`` columns are drawn as a 30-bin histogram.
    * Columns whose first non-null cell is a ``list`` are flattened with
      ``flatten_list_column`` and drawn as a bar chart of item counts.
    * Everything else is drawn as a bar chart of ``value_counts()``.

    Args:
        data: pandas DataFrame (or compatible object) containing *column*.
        column: Name of the column to plot.

    Returns:
        The PNG image bytes produced by ``fig.to_image`` encoded as a
        base64 ``str``.

    Raises:
        Exception: Any error raised while building or exporting the figure
            is printed and then re-raised unchanged.
    """
    try:
        series = data[column]
        # Look at the first *non-null* cell so that an empty column or a
        # list column starting with None/NaN is still classified correctly.
        is_list_column = isinstance(_first_valid_value(series), list)
        is_numeric = not is_list_column and series.dtype in ("int64", "float64")

        if is_numeric:
            # Continuous data - use histogram
            fig = go.Figure()
            fig.add_trace(
                go.Histogram(
                    x=series,
                    name="Count",
                    nbinsx=30,
                    marker=dict(
                        color="rgba(110, 68, 255, 0.7)",
                        line=dict(color="rgba(184, 146, 255, 1)", width=1),
                    ),
                )
            )
        else:
            if is_list_column:
                print(f"Processing list column: {column}")
                value_counts = flatten_list_column(data, column)
            else:
                # Categorical data
                value_counts = series.value_counts()

            # Shared bar chart for list columns and categorical data;
            # bars are colored by their own height.
            fig = go.Figure(
                [
                    go.Bar(
                        x=value_counts.index,
                        y=value_counts.values,
                        marker=dict(
                            color=value_counts.values,
                            colorscale=px.colors.sequential.Plotly3,
                        ),
                    )
                ]
            )

        # Common layout updates
        fig.update_layout(
            title=f"Distribution of {column}",
            xaxis_title=column,
            yaxis_title="Count",
            template="plotly_white",
            margin=dict(t=50, l=50, r=50, b=50),
            width=1200,
            height=800,
            showlegend=False,
        )

        # Category labels can be long, so tilt them; numeric ticks stay flat.
        if not is_numeric:
            fig.update_layout(xaxis_tickangle=-45)

        # Convert to PNG, then encode to base64
        img_bytes = fig.to_image(format="png", scale=2.0)
        return base64.b64encode(img_bytes).decode()
    except Exception as e:
        print(f"Error creating distribution plot for {column}: {e}")
        # Bare raise keeps the original exception and traceback intact.
        raise
def create_wordcloud(data, column, width=800, height=400):
    """Render a word cloud for one column as a base64-encoded PNG.

    This is the single definition of ``create_wordcloud``; a second
    definition of the same name later in the module would shadow it.

    Args:
        data: pandas DataFrame (or compatible object) containing *column*.
        column: Name of the column to visualize. If its first non-null cell
            is a ``list``, the items of every list cell are joined into the
            text; otherwise every cell is converted with ``astype(str)``.
        width: Width in pixels passed to ``WordCloud``.
        height: Height in pixels passed to ``WordCloud``.

    Returns:
        The PNG written by matplotlib, encoded as a base64 ``str``.

    Raises:
        Exception: Any error raised while generating or rendering the word
            cloud is printed and then re-raised unchanged.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    try:
        series = data[column]
        # Classify by the first non-null cell so an empty column or a
        # leading None/NaN does not break list detection.
        non_null = series.dropna()
        first_value = None if non_null.empty else non_null.iloc[0]

        if isinstance(first_value, list):
            # Handle list columns: join items within and across cells.
            text = " ".join(
                " ".join(map(str, cell))
                for cell in series
                if isinstance(cell, list)
            )
        else:
            # Handle regular columns
            text = " ".join(series.astype(str))

        cloud = WordCloud(
            width=width,
            height=height,
            background_color="white",
            colormap="plasma",
            max_words=100,
        ).generate(text)

        # Create matplotlib figure
        fig = plt.figure(figsize=(10, 5))
        try:
            plt.imshow(cloud, interpolation="bilinear")
            plt.axis("off")
            plt.title(f"Word Cloud for {column}")

            # Save to bytes
            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight", dpi=300)
        finally:
            # Always release the figure, even if rendering or saving fails.
            plt.close(fig)

        # getvalue() returns the whole buffer regardless of stream position.
        return base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        print(f"Error creating word cloud for {column}: {e}")
        # Bare raise keeps the original exception and traceback intact.
        raise
|