Spaces:
Running
Running
File size: 5,236 Bytes
eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 0629e69 eb03925 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import base64
import io
from collections import Counter
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def flatten_list_column(data, column):
    """Count how often each item occurs across a column of lists.

    Args:
        data: Table-like object (e.g. a pandas DataFrame) for which
            ``data[column]`` is iterable and yields one cell per row.
        column: Name of the column whose cells hold lists.

    Returns:
        A ``pd.Series`` indexed by the distinct items, holding each item's
        number of occurrences as ``int64``. Cells that are not ``list``
        instances (``None``, strings, scalars, ...) are ignored. When no list
        items are found the result is an empty ``int64`` Series.

    Note:
        Items become ``Counter`` keys, so they must be hashable.
    """
    # Counter consumes the generator directly, so no intermediate flattened
    # list has to be materialised.
    counts = Counter(
        item
        for cell in data[column]
        if isinstance(cell, list)
        for item in cell
    )
    # Pin the dtype: otherwise an empty result falls back to pandas' default
    # dtype for empty Series instead of an integer count dtype.
    return pd.Series(counts, dtype="int64")
def create_distribution_plot(data, column):
    """Plot the distribution of ``data[column]`` and return it as a base64 PNG.

    The chart type depends on the column's content:

    * cells holding lists -> bar chart of per-item counts, computed by
      ``flatten_list_column``;
    * integer or float dtype -> histogram with 30 bins;
    * anything else -> bar chart of ``value_counts()``.

    Args:
        data: pandas DataFrame containing ``column``.
        column: Name of the column to visualise.

    Returns:
        The figure (1200x800 layout, exported with ``scale=2.0``) as a
        base64-encoded PNG string.

    Raises:
        Exception: Any error raised while building or exporting the figure is
            printed and then re-raised unchanged.
    """
    try:
        series = data[column]

        # Scan the whole column instead of peeking at the first row only:
        # that keeps empty data from raising IndexError and still recognises
        # list columns whose first cell is missing.
        is_list_column = any(isinstance(cell, list) for cell in series)

        # Treat every integer/float dtype as continuous, not only the exact
        # "int64"/"float64" pair. Booleans are deliberately not matched, so
        # they keep getting a two-bar categorical chart.
        is_continuous = not is_list_column and (
            pd.api.types.is_integer_dtype(series)
            or pd.api.types.is_float_dtype(series)
        )

        if is_continuous:
            # Continuous data - use histogram
            fig = go.Figure()
            fig.add_trace(
                go.Histogram(
                    # Missing values cannot be binned, so drop them before
                    # handing the data to Plotly.
                    x=series.dropna(),
                    name="Count",
                    nbinsx=30,
                    marker=dict(
                        color="rgba(110, 68, 255, 0.7)",
                        line=dict(color="rgba(184, 146, 255, 1)", width=1),
                    ),
                )
            )
        else:
            if is_list_column:
                print(f"Processing list column: {column}")
                value_counts = flatten_list_column(data, column)
            else:
                # Categorical data
                value_counts = series.value_counts()

            # Bars are coloured by their own height.
            fig = go.Figure(
                [
                    go.Bar(
                        x=value_counts.index,
                        y=value_counts.values,
                        marker=dict(
                            color=value_counts.values,
                            colorscale=px.colors.sequential.Plotly3,
                        ),
                    )
                ]
            )

        # Common layout updates
        fig.update_layout(
            title=f"Distribution of {column}",
            xaxis_title=column,
            yaxis_title="Count",
            template="plotly_white",
            margin=dict(t=50, l=50, r=50, b=50),
            width=1200,
            height=800,
            showlegend=False,
        )

        # Category labels can be long; tilt them so they do not overlap.
        if not is_continuous:
            fig.update_layout(xaxis_tickangle=-45)

        # Static image export; NOTE(review): this relies on Plotly's image
        # export engine being installed in the runtime - confirm for deploys.
        img_bytes = fig.to_image(format="png", scale=2.0)
        return base64.b64encode(img_bytes).decode()
    except Exception as e:
        print(f"Error creating distribution plot for {column}: {e}")
        # Bare raise re-raises the active exception as-is.
        raise
def create_wordcloud(data, column):
    """Render a word cloud for ``data[column]`` and return it as a base64 PNG.

    This is the single definition of ``create_wordcloud``; it combines the
    list-column handling and error reporting with the 800x400 cloud canvas.

    Args:
        data: pandas DataFrame containing ``column``.
        column: Name of the column to visualise. If any cell is a ``list``,
            the items of every list cell are joined into the text and
            non-list cells are skipped; otherwise every cell is converted
            with ``astype(str)`` and joined.

    Returns:
        The rendered figure as a base64-encoded PNG string.

    Raises:
        Exception: Any error raised while generating or rendering the cloud
            is printed and then re-raised unchanged.
    """
    # Imported lazily so the module can be loaded without these packages
    # when no word cloud is requested.
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    try:
        series = data[column]

        # Scan the column rather than inspecting only the first row, so empty
        # data or a missing first cell does not break list detection.
        if any(isinstance(cell, list) for cell in series):
            text = " ".join(
                " ".join(map(str, cell))
                for cell in series
                if isinstance(cell, list)
            )
        else:
            # Handle regular columns
            text = " ".join(series.astype(str))

        # 800x400 shares the 2:1 aspect ratio of the 10x5 figure below.
        cloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
            colormap="plasma",
            max_words=100,
        ).generate(text)

        # Use an explicit figure handle so exactly this figure is closed,
        # even when rendering or saving fails.
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            ax.imshow(cloud, interpolation="bilinear")
            ax.axis("off")
            ax.set_title(f"Word Cloud for {column}")

            buf = io.BytesIO()
            fig.savefig(buf, format="png", bbox_inches="tight", dpi=300)
        finally:
            plt.close(fig)

        return base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        print(f"Error creating word cloud for {column}: {e}")
        # Bare raise re-raises the active exception as-is.
        raise
|