Spaces:

TroglodyteDerivations
/

Bluesky_2M_Plotly_Emojis_Vis

Sleeping

App Files Files Community

TroglodyteDerivations committed on Dec 1, 2024

Commit

8edd6e7

verified ·

1 Parent(s): 8c1b5e5

Create app.py

Browse files

Files changed (1) hide show

app.py +62 -0

app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# bsky2M_emojis_streamlit.py
+# Packages required
+import streamlit as st
+from datasets import load_dataset
+import emoji
+from dateutil import parser
+from collections import Counter
+import plotly.express as px
+import pandas as pd
+from collections import defaultdict
+# Streamlit app title
+st.title("Top 200 Most Frequent Emojis in Bluesky Posts")
+# Step 1: Load the Dataset
+# Streamlit re-executes this script from the top on every rerun (new session,
+# page refresh, widget interaction), so the dataset handle is cached and built
+# only once per server process instead of calling load_dataset() each time.
+# NOTE: st.cache_resource requires Streamlit >= 1.18.
+@st.cache_resource
+def load_posts():
+    """Return the object produced by load_dataset() for the Bluesky posts.
+
+    The result is cached by Streamlit, so repeated script runs share the same
+    object rather than re-resolving the dataset.
+    """
+    return load_dataset("alpindale/two-million-bluesky-posts")
+# Load the dataset
+dataset = load_posts()
+# Access the 'train' split
+data = dataset['train']
+# Step 2: Extract Emojis from Text
+def extract_emojis(text):
+    """Return the emojis found in ``text``.
+
+    Args:
+        text: Post text to scan. ``None`` or an empty string yields ``[]``.
+
+    Returns:
+        A list with the 'emoji' value of every match reported by
+        ``emoji.emoji_list`` (one entry per match, so repeats are kept).
+    """
+    # Guard first: emoji.emoji_list() is given a string to scan, so a missing
+    # (None) text field -- if the dataset contains any -- would otherwise raise
+    # and abort the whole dataset.map() pass.
+    if not text:
+        return []
+    return [match['emoji'] for match in emoji.emoji_list(text)]
+# Step 3: Count Emoji Frequencies
+# Streamlit re-runs this script on every rerun; caching the small result list
+# keeps the full pass over the dataset from being repeated each time.
+# NOTE: st.cache_data requires Streamlit >= 1.18.
+@st.cache_data
+def count_top_emojis(limit=200):
+    """Return the ``limit`` most common emojis as ``(emoji, count)`` pairs.
+
+    Args:
+        limit: Maximum number of pairs to return, most frequent first.
+
+    Returns:
+        The list produced by ``Counter.most_common(limit)``; empty when no
+        emoji was found in any post.
+    """
+    # Apply the function to the 'text' column; map() returns a new dataset
+    # that carries the extra 'emojis' column.
+    posts = data.map(lambda x: {"emojis": extract_emojis(x["text"])})
+    # NOTE: the former 'created_at' -> datetime conversion was dropped here.
+    # Nothing in this script reads the converted value, and it cost another
+    # full pass over the dataset.
+    tally = Counter()
+    # Read only the 'emojis' column and count incrementally, so the other
+    # fields of each post are not decoded and no flat list of every emoji
+    # occurrence has to be built first.
+    for post_emojis in posts["emojis"]:
+        tally.update(post_emojis)
+    return tally.most_common(limit)
+# Step 4: Visualize Emoji Frequencies
+# Get the top 200 most common emojis
+top_emojis = count_top_emojis(200)
+# Create a DataFrame for Plotly directly from the (emoji, count) pairs; unlike
+# unpacking zip(*top_emojis), this also works when the list is empty.
+df = pd.DataFrame(top_emojis, columns=['Emojis', 'Frequency'])
+# Display the dataframe in the Streamlit app
+st.write('### Top 200 Emojis Dataframe')
+st.dataframe(df)
+# Plot the bar chart
+fig = px.bar(df, x='Emojis', y='Frequency', title='Top 200 Most Frequent Emojis')
+fig.update_xaxes(title_text='Emojis')
+fig.update_yaxes(title_text='Frequency')
+# Display the plot in the Streamlit app
+st.plotly_chart(fig)