TroglodyteDerivations committed on
Commit
8edd6e7
1 Parent(s): 8c1b5e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# bsky2M_emojis_streamlit.py
"""Streamlit app charting the most frequent emojis in Bluesky posts.

Loads the ``alpindale/two-million-bluesky-posts`` dataset, extracts the emojis
found in each post's ``text`` field, counts them, and displays the most common
ones as a table and as a Plotly bar chart.
"""

# Packages required
from collections import Counter, defaultdict
from itertools import chain

from datasets import load_dataset
from dateutil import parser
import emoji
import pandas as pd
import plotly.express as px
import streamlit as st

# Hugging Face dataset identifier and the split that is analysed.
DATASET_NAME = "alpindale/two-million-bluesky-posts"
DATASET_SPLIT = "train"

# Number of emojis shown in the table and the chart.
TOP_N = 200


def extract_emojis(text):
    """Return every emoji occurrence found in ``text``.

    Args:
        text: The post text to scan. ``None`` or an empty string yields an
            empty list instead of being passed to the emoji library.

    Returns:
        A list with one emoji string per occurrence, as reported by
        ``emoji.emoji_list`` (repeated emojis appear repeatedly).
    """
    if not text:
        return []
    return [match["emoji"] for match in emoji.emoji_list(text)]


def _add_emojis_column(row):
    """Build the ``emojis`` column value for one dataset row."""
    return {"emojis": extract_emojis(row["text"])}


def count_emojis(emoji_lists):
    """Tally emoji occurrences across an iterable of per-post emoji lists.

    Args:
        emoji_lists: Iterable whose items are lists of emoji strings.

    Returns:
        A ``Counter`` mapping each emoji to its total number of occurrences.
    """
    # chain.from_iterable streams the emojis into the Counter without first
    # building one flat list of every occurrence.
    return Counter(chain.from_iterable(emoji_lists))


def build_frequency_frame(emoji_counts, limit=TOP_N):
    """Return a DataFrame of the ``limit`` most common emojis.

    Args:
        emoji_counts: ``Counter`` mapping emoji to frequency.
        limit: Maximum number of rows to keep.

    Returns:
        A DataFrame with the columns ``Emojis`` and ``Frequency``, ordered from
        most to least frequent. It is empty (but still has both columns) when
        ``emoji_counts`` is empty.
    """
    # Building the frame straight from the (emoji, count) pairs avoids
    # unpacking zip(*pairs), which raises ValueError when there are no pairs.
    return pd.DataFrame(
        emoji_counts.most_common(limit),
        columns=["Emojis", "Frequency"],
    )


# Streamlit re-executes this script on every interaction; caching keeps the
# dataset load and the emoji count from being repeated on each rerun.
@st.cache_data
def load_emoji_counts(dataset_name=DATASET_NAME, split=DATASET_SPLIT):
    """Load the dataset and count the emojis in its posts.

    Args:
        dataset_name: Hugging Face dataset identifier to load.
        split: Name of the split to analyse.

    Returns:
        A ``Counter`` mapping each emoji to its number of occurrences.
    """
    # Step 1: Load the dataset and access the requested split
    data = load_dataset(dataset_name)[split]

    # Step 2: Extract emojis from the 'text' column into an 'emojis' column
    data = data.map(_add_emojis_column)

    # Step 3: Count emoji frequencies
    return count_emojis(data["emojis"])


# Streamlit app title
st.title(f"Top {TOP_N} Most Frequent Emojis in Bluesky Posts")

# Step 4: Visualize emoji frequencies
# Create a DataFrame of the most common emojis for Plotly
df = build_frequency_frame(load_emoji_counts())

# Display the dataframe in the Streamlit app
st.write(f"### Top {TOP_N} Emojis Dataframe")
st.dataframe(df)

# Plot the bar chart
fig = px.bar(
    df,
    x="Emojis",
    y="Frequency",
    title=f"Top {TOP_N} Most Frequent Emojis",
)
fig.update_xaxes(title_text="Emojis")
fig.update_yaxes(title_text="Frequency")

# Display the plot in the Streamlit app
st.plotly_chart(fig)