shvuuuu committed on
Commit
87010b2
1 Parent(s): 6454eb3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ import streamlit as st
5
+ import tweepy
6
+ from plotly.subplots import make_subplots
7
+ from transformers import pipeline
8
import os


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty, so a misconfigured
            deployment fails at start-up with an actionable message.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "set the Twitter API credentials before starting the app."
        )
    return value


# SECURITY: these credentials used to be hard-coded in this file. Anything that
# was committed must be treated as leaked and revoked in the Twitter developer
# portal; the app now reads the values from the environment instead.
consumer_key = _require_env("TWITTER_CONSUMER_KEY")
consumer_secret = _require_env("TWITTER_CONSUMER_SECRET")
access_key = _require_env("TWITTER_ACCESS_TOKEN")
access_secret = _require_env("TWITTER_ACCESS_TOKEN_SECRET")

# Module-level client used by get_tweets().
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)
15
+
16
+
17
def get_tweets(username, count):
    """Fetch at most ``count`` tweets from ``username``'s timeline.

    The request excludes replies and retweets (``exclude_replies=True``,
    ``include_rts=False``) and asks for the extended tweet text.

    Args:
        username: Twitter screen name whose timeline is read.
        count: maximum number of timeline items to pull from the cursor.

    Returns:
        A dict of four parallel lists, one entry per tweet:
        ``"tweets"`` (lower-cased text with newlines turned into spaces),
        ``"timestamps"`` (``str(created_at)``), ``"retweets"`` and ``"likes"``.
    """
    cursor = tweepy.Cursor(
        api.user_timeline,
        screen_name=username,
        tweet_mode="extended",
        exclude_replies=True,
        include_rts=False,
    )
    tweets = list(cursor.items(count))

    return {
        # Newlines are replaced with a space (not removed) so the last word of
        # one line is not fused with the first word of the next before the
        # text reaches the classifier.
        "tweets": [tweet.full_text.replace("\n", " ").lower() for tweet in tweets],
        "timestamps": [str(tweet.created_at) for tweet in tweets],
        "retweets": [tweet.retweet_count for tweet in tweets],
        "likes": [tweet.favorite_count for tweet in tweets],
    }
34
+
35
+
36
def get_sentiment(texts):
    """Run the module-level ``pipe`` on ``texts`` and split its predictions.

    Args:
        texts: the inputs handed to ``pipe`` unchanged.

    Returns:
        A dict with two parallel lists: ``"labels"`` holds each prediction's
        ``"label"`` and ``"scores"`` holds each prediction's ``"score"``.
    """
    predictions = pipe(texts)
    labels = [prediction["label"] for prediction in predictions]
    scores = [prediction["score"] for prediction in predictions]
    return {"labels": labels, "scores": scores}
43
+
44
+
45
def neutralise_sentiment(preds):
    """Relabel low-confidence predictions as ``"neutral"``, in place.

    Every entry whose score is below 0.5 gets the label ``"neutral"`` and its
    score replaced by ``1.0 - score``. Other entries are left untouched.

    Args:
        preds: dict with parallel ``"labels"`` and ``"scores"`` lists; both
            lists are mutated.

    Returns:
        None.
    """
    labels = preds["labels"]
    scores = preds["scores"]
    # zip() bounds the walk to the shorter of the two lists.
    for position, (_, confidence) in enumerate(zip(labels, scores)):
        if confidence < 0.5:
            labels[position] = "neutral"
            scores[position] = 1.0 - confidence
50
+
51
+
52
def get_aggregation_period(df):
    """Choose a resampling period from the span of ``df["timestamps"]``.

    Args:
        df: DataFrame with a ``"timestamps"`` column whose max/min difference
            can be compared against a pandas timedelta.

    Returns:
        ``"1D"`` when the span is under 30 days, ``"7D"`` when it is under
        365 days, and ``"30D"`` otherwise.
    """
    stamps = df["timestamps"]
    span = stamps.max() - stamps.min()

    if span < pd.to_timedelta("30D"):
        return "1D"
    if span < pd.to_timedelta("365D"):
        return "7D"
    return "30D"
61
+
62
+
63
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the emotion-classification pipeline.

    Wrapped in ``st.cache`` so the result is reused instead of rebuilt on each
    call; ``allow_output_mutation=True`` is passed because the returned
    pipeline object is not meant to be hashed for change detection
    (NOTE(review): confirm against the Streamlit version in use).

    Returns:
        A ``transformers`` pipeline for the ``"sentiment-analysis"`` task backed
        by ``bhadresh-savani/distilbert-base-uncased-emotion``.
    """
    return pipeline(
        task="sentiment-analysis",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
    )
67
+
68
+
69
# Page title: left as a bare string expression so Streamlit displays it
# (presumably via its "magic" rendering of top-level literals).
"""
# Twitter Emotion Analyser
"""

# Classifier used by get_sentiment() through the module-level name `pipe`.
pipe = load_model()

# Sidebar controls: which account to analyse and how many tweets to request.
twitter_handle = st.sidebar.text_input("Twitter handle:", value="huggingface")
twitter_count = st.sidebar.selectbox(
    "Number of tweets:",
    options=(10, 100, 500, 1000, 3200),
)
77
+
78
+
79
# Main flow: on button press, fetch tweets, classify them, and render a stacked
# per-label timeline, a mean-likes-per-label bar chart and a sample table.
if st.sidebar.button("Get tweets!"):
    tweets = get_tweets(twitter_handle, twitter_count)

    if not tweets["tweets"]:
        # Replies and retweets are filtered out, so an account can legitimately
        # yield nothing; stop here instead of feeding empty data to the model
        # and to resample(), which would fail.
        st.warning(f"No tweets found for @{twitter_handle}.")
    else:
        preds = get_sentiment(tweets["tweets"])
        # NOTE: neutralise_sentiment(preds) is not applied; labels are kept
        # exactly as predicted.
        tweets.update(preds)

        # dataframe creation + preprocessing
        df = pd.DataFrame(tweets)
        df["timestamps"] = pd.to_datetime(df["timestamps"])

        # plots
        agg_period = get_aggregation_period(df)
        # One column per label indexed by timestamp, then the number of
        # non-null entries per label within each aggregation window.
        ts_sentiment = (
            df.groupby(["timestamps", "labels"])
            .count()["likes"]
            .unstack()
            .resample(agg_period)
            .count()
            .stack()
            .reset_index()
        )
        ts_sentiment.columns = ["timestamp", "label", "count"]

        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.15)

        # TODO: check that stacking makes sense!
        for label in ts_sentiment["label"].unique():
            # Select this label's rows once and reuse them for both axes.
            label_rows = ts_sentiment[ts_sentiment["label"] == label]
            fig.add_trace(
                go.Scatter(
                    x=label_rows["timestamp"],
                    y=label_rows["count"],
                    mode="lines",
                    name=label,
                    stackgroup="one",
                    hoverinfo="x+y",
                ),
                row=1,
                col=1,
            )

        likes_per_label = df.groupby("labels")["likes"].mean().reset_index()

        fig.add_trace(
            go.Bar(
                x=likes_per_label["labels"],
                y=likes_per_label["likes"],
                showlegend=False,
                marker_color=px.colors.qualitative.Plotly,
                opacity=0.6,
            ),
            row=1,
            col=2,
        )

        fig.update_yaxes(title_text="Number of Tweets", row=1, col=1)
        fig.update_yaxes(title_text="Number of Likes", row=1, col=2)
        fig.update_layout(height=350, width=750)

        st.plotly_chart(fig)

        # tweet sample: capped at the number of rows, because DataFrame.sample
        # raises ValueError when asked for more rows than exist.
        st.markdown(df.sample(n=min(5, len(df))).to_markdown())