import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import tweepy
from plotly.subplots import make_subplots
from transformers import pipeline

# Streamlit app: fetch a user's recent tweets, classify each one with a
# Hugging Face text-classification pipeline and chart the results.

# Twitter API client, authenticated with the credentials stored in Streamlit secrets.
auth = tweepy.OAuthHandler(st.secrets["consumer_key"], st.secrets["consumer_secret"])
auth.set_access_token(st.secrets["access_key"], st.secrets["access_secret"])
api = tweepy.API(auth)

# Upper bound on the number of tweets shown in the sample table.
SAMPLE_SIZE = 5


def get_tweets(username, count):
    """Fetch up to ``count`` tweets from ``username``'s timeline.

    Replies and retweets are excluded by the request parameters, so fewer
    than ``count`` tweets (possibly none) may come back.

    Returns:
        A dict of four parallel lists: ``"tweets"`` (lower-cased text with
        newlines removed), ``"timestamps"`` (creation time as a string),
        ``"retweets"`` and ``"likes"``.
    """
    cursor = tweepy.Cursor(
        api.user_timeline,
        screen_name=username,
        tweet_mode="extended",
        exclude_replies=True,
        include_rts=False,
    )
    tweets = list(cursor.items(count))
    return {
        "tweets": [tweet.full_text.replace("\n", "").lower() for tweet in tweets],
        "timestamps": [str(tweet.created_at) for tweet in tweets],
        "retweets": [tweet.retweet_count for tweet in tweets],
        "likes": [tweet.favorite_count for tweet in tweets],
    }


def get_sentiment(texts):
    """Classify ``texts`` with the module-level ``pipe`` model.

    Returns:
        A dict with ``"labels"`` and ``"scores"``, one entry per input text.
        An empty input yields two empty lists without invoking the model.
    """
    if not texts:
        return {"labels": [], "scores": []}
    preds = pipe(texts)
    return {
        "labels": [pred["label"] for pred in preds],
        "scores": [pred["score"] for pred in preds],
    }


def neutralise_sentiment(preds):
    """Relabel low-confidence predictions as ``"neutral"``, in place.

    Every entry whose score is below 0.5 gets the label ``"neutral"`` and
    its score replaced by ``1.0 - score``. ``preds`` is mutated; nothing is
    returned.
    """
    for i, score in enumerate(preds["scores"]):
        if score < 0.5:
            preds["labels"][i] = "neutral"
            preds["scores"][i] = 1.0 - score


def get_aggregation_period(df):
    """Pick a resampling period from the time span of ``df["timestamps"]``.

    Returns ``"1D"`` for spans under 30 days, ``"7D"`` for spans under
    365 days and ``"30D"`` otherwise.
    """
    t_delta = df["timestamps"].max() - df["timestamps"].min()
    if t_delta < pd.to_timedelta("30D"):
        return "1D"
    if t_delta < pd.to_timedelta("365D"):
        return "7D"
    return "30D"


@st.cache(allow_output_mutation=True)
def load_model():
    """Load the emotion-classification pipeline (cached by Streamlit across reruns)."""
    return pipeline(
        task="sentiment-analysis",
        model="bhadresh-savani/distilbert-base-uncased-emotion",
    )


def _build_label_timeseries(df):
    """Return a long-format frame of per-period counts for each label.

    Columns are ``timestamp``, ``label`` and ``count``. The count is the
    number of distinct timestamps in the period that carry the label, so
    tweets sharing an identical timestamp and label are counted once.
    """
    ts_sentiment = (
        df.groupby(["timestamps", "labels"])
        .count()["likes"]
        .unstack()
        .resample(get_aggregation_period(df))
        .count()
        .stack()
        .reset_index()
    )
    ts_sentiment.columns = ["timestamp", "label", "count"]
    return ts_sentiment


def _build_figure(df):
    """Build the two-panel figure: label counts over time and mean likes per label."""
    ts_sentiment = _build_label_timeseries(df)
    fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.15)

    # TODO: check that stacking makes sense!
    for label in ts_sentiment["label"].unique():
        # Filter once per label and reuse the subset for both axes.
        subset = ts_sentiment[ts_sentiment["label"] == label]
        fig.add_trace(
            go.Scatter(
                x=subset["timestamp"],
                y=subset["count"],
                mode="lines",
                name=label,
                stackgroup="one",
                hoverinfo="x+y",
            ),
            row=1,
            col=1,
        )

    likes_per_label = df.groupby("labels")["likes"].mean().reset_index()
    fig.add_trace(
        go.Bar(
            x=likes_per_label["labels"],
            y=likes_per_label["likes"],
            showlegend=False,
            marker_color=px.colors.qualitative.Plotly,
            opacity=0.6,
        ),
        row=1,
        col=2,
    )

    fig.update_yaxes(title_text="Number of Tweets", row=1, col=1)
    fig.update_yaxes(title_text="Number of Likes", row=1, col=2)
    fig.update_layout(height=350, width=750)
    return fig


# The bare string below is rendered as the page title by Streamlit "magic";
# it must stay a standalone expression (it is not a docstring).
"""
# Twitter Emotion Analyser
"""

pipe = load_model()
twitter_handle = st.sidebar.text_input("Twitter handle:", "huggingface")
twitter_count = st.sidebar.selectbox("Number of tweets:", (10, 100, 500, 1000, 3200))

if st.sidebar.button("Get tweets!"):
    tweets = get_tweets(twitter_handle, twitter_count)

    if not tweets["tweets"]:
        # Replies and retweets are filtered out, so a timeline can come back
        # empty; the model and the plots cannot work with zero rows.
        st.warning("No tweets found for this handle.")
    else:
        preds = get_sentiment(tweets["tweets"])
        # neutralise_sentiment(preds)
        tweets.update(preds)

        # dataframe creation + preprocessing
        df = pd.DataFrame(tweets)
        df["timestamps"] = pd.to_datetime(df["timestamps"])

        # plots
        st.plotly_chart(_build_figure(df))

        # tweet sample: never ask for more rows than exist, because
        # DataFrame.sample raises ValueError when n exceeds the frame length.
        st.markdown(df.sample(n=min(SAMPLE_SIZE, len(df))).to_markdown())