lewtun HF staff committed on
Commit
3877927
1 Parent(s): 51e981a
Files changed (3) hide show
  1. .gitignore +132 -0
  2. app.py +135 -0
  3. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # Streamlit
132
+ .streamlit
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ import streamlit as st
5
+ import tweepy
6
+ from plotly.subplots import make_subplots
7
+ from transformers import pipeline
8
+
9
# Authenticate against the Twitter API (v1.1, OAuth 1.0a) using credentials
# stored in Streamlit secrets. `api` is the module-level client that
# get_tweets() uses below.
auth = tweepy.OAuthHandler(st.secrets["consumer_key"], st.secrets["consumer_secret"])
auth.set_access_token(st.secrets["access_key"], st.secrets["access_secret"])
api = tweepy.API(auth)
12
+
13
+
14
def get_tweets(username, count):
    """Fetch up to *count* original tweets for *username* from the Twitter API.

    Replies and retweets are excluded. Returns a dict of four parallel lists:
    ``tweets`` (lowercased full text), ``timestamps`` (stringified creation
    times), ``retweets`` and ``likes`` (engagement counts).
    """
    cursor = tweepy.Cursor(
        api.user_timeline,
        screen_name=username,
        tweet_mode="extended",  # full_text instead of the truncated `text`
        exclude_replies=True,
        include_rts=False,
    ).items(count)

    tweets = list(cursor)
    response = {
        # Replace newlines with a space rather than deleting them: stripping
        # them outright would fuse words across line breaks ("foo\nbar" ->
        # "foobar") and corrupt the text fed to the emotion classifier.
        "tweets": [tweet.full_text.replace("\n", " ").lower() for tweet in tweets],
        "timestamps": [str(tweet.created_at) for tweet in tweets],
        "retweets": [tweet.retweet_count for tweet in tweets],
        "likes": [tweet.favorite_count for tweet in tweets],
    }
    return response
31
+
32
+
33
def get_sentiment(texts):
    """Run the module-level emotion pipeline over *texts*.

    Returns a dict with two parallel lists: ``labels`` (predicted emotion per
    text) and ``scores`` (the model's confidence for that label).
    """
    predictions = pipe(texts)
    return {
        "labels": [prediction["label"] for prediction in predictions],
        "scores": [prediction["score"] for prediction in predictions],
    }
40
+
41
+
42
def neutralise_sentiment(preds):
    """Relabel low-confidence predictions as "neutral", in place.

    Any entry whose score is below 0.5 has its label overwritten with
    ``"neutral"`` and its score flipped to ``1.0 - score`` (confidence in the
    neutral call). Entries at or above 0.5 are left untouched. Returns None.
    """
    labels, scores = preds["labels"], preds["scores"]
    for i, (_, confidence) in enumerate(zip(labels, scores)):
        if confidence >= 0.5:
            continue
        labels[i] = "neutral"
        scores[i] = 1.0 - confidence
47
+
48
+
49
def get_aggregation_period(df):
    """Choose a resampling frequency from the tweet time span in *df*.

    Spans under 30 days aggregate daily ("1D"), under a year weekly ("7D"),
    and anything longer monthly ("30D"). *df* must have a datetime
    ``timestamps`` column.
    """
    span = df["timestamps"].max() - df["timestamps"].min()
    # Thresholds checked smallest-first; first match wins.
    for limit, frequency in (("30D", "1D"), ("365D", "7D")):
        if span < pd.to_timedelta(limit):
            return frequency
    return "30D"
58
+
59
+
60
@st.cache(allow_output_mutation=True)
def load_model():
    """Build and cache the emotion-classification pipeline.

    Cached via st.cache so the model is downloaded/loaded once per session;
    allow_output_mutation skips hashing the (unhashable, mutable) pipeline.
    """
    return pipeline(task="sentiment-analysis", model="bhadresh-savani/distilbert-base-uncased-emotion")
64
+
65
+
66
+ """
67
+ # Twitter Emotion Analyser
68
+ """
69
+
70
+
71
+ pipe = load_model()
72
+ twitter_handle = st.sidebar.text_input("Twitter handle:", "huggingface")
73
+ twitter_count = st.sidebar.selectbox("Number of tweets:", (10, 100, 500, 1000, 3200))
74
+
75
+
76
+ if st.sidebar.button("Get tweets!"):
77
+ tweets = get_tweets(twitter_handle, twitter_count)
78
+ preds = get_sentiment(tweets["tweets"])
79
+ # neutralise_sentiment(preds)
80
+ tweets.update(preds)
81
+ # dataframe creation + preprocessing
82
+ df = pd.DataFrame(tweets)
83
+ df["timestamps"] = pd.to_datetime(df["timestamps"])
84
+ # plots
85
+ agg_period = get_aggregation_period(df)
86
+ ts_sentiment = (
87
+ df.groupby(["timestamps", "labels"])
88
+ .count()["likes"]
89
+ .unstack()
90
+ .resample(agg_period)
91
+ .count()
92
+ .stack()
93
+ .reset_index()
94
+ )
95
+ ts_sentiment.columns = ["timestamp", "label", "count"]
96
+
97
+ fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.15)
98
+
99
+ # TODO: check that stacking makes sense!
100
+ for label in ts_sentiment["label"].unique():
101
+ fig.add_trace(
102
+ go.Scatter(
103
+ x=ts_sentiment.query("label == @label")["timestamp"],
104
+ y=ts_sentiment.query("label == @label")["count"],
105
+ mode="lines",
106
+ name=label,
107
+ stackgroup="one",
108
+ hoverinfo="x+y",
109
+ ),
110
+ row=1,
111
+ col=1,
112
+ )
113
+
114
+ likes_per_label = df.groupby("labels")["likes"].mean().reset_index()
115
+
116
+ fig.add_trace(
117
+ go.Bar(
118
+ x=likes_per_label["labels"],
119
+ y=likes_per_label["likes"],
120
+ showlegend=False,
121
+ marker_color=px.colors.qualitative.Plotly,
122
+ opacity=0.6,
123
+ ),
124
+ row=1,
125
+ col=2,
126
+ )
127
+
128
+ fig.update_yaxes(title_text="Number of Tweets", row=1, col=1)
129
+ fig.update_yaxes(title_text="Number of Likes", row=1, col=2)
130
+ fig.update_layout(height=350, width=750)
131
+
132
+ st.plotly_chart(fig)
133
+
134
+ # tweet sample
135
+ st.markdown(df.sample(n=5).to_markdown())
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ transformers
4
+ tweepy
5
+ torch
6
+ tabulate