# Importing necessary libraries.
import streamlit as st
st.set_page_config(page_title="Monkeypox misinformation detector",
                   page_icon=":lion:",
                   layout="wide",
                   initial_sidebar_state="auto",
                   menu_items=None)
import tweepy as tw
import textacy
from textacy import preprocessing
import emoji
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
import datetime as dt
import time
import copy
import altair as alt


@st.experimental_singleton(show_spinner=False)
def load_model():
    """
    This function loads the fine-tuned HuggingFace model and caches
    it (using the experimental_singleton decorator) to improve
    computation times.

    Parameters: none.
    Returns: HuggingFace transformer model.
    """

    model = TFAutoModelForSequenceClassification.from_pretrained("smcrone/monkeypox-misinformation")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=5e-6),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=tf.keras.metrics.SparseCategoricalAccuracy())
    return model


@st.experimental_singleton(show_spinner=False)
def load_tokenizer():
    """
    This function loads a tokenizer for the transformer model and caches
    it (using the experimental_singleton decorator) to improve
    computation times.

    Parameters: none.
    Returns: tokenizer.
    """

    tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased",use_fast=False)
    return tokenizer


@st.experimental_singleton(show_spinner=False)
def load_client():
    """
    This function authenticates the Tweepy client and caches
    the object (using the experimental_singleton decorator) to 
    improve computation times.

    Parameters: none.
    Returns: Tweepy client.
    """

    bearer_token = st.secrets["bearer_token"]
    client = tw.Client(bearer_token,wait_on_rate_limit=True)
    return client


def dataframe_preprocessing(df_to_preprocess:pd.DataFrame):
    """
    The program overall collects tweet data at two junctures: firstly
    on provision of the initial tweet, and secondly if the classification
    of the initial tweet prompts a review of the user's other recent tweets.
    At both of these junctures certain preprocessing steps -- designed to
    increase the intelligibility of text inputs to the model -- are identical,
    so this function is designed to avoid the unnecessary repetition of this
    code. The function takes a Pandas DataFrame for preprocessing and returns
    the DataFrame, having executed certain preprocessing steps (e.g. removal
    of emojis, normalization of whitespace, removal of columns, etc.)

    Parameters: df_to_preprocess (DataFrame)
    Returns: df_to_preprocess (DataFrame)
    """

    # userlocation will not be in the dataframe if the user has not supplied the field,
    # so for the time being fill it with 'None' where it is missing.
    # (Note: userlocation will likely not be used, so this block may be removed in later versions.)
    if 'userlocation' not in df_to_preprocess.columns:
        df_to_preprocess['userlocation'] = 'None'
    # Dropping redundant columns.
    df_to_preprocess = df_to_preprocess.drop(labels=['public_metrics', 'userpublic_metrics'], axis=1)
    # Stripping timezone info for export to Excel.
    df_to_preprocess['created_at'] = df_to_preprocess['created_at'].dt.tz_localize(None)
    df_to_preprocess['usercreated_at'] = df_to_preprocess['usercreated_at'].dt.tz_localize(None)
    # Replacing URLs and emojis; normalizing bullet points, whitespace, etc.
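    # Illustrative (hypothetical) effect of the cleaning loop below on a tweet:
    #   in:  "Monkeypox is a hoax 🙄 https://t.co/abc123\n\nRead more"
    #   out: "Monkeypox is a hoax :face_with_rolling_eyes: _URL_ Read more"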
    for feature in ['text','userdescription','userlocation','userurl','username']:
        df_to_preprocess[feature] = df_to_preprocess[feature].fillna('None').apply(str)
        df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.replace.urls(text= x, repl= '_URL_'))
        df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: emoji.demojize(x))
        df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.normalize.bullet_points(text=x))
        df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.normalize.quotation_marks(text=x))
        df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.normalize.whitespace(text=x))
        df_to_preprocess[feature] = df_to_preprocess[feature].replace('\n', ' ', regex=True).replace('\r', '', regex=True)
    # Renaming columns (for greater model intelligibility).
    df_to_preprocess.rename(columns={"userverified": "user is verified",
                        "userurl": "user has url",
                        "userdescription": "user description",
                        "usercreated_at": "user created at",
                        "followers_count": "followers count",
                        "following_count": "following count",
                        "tweet_count": "tweet count",
                        "userlocation": "user location"},
                        inplace=True)
    # Making URL column binary.
    df_to_preprocess['user has url'].replace({'_URL_': 'True', "": 'False'}, inplace=True)
    # Adding some extra features.
    df_to_preprocess['years since account created'] = df_to_preprocess['created_at'].dt.year.astype('Int64') - df_to_preprocess['user created at'].dt.year.astype('Int64')
    df_to_preprocess['tweets per day'] = df_to_preprocess['tweet count']/((df_to_preprocess['created_at'] - df_to_preprocess['user created at']).dt.days)
    df_to_preprocess['follower to following ratio'] = df_to_preprocess['followers count']/(df_to_preprocess['following count']+1)
    # Returning processed DataFrame.
    return df_to_preprocess


def feature_concatenation(dataframe_to_concatenate:pd.DataFrame,features:list):
    """
    Our transformer model was fine-tuned on text input that combines
    a number of fields into a single string. This function performs
    the concatenation of these features, which, in addition to dataframe
    preprocessing, is a necessary preprocessing step. The final dataframe
    consists of just two columns: one containing the concatenated text and
    the other containing the number of retweets that the tweet received
    (for use later on).

    Parameters:

    1. dataframe_to_concatenate (DataFrame): the df from which to take the features.
    2. features (list of str): the features to concatenate.

    Returns:

    1. finalDataFrame (DataFrame): the dataframe to be passed to the model.
    """

    # Make copy of dataframe consisting only of specified features.
    concatenated_dataframe = dataframe_to_concatenate[features].copy()
    # Concatenate chosen features: first prefix each feature with its column name...
    for i in features:
        concatenated_dataframe[i] = concatenated_dataframe[i].name + ": " + concatenated_dataframe[i].astype(str)
    # ...then join the prefixed features into a single string separated by the [SEP] token.
    concatenated_dataframe['combined'] = concatenated_dataframe[features].apply(lambda row: ' [SEP] '.join(row.values.astype(str)), axis=1)
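    # With features=['text'] (as used elsewhere in this app), a combined string looks
    # like "text: <tweet text>"; with a hypothetical ['text', 'user description'] it
    # would be "text: <tweet text> [SEP] user description: <description>".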
    final_concatenated_dataframe = pd.DataFrame({"combined":concatenated_dataframe['combined'],"retweets":dataframe_to_concatenate['retweet_count']})
    # Return the final DataFrame.
    return final_concatenated_dataframe


def classify_tweets(dataframe_to_classify:pd.DataFrame):
    """
    This function takes a DataFrame of tweets which, having gone through
    the necessary preprocessing steps, is ready to classify. The function
    is called both for the initial classification of a single tweet and,
    where necessary, the superspreader analysis of the user's recent tweets.
    The function iterates through the DataFrame provided, tokenizing and
    classifying each tweet, and assigning it to one of two lists within a
    dictionary: 'goodPosts' (i.e. non-misleading posts) and 'badPosts (i.e.
    misleading posts). The function then returns the dictionary, which for
    each post includes the tweet itself, the predicted class, the confidence
    of the prediction, and the number of retweets received by the post.

    Parameters: dataframe_to_classify (DataFrame) -- the preprocessed
    DataFrame of tweet(s).

    Returns: tweet_dict (dict): a dictionary of classification results.
    """

    # Storing classification results in a dictionary with two keys.
    tweet_dict ={}
    tweet_dict['goodPosts'] = []
    tweet_dict['badPosts'] = []
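    # Each entry appended to these lists is of the form (values illustrative only):
    #   [combined tweet text, predicted class (0 = not misleading, 1 = misleading),
    #    prediction confidence (e.g. 0.97), retweet count]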
    # Iterate through each tweet string in the DataFrame provided.
    for i in range(len(dataframe_to_classify['combined'])):
        # First, tokenize the tweet.
        tokenized_tweet = tokenizer(dataframe_to_classify['combined'].iloc[i],padding="max_length",truncation=True)
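        # padding="max_length" pads (and truncation=True truncates) each sequence to the
        # model's 512-token limit, which is why the tensors below are reshaped to [1, 512].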
        # Next, convert the tokenizer output into tensors that the model will accept.
        predict_dict = {}
        for key, value in tokenized_tweet.items():
            tensor = tf.convert_to_tensor(value)
            predict_dict[key] = tf.reshape(tensor, [1, 512])
        # Call model to predict tweet.
        prediction = model(predict_dict,training=False)
        # Take the predicted class and the confidence in that prediction.
        probabilities = np.array(tf.nn.softmax(prediction.logits))
        pred_class = np.argmax(probabilities)
        pred_conf = np.max(probabilities)
        # Construct a list of variables that we wish to store.
        seq_to_append = [dataframe_to_classify['combined'].iloc[i],pred_class,pred_conf,dataframe_to_classify['retweets'].iloc[i]]
        # Add list under appropriate dictionary key.
        if pred_class == 1:
            tweet_dict['badPosts'].append(seq_to_append)
        elif pred_class == 0:
            tweet_dict['goodPosts'].append(seq_to_append)
        else:
            print("Something went wrong.")
            return
    # Return the dictionary of results.
    return tweet_dict


def get_user_tweets(user_id:str, days_to_go_back:int, client:tw.Client):
    """
    If the initial tweet provided to the web app is classified as
    misleading, then relevant tweets from the user must be gathered
    in order to perform the superspreader calculation. This function
    supports this process by collecting relevant user tweets, undertaking
    the necessary preprocessing steps (with support from other functions),
    and classifying the tweets using the classify_tweets function. It
    then returns the dictionary of results produced by classify_tweets.

    Parameters:

    1. user_id (int|str): the user_id to be fed to Tweepy.
    2. days_to_go_back (int): how many days' tweets to investigate.
    3. client: the Tweepy client instantiated by load_client.

    Returns:

    1. classified_tweets (dict): model outputs for the user's tweets.
    """

    # STAGE 1. FETCH USER TWEETS

    # Converting days_to_go_back into an ISO-8601 start time for Tweepy.
    # UTC is used so that the 'Z'-suffixed timestamp matches what the API expects.
    d = dt.datetime.utcnow() - dt.timedelta(days=days_to_go_back)
    start_time = d.strftime('%Y-%m-%dT%H:00:00Z')
    # Gathering tweets from user.
    try:
        tweets_we_want_to_check = tw.Paginator(client.get_users_tweets,
                                               id = user_id,
                                               end_time=None,
                                               exclude=None,
                                               expansions=['author_id'],
                                               max_results=100,
                                               media_fields=None,
                                               pagination_token=None,
                                               place_fields=None,
                                               poll_fields=None,
                                               since_id=None,
                                               start_time=start_time,
                                               tweet_fields=['author_id','created_at','public_metrics','source'],
                                               until_id=None,
                                               user_fields=['created_at','description','location','public_metrics','url','verified'],
                                               user_auth=False,
                                               limit=500)
    except Exception:
        raise Exception("Something went wrong whilst performing superspreader analysis.")
    
    # STAGE 2. PREPROCESSING TWEET DATA

    # Parsing response data into an intermediate form.
    tweet_data_for_user = []
    user_data_for_user = []
    for page in tweets_we_want_to_check:
        # Converting each set of tweet fields into a dict and appending to list.
        for tweet in page.data:
            result = dict(tweet)
            tweet_data_for_user.append(result)
        # Converting each set of user fields into a dict and appending to list.
        for user in page.includes['users']:
            result = dict(user)
            user_data_for_user.append(result)
    # Adding user fields to tweet fields.
    for tweet in tweet_data_for_user:
        for user in user_data_for_user:
            for key, val in user.items():
                newKey = "user"+key
                tweet[newKey] = val
            break
    # Unpack and append any values that are dictionaries.
    for tweet in tweet_data_for_user:
        additional_values = {}
        for key, val in tweet.items():
            if type(val) == dict:
                for subkey, subval in val.items():
                    additional_values[subkey] = subval
        tweet.update(additional_values)
    # Create a Pandas DataFrame to store the data.
    user_df = pd.DataFrame(tweet_data_for_user)
    # Perform additional preprocessing using dedicated function.
    user_df = dataframe_preprocessing(user_df)
    # Drop rows that do not mention monkeypox (the pattern also catches common misspellings).
    user_df['monkeypox'] = user_df['text'].str.contains('monkeypox|monkey pox|money pox', case=False, regex=True)
    user_df.drop(user_df[user_df.monkeypox == False].index, inplace=True)
    # Concatenating chosen features.
    concatenated_df = feature_concatenation(user_df,['text'])
    
    # STAGE 3. CALLING CLASSIFIER AND RETURNING RESULTS

    # Calling classifier.
    classified_tweets = classify_tweets(concatenated_df)
    # Returning dictionary of classified tweets.
    return classified_tweets


def on_receipt_of_tweet_query(request:str,client:tw.Client):
    """
    This function defines what the app should do on receipt of a tweet
    URL / ID from the end-user. It performs the following steps:
    (i) formats the string submitted by the user into a parsable form;
    (ii) fetches data for the tweet using Tweepy; (iii) performs some
    basic preprocessing on the data; (iv) calls dedicated preprocessing
    functions to finish preprocessing the data; (v) calls the classifier
    on the tweet; (vi) determines whether superspreader analysis is
    needed (i.e. if tweet is classed as misleading); (vii) if so,
    calls get_user_tweet function and calculates a superspreader score;
    (viii) returns a tuple of data for the application to display.

    Parameters:

    1. request (str): the URL or ID provided by the end-user.
    2. client: the Tweepy client instantiated by load_client.

    Returns:

    1. classified_tweet (dict): the metrics returned for the tweet by classify_tweets.
    2. tweet_text (str): the text of the tweet queried by the end-user.
    3. followers_count (int): the number of followers that the user has.
    4. classified_user_tweets (dict): where applicable, the metrics returned by
       get_user_tweets (otherwise 0).
    5. retweets_total (int): where applicable, the total retweets received by the
       user's misleading tweets (otherwise 0).
    6. spreader_score (float): where applicable, a metric representing the extent
       to which the user can be regarded as a superspreader of misinformation
       (otherwise 0).
    """

    # STAGE 1. FETCH DATA FOR REQUESTED TWEET

    # If a URL is provided by the end-user, strip out the tweet ID
    # (e.g. 'https://twitter.com/user/status/1234567890?s=20' -> '1234567890').
    if '/' in request:
        request = request.split('/')[-1].split('?')[0]
    # Collect tweet data -- interrupt if invalid input provided.
    tweet = client.get_tweets(ids=request,
                              expansions=['author_id'],
                              media_fields=None, 
                              place_fields=None,
                              poll_fields=None,
                              tweet_fields=['author_id','created_at','public_metrics','source'],
                              user_fields=['created_at','description','location','public_metrics','url','verified'],
                              user_auth=False)
    
    # STAGE 2. PREPROCESSING OF TWEET DATA

    # Create dictionaries out of the tweet and user data.
    for i in tweet.data:
        tweet_fields = dict(i)
    for i in tweet.includes['users']:
        user_fields = dict(i)
    # Add the data from the user dict to the tweet dict.
    for key, val in user_fields.items():
        newKey = "user"+key
        tweet_fields[newKey] = val
    # Unpack any values which are themselves dictionaries.
    additional_values = {}
    for key, val in tweet_fields.items():
        if type(val) == dict:
            for subkey, subval in val.items():
                additional_values[subkey] = subval
    tweet_fields.update(additional_values)
    # Convert everything to a DataFrame.
    tweet_df = pd.DataFrame(tweet_fields,index=[0])
    # Store the raw tweet text itself for later use.
    tweet_text = tweet_df['text'][0]
    # Store the followers count for later use.
    followers_count = tweet_df['followers_count'][0]
    # Preprocess the data using dedicated functions.
    tweet_df = dataframe_preprocessing(tweet_df)
    concatenated_tweet_df = feature_concatenation(tweet_df,['text'])

    # STAGE 3. CALLING CLASSIFIER AND DETERMINING NEXT STEPS

    # Call the classifier on the tweet.
    classified_tweet = classify_tweets(concatenated_tweet_df)
    # If the tweet is misleading, call get_user_tweets and calculate
    # the user's superspreader score.
    if len(classified_tweet['badPosts']) == 1:
        # Fetch a dictionary of classified user tweets
        classified_user_tweets = get_user_tweets(tweet_df['userid'][0],14,client=client)
        # Calculate the total number of retweets for all misleading posts.
        retweets_total = 0
        for tweet in classified_user_tweets['badPosts']:
            retweets_total += tweet[-1]
        # Assign the p (post) value.
        p = (0.21 * len(classified_user_tweets['badPosts'])) ** 1.13
        # Assign the f (follower) value
        f = (0.25 * (np.log10(followers_count+1))) ** 4.73
        # Assign the r (retweet) value
        r = (1.04 * (np.log10(retweets_total+1))) ** 0.96
        # Calculate spreader_score and return a tuple of info.
        spreader_score = max(((1 - (1/(max(1,p+f+r))))*100),1)
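        # Illustrative (approximate) worked example of the score above: a user with
        # 3 misleading tweets, 10,000 followers and 50 retweets on those tweets gives
        # p = (0.21*3)**1.13 ~= 0.59, f = (0.25*np.log10(10001))**4.73 ~= 1.00,
        # r = (1.04*np.log10(51))**0.96 ~= 1.74, so score ~= (1 - 1/3.33)*100 ~= 70.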
        return classified_tweet, tweet_text, followers_count, classified_user_tweets, retweets_total, spreader_score
    # Otherwise, if tweet is not misleading, return the same info
    # (excluding any superspreader related variables).
    elif len(classified_tweet['goodPosts']) == 1:
        return classified_tweet, tweet_text, followers_count, 0, 0, 0
    # Contingency in case an error should unexpectedly occur.
    else:
        raise Exception("Something went wrong whilst processing tweet data.")


def webpage():
    """
    This function structures the main page of the web app using the
    conventions of Streamlit. It begins by loading the model, the tokenizer
    and the Tweepy client using the functions dedicated to those tasks.
    Each of these elements is then cached. The remaining content that the
    function generates then depends mostly on the inputs provided by the
    end-user.

    Parameters: none.
    Returns: nothing.
    """

    # Create a container for displaying loading messages which will clear
    # once the tokenizer, Tweepy client and transformer model have loaded.
    loading_container = st.empty()
    with loading_container.container():
        global model
        model = load_model()
        global client
        client = load_client()
        global tokenizer
        tokenizer = load_tokenizer()
    loading_container.empty()

    # Write header content (e.g. banner image, title, description).
    st.image("monkeypox-small.jpg")
    st.title("Monkeypox misinformation detector")
    st.write("Use this tool to detect whether a tweet contains\
            monkeypox misinformation and assess the extent to which its\
            poster can be considered a misinformation superspreader.")

    st.sidebar.subheader("About")
    st.sidebar.write("This app has been developed using a\
                     [COVID-Twitter-BERT](https://huggingface.co/digitalepidemiologylab/covid-twitter-bert-v2)\
                     model fine-tuned on a monkeypox misinformation\
                     dataset. Users can learn more about the\
                     [model](https://huggingface.co/smcrone/monkeypox-misinformation) on the\
                     HuggingFace model repository and can explore on\
                     Kaggle the [dataset](https://www.kaggle.com/datasets/stephencrone/monkeypox)\
                     on which the model was trained. Further\
                     [documentation](https://www.kaggle.com/datasets/stephencrone/monkeypox),\
                     as well as the source code for the app, can be\
                     found in the project's GitHub repository.")

    st.sidebar.subheader("Contact")
    st.sidebar.write("If you have any questions, comments or feedback\
                    regarding this app that are not answered by the\
                    supporting documentation for the underpinning\
                    dataset or transformer model, please feel free\
                    to contact the author at sgscrone@liverpool.ac.uk.")

    # Provide a text box for user to enter tweet ID / URL.
    tweet_to_check = st.text_input("Please provide a tweet URL or ID", key="name")
    # If the string provided by the user is empty, do nothing.
    if tweet_to_check != "":
        # Otherwise, if string is not empty, try fetching tweet using function.
        try:
            classified_tweet, tweet_text, followers_count, classified_user_tweets, retweets_total, spreader_score = on_receipt_of_tweet_query(tweet_to_check,client)
            st.markdown("""<hr style="height:1px;border:none;background-color:#a6a6a6; margin-top:16px; margin-bottom:20px;" /> """, unsafe_allow_html=True)
            col1, col2 = st.columns(2)
            # In left column, present tweet text.
            col1.subheader("Tweet")
            tweet_text = textacy.preprocessing.normalize.whitespace(tweet_text)
            col1.markdown('<p style="background-color: #F0F2F6; padding: 8px 8px 8px 8px;">{}</p>'.format(tweet_text),unsafe_allow_html=True)
            # In right column, present tweet classification.
            col2.subheader("Rating for this tweet")
            if len(classified_tweet['goodPosts']) != 0:
                # Format blue for not misinformation.
                col2.markdown('<p style="color:White; background-color: #1661AD; text-align: center; font-size: 20px;">Not misinformation</p>',unsafe_allow_html=True)
                col2.markdown('<p style="font-size: 40px; text-align: center;">{}</p>'.format(format(classified_tweet['goodPosts'][0][2],'.0%')), unsafe_allow_html=True)
                col2.markdown('<p style="text-align: center;">confidence level</p>', unsafe_allow_html=True)
            else:
                # Format red for misinformation.
                col2.markdown('<p style="color:White; background-color: #701B20; text-align: center; font-size: 20px;">Misinformation</p>',unsafe_allow_html=True)
                col2.markdown('<p style="font-size: 40px; text-align: center;">{}</p>'.format(format(classified_tweet['badPosts'][0][2],'.0%')), unsafe_allow_html=True)
                col2.markdown('<p style="text-align: center;">confidence level</p>', unsafe_allow_html=True)
                # Add additional container to display superspreader analysis.
                superspreader_container = st.container()
                superspreader_container.subheader("Superspreader rating for this user")
                # Plot the superspreader score as a bar chart.
                score_to_plot = pd.DataFrame({"classified_tweet":["score"],"spreader_score":[spreader_score]})
                bar = alt.Chart(score_to_plot).mark_bar().encode(alt.X('spreader_score:Q',scale=alt.Scale(domain=(0, 100)), axis=None), alt.Y('classified_tweet',axis=None)).properties(height=60)
                if spreader_score > 10:
                    label = bar.mark_text(align='right',baseline='middle', dx=-10, color='white', fontSize=20).encode(text=alt.Text("spreader_score:Q", format=",.0f"))
                else:
                    label = bar.mark_text(align='right',baseline='middle', dx=25, color='black', fontSize=20).encode(text=alt.Text("spreader_score:Q", format=",.0f"))
                x = bar+label
                x = x.configure_mark(color='#701B20')
                superspreader_container.altair_chart(x, use_container_width=True)
                # Display stats on which calculation was based.
                superspreader_container.write("Based on the user's **{:,} followers** and the following **{} tweet(s)** published over the last two weeks, which together received **{:,} retweet(s)**.".format(followers_count,len(classified_user_tweets['badPosts']),retweets_total))
                # And print offending tweets from user's recent history.
                for i in range(len(classified_user_tweets['badPosts'])):
                    recent_tweet = classified_user_tweets['badPosts'][i][0]
                    recent_tweet = recent_tweet.split('text:')[-1]
                    superspreader_container.markdown('<p style="background-color: #F0F2F6; padding: 8px 8px 8px 8px;">{}</p>'.format(recent_tweet),unsafe_allow_html=True)
        except:
            st.error("Could not retrieve information for tweet. Please ensure you are supplying a valid tweet ID or URL.")
    st.markdown("""<hr style="height:1px;border:none;background-color:#a6a6a6; margin-top:16px; margin-bottom:20px;" /> """, unsafe_allow_html=True)

if __name__ == "__main__":
    webpage()
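
# A minimal sketch for running the app locally (assuming this file is saved as app.py
# and a Twitter API bearer token is stored under the key "bearer_token" in
# .streamlit/secrets.toml):
#   streamlit run app.py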