Upload app.py
Browse files
app.py
CHANGED
@@ -85,7 +85,7 @@ def dataframe_preprocessing(df_to_preprocess:pd.DataFrame):
|
|
85 |
Returns: df_to_preprocess (DataFrame)
|
86 |
"""
|
87 |
|
88 |
-
# userlocation will not be in dataframe
|
89 |
# !!! note: we will likely NOT use userlocation, so can remove this bit of code in later versions!!!
|
90 |
if 'userlocation' not in df_to_preprocess.columns:
|
91 |
df_to_preprocess['userlocation'] = 'None'
|
@@ -95,7 +95,7 @@ def dataframe_preprocessing(df_to_preprocess:pd.DataFrame):
|
|
95 |
df_to_preprocess['created_at'] = df_to_preprocess['created_at'].dt.tz_localize(None)
|
96 |
df_to_preprocess['usercreated_at'] = df_to_preprocess['usercreated_at'].dt.tz_localize(None)
|
97 |
# Replacing URLs and emojis; normalizing bullet points, whitespace, etc.
|
98 |
-
for feature in ['text','userdescription','userlocation','
|
99 |
df_to_preprocess[feature] = df_to_preprocess[feature].fillna('None').apply(str)
|
100 |
df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.replace.urls(text= x, repl= '_URL_'))
|
101 |
df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: emoji.demojize(x))
|
@@ -343,14 +343,14 @@ def on_receipt_of_tweet_query(request:str,client:tw.Client):
|
|
343 |
request = request.split('/')[-1]
|
344 |
if '?' in request:
|
345 |
request = request.split('?')[0]
|
346 |
-
# Collect tweet data
|
347 |
tweet = client.get_tweets(ids=request,
|
348 |
expansions=['author_id'],
|
349 |
media_fields=None,
|
350 |
place_fields=None,
|
351 |
poll_fields=None,
|
352 |
tweet_fields=['author_id','created_at','public_metrics','source'],
|
353 |
-
user_fields=['created_at','description','location','public_metrics','verified'],
|
354 |
user_auth=False)
|
355 |
|
356 |
# STAGE 2. PREPROCESSING OF TWEET DATA
|
|
|
85 |
Returns: df_to_preprocess (DataFrame)
|
86 |
"""
|
87 |
|
88 |
+
# userlocation will not be in the dataframe if the user has not supplied that field. So, for the time being, fill it with the string 'None' if the column does not exist.
|
89 |
# !!! note: we will likely NOT use userlocation, so can remove this bit of code in later versions!!!
|
90 |
if 'userlocation' not in df_to_preprocess.columns:
|
91 |
df_to_preprocess['userlocation'] = 'None'
|
|
|
95 |
df_to_preprocess['created_at'] = df_to_preprocess['created_at'].dt.tz_localize(None)
|
96 |
df_to_preprocess['usercreated_at'] = df_to_preprocess['usercreated_at'].dt.tz_localize(None)
|
97 |
# Replacing URLs and emojis; normalizing bullet points, whitespace, etc.
|
98 |
+
for feature in ['text','userdescription','userlocation','username']:
|
99 |
df_to_preprocess[feature] = df_to_preprocess[feature].fillna('None').apply(str)
|
100 |
df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.replace.urls(text= x, repl= '_URL_'))
|
101 |
df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: emoji.demojize(x))
|
|
|
343 |
request = request.split('/')[-1]
|
344 |
if '?' in request:
|
345 |
request = request.split('?')[0]
|
346 |
+
# Collect tweet data.
|
347 |
tweet = client.get_tweets(ids=request,
|
348 |
expansions=['author_id'],
|
349 |
media_fields=None,
|
350 |
place_fields=None,
|
351 |
poll_fields=None,
|
352 |
tweet_fields=['author_id','created_at','public_metrics','source'],
|
353 |
+
user_fields=['created_at','description','location','public_metrics','url','verified'],
|
354 |
user_auth=False)
|
355 |
|
356 |
# STAGE 2. PREPROCESSING OF TWEET DATA
|