smcrone committed on
Commit
cebcb15
1 Parent(s): 56fd811

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -85,7 +85,7 @@ def dataframe_preprocessing(df_to_preprocess:pd.DataFrame):
85
  Returns: df_to_preprocess (DataFrame)
86
  """
87
 
88
- # userlocation will not be in dataframe is user not supplied field. So, for time being, fill with none if it does not exist.
89
  # !!! note: we will likely NOT use userlocation, so can remove this bit of code in later versions!!!
90
  if 'userlocation' not in df_to_preprocess.columns:
91
  df_to_preprocess['userlocation'] = 'None'
@@ -95,7 +95,7 @@ def dataframe_preprocessing(df_to_preprocess:pd.DataFrame):
95
  df_to_preprocess['created_at'] = df_to_preprocess['created_at'].dt.tz_localize(None)
96
  df_to_preprocess['usercreated_at'] = df_to_preprocess['usercreated_at'].dt.tz_localize(None)
97
  # Replacing URLs and emojis; normalizing bullet points, whitespace, etc.
98
- for feature in ['text','userdescription','userlocation','userurl','username']:
99
  df_to_preprocess[feature] = df_to_preprocess[feature].fillna('None').apply(str)
100
  df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.replace.urls(text= x, repl= '_URL_'))
101
  df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: emoji.demojize(x))
@@ -343,14 +343,14 @@ def on_receipt_of_tweet_query(request:str,client:tw.Client):
343
  request = request.split('/')[-1]
344
  if '?' in request:
345
  request = request.split('?')[0]
346
- # Collect tweet data -- interrupt if invalid input provided.
347
  tweet = client.get_tweets(ids=request,
348
  expansions=['author_id'],
349
  media_fields=None,
350
  place_fields=None,
351
  poll_fields=None,
352
  tweet_fields=['author_id','created_at','public_metrics','source'],
353
- user_fields=['created_at','description','location','public_metrics','verified'],
354
  user_auth=False)
355
 
356
  # STAGE 2. PREPROCESSING OF TWEET DATA
 
85
  Returns: df_to_preprocess (DataFrame)
86
  """
87
 
88
+ # userlocation will not be in dataframe if user has not supplied field. So, for time being, fill with none if it does not exist.
89
  # !!! note: we will likely NOT use userlocation, so can remove this bit of code in later versions!!!
90
  if 'userlocation' not in df_to_preprocess.columns:
91
  df_to_preprocess['userlocation'] = 'None'
 
95
  df_to_preprocess['created_at'] = df_to_preprocess['created_at'].dt.tz_localize(None)
96
  df_to_preprocess['usercreated_at'] = df_to_preprocess['usercreated_at'].dt.tz_localize(None)
97
  # Replacing URLs and emojis; normalizing bullet points, whitespace, etc.
98
+ for feature in ['text','userdescription','userlocation','username']:
99
  df_to_preprocess[feature] = df_to_preprocess[feature].fillna('None').apply(str)
100
  df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: textacy.preprocessing.replace.urls(text= x, repl= '_URL_'))
101
  df_to_preprocess[feature] = df_to_preprocess[feature].apply(lambda x: emoji.demojize(x))
 
343
  request = request.split('/')[-1]
344
  if '?' in request:
345
  request = request.split('?')[0]
346
+ # Collect tweet data.
347
  tweet = client.get_tweets(ids=request,
348
  expansions=['author_id'],
349
  media_fields=None,
350
  place_fields=None,
351
  poll_fields=None,
352
  tweet_fields=['author_id','created_at','public_metrics','source'],
353
+ user_fields=['created_at','description','location','public_metrics','url','verified'],
354
  user_auth=False)
355
 
356
  # STAGE 2. PREPROCESSING OF TWEET DATA