aus10powell committed on
Commit
9f54156
1 Parent(s): 572f2ed

Update scripts/sentiment.py

Browse files

Fixes an issue with pulling tweets for certain accounts due to an empty string after cleaning.

Files changed (1) hide show
  1. scripts/sentiment.py +8 -22
scripts/sentiment.py CHANGED
@@ -18,30 +18,16 @@ def tweet_cleaner(tweet: str) -> str:
18
  Returns:
19
  str: The cleaned tweet.
20
  """
21
- # Remove @ mentions from the tweet
22
- # tweet = re.sub("@[A-Za-z0-9]+", "", tweet)
23
-
24
- # # Remove URLs from the tweet
25
- # tweet = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", tweet)
26
-
27
- # # Remove extra whitespaces from the tweet
28
- # tweet = " ".join(tweet.split())
29
-
30
- # # Remove hashtag sign but keep the text
31
- # tweet = tweet.replace("#", "").replace("_", " ")
32
-
33
- # # Tokenize the tweet and keep only valid words
34
- # tweet = " ".join(
35
- # w
36
- # for w in nltk.wordpunct_tokenize(tweet)
37
- # if w.lower() in words or not w.isalpha()
38
- # )
39
-
40
- # # Return the cleaned tweet
41
- # return tweet
42
  bad_start = ["http:", "https:"]
43
  for w in bad_start:
44
- tweet = re.sub(f" {w}\\S+", "", tweet) # removes white space before url
45
  tweet = re.sub(f"{w}\\S+ ", "", tweet) # in case a tweet starts with a url
46
  tweet = re.sub(f"\n{w}\\S+ ", "", tweet) # in case the url is on a new line
47
  tweet = re.sub(
 
18
  Returns:
19
  str: The cleaned tweet.
20
  """
21
+ if not isinstance(tweet, str):
22
+ try:
23
+ tweet = str(tweet)
24
+ except Exception as e:
25
+ print(f"Error converting tweet to string: {e}")
26
+ return tweet
27
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  bad_start = ["http:", "https:"]
29
  for w in bad_start:
30
+ tweet = re.sub(f" {w}\\S+", "", tweet) # remove white space before url
31
  tweet = re.sub(f"{w}\\S+ ", "", tweet) # in case a tweet starts with a url
32
  tweet = re.sub(f"\n{w}\\S+ ", "", tweet) # in case the url is on a new line
33
  tweet = re.sub(