Spaces:

aus10powell
/

TwitterAccounts

Runtime error

App Files Files Community

aus10powell committed on Jun 1, 2023

Commit

2a3e5e3

1 Parent(s): 33d6c4f

Update scripts/sentiment.py

Browse files

Files changed (1) hide show

scripts/sentiment.py +84 -19

scripts/sentiment.py CHANGED Viewed

@@ -3,7 +3,9 @@ import nltk
 from typing import List
 from transformers import pipeline
 from tqdm import tqdm
 def tweet_cleaner(tweet: str) -> str:
     # words = set(nltk.corpus.words.words())
@@ -68,30 +70,93 @@ def fix_text(text):
     return text
-def get_tweets_sentiment(tweets: List[str]) -> List[float]:
     """
-    Cleans each tweet with `tweet_cleaner` and scores it with a Hugging Face "sentiment-analysis" pipeline.
-    Parameters:
-    tweets (List[str]): A list of tweet texts, or a list of dicts whose "content" key holds the text.
-        Only the first element is inspected to decide which form is used, and the list must be
-        non-empty because `tweets[0]` is read unconditionally.
     Returns:
-    List[float]: The "score" field of each pipeline result, in input order.
-        NOTE(review): presumably this is the confidence of the predicted label rather than a
-        negative-to-positive scale -- confirm against the pipeline documentation.
     """
-    # Load the sentiment analysis pipeline (constructed again on every call)
-    classifier = pipeline(
-        "sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"
-    )
-    # The type of the first element decides how every element is read
-    if type(tweets[0]) == dict:
-        # Clean tweets
-        tweet_texts = [tweet_cleaner(t["content"]) for t in tqdm(tweets)]
-    else:
-        tweet_texts = [tweet_cleaner(t) for t in tqdm(tweets)]
-    # Get tweet sentiment score
-    tweet_sentiments = classifier(tweet_texts)
-    # Extract the sentiment score from each result and return as a list
-    return [t["score"] for t in tweet_sentiments]

 from typing import List
 from transformers import pipeline
 from tqdm import tqdm
+import numpy as np
+import numpy as np
+import scipy
 def tweet_cleaner(tweet: str) -> str:
     # words = set(nltk.corpus.words.words())
     return text
+def twitter_sentiment_api_score(
+    tweet_list: list = None, return_argmax: bool = True, use_api=False
+):
     """
+    Sends a list of tweets to the Hugging Face Twitter Sentiment Analysis API and returns a list of sentiment scores for each tweet.
+    Args:
+        tweet_list (list): A list of strings, where each string represents a tweet.
+        return_argmax (bool): Whether to also return the predicted sentiment label with the highest confidence score for each tweet.
     Returns:
+        A list of dictionaries, where each dictionary contains the sentiment scores for a single tweet. Each sentiment score dictionary
+        contains three key-value pairs: "positive", "neutral", and "negative". The value for each key is a float between 0 and 1 that
+        represents the confidence score for that sentiment label, where higher values indicate higher confidence in that sentiment. If
+        `return_argmax` is True, each dictionary will also contain an additional key "argmax" with the predicted sentiment label for
+        that tweet.
     """
+    if use_api:
+        import requests
+        # URL and authentication header for the Hugging Face Twitter Sentiment Analysis API
+        API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment"
+        headers = {"Authorization": "Bearer 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"}
+        # Function to send a POST request with a JSON payload to the API and return the response as a JSON object
+        def query(payload):
+            response = requests.post(API_URL, headers=headers, json=payload)
+            return response.json()
+        # Send a list of tweets to the API and receive a list of sentiment scores for each tweet
+        output = query(
+            {
+                "inputs": tweet_list,
+            }
+        )
+    else:
+        from transformers import AutoModelForSequenceClassification
+        from transformers import TFAutoModelForSequenceClassification
+        from transformers import AutoTokenizer
+        from scipy.special import softmax
+        import os
+        task = "sentiment"
+        MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
+        tokenizer = AutoTokenizer.from_pretrained(MODEL)
+        model = AutoModelForSequenceClassification.from_pretrained(MODEL)
+        # model.save_pretrained(MODEL)
+        def get_sentimet(text):
+            labels = ["negative", "neutral", "positive"]
+            # text = "Good night 😊"
+            text = tweet_cleaner(text)
+            encoded_input = tokenizer(text, return_tensors="pt")
+            output = model(**encoded_input)
+            scores = output[0][0].detach().numpy()
+            scores = softmax(scores)
+            ranking = np.argsort(scores)[::-1]
+            results = {
+                labels[ranking[i]]: np.round(float(scores[ranking[i]]), 4)
+                for i in range(scores.shape[0])
+            }
+            max_key = max(results, key=results.get)
+            results["argmax"] = max_key
+            return results
+        return [get_sentimet(t) for t in tweet_list]
+    # Loop through the list of sentiment scores and replace the sentiment labels with more intuitive labels
+    result = []
+    for s in output:
+        sentiment_dict = {}
+        for d in s:
+            if isinstance(d, dict):
+                if d["label"] == "LABEL_2":
+                    sentiment_dict["positive"] = d["score"]
+                elif d["label"] == "LABEL_1":
+                    sentiment_dict["neutral"] = d["score"]
+                elif d["label"] == "LABEL_0":
+                    sentiment_dict["negative"] = d["score"]
+        if return_argmax and len(sentiment_dict) > 0:
+            argmax_label = max(sentiment_dict, key=sentiment_dict.get)
+            sentiment_dict["argmax"] = argmax_label
+        result.append(sentiment_dict)
+    # Return a list of dictionaries, where each dictionary contains the sentiment scores for a single tweet
+    return result