from flask import Flask, request, jsonify
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Initialize Flask app
app = Flask(__name__)

# Check if the VADER lexicon is already downloaded
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Initialize NLTK's VADER analyzer once at startup
sia = SentimentIntensityAnalyzer()

# Lazily load and cache the transformer pipeline so the model is only loaded once
_transformer_pipeline = None

def get_transformer_pipeline():
    global _transformer_pipeline
    if _transformer_pipeline is None:
        tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
        model = AutoModelForSequenceClassification.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment')
        _transformer_pipeline = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
    return _transformer_pipeline

def analyze_sentiment(text):
    # VADER sentiment analysis
    vader_result = sia.polarity_scores(text)

    # RoBERTa sentiment analysis
    nlp = get_transformer_pipeline()
    roberta_result = nlp(text)[0]
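    # For this model, LABEL_0 / LABEL_1 / LABEL_2 correspond to negative / neutral / positive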

    sentiment_scores = {
        'vader_neg': vader_result['neg'],
        'vader_neu': vader_result['neu'],
        'vader_pos': vader_result['pos'],
        'roberta_neg': roberta_result['score'] if roberta_result['label'] == 'LABEL_0' else 0,
        'roberta_neu': roberta_result['score'] if roberta_result['label'] == 'LABEL_1' else 0,
        'roberta_pos': roberta_result['score'] if roberta_result['label'] == 'LABEL_2' else 0
    }

    return sentiment_scores

# Map the RoBERTa positive-sentiment score (0-1) onto a 1-5 star rating
def sentiment_to_stars(sentiment_score):
    thresholds = [0.2, 0.4, 0.6, 0.8]
    if sentiment_score <= thresholds[0]:
        return 1
    elif sentiment_score <= thresholds[1]:
        return 2
    elif sentiment_score <= thresholds[2]:
        return 3
    elif sentiment_score <= thresholds[3]:
        return 4
    else:
        return 5

@app.route('/analyze', methods=['POST'])
def analyze():
    data = request.get_json(silent=True)
    if not data or 'text' not in data:
        return jsonify({'error': "Request body must be JSON with a 'text' field"}), 400
    text = data['text']
    sentiment_scores = analyze_sentiment(text)
    star_rating = sentiment_to_stars(sentiment_scores['roberta_pos'])

    # Ensure all scores are plain Python floats so they are JSON-serializable
    sentiment_scores = {k: float(v) for k, v in sentiment_scores.items()}

    response = {
        'sentiment_scores': sentiment_scores,
        'star_rating': star_rating
    }

    return jsonify(response)

# Health check endpoint
@app.route('/')
def health_check():
    return jsonify({"status": "OK"}), 200

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
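
# Example request (a minimal sketch; assumes the server is running locally on the default port above):
#   curl -X POST http://localhost:5000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"text": "I love this product!"}'
# The response combines the VADER and RoBERTa scores with a derived star rating,
# e.g. (illustrative values):
#   {"sentiment_scores": {"vader_neg": 0.0, "vader_neu": 0.3, "vader_pos": 0.7,
#                         "roberta_neg": 0.0, "roberta_neu": 0.0, "roberta_pos": 0.98},
#    "star_rating": 5}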