File size: 3,490 Bytes
ebf4f9e
 
 
 
 
 
6e5cb42
 
7f708bd
ebf4f9e
 
 
 
 
6e5cb42
ebf4f9e
 
 
 
 
 
 
 
 
 
7f708bd
 
 
 
 
 
 
ebf4f9e
6e5cb42
7f708bd
6e5cb42
 
 
 
 
7f708bd
 
 
 
6e5cb42
7f708bd
6e5cb42
 
 
 
 
 
 
7f708bd
6e5cb42
7f708bd
6e5cb42
7f708bd
6e5cb42
 
 
 
 
 
ebf4f9e
 
6e5cb42
ebf4f9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f708bd
 
6e5cb42
ebf4f9e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import json
import os
import re
from urllib.parse import urlparse

import feedparser
from dateutil import parser
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import pipeline
# Read settings from a local .env file (if present) into the environment.
load_dotenv()

# Load the sentiment classifier once at import time; both request handlers
# below reuse this single pipeline instance.
sentiment_analysis = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Flask application; static assets are served under the /static URL prefix.
app = Flask(__name__, static_url_path='/static')

# Attach flask-cors to the application so cross-origin requests are allowed.
CORS(app)


@app.route('/')
def index():
    """Serve the front-end entry page, ``index.html``, from the static folder."""
    landing_page = 'index.html'
    return app.send_static_file(landing_page)


@app.route('/news')
def get_news():
    """Return the entries of a feed, each annotated with a sentiment score.

    Query parameters:
        feed_url: Absolute ``http``/``https`` URL of the RSS/Atom feed.

    Returns:
        A JSON object ``{"entries": [...], "last_update": ...}``. Every entry
        is the parsed feed entry plus a ``sentiment`` key: the classifier
        score, negated when the predicted label is ``NEGATIVE`` (so the value
        lies in ``[-1, 1]``).

        Results are cached on disk in ``<sanitised url>_cache.json`` and are
        reused until the feed reports a newer update timestamp than the cache.

        Responds with HTTP 400 and ``{"error": ...}`` when ``feed_url`` is
        missing or is not an absolute http(s) URL, and with HTTP 502 when the
        feed exposes no update timestamp (``get_feed`` raised ``KeyError``).
    """
    feed_url = request.args.get('feed_url')

    # feedparser.parse() accepts local file paths and raw markup as well as
    # URLs, so only absolute http(s) URLs supplied by the client are allowed.
    # NOTE(review): this does not stop requests to internal hosts (SSRF);
    # add an allow-list if the service is reachable by untrusted clients.
    try:
        url_parts = urlparse(feed_url or '')
    except ValueError:
        url_parts = None
    if (url_parts is None or url_parts.scheme not in ('http', 'https')
            or not url_parts.netloc):
        return jsonify({'error': 'feed_url must be an absolute http(s) URL'}), 400

    # Cache file name: the URL without "https://", dots and slashes (the
    # original scheme), further reduced to characters that are safe in a
    # file name on every platform.
    file_name = "".join(re.split(r"https://|\.|/", feed_url))
    file_name = re.sub(r"[^A-Za-z0-9_-]", "", file_name)
    cache_path = f'{file_name}_cache.json'

    try:
        feed_entries = get_feed(feed_url)
    except KeyError:
        # Unreachable / malformed feed, or one without an update timestamp.
        return jsonify({'error': 'Feed could not be read or has no update date'}), 502

    # A missing, unreadable or corrupt cache is simply treated as empty.
    try:
        with open(cache_path) as file:
            cache = json.load(file)
    except (OSError, ValueError):
        cache = {}
    if not isinstance(cache, dict):
        cache = {}

    print("new date", feed_entries['last_update'])
    print("old date", cache.get('last_update', "None"))

    # The cache is fresh only if it exists and the feed is not newer than it.
    # Missing or unparseable timestamps (or a naive/aware datetime mismatch)
    # count as stale so that the predictions are recomputed.
    try:
        cache_is_fresh = bool(cache) and (
            parser.parse(feed_entries['last_update'])
            <= parser.parse(cache['last_update'])
        )
    except (KeyError, TypeError, ValueError, OverflowError):
        cache_is_fresh = False

    if cache_is_fresh:
        print("Returning cached predictions")
        return jsonify(cache)

    print("Updating cache with new preditions")
    entries = feed_entries['entries']

    # Score every title and fold label + confidence into one signed value.
    sentiments = []
    for entry in entries:
        # Entries without a title are scored on the empty string instead of
        # failing the whole request.
        result = sentiment_analysis(entry.get('title', ''))[0]
        score = result['score']
        sentiments.append(-score if result['label'] == 'NEGATIVE' else score)

    # Merge the feed data with the predictions.
    entries_predictions = [{**entry, 'sentiment': sentiment}
                           for entry, sentiment in zip(entries, sentiments)]
    output = {'entries': entries_predictions,
              'last_update': feed_entries['last_update']}

    # Caching is best effort: a failed write must not fail the response.
    try:
        with open(cache_path, 'w') as file:
            json.dump(output, file)
    except OSError as err:
        print("Could not write cache", err)

    return jsonify(output)


@app.route('/predict', methods=['POST'])
def predict():
    """Score the sentiment of sentences supplied by the caller.

    Expects a JSON request body of the form ``{"sentences": ["...", ...]}``.

    Returns:
        A JSON list with one ``{"sentence": ..., "sentiment": ...}`` object
        per input sentence, in input order. ``sentiment`` is the classifier
        score, negated when the predicted label is ``NEGATIVE`` (so the value
        lies in ``[-1, 1]``).

        Responds with HTTP 400 and ``{"error": ...}`` when the body is not a
        JSON object, has no ``sentences`` key, or ``sentences`` is not a list
        of strings.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # caller gets the JSON error below rather than an unhandled exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or data.get('sentences') is None:
        return jsonify({'error': 'No text provided'}), 400

    sentences = data['sentences']
    # A bare string would otherwise be iterated character by character.
    if not isinstance(sentences, list) or not all(
            isinstance(sentence, str) for sentence in sentences):
        return jsonify({'error': 'sentences must be a list of strings'}), 400

    output = []
    for sentence in sentences:
        result = sentiment_analysis(sentence)[0]
        score = result['score']
        # Fold label + confidence into a single signed value.
        sentiment = -score if result['label'] == 'NEGATIVE' else score
        output.append({'sentence': sentence, 'sentiment': sentiment})

    return jsonify(output)


def get_feed(feed_url):
    """Parse the feed at *feed_url* and return its entries and timestamp.

    Args:
        feed_url: Location of the RSS/Atom feed, handed to
            ``feedparser.parse``.

    Returns:
        A dict with two keys: ``'entries'`` (the parsed feed entries) and
        ``'last_update'`` (the feed-level ``updated`` value or, when that is
        absent, the feed-level ``published`` value).

    Raises:
        KeyError: If the feed exposes neither ``updated`` nor ``published``
            (for example because it could not be fetched or parsed).
    """
    feed = feedparser.parse(feed_url)
    channel = feed['feed']

    # Some feeds only carry a publication date. Fall back to it explicitly
    # rather than relying on feedparser's deprecated implicit mapping from
    # 'updated' to 'published'.
    for key in ('updated', 'published'):
        if key in channel:
            return {'entries': feed['entries'], 'last_update': channel[key]}

    raise KeyError(
        f"feed {feed_url!r} has no 'updated' or 'published' timestamp")


if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)