Spaces:
Runtime error
Runtime error
change logic to enable different feeds
Browse files- .gitignore +1 -1
- app.py +19 -14
- client/src/components/NewsBlock.svelte +1 -1
- client/src/routes/index.svelte +53 -9
.gitignore
CHANGED
@@ -24,4 +24,4 @@ htmlcov/
|
|
24 |
.coverage.*
|
25 |
*,cover
|
26 |
venv
|
27 |
-
|
|
|
24 |
.coverage.*
|
25 |
*,cover
|
26 |
venv
|
27 |
+
*_cache.json
|
app.py
CHANGED
@@ -6,9 +6,7 @@ from transformers import pipeline
|
|
6 |
import feedparser
|
7 |
import json
|
8 |
from dateutil import parser
|
9 |
-
|
10 |
-
nyt_homepage_rss = "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"
|
11 |
-
|
12 |
load_dotenv()
|
13 |
# Load Sentiment Classifier
|
14 |
sentiment_analysis = pipeline(
|
@@ -25,20 +23,27 @@ def index():
|
|
25 |
|
26 |
@app.route('/news')
|
27 |
def get_news():
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# filter only titles for sentiment analysis
|
30 |
try:
|
31 |
-
with open('
|
32 |
cache = json.load(file)
|
33 |
except:
|
34 |
cache = {}
|
35 |
|
36 |
# if new homepage is newer than cache, update cache and return
|
37 |
-
print("new date",
|
38 |
-
print("old date",cache['last_update']
|
39 |
-
|
|
|
40 |
print("Updating cache with new preditions")
|
41 |
-
titles = [entry['title'] for entry in
|
42 |
# run sentiment analysis on titles
|
43 |
predictions = [sentiment_analysis(sentence) for sentence in titles]
|
44 |
# parse Negative and Positive, normalize to -1 to 1
|
@@ -46,11 +51,11 @@ def get_news():
|
|
46 |
'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
|
47 |
# merge rss data with predictions
|
48 |
entries_predicitons = [{**entry, 'sentiment': prediction}
|
49 |
-
for entry, prediction in zip(
|
50 |
output = {'entries': entries_predicitons,
|
51 |
-
'last_update':
|
52 |
# update last precitions cache
|
53 |
-
with open('
|
54 |
json.dump(output, file)
|
55 |
# send back json
|
56 |
return jsonify(output)
|
@@ -81,8 +86,8 @@ def predict():
|
|
81 |
return jsonify(output)
|
82 |
|
83 |
|
84 |
-
def
|
85 |
-
feed = feedparser.parse(
|
86 |
return {'entries': feed['entries'], 'last_update': feed["feed"]['updated']}
|
87 |
|
88 |
|
|
|
6 |
import feedparser
|
7 |
import json
|
8 |
from dateutil import parser
|
9 |
+
import re
|
|
|
|
|
10 |
load_dotenv()
|
11 |
# Load Sentiment Classifier
|
12 |
sentiment_analysis = pipeline(
|
|
|
23 |
|
24 |
@app.route('/news')
|
25 |
def get_news():
|
26 |
+
feed_url = request.args.get('feed_url')
|
27 |
+
# check if string is a valid
|
28 |
+
|
29 |
+
# file name for cache
|
30 |
+
file_name = "".join(re.split(r"https://|\.|/", feed_url))
|
31 |
+
|
32 |
+
feed_entries = get_feed(feed_url)
|
33 |
# filter only titles for sentiment analysis
|
34 |
try:
|
35 |
+
with open(f'{file_name}_cache.json') as file:
|
36 |
cache = json.load(file)
|
37 |
except:
|
38 |
cache = {}
|
39 |
|
40 |
# if new homepage is newer than cache, update cache and return
|
41 |
+
print("new date", feed_entries['last_update'])
|
42 |
+
print("old date", cache['last_update']
|
43 |
+
if 'last_update' in cache else "None")
|
44 |
+
if not cache or parser.parse(feed_entries['last_update']) > parser.parse(cache['last_update']):
|
45 |
print("Updating cache with new preditions")
|
46 |
+
titles = [entry['title'] for entry in feed_entries['entries']]
|
47 |
# run sentiment analysis on titles
|
48 |
predictions = [sentiment_analysis(sentence) for sentence in titles]
|
49 |
# parse Negative and Positive, normalize to -1 to 1
|
|
|
51 |
'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
|
52 |
# merge rss data with predictions
|
53 |
entries_predicitons = [{**entry, 'sentiment': prediction}
|
54 |
+
for entry, prediction in zip(feed_entries['entries'], predictions)]
|
55 |
output = {'entries': entries_predicitons,
|
56 |
+
'last_update': feed_entries['last_update']}
|
57 |
# update last precitions cache
|
58 |
+
with open(f'{file_name}_cache.json', 'w') as file:
|
59 |
json.dump(output, file)
|
60 |
# send back json
|
61 |
return jsonify(output)
|
|
|
86 |
return jsonify(output)
|
87 |
|
88 |
|
89 |
+
def get_feed(feed_url):
    """Parse the feed at ``feed_url`` and return its entries and update stamp.

    The URL is handed to ``feedparser.parse``. The result is a dict with two
    keys: ``'entries'`` (the parsed feed's entries) and ``'last_update'``
    (the feed-level ``updated`` value).

    NOTE: ``updated`` is read by direct indexing, so a feed that does not
    provide it is not handled here.
    """
    parsed = feedparser.parse(feed_url)
    entries = parsed['entries']
    last_update = parsed["feed"]['updated']
    return {'entries': entries, 'last_update': last_update}
|
92 |
|
93 |
|
client/src/components/NewsBlock.svelte
CHANGED
@@ -25,7 +25,7 @@
|
|
25 |
<a target="_blank" href={feedEntry.link}>
|
26 |
<h2 class="text-2xl font-bold font-serif leading-tight">{feedEntry.title}</h2>
|
27 |
<h4 class="text-sm font-bold leading-tight">By {feedEntry.author}</h4>
|
28 |
-
<p class="py-3 max-w-prose leading-normal">{feedEntry.summary}</p>
|
29 |
<!-- {#if feedEntry.tags}
|
30 |
<div class="text-sm">
|
31 |
{#each feedEntry.tags as tag}
|
|
|
25 |
<a target="_blank" href={feedEntry.link}>
|
26 |
<h2 class="text-2xl font-bold font-serif leading-tight">{feedEntry.title}</h2>
|
27 |
<h4 class="text-sm font-bold leading-tight">By {feedEntry.author}</h4>
|
28 |
+
<p class="py-3 max-w-prose leading-normal">{@html feedEntry.summary}</p>
|
29 |
<!-- {#if feedEntry.tags}
|
30 |
<div class="text-sm">
|
31 |
{#each feedEntry.tags as tag}
|
client/src/routes/index.svelte
CHANGED
@@ -1,18 +1,50 @@
|
|
1 |
<script>
|
2 |
import NewsBlock from '../components/NewsBlock.svelte';
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
let predictions;
|
5 |
let lastUpdate;
|
6 |
let positiveOrder = true;
|
7 |
-
async function fecthPredictions() {
|
|
|
8 |
try {
|
9 |
-
predictions = await fetch(
|
10 |
} catch (e) {
|
11 |
// hack to develop locally without having to run the server
|
12 |
predictions = await fetch('static/test.json').then((d) => d.json());
|
13 |
}
|
14 |
lastUpdate = new Date(predictions.last_update);
|
15 |
predictions = predictions.entries.sort((a, b) => b.sentiment - a.sentiment);
|
|
|
16 |
console.log(lastUpdate, predictions);
|
17 |
}
|
18 |
|
@@ -38,9 +70,9 @@
|
|
38 |
target="_blank"
|
39 |
href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml">New York Times</a
|
40 |
>
|
41 |
-
homepage headlines RSS. It also provides
|
42 |
-
? 'good
|
43 |
-
: 'bad
|
44 |
<a
|
45 |
class="text-blue-500 underline hover:no-underline"
|
46 |
target="_blank"
|
@@ -66,9 +98,21 @@
|
|
66 |
</a>
|
67 |
</p>
|
68 |
<details>
|
69 |
-
<summary class="cursor-pointer">
|
70 |
-
<p
|
71 |
</details>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
<div class="py-4" />
|
73 |
<button
|
74 |
class="{positiveOrder
|
@@ -78,7 +122,7 @@
|
|
78 |
>
|
79 |
{!positiveOrder ? 'Sorted by negative scores' : 'Sorted by positive scores'}
|
80 |
</button>
|
81 |
-
{#await fecthPredictions()}
|
82 |
<div class="py-4">
|
83 |
<svg class="animate-spin inline-block" width="25" height="25" viewBox="0 0 100 100">
|
84 |
<path d="M0,50 a1,1 0 0,0 100,0" fill="lightgrey" />
|
|
|
1 |
<script>
|
2 |
import NewsBlock from '../components/NewsBlock.svelte';
|
3 |
+
let feeds = [
|
4 |
+
{
|
5 |
+
label: 'NYTimes',
|
6 |
+
value: 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'
|
7 |
+
},
|
8 |
+
{
|
9 |
+
label: 'SF Gate Bay Area',
|
10 |
+
value: 'https://www.sfgate.com/bayarea/feed/Bay-Area-News-429.php'
|
11 |
+
},
|
12 |
+
{
|
13 |
+
label: 'BBC News',
|
14 |
+
value: 'https://feeds.bbci.co.uk/news/rss.xml'
|
15 |
+
},
|
16 |
+
{
|
17 |
+
label: 'Buzz Feed World',
|
18 |
+
value: 'https://www.buzzfeed.com/world.xml'
|
19 |
+
},
|
20 |
+
{
|
21 |
+
label: 'Al Jazeera',
|
22 |
+
value: 'https://aljazeera.com/xml/rss/all.xml'
|
23 |
+
},
|
24 |
+
{
|
25 |
+
label: 'Hacker News Front Page',
|
26 |
+
value: 'https://hnrss.org/frontpage'
|
27 |
+
},
|
28 |
+
{
|
29 |
+
label: 'Reddit World News',
|
30 |
+
value: 'https://www.reddit.com/r/worldnews/.rss'
|
31 |
+
}
|
32 |
+
];
|
33 |
+
let selectedFeedUrl = feeds[0].value;
|
34 |
let predictions;
|
35 |
let lastUpdate;
|
36 |
let positiveOrder = true;
|
37 |
+
/**
 * Load sentiment-scored entries for the given feed into component state.
 *
 * Requests `news?feed_url=<feedUrl>`; if that request or its JSON parsing
 * throws, falls back to the bundled `static/test.json`. On completion it
 * sets `lastUpdate` from the response's `last_update`, stores the entries
 * sorted by descending `sentiment` in `predictions`, and resets
 * `positiveOrder` to `true` to match that ordering.
 *
 * @param {string} feedUrl - URL of the RSS/Atom feed to analyse.
 * @returns {Promise<void>}
 */
async function fecthPredictions(feedUrl) {
	console.log(feedUrl);
	// Keep the raw response in a local so `predictions` only ever holds the
	// sorted entries array, never the intermediate response object.
	let payload;
	try {
		// Encode the feed URL: it is itself a URL, so any `?`, `&`, `#` or `+`
		// it contains would otherwise be read as part of our own query string.
		payload = await fetch(`news?feed_url=${encodeURIComponent(feedUrl)}`).then((d) => d.json());
	} catch (e) {
		// hack to develop locally without having to run the server
		payload = await fetch('static/test.json').then((d) => d.json());
	}
	lastUpdate = new Date(payload.last_update);
	predictions = payload.entries.sort((a, b) => b.sentiment - a.sentiment);
	// Entries were just sorted most-positive first, so reset the toggle to match.
	positiveOrder = true;
	console.log(lastUpdate, predictions);
}
|
50 |
|
|
|
70 |
target="_blank"
|
71 |
href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml">New York Times</a
|
72 |
>
|
73 |
+
homepage headlines RSS. It also provides a sorting button to toggle between {positiveOrder
|
74 |
+
? 'good and bad news'
|
75 |
+
: 'bad and good news'} first😛 . It's built with a
|
76 |
<a
|
77 |
class="text-blue-500 underline hover:no-underline"
|
78 |
target="_blank"
|
|
|
98 |
</a>
|
99 |
</p>
|
100 |
<details>
|
101 |
+
<summary class="cursor-pointer">Notes</summary>
|
102 |
+
<p />
|
103 |
</details>
|
104 |
+
|
105 |
+
<p class="py-3 max-w-prose leading-normal">
|
106 |
+
You can try other news feeds <select
|
107 |
+
class="inline-block text-sm bg-gray-200 border border-gray-200 text-gray-700 px-1 py-1 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500"
|
108 |
+
bind:value={selectedFeedUrl}
|
109 |
+
>
|
110 |
+
{#each feeds as feed (feed.value)}
|
111 |
+
<option value={feed.value}>{feed.label}</option>
|
112 |
+
{/each}
|
113 |
+
</select>; however the NYTimes feed comes with more information than the other feeds, such as
|
114 |
+
the thumbnail image, author, and more.
|
115 |
+
</p>
|
116 |
<div class="py-4" />
|
117 |
<button
|
118 |
class="{positiveOrder
|
|
|
122 |
>
|
123 |
{!positiveOrder ? 'Sorted by negative scores' : 'Sorted by positive scores'}
|
124 |
</button>
|
125 |
+
{#await fecthPredictions(selectedFeedUrl)}
|
126 |
<div class="py-4">
|
127 |
<svg class="animate-spin inline-block" width="25" height="25" viewBox="0 0 100 100">
|
128 |
<path d="M0,50 a1,1 0 0,0 100,0" fill="lightgrey" />
|