radames HF staff committed on
Commit
7f708bd
1 Parent(s): 9546bbe

change logic to enable different feeds

Browse files
.gitignore CHANGED
@@ -24,4 +24,4 @@ htmlcov/
24
  .coverage.*
25
  *,cover
26
  venv
27
- last_predictions_cache.json
 
24
  .coverage.*
25
  *,cover
26
  venv
27
+ *_cache.json
app.py CHANGED
@@ -6,9 +6,7 @@ from transformers import pipeline
6
  import feedparser
7
  import json
8
  from dateutil import parser
9
-
10
- nyt_homepage_rss = "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"
11
-
12
  load_dotenv()
13
  # Load Sentiment Classifier
14
  sentiment_analysis = pipeline(
@@ -25,20 +23,27 @@ def index():
25
 
26
  @app.route('/news')
27
  def get_news():
28
- nyt_homepage = get_nytimes()
 
 
 
 
 
 
29
  # filter only titles for sentiment analysis
30
  try:
31
- with open('last_predictions_cache.json') as file:
32
  cache = json.load(file)
33
  except:
34
  cache = {}
35
 
36
  # if new homepage is newer than cache, update cache and return
37
- print("new date",nyt_homepage['last_update'])
38
- print("old date",cache['last_update'] if 'last_update' in cache else "None")
39
- if not cache or parser.parse(nyt_homepage['last_update']) > parser.parse(cache['last_update']):
 
40
  print("Updating cache with new preditions")
41
- titles = [entry['title'] for entry in nyt_homepage['entries']]
42
  # run sentiment analysis on titles
43
  predictions = [sentiment_analysis(sentence) for sentence in titles]
44
  # parse Negative and Positive, normalize to -1 to 1
@@ -46,11 +51,11 @@ def get_news():
46
  'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
47
  # merge rss data with predictions
48
  entries_predicitons = [{**entry, 'sentiment': prediction}
49
- for entry, prediction in zip(nyt_homepage['entries'], predictions)]
50
  output = {'entries': entries_predicitons,
51
- 'last_update': nyt_homepage['last_update']}
52
  # update last predictions cache
53
- with open('last_predictions_cache.json', 'w') as file:
54
  json.dump(output, file)
55
  # send back json
56
  return jsonify(output)
@@ -81,8 +86,8 @@ def predict():
81
  return jsonify(output)
82
 
83
 
84
- def get_nytimes():
85
- feed = feedparser.parse(nyt_homepage_rss)
86
  return {'entries': feed['entries'], 'last_update': feed["feed"]['updated']}
87
 
88
 
 
6
  import feedparser
7
  import json
8
  from dateutil import parser
9
+ import re
 
 
10
  load_dotenv()
11
  # Load Sentiment Classifier
12
  sentiment_analysis = pipeline(
 
23
 
24
  @app.route('/news')
25
  def get_news():
26
+ feed_url = request.args.get('feed_url')
27
+ # TODO: check that feed_url is a valid URL (validation not implemented yet)
28
+
29
+ # file name for cache
30
+ file_name = "".join(re.split(r"https://|\.|/", feed_url))
31
+
32
+ feed_entries = get_feed(feed_url)
33
  # filter only titles for sentiment analysis
34
  try:
35
+ with open(f'{file_name}_cache.json') as file:
36
  cache = json.load(file)
37
  except:
38
  cache = {}
39
 
40
  # if new homepage is newer than cache, update cache and return
41
+ print("new date", feed_entries['last_update'])
42
+ print("old date", cache['last_update']
43
+ if 'last_update' in cache else "None")
44
+ if not cache or parser.parse(feed_entries['last_update']) > parser.parse(cache['last_update']):
45
  print("Updating cache with new preditions")
46
+ titles = [entry['title'] for entry in feed_entries['entries']]
47
  # run sentiment analysis on titles
48
  predictions = [sentiment_analysis(sentence) for sentence in titles]
49
  # parse Negative and Positive, normalize to -1 to 1
 
51
  'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
52
  # merge rss data with predictions
53
  entries_predicitons = [{**entry, 'sentiment': prediction}
54
+ for entry, prediction in zip(feed_entries['entries'], predictions)]
55
  output = {'entries': entries_predicitons,
56
+ 'last_update': feed_entries['last_update']}
57
  # update last predictions cache
58
+ with open(f'{file_name}_cache.json', 'w') as file:
59
  json.dump(output, file)
60
  # send back json
61
  return jsonify(output)
 
86
  return jsonify(output)
87
 
88
 
89
def get_feed(feed_url):
    """Parse the RSS/Atom feed at ``feed_url`` with feedparser.

    Returns a dict with two keys:
      - ``entries``: the parsed feed's ``entries`` value, passed through as-is.
      - ``last_update``: the feed-level ``updated`` value, passed through as-is.

    NOTE(review): the ``updated`` lookup is unguarded, so a feed that does not
    expose that field presumably fails here instead of returning -- confirm
    against the feeds offered by the client.
    """
    parsed = feedparser.parse(feed_url)
    entries = parsed['entries']
    last_update = parsed['feed']['updated']
    return {'entries': entries, 'last_update': last_update}
92
 
93
 
client/src/components/NewsBlock.svelte CHANGED
@@ -25,7 +25,7 @@
25
  <a target="_blank" href={feedEntry.link}>
26
  <h2 class="text-2xl font-bold font-serif leading-tight">{feedEntry.title}</h2>
27
  <h4 class="text-sm font-bold leading-tight">By {feedEntry.author}</h4>
28
- <p class="py-3 max-w-prose leading-normal">{feedEntry.summary}</p>
29
  <!-- {#if feedEntry.tags}
30
  <div class="text-sm">
31
  {#each feedEntry.tags as tag}
 
25
  <a target="_blank" href={feedEntry.link}>
26
  <h2 class="text-2xl font-bold font-serif leading-tight">{feedEntry.title}</h2>
27
  <h4 class="text-sm font-bold leading-tight">By {feedEntry.author}</h4>
28
+ <p class="py-3 max-w-prose leading-normal">{@html feedEntry.summary}</p>
29
  <!-- {#if feedEntry.tags}
30
  <div class="text-sm">
31
  {#each feedEntry.tags as tag}
client/src/routes/index.svelte CHANGED
@@ -1,18 +1,50 @@
1
  <script>
2
  import NewsBlock from '../components/NewsBlock.svelte';
3
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  let predictions;
5
  let lastUpdate;
6
  let positiveOrder = true;
7
- async function fecthPredictions() {
 
8
  try {
9
- predictions = await fetch('news').then((d) => d.json());
10
  } catch (e) {
11
  // hack to develop locally without having to run the server
12
  predictions = await fetch('static/test.json').then((d) => d.json());
13
  }
14
  lastUpdate = new Date(predictions.last_update);
15
  predictions = predictions.entries.sort((a, b) => b.sentiment - a.sentiment);
 
16
  console.log(lastUpdate, predictions);
17
  }
18
 
@@ -38,9 +70,9 @@
38
  target="_blank"
39
  href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml">New York Times</a
40
  >
41
- homepage headlines RSS. It also provides you with a sorting button to choose to see {positiveOrder
42
- ? 'good or bad news first'
43
- : 'bad or good news first'}. It's built with a
44
  <a
45
  class="text-blue-500 underline hover:no-underline"
46
  target="_blank"
@@ -66,9 +98,21 @@
66
  </a>
67
  </p>
68
  <details>
69
- <summary class="cursor-pointer"> Details </summary>
70
- <p></p>
71
  </details>
 
 
 
 
 
 
 
 
 
 
 
 
72
  <div class="py-4" />
73
  <button
74
  class="{positiveOrder
@@ -78,7 +122,7 @@
78
  >
79
  {!positiveOrder ? 'Sorted by negative scores' : 'Sorted by positive scores'}
80
  </button>
81
- {#await fecthPredictions()}
82
  <div class="py-4">
83
  <svg class="animate-spin inline-block" width="25" height="25" viewBox="0 0 100 100">
84
  <path d="M0,50 a1,1 0 0,0 100,0" fill="lightgrey" />
 
1
  <script>
2
  import NewsBlock from '../components/NewsBlock.svelte';
3
+ let feeds = [
4
+ {
5
+ label: 'NYTimes',
6
+ value: 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'
7
+ },
8
+ {
9
+ label: 'SF Gate Bay Area',
10
+ value: 'https://www.sfgate.com/bayarea/feed/Bay-Area-News-429.php'
11
+ },
12
+ {
13
+ label: 'BBC News',
14
+ value: 'https://feeds.bbci.co.uk/news/rss.xml'
15
+ },
16
+ {
17
+ label: 'Buzz Feed World',
18
+ value: 'https://www.buzzfeed.com/world.xml'
19
+ },
20
+ {
21
+ label: 'Al Jazeera',
22
+ value: 'https://aljazeera.com/xml/rss/all.xml'
23
+ },
24
+ {
25
+ label: 'Hacker News Front Page',
26
+ value: 'https://hnrss.org/frontpage'
27
+ },
28
+ {
29
+ label: 'Reddit World News',
30
+ value: 'https://www.reddit.com/r/worldnews/.rss'
31
+ }
32
+ ];
33
+ let selectedFeedUrl = feeds[0].value;
34
  let predictions;
35
  let lastUpdate;
36
  let positiveOrder = true;
37
/**
 * Load the sentiment predictions for one feed into the component state.
 *
 * Requests `news?feed_url=<feedUrl>` and, if that request or its JSON parsing
 * fails, falls back to the bundled `static/test.json` fixture. On success it
 * updates the surrounding component variables:
 *   - `lastUpdate`    -> Date built from the payload's `last_update`
 *   - `predictions`   -> the payload's `entries`, sorted by `sentiment`, highest first
 *   - `positiveOrder` -> reset to `true`, matching the descending sort above
 *
 * @param {string} feedUrl - Absolute URL of the RSS feed to analyse.
 * @returns {Promise<void>} Settles once the component state has been updated.
 */
async function fecthPredictions(feedUrl) {
  console.log(feedUrl);
  // Keep the raw payload in a local so `predictions` never briefly holds the
  // un-sorted response object instead of the entries array.
  let payload;
  try {
    // Feed URLs may themselves contain `?`, `&` or `#`; encode the value so the
    // server receives the whole URL as the single `feed_url` parameter.
    const response = await fetch(`news?feed_url=${encodeURIComponent(feedUrl)}`);
    payload = await response.json();
  } catch (e) {
    // hack to develop locally without having to run the server
    const response = await fetch('static/test.json');
    payload = await response.json();
  }
  lastUpdate = new Date(payload.last_update);
  predictions = payload.entries.sort((a, b) => b.sentiment - a.sentiment);
  positiveOrder = true;
  console.log(lastUpdate, predictions);
}
50
 
 
70
  target="_blank"
71
  href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml">New York Times</a
72
  >
73
+ homepage headlines RSS. It also provides a sorting button to toggle between {positiveOrder
74
+ ? 'good and bad news'
75
+ : 'bad and good news'} first😛 . It's built with a
76
  <a
77
  class="text-blue-500 underline hover:no-underline"
78
  target="_blank"
 
98
  </a>
99
  </p>
100
  <details>
101
+ <summary class="cursor-pointer">Notes</summary>
102
+ <p />
103
  </details>
104
+
105
+ <p class="py-3 max-w-prose leading-normal">
106
+ You can try other news feeds <select
107
+ class="inline-block text-sm bg-gray-200 border border-gray-200 text-gray-700 px-1 py-1 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500"
108
+ bind:value={selectedFeedUrl}
109
+ >
110
+ {#each feeds as feed (feed.value)}
111
+ <option value={feed.value}>{feed.label}</option>
112
+ {/each}
113
+ </select>; however the NYTimes feed comes with more information than the other feeds, such as
114
+ the thumbnail image, author, and more.
115
+ </p>
116
  <div class="py-4" />
117
  <button
118
  class="{positiveOrder
 
122
  >
123
  {!positiveOrder ? 'Sorted by negative scores' : 'Sorted by positive scores'}
124
  </button>
125
+ {#await fecthPredictions(selectedFeedUrl)}
126
  <div class="py-4">
127
  <svg class="animate-spin inline-block" width="25" height="25" viewBox="0 0 100 100">
128
  <path d="M0,50 a1,1 0 0,0 100,0" fill="lightgrey" />