radames committed on
Commit
8f4b172
1 Parent(s): e4ea573
Files changed (10)
  1. .gitattributes +1 -0
  2. .gitignore +4 -0
  3. Dockerfile +15 -0
  4. app.py +52 -0
  5. cache.db +3 -0
  6. db.py +82 -0
  7. news_data.py +186 -0
  8. requirements.txt +5 -0
  9. schema.sql +13 -0
  10. templates/index.j2 +19 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.db filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ venv
+ gradio_cached_examples
+ __pycache__/
+ cache/
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ FROM python:3.10.13
+ WORKDIR /code
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1 \
+     SYSTEM=spaces
+
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,52 @@
+ import os
+ import uvicorn
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.templating import Jinja2Templates
+ import logging
+ from fastapi.responses import HTMLResponse
+ from fastapi import FastAPI, Request, HTTPException
+ from pathlib import Path
+ from dateutil import parser
+
+ from db import Database
+
+ database = Database(Path("./"))
+ logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
+
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ def format_date(value):
+     format = "%A, %d %B %Y"
+     # Use dateutil's parser to automatically handle different date formats
+     try:
+         date = parser.parse(value)
+         return date.strftime(format)
+     except Exception as e:
+         logging.error(e)
+         return value
+
+
+ templates = Jinja2Templates(directory="templates")
+ templates.env.filters["formatdate"] = format_date
+
+
+ @app.get("/", response_class=HTMLResponse)
+ async def main(request: Request):
+     data = database.filter("world")
+     return templates.TemplateResponse(
+         request=request, name="index.j2", context={"data": data}
+     )
+
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
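For reference, a minimal sketch of the formatdate filter's behaviour on its own (the sample date strings are illustrative; the logic mirrors format_date in app.py):

    from dateutil import parser

    def format_date(value):
        # Parse loosely and fall back to the raw value on failure, as app.py does.
        try:
            return parser.parse(value).strftime("%A, %d %B %Y")
        except Exception:
            return value

    print(format_date("Tue, 05 Mar 2024 08:30:00 GMT"))  # Tuesday, 05 March 2024
    print(format_date("not a date"))                     # not a date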
cache.db ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61b9c38672deb60e6ccf7426699dc087558391d8d518d7a90c45b1184c4dcf96
+ size 10891264
db.py ADDED
@@ -0,0 +1,82 @@
+ import sqlite3
+ from pathlib import Path
+ from typing import List, Tuple
+ import json
+
+
+ class Database:
+     def __init__(self, db_path=None):
+         if db_path is None:
+             raise ValueError("db_path must be provided")
+         self.db_path = db_path
+         self.db_file = self.db_path / "cache.db"
+         if not self.db_file.exists():
+             print("Creating database")
+             print("DB_FILE", self.db_file)
+             db = sqlite3.connect(self.db_file)
+             with open(Path("schema.sql"), "r") as f:
+                 db.executescript(f.read())
+             db.commit()
+             db.close()
+
+     def get_db(self):
+         db = sqlite3.connect(self.db_file, check_same_thread=False)
+         db.row_factory = sqlite3.Row
+         return db
+
+     def __enter__(self):
+         self.db = self.get_db()
+         return self.db
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.db.close()
+
+     def __call__(self):
+         return self
+
+     def insert(self, data: List[Tuple[str, str, str]]):
+         with self() as db:
+             cursor = db.cursor()
+             try:
+                 for entry in data:
+                     url, title, entries = entry
+                     cursor.execute(
+                         "INSERT INTO cache (url, title, entries) VALUES (?, ?, ?)",
+                         (url, title, entries),
+                     )
+             except Exception as e:
+                 print(e)
+             db.commit()
+
+     def filter(self, category: str):
+         with self() as db:
+             entries = db.execute("SELECT url, title, entries FROM cache").fetchall()
+             out = []
+             for row in entries:
+                 # parse the stored JSON payload
+                 data = json.loads(row["entries"])
+                 try:
+                     data = [
+                         {
+                             "title": entry["title"],
+                             "link": entry["link"],
+                             "published": entry["published"]
+                             if "published" in entry
+                             else entry["pubDate"]
+                             if "pubDate" in entry
+                             else "",
+                             "summary": entry["summary"] if "summary" in entry else "",
+                         }
+                         for entry in data["entries"]
+                     ]
+                     if len(data) > 0:
+                         out.append(
+                             {
+                                 "entries": data,
+                                 "url": row["url"],
+                                 "title": row["title"],
+                             }
+                         )
+                 except Exception as e:
+                     print(f"Error on {row['url']}: {e}")
+             return out
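A minimal usage sketch for the Database class above (assumes it runs from the repository root so schema.sql is found; the feed payload is made up for illustration):

    import json
    from pathlib import Path
    from db import Database

    db = Database(Path("./"))  # creates cache.db from schema.sql if it is missing

    # Same shape news_data.py stores: (url, title, JSON-encoded entries)
    payload = json.dumps({"entries": [{
        "title": "Example headline",
        "link": "https://example.com/story",
        "published": "Tue, 05 Mar 2024 08:30:00 GMT",
        "summary": "Example summary.",
    }]})
    db.insert([("https://example.com/rss", "Example Feed", payload)])

    # filter() currently ignores its category argument and returns every cached feed.
    for feed in db.filter("world"):
        print(feed["title"], len(feed["entries"]))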
news_data.py ADDED
@@ -0,0 +1,186 @@
+ import feedparser
+ from pathlib import Path
+ import json
+ from db import Database
+
+ database = Database(Path("./"))
+
+
+ def get_feed(feed_url):
+     feed = feedparser.parse(feed_url)
+     return {"entries": feed["entries"]}
+
+
+ def cache_news():
+     data = []
+     for feed in TOP_NEWS_FEEDS:
+         url = feed["url"]
+         label = feed["label"]
+         print(f"Fetching {label} from {url}")
+         try:
+             feed = get_feed(url)
+             data.append((url, label, json.dumps(feed)))
+         except Exception as e:
+             print(f"Failed to fetch {label} from {url}: {e}")
+
+     with open("data.json", "w") as f:
+         f.write(json.dumps(data))
+
+     database.insert(data)
+
+
+ TOP_NEWS_FEEDS = [
+     {"label": "BBC World News", "url": "http://feeds.bbci.co.uk/news/world/rss.xml"},
+     {
+         "label": "Reddit World News",
+         "url": "https://www.reddit.com/r/worldnews/top/.rss",
+     },
+     {"label": "Vox", "url": "http://www.vox.com/rss/index.xml"},
+     {"label": "CBS News", "url": "https://www.cbsnews.com/latest/rss/main"},
+     {"label": "ABC News", "url": "http://abcnews.go.com/abcnews/topstories"},
+     {"label": "CNN Top Stories", "url": "http://rss.cnn.com/rss/cnn_topstories.rss"},
+     {"label": "CNN World News", "url": "http://rss.cnn.com/rss/cnn_world.rss"},
+     {
+         "label": "The New York Times",
+         "url": "http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+     },
+     {
+         "label": "The Economist",
+         "url": "http://www.economist.com/sections/business-finance/rss.xml",
+     },
+     {"label": "The Guardian", "url": "https://www.theguardian.com/international/rss"},
+     {"label": "NPR", "url": "http://www.npr.org/rss/rss.php?id=1001"},
+     {"label": "Al Jazeera", "url": "https://www.aljazeera.com/xml/rss/all.xml"},
+     {
+         "label": "The Guardian World News",
+         "url": "https://www.theguardian.com/world/rss",
+     },
+     {"label": "The Atlantic", "url": "https://www.theatlantic.com/feed/all/"},
+     {"label": "Vice", "url": "http://www.vice.com/rss"},
+     {
+         "label": "The New York Times",
+         "url": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+     },
+     {
+         "label": "The New Yorker",
+         "url": "http://www.newyorker.com/services/rss/feeds/everything.xml",
+     },
+     {"label": "Pew Research Center", "url": "http://www.pewresearch.org/feed/"},
+     {"label": "Fox News", "url": "http://feeds.feedburner.com/foxnews/latest"},
+     {
+         "label": "The Washington Post",
+         "url": "http://feeds.washingtonpost.com/rss/world",
+     },
+     {"label": "The Guardian UK", "url": "https://www.theguardian.com/uk/rss"},
+     {"label": "TIME", "url": "http://rss.time.com/web/time/rss/top/index.xml"},
+     {
+         "label": "The New York Times",
+         "url": "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+     },
+     {"label": "NPR", "url": "https://feeds.npr.org/1001/rss.xml"},
+     {"label": "Fortune", "url": "http://fortune.com/feed/"},
+     {"label": "Fox News", "url": "http://feeds.foxnews.com/foxnews/latest"},
+     {
+         "label": "BBC World News",
+         "url": "http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml",
+     },
+     {"label": "Al Jazeera", "url": "http://www.aljazeera.com/xml/rss/all.xml"},
+     {"label": "Le Monde", "url": "https://www.lemonde.fr/rss/une.xml"},
+     {"label": "Vox", "url": "https://www.vox.com/rss/index.xml"},
+     {
+         "label": "The New York Times",
+         "url": "http://rss.nytimes.com/services/xml/rss/nyt/World.xml",
+     },
+     {"label": "The Guardian US", "url": "https://www.theguardian.com/us/rss"},
+     {"label": "ProPublica", "url": "http://feeds.propublica.org/propublica/main"},
+     {"label": "The Washington Post", "url": "https://feedx.net/rss/washingtonpost.xml"},
+     {"label": "Axios", "url": "https://api.axios.com/feed/top/"},
+     {"label": "RT", "url": "https://www.rt.com/rss/"},
+     {"label": "ABC News US", "url": "http://feeds.abcnews.com/abcnews/usheadlines"},
+     {"label": "CNN US", "url": "http://rss.cnn.com/rss/cnn_topstories.rss"},
+     {"label": "CBS News", "url": "http://www.cbsnews.com/latest/rss/main"},
+     {
+         "label": "The Wall Street Journal",
+         "url": "http://online.wsj.com/xml/rss/3_7085.xml",
+     },
+     {
+         "label": "USA Today",
+         "url": "http://content.usatoday.com/marketing/rss/rsstrans.aspx?feedId=news2",
+     },
+     {
+         "label": "The Christian Science Monitor",
+         "url": "http://rss.csmonitor.com/feeds/usa",
+     },
+     {
+         "label": "NBC News Top Stories",
+         "url": "http://feeds.nbcnews.com/feeds/topstories",
+     },
+     {"label": "NBC News World News", "url": "http://feeds.nbcnews.com/feeds/worldnews"},
+     {
+         "label": "Reuters World News",
+         "url": "http://feeds.reuters.com/Reuters/worldNews",
+     },
+     {
+         "label": "Reuters US News",
+         "url": "http://feeds.reuters.com/Reuters/domesticNews",
+     },
+     {
+         "label": "Associated Press US Headlines",
+         "url": "http://hosted.ap.org/lineups/USHEADS.rss",
+     },
+     {
+         "label": "Associated Press World Headlines",
+         "url": "http://hosted.ap.org/lineups/WORLDHEADS.rss",
+     },
+     {
+         "label": "HuffPost World News",
+         "url": "http://www.huffingtonpost.com/feeds/verticals/world/index.xml",
+     },
+     {
+         "label": "BBC News US and Canada",
+         "url": "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml",
+     },
+     {"label": "Yahoo News US", "url": "http://news.yahoo.com/rss/us"},
+     {"label": "Yahoo News World", "url": "http://rss.news.yahoo.com/rss/world"},
+     {"label": "Newsweek", "url": "http://www.newsweek.com/rss"},
+     {
+         "label": "The Daily Beast",
+         "url": "http://feeds.feedburner.com/thedailybeast/articles",
+     },
+     {"label": "Quartz", "url": "http://qz.com/feed"},
+     {"label": "The Guardian USA", "url": "http://www.theguardian.com/world/usa/rss"},
+     {"label": "Politico", "url": "http://www.politico.com/rss/politicopicks.xml"},
+     {"label": "The New Yorker News", "url": "http://www.newyorker.com/feed/news"},
+     {"label": "PBS NewsHour", "url": "http://feeds.feedburner.com/NationPBSNewsHour"},
+     {"label": "PBS NewsHour World", "url": "http://feeds.feedburner.com/NewshourWorld"},
+     {"label": "NPR Politics", "url": "http://www.npr.org/rss/rss.php?id=1003"},
+     {"label": "NPR World", "url": "http://www.npr.org/rss/rss.php?id=1004"},
+     {
+         "label": "The Atlantic National",
+         "url": "http://feeds.feedburner.com/AtlanticNational",
+     },
+     {
+         "label": "The Atlantic Wire",
+         "url": "http://feeds.feedburner.com/TheAtlanticWire",
+     },
+     {
+         "label": "Los Angeles Times US",
+         "url": "http://www.latimes.com/nation/rss2.0.xml",
+     },
+     {
+         "label": "Los Angeles Times World",
+         "url": "http://www.latimes.com/world/rss2.0.xml",
+     },
+     {
+         "label": "Breaking News",
+         "url": "http://api.breakingnews.com/api/v1/item/?format=rss",
+     },
+     {"label": "VICE News", "url": "https://news.vice.com/rss"},
+     {
+         "label": "Talking Points Memo",
+         "url": "http://talkingpointsmemo.com/feed/livewire",
+     },
+     {"label": "TIME Newsfeed", "url": "http://time.com/newsfeed/feed/"},
+     {"label": "Fox News", "url": "http://feeds.foxnews.com/foxnews/latest?format=xml"},
+     {"label": "Mashable US & World", "url": "http://mashable.com/us-world/rss/"},
+ ]
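A quick way to exercise news_data.py by hand is to fetch a single feed rather than running the full cache_news() pass over every URL in TOP_NEWS_FEEDS (a sketch; run it from the repository root so the module-level Database instance can find schema.sql):

    from news_data import get_feed, cache_news

    # Fetch one feed and inspect the first few entries.
    feed = get_feed("http://feeds.bbci.co.uk/news/world/rss.xml")
    for entry in feed["entries"][:3]:
        print(entry.get("title"), entry.get("link"))

    # Or populate cache.db (and data.json) from every feed in TOP_NEWS_FEEDS:
    # cache_news()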
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi
+ uvicorn
+ feedparser
+ Jinja2
+ python-dateutil
schema.sql ADDED
@@ -0,0 +1,13 @@
+ PRAGMA foreign_keys = OFF;
+
+ BEGIN TRANSACTION;
+
+ CREATE TABLE cache (
+     id INTEGER PRIMARY KEY AUTOINCREMENT,
+     url TEXT NOT NULL,
+     title TEXT NOT NULL,
+     entries json,
+     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
+ );
+
+ COMMIT;
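To peek at what the cache table ends up holding, a small sqlite3 sketch (assumes cache.db sits in the current directory):

    import sqlite3

    con = sqlite3.connect("cache.db")
    con.row_factory = sqlite3.Row
    rows = con.execute(
        "SELECT id, url, title, length(entries) AS entries_bytes, created_at FROM cache"
    ).fetchall()
    for row in rows:
        print(dict(row))
    con.close()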
templates/index.j2 ADDED
@@ -0,0 +1,19 @@
+ <html>
+ <head>
+     <title>Item Details</title>
+ </head>
+ <body>
+     <div>
+         {% for item in data %}
+         <h1>{{ item.title }}</h1>
+         {% for entry in item['entries'] %}
+         <a href="{{ entry.link }}" target="_blank" rel="noopener noreferrer">
+             <h2>{{ entry.title }}</h2></a>
+         <h3>{{ entry.published|formatdate }}</h3>
+         <p>{{ entry.summary }}</p>
+         {% endfor %}
+
+         {% endfor %}
+     </div>
+ </body>
+ </html>
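The template can also be rendered outside FastAPI for a quick preview with plain Jinja2 (a sketch; the sample context mirrors what db.filter() returns, and formatdate is stubbed here since the real filter is registered in app.py):

    from jinja2 import Environment, FileSystemLoader

    env = Environment(loader=FileSystemLoader("templates"))
    env.filters["formatdate"] = lambda value: value  # stand-in for app.py's format_date

    sample = [{
        "title": "Example Feed",
        "url": "https://example.com/rss",
        "entries": [{
            "title": "Example headline",
            "link": "https://example.com/story",
            "published": "Tue, 05 Mar 2024 08:30:00 GMT",
            "summary": "Example summary.",
        }],
    }]

    print(env.get_template("index.j2").render(data=sample))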