Benjamin Bossan committed on
Commit
66bc8ec
1 Parent(s): 329262b
README.md CHANGED
@@ -34,3 +34,19 @@ uvicorn webservice:app --reload --port 8080
34
  ```
35
 
36
  For example requests, check `requests.org`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ```
35
 
36
  For example requests, check `requests.org`.
37
+
38
+ ## Checks
39
+
40
+ ### Running tests
41
+
42
+ ```sh
43
+ python -m pytest tests/
44
+ ```
45
+
46
+ ### Other
47
+
48
+ ```sh
49
+ mypy src/
50
+ black src/ && black tests/
51
+ ruff src/
52
+ ```
pyproject.toml CHANGED
@@ -9,6 +9,9 @@ line-length = 88
9
  [tool.isort]
10
  profile = "black"
11
 
 
 
 
12
  [tool.mypy]
13
  no_implicit_optional = true
14
  strict = true
 
9
  [tool.isort]
10
  profile = "black"
11
 
12
+ [tool.pytest.ini_options]
13
+ addopts = "--cov=src --cov-report=term-missing"
14
+
15
  [tool.mypy]
16
  no_implicit_optional = true
17
  strict = true
src/gistillery/webservice.py CHANGED
@@ -90,7 +90,9 @@ def recent_tag(tag: str) -> list[EntriesResult]:
90
  FROM entries e
91
  JOIN summaries s ON e.id = s.entry_id
92
  JOIN tags t ON e.id = t.entry_id
93
- WHERE t.tag = ?
 
 
94
  GROUP BY e.id
95
  ORDER BY e.created_at DESC
96
  LIMIT 10
 
90
  FROM entries e
91
  JOIN summaries s ON e.id = s.entry_id
92
  JOIN tags t ON e.id = t.entry_id
93
+ WHERE e.id IN (
94
+ SELECT entry_id FROM tags WHERE tag = ?
95
+ )
96
  GROUP BY e.id
97
  ORDER BY e.created_at DESC
98
  LIMIT 10
tests/test_webservice.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime as dt
2
+ import os
3
+ from types import SimpleNamespace
4
+
5
+ import pytest
6
+ from fastapi.testclient import TestClient
7
+
8
+
9
+ def is_roughly_now(datetime_str):
10
+ """Check if a datetime string is roughly from now"""
11
+ now = dt.datetime.utcnow()
12
+ datetime = dt.datetime.fromisoformat(datetime_str)
13
+ return (now - datetime).total_seconds() < 3
14
+
15
+
16
+ class TestWebservice:
17
+ @pytest.fixture(autouse=True)
18
+ def db_file(self, tmp_path):
19
+ filename = tmp_path / "test-db.sqlite"
20
+ os.environ["DB_FILE_NAME"] = str(filename)
21
+
22
+ @pytest.fixture
23
+ def client(self):
24
+ from gistillery.webservice import app
25
+
26
+ client = TestClient(app)
27
+ client.get("/clear")
28
+ return client
29
+
30
+ @pytest.fixture
31
+ def mlregistry(self):
32
+ # use dummy models
33
+ from gistillery.ml import MlRegistry, RawTextProcessor, Summarizer, Tagger
34
+
35
+ class DummySummarizer(Summarizer):
36
+ """Returns the first 10 characters of the input"""
37
+ def __init__(self, *args, **kwargs):
38
+ pass
39
+
40
+ def get_name(self):
41
+ return "dummy summarizer"
42
+
43
+ def __call__(self, x):
44
+ return x[:10]
45
+
46
+ class DummyTagger(Tagger):
47
+ """Returns the first 3 words of the input"""
48
+ def __init__(self, *args, **kwargs):
49
+ pass
50
+
51
+ def get_name(self):
52
+ return "dummy tagger"
53
+
54
+ def __call__(self, x):
55
+ return ["#" + word for word in x.split(maxsplit=4)[:3]]
56
+
57
+ registry = MlRegistry()
58
+ registry.register_processor(RawTextProcessor())
59
+
60
+ # arguments don't matter for dummy summarizer and tagger
61
+ summarizer = DummySummarizer(None, None, None, None)
62
+ registry.register_summarizer(summarizer)
63
+
64
+ tagger = DummyTagger(None, None, None, None)
65
+ registry.register_tagger(tagger)
66
+ return registry
67
+
68
+ def process_jobs(self, registry):
69
+ # emulate work of the background worker
70
+ from gistillery.worker import check_pending_jobs, process_job
71
+
72
+ jobs = check_pending_jobs()
73
+ for job in jobs:
74
+ process_job(job, registry)
75
+
76
+ def test_status(self, client):
77
+ resp = client.get("/status")
78
+ assert resp.status_code == 200
79
+ assert resp.json() == "OK"
80
+
81
+ def test_recent_empty(self, client):
82
+ resp = client.get("/recent")
83
+ assert resp.json() == []
84
+
85
+ def test_recent_tag_empty(self, client):
86
+ resp = client.get("/recent/general")
87
+ assert resp.json() == []
88
+
89
+ def test_submitted_job_status_pending(self, client, monkeypatch):
90
+ # monkeypatch uuid4 to return a known value
91
+ job_id = "abc1234"
92
+ monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
93
+ client.post("/submit", json={"author": "ben", "content": "this is a test"})
94
+
95
+ resp = client.get(f"/check_job_status/{job_id}")
96
+ output = resp.json()
97
+ last_updated = output.pop("last_updated")
98
+ assert output == {
99
+ "id": job_id,
100
+ "status": "pending",
101
+ }
102
+ assert is_roughly_now(last_updated)
103
+
104
+ def test_submitted_job_status_not_found(self, client, monkeypatch):
105
+ # monkeypatch uuid4 to return a known value
106
+ job_id = "abc1234"
107
+ monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
108
+ client.post("/submit", json={"author": "ben", "content": "this is a test"})
109
+
110
+ other_job_id = "def5678"
111
+ resp = client.get(f"/check_job_status/{other_job_id}")
112
+ output = resp.json()
113
+ last_updated = output.pop("last_updated")
114
+ assert output == {
115
+ "id": other_job_id,
116
+ "status": "not found",
117
+ }
118
+ assert last_updated is None
119
+
120
+ def test_submitted_job_status_done(self, client, mlregistry, monkeypatch):
121
+ # monkeypatch uuid4 to return a known value
122
+ job_id = "abc1234"
123
+ monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
124
+ client.post("/submit", json={"author": "ben", "content": "this is a test"})
125
+ self.process_jobs(mlregistry)
126
+
127
+ resp = client.get(f"/check_job_status/{job_id}")
128
+ output = resp.json()
129
+ last_updated = output.pop("last_updated")
130
+ assert output == {
131
+ "id": job_id,
132
+ "status": "done",
133
+ }
134
+ assert is_roughly_now(last_updated)
135
+
136
+ def test_recent_with_entries(self, client, mlregistry):
137
+ # submit 2 entries
138
+ client.post(
139
+ "/submit", json={"author": "maxi", "content": "this is a first test"}
140
+ )
141
+ client.post(
142
+ "/submit", json={"author": "mini", "content": "this would be something else"}
143
+ )
144
+ self.process_jobs(mlregistry)
145
+ resp = client.get("/recent").json()
146
+
147
+ # results are sorted by recency but since dummy models are so fast, the
148
+ # date in the db could be the same, so we sort by author
149
+ resp = sorted(resp, key=lambda x: x["author"])
150
+ assert len(resp) == 2
151
+
152
+ resp0 = resp[0]
153
+ assert resp0["author"] == "maxi"
154
+ assert resp0["summary"] == "this is a "
155
+ assert resp0["tags"] == sorted(["#this", "#is", "#a"])
156
+
157
+ resp1 = resp[1]
158
+ assert resp1["author"] == "mini"
159
+ assert resp1["summary"] == "this would"
160
+ assert resp1["tags"] == sorted(["#this", "#would", "#be"])
161
+
162
+ def test_recent_tag_with_entries(self, client, mlregistry):
163
+ # submit 2 entries
164
+ client.post(
165
+ "/submit", json={"author": "maxi", "content": "this is a first test"}
166
+ )
167
+ client.post(
168
+ "/submit", json={"author": "mini", "content": "this would be something else"}
169
+ )
170
+ self.process_jobs(mlregistry)
171
+
172
+ # the "this" tag is in both entries
173
+ resp = client.get("/recent/this").json()
174
+ assert len(resp) == 2
175
+
176
+ # the "would" tag is in only one entry
177
+ resp = client.get("/recent/would").json()
178
+ assert len(resp) == 1
179
+
180
+ resp0 = resp[0]
181
+ assert resp0["author"] == "mini"
182
+ assert resp0["summary"] == "this would"
183
+ assert resp0["tags"] == sorted(["#this", "#would", "#be"])
184
+
185
+ def test_submitted_job_failed(self, client, mlregistry, monkeypatch):
186
+ # monkeypatch uuid4 to return a known value
187
+ job_id = "abc1234"
188
+ monkeypatch.setattr("uuid.uuid4", lambda: SimpleNamespace(hex=job_id))
189
+ client.post("/submit", json={"author": "ben", "content": "this is a test"})
190
+ # patch gistillery.worker._process_job to raise an exception
191
+
192
+ def raise_(ex):
193
+ raise ex
194
+
195
+ monkeypatch.setattr(
196
+ "gistillery.worker._process_job",
197
+ lambda job, registry: raise_(RuntimeError("something went wrong")),
198
+ )
199
+ self.process_jobs(mlregistry)
200
+
201
+ resp = client.get(f"/check_job_status/{job_id}")
202
+ output = resp.json()
203
+ output.pop("last_updated")
204
+ assert output == {
205
+ "id": job_id,
206
+ "status": "failed",
207
+ }