nomagick committed on
Commit
d2bebec
·
unverified ·
1 Parent(s): 249408d

fix: abuse blocker

Browse files
backend/functions/package-lock.json CHANGED
@@ -38,6 +38,7 @@
38
  "set-cookie-parser": "^2.6.0",
39
  "stripe": "^11.11.0",
40
  "tiktoken": "^1.0.10",
 
41
  "turndown": "^7.1.3",
42
  "turndown-plugin-gfm": "^1.0.2",
43
  "undici": "^5.24.0"
@@ -11306,8 +11307,7 @@
11306
  "node_modules/tld-extract": {
11307
  "version": "2.1.0",
11308
  "resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
11309
- "integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw==",
11310
- "optional": true
11311
  },
11312
  "node_modules/tmpl": {
11313
  "version": "1.0.5",
 
38
  "set-cookie-parser": "^2.6.0",
39
  "stripe": "^11.11.0",
40
  "tiktoken": "^1.0.10",
41
+ "tld-extract": "^2.1.0",
42
  "turndown": "^7.1.3",
43
  "turndown-plugin-gfm": "^1.0.2",
44
  "undici": "^5.24.0"
 
11307
  "node_modules/tld-extract": {
11308
  "version": "2.1.0",
11309
  "resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
11310
+ "integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw=="
 
11311
  },
11312
  "node_modules/tmpl": {
11313
  "version": "1.0.5",
backend/functions/package.json CHANGED
@@ -58,6 +58,7 @@
58
  "set-cookie-parser": "^2.6.0",
59
  "stripe": "^11.11.0",
60
  "tiktoken": "^1.0.10",
 
61
  "turndown": "^7.1.3",
62
  "turndown-plugin-gfm": "^1.0.2",
63
  "undici": "^5.24.0"
 
58
  "set-cookie-parser": "^2.6.0",
59
  "stripe": "^11.11.0",
60
  "tiktoken": "^1.0.10",
61
+ "tld-extract": "^2.1.0",
62
  "turndown": "^7.1.3",
63
  "turndown-plugin-gfm": "^1.0.2",
64
  "undici": "^5.24.0"
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -65,7 +65,7 @@ export class CrawlerHost extends RPCHost {
65
  cacheRetentionMs = 1000 * 3600 * 24 * 7;
66
  cacheValidMs = 1000 * 3600;
67
  urlValidMs = 1000 * 3600 * 4;
68
- abuseBlockMs = 1000 * 3600 * 24;
69
 
70
  constructor(
71
  protected globalLogger: Logger,
 
65
  cacheRetentionMs = 1000 * 3600 * 24 * 7;
66
  cacheValidMs = 1000 * 3600;
67
  urlValidMs = 1000 * 3600 * 4;
68
+ abuseBlockMs = 1000 * 3600;
69
 
70
  constructor(
71
  protected globalLogger: Logger,
backend/functions/src/services/puppeteer.ts CHANGED
@@ -12,6 +12,7 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
12
  import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
13
  import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
14
  import { Readability } from '@mozilla/readability';
 
15
 
16
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
17
 
@@ -279,8 +280,10 @@ function giveSnapshot(stopActiveSnapshot) {
279
  if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
280
  return req.abort('blockedbyclient', 1000);
281
  }
 
 
 
282
  const parsedUrl = new URL(requestUrl);
283
- domainSet.add(parsedUrl.hostname);
284
 
285
  if (
286
  parsedUrl.hostname === 'localhost' ||
@@ -291,13 +294,13 @@ function giveSnapshot(stopActiveSnapshot) {
291
  return req.abort('blockedbyclient', 1000);
292
  }
293
 
294
- if (reqCounter > 200) {
295
  page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
296
 
297
  return req.abort('blockedbyclient', 1000);
298
  }
299
 
300
- if (domainSet.size > 51) {
301
  page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
302
 
303
  return req.abort('blockedbyclient', 1000);
 
12
  import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
13
  import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
14
  import { Readability } from '@mozilla/readability';
15
+ const tldExtract = require('tld-extract');
16
 
17
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
18
 
 
280
  if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
281
  return req.abort('blockedbyclient', 1000);
282
  }
283
+ const tldParsed = tldExtract(requestUrl);
284
+ domainSet.add(tldParsed.domain);
285
+
286
  const parsedUrl = new URL(requestUrl);
 
287
 
288
  if (
289
  parsedUrl.hostname === 'localhost' ||
 
294
  return req.abort('blockedbyclient', 1000);
295
  }
296
 
297
+ if (reqCounter > 2000) {
298
  page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
299
 
300
  return req.abort('blockedbyclient', 1000);
301
  }
302
 
303
+ if (domainSet.size > 200) {
304
  page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
305
 
306
  return req.abort('blockedbyclient', 1000);