Spaces:
Build error
Build error
fix: abuse blocker
Browse files
backend/functions/package-lock.json
CHANGED
|
@@ -38,6 +38,7 @@
|
|
| 38 |
"set-cookie-parser": "^2.6.0",
|
| 39 |
"stripe": "^11.11.0",
|
| 40 |
"tiktoken": "^1.0.10",
|
|
|
|
| 41 |
"turndown": "^7.1.3",
|
| 42 |
"turndown-plugin-gfm": "^1.0.2",
|
| 43 |
"undici": "^5.24.0"
|
|
@@ -11306,8 +11307,7 @@
|
|
| 11306 |
"node_modules/tld-extract": {
|
| 11307 |
"version": "2.1.0",
|
| 11308 |
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
|
| 11309 |
-
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw==",
|
| 11310 |
-
"optional": true
|
| 11311 |
},
|
| 11312 |
"node_modules/tmpl": {
|
| 11313 |
"version": "1.0.5",
|
|
|
|
| 38 |
"set-cookie-parser": "^2.6.0",
|
| 39 |
"stripe": "^11.11.0",
|
| 40 |
"tiktoken": "^1.0.10",
|
| 41 |
+
"tld-extract": "^2.1.0",
|
| 42 |
"turndown": "^7.1.3",
|
| 43 |
"turndown-plugin-gfm": "^1.0.2",
|
| 44 |
"undici": "^5.24.0"
|
|
|
|
| 11307 |
"node_modules/tld-extract": {
|
| 11308 |
"version": "2.1.0",
|
| 11309 |
"resolved": "https://registry.npmjs.org/tld-extract/-/tld-extract-2.1.0.tgz",
|
| 11310 |
+
"integrity": "sha512-Y9QHWIoDQPJJVm3/pOC7kOfOj7vsNSVZl4JGoEHb605FiwZgIfzSMyU0HC0wYw5Cx8435vaG1yGZtIm1yiQGOw=="
|
|
|
|
| 11311 |
},
|
| 11312 |
"node_modules/tmpl": {
|
| 11313 |
"version": "1.0.5",
|
backend/functions/package.json
CHANGED
|
@@ -58,6 +58,7 @@
|
|
| 58 |
"set-cookie-parser": "^2.6.0",
|
| 59 |
"stripe": "^11.11.0",
|
| 60 |
"tiktoken": "^1.0.10",
|
|
|
|
| 61 |
"turndown": "^7.1.3",
|
| 62 |
"turndown-plugin-gfm": "^1.0.2",
|
| 63 |
"undici": "^5.24.0"
|
|
|
|
| 58 |
"set-cookie-parser": "^2.6.0",
|
| 59 |
"stripe": "^11.11.0",
|
| 60 |
"tiktoken": "^1.0.10",
|
| 61 |
+
"tld-extract": "^2.1.0",
|
| 62 |
"turndown": "^7.1.3",
|
| 63 |
"turndown-plugin-gfm": "^1.0.2",
|
| 64 |
"undici": "^5.24.0"
|
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -65,7 +65,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 65 |
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
| 66 |
cacheValidMs = 1000 * 3600;
|
| 67 |
urlValidMs = 1000 * 3600 * 4;
|
| 68 |
-
abuseBlockMs = 1000 * 3600
|
| 69 |
|
| 70 |
constructor(
|
| 71 |
protected globalLogger: Logger,
|
|
|
|
| 65 |
cacheRetentionMs = 1000 * 3600 * 24 * 7;
|
| 66 |
cacheValidMs = 1000 * 3600;
|
| 67 |
urlValidMs = 1000 * 3600 * 4;
|
| 68 |
+
abuseBlockMs = 1000 * 3600;
|
| 69 |
|
| 70 |
constructor(
|
| 71 |
protected globalLogger: Logger,
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -12,6 +12,7 @@ import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|
| 12 |
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
| 13 |
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
|
| 14 |
import { Readability } from '@mozilla/readability';
|
|
|
|
| 15 |
|
| 16 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
| 17 |
|
|
@@ -279,8 +280,10 @@ function giveSnapshot(stopActiveSnapshot) {
|
|
| 279 |
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
|
| 280 |
return req.abort('blockedbyclient', 1000);
|
| 281 |
}
|
|
|
|
|
|
|
|
|
|
| 282 |
const parsedUrl = new URL(requestUrl);
|
| 283 |
-
domainSet.add(parsedUrl.hostname);
|
| 284 |
|
| 285 |
if (
|
| 286 |
parsedUrl.hostname === 'localhost' ||
|
|
@@ -291,13 +294,13 @@ function giveSnapshot(stopActiveSnapshot) {
|
|
| 291 |
return req.abort('blockedbyclient', 1000);
|
| 292 |
}
|
| 293 |
|
| 294 |
-
if (reqCounter >
|
| 295 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
|
| 296 |
|
| 297 |
return req.abort('blockedbyclient', 1000);
|
| 298 |
}
|
| 299 |
|
| 300 |
-
if (domainSet.size >
|
| 301 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
|
| 302 |
|
| 303 |
return req.abort('blockedbyclient', 1000);
|
|
|
|
| 12 |
import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
|
| 13 |
import { SecurityCompromiseError, ServiceCrashedError } from '../shared/lib/errors';
|
| 14 |
import { Readability } from '@mozilla/readability';
|
| 15 |
+
const tldExtract = require('tld-extract');
|
| 16 |
|
| 17 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
| 18 |
|
|
|
|
| 280 |
if (!requestUrl.startsWith("http:") && !requestUrl.startsWith("https:") && requestUrl !== 'about:blank') {
|
| 281 |
return req.abort('blockedbyclient', 1000);
|
| 282 |
}
|
| 283 |
+
const tldParsed = tldExtract(requestUrl);
|
| 284 |
+
domainSet.add(tldParsed.domain);
|
| 285 |
+
|
| 286 |
const parsedUrl = new URL(requestUrl);
|
|
|
|
| 287 |
|
| 288 |
if (
|
| 289 |
parsedUrl.hostname === 'localhost' ||
|
|
|
|
| 294 |
return req.abort('blockedbyclient', 1000);
|
| 295 |
}
|
| 296 |
|
| 297 |
+
if (reqCounter > 2000) {
|
| 298 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many requests: ${reqCounter}` });
|
| 299 |
|
| 300 |
return req.abort('blockedbyclient', 1000);
|
| 301 |
}
|
| 302 |
|
| 303 |
+
if (domainSet.size > 200) {
|
| 304 |
page.emit('abuse', { url: requestUrl, page, sn, reason: `DDoS attack suspected: Too many domains (${domainSet.size})` });
|
| 305 |
|
| 306 |
return req.abort('blockedbyclient', 1000);
|