Spaces:
Build error
Build error
fix: ignore match-all target selectors for performance
Browse files
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -237,6 +237,7 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 237 |
instance.targetSelector ??= targetSelector;
|
| 238 |
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
|
| 239 |
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
|
|
|
|
| 240 |
const overrideUserAgent = ctx?.req.get('x-user-agent');
|
| 241 |
instance.userAgent ??= overrideUserAgent;
|
| 242 |
|
|
@@ -286,3 +287,20 @@ export class CrawlerOptionsHeaderOnly extends CrawlerOptions {
|
|
| 286 |
return instance;
|
| 287 |
}
|
| 288 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
instance.targetSelector ??= targetSelector;
|
| 238 |
const waitForSelector = ctx?.req.get('x-wait-for-selector')?.split(', ');
|
| 239 |
instance.waitForSelector ??= waitForSelector || instance.targetSelector;
|
| 240 |
+
instance.targetSelector = filterSelector(instance.targetSelector);
|
| 241 |
const overrideUserAgent = ctx?.req.get('x-user-agent');
|
| 242 |
instance.userAgent ??= overrideUserAgent;
|
| 243 |
|
|
|
|
| 287 |
return instance;
|
| 288 |
}
|
| 289 |
}
|
| 290 |
+
|
| 291 |
+
/**
 * Removes "match-all" style selectors from a target-selector option.
 *
 * Every entry is treated as a comma-separated selector list. The whole entry
 * is discarded when any of its comma-separated parts, after trimming, starts
 * with `*`, starts with `:`, or contains `*:`.
 *
 * NOTE(review): the split on `,` is purely textual, so commas nested inside
 * functional pseudo-classes or attribute values are also treated as
 * separators — confirm this is acceptable for the selectors callers send.
 *
 * @param s - A single selector string, an array of selector strings, or nothing.
 * @returns `s` itself when it is falsy (`undefined` or `''`); otherwise a new
 *   array holding the entries that passed the filter. A non-empty string
 *   input is therefore returned wrapped in an array, and the array may be
 *   empty when every entry was rejected.
 */
function filterSelector(s?: string | string[]) {
    if (!s) {
        return s;
    }

    // Normalise to an array so string and array inputs share one code path.
    const candidates = Array.isArray(s) ? s : [s];

    return candidates.filter((candidate) => {
        const parts = candidate.split(',').map((part) => part.trim());

        // One offending part is enough to reject the whole entry.
        return !parts.some(
            (part) => part.startsWith('*') || part.startsWith(':') || part.includes('*:')
        );
    });
}
|