Spaces:
Build error
Build error
fix: tweak default timing to be more conservative
Browse files
- src/dto/crawler-options.ts +11 -8
src/dto/crawler-options.ts
CHANGED
|
@@ -225,7 +225,8 @@ class Viewport extends AutoCastable {
|
|
| 225 |
`- html: unrendered HTML is enough to return\n` +
|
| 226 |
`- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
|
| 227 |
`- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` +
|
| 228 |
-
`- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n\n`,
|
|
|
|
| 229 |
in: 'header',
|
| 230 |
schema: { type: 'string' }
|
| 231 |
},
|
|
@@ -583,11 +584,13 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 583 |
if (respondTiming) {
|
| 584 |
instance.respondTiming ??= respondTiming as RESPOND_TIMING;
|
| 585 |
}
|
| 586 |
-
instance.
|
| 587 |
-
instance.
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
|
|
|
|
|
|
| 591 |
|
| 592 |
if (instance.cacheTolerance) {
|
| 593 |
instance.cacheTolerance = instance.cacheTolerance * 1000;
|
|
@@ -607,7 +610,7 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 607 |
if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) {
|
| 608 |
return true;
|
| 609 |
}
|
| 610 |
-
if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded) {
|
| 611 |
const now = Date.now();
|
| 612 |
if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
|
| 613 |
return true;
|
|
@@ -619,7 +622,7 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 619 |
if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
|
| 620 |
return false;
|
| 621 |
}
|
| 622 |
-
if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded) {
|
| 623 |
const now = Date.now();
|
| 624 |
if ((snapshot.lastContentResourceLoaded + 500) < now) {
|
| 625 |
return true;
|
|
|
|
| 225 |
`- html: unrendered HTML is enough to return\n` +
|
| 226 |
`- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
|
| 227 |
`- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` +
|
| 228 |
+
`- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n` +
|
| 229 |
+
`- network-idle: wait for full load of webpage, as usual.\n\n`,
|
| 230 |
in: 'header',
|
| 231 |
schema: { type: 'string' }
|
| 232 |
},
|
|
|
|
| 584 |
if (respondTiming) {
|
| 585 |
instance.respondTiming ??= respondTiming as RESPOND_TIMING;
|
| 586 |
}
|
| 587 |
+
if (instance.timeout) {
|
| 588 |
+
instance.respondTiming ??= RESPOND_TIMING.NETWORK_IDLE;
|
| 589 |
+
}
|
| 590 |
+
if (instance.respondWith.includes('shot') || instance.respondWith.includes('vlm')) {
|
| 591 |
+
instance.respondTiming ??= RESPOND_TIMING.MEDIA_IDLE;
|
| 592 |
+
}
|
| 593 |
+
instance.respondTiming ??= RESPOND_TIMING.RESOURCE_IDLE;
|
| 594 |
|
| 595 |
if (instance.cacheTolerance) {
|
| 596 |
instance.cacheTolerance = instance.cacheTolerance * 1000;
|
|
|
|
| 610 |
if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) {
|
| 611 |
return true;
|
| 612 |
}
|
| 613 |
+
if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded && snapshot.lastMutationIdle) {
|
| 614 |
const now = Date.now();
|
| 615 |
if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
|
| 616 |
return true;
|
|
|
|
| 622 |
if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
|
| 623 |
return false;
|
| 624 |
}
|
| 625 |
+
if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded && snapshot.lastMutationIdle) {
|
| 626 |
const now = Date.now();
|
| 627 |
if ((snapshot.lastContentResourceLoaded + 500) < now) {
|
| 628 |
return true;
|