nomagick committed on
Commit
6027963
·
unverified ·
1 Parent(s): 8121d62

fix: tweak default timing to be more conservative

Browse files
Files changed (1) hide show
  1. src/dto/crawler-options.ts +11 -8
src/dto/crawler-options.ts CHANGED
@@ -225,7 +225,8 @@ class Viewport extends AutoCastable {
225
  `- html: unrendered HTML is enough to return\n` +
226
  `- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
227
  `- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` +
228
- `- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n\n`,
 
229
  in: 'header',
230
  schema: { type: 'string' }
231
  },
@@ -583,11 +584,13 @@ export class CrawlerOptions extends AutoCastable {
583
  if (respondTiming) {
584
  instance.respondTiming ??= respondTiming as RESPOND_TIMING;
585
  }
586
- instance.respondTiming ??= (
587
- instance.timeout ||
588
- instance.respondWith.includes('shot') ||
589
- instance.respondWith.includes('vlm')
590
- ) ? RESPOND_TIMING.MEDIA_IDLE : RESPOND_TIMING.MUTATION_IDLE;
 
 
591
 
592
  if (instance.cacheTolerance) {
593
  instance.cacheTolerance = instance.cacheTolerance * 1000;
@@ -607,7 +610,7 @@ export class CrawlerOptions extends AutoCastable {
607
  if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) {
608
  return true;
609
  }
610
- if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded) {
611
  const now = Date.now();
612
  if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
613
  return true;
@@ -619,7 +622,7 @@ export class CrawlerOptions extends AutoCastable {
619
  if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
620
  return false;
621
  }
622
- if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded) {
623
  const now = Date.now();
624
  if ((snapshot.lastContentResourceLoaded + 500) < now) {
625
  return true;
 
225
  `- html: unrendered HTML is enough to return\n` +
226
  `- mutation-idle: wait for DOM mutations to settle and remain unchanged for at least 0.2s\n` +
227
  `- resource-idle: wait for no additional resources that would affect page logic and content SUCCEEDED loading for at least 0.5s\n` +
228
+ `- media-idle: wait for no additional resources, including media resources, SUCCEEDED loading for at least 0.5s\n` +
229
+ `- network-idle: wait for full load of webpage, as usual.\n\n`,
230
  in: 'header',
231
  schema: { type: 'string' }
232
  },
 
584
  if (respondTiming) {
585
  instance.respondTiming ??= respondTiming as RESPOND_TIMING;
586
  }
587
+ if (instance.timeout) {
588
+ instance.respondTiming ??= RESPOND_TIMING.NETWORK_IDLE;
589
+ }
590
+ if (instance.respondWith.includes('shot') || instance.respondWith.includes('vlm')) {
591
+ instance.respondTiming ??= RESPOND_TIMING.MEDIA_IDLE;
592
+ }
593
+ instance.respondTiming ??= RESPOND_TIMING.RESOURCE_IDLE;
594
 
595
  if (instance.cacheTolerance) {
596
  instance.cacheTolerance = instance.cacheTolerance * 1000;
 
610
  if (this.respondTiming === RESPOND_TIMING.HTML && snapshot.html) {
611
  return true;
612
  }
613
+ if (this.respondTiming === RESPOND_TIMING.MEDIA_IDLE && snapshot.lastMediaResourceLoaded && snapshot.lastMutationIdle) {
614
  const now = Date.now();
615
  if ((Math.max(snapshot.lastMediaResourceLoaded, snapshot.lastContentResourceLoaded || 0) + 500) < now) {
616
  return true;
 
622
  if ((this.respondWith.includes('vlm') || this.respondWith.includes('screenshot')) && !snapshot.screenshot) {
623
  return false;
624
  }
625
+ if (this.respondTiming === RESPOND_TIMING.RESOURCE_IDLE && snapshot.lastContentResourceLoaded && snapshot.lastMutationIdle) {
626
  const now = Date.now();
627
  if ((snapshot.lastContentResourceLoaded + 500) < now) {
628
  return true;