nomagick committed on
Commit
e551695
·
unverified ·
1 Parent(s): 26f6202

fix: fail early on special cookie redirects

Browse files
Files changed (1) hide show
  1. src/services/curl.ts +10 -3
src/services/curl.ts CHANGED
@@ -294,7 +294,8 @@ export class CurlControl extends AsyncService {
294
  }
295
 
296
  async urlToFile(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
297
- let leftRedirection = 10;
 
298
  let opts = { ...crawlOpts };
299
  let nextHopUrl = urlToCrawl;
300
  const fakeHeaderInfos: HeaderInfo[] = [];
@@ -312,10 +313,16 @@ export class CurlControl extends AsyncService {
312
  if (parsed.length) {
313
  opts.cookies = [...(opts.cookies || []), ...parsed];
314
  }
 
 
 
315
  }
316
 
317
  if (!location && !setCookieHeader) {
318
- throw new AssertionFailureError(`Failed to access ${urlToCrawl}: Bad redirection from ${nextHopUrl}`);
 
 
 
319
  }
320
 
321
  nextHopUrl = new URL(location || '', nextHopUrl);
@@ -331,7 +338,7 @@ export class CurlControl extends AsyncService {
331
  };
332
  } while (leftRedirection > 0);
333
 
334
- throw new AssertionFailureError(`Failed to access ${urlToCrawl}: Too many redirections.`);
335
  }
336
 
337
  async sideLoad(targetUrl: URL, crawlOpts?: CURLScrappingOptions) {
 
294
  }
295
 
296
  async urlToFile(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
297
+ let leftRedirection = 6;
298
+ let cookieRedirects = 0;
299
  let opts = { ...crawlOpts };
300
  let nextHopUrl = urlToCrawl;
301
  const fakeHeaderInfos: HeaderInfo[] = [];
 
313
  if (parsed.length) {
314
  opts.cookies = [...(opts.cookies || []), ...parsed];
315
  }
316
+ if (!location) {
317
+ cookieRedirects += 1;
318
+ }
319
  }
320
 
321
  if (!location && !setCookieHeader) {
322
+ throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Bad redirection from ${nextHopUrl}`);
323
+ }
324
+ if (!location && cookieRedirects > 1) {
325
+ throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
326
  }
327
 
328
  nextHopUrl = new URL(location || '', nextHopUrl);
 
338
  };
339
  } while (leftRedirection > 0);
340
 
341
+ throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Too many redirections.`);
342
  }
343
 
344
  async sideLoad(targetUrl: URL, crawlOpts?: CURLScrappingOptions) {