nomagick committed on
Commit
440ff4d
·
unverified ·
1 Parent(s): 4bc6394

fix: expect failure while loading pdf

Browse files
Files changed (1) hide show
  1. src/api/crawler.ts +16 -3
src/api/crawler.ts CHANGED
@@ -1010,10 +1010,23 @@ export class CrawlerHost extends RPCHost {
1010
  return this.snapshotFormatter.formatSnapshot(mode, snapshotCopy, nominalUrl, urlValidMs);
1011
  }
1012
 
1013
- const r = await this.curlControl.sideLoad(new URL(pdfUrl), scrappingOptions);
1014
- if (r.file) {
1015
- snapshotCopy.pdfs[0] = pathToFileURL(await r.file.filePath).href;
 
 
 
 
 
 
 
 
 
 
 
 
1016
  }
 
1017
  }
1018
  }
1019
 
 
1010
  return this.snapshotFormatter.formatSnapshot(mode, snapshotCopy, nominalUrl, urlValidMs);
1011
  }
1012
 
1013
+ const r = await this.curlControl.sideLoad(new URL(pdfUrl), scrappingOptions).catch((err) => {
1014
+ if (err instanceof ServiceBadAttemptError) {
1015
+ return Promise.reject(new AssertionFailureError(`Failed to load PDF(${pdfUrl}): ${err.message}`));
1016
+ }
1017
+
1018
+ return Promise.reject(err);
1019
+ });
1020
+ if (r.status !== 200) {
1021
+ throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server responded status ${r.status}`);
1022
+ }
1023
+ if (!r.contentType.includes('application/pdf')) {
1024
+ throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server responded with wrong content type ${r.contentType}`);
1025
+ }
1026
+ if (!r.file) {
1027
+ throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server did not return a body`);
1028
  }
1029
+ snapshotCopy.pdfs[0] = pathToFileURL(await r.file.filePath).href;
1030
  }
1031
  }
1032