nomagick committed on
Commit
5199b00
·
unverified ·
1 Parent(s): 5ed3f90
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
4
  import _ from 'lodash';
@@ -90,10 +90,6 @@ ${this.content}
90
 
91
  try {
92
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
93
- if (!scrapped) {
94
- continue;
95
- }
96
-
97
  const formatted = this.formatSnapshot(scrapped);
98
 
99
  if (scrapped.screenshot && screenshotEnabled) {
@@ -134,6 +130,10 @@ ${this.content}
134
  return formatted;
135
  }
136
 
 
 
 
 
137
  return this.formatSnapshot(lastScrapped);
138
  }
139
 
@@ -148,6 +148,10 @@ ${this.content}
148
  return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
149
  }
150
 
 
 
 
 
151
  return `${this.formatSnapshot(lastScrapped)}`;
152
  }
153
 
 
1
+ import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
4
  import _ from 'lodash';
 
90
 
91
  try {
92
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
 
 
 
 
93
  const formatted = this.formatSnapshot(scrapped);
94
 
95
  if (scrapped.screenshot && screenshotEnabled) {
 
130
  return formatted;
131
  }
132
 
133
+ if (!lastScrapped) {
134
+ throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
135
+ }
136
+
137
  return this.formatSnapshot(lastScrapped);
138
  }
139
 
 
148
  return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
149
  }
150
 
151
+ if (!lastScrapped) {
152
+ throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
153
+ }
154
+
155
  return `${this.formatSnapshot(lastScrapped)}`;
156
  }
157
 
backend/functions/src/services/puppeteer.ts CHANGED
@@ -153,7 +153,7 @@ function giveSnapshot() {
153
  return page;
154
  }
155
 
156
- async *scrap(url: string, noCache: string | boolean = false) {
157
  const parsedUrl = new URL(url);
158
  // parsedUrl.search = '';
159
  parsedUrl.hash = '';
 
153
  return page;
154
  }
155
 
156
+ async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
157
  const parsedUrl = new URL(url);
158
  // parsedUrl.search = '';
159
  parsedUrl.hash = '';