Spaces:
Build error
Build error
fix
Browse files
backend/functions/package.json
CHANGED
|
@@ -18,7 +18,8 @@
|
|
| 18 |
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
| 19 |
"start": "npm run shell",
|
| 20 |
"deploy": "firebase deploy --only functions",
|
| 21 |
-
"logs": "firebase functions:log"
|
|
|
|
| 22 |
},
|
| 23 |
"engines": {
|
| 24 |
"node": "18"
|
|
|
|
| 18 |
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
| 19 |
"start": "npm run shell",
|
| 20 |
"deploy": "firebase deploy --only functions",
|
| 21 |
+
"logs": "firebase functions:log",
|
| 22 |
+
"gcp-build": "npx puppeteer browsers install chrome"
|
| 23 |
},
|
| 24 |
"engines": {
|
| 25 |
"node": "18"
|
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -36,16 +36,16 @@ export class CrawlerHost extends RPCHost {
|
|
| 36 |
|
| 37 |
const formatted = {
|
| 38 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
toString() {
|
| 43 |
return `Title: ${this.title}
|
| 44 |
|
| 45 |
-
URL Source: ${this.
|
| 46 |
|
| 47 |
Markdown Content:
|
| 48 |
-
${
|
| 49 |
`;
|
| 50 |
}
|
| 51 |
};
|
|
|
|
| 36 |
|
| 37 |
const formatted = {
|
| 38 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
| 39 |
+
url: snapshot.href.trim(),
|
| 40 |
+
content: contentText.trim(),
|
| 41 |
|
| 42 |
toString() {
|
| 43 |
return `Title: ${this.title}
|
| 44 |
|
| 45 |
+
URL Source: ${this.url}
|
| 46 |
|
| 47 |
Markdown Content:
|
| 48 |
+
${this.content}
|
| 49 |
`;
|
| 50 |
}
|
| 51 |
};
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -145,7 +145,7 @@ function giveSnapshot() {
|
|
| 145 |
|
| 146 |
async *scrap(url: string, noCache: string | boolean = false) {
|
| 147 |
const parsedUrl = new URL(url);
|
| 148 |
-
parsedUrl.search = '';
|
| 149 |
parsedUrl.hash = '';
|
| 150 |
const normalizedUrl = parsedUrl.toString().toLowerCase();
|
| 151 |
const digest = md5Hasher.hash(normalizedUrl);
|
|
@@ -191,7 +191,17 @@ function giveSnapshot() {
|
|
| 191 |
page.on('snapshot', hdl);
|
| 192 |
|
| 193 |
const gotoPromise = page.goto(url, { waitUntil: ['load', 'domcontentloaded', 'networkidle0'], timeout: 30_000 })
|
| 194 |
-
.then(async (r) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
screenshot = await page.screenshot({
|
| 196 |
type: 'jpeg',
|
| 197 |
quality: 85,
|
|
@@ -210,16 +220,6 @@ function giveSnapshot() {
|
|
| 210 |
).catch((err) => {
|
| 211 |
this.logger.warn(`Failed to save snapshot`, { err: marshalErrorLike(err) });
|
| 212 |
});
|
| 213 |
-
|
| 214 |
-
return r;
|
| 215 |
-
}).catch((err) => {
|
| 216 |
-
this.logger.warn(`Failed to goto ${url}`, { err: marshalErrorLike(err) });
|
| 217 |
-
return Promise.reject(new AssertionFailureError({
|
| 218 |
-
message: `Failed to goto ${url}: ${err}`,
|
| 219 |
-
cause: err,
|
| 220 |
-
}));
|
| 221 |
-
}).finally(() => {
|
| 222 |
-
finalized = true;
|
| 223 |
});
|
| 224 |
|
| 225 |
try {
|
|
|
|
| 145 |
|
| 146 |
async *scrap(url: string, noCache: string | boolean = false) {
|
| 147 |
const parsedUrl = new URL(url);
|
| 148 |
+
// parsedUrl.search = '';
|
| 149 |
parsedUrl.hash = '';
|
| 150 |
const normalizedUrl = parsedUrl.toString().toLowerCase();
|
| 151 |
const digest = md5Hasher.hash(normalizedUrl);
|
|
|
|
| 191 |
page.on('snapshot', hdl);
|
| 192 |
|
| 193 |
const gotoPromise = page.goto(url, { waitUntil: ['load', 'domcontentloaded', 'networkidle0'], timeout: 30_000 })
|
| 194 |
+
.catch((err) => {
|
| 195 |
+
this.logger.warn(`Browsing of ${url} did not fully succeed`, { err: marshalErrorLike(err) });
|
| 196 |
+
return Promise.reject(new AssertionFailureError({
|
| 197 |
+
message: `Failed to goto ${url}: ${err}`,
|
| 198 |
+
cause: err,
|
| 199 |
+
}));
|
| 200 |
+
}).finally(async () => {
|
| 201 |
+
finalized = true;
|
| 202 |
+
if (!snapshot?.html) {
|
| 203 |
+
return;
|
| 204 |
+
}
|
| 205 |
screenshot = await page.screenshot({
|
| 206 |
type: 'jpeg',
|
| 207 |
quality: 85,
|
|
|
|
| 220 |
).catch((err) => {
|
| 221 |
this.logger.warn(`Failed to save snapshot`, { err: marshalErrorLike(err) });
|
| 222 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
});
|
| 224 |
|
| 225 |
try {
|