Spaces:
Build error
Build error
fix
Browse files
backend/functions/package.json
CHANGED
|
@@ -18,8 +18,7 @@
|
|
| 18 |
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
| 19 |
"start": "npm run shell",
|
| 20 |
"deploy": "firebase deploy --only functions",
|
| 21 |
-
"logs": "firebase functions:log",
|
| 22 |
-
"gcp-build": "node node_modules/puppeteer/install.js"
|
| 23 |
},
|
| 24 |
"engines": {
|
| 25 |
"node": "18"
|
|
|
|
| 18 |
"from-preset": "npm run build && npm run emu:reset && npm run emu:start",
|
| 19 |
"start": "npm run shell",
|
| 20 |
"deploy": "firebase deploy --only functions",
|
| 21 |
+
"logs": "firebase functions:log"
|
|
|
|
| 22 |
},
|
| 23 |
"engines": {
|
| 24 |
"node": "18"
|
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -30,7 +30,9 @@ export class CrawlerHost extends RPCHost {
|
|
| 30 |
formatSnapshot(snapshot: PageSnapshot) {
|
| 31 |
|
| 32 |
const toBeTurnedToMd = snapshot.parsed?.content;
|
| 33 |
-
const
|
|
|
|
|
|
|
| 34 |
|
| 35 |
const formatted = {
|
| 36 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
|
@@ -51,6 +53,16 @@ ${contentText}
|
|
| 51 |
return formatted;
|
| 52 |
}
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
@CloudHTTPv2({
|
| 55 |
runtime: {
|
| 56 |
memory: '4GiB',
|
|
|
|
| 30 |
formatSnapshot(snapshot: PageSnapshot) {
|
| 31 |
|
| 32 |
const toBeTurnedToMd = snapshot.parsed?.content;
|
| 33 |
+
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
|
| 34 |
+
|
| 35 |
+
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
|
| 36 |
|
| 37 |
const formatted = {
|
| 38 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
|
|
|
| 53 |
return formatted;
|
| 54 |
}
|
| 55 |
|
| 56 |
+
@CloudHTTPv2({
|
| 57 |
+
name: 'crawl2',
|
| 58 |
+
runtime: {
|
| 59 |
+
memory: '4GiB',
|
| 60 |
+
timeoutSeconds: 540,
|
| 61 |
+
concurrency: 4,
|
| 62 |
+
},
|
| 63 |
+
httpMethod: ['get', 'post'],
|
| 64 |
+
returnType: [String, OutputServerEventStream],
|
| 65 |
+
})
|
| 66 |
@CloudHTTPv2({
|
| 67 |
runtime: {
|
| 68 |
memory: '4GiB',
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -71,8 +71,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 71 |
}
|
| 72 |
}
|
| 73 |
this.browser = await puppeteer.launch({
|
| 74 |
-
headless: true,
|
| 75 |
-
timeout: 60_000
|
| 76 |
});
|
| 77 |
this.browser.once('disconnected', () => {
|
| 78 |
this.logger.warn(`Browser disconnected`);
|
|
|
|
| 71 |
}
|
| 72 |
}
|
| 73 |
this.browser = await puppeteer.launch({
|
| 74 |
+
headless: true
|
|
|
|
| 75 |
});
|
| 76 |
this.browser.once('disconnected', () => {
|
| 77 |
this.logger.warn(`Browser disconnected`);
|