Spaces:
Build error
Build error
fix: respond with markdown
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -74,7 +74,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 74 |
return turnDownService;
|
| 75 |
}
|
| 76 |
|
| 77 |
-
async formatSnapshot(mode: string | 'markdown' | 'full-markdown' | 'html' | 'text' | 'screenshot', snapshot: PageSnapshot & {
|
| 78 |
screenshotUrl?: string;
|
| 79 |
}, nominalUrl?: URL) {
|
| 80 |
if (mode === 'screenshot') {
|
|
@@ -112,8 +112,8 @@ export class CrawlerHost extends RPCHost {
|
|
| 112 |
};
|
| 113 |
}
|
| 114 |
|
| 115 |
-
const toBeTurnedToMd = mode === '
|
| 116 |
-
let turnDownService = mode === 'markdown' ? this.getTurndown('without any rule')
|
| 117 |
for (const plugin of this.turnDownPlugins) {
|
| 118 |
turnDownService = turnDownService.use(plugin);
|
| 119 |
}
|
|
@@ -198,7 +198,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 198 |
mixins.push(`Published Time: ${this.publishedTime}`);
|
| 199 |
}
|
| 200 |
|
| 201 |
-
if (mode === '
|
| 202 |
return this.content;
|
| 203 |
}
|
| 204 |
|
|
@@ -253,14 +253,12 @@ ${this.content}
|
|
| 253 |
schema: { type: 'string' }
|
| 254 |
},
|
| 255 |
'X-Respond-With': {
|
| 256 |
-
description: `Specifies the form factor of the crawled data you prefer. \n\n` +
|
| 257 |
`Supported formats:\n` +
|
| 258 |
`- markdown\n` +
|
| 259 |
-
`- full-markdown\n` +
|
| 260 |
`- html\n` +
|
| 261 |
`- text\n` +
|
| 262 |
-
`- screenshot\n\n` +
|
| 263 |
-
`Defaults to: markdown`
|
| 264 |
,
|
| 265 |
in: 'header',
|
| 266 |
schema: { type: 'string' }
|
|
@@ -322,7 +320,7 @@ ${this.content}
|
|
| 322 |
});
|
| 323 |
}
|
| 324 |
|
| 325 |
-
const customMode = ctx.req.get('x-respond-with') || 'markdown';
|
| 326 |
const noCache = Boolean(ctx.req.get('x-no-cache'));
|
| 327 |
const cookies: CookieParam[] = [];
|
| 328 |
const setCookieHeaders = ctx.req.headers['x-set-cookie'];
|
|
|
|
| 74 |
return turnDownService;
|
| 75 |
}
|
| 76 |
|
| 77 |
+
async formatSnapshot(mode: string | 'markdown' | 'html' | 'text' | 'screenshot', snapshot: PageSnapshot & {
|
| 78 |
screenshotUrl?: string;
|
| 79 |
}, nominalUrl?: URL) {
|
| 80 |
if (mode === 'screenshot') {
|
|
|
|
| 112 |
};
|
| 113 |
}
|
| 114 |
|
| 115 |
+
const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
|
| 116 |
+
let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule');
|
| 117 |
for (const plugin of this.turnDownPlugins) {
|
| 118 |
turnDownService = turnDownService.use(plugin);
|
| 119 |
}
|
|
|
|
| 198 |
mixins.push(`Published Time: ${this.publishedTime}`);
|
| 199 |
}
|
| 200 |
|
| 201 |
+
if (mode === 'markdown') {
|
| 202 |
return this.content;
|
| 203 |
}
|
| 204 |
|
|
|
|
| 253 |
schema: { type: 'string' }
|
| 254 |
},
|
| 255 |
'X-Respond-With': {
|
| 256 |
+
description: `Specifies the (non-default) form factor of the crawled data you prefer. \n\n` +
|
| 257 |
`Supported formats:\n` +
|
| 258 |
`- markdown\n` +
|
|
|
|
| 259 |
`- html\n` +
|
| 260 |
`- text\n` +
|
| 261 |
+
`- screenshot\n`
|
|
|
|
| 262 |
,
|
| 263 |
in: 'header',
|
| 264 |
schema: { type: 'string' }
|
|
|
|
| 320 |
});
|
| 321 |
}
|
| 322 |
|
| 323 |
+
const customMode = ctx.req.get('x-respond-with') || 'default';
|
| 324 |
const noCache = Boolean(ctx.req.get('x-no-cache'));
|
| 325 |
const cookies: CookieParam[] = [];
|
| 326 |
const setCookieHeaders = ctx.req.headers['x-set-cookie'];
|