Spaces: Build error
fix: catch turndown errors
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -51,7 +51,7 @@ function tidyMarkdown(markdown: string): string {
|
|
| 51 |
export class CrawlerHost extends RPCHost {
|
| 52 |
logger = this.globalLogger.child({ service: this.constructor.name });
|
| 53 |
|
| 54 |
-
turnDownPlugins = [require('turndown-plugin-gfm').
|
| 55 |
|
| 56 |
constructor(
|
| 57 |
protected globalLogger: Logger,
|
|
@@ -107,11 +107,31 @@ export class CrawlerHost extends RPCHost {
|
|
| 107 |
}
|
| 108 |
});
|
| 109 |
|
| 110 |
-
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
}
|
| 112 |
|
| 113 |
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
| 114 |
-
contentText = turnDownService.turndown(snapshot.html);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
}
|
| 116 |
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
| 117 |
contentText = snapshot.text;
|
|
|
|
| 51 |
export class CrawlerHost extends RPCHost {
|
| 52 |
logger = this.globalLogger.child({ service: this.constructor.name });
|
| 53 |
|
| 54 |
+
turnDownPlugins = [require('turndown-plugin-gfm').tables];
|
| 55 |
|
| 56 |
constructor(
|
| 57 |
protected globalLogger: Logger,
|
|
|
|
| 107 |
}
|
| 108 |
});
|
| 109 |
|
| 110 |
+
try {
|
| 111 |
+
contentText = turnDownService.turndown(toBeTurnedToMd).trim();
|
| 112 |
+
} catch (err) {
|
| 113 |
+
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
| 114 |
+
const vanillaTurnDownService = new TurndownService();
|
| 115 |
+
try {
|
| 116 |
+
contentText = vanillaTurnDownService.turndown(toBeTurnedToMd).trim();
|
| 117 |
+
} catch (err2) {
|
| 118 |
+
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
| 119 |
+
}
|
| 120 |
+
}
|
| 121 |
}
|
| 122 |
|
| 123 |
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
|
| 124 |
+
try {
|
| 125 |
+
contentText = turnDownService.turndown(snapshot.html);
|
| 126 |
+
} catch (err) {
|
| 127 |
+
this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
|
| 128 |
+
const vanillaTurnDownService = new TurndownService();
|
| 129 |
+
try {
|
| 130 |
+
contentText = vanillaTurnDownService.turndown(snapshot.html);
|
| 131 |
+
} catch (err2) {
|
| 132 |
+
this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
}
|
| 136 |
if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
|
| 137 |
contentText = snapshot.text;
|