nomagick committed on
Commit
cbc13ec
·
unverified ·
1 Parent(s): 6ee0f2d

fix: catch turndown errors

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -51,7 +51,7 @@ function tidyMarkdown(markdown: string): string {
51
  export class CrawlerHost extends RPCHost {
52
  logger = this.globalLogger.child({ service: this.constructor.name });
53
 
54
- turnDownPlugins = [require('turndown-plugin-gfm').gfm];
55
 
56
  constructor(
57
  protected globalLogger: Logger,
@@ -107,11 +107,31 @@ export class CrawlerHost extends RPCHost {
107
  }
108
  });
109
 
110
- contentText = turnDownService.turndown(toBeTurnedToMd).trim();
 
 
 
 
 
 
 
 
 
 
111
  }
112
 
113
  if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
114
- contentText = turnDownService.turndown(snapshot.html);
 
 
 
 
 
 
 
 
 
 
115
  }
116
  if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
117
  contentText = snapshot.text;
 
51
  export class CrawlerHost extends RPCHost {
52
  logger = this.globalLogger.child({ service: this.constructor.name });
53
 
54
+ turnDownPlugins = [require('turndown-plugin-gfm').tables];
55
 
56
  constructor(
57
  protected globalLogger: Logger,
 
107
  }
108
  });
109
 
110
+ try {
111
+ contentText = turnDownService.turndown(toBeTurnedToMd).trim();
112
+ } catch (err) {
113
+ this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
114
+ const vanillaTurnDownService = new TurndownService();
115
+ try {
116
+ contentText = vanillaTurnDownService.turndown(toBeTurnedToMd).trim();
117
+ } catch (err2) {
118
+ this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
119
+ }
120
+ }
121
  }
122
 
123
  if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
124
+ try {
125
+ contentText = turnDownService.turndown(snapshot.html);
126
+ } catch (err) {
127
+ this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
128
+ const vanillaTurnDownService = new TurndownService();
129
+ try {
130
+ contentText = vanillaTurnDownService.turndown(snapshot.html);
131
+ } catch (err2) {
132
+ this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
133
+ }
134
+ }
135
  }
136
  if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
137
  contentText = snapshot.text;