Aaron Ji committed on
Commit
69e02cb
·
unverified ·
1 Parent(s): 1a3a5ad

feat: support fallback query (#1179)

Browse files

* feat: support fallback query

* chore: update scaler value

Files changed (1) hide show
  1. src/api/searcher-serper.ts +100 -13
src/api/searcher-serper.ts CHANGED
@@ -112,6 +112,7 @@ export class SearcherHost extends RPCHost {
112
  @Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) hl?: string,
113
  @Param('location') location?: string,
114
  @Param('page') page?: number,
 
115
  @Param('q') q?: string,
116
  ) {
117
  // We want to make our search API follow SERP schema, so we need to expose 'num' parameter.
@@ -262,16 +263,19 @@ export class SearcherHost extends RPCHost {
262
  fetchNum = count > 10 ? 30 : 20;
263
  }
264
 
 
265
  let chargeAmountScaler = 1;
266
  if (searchEngine === 'bing') {
267
  this.threadLocal.set('bing-preferred', true);
268
  chargeAmountScaler = 3;
269
  }
 
270
  if (variant !== 'web') {
271
  chargeAmountScaler = 5;
272
  }
273
 
274
- const r = await this.cachedSearch({
 
275
  variant,
276
  provider: searchEngine,
277
  q: searchQuery,
@@ -280,7 +284,14 @@ export class SearcherHost extends RPCHost {
280
  hl,
281
  location,
282
  page,
283
- }, crawlerOptions.noCache);
 
 
 
 
 
 
 
284
 
285
  let results;
286
  switch (variant) {
@@ -312,7 +323,11 @@ export class SearcherHost extends RPCHost {
312
  const targetResultCount = crawlWithoutContent ? count : count + 2;
313
  const trimmedResults = results.filter((x) => Boolean(x.link)).slice(0, targetResultCount).map((x) => this.mapToFinalResults(x));
314
  trimmedResults.toString = function () {
315
- return this.map((x, i) => x ? Reflect.apply(x.toString, x, [i]) : '').join('\n\n').trimEnd() + '\n';
 
 
 
 
316
  };
317
  if (!crawlerOptions.respondWith.includes('no-content') &&
318
  ['html', 'text', 'shot', 'markdown', 'content'].some((x) => crawlerOptions.respondWith.includes(x))
@@ -349,8 +364,16 @@ export class SearcherHost extends RPCHost {
349
  break;
350
  }
351
 
352
- chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler);
353
  lastScrapped = scrapped;
 
 
 
 
 
 
 
 
354
  sseStream.write({
355
  event: 'data',
356
  data: scrapped,
@@ -383,7 +406,8 @@ export class SearcherHost extends RPCHost {
383
  return;
384
  }
385
  await assigningOfGeneralMixins;
386
- chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler);
 
387
  rpcReflect.return(lastScrapped);
388
  earlyReturn = true;
389
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@@ -404,7 +428,7 @@ export class SearcherHost extends RPCHost {
404
  clearTimeout(earlyReturnTimer);
405
  }
406
  await assigningOfGeneralMixins;
407
- chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler);
408
 
409
  return scrapped;
410
  }
@@ -419,7 +443,7 @@ export class SearcherHost extends RPCHost {
419
 
420
  if (!earlyReturn) {
421
  await assigningOfGeneralMixins;
422
- chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler);
423
  }
424
 
425
  return lastScrapped;
@@ -435,7 +459,8 @@ export class SearcherHost extends RPCHost {
435
  return;
436
  }
437
  await assigningOfGeneralMixins;
438
- chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler);
 
439
  rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
440
  earlyReturn = true;
441
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@@ -458,7 +483,7 @@ export class SearcherHost extends RPCHost {
458
  clearTimeout(earlyReturnTimer);
459
  }
460
  await assigningOfGeneralMixins;
461
- chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler);
462
 
463
  return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
464
  }
@@ -473,12 +498,70 @@ export class SearcherHost extends RPCHost {
473
 
474
  if (!earlyReturn) {
475
  await assigningOfGeneralMixins;
476
- chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler);
477
  }
478
 
479
  return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
480
  }
481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  async *fetchSearchResults(
483
  mode: string | 'markdown' | 'html' | 'text' | 'screenshot' | 'favicon' | 'content',
484
  searchResults?: FormattedPage[],
@@ -540,7 +623,7 @@ export class SearcherHost extends RPCHost {
540
  return resultArray;
541
  }
542
 
543
- assignChargeAmount(formatted: FormattedPage[], num: number, scaler: number) {
544
  let contentCharge = 0;
545
  for (const x of formatted) {
546
  const itemAmount = this.crawler.assignChargeAmount(x) || 0;
@@ -562,8 +645,12 @@ export class SearcherHost extends RPCHost {
562
  }
563
  }
564
 
 
 
 
 
565
 
566
- assignMeta(formatted, { usage: { tokens: final } });
567
 
568
  return final;
569
  }
@@ -757,4 +844,4 @@ const searchResultProto = {
757
 
758
  return chunks.join('\n');
759
  }
760
- };
 
112
  @Param('hl', { validate: (v: string) => WORLD_LANGUAGES.some(l => l.code === v) }) hl?: string,
113
  @Param('location') location?: string,
114
  @Param('page') page?: number,
115
+ @Param('fallback', { type: Boolean, default: false }) fallback?: boolean,
116
  @Param('q') q?: string,
117
  ) {
118
  // We want to make our search API follow SERP schema, so we need to expose 'num' parameter.
 
263
  fetchNum = count > 10 ? 30 : 20;
264
  }
265
 
266
+ let fallbackQuery: string | undefined;
267
  let chargeAmountScaler = 1;
268
  if (searchEngine === 'bing') {
269
  this.threadLocal.set('bing-preferred', true);
270
  chargeAmountScaler = 3;
271
  }
272
+
273
  if (variant !== 'web') {
274
  chargeAmountScaler = 5;
275
  }
276
 
277
+ // Search with fallback logic if enabled
278
+ const searchParams = {
279
  variant,
280
  provider: searchEngine,
281
  q: searchQuery,
 
284
  hl,
285
  location,
286
  page,
287
+ };
288
+
289
+ const { response: r, query: successQuery, tryTimes } = await this.searchWithFallback(
290
+ searchParams, fallback, crawlerOptions.noCache
291
+ );
292
+ chargeAmountScaler *= tryTimes;
293
+
294
+ fallbackQuery = successQuery !== searchQuery ? successQuery : undefined;
295
 
296
  let results;
297
  switch (variant) {
 
323
  const targetResultCount = crawlWithoutContent ? count : count + 2;
324
  const trimmedResults = results.filter((x) => Boolean(x.link)).slice(0, targetResultCount).map((x) => this.mapToFinalResults(x));
325
  trimmedResults.toString = function () {
326
+ let r = this.map((x, i) => x ? Reflect.apply(x.toString, x, [i]) : '').join('\n\n').trimEnd() + '\n';
327
+ if (fallbackQuery) {
328
+ r = `Fallback query: ${fallbackQuery}\n\n${r}`;
329
+ }
330
+ return r;
331
  };
332
  if (!crawlerOptions.respondWith.includes('no-content') &&
333
  ['html', 'text', 'shot', 'markdown', 'content'].some((x) => crawlerOptions.respondWith.includes(x))
 
364
  break;
365
  }
366
 
367
+ chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler, fallbackQuery);
368
  lastScrapped = scrapped;
369
+
370
+ if (fallbackQuery) {
371
+ sseStream.write({
372
+ event: 'meta',
373
+ data: { fallback: fallbackQuery },
374
+ });
375
+ }
376
+
377
  sseStream.write({
378
  event: 'data',
379
  data: scrapped,
 
406
  return;
407
  }
408
  await assigningOfGeneralMixins;
409
+ chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler, fallbackQuery);
410
+
411
  rpcReflect.return(lastScrapped);
412
  earlyReturn = true;
413
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
 
428
  clearTimeout(earlyReturnTimer);
429
  }
430
  await assigningOfGeneralMixins;
431
+ chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler, fallbackQuery);
432
 
433
  return scrapped;
434
  }
 
443
 
444
  if (!earlyReturn) {
445
  await assigningOfGeneralMixins;
446
+ chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler, fallbackQuery);
447
  }
448
 
449
  return lastScrapped;
 
459
  return;
460
  }
461
  await assigningOfGeneralMixins;
462
+ chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler, fallbackQuery);
463
+
464
  rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
465
  earlyReturn = true;
466
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
 
483
  clearTimeout(earlyReturnTimer);
484
  }
485
  await assigningOfGeneralMixins;
486
+ chargeAmount = this.assignChargeAmount(scrapped, count, chargeAmountScaler, fallbackQuery);
487
 
488
  return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
489
  }
 
498
 
499
  if (!earlyReturn) {
500
  await assigningOfGeneralMixins;
501
+ chargeAmount = this.assignChargeAmount(lastScrapped, count, chargeAmountScaler, fallbackQuery);
502
  }
503
 
504
  return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
505
  }
506
 
507
/**
 * Search, optionally falling back to progressively shorter queries when the
 * original query yields no results.
 *
 * The original query is always tried first. If it returns no results and
 * `useFallback` is true, the last whitespace-separated term is dropped and the
 * search is retried, repeating until a query returns results or only a single
 * term remains.
 *
 * @param params Search parameters; `params.variant` selects which result list
 *   (`images`, `news` or `organic`) is inspected to decide whether a search succeeded
 * @param useFallback Whether to use the fallback mechanism
 * @param noCache Whether to bypass cache
 * @returns The response to use, the query that produced it, and `tryTimes` — the
 *   number of searches issued, failed attempts included. When every fallback
 *   attempt also comes back empty, the original response and query are returned.
 */
async searchWithFallback(
    params: SerperSearchQueryParams & { variant: 'web' | 'images' | 'news'; provider?: string; },
    useFallback: boolean = false,
    noCache: boolean = false
): Promise<{ response: SerperSearchResponse; query: string; tryTimes: number }> {
    // Pick the result list that matches the requested variant. A missing list is
    // treated as "no results" so that reading `.length` can never throw.
    // NOTE(review): assumes the upstream response may omit the list when empty — confirm.
    const extractResults = (res: SerperSearchResponse): unknown[] => {
        switch (params.variant) {
            case 'images': return (res as SerperImageSearchResponse).images ?? [];
            case 'news': return (res as SerperNewsSearchResponse).news ?? [];
            case 'web':
            default: return (res as SerperWebSearchResponse).organic ?? [];
        }
    };

    // Try original query first
    const originalQuery = params.q;
    const response = await this.cachedSearch(params, noCache);
    let tryTimes = 1;

    // Return early if fallback is disabled or we already got results
    if (!useFallback || extractResults(response).length > 0) {
        return { response, query: originalQuery, tryTimes };
    }

    // Try with progressively shorter queries
    const terms = originalQuery.trim().split(/\s+/);

    this.logger.info(`No results for "${originalQuery}", trying fallback queries`);

    while (terms.length > 1) {
        terms.pop(); // Remove last term
        const shortenedQuery = terms.join(' ');
        const fallbackResponse = await this.cachedSearch({ ...params, q: shortenedQuery }, noCache);

        // Every issued search counts, whether or not it produced results.
        tryTimes++;
        if (extractResults(fallbackResponse).length > 0) {
            return { response: fallbackResponse, query: shortenedQuery, tryTimes };
        }
    }

    // Nothing worked: hand back the original (empty) response under the original query.
    return { response, query: originalQuery, tryTimes };
}
564
+
565
  async *fetchSearchResults(
566
  mode: string | 'markdown' | 'html' | 'text' | 'screenshot' | 'favicon' | 'content',
567
  searchResults?: FormattedPage[],
 
623
  return resultArray;
624
  }
625
 
626
+ assignChargeAmount(formatted: FormattedPage[], num: number, scaler: number, fallbackQuery?: string) {
627
  let contentCharge = 0;
628
  for (const x of formatted) {
629
  const itemAmount = this.crawler.assignChargeAmount(x) || 0;
 
645
  }
646
  }
647
 
648
+ const metadata: Record<string, any> = { usage: { tokens: final } };
649
+ if (fallbackQuery) {
650
+ metadata.fallback = fallbackQuery;
651
+ }
652
 
653
+ assignMeta(formatted, metadata);
654
 
655
  return final;
656
  }
 
844
 
845
  return chunks.join('\n');
846
  }
847
+ };