Spaces:
Build error
Build error
Aaron Ji committed on
chore: fix search result amount (#1163)
Browse files
- src/api/searcher-serper.ts +17 -16
src/api/searcher-serper.ts
CHANGED
|
@@ -154,7 +154,7 @@ export class SearcherHost extends RPCHost {
|
|
| 154 |
const searchQuery = searchExplicitOperators.addTo(q || noSlashPath);
|
| 155 |
const r = await this.cachedWebSearch({
|
| 156 |
q: searchQuery,
|
| 157 |
-
num: count > 10 ?
|
| 158 |
gl,
|
| 159 |
hl,
|
| 160 |
location,
|
|
@@ -176,11 +176,8 @@ export class SearcherHost extends RPCHost {
|
|
| 176 |
if (crawlWithoutContent || count === 0) {
|
| 177 |
const fakeResults = await this.fakeResult(crawlerOptions, organicSearchResults, !crawlWithoutContent, withFavicon);
|
| 178 |
lastScrapped = fakeResults;
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
} else {
|
| 182 |
-
chargeAmount = 10000;
|
| 183 |
-
}
|
| 184 |
this.assignTokenUsage(lastScrapped, chargeAmount, crawlWithoutContent);
|
| 185 |
if ((!ctx.accepts('text/plain') && (ctx.accepts('text/json') || ctx.accepts('application/json'))) || count === 0) {
|
| 186 |
return lastScrapped;
|
|
@@ -207,7 +204,7 @@ export class SearcherHost extends RPCHost {
|
|
| 207 |
break;
|
| 208 |
}
|
| 209 |
|
| 210 |
-
chargeAmount = this.assignChargeAmount(scrapped);
|
| 211 |
sseStream.write({
|
| 212 |
event: 'data',
|
| 213 |
data: scrapped,
|
|
@@ -239,7 +236,7 @@ export class SearcherHost extends RPCHost {
|
|
| 239 |
if (!lastScrapped) {
|
| 240 |
return;
|
| 241 |
}
|
| 242 |
-
chargeAmount = this.assignChargeAmount(lastScrapped);
|
| 243 |
rpcReflect.return(lastScrapped);
|
| 244 |
earlyReturn = true;
|
| 245 |
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
|
@@ -259,7 +256,7 @@ export class SearcherHost extends RPCHost {
|
|
| 259 |
if (earlyReturnTimer) {
|
| 260 |
clearTimeout(earlyReturnTimer);
|
| 261 |
}
|
| 262 |
-
chargeAmount = this.assignChargeAmount(scrapped);
|
| 263 |
|
| 264 |
this.assignTokenUsage(scrapped, chargeAmount, crawlWithoutContent);
|
| 265 |
return scrapped;
|
|
@@ -274,7 +271,7 @@ export class SearcherHost extends RPCHost {
|
|
| 274 |
}
|
| 275 |
|
| 276 |
if (!earlyReturn) {
|
| 277 |
-
chargeAmount = this.assignChargeAmount(lastScrapped);
|
| 278 |
}
|
| 279 |
|
| 280 |
this.assignTokenUsage(lastScrapped, chargeAmount, crawlWithoutContent);
|
|
@@ -290,7 +287,7 @@ export class SearcherHost extends RPCHost {
|
|
| 290 |
if (!lastScrapped) {
|
| 291 |
return;
|
| 292 |
}
|
| 293 |
-
chargeAmount = this.assignChargeAmount(lastScrapped);
|
| 294 |
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
| 295 |
earlyReturn = true;
|
| 296 |
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
|
@@ -313,7 +310,7 @@ export class SearcherHost extends RPCHost {
|
|
| 313 |
clearTimeout(earlyReturnTimer);
|
| 314 |
}
|
| 315 |
|
| 316 |
-
chargeAmount = this.assignChargeAmount(scrapped);
|
| 317 |
|
| 318 |
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
| 319 |
}
|
|
@@ -327,7 +324,7 @@ export class SearcherHost extends RPCHost {
|
|
| 327 |
}
|
| 328 |
|
| 329 |
if (!earlyReturn) {
|
| 330 |
-
chargeAmount = this.assignChargeAmount(lastScrapped);
|
| 331 |
}
|
| 332 |
|
| 333 |
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
|
@@ -335,7 +332,6 @@ export class SearcherHost extends RPCHost {
|
|
| 335 |
|
| 336 |
assignTokenUsage(result: FormattedPage[], chargeAmount: number, crawlWithoutContent: boolean) {
|
| 337 |
if (crawlWithoutContent) {
|
| 338 |
-
chargeAmount = 10000;
|
| 339 |
if (result) {
|
| 340 |
result.forEach((x) => {
|
| 341 |
delete x.usage;
|
|
@@ -533,10 +529,15 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
|
|
| 533 |
return resultArray;
|
| 534 |
}
|
| 535 |
|
| 536 |
-
assignChargeAmount(formatted: FormattedPage[]) {
|
| 537 |
-
return _.sum(
|
| 538 |
formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
|
| 539 |
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
}
|
| 541 |
|
| 542 |
pageQualified(formattedPage: FormattedPage) {
|
|
|
|
| 154 |
const searchQuery = searchExplicitOperators.addTo(q || noSlashPath);
|
| 155 |
const r = await this.cachedWebSearch({
|
| 156 |
q: searchQuery,
|
| 157 |
+
num: count > 10 ? 30 : 20,
|
| 158 |
gl,
|
| 159 |
hl,
|
| 160 |
location,
|
|
|
|
| 176 |
if (crawlWithoutContent || count === 0) {
|
| 177 |
const fakeResults = await this.fakeResult(crawlerOptions, organicSearchResults, !crawlWithoutContent, withFavicon);
|
| 178 |
lastScrapped = fakeResults;
|
| 179 |
+
chargeAmount = this.assignChargeAmount(!crawlWithoutContent ? lastScrapped : [], count);
|
| 180 |
+
|
|
|
|
|
|
|
|
|
|
| 181 |
this.assignTokenUsage(lastScrapped, chargeAmount, crawlWithoutContent);
|
| 182 |
if ((!ctx.accepts('text/plain') && (ctx.accepts('text/json') || ctx.accepts('application/json'))) || count === 0) {
|
| 183 |
return lastScrapped;
|
|
|
|
| 204 |
break;
|
| 205 |
}
|
| 206 |
|
| 207 |
+
chargeAmount = this.assignChargeAmount(scrapped, count);
|
| 208 |
sseStream.write({
|
| 209 |
event: 'data',
|
| 210 |
data: scrapped,
|
|
|
|
| 236 |
if (!lastScrapped) {
|
| 237 |
return;
|
| 238 |
}
|
| 239 |
+
chargeAmount = this.assignChargeAmount(lastScrapped, count);
|
| 240 |
rpcReflect.return(lastScrapped);
|
| 241 |
earlyReturn = true;
|
| 242 |
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
|
|
|
| 256 |
if (earlyReturnTimer) {
|
| 257 |
clearTimeout(earlyReturnTimer);
|
| 258 |
}
|
| 259 |
+
chargeAmount = this.assignChargeAmount(scrapped, count);
|
| 260 |
|
| 261 |
this.assignTokenUsage(scrapped, chargeAmount, crawlWithoutContent);
|
| 262 |
return scrapped;
|
|
|
|
| 271 |
}
|
| 272 |
|
| 273 |
if (!earlyReturn) {
|
| 274 |
+
chargeAmount = this.assignChargeAmount(lastScrapped, count);
|
| 275 |
}
|
| 276 |
|
| 277 |
this.assignTokenUsage(lastScrapped, chargeAmount, crawlWithoutContent);
|
|
|
|
| 287 |
if (!lastScrapped) {
|
| 288 |
return;
|
| 289 |
}
|
| 290 |
+
chargeAmount = this.assignChargeAmount(lastScrapped, count);
|
| 291 |
rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
|
| 292 |
earlyReturn = true;
|
| 293 |
}, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
|
|
|
|
| 310 |
clearTimeout(earlyReturnTimer);
|
| 311 |
}
|
| 312 |
|
| 313 |
+
chargeAmount = this.assignChargeAmount(scrapped, count);
|
| 314 |
|
| 315 |
return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
|
| 316 |
}
|
|
|
|
| 324 |
}
|
| 325 |
|
| 326 |
if (!earlyReturn) {
|
| 327 |
+
chargeAmount = this.assignChargeAmount(lastScrapped, count);
|
| 328 |
}
|
| 329 |
|
| 330 |
return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
|
|
|
|
| 332 |
|
| 333 |
assignTokenUsage(result: FormattedPage[], chargeAmount: number, crawlWithoutContent: boolean) {
|
| 334 |
if (crawlWithoutContent) {
|
|
|
|
| 335 |
if (result) {
|
| 336 |
result.forEach((x) => {
|
| 337 |
delete x.usage;
|
|
|
|
| 529 |
return resultArray;
|
| 530 |
}
|
| 531 |
|
| 532 |
+
assignChargeAmount(formatted: FormattedPage[], num: number) {
|
| 533 |
+
const countentCharge = _.sum(
|
| 534 |
formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
|
| 535 |
);
|
| 536 |
+
|
| 537 |
+
const numCharge = Math.ceil(num / 10) * 10000;
|
| 538 |
+
|
| 539 |
+
return Math.max(countentCharge, numCharge);
|
| 540 |
+
|
| 541 |
}
|
| 542 |
|
| 543 |
pageQualified(formattedPage: FormattedPage) {
|