Spaces:
Build error
Build error
feat: expose brave search operators explicitly
Browse files
backend/functions/src/cloud-functions/searcher.ts
CHANGED
|
@@ -10,7 +10,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
|
| 10 |
import _ from 'lodash';
|
| 11 |
import { Request, Response } from 'express';
|
| 12 |
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
| 13 |
-
import { BraveSearchService } from '../services/brave-search';
|
| 14 |
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
| 15 |
import { CookieParam } from 'puppeteer';
|
| 16 |
|
|
@@ -84,6 +84,7 @@ export class SearcherHost extends RPCHost {
|
|
| 84 |
},
|
| 85 |
auth: JinaEmbeddingsAuthDTO,
|
| 86 |
crawlerOptions: CrawlerOptions,
|
|
|
|
| 87 |
) {
|
| 88 |
const uid = await auth.solveUID();
|
| 89 |
let chargeAmount = 0;
|
|
@@ -153,7 +154,7 @@ export class SearcherHost extends RPCHost {
|
|
| 153 |
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
| 154 |
});
|
| 155 |
}
|
| 156 |
-
const searchQuery =
|
| 157 |
const r = await this.cachedWebSearch({
|
| 158 |
q: searchQuery,
|
| 159 |
count: 10
|
|
|
|
| 10 |
import _ from 'lodash';
|
| 11 |
import { Request, Response } from 'express';
|
| 12 |
import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
|
| 13 |
+
import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
|
| 14 |
import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
|
| 15 |
import { CookieParam } from 'puppeteer';
|
| 16 |
|
|
|
|
| 84 |
},
|
| 85 |
auth: JinaEmbeddingsAuthDTO,
|
| 86 |
crawlerOptions: CrawlerOptions,
|
| 87 |
+
braveSearchExplicitOperators: BraveSearchExplicitOperatorsDto,
|
| 88 |
) {
|
| 89 |
const uid = await auth.solveUID();
|
| 90 |
let chargeAmount = 0;
|
|
|
|
| 154 |
...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
|
| 155 |
});
|
| 156 |
}
|
| 157 |
+
const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
|
| 158 |
const r = await this.cachedWebSearch({
|
| 159 |
q: searchQuery,
|
| 160 |
count: 10
|
backend/functions/src/services/brave-search.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { Logger } from '../shared/services/logger';
|
| 4 |
import { SecretExposer } from '../shared/services/secrets';
|
|
@@ -76,3 +76,74 @@ export class BraveSearchService extends AsyncService {
|
|
| 76 |
}
|
| 77 |
|
| 78 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, marshalErrorLike } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { Logger } from '../shared/services/logger';
|
| 4 |
import { SecretExposer } from '../shared/services/secrets';
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
}
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
/**
 * Request DTO exposing Brave Search operators as explicit parameters.
 *
 * Each property corresponds to one Brave search operator of the same name
 * (`ext:`, `filetype:`, `inbody:`, ...). A property may hold a single value or
 * a list of values; `addTo` turns the populated properties into operator
 * clauses appended to a free-text search term.
 */
export class BraveSearchExplicitOperatorsDto extends AutoCastable {
    @Prop({
        arrayOf: String,
        desc: `Returns web pages with a specific file extension. Example: to find the Honda GX120 Owner’s manual in PDF, type “Honda GX120 owners manual ext:pdf”.`
    })
    ext?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages created in the specified file type. Example: to find a web page created in PDF format about the evaluation of age-related cognitive changes, type “evaluation of age cognitive changes filetype:pdf”.`
    })
    filetype?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages containing the specified term in the body of the page. Example: to find information about the Nvidia GeForce GTX 1080 Ti, making sure the page contains the keywords “founders edition” in the body, type “nvidia 1080 ti inbody:“founders edition””.`
    })
    inbody?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term in the title of the page. Example: to find pages about SEO conferences making sure the results contain 2023 in the title, type “seo conference intitle:2023”.`
    })
    intitle?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term either in the title or in the body of the page. Example: to find pages about the 2024 Oscars containing the keywords “best costume design” in the page, type “oscars 2024 inpage:“best costume design””.`
    })
    inpage?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages written in the specified language. The language code must be in the ISO 639-1 two-letter code format. Example: to find information on visas only in Spanish, type “visas lang:es”.`
    })
    lang?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages from the specified country or region. The country code must be in the ISO 3166-1 alpha-2 format. Example: to search for web pages from Canada about the Niagara Falls, type “niagara falls loc:ca”.`
    })
    loc?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages coming only from a specific web site. Example: to find information about Goggles only on Brave pages, type “goggles site:brave.com”.`
    })
    site?: string | string[];

    /**
     * Appends the populated operators to a search term.
     *
     * Multiple values of one operator are combined with `OR`
     * (`ext:pdf OR ext:doc`). When more than one operator is populated, each
     * operator's clause is wrapped in parentheses and the clauses are combined
     * with `AND`. A single clause is appended without parentheses.
     *
     * @param searchTerm - The free-text part of the query.
     * @returns `searchTerm` followed by a space and the operator expression,
     * or `searchTerm` unchanged when no operator has a value.
     */
    addTo(searchTerm: string): string {
        const chunks: string[] = [];

        // Every own enumerable property is treated as an operator, keyed by its name.
        for (const [key, value] of Object.entries(this)) {
            if (!value) {
                continue;
            }

            const values: unknown[] = Array.isArray(value) ? value : [value];
            const textValue = values
                // Empty scalars are skipped by the check above; skip empty list
                // items too so no dangling `key:` clause is produced.
                .filter((v) => v !== undefined && v !== null && v !== '')
                .map((v) => `${key}:${v}`)
                .join(' OR ');

            if (textValue) {
                chunks.push(textValue);
            }
        }

        if (chunks.length === 0) {
            return searchTerm;
        }

        // Parentheses are only needed to separate the OR-groups of different operators.
        const groups = chunks.length > 1 ? chunks.map((chunk) => `(${chunk})`) : chunks;

        return [searchTerm, groups.join(' AND ')].join(' ');
    }
}
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit f166680848c5700030389cb69181e5de1535acff
|