nomagick committed on
Commit
d0e2920
·
unverified ·
1 Parent(s): 336931b

feat: expose brave search operators explicitly

Browse files
backend/functions/src/cloud-functions/searcher.ts CHANGED
@@ -10,7 +10,7 @@ import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
10
  import _ from 'lodash';
11
  import { Request, Response } from 'express';
12
  import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
13
- import { BraveSearchService } from '../services/brave-search';
14
  import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
15
  import { CookieParam } from 'puppeteer';
16
 
@@ -84,6 +84,7 @@ export class SearcherHost extends RPCHost {
84
  },
85
  auth: JinaEmbeddingsAuthDTO,
86
  crawlerOptions: CrawlerOptions,
 
87
  ) {
88
  const uid = await auth.solveUID();
89
  let chargeAmount = 0;
@@ -153,7 +154,7 @@ export class SearcherHost extends RPCHost {
153
  ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
154
  });
155
  }
156
- const searchQuery = noSlashPath;
157
  const r = await this.cachedWebSearch({
158
  q: searchQuery,
159
  count: 10
 
10
  import _ from 'lodash';
11
  import { Request, Response } from 'express';
12
  import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
13
+ import { BraveSearchExplicitOperatorsDto, BraveSearchService } from '../services/brave-search';
14
  import { CrawlerHost, ExtraScrappingOptions, FormattedPage } from './crawler';
15
  import { CookieParam } from 'puppeteer';
16
 
 
84
  },
85
  auth: JinaEmbeddingsAuthDTO,
86
  crawlerOptions: CrawlerOptions,
87
+ braveSearchExplicitOperators: BraveSearchExplicitOperatorsDto,
88
  ) {
89
  const uid = await auth.solveUID();
90
  let chargeAmount = 0;
 
154
  ...parseSetCookieString(setCookieHeaders, { decodeValues: false }) as CookieParam,
155
  });
156
  }
157
+ const searchQuery = braveSearchExplicitOperators.addTo(ctx.req.path.slice(1));
158
  const r = await this.cachedWebSearch({
159
  q: searchQuery,
160
  count: 10
backend/functions/src/services/brave-search.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { AsyncService, DownstreamServiceFailureError, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { Logger } from '../shared/services/logger';
4
  import { SecretExposer } from '../shared/services/secrets';
@@ -76,3 +76,74 @@ export class BraveSearchService extends AsyncService {
76
  }
77
 
78
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { Logger } from '../shared/services/logger';
4
  import { SecretExposer } from '../shared/services/secrets';
 
76
  }
77
 
78
  }
79
+
80
+
81
/**
 * Request DTO exposing Brave Search query operators as explicit parameters.
 *
 * Each property corresponds to one Brave search operator (`ext:`, `filetype:`,
 * `inbody:`, `intitle:`, `inpage:`, `lang:`, `loc:`, `site:`) and accepts either
 * a single value or a list of values. Use {@link addTo} to append the operators
 * that are set to a plain search term.
 */
export class BraveSearchExplicitOperatorsDto extends AutoCastable {
    @Prop({
        arrayOf: String,
        desc: `Returns web pages with a specific file extension. Example: to find the Honda GX120 Owner’s manual in PDF, type “Honda GX120 ownners manual ext:pdf”.`
    })
    ext?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages created in the specified file type. Example: to find a web page created in PDF format about the evaluation of age-related cognitive changes, type “evaluation of age cognitive changes filetype:pdf”.`
    })
    filetype?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages containing the specified term in the body of the page. Example: to find information about the Nvidia GeForce GTX 1080 Ti, making sure the page contains the keywords “founders edition” in the body, type “nvidia 1080 ti inbody:“founders edition””.`
    })
    inbody?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term in the title of the page. Example: to find pages about SEO conferences making sure the results contain 2023 in the title, type “seo conference intitle:2023”.`
    })
    intitle?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns webpages containing the specified term either in the title or in the body of the page. Example: to find pages about the 2024 Oscars containing the keywords “best costume design” in the page, type “oscars 2024 inpage:“best costume design””.`
    })
    inpage?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages written in the specified language. The language code must be in the ISO 639-1 two-letter code format. Example: to find information on visas only in Spanish, type “visas lang:es”.`
    })
    lang?: string | string[];

    // The description below previously duplicated the `lang` text; `loc` filters by location, not language.
    @Prop({
        arrayOf: String,
        desc: `Returns web pages from the specified country or region. The country code must be in the ISO 3166-1 alpha-2 format. Example: to find web pages from Canada about the Niagara Falls, type “niagara falls loc:ca”.`
    })
    loc?: string | string[];

    @Prop({
        arrayOf: String,
        desc: `Returns web pages coming only from a specific web site. Example: to find information about Goggles only on Brave pages, type “goggles site:brave.com”.`
    })
    site?: string | string[];

    /**
     * Appends the operators set on this instance to a search term.
     *
     * Multiple values of the same operator are joined with ` OR `. When more
     * than one operator is set, each operator group is wrapped in parentheses
     * and the groups are joined with ` AND `. The result is the search term,
     * a single space, then the operator expression.
     *
     * @param searchTerm - The plain query text to extend.
     * @returns The query with operators appended, or `searchTerm` unchanged
     * when no operator carries a usable value.
     */
    addTo(searchTerm: string): string {
        const chunks: string[] = [];
        for (const [key, value] of Object.entries(this)) {
            if (!value) {
                continue;
            }
            const values: unknown[] = Array.isArray(value) ? value : [value];
            const textValue = values
                // Skip blank entries so we never emit a dangling `key:` term.
                .filter((v) => v !== undefined && v !== null && v !== '')
                .map((v) => `${key}:${v}`)
                .join(' OR ');
            if (textValue) {
                chunks.push(textValue);
            }
        }

        if (!chunks.length) {
            return searchTerm;
        }

        // NOTE(review): a lone multi-value group is appended without parentheses
        // (e.g. `term ext:pdf OR ext:doc`), as before — confirm Brave's OR precedence.
        const opPart = chunks.length > 1 ? chunks.map((x) => `(${x})`).join(' AND ') : chunks.join('');

        return [searchTerm, opPart].join(' ');
    }
}
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit f4becc28564c90a571c655804c4f3910747f657a
 
1
+ Subproject commit f166680848c5700030389cb69181e5de1535acff