nomagick committed on
Commit
66db317
·
unverified ·
1 Parent(s): 512f225

cleanup: use local project code as much as possible

Browse files
src/api/crawler.ts CHANGED
@@ -32,14 +32,16 @@ import { GlobalLogger } from '../services/logger';
32
  import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
33
  import { AsyncLocalContext } from '../services/async-context';
34
  import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
35
- import { BudgetExceededError, InsufficientBalanceError, SecurityCompromiseError } from '../services/errors';
 
 
 
36
 
37
  import { countGPTToken as estimateToken } from '../shared/utils/openai';
38
  import { ProxyProvider } from '../shared/services/proxy-provider';
39
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
40
  import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
41
  import { RobotsTxtService } from '../services/robots-text';
42
- import { ServiceBadAttemptError } from '../shared/lib/errors';
43
 
44
  export interface ExtraScrappingOptions extends ScrappingOptions {
45
  withIframe?: boolean | 'quoted';
@@ -758,7 +760,9 @@ export class CrawlerHost extends RPCHost {
758
  let analyzed = await this.jsdomControl.analyzeHTMLTextLite(draftSnapshot.html);
759
  draftSnapshot.title ??= analyzed.title;
760
  let fallbackProxyIsUsed = false;
761
- if ((!crawlOpts?.allocProxy && !crawlOpts?.proxyUrl) && (analyzed.tokens < 42 || sideLoaded.status !== 200)) {
 
 
762
  const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
763
  if (!proxyLoaded.file) {
764
  throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
@@ -904,7 +908,7 @@ export class CrawlerHost extends RPCHost {
904
  }
905
  this.threadLocal.set('retainImages', opts.retainImages);
906
  this.threadLocal.set('noGfm', opts.noGfm);
907
- this.threadLocal.set('DNT', Boolean(opts.doNotTrack))
908
 
909
  const crawlOpts: ExtraScrappingOptions = {
910
  proxyUrl: opts.proxyUrl,
@@ -1146,6 +1150,9 @@ export class CrawlerHost extends RPCHost {
1146
  }
1147
 
1148
  @retryWith((err) => {
 
 
 
1149
  if (err instanceof ServiceBadAttemptError) {
1150
  // Keep trying
1151
  return true;
@@ -1157,6 +1164,9 @@ export class CrawlerHost extends RPCHost {
1157
  return undefined;
1158
  }, 3)
1159
  async sideLoadWithAllocatedProxy(url: URL, opts?: ExtraScrappingOptions) {
 
 
 
1160
  const proxy = await this.proxyProvider.alloc(opts?.allocProxy);
1161
  const r = await this.curlControl.sideLoad(url, {
1162
  ...opts,
 
32
  import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
33
  import { AsyncLocalContext } from '../services/async-context';
34
  import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
35
+ import {
36
+ BudgetExceededError, InsufficientBalanceError,
37
+ SecurityCompromiseError, ServiceBadApproachError, ServiceBadAttemptError
38
+ } from '../services/errors';
39
 
40
  import { countGPTToken as estimateToken } from '../shared/utils/openai';
41
  import { ProxyProvider } from '../shared/services/proxy-provider';
42
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
43
  import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
44
  import { RobotsTxtService } from '../services/robots-text';
 
45
 
46
  export interface ExtraScrappingOptions extends ScrappingOptions {
47
  withIframe?: boolean | 'quoted';
 
760
  let analyzed = await this.jsdomControl.analyzeHTMLTextLite(draftSnapshot.html);
761
  draftSnapshot.title ??= analyzed.title;
762
  let fallbackProxyIsUsed = false;
763
+ if (((!crawlOpts?.allocProxy || crawlOpts.allocProxy === 'none') && !crawlOpts?.proxyUrl) &&
764
+ (analyzed.tokens < 42 || sideLoaded.status !== 200)
765
+ ) {
766
  const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
767
  if (!proxyLoaded.file) {
768
  throw new ServiceBadAttemptError(`Remote server did not return a body: ${urlToCrawl}`);
 
908
  }
909
  this.threadLocal.set('retainImages', opts.retainImages);
910
  this.threadLocal.set('noGfm', opts.noGfm);
911
+ this.threadLocal.set('DNT', Boolean(opts.doNotTrack));
912
 
913
  const crawlOpts: ExtraScrappingOptions = {
914
  proxyUrl: opts.proxyUrl,
 
1150
  }
1151
 
1152
  @retryWith((err) => {
1153
+ if (err instanceof ServiceBadApproachError) {
1154
+ return false;
1155
+ }
1156
  if (err instanceof ServiceBadAttemptError) {
1157
  // Keep trying
1158
  return true;
 
1164
  return undefined;
1165
  }, 3)
1166
  async sideLoadWithAllocatedProxy(url: URL, opts?: ExtraScrappingOptions) {
1167
+ if (opts?.allocProxy === 'none') {
1168
+ return this.curlControl.sideLoad(url, opts);
1169
+ }
1170
  const proxy = await this.proxyProvider.alloc(opts?.allocProxy);
1171
  const r = await this.curlControl.sideLoad(url, {
1172
  ...opts,
src/services/alt-text.ts CHANGED
@@ -1,6 +1,6 @@
1
  import { AssertionFailureError, AsyncService, HashManager } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
- import { Logger } from '../shared/services/logger';
4
  import { CanvasService } from '../shared/services/canvas';
5
  import { ImageInterrogationManager } from '../shared/services/common-iminterrogate';
6
  import { ImgBrief } from './puppeteer';
@@ -16,7 +16,7 @@ export class AltTextService extends AsyncService {
16
  logger = this.globalLogger.child({ service: this.constructor.name });
17
 
18
  constructor(
19
- protected globalLogger: Logger,
20
  protected imageInterrogator: ImageInterrogationManager,
21
  protected canvasService: CanvasService,
22
  protected asyncLocalContext: AsyncLocalContext
 
1
  import { AssertionFailureError, AsyncService, HashManager } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
+ import { GlobalLogger } from './logger';
4
  import { CanvasService } from '../shared/services/canvas';
5
  import { ImageInterrogationManager } from '../shared/services/common-iminterrogate';
6
  import { ImgBrief } from './puppeteer';
 
16
  logger = this.globalLogger.child({ service: this.constructor.name });
17
 
18
  constructor(
19
+ protected globalLogger: GlobalLogger,
20
  protected imageInterrogator: ImageInterrogationManager,
21
  protected canvasService: CanvasService,
22
  protected asyncLocalContext: AsyncLocalContext
src/services/brave-search.ts CHANGED
@@ -1,10 +1,10 @@
1
  import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
- import { Logger } from '../shared/services/logger';
4
  import { SecretExposer } from '../shared/services/secrets';
5
  import { BraveSearchHTTP, WebSearchQueryParams } from '../shared/3rd-party/brave-search';
6
  import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
7
- import { AsyncContext } from '../shared';
8
  import { WebSearchOptionalHeaderOptions } from '../shared/3rd-party/brave-types';
9
  import type { Request, Response } from 'express';
10
  import { BlackHoleDetector } from './blackhole-detector';
@@ -17,10 +17,10 @@ export class BraveSearchService extends AsyncService {
17
  braveSearchHTTP!: BraveSearchHTTP;
18
 
19
  constructor(
20
- protected globalLogger: Logger,
21
  protected secretExposer: SecretExposer,
22
  protected geoipControl: GeoIPService,
23
- protected threadLocal: AsyncContext,
24
  protected blackHoleDetector: BlackHoleDetector,
25
  ) {
26
  super(...arguments);
 
1
  import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
+ import { GlobalLogger } from './logger';
4
  import { SecretExposer } from '../shared/services/secrets';
5
  import { BraveSearchHTTP, WebSearchQueryParams } from '../shared/3rd-party/brave-search';
6
  import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
7
+ import { AsyncLocalContext } from './async-context';
8
  import { WebSearchOptionalHeaderOptions } from '../shared/3rd-party/brave-types';
9
  import type { Request, Response } from 'express';
10
  import { BlackHoleDetector } from './blackhole-detector';
 
17
  braveSearchHTTP!: BraveSearchHTTP;
18
 
19
  constructor(
20
+ protected globalLogger: GlobalLogger,
21
  protected secretExposer: SecretExposer,
22
  protected geoipControl: GeoIPService,
23
+ protected threadLocal: AsyncLocalContext,
24
  protected blackHoleDetector: BlackHoleDetector,
25
  ) {
26
  super(...arguments);
src/services/cf-browser-rendering.ts CHANGED
@@ -1,6 +1,7 @@
1
  import { container, singleton } from 'tsyringe';
2
  import { AsyncService } from 'civkit/async-service';
3
- import { Logger, SecretExposer } from '../shared';
 
4
  import { CloudFlareHTTP } from '../shared/3rd-party/cloud-flare';
5
 
6
  @singleton()
@@ -10,7 +11,7 @@ export class CFBrowserRendering extends AsyncService {
10
  client!: CloudFlareHTTP;
11
 
12
  constructor(
13
- protected globalLogger: Logger,
14
  protected secretExposer: SecretExposer,
15
  ) {
16
  super(...arguments);
 
1
  import { container, singleton } from 'tsyringe';
2
  import { AsyncService } from 'civkit/async-service';
3
+ import { SecretExposer } from '../shared/services/secrets';
4
+ import { GlobalLogger } from './logger';
5
  import { CloudFlareHTTP } from '../shared/3rd-party/cloud-flare';
6
 
7
  @singleton()
 
11
  client!: CloudFlareHTTP;
12
 
13
  constructor(
14
+ protected globalLogger: GlobalLogger,
15
  protected secretExposer: SecretExposer,
16
  ) {
17
  super(...arguments);
src/services/curl.ts CHANGED
@@ -5,9 +5,10 @@ import { Curl, CurlCode, CurlFeature, HeaderInfo } from 'node-libcurl';
5
  import { parseString as parseSetCookieString } from 'set-cookie-parser';
6
 
7
  import { ScrappingOptions } from './puppeteer';
8
- import { Logger } from '../shared/services/logger';
9
  import { AssertionFailureError, FancyFile } from 'civkit';
10
- import { ServiceBadAttemptError, TempFileManager } from '../shared';
 
11
  import { createBrotliDecompress, createInflate, createGunzip } from 'zlib';
12
  import { ZSTDDecompress } from 'simple-zstd';
13
  import _ from 'lodash';
@@ -32,7 +33,7 @@ export class CurlControl extends AsyncService {
32
  lifeCycleTrack = new WeakMap();
33
 
34
  constructor(
35
- protected globalLogger: Logger,
36
  protected tempFileManager: TempFileManager,
37
  protected asyncLocalContext: AsyncLocalContext,
38
  ) {
@@ -328,7 +329,7 @@ export class CurlControl extends AsyncService {
328
  };
329
  }
330
  if (!location && cookieRedirects > 1) {
331
- throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
332
  }
333
 
334
  nextHopUrl = new URL(location || '', nextHopUrl);
 
5
  import { parseString as parseSetCookieString } from 'set-cookie-parser';
6
 
7
  import { ScrappingOptions } from './puppeteer';
8
+ import { GlobalLogger } from './logger';
9
  import { AssertionFailureError, FancyFile } from 'civkit';
10
+ import { ServiceBadAttemptError, ServiceBadApproachError } from './errors';
11
+ import { TempFileManager } from '../services/temp-file';
12
  import { createBrotliDecompress, createInflate, createGunzip } from 'zlib';
13
  import { ZSTDDecompress } from 'simple-zstd';
14
  import _ from 'lodash';
 
33
  lifeCycleTrack = new WeakMap();
34
 
35
  constructor(
36
+ protected globalLogger: GlobalLogger,
37
  protected tempFileManager: TempFileManager,
38
  protected asyncLocalContext: AsyncLocalContext,
39
  ) {
 
329
  };
330
  }
331
  if (!location && cookieRedirects > 1) {
332
+ throw new ServiceBadApproachError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
333
  }
334
 
335
  nextHopUrl = new URL(location || '', nextHopUrl);
src/services/errors.ts CHANGED
@@ -14,6 +14,12 @@ export class ServiceCrashedError extends ApplicationError { }
14
  @StatusCode(50303)
15
  export class ServiceNodeResourceDrainError extends ApplicationError { }
16
 
 
 
 
 
 
 
17
  @StatusCode(40104)
18
  export class EmailUnverifiedError extends ApplicationError { }
19
 
 
14
  @StatusCode(50303)
15
  export class ServiceNodeResourceDrainError extends ApplicationError { }
16
 
17
+ @StatusCode(50304)
18
+ export class ServiceBadAttemptError extends ApplicationError { }
19
+
20
+ @StatusCode(50305)
21
+ export class ServiceBadApproachError extends ServiceBadAttemptError { }
22
+
23
  @StatusCode(40104)
24
  export class EmailUnverifiedError extends ApplicationError { }
25
 
src/services/geoip.ts CHANGED
@@ -2,7 +2,7 @@ import { container, singleton } from 'tsyringe';
2
  import fsp from 'fs/promises';
3
  import { CityResponse, Reader } from 'maxmind';
4
  import { AsyncService, AutoCastable, Prop, runOnce } from 'civkit';
5
- import { Logger } from '../shared';
6
  import path from 'path';
7
 
8
  export enum GEOIP_SUPPORTED_LANGUAGES {
@@ -61,7 +61,7 @@ export class GeoIPService extends AsyncService {
61
  mmdbCity!: Reader<CityResponse>;
62
 
63
  constructor(
64
- protected globalLogger: Logger,
65
  ) {
66
  super(...arguments);
67
  }
 
2
  import fsp from 'fs/promises';
3
  import { CityResponse, Reader } from 'maxmind';
4
  import { AsyncService, AutoCastable, Prop, runOnce } from 'civkit';
5
+ import { GlobalLogger } from './logger';
6
  import path from 'path';
7
 
8
  export enum GEOIP_SUPPORTED_LANGUAGES {
 
61
  mmdbCity!: Reader<CityResponse>;
62
 
63
  constructor(
64
+ protected globalLogger: GlobalLogger,
65
  ) {
66
  super(...arguments);
67
  }
src/services/jsdom.ts CHANGED
@@ -1,13 +1,13 @@
1
  import { container, singleton } from 'tsyringe';
2
  import { AsyncService, marshalErrorLike } from 'civkit';
3
- import { Logger } from '../shared/services/logger';
4
  import { ExtendedSnapshot, ImgBrief, PageSnapshot } from './puppeteer';
5
  import { Readability } from '@mozilla/readability';
6
  import TurndownService from 'turndown';
7
  import { Threaded } from '../services/threaded';
8
  import type { ExtraScrappingOptions } from '../api/crawler';
9
  import { tailwindClasses } from '../utils/tailwind-classes';
10
- import { countGPTToken } from '../shared';
11
 
12
  const pLinkedom = import('linkedom');
13
 
@@ -19,7 +19,7 @@ export class JSDomControl extends AsyncService {
19
  linkedom!: Awaited<typeof pLinkedom>;
20
 
21
  constructor(
22
- protected globalLogger: Logger,
23
  ) {
24
  super(...arguments);
25
  }
 
1
  import { container, singleton } from 'tsyringe';
2
  import { AsyncService, marshalErrorLike } from 'civkit';
3
+ import { GlobalLogger } from './logger';
4
  import { ExtendedSnapshot, ImgBrief, PageSnapshot } from './puppeteer';
5
  import { Readability } from '@mozilla/readability';
6
  import TurndownService from 'turndown';
7
  import { Threaded } from '../services/threaded';
8
  import type { ExtraScrappingOptions } from '../api/crawler';
9
  import { tailwindClasses } from '../utils/tailwind-classes';
10
+ import { countGPTToken } from '../shared/utils/openai';
11
 
12
  const pLinkedom = import('linkedom');
13
 
 
19
  linkedom!: Awaited<typeof pLinkedom>;
20
 
21
  constructor(
22
+ protected globalLogger: GlobalLogger,
23
  ) {
24
  super(...arguments);
25
  }
src/services/lm.ts CHANGED
@@ -2,7 +2,7 @@ import { AsyncService } from 'civkit/async-service';
2
  import { singleton } from 'tsyringe';
3
 
4
  import { PageSnapshot } from './puppeteer';
5
- import { Logger } from '../shared/services/logger';
6
  import _ from 'lodash';
7
  import { AssertionFailureError } from 'civkit';
8
  import { LLMManager } from '../shared/services/common-llm';
@@ -16,7 +16,7 @@ export class LmControl extends AsyncService {
16
  logger = this.globalLogger.child({ service: this.constructor.name });
17
 
18
  constructor(
19
- protected globalLogger: Logger,
20
  protected commonLLM: LLMManager,
21
  protected jsdomControl: JSDomControl,
22
  ) {
 
2
  import { singleton } from 'tsyringe';
3
 
4
  import { PageSnapshot } from './puppeteer';
5
+ import { GlobalLogger } from './logger';
6
  import _ from 'lodash';
7
  import { AssertionFailureError } from 'civkit';
8
  import { LLMManager } from '../shared/services/common-llm';
 
16
  logger = this.globalLogger.child({ service: this.constructor.name });
17
 
18
  constructor(
19
+ protected globalLogger: GlobalLogger,
20
  protected commonLLM: LLMManager,
21
  protected jsdomControl: JSDomControl,
22
  ) {
src/services/pdf-extract.ts CHANGED
@@ -3,10 +3,10 @@ import { singleton } from 'tsyringe';
3
  import _ from 'lodash';
4
  import { TextItem } from 'pdfjs-dist/types/src/display/api';
5
  import { AsyncService, HashManager } from 'civkit';
6
- import { Logger } from '../shared/services/logger';
7
  import { PDFContent } from '../db/pdf';
8
  import dayjs from 'dayjs';
9
- import { FirebaseStorageBucketControl } from '../shared';
10
  import { randomUUID } from 'crypto';
11
  import type { PDFDocumentLoadingTask } from 'pdfjs-dist';
12
  import path from 'path';
@@ -55,7 +55,7 @@ export class PDFExtractor extends AsyncService {
55
  cacheRetentionMs = 1000 * 3600 * 24 * 7;
56
 
57
  constructor(
58
- protected globalLogger: Logger,
59
  protected firebaseObjectStorage: FirebaseStorageBucketControl,
60
  protected asyncLocalContext: AsyncLocalContext,
61
  ) {
 
3
  import _ from 'lodash';
4
  import { TextItem } from 'pdfjs-dist/types/src/display/api';
5
  import { AsyncService, HashManager } from 'civkit';
6
+ import { GlobalLogger } from './logger';
7
  import { PDFContent } from '../db/pdf';
8
  import dayjs from 'dayjs';
9
+ import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
10
  import { randomUUID } from 'crypto';
11
  import type { PDFDocumentLoadingTask } from 'pdfjs-dist';
12
  import path from 'path';
 
55
  cacheRetentionMs = 1000 * 3600 * 24 * 7;
56
 
57
  constructor(
58
+ protected globalLogger: GlobalLogger,
59
  protected firebaseObjectStorage: FirebaseStorageBucketControl,
60
  protected asyncLocalContext: AsyncLocalContext,
61
  ) {
src/services/puppeteer.ts CHANGED
@@ -2,14 +2,13 @@ import os from 'os';
2
  import fs from 'fs';
3
  import { container, singleton } from 'tsyringe';
4
  import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError, FancyFile } from 'civkit';
5
- import { Logger } from '../shared/services/logger';
6
 
7
  import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page, Viewport } from 'puppeteer';
8
  import type { Cookie } from 'set-cookie-parser';
9
  import puppeteer from 'puppeteer-extra';
10
 
11
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
12
- import puppeteerPageProxy from 'puppeteer-extra-plugin-page-proxy';
13
  import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
14
  import { TimeoutError } from 'puppeteer';
15
  import _ from 'lodash';
@@ -108,9 +107,6 @@ puppeteer.use(puppeteerBlockResources({
108
  blockedTypes: new Set(['media']),
109
  interceptResolutionPriority: 1,
110
  }));
111
- puppeteer.use(puppeteerPageProxy({
112
- interceptResolutionPriority: 1,
113
- }));
114
 
115
  const SIMULATE_SCROLL = `
116
  (function () {
@@ -472,7 +468,7 @@ export class PuppeteerControl extends AsyncService {
472
  lifeCycleTrack = new WeakMap();
473
 
474
  constructor(
475
- protected globalLogger: Logger,
476
  protected asyncLocalContext: AsyncLocalContext,
477
  protected curlControl: CurlControl,
478
  protected blackHoleDetector: BlackHoleDetector,
 
2
  import fs from 'fs';
3
  import { container, singleton } from 'tsyringe';
4
  import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, Deferred, perNextTick, ParamValidationError, FancyFile } from 'civkit';
5
+ import { GlobalLogger } from './logger';
6
 
7
  import type { Browser, CookieParam, GoToOptions, HTTPResponse, Page, Viewport } from 'puppeteer';
8
  import type { Cookie } from 'set-cookie-parser';
9
  import puppeteer from 'puppeteer-extra';
10
 
11
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
 
12
  import { SecurityCompromiseError, ServiceCrashedError, ServiceNodeResourceDrainError } from '../shared/lib/errors';
13
  import { TimeoutError } from 'puppeteer';
14
  import _ from 'lodash';
 
107
  blockedTypes: new Set(['media']),
108
  interceptResolutionPriority: 1,
109
  }));
 
 
 
110
 
111
  const SIMULATE_SCROLL = `
112
  (function () {
 
468
  lifeCycleTrack = new WeakMap();
469
 
470
  constructor(
471
+ protected globalLogger: GlobalLogger,
472
  protected asyncLocalContext: AsyncLocalContext,
473
  protected curlControl: CurlControl,
474
  protected blackHoleDetector: BlackHoleDetector,
src/services/robots-text.ts CHANGED
@@ -1,13 +1,12 @@
1
  import { singleton } from 'tsyringe';
 
2
  import { DownstreamServiceFailureError, ResourcePolicyDenyError } from 'civkit/civ-rpc';
3
  import { AsyncService } from 'civkit/async-service';
4
  import { HashManager } from 'civkit/hash';
5
  import { marshalErrorLike } from 'civkit/lang';
6
 
7
- import { Logger } from '../shared/services/logger';
8
- import { BraveSearchHTTP } from '../shared/3rd-party/brave-search';
9
- import { FirebaseStorageBucketControl } from '../shared';
10
- import { URL } from 'url';
11
  import { Threaded } from '../services/threaded';
12
 
13
 
@@ -18,10 +17,8 @@ export class RobotsTxtService extends AsyncService {
18
 
19
  logger = this.globalLogger.child({ service: this.constructor.name });
20
 
21
- braveSearchHTTP!: BraveSearchHTTP;
22
-
23
  constructor(
24
- protected globalLogger: Logger,
25
  protected firebaseStorageBucketControl: FirebaseStorageBucketControl,
26
  ) {
27
  super(...arguments);
 
1
  import { singleton } from 'tsyringe';
2
+ import { URL } from 'url';
3
  import { DownstreamServiceFailureError, ResourcePolicyDenyError } from 'civkit/civ-rpc';
4
  import { AsyncService } from 'civkit/async-service';
5
  import { HashManager } from 'civkit/hash';
6
  import { marshalErrorLike } from 'civkit/lang';
7
 
8
+ import { GlobalLogger } from './logger';
9
+ import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
 
 
10
  import { Threaded } from '../services/threaded';
11
 
12
 
 
17
 
18
  logger = this.globalLogger.child({ service: this.constructor.name });
19
 
 
 
20
  constructor(
21
+ protected globalLogger: GlobalLogger,
22
  protected firebaseStorageBucketControl: FirebaseStorageBucketControl,
23
  ) {
24
  super(...arguments);
src/services/serper-search.ts CHANGED
@@ -1,9 +1,9 @@
1
  import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
- import { Logger } from '../shared/services/logger';
4
  import { SecretExposer } from '../shared/services/secrets';
5
  import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
6
- import { AsyncContext } from '../shared';
7
  import { SerperGoogleHTTP, SerperSearchQueryParams, WORLD_COUNTRIES } from '../shared/3rd-party/serper-search';
8
  import { BlackHoleDetector } from './blackhole-detector';
9
  import { Context } from './registry';
@@ -16,10 +16,10 @@ export class SerperSearchService extends AsyncService {
16
  serperSearchHTTP!: SerperGoogleHTTP;
17
 
18
  constructor(
19
- protected globalLogger: Logger,
20
  protected secretExposer: SecretExposer,
21
  protected geoipControl: GeoIPService,
22
- protected threadLocal: AsyncContext,
23
  protected blackHoleDetector: BlackHoleDetector,
24
  ) {
25
  super(...arguments);
 
1
  import { AsyncService, AutoCastable, DownstreamServiceFailureError, Prop, RPC_CALL_ENVIRONMENT, delay, marshalErrorLike } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
+ import { GlobalLogger } from './logger';
4
  import { SecretExposer } from '../shared/services/secrets';
5
  import { GEOIP_SUPPORTED_LANGUAGES, GeoIPService } from './geoip';
6
+ import { AsyncLocalContext } from './async-context';
7
  import { SerperGoogleHTTP, SerperSearchQueryParams, WORLD_COUNTRIES } from '../shared/3rd-party/serper-search';
8
  import { BlackHoleDetector } from './blackhole-detector';
9
  import { Context } from './registry';
 
16
  serperSearchHTTP!: SerperGoogleHTTP;
17
 
18
  constructor(
19
+ protected globalLogger: GlobalLogger,
20
  protected secretExposer: SecretExposer,
21
  protected geoipControl: GeoIPService,
22
+ protected threadLocal: AsyncLocalContext,
23
  protected blackHoleDetector: BlackHoleDetector,
24
  ) {
25
  super(...arguments);
src/services/snapshot-formatter.ts CHANGED
@@ -2,7 +2,7 @@ import { randomUUID } from 'crypto';
2
  import { container, singleton } from 'tsyringe';
3
  import { AssertionFailureError, AsyncService, FancyFile, HashManager, marshalErrorLike } from 'civkit';
4
  import TurndownService, { Filter, Rule } from 'turndown';
5
- import { Logger } from '../shared/services/logger';
6
  import { PageSnapshot } from './puppeteer';
7
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
8
  import { AsyncContext } from '../shared/services/async-context';
@@ -16,7 +16,7 @@ import { STATUS_CODES } from 'http';
16
  import type { CrawlerOptions } from '../dto/crawler-options';
17
  import { readFile } from 'fs/promises';
18
  import { pathToFileURL } from 'url';
19
- import { countGPTToken } from '../shared';
20
 
21
 
22
  export interface FormattedPage {
@@ -82,7 +82,7 @@ export class SnapshotFormatter extends AsyncService {
82
  gfmNoTable = [highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
83
 
84
  constructor(
85
- protected globalLogger: Logger,
86
  protected jsdomControl: JSDomControl,
87
  protected altTextService: AltTextService,
88
  protected pdfExtractor: PDFExtractor,
 
2
  import { container, singleton } from 'tsyringe';
3
  import { AssertionFailureError, AsyncService, FancyFile, HashManager, marshalErrorLike } from 'civkit';
4
  import TurndownService, { Filter, Rule } from 'turndown';
5
+ import { GlobalLogger } from './logger';
6
  import { PageSnapshot } from './puppeteer';
7
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
8
  import { AsyncContext } from '../shared/services/async-context';
 
16
  import type { CrawlerOptions } from '../dto/crawler-options';
17
  import { readFile } from 'fs/promises';
18
  import { pathToFileURL } from 'url';
19
+ import { countGPTToken } from '../shared/utils/openai';
20
 
21
 
22
  export interface FormattedPage {
 
82
  gfmNoTable = [highlightedCodeBlock, gfmPlugin.strikethrough, gfmPlugin.taskListItems];
83
 
84
  constructor(
85
+ protected globalLogger: GlobalLogger,
86
  protected jsdomControl: JSDomControl,
87
  protected altTextService: AltTextService,
88
  protected pdfExtractor: PDFExtractor,
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit 20417f5bb7f8c773a835304f0624a180b558ff65
 
1
+ Subproject commit 755639081df7640733bb5f704460892a1a9059e7