nomagick committed on
Commit
6b1bfda
·
unverified ·
1 Parent(s): 5ba9306

fix: ua switch

Browse files
Files changed (2) hide show
  1. src/api/crawler.ts +2 -2
  2. src/services/puppeteer.ts +4 -2
src/api/crawler.ts CHANGED
@@ -153,8 +153,8 @@ export class CrawlerHost extends RPCHost {
153
  override async init() {
154
  await this.dependencyReady();
155
 
156
- if (this.puppeteerControl.ua) {
157
- this.curlControl.impersonateChrome(this.puppeteerControl.ua.replace(/Headless/i, ''));
158
  }
159
 
160
  this.emit('ready');
 
153
  override async init() {
154
  await this.dependencyReady();
155
 
156
+ if (this.puppeteerControl.effectiveUA) {
157
+ this.curlControl.impersonateChrome(this.puppeteerControl.effectiveUA);
158
  }
159
 
160
  this.emit('ready');
src/services/puppeteer.ts CHANGED
@@ -510,6 +510,7 @@ export class PuppeteerControl extends AsyncService {
510
  pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
511
  lastPageCratedAt: number = 0;
512
  ua: string = '';
 
513
 
514
  concurrentRequestsPerPage: number = 32;
515
  pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
@@ -582,7 +583,8 @@ export class PuppeteerControl extends AsyncService {
582
  });
583
  this.ua = await this.browser.userAgent();
584
  this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
585
- this.curlControl.impersonateChrome(this.ua.replace(/Headless/i, ''));
 
586
 
587
  await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
588
 
@@ -615,7 +617,7 @@ export class PuppeteerControl extends AsyncService {
615
  }
616
  const preparations = [];
617
 
618
- preparations.push(page.setUserAgent(this.ua.replace(/Headless/i, '')));
619
  // preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
620
  // preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
621
  preparations.push(page.setBypassCSP(true));
 
510
  pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
511
  lastPageCratedAt: number = 0;
512
  ua: string = '';
513
+ effectiveUA: string = '';
514
 
515
  concurrentRequestsPerPage: number = 32;
516
  pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
 
583
  });
584
  this.ua = await this.browser.userAgent();
585
  this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
586
+ this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Linux; Android 10; K)');
587
+ this.curlControl.impersonateChrome(this.effectiveUA);
588
 
589
  await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
590
 
 
617
  }
618
  const preparations = [];
619
 
620
+ preparations.push(page.setUserAgent(this.effectiveUA));
621
  // preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
622
  // preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
623
  preparations.push(page.setBypassCSP(true));