Spaces:
Build error
Build error
fix: ua switch
Browse files
- src/api/crawler.ts +2 -2
- src/services/puppeteer.ts +4 -2
src/api/crawler.ts
CHANGED
|
@@ -153,8 +153,8 @@ export class CrawlerHost extends RPCHost {
|
|
| 153 |
override async init() {
|
| 154 |
await this.dependencyReady();
|
| 155 |
|
| 156 |
-
if (this.puppeteerControl.
|
| 157 |
-
this.curlControl.impersonateChrome(this.puppeteerControl.
|
| 158 |
}
|
| 159 |
|
| 160 |
this.emit('ready');
|
|
|
|
| 153 |
override async init() {
|
| 154 |
await this.dependencyReady();
|
| 155 |
|
| 156 |
+
if (this.puppeteerControl.effectiveUA) {
|
| 157 |
+
this.curlControl.impersonateChrome(this.puppeteerControl.effectiveUA);
|
| 158 |
}
|
| 159 |
|
| 160 |
this.emit('ready');
|
src/services/puppeteer.ts
CHANGED
|
@@ -510,6 +510,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 510 |
pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
|
| 511 |
lastPageCratedAt: number = 0;
|
| 512 |
ua: string = '';
|
|
|
|
| 513 |
|
| 514 |
concurrentRequestsPerPage: number = 32;
|
| 515 |
pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
|
|
@@ -582,7 +583,8 @@ export class PuppeteerControl extends AsyncService {
|
|
| 582 |
});
|
| 583 |
this.ua = await this.browser.userAgent();
|
| 584 |
this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
|
| 585 |
-
this.
|
|
|
|
| 586 |
|
| 587 |
await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
|
| 588 |
|
|
@@ -615,7 +617,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 615 |
}
|
| 616 |
const preparations = [];
|
| 617 |
|
| 618 |
-
preparations.push(page.setUserAgent(this.
|
| 619 |
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
| 620 |
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
| 621 |
preparations.push(page.setBypassCSP(true));
|
|
|
|
| 510 |
pagePhase = new WeakMap<Page, 'idle' | 'active' | 'background'>();
|
| 511 |
lastPageCratedAt: number = 0;
|
| 512 |
ua: string = '';
|
| 513 |
+
effectiveUA: string = '';
|
| 514 |
|
| 515 |
concurrentRequestsPerPage: number = 32;
|
| 516 |
pageReqCtrl = new WeakMap<Page, PageReqCtrlKit>();
|
|
|
|
| 583 |
});
|
| 584 |
this.ua = await this.browser.userAgent();
|
| 585 |
this.logger.info(`Browser launched: ${this.browser.process()?.pid}, ${this.ua}`);
|
| 586 |
+
this.effectiveUA = this.ua.replace(/Headless/i, '').replace('Mozilla/5.0 (X11; Linux x86_64)', 'Mozilla/5.0 (Linux; Android 10; K)');
|
| 587 |
+
this.curlControl.impersonateChrome(this.effectiveUA);
|
| 588 |
|
| 589 |
await this.newPage('beware_deadlock').then((r) => this.__loadedPage.push(r));
|
| 590 |
|
|
|
|
| 617 |
}
|
| 618 |
const preparations = [];
|
| 619 |
|
| 620 |
+
preparations.push(page.setUserAgent(this.effectiveUA));
|
| 621 |
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
| 622 |
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
| 623 |
preparations.push(page.setBypassCSP(true));
|