hanxiao committed on
Commit
e050a5b
·
2 Parent(s): 8e241c7dbeb695

Merge remote-tracking branch 'origin/main'

Browse files
backend/functions/package-lock.json CHANGED
@@ -27,6 +27,8 @@
27
  "minio": "^7.1.3",
28
  "openai": "^4.20.0",
29
  "puppeteer": "^22.6.3",
 
 
30
  "stripe": "^11.11.0",
31
  "tiktoken": "^1.0.10",
32
  "turndown": "^7.1.3",
@@ -2526,6 +2528,14 @@
2526
  "@types/node": "*"
2527
  }
2528
  },
 
 
 
 
 
 
 
 
2529
  "node_modules/@types/express": {
2530
  "version": "4.17.3",
2531
  "resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
@@ -2673,6 +2683,11 @@
2673
  "integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==",
2674
  "optional": true
2675
  },
 
 
 
 
 
2676
  "node_modules/@types/node": {
2677
  "version": "18.19.31",
2678
  "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.31.tgz",
@@ -3234,6 +3249,14 @@
3234
  "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
3235
  "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
3236
  },
 
 
 
 
 
 
 
 
3237
  "node_modules/array-buffer-byte-length": {
3238
  "version": "1.0.1",
3239
  "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
@@ -4076,6 +4099,21 @@
4076
  "node": ">=12"
4077
  }
4078
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4079
  "node_modules/co": {
4080
  "version": "4.6.0",
4081
  "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
@@ -4466,8 +4504,6 @@
4466
  "version": "4.3.1",
4467
  "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
4468
  "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
4469
- "dev": true,
4470
- "peer": true,
4471
  "engines": {
4472
  "node": ">=0.10.0"
4473
  }
@@ -5739,6 +5775,25 @@
5739
  "is-callable": "^1.1.3"
5740
  }
5741
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5742
  "node_modules/form-data": {
5743
  "version": "4.0.0",
5744
  "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
@@ -6786,6 +6841,11 @@
6786
  "url": "https://github.com/sponsors/ljharb"
6787
  }
6788
  },
 
 
 
 
 
6789
  "node_modules/is-callable": {
6790
  "version": "1.2.7",
6791
  "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
@@ -6839,6 +6899,14 @@
6839
  "url": "https://github.com/sponsors/ljharb"
6840
  }
6841
  },
 
 
 
 
 
 
 
 
6842
  "node_modules/is-extglob": {
6843
  "version": "2.1.1",
6844
  "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
@@ -6948,6 +7016,17 @@
6948
  "node": ">=8"
6949
  }
6950
  },
 
 
 
 
 
 
 
 
 
 
 
6951
  "node_modules/is-regex": {
6952
  "version": "1.1.4",
6953
  "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
@@ -7064,6 +7143,14 @@
7064
  "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
7065
  "dev": true
7066
  },
 
 
 
 
 
 
 
 
7067
  "node_modules/istanbul-lib-coverage": {
7068
  "version": "3.2.2",
7069
  "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
@@ -8049,6 +8136,17 @@
8049
  "json-buffer": "3.0.1"
8050
  }
8051
  },
 
 
 
 
 
 
 
 
 
 
 
8052
  "node_modules/klaw": {
8053
  "version": "3.0.0",
8054
  "resolved": "https://registry.npmjs.org/klaw/-/klaw-3.0.0.tgz",
@@ -8184,6 +8282,14 @@
8184
  "unicode-9.0.0": "0.7.0"
8185
  }
8186
  },
 
 
 
 
 
 
 
 
8187
  "node_modules/lazystream": {
8188
  "version": "1.0.1",
8189
  "resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
@@ -8504,6 +8610,19 @@
8504
  "optional": true,
8505
  "peer": true
8506
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
8507
  "node_modules/merge-descriptors": {
8508
  "version": "1.0.1",
8509
  "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@@ -8672,6 +8791,26 @@
8672
  "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
8673
  "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
8674
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8675
  "node_modules/mkdirp": {
8676
  "version": "1.0.4",
8677
  "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
@@ -9719,6 +9858,150 @@
9719
  "node": ">=18"
9720
  }
9721
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9722
  "node_modules/pure-rand": {
9723
  "version": "6.1.0",
9724
  "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
@@ -10314,6 +10597,39 @@
10314
  "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
10315
  "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
10316
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10317
  "node_modules/shebang-command": {
10318
  "version": "2.0.0",
10319
  "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
 
27
  "minio": "^7.1.3",
28
  "openai": "^4.20.0",
29
  "puppeteer": "^22.6.3",
30
+ "puppeteer-extra": "^3.3.6",
31
+ "puppeteer-extra-plugin-stealth": "^2.11.2",
32
  "stripe": "^11.11.0",
33
  "tiktoken": "^1.0.10",
34
  "turndown": "^7.1.3",
 
2528
  "@types/node": "*"
2529
  }
2530
  },
2531
+ "node_modules/@types/debug": {
2532
+ "version": "4.1.12",
2533
+ "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
2534
+ "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
2535
+ "dependencies": {
2536
+ "@types/ms": "*"
2537
+ }
2538
+ },
2539
  "node_modules/@types/express": {
2540
  "version": "4.17.3",
2541
  "resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
 
2683
  "integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==",
2684
  "optional": true
2685
  },
2686
+ "node_modules/@types/ms": {
2687
+ "version": "0.7.34",
2688
+ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz",
2689
+ "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
2690
+ },
2691
  "node_modules/@types/node": {
2692
  "version": "18.19.31",
2693
  "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.31.tgz",
 
3249
  "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
3250
  "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
3251
  },
3252
+ "node_modules/arr-union": {
3253
+ "version": "3.1.0",
3254
+ "resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
3255
+ "integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==",
3256
+ "engines": {
3257
+ "node": ">=0.10.0"
3258
+ }
3259
+ },
3260
  "node_modules/array-buffer-byte-length": {
3261
  "version": "1.0.1",
3262
  "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
 
4099
  "node": ">=12"
4100
  }
4101
  },
4102
+ "node_modules/clone-deep": {
4103
+ "version": "0.2.4",
4104
+ "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
4105
+ "integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
4106
+ "dependencies": {
4107
+ "for-own": "^0.1.3",
4108
+ "is-plain-object": "^2.0.1",
4109
+ "kind-of": "^3.0.2",
4110
+ "lazy-cache": "^1.0.3",
4111
+ "shallow-clone": "^0.1.2"
4112
+ },
4113
+ "engines": {
4114
+ "node": ">=0.10.0"
4115
+ }
4116
+ },
4117
  "node_modules/co": {
4118
  "version": "4.6.0",
4119
  "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
 
4504
  "version": "4.3.1",
4505
  "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
4506
  "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
 
 
4507
  "engines": {
4508
  "node": ">=0.10.0"
4509
  }
 
5775
  "is-callable": "^1.1.3"
5776
  }
5777
  },
5778
+ "node_modules/for-in": {
5779
+ "version": "1.0.2",
5780
+ "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz",
5781
+ "integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==",
5782
+ "engines": {
5783
+ "node": ">=0.10.0"
5784
+ }
5785
+ },
5786
+ "node_modules/for-own": {
5787
+ "version": "0.1.5",
5788
+ "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
5789
+ "integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
5790
+ "dependencies": {
5791
+ "for-in": "^1.0.1"
5792
+ },
5793
+ "engines": {
5794
+ "node": ">=0.10.0"
5795
+ }
5796
+ },
5797
  "node_modules/form-data": {
5798
  "version": "4.0.0",
5799
  "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
 
6841
  "url": "https://github.com/sponsors/ljharb"
6842
  }
6843
  },
6844
+ "node_modules/is-buffer": {
6845
+ "version": "1.1.6",
6846
+ "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
6847
+ "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
6848
+ },
6849
  "node_modules/is-callable": {
6850
  "version": "1.2.7",
6851
  "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
 
6899
  "url": "https://github.com/sponsors/ljharb"
6900
  }
6901
  },
6902
+ "node_modules/is-extendable": {
6903
+ "version": "0.1.1",
6904
+ "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
6905
+ "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
6906
+ "engines": {
6907
+ "node": ">=0.10.0"
6908
+ }
6909
+ },
6910
  "node_modules/is-extglob": {
6911
  "version": "2.1.1",
6912
  "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
 
7016
  "node": ">=8"
7017
  }
7018
  },
7019
+ "node_modules/is-plain-object": {
7020
+ "version": "2.0.4",
7021
+ "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
7022
+ "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
7023
+ "dependencies": {
7024
+ "isobject": "^3.0.1"
7025
+ },
7026
+ "engines": {
7027
+ "node": ">=0.10.0"
7028
+ }
7029
+ },
7030
  "node_modules/is-regex": {
7031
  "version": "1.1.4",
7032
  "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
 
7143
  "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
7144
  "dev": true
7145
  },
7146
+ "node_modules/isobject": {
7147
+ "version": "3.0.1",
7148
+ "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
7149
+ "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
7150
+ "engines": {
7151
+ "node": ">=0.10.0"
7152
+ }
7153
+ },
7154
  "node_modules/istanbul-lib-coverage": {
7155
  "version": "3.2.2",
7156
  "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
 
8136
  "json-buffer": "3.0.1"
8137
  }
8138
  },
8139
+ "node_modules/kind-of": {
8140
+ "version": "3.2.2",
8141
+ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
8142
+ "integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
8143
+ "dependencies": {
8144
+ "is-buffer": "^1.1.5"
8145
+ },
8146
+ "engines": {
8147
+ "node": ">=0.10.0"
8148
+ }
8149
+ },
8150
  "node_modules/klaw": {
8151
  "version": "3.0.0",
8152
  "resolved": "https://registry.npmjs.org/klaw/-/klaw-3.0.0.tgz",
 
8282
  "unicode-9.0.0": "0.7.0"
8283
  }
8284
  },
8285
+ "node_modules/lazy-cache": {
8286
+ "version": "1.0.4",
8287
+ "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
8288
+ "integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
8289
+ "engines": {
8290
+ "node": ">=0.10.0"
8291
+ }
8292
+ },
8293
  "node_modules/lazystream": {
8294
  "version": "1.0.1",
8295
  "resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
 
8610
  "optional": true,
8611
  "peer": true
8612
  },
8613
+ "node_modules/merge-deep": {
8614
+ "version": "3.0.3",
8615
+ "resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
8616
+ "integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
8617
+ "dependencies": {
8618
+ "arr-union": "^3.1.0",
8619
+ "clone-deep": "^0.2.4",
8620
+ "kind-of": "^3.0.2"
8621
+ },
8622
+ "engines": {
8623
+ "node": ">=0.10.0"
8624
+ }
8625
+ },
8626
  "node_modules/merge-descriptors": {
8627
  "version": "1.0.1",
8628
  "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
 
8791
  "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
8792
  "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
8793
  },
8794
+ "node_modules/mixin-object": {
8795
+ "version": "2.0.1",
8796
+ "resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
8797
+ "integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
8798
+ "dependencies": {
8799
+ "for-in": "^0.1.3",
8800
+ "is-extendable": "^0.1.1"
8801
+ },
8802
+ "engines": {
8803
+ "node": ">=0.10.0"
8804
+ }
8805
+ },
8806
+ "node_modules/mixin-object/node_modules/for-in": {
8807
+ "version": "0.1.8",
8808
+ "resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
8809
+ "integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
8810
+ "engines": {
8811
+ "node": ">=0.10.0"
8812
+ }
8813
+ },
8814
  "node_modules/mkdirp": {
8815
  "version": "1.0.4",
8816
  "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
 
9858
  "node": ">=18"
9859
  }
9860
  },
9861
+ "node_modules/puppeteer-extra": {
9862
+ "version": "3.3.6",
9863
+ "resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
9864
+ "integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
9865
+ "dependencies": {
9866
+ "@types/debug": "^4.1.0",
9867
+ "debug": "^4.1.1",
9868
+ "deepmerge": "^4.2.2"
9869
+ },
9870
+ "engines": {
9871
+ "node": ">=8"
9872
+ },
9873
+ "peerDependencies": {
9874
+ "@types/puppeteer": "*",
9875
+ "puppeteer": "*",
9876
+ "puppeteer-core": "*"
9877
+ },
9878
+ "peerDependenciesMeta": {
9879
+ "@types/puppeteer": {
9880
+ "optional": true
9881
+ },
9882
+ "puppeteer": {
9883
+ "optional": true
9884
+ },
9885
+ "puppeteer-core": {
9886
+ "optional": true
9887
+ }
9888
+ }
9889
+ },
9890
+ "node_modules/puppeteer-extra-plugin": {
9891
+ "version": "3.2.3",
9892
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz",
9893
+ "integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==",
9894
+ "dependencies": {
9895
+ "@types/debug": "^4.1.0",
9896
+ "debug": "^4.1.1",
9897
+ "merge-deep": "^3.0.1"
9898
+ },
9899
+ "engines": {
9900
+ "node": ">=9.11.2"
9901
+ },
9902
+ "peerDependencies": {
9903
+ "playwright-extra": "*",
9904
+ "puppeteer-extra": "*"
9905
+ },
9906
+ "peerDependenciesMeta": {
9907
+ "playwright-extra": {
9908
+ "optional": true
9909
+ },
9910
+ "puppeteer-extra": {
9911
+ "optional": true
9912
+ }
9913
+ }
9914
+ },
9915
+ "node_modules/puppeteer-extra-plugin-stealth": {
9916
+ "version": "2.11.2",
9917
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz",
9918
+ "integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==",
9919
+ "dependencies": {
9920
+ "debug": "^4.1.1",
9921
+ "puppeteer-extra-plugin": "^3.2.3",
9922
+ "puppeteer-extra-plugin-user-preferences": "^2.4.1"
9923
+ },
9924
+ "engines": {
9925
+ "node": ">=8"
9926
+ },
9927
+ "peerDependencies": {
9928
+ "playwright-extra": "*",
9929
+ "puppeteer-extra": "*"
9930
+ },
9931
+ "peerDependenciesMeta": {
9932
+ "playwright-extra": {
9933
+ "optional": true
9934
+ },
9935
+ "puppeteer-extra": {
9936
+ "optional": true
9937
+ }
9938
+ }
9939
+ },
9940
+ "node_modules/puppeteer-extra-plugin-user-data-dir": {
9941
+ "version": "2.4.1",
9942
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz",
9943
+ "integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==",
9944
+ "dependencies": {
9945
+ "debug": "^4.1.1",
9946
+ "fs-extra": "^10.0.0",
9947
+ "puppeteer-extra-plugin": "^3.2.3",
9948
+ "rimraf": "^3.0.2"
9949
+ },
9950
+ "engines": {
9951
+ "node": ">=8"
9952
+ },
9953
+ "peerDependencies": {
9954
+ "playwright-extra": "*",
9955
+ "puppeteer-extra": "*"
9956
+ },
9957
+ "peerDependenciesMeta": {
9958
+ "playwright-extra": {
9959
+ "optional": true
9960
+ },
9961
+ "puppeteer-extra": {
9962
+ "optional": true
9963
+ }
9964
+ }
9965
+ },
9966
+ "node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/fs-extra": {
9967
+ "version": "10.1.0",
9968
+ "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
9969
+ "integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
9970
+ "dependencies": {
9971
+ "graceful-fs": "^4.2.0",
9972
+ "jsonfile": "^6.0.1",
9973
+ "universalify": "^2.0.0"
9974
+ },
9975
+ "engines": {
9976
+ "node": ">=12"
9977
+ }
9978
+ },
9979
+ "node_modules/puppeteer-extra-plugin-user-preferences": {
9980
+ "version": "2.4.1",
9981
+ "resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz",
9982
+ "integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==",
9983
+ "dependencies": {
9984
+ "debug": "^4.1.1",
9985
+ "deepmerge": "^4.2.2",
9986
+ "puppeteer-extra-plugin": "^3.2.3",
9987
+ "puppeteer-extra-plugin-user-data-dir": "^2.4.1"
9988
+ },
9989
+ "engines": {
9990
+ "node": ">=8"
9991
+ },
9992
+ "peerDependencies": {
9993
+ "playwright-extra": "*",
9994
+ "puppeteer-extra": "*"
9995
+ },
9996
+ "peerDependenciesMeta": {
9997
+ "playwright-extra": {
9998
+ "optional": true
9999
+ },
10000
+ "puppeteer-extra": {
10001
+ "optional": true
10002
+ }
10003
+ }
10004
+ },
10005
  "node_modules/pure-rand": {
10006
  "version": "6.1.0",
10007
  "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
 
10597
  "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
10598
  "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
10599
  },
10600
+ "node_modules/shallow-clone": {
10601
+ "version": "0.1.2",
10602
+ "resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
10603
+ "integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
10604
+ "dependencies": {
10605
+ "is-extendable": "^0.1.1",
10606
+ "kind-of": "^2.0.1",
10607
+ "lazy-cache": "^0.2.3",
10608
+ "mixin-object": "^2.0.1"
10609
+ },
10610
+ "engines": {
10611
+ "node": ">=0.10.0"
10612
+ }
10613
+ },
10614
+ "node_modules/shallow-clone/node_modules/kind-of": {
10615
+ "version": "2.0.1",
10616
+ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
10617
+ "integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
10618
+ "dependencies": {
10619
+ "is-buffer": "^1.0.2"
10620
+ },
10621
+ "engines": {
10622
+ "node": ">=0.10.0"
10623
+ }
10624
+ },
10625
+ "node_modules/shallow-clone/node_modules/lazy-cache": {
10626
+ "version": "0.2.7",
10627
+ "resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
10628
+ "integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
10629
+ "engines": {
10630
+ "node": ">=0.10.0"
10631
+ }
10632
+ },
10633
  "node_modules/shebang-command": {
10634
  "version": "2.0.0",
10635
  "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
backend/functions/package.json CHANGED
@@ -47,6 +47,8 @@
47
  "minio": "^7.1.3",
48
  "openai": "^4.20.0",
49
  "puppeteer": "^22.6.3",
 
 
50
  "stripe": "^11.11.0",
51
  "tiktoken": "^1.0.10",
52
  "turndown": "^7.1.3",
 
47
  "minio": "^7.1.3",
48
  "openai": "^4.20.0",
49
  "puppeteer": "^22.6.3",
50
+ "puppeteer-extra": "^3.3.6",
51
+ "puppeteer-extra-plugin-stealth": "^2.11.2",
52
  "stripe": "^11.11.0",
53
  "tiktoken": "^1.0.10",
54
  "turndown": "^7.1.3",
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
4
  import _ from 'lodash';
@@ -32,11 +32,11 @@ export class CrawlerHost extends RPCHost {
32
  const toBeTurnedToMd = snapshot.parsed?.content;
33
  const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
34
 
35
- const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
36
 
37
  const formatted = {
38
  title: (snapshot.parsed?.title || snapshot.title || '').trim(),
39
- url: snapshot.href.trim(),
40
  content: contentText.trim(),
41
 
42
  toString() {
@@ -80,7 +80,15 @@ ${this.content}
80
  },
81
  ) {
82
  const noSlashURL = ctx.req.url.slice(1);
83
- const urlToCrawl = new URL(normalizeUrl(noSlashURL));
 
 
 
 
 
 
 
 
84
  const screenshotEnabled = Boolean(ctx.req.headers['x-screenshot']);
85
  const noCache = Boolean(ctx.req.headers['x-no-cache']);
86
 
@@ -125,7 +133,7 @@ ${this.content}
125
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
126
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
127
  lastScrapped = scrapped;
128
- if (!scrapped?.parsed?.content) {
129
  continue;
130
  }
131
 
@@ -143,7 +151,7 @@ ${this.content}
143
 
144
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
145
  lastScrapped = scrapped;
146
- if (!scrapped?.parsed?.content) {
147
  continue;
148
  }
149
 
 
1
+ import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError, ParamValidationError } from 'civkit';
2
  import { singleton } from 'tsyringe';
3
  import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
4
  import _ from 'lodash';
 
32
  const toBeTurnedToMd = snapshot.parsed?.content;
33
  const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
34
 
35
+ const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text?.trim();
36
 
37
  const formatted = {
38
  title: (snapshot.parsed?.title || snapshot.title || '').trim(),
39
+ url: snapshot.href?.trim(),
40
  content: contentText.trim(),
41
 
42
  toString() {
 
80
  },
81
  ) {
82
  const noSlashURL = ctx.req.url.slice(1);
83
+ let urlToCrawl;
84
+ try {
85
+ urlToCrawl = new URL(normalizeUrl(noSlashURL.trim()));
86
+ } catch (err) {
87
+ throw new ParamValidationError({
88
+ message: `${err}`,
89
+ path: 'url'
90
+ });
91
+ }
92
  const screenshotEnabled = Boolean(ctx.req.headers['x-screenshot']);
93
  const noCache = Boolean(ctx.req.headers['x-no-cache']);
94
 
 
133
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
134
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
135
  lastScrapped = scrapped;
136
+ if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
137
  continue;
138
  }
139
 
 
151
 
152
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
153
  lastScrapped = scrapped;
154
+ if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
155
  continue;
156
  }
157
 
backend/functions/src/index.ts CHANGED
@@ -21,6 +21,7 @@ initializeApp();
21
 
22
  import { loadModulesDynamically, registry } from './shared';
23
  import path from 'path';
 
24
  loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
25
 
26
  Object.assign(exports, registry.exportAll());
@@ -31,4 +32,12 @@ Object.assign(exports, registry.exportGrouped({
31
  registry.title = 'reader';
32
  registry.version = '0.1.0';
33
 
34
- process.on('unhandledRejection', () => 'no big deal');
 
 
 
 
 
 
 
 
 
21
 
22
  import { loadModulesDynamically, registry } from './shared';
23
  import path from 'path';
24
+ import { ApplicationError } from 'civkit';
25
  loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
26
 
27
  Object.assign(exports, registry.exportAll());
 
32
  registry.title = 'reader';
33
  registry.version = '0.1.0';
34
 
35
+ process.on('unhandledRejection', (err) => {
36
+ // Work around Firebase runtime bug.
37
+ if (err instanceof ApplicationError) {
38
+ // Application error shall not crash the process;
39
+ return;
40
+ }
41
+
42
+ throw err;
43
+ });
backend/functions/src/services/puppeteer.ts CHANGED
@@ -1,11 +1,13 @@
1
  import { AssertionFailureError, AsyncService, Defer, HashManager, marshalErrorLike } from 'civkit';
2
  import { container, singleton } from 'tsyringe';
3
- import puppeteer, { Browser } from 'puppeteer';
4
  import { Logger } from '../shared/services/logger';
5
  import genericPool from 'generic-pool';
6
  import os from 'os';
7
  import fs from 'fs';
8
  import { Crawled } from '../db/crawled';
 
 
9
 
10
 
11
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -31,6 +33,12 @@ export interface PageSnapshot {
31
  }
32
  const md5Hasher = new HashManager('md5', 'hex');
33
 
 
 
 
 
 
 
34
  @singleton()
35
  export class PuppeteerControl extends AsyncService {
36
 
@@ -77,9 +85,10 @@ export class PuppeteerControl extends AsyncService {
77
  headless: true,
78
  timeout: 10_000
79
  }).catch((err) => {
80
- this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
81
  process.nextTick(() => {
82
- process.exit(1);
 
83
  });
84
  return Promise.reject(err);
85
  });
@@ -100,7 +109,7 @@ export class PuppeteerControl extends AsyncService {
100
  const preparations = [];
101
 
102
  // preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
103
- preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
104
  preparations.push(page.setBypassCSP(true));
105
  preparations.push(page.setViewport({ width: 1920, height: 1080 }));
106
  preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {
 
1
  import { AssertionFailureError, AsyncService, Defer, HashManager, marshalErrorLike } from 'civkit';
2
  import { container, singleton } from 'tsyringe';
3
+ import type { Browser } from 'puppeteer';
4
  import { Logger } from '../shared/services/logger';
5
  import genericPool from 'generic-pool';
6
  import os from 'os';
7
  import fs from 'fs';
8
  import { Crawled } from '../db/crawled';
9
+ import puppeteer from 'puppeteer-extra';
10
+ import puppeteerStealth from 'puppeteer-extra-plugin-stealth';
11
 
12
 
13
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
 
33
  }
34
  const md5Hasher = new HashManager('md5', 'hex');
35
 
36
+ puppeteer.use(puppeteerStealth());
37
+ // const puppeteerUAOverride = require('puppeteer-extra-plugin-stealth/evasions/user-agent-override');
38
+ // puppeteer.use(puppeteerUAOverride({
39
+ // userAgent: `Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`
40
+ // }))
41
+
42
  @singleton()
43
  export class PuppeteerControl extends AsyncService {
44
 
 
85
  headless: true,
86
  timeout: 10_000
87
  }).catch((err) => {
88
+ this.logger.error(`Unknown firebase issue, just die fast.`, { err });
89
  process.nextTick(() => {
90
+ this.emit('error', err);
91
+ // process.exit(1);
92
  });
93
  return Promise.reject(err);
94
  });
 
109
  const preparations = [];
110
 
111
  // preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
112
+ // preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
113
  preparations.push(page.setBypassCSP(true));
114
  preparations.push(page.setViewport({ width: 1920, height: 1080 }));
115
  preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {