Spaces:
Build error
Build error
Merge remote-tracking branch 'origin/main'
Browse files
backend/functions/package-lock.json
CHANGED
|
@@ -27,6 +27,8 @@
|
|
| 27 |
"minio": "^7.1.3",
|
| 28 |
"openai": "^4.20.0",
|
| 29 |
"puppeteer": "^22.6.3",
|
|
|
|
|
|
|
| 30 |
"stripe": "^11.11.0",
|
| 31 |
"tiktoken": "^1.0.10",
|
| 32 |
"turndown": "^7.1.3",
|
|
@@ -2526,6 +2528,14 @@
|
|
| 2526 |
"@types/node": "*"
|
| 2527 |
}
|
| 2528 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2529 |
"node_modules/@types/express": {
|
| 2530 |
"version": "4.17.3",
|
| 2531 |
"resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
|
|
@@ -2673,6 +2683,11 @@
|
|
| 2673 |
"integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==",
|
| 2674 |
"optional": true
|
| 2675 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2676 |
"node_modules/@types/node": {
|
| 2677 |
"version": "18.19.31",
|
| 2678 |
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.31.tgz",
|
|
@@ -3234,6 +3249,14 @@
|
|
| 3234 |
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
| 3235 |
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
| 3236 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3237 |
"node_modules/array-buffer-byte-length": {
|
| 3238 |
"version": "1.0.1",
|
| 3239 |
"resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
|
|
@@ -4076,6 +4099,21 @@
|
|
| 4076 |
"node": ">=12"
|
| 4077 |
}
|
| 4078 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4079 |
"node_modules/co": {
|
| 4080 |
"version": "4.6.0",
|
| 4081 |
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
|
@@ -4466,8 +4504,6 @@
|
|
| 4466 |
"version": "4.3.1",
|
| 4467 |
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
| 4468 |
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
| 4469 |
-
"dev": true,
|
| 4470 |
-
"peer": true,
|
| 4471 |
"engines": {
|
| 4472 |
"node": ">=0.10.0"
|
| 4473 |
}
|
|
@@ -5739,6 +5775,25 @@
|
|
| 5739 |
"is-callable": "^1.1.3"
|
| 5740 |
}
|
| 5741 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5742 |
"node_modules/form-data": {
|
| 5743 |
"version": "4.0.0",
|
| 5744 |
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
|
@@ -6786,6 +6841,11 @@
|
|
| 6786 |
"url": "https://github.com/sponsors/ljharb"
|
| 6787 |
}
|
| 6788 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6789 |
"node_modules/is-callable": {
|
| 6790 |
"version": "1.2.7",
|
| 6791 |
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
|
|
@@ -6839,6 +6899,14 @@
|
|
| 6839 |
"url": "https://github.com/sponsors/ljharb"
|
| 6840 |
}
|
| 6841 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6842 |
"node_modules/is-extglob": {
|
| 6843 |
"version": "2.1.1",
|
| 6844 |
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
|
|
@@ -6948,6 +7016,17 @@
|
|
| 6948 |
"node": ">=8"
|
| 6949 |
}
|
| 6950 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6951 |
"node_modules/is-regex": {
|
| 6952 |
"version": "1.1.4",
|
| 6953 |
"resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
|
|
@@ -7064,6 +7143,14 @@
|
|
| 7064 |
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
| 7065 |
"dev": true
|
| 7066 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7067 |
"node_modules/istanbul-lib-coverage": {
|
| 7068 |
"version": "3.2.2",
|
| 7069 |
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
|
@@ -8049,6 +8136,17 @@
|
|
| 8049 |
"json-buffer": "3.0.1"
|
| 8050 |
}
|
| 8051 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8052 |
"node_modules/klaw": {
|
| 8053 |
"version": "3.0.0",
|
| 8054 |
"resolved": "https://registry.npmjs.org/klaw/-/klaw-3.0.0.tgz",
|
|
@@ -8184,6 +8282,14 @@
|
|
| 8184 |
"unicode-9.0.0": "0.7.0"
|
| 8185 |
}
|
| 8186 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8187 |
"node_modules/lazystream": {
|
| 8188 |
"version": "1.0.1",
|
| 8189 |
"resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
|
|
@@ -8504,6 +8610,19 @@
|
|
| 8504 |
"optional": true,
|
| 8505 |
"peer": true
|
| 8506 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8507 |
"node_modules/merge-descriptors": {
|
| 8508 |
"version": "1.0.1",
|
| 8509 |
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
|
|
@@ -8672,6 +8791,26 @@
|
|
| 8672 |
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
|
| 8673 |
"integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
|
| 8674 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8675 |
"node_modules/mkdirp": {
|
| 8676 |
"version": "1.0.4",
|
| 8677 |
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
|
|
@@ -9719,6 +9858,150 @@
|
|
| 9719 |
"node": ">=18"
|
| 9720 |
}
|
| 9721 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9722 |
"node_modules/pure-rand": {
|
| 9723 |
"version": "6.1.0",
|
| 9724 |
"resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
|
|
@@ -10314,6 +10597,39 @@
|
|
| 10314 |
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
|
| 10315 |
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
|
| 10316 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10317 |
"node_modules/shebang-command": {
|
| 10318 |
"version": "2.0.0",
|
| 10319 |
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
|
|
|
| 27 |
"minio": "^7.1.3",
|
| 28 |
"openai": "^4.20.0",
|
| 29 |
"puppeteer": "^22.6.3",
|
| 30 |
+
"puppeteer-extra": "^3.3.6",
|
| 31 |
+
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
| 32 |
"stripe": "^11.11.0",
|
| 33 |
"tiktoken": "^1.0.10",
|
| 34 |
"turndown": "^7.1.3",
|
|
|
|
| 2528 |
"@types/node": "*"
|
| 2529 |
}
|
| 2530 |
},
|
| 2531 |
+
"node_modules/@types/debug": {
|
| 2532 |
+
"version": "4.1.12",
|
| 2533 |
+
"resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz",
|
| 2534 |
+
"integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==",
|
| 2535 |
+
"dependencies": {
|
| 2536 |
+
"@types/ms": "*"
|
| 2537 |
+
}
|
| 2538 |
+
},
|
| 2539 |
"node_modules/@types/express": {
|
| 2540 |
"version": "4.17.3",
|
| 2541 |
"resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
|
|
|
|
| 2683 |
"integrity": "sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA==",
|
| 2684 |
"optional": true
|
| 2685 |
},
|
| 2686 |
+
"node_modules/@types/ms": {
|
| 2687 |
+
"version": "0.7.34",
|
| 2688 |
+
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz",
|
| 2689 |
+
"integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
|
| 2690 |
+
},
|
| 2691 |
"node_modules/@types/node": {
|
| 2692 |
"version": "18.19.31",
|
| 2693 |
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.31.tgz",
|
|
|
|
| 3249 |
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
|
| 3250 |
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
|
| 3251 |
},
|
| 3252 |
+
"node_modules/arr-union": {
|
| 3253 |
+
"version": "3.1.0",
|
| 3254 |
+
"resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
|
| 3255 |
+
"integrity": "sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==",
|
| 3256 |
+
"engines": {
|
| 3257 |
+
"node": ">=0.10.0"
|
| 3258 |
+
}
|
| 3259 |
+
},
|
| 3260 |
"node_modules/array-buffer-byte-length": {
|
| 3261 |
"version": "1.0.1",
|
| 3262 |
"resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz",
|
|
|
|
| 4099 |
"node": ">=12"
|
| 4100 |
}
|
| 4101 |
},
|
| 4102 |
+
"node_modules/clone-deep": {
|
| 4103 |
+
"version": "0.2.4",
|
| 4104 |
+
"resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz",
|
| 4105 |
+
"integrity": "sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==",
|
| 4106 |
+
"dependencies": {
|
| 4107 |
+
"for-own": "^0.1.3",
|
| 4108 |
+
"is-plain-object": "^2.0.1",
|
| 4109 |
+
"kind-of": "^3.0.2",
|
| 4110 |
+
"lazy-cache": "^1.0.3",
|
| 4111 |
+
"shallow-clone": "^0.1.2"
|
| 4112 |
+
},
|
| 4113 |
+
"engines": {
|
| 4114 |
+
"node": ">=0.10.0"
|
| 4115 |
+
}
|
| 4116 |
+
},
|
| 4117 |
"node_modules/co": {
|
| 4118 |
"version": "4.6.0",
|
| 4119 |
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
|
|
|
| 4504 |
"version": "4.3.1",
|
| 4505 |
"resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
|
| 4506 |
"integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
|
|
|
|
|
|
|
| 4507 |
"engines": {
|
| 4508 |
"node": ">=0.10.0"
|
| 4509 |
}
|
|
|
|
| 5775 |
"is-callable": "^1.1.3"
|
| 5776 |
}
|
| 5777 |
},
|
| 5778 |
+
"node_modules/for-in": {
|
| 5779 |
+
"version": "1.0.2",
|
| 5780 |
+
"resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz",
|
| 5781 |
+
"integrity": "sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==",
|
| 5782 |
+
"engines": {
|
| 5783 |
+
"node": ">=0.10.0"
|
| 5784 |
+
}
|
| 5785 |
+
},
|
| 5786 |
+
"node_modules/for-own": {
|
| 5787 |
+
"version": "0.1.5",
|
| 5788 |
+
"resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz",
|
| 5789 |
+
"integrity": "sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==",
|
| 5790 |
+
"dependencies": {
|
| 5791 |
+
"for-in": "^1.0.1"
|
| 5792 |
+
},
|
| 5793 |
+
"engines": {
|
| 5794 |
+
"node": ">=0.10.0"
|
| 5795 |
+
}
|
| 5796 |
+
},
|
| 5797 |
"node_modules/form-data": {
|
| 5798 |
"version": "4.0.0",
|
| 5799 |
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
|
|
|
| 6841 |
"url": "https://github.com/sponsors/ljharb"
|
| 6842 |
}
|
| 6843 |
},
|
| 6844 |
+
"node_modules/is-buffer": {
|
| 6845 |
+
"version": "1.1.6",
|
| 6846 |
+
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
|
| 6847 |
+
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
|
| 6848 |
+
},
|
| 6849 |
"node_modules/is-callable": {
|
| 6850 |
"version": "1.2.7",
|
| 6851 |
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz",
|
|
|
|
| 6899 |
"url": "https://github.com/sponsors/ljharb"
|
| 6900 |
}
|
| 6901 |
},
|
| 6902 |
+
"node_modules/is-extendable": {
|
| 6903 |
+
"version": "0.1.1",
|
| 6904 |
+
"resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz",
|
| 6905 |
+
"integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==",
|
| 6906 |
+
"engines": {
|
| 6907 |
+
"node": ">=0.10.0"
|
| 6908 |
+
}
|
| 6909 |
+
},
|
| 6910 |
"node_modules/is-extglob": {
|
| 6911 |
"version": "2.1.1",
|
| 6912 |
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
|
|
|
|
| 7016 |
"node": ">=8"
|
| 7017 |
}
|
| 7018 |
},
|
| 7019 |
+
"node_modules/is-plain-object": {
|
| 7020 |
+
"version": "2.0.4",
|
| 7021 |
+
"resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz",
|
| 7022 |
+
"integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==",
|
| 7023 |
+
"dependencies": {
|
| 7024 |
+
"isobject": "^3.0.1"
|
| 7025 |
+
},
|
| 7026 |
+
"engines": {
|
| 7027 |
+
"node": ">=0.10.0"
|
| 7028 |
+
}
|
| 7029 |
+
},
|
| 7030 |
"node_modules/is-regex": {
|
| 7031 |
"version": "1.1.4",
|
| 7032 |
"resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz",
|
|
|
|
| 7143 |
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
| 7144 |
"dev": true
|
| 7145 |
},
|
| 7146 |
+
"node_modules/isobject": {
|
| 7147 |
+
"version": "3.0.1",
|
| 7148 |
+
"resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz",
|
| 7149 |
+
"integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==",
|
| 7150 |
+
"engines": {
|
| 7151 |
+
"node": ">=0.10.0"
|
| 7152 |
+
}
|
| 7153 |
+
},
|
| 7154 |
"node_modules/istanbul-lib-coverage": {
|
| 7155 |
"version": "3.2.2",
|
| 7156 |
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
|
|
|
| 8136 |
"json-buffer": "3.0.1"
|
| 8137 |
}
|
| 8138 |
},
|
| 8139 |
+
"node_modules/kind-of": {
|
| 8140 |
+
"version": "3.2.2",
|
| 8141 |
+
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz",
|
| 8142 |
+
"integrity": "sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==",
|
| 8143 |
+
"dependencies": {
|
| 8144 |
+
"is-buffer": "^1.1.5"
|
| 8145 |
+
},
|
| 8146 |
+
"engines": {
|
| 8147 |
+
"node": ">=0.10.0"
|
| 8148 |
+
}
|
| 8149 |
+
},
|
| 8150 |
"node_modules/klaw": {
|
| 8151 |
"version": "3.0.0",
|
| 8152 |
"resolved": "https://registry.npmjs.org/klaw/-/klaw-3.0.0.tgz",
|
|
|
|
| 8282 |
"unicode-9.0.0": "0.7.0"
|
| 8283 |
}
|
| 8284 |
},
|
| 8285 |
+
"node_modules/lazy-cache": {
|
| 8286 |
+
"version": "1.0.4",
|
| 8287 |
+
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-1.0.4.tgz",
|
| 8288 |
+
"integrity": "sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==",
|
| 8289 |
+
"engines": {
|
| 8290 |
+
"node": ">=0.10.0"
|
| 8291 |
+
}
|
| 8292 |
+
},
|
| 8293 |
"node_modules/lazystream": {
|
| 8294 |
"version": "1.0.1",
|
| 8295 |
"resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz",
|
|
|
|
| 8610 |
"optional": true,
|
| 8611 |
"peer": true
|
| 8612 |
},
|
| 8613 |
+
"node_modules/merge-deep": {
|
| 8614 |
+
"version": "3.0.3",
|
| 8615 |
+
"resolved": "https://registry.npmjs.org/merge-deep/-/merge-deep-3.0.3.tgz",
|
| 8616 |
+
"integrity": "sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==",
|
| 8617 |
+
"dependencies": {
|
| 8618 |
+
"arr-union": "^3.1.0",
|
| 8619 |
+
"clone-deep": "^0.2.4",
|
| 8620 |
+
"kind-of": "^3.0.2"
|
| 8621 |
+
},
|
| 8622 |
+
"engines": {
|
| 8623 |
+
"node": ">=0.10.0"
|
| 8624 |
+
}
|
| 8625 |
+
},
|
| 8626 |
"node_modules/merge-descriptors": {
|
| 8627 |
"version": "1.0.1",
|
| 8628 |
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
|
|
|
|
| 8791 |
"resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
|
| 8792 |
"integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
|
| 8793 |
},
|
| 8794 |
+
"node_modules/mixin-object": {
|
| 8795 |
+
"version": "2.0.1",
|
| 8796 |
+
"resolved": "https://registry.npmjs.org/mixin-object/-/mixin-object-2.0.1.tgz",
|
| 8797 |
+
"integrity": "sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==",
|
| 8798 |
+
"dependencies": {
|
| 8799 |
+
"for-in": "^0.1.3",
|
| 8800 |
+
"is-extendable": "^0.1.1"
|
| 8801 |
+
},
|
| 8802 |
+
"engines": {
|
| 8803 |
+
"node": ">=0.10.0"
|
| 8804 |
+
}
|
| 8805 |
+
},
|
| 8806 |
+
"node_modules/mixin-object/node_modules/for-in": {
|
| 8807 |
+
"version": "0.1.8",
|
| 8808 |
+
"resolved": "https://registry.npmjs.org/for-in/-/for-in-0.1.8.tgz",
|
| 8809 |
+
"integrity": "sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==",
|
| 8810 |
+
"engines": {
|
| 8811 |
+
"node": ">=0.10.0"
|
| 8812 |
+
}
|
| 8813 |
+
},
|
| 8814 |
"node_modules/mkdirp": {
|
| 8815 |
"version": "1.0.4",
|
| 8816 |
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
|
|
|
|
| 9858 |
"node": ">=18"
|
| 9859 |
}
|
| 9860 |
},
|
| 9861 |
+
"node_modules/puppeteer-extra": {
|
| 9862 |
+
"version": "3.3.6",
|
| 9863 |
+
"resolved": "https://registry.npmjs.org/puppeteer-extra/-/puppeteer-extra-3.3.6.tgz",
|
| 9864 |
+
"integrity": "sha512-rsLBE/6mMxAjlLd06LuGacrukP2bqbzKCLzV1vrhHFavqQE/taQ2UXv3H5P0Ls7nsrASa+6x3bDbXHpqMwq+7A==",
|
| 9865 |
+
"dependencies": {
|
| 9866 |
+
"@types/debug": "^4.1.0",
|
| 9867 |
+
"debug": "^4.1.1",
|
| 9868 |
+
"deepmerge": "^4.2.2"
|
| 9869 |
+
},
|
| 9870 |
+
"engines": {
|
| 9871 |
+
"node": ">=8"
|
| 9872 |
+
},
|
| 9873 |
+
"peerDependencies": {
|
| 9874 |
+
"@types/puppeteer": "*",
|
| 9875 |
+
"puppeteer": "*",
|
| 9876 |
+
"puppeteer-core": "*"
|
| 9877 |
+
},
|
| 9878 |
+
"peerDependenciesMeta": {
|
| 9879 |
+
"@types/puppeteer": {
|
| 9880 |
+
"optional": true
|
| 9881 |
+
},
|
| 9882 |
+
"puppeteer": {
|
| 9883 |
+
"optional": true
|
| 9884 |
+
},
|
| 9885 |
+
"puppeteer-core": {
|
| 9886 |
+
"optional": true
|
| 9887 |
+
}
|
| 9888 |
+
}
|
| 9889 |
+
},
|
| 9890 |
+
"node_modules/puppeteer-extra-plugin": {
|
| 9891 |
+
"version": "3.2.3",
|
| 9892 |
+
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.3.tgz",
|
| 9893 |
+
"integrity": "sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==",
|
| 9894 |
+
"dependencies": {
|
| 9895 |
+
"@types/debug": "^4.1.0",
|
| 9896 |
+
"debug": "^4.1.1",
|
| 9897 |
+
"merge-deep": "^3.0.1"
|
| 9898 |
+
},
|
| 9899 |
+
"engines": {
|
| 9900 |
+
"node": ">=9.11.2"
|
| 9901 |
+
},
|
| 9902 |
+
"peerDependencies": {
|
| 9903 |
+
"playwright-extra": "*",
|
| 9904 |
+
"puppeteer-extra": "*"
|
| 9905 |
+
},
|
| 9906 |
+
"peerDependenciesMeta": {
|
| 9907 |
+
"playwright-extra": {
|
| 9908 |
+
"optional": true
|
| 9909 |
+
},
|
| 9910 |
+
"puppeteer-extra": {
|
| 9911 |
+
"optional": true
|
| 9912 |
+
}
|
| 9913 |
+
}
|
| 9914 |
+
},
|
| 9915 |
+
"node_modules/puppeteer-extra-plugin-stealth": {
|
| 9916 |
+
"version": "2.11.2",
|
| 9917 |
+
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.11.2.tgz",
|
| 9918 |
+
"integrity": "sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==",
|
| 9919 |
+
"dependencies": {
|
| 9920 |
+
"debug": "^4.1.1",
|
| 9921 |
+
"puppeteer-extra-plugin": "^3.2.3",
|
| 9922 |
+
"puppeteer-extra-plugin-user-preferences": "^2.4.1"
|
| 9923 |
+
},
|
| 9924 |
+
"engines": {
|
| 9925 |
+
"node": ">=8"
|
| 9926 |
+
},
|
| 9927 |
+
"peerDependencies": {
|
| 9928 |
+
"playwright-extra": "*",
|
| 9929 |
+
"puppeteer-extra": "*"
|
| 9930 |
+
},
|
| 9931 |
+
"peerDependenciesMeta": {
|
| 9932 |
+
"playwright-extra": {
|
| 9933 |
+
"optional": true
|
| 9934 |
+
},
|
| 9935 |
+
"puppeteer-extra": {
|
| 9936 |
+
"optional": true
|
| 9937 |
+
}
|
| 9938 |
+
}
|
| 9939 |
+
},
|
| 9940 |
+
"node_modules/puppeteer-extra-plugin-user-data-dir": {
|
| 9941 |
+
"version": "2.4.1",
|
| 9942 |
+
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.4.1.tgz",
|
| 9943 |
+
"integrity": "sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==",
|
| 9944 |
+
"dependencies": {
|
| 9945 |
+
"debug": "^4.1.1",
|
| 9946 |
+
"fs-extra": "^10.0.0",
|
| 9947 |
+
"puppeteer-extra-plugin": "^3.2.3",
|
| 9948 |
+
"rimraf": "^3.0.2"
|
| 9949 |
+
},
|
| 9950 |
+
"engines": {
|
| 9951 |
+
"node": ">=8"
|
| 9952 |
+
},
|
| 9953 |
+
"peerDependencies": {
|
| 9954 |
+
"playwright-extra": "*",
|
| 9955 |
+
"puppeteer-extra": "*"
|
| 9956 |
+
},
|
| 9957 |
+
"peerDependenciesMeta": {
|
| 9958 |
+
"playwright-extra": {
|
| 9959 |
+
"optional": true
|
| 9960 |
+
},
|
| 9961 |
+
"puppeteer-extra": {
|
| 9962 |
+
"optional": true
|
| 9963 |
+
}
|
| 9964 |
+
}
|
| 9965 |
+
},
|
| 9966 |
+
"node_modules/puppeteer-extra-plugin-user-data-dir/node_modules/fs-extra": {
|
| 9967 |
+
"version": "10.1.0",
|
| 9968 |
+
"resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.1.0.tgz",
|
| 9969 |
+
"integrity": "sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==",
|
| 9970 |
+
"dependencies": {
|
| 9971 |
+
"graceful-fs": "^4.2.0",
|
| 9972 |
+
"jsonfile": "^6.0.1",
|
| 9973 |
+
"universalify": "^2.0.0"
|
| 9974 |
+
},
|
| 9975 |
+
"engines": {
|
| 9976 |
+
"node": ">=12"
|
| 9977 |
+
}
|
| 9978 |
+
},
|
| 9979 |
+
"node_modules/puppeteer-extra-plugin-user-preferences": {
|
| 9980 |
+
"version": "2.4.1",
|
| 9981 |
+
"resolved": "https://registry.npmjs.org/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.4.1.tgz",
|
| 9982 |
+
"integrity": "sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==",
|
| 9983 |
+
"dependencies": {
|
| 9984 |
+
"debug": "^4.1.1",
|
| 9985 |
+
"deepmerge": "^4.2.2",
|
| 9986 |
+
"puppeteer-extra-plugin": "^3.2.3",
|
| 9987 |
+
"puppeteer-extra-plugin-user-data-dir": "^2.4.1"
|
| 9988 |
+
},
|
| 9989 |
+
"engines": {
|
| 9990 |
+
"node": ">=8"
|
| 9991 |
+
},
|
| 9992 |
+
"peerDependencies": {
|
| 9993 |
+
"playwright-extra": "*",
|
| 9994 |
+
"puppeteer-extra": "*"
|
| 9995 |
+
},
|
| 9996 |
+
"peerDependenciesMeta": {
|
| 9997 |
+
"playwright-extra": {
|
| 9998 |
+
"optional": true
|
| 9999 |
+
},
|
| 10000 |
+
"puppeteer-extra": {
|
| 10001 |
+
"optional": true
|
| 10002 |
+
}
|
| 10003 |
+
}
|
| 10004 |
+
},
|
| 10005 |
"node_modules/pure-rand": {
|
| 10006 |
"version": "6.1.0",
|
| 10007 |
"resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
|
|
|
|
| 10597 |
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
|
| 10598 |
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
|
| 10599 |
},
|
| 10600 |
+
"node_modules/shallow-clone": {
|
| 10601 |
+
"version": "0.1.2",
|
| 10602 |
+
"resolved": "https://registry.npmjs.org/shallow-clone/-/shallow-clone-0.1.2.tgz",
|
| 10603 |
+
"integrity": "sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==",
|
| 10604 |
+
"dependencies": {
|
| 10605 |
+
"is-extendable": "^0.1.1",
|
| 10606 |
+
"kind-of": "^2.0.1",
|
| 10607 |
+
"lazy-cache": "^0.2.3",
|
| 10608 |
+
"mixin-object": "^2.0.1"
|
| 10609 |
+
},
|
| 10610 |
+
"engines": {
|
| 10611 |
+
"node": ">=0.10.0"
|
| 10612 |
+
}
|
| 10613 |
+
},
|
| 10614 |
+
"node_modules/shallow-clone/node_modules/kind-of": {
|
| 10615 |
+
"version": "2.0.1",
|
| 10616 |
+
"resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz",
|
| 10617 |
+
"integrity": "sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==",
|
| 10618 |
+
"dependencies": {
|
| 10619 |
+
"is-buffer": "^1.0.2"
|
| 10620 |
+
},
|
| 10621 |
+
"engines": {
|
| 10622 |
+
"node": ">=0.10.0"
|
| 10623 |
+
}
|
| 10624 |
+
},
|
| 10625 |
+
"node_modules/shallow-clone/node_modules/lazy-cache": {
|
| 10626 |
+
"version": "0.2.7",
|
| 10627 |
+
"resolved": "https://registry.npmjs.org/lazy-cache/-/lazy-cache-0.2.7.tgz",
|
| 10628 |
+
"integrity": "sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==",
|
| 10629 |
+
"engines": {
|
| 10630 |
+
"node": ">=0.10.0"
|
| 10631 |
+
}
|
| 10632 |
+
},
|
| 10633 |
"node_modules/shebang-command": {
|
| 10634 |
"version": "2.0.0",
|
| 10635 |
"resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
|
backend/functions/package.json
CHANGED
|
@@ -47,6 +47,8 @@
|
|
| 47 |
"minio": "^7.1.3",
|
| 48 |
"openai": "^4.20.0",
|
| 49 |
"puppeteer": "^22.6.3",
|
|
|
|
|
|
|
| 50 |
"stripe": "^11.11.0",
|
| 51 |
"tiktoken": "^1.0.10",
|
| 52 |
"turndown": "^7.1.3",
|
|
|
|
| 47 |
"minio": "^7.1.3",
|
| 48 |
"openai": "^4.20.0",
|
| 49 |
"puppeteer": "^22.6.3",
|
| 50 |
+
"puppeteer-extra": "^3.3.6",
|
| 51 |
+
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
| 52 |
"stripe": "^11.11.0",
|
| 53 |
"tiktoken": "^1.0.10",
|
| 54 |
"turndown": "^7.1.3",
|
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
| 4 |
import _ from 'lodash';
|
|
@@ -32,11 +32,11 @@ export class CrawlerHost extends RPCHost {
|
|
| 32 |
const toBeTurnedToMd = snapshot.parsed?.content;
|
| 33 |
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
|
| 34 |
|
| 35 |
-
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
|
| 36 |
|
| 37 |
const formatted = {
|
| 38 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
| 39 |
-
url: snapshot.href.trim(),
|
| 40 |
content: contentText.trim(),
|
| 41 |
|
| 42 |
toString() {
|
|
@@ -80,7 +80,15 @@ ${this.content}
|
|
| 80 |
},
|
| 81 |
) {
|
| 82 |
const noSlashURL = ctx.req.url.slice(1);
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
const screenshotEnabled = Boolean(ctx.req.headers['x-screenshot']);
|
| 85 |
const noCache = Boolean(ctx.req.headers['x-no-cache']);
|
| 86 |
|
|
@@ -125,7 +133,7 @@ ${this.content}
|
|
| 125 |
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
| 126 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 127 |
lastScrapped = scrapped;
|
| 128 |
-
if (!scrapped?.parsed?.content) {
|
| 129 |
continue;
|
| 130 |
}
|
| 131 |
|
|
@@ -143,7 +151,7 @@ ${this.content}
|
|
| 143 |
|
| 144 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 145 |
lastScrapped = scrapped;
|
| 146 |
-
if (!scrapped?.parsed?.content) {
|
| 147 |
continue;
|
| 148 |
}
|
| 149 |
|
|
|
|
| 1 |
+
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError, ParamValidationError } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
| 4 |
import _ from 'lodash';
|
|
|
|
| 32 |
const toBeTurnedToMd = snapshot.parsed?.content;
|
| 33 |
const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
|
| 34 |
|
| 35 |
+
const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text?.trim();
|
| 36 |
|
| 37 |
const formatted = {
|
| 38 |
title: (snapshot.parsed?.title || snapshot.title || '').trim(),
|
| 39 |
+
url: snapshot.href?.trim(),
|
| 40 |
content: contentText.trim(),
|
| 41 |
|
| 42 |
toString() {
|
|
|
|
| 80 |
},
|
| 81 |
) {
|
| 82 |
const noSlashURL = ctx.req.url.slice(1);
|
| 83 |
+
let urlToCrawl;
|
| 84 |
+
try {
|
| 85 |
+
urlToCrawl = new URL(normalizeUrl(noSlashURL.trim()));
|
| 86 |
+
} catch (err) {
|
| 87 |
+
throw new ParamValidationError({
|
| 88 |
+
message: `${err}`,
|
| 89 |
+
path: 'url'
|
| 90 |
+
});
|
| 91 |
+
}
|
| 92 |
const screenshotEnabled = Boolean(ctx.req.headers['x-screenshot']);
|
| 93 |
const noCache = Boolean(ctx.req.headers['x-no-cache']);
|
| 94 |
|
|
|
|
| 133 |
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
| 134 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 135 |
lastScrapped = scrapped;
|
| 136 |
+
if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
|
| 137 |
continue;
|
| 138 |
}
|
| 139 |
|
|
|
|
| 151 |
|
| 152 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 153 |
lastScrapped = scrapped;
|
| 154 |
+
if (!scrapped?.parsed?.content || !(scrapped.title?.trim())) {
|
| 155 |
continue;
|
| 156 |
}
|
| 157 |
|
backend/functions/src/index.ts
CHANGED
|
@@ -21,6 +21,7 @@ initializeApp();
|
|
| 21 |
|
| 22 |
import { loadModulesDynamically, registry } from './shared';
|
| 23 |
import path from 'path';
|
|
|
|
| 24 |
loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
|
| 25 |
|
| 26 |
Object.assign(exports, registry.exportAll());
|
|
@@ -31,4 +32,12 @@ Object.assign(exports, registry.exportGrouped({
|
|
| 31 |
registry.title = 'reader';
|
| 32 |
registry.version = '0.1.0';
|
| 33 |
|
| 34 |
-
process.on('unhandledRejection', () =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
import { loadModulesDynamically, registry } from './shared';
|
| 23 |
import path from 'path';
|
| 24 |
+
import { ApplicationError } from 'civkit';
|
| 25 |
loadModulesDynamically(path.resolve(__dirname, 'cloud-functions'));
|
| 26 |
|
| 27 |
Object.assign(exports, registry.exportAll());
|
|
|
|
| 32 |
registry.title = 'reader';
|
| 33 |
registry.version = '0.1.0';
|
| 34 |
|
| 35 |
+
process.on('unhandledRejection', (err) => {
|
| 36 |
+
// Work around Firebase runtime bug.
|
| 37 |
+
if (err instanceof ApplicationError) {
|
| 38 |
+
// Application error shall not crash the process;
|
| 39 |
+
return;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
throw err;
|
| 43 |
+
});
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
import { AssertionFailureError, AsyncService, Defer, HashManager, marshalErrorLike } from 'civkit';
|
| 2 |
import { container, singleton } from 'tsyringe';
|
| 3 |
-
import
|
| 4 |
import { Logger } from '../shared/services/logger';
|
| 5 |
import genericPool from 'generic-pool';
|
| 6 |
import os from 'os';
|
| 7 |
import fs from 'fs';
|
| 8 |
import { Crawled } from '../db/crawled';
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
@@ -31,6 +33,12 @@ export interface PageSnapshot {
|
|
| 31 |
}
|
| 32 |
const md5Hasher = new HashManager('md5', 'hex');
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
@singleton()
|
| 35 |
export class PuppeteerControl extends AsyncService {
|
| 36 |
|
|
@@ -77,9 +85,10 @@ export class PuppeteerControl extends AsyncService {
|
|
| 77 |
headless: true,
|
| 78 |
timeout: 10_000
|
| 79 |
}).catch((err) => {
|
| 80 |
-
this.logger.error(`Unknown firebase issue, just die fast
|
| 81 |
process.nextTick(() => {
|
| 82 |
-
|
|
|
|
| 83 |
});
|
| 84 |
return Promise.reject(err);
|
| 85 |
});
|
|
@@ -100,7 +109,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 100 |
const preparations = [];
|
| 101 |
|
| 102 |
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
| 103 |
-
preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
| 104 |
preparations.push(page.setBypassCSP(true));
|
| 105 |
preparations.push(page.setViewport({ width: 1920, height: 1080 }));
|
| 106 |
preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {
|
|
|
|
| 1 |
import { AssertionFailureError, AsyncService, Defer, HashManager, marshalErrorLike } from 'civkit';
|
| 2 |
import { container, singleton } from 'tsyringe';
|
| 3 |
+
import type { Browser } from 'puppeteer';
|
| 4 |
import { Logger } from '../shared/services/logger';
|
| 5 |
import genericPool from 'generic-pool';
|
| 6 |
import os from 'os';
|
| 7 |
import fs from 'fs';
|
| 8 |
import { Crawled } from '../db/crawled';
|
| 9 |
+
import puppeteer from 'puppeteer-extra';
|
| 10 |
+
import puppeteerStealth from 'puppeteer-extra-plugin-stealth';
|
| 11 |
|
| 12 |
|
| 13 |
const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
|
|
|
|
| 33 |
}
|
| 34 |
const md5Hasher = new HashManager('md5', 'hex');
|
| 35 |
|
| 36 |
+
puppeteer.use(puppeteerStealth());
|
| 37 |
+
// const puppeteerUAOverride = require('puppeteer-extra-plugin-stealth/evasions/user-agent-override');
|
| 38 |
+
// puppeteer.use(puppeteerUAOverride({
|
| 39 |
+
// userAgent: `Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`
|
| 40 |
+
// }))
|
| 41 |
+
|
| 42 |
@singleton()
|
| 43 |
export class PuppeteerControl extends AsyncService {
|
| 44 |
|
|
|
|
| 85 |
headless: true,
|
| 86 |
timeout: 10_000
|
| 87 |
}).catch((err) => {
|
| 88 |
+
this.logger.error(`Unknown firebase issue, just die fast.`, { err });
|
| 89 |
process.nextTick(() => {
|
| 90 |
+
this.emit('error', err);
|
| 91 |
+
// process.exit(1);
|
| 92 |
});
|
| 93 |
return Promise.reject(err);
|
| 94 |
});
|
|
|
|
| 109 |
const preparations = [];
|
| 110 |
|
| 111 |
// preparations.push(page.setUserAgent(`Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)`));
|
| 112 |
+
// preparations.push(page.setUserAgent(`Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`));
|
| 113 |
preparations.push(page.setBypassCSP(true));
|
| 114 |
preparations.push(page.setViewport({ width: 1920, height: 1080 }));
|
| 115 |
preparations.push(page.exposeFunction('reportSnapshot', (snapshot: any) => {
|