Created using Colab
Browse files- notebooks/Crawl_a_Website.ipynb +20 -20
notebooks/Crawl_a_Website.ipynb
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
"toc_visible": true,
|
8 |
-
"authorship_tag": "
|
9 |
"include_colab_link": true
|
10 |
},
|
11 |
"kernelspec": {
|
@@ -39,7 +39,7 @@
|
|
39 |
},
|
40 |
"outputId": "155feab4-8ae6-43da-a07f-8a1f4b677c2b"
|
41 |
},
|
42 |
-
"execution_count":
|
43 |
"outputs": [
|
44 |
{
|
45 |
"output_type": "stream",
|
@@ -73,7 +73,7 @@
|
|
73 |
"metadata": {
|
74 |
"id": "wxDPsVXSAj6_"
|
75 |
},
|
76 |
-
"execution_count":
|
77 |
"outputs": []
|
78 |
},
|
79 |
{
|
@@ -116,7 +116,7 @@
|
|
116 |
"metadata": {
|
117 |
"id": "x74PqfQ7eIzD"
|
118 |
},
|
119 |
-
"execution_count":
|
120 |
"outputs": []
|
121 |
},
|
122 |
{
|
@@ -149,7 +149,7 @@
|
|
149 |
"metadata": {
|
150 |
"id": "Q6Xs1OhUfVQV"
|
151 |
},
|
152 |
-
"execution_count":
|
153 |
"outputs": []
|
154 |
},
|
155 |
{
|
@@ -164,7 +164,7 @@
|
|
164 |
"id": "3cNdJNi2g1ly",
|
165 |
"outputId": "f5184c15-6b55-47ee-98ee-646a06290a4c"
|
166 |
},
|
167 |
-
"execution_count":
|
168 |
"outputs": [
|
169 |
{
|
170 |
"output_type": "execute_result",
|
@@ -192,7 +192,7 @@
|
|
192 |
"id": "WleP60A3gkQM",
|
193 |
"outputId": "8c79ab53-e47b-4227-eb6f-0286b8ba2d15"
|
194 |
},
|
195 |
-
"execution_count":
|
196 |
"outputs": [
|
197 |
{
|
198 |
"output_type": "execute_result",
|
@@ -226,7 +226,7 @@
|
|
226 |
"metadata": {
|
227 |
"id": "TOJ3K-CBfVDR"
|
228 |
},
|
229 |
-
"execution_count":
|
230 |
"outputs": []
|
231 |
},
|
232 |
{
|
@@ -335,7 +335,7 @@
|
|
335 |
{
|
336 |
"cell_type": "code",
|
337 |
"source": [
|
338 |
-
"url = \"https://api.usescraper.com/crawler/jobs/{}/data\".format(
|
339 |
"\n",
|
340 |
"data_res = requests.request(\"GET\", url, headers=headers)\n",
|
341 |
"\n",
|
@@ -350,7 +350,7 @@
|
|
350 |
"id": "J4dUn4cmGGab",
|
351 |
"outputId": "15717b0d-dac6-4a67-e13f-1330623d4ced"
|
352 |
},
|
353 |
-
"execution_count":
|
354 |
"outputs": [
|
355 |
{
|
356 |
"output_type": "stream",
|
@@ -375,7 +375,7 @@
|
|
375 |
"id": "F8VEQvJkITLJ",
|
376 |
"outputId": "b54ec108-7221-4230-8b61-d0a4be503a66"
|
377 |
},
|
378 |
-
"execution_count":
|
379 |
"outputs": [
|
380 |
{
|
381 |
"output_type": "stream",
|
@@ -413,7 +413,7 @@
|
|
413 |
"metadata": {
|
414 |
"id": "YEieGzSFSXas"
|
415 |
},
|
416 |
-
"execution_count":
|
417 |
"outputs": []
|
418 |
},
|
419 |
{
|
@@ -435,7 +435,7 @@
|
|
435 |
"metadata": {
|
436 |
"id": "wxmiQDv3SXV6"
|
437 |
},
|
438 |
-
"execution_count":
|
439 |
"outputs": []
|
440 |
},
|
441 |
{
|
@@ -448,7 +448,7 @@
|
|
448 |
"metadata": {
|
449 |
"id": "tCVhv4OkSXTV"
|
450 |
},
|
451 |
-
"execution_count":
|
452 |
"outputs": []
|
453 |
},
|
454 |
{
|
@@ -476,7 +476,7 @@
|
|
476 |
"metadata": {
|
477 |
"id": "6KpeCRMBUgup"
|
478 |
},
|
479 |
-
"execution_count":
|
480 |
"outputs": []
|
481 |
},
|
482 |
{
|
@@ -489,7 +489,7 @@
|
|
489 |
"metadata": {
|
490 |
"id": "nWTBidwoZSO0"
|
491 |
},
|
492 |
-
"execution_count":
|
493 |
"outputs": []
|
494 |
},
|
495 |
{
|
@@ -500,7 +500,7 @@
|
|
500 |
"metadata": {
|
501 |
"id": "RUuJO0IIYSeU"
|
502 |
},
|
503 |
-
"execution_count":
|
504 |
"outputs": []
|
505 |
},
|
506 |
{
|
@@ -511,7 +511,7 @@
|
|
511 |
"metadata": {
|
512 |
"id": "6_s2LkH6YX1V"
|
513 |
},
|
514 |
-
"execution_count":
|
515 |
"outputs": []
|
516 |
},
|
517 |
{
|
@@ -527,7 +527,7 @@
|
|
527 |
"id": "02zdJNqIZKep",
|
528 |
"outputId": "76340610-0d98-4fd0-d237-ddb9f1752391"
|
529 |
},
|
530 |
-
"execution_count":
|
531 |
"outputs": [
|
532 |
{
|
533 |
"output_type": "execute_result",
|
@@ -562,7 +562,7 @@
|
|
562 |
"id": "PuCcgP0nZSIl",
|
563 |
"outputId": "e136cdbb-2ee4-4dfb-f532-f6c9365e519e"
|
564 |
},
|
565 |
-
"execution_count":
|
566 |
"outputs": [
|
567 |
{
|
568 |
"output_type": "stream",
|
|
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
"toc_visible": true,
|
8 |
+
"authorship_tag": "ABX9TyOUem37lhhg0mJYauho+pvb",
|
9 |
"include_colab_link": true
|
10 |
},
|
11 |
"kernelspec": {
|
|
|
39 |
},
|
40 |
"outputId": "155feab4-8ae6-43da-a07f-8a1f4b677c2b"
|
41 |
},
|
42 |
+
"execution_count": null,
|
43 |
"outputs": [
|
44 |
{
|
45 |
"output_type": "stream",
|
|
|
73 |
"metadata": {
|
74 |
"id": "wxDPsVXSAj6_"
|
75 |
},
|
76 |
+
"execution_count": null,
|
77 |
"outputs": []
|
78 |
},
|
79 |
{
|
|
|
116 |
"metadata": {
|
117 |
"id": "x74PqfQ7eIzD"
|
118 |
},
|
119 |
+
"execution_count": null,
|
120 |
"outputs": []
|
121 |
},
|
122 |
{
|
|
|
149 |
"metadata": {
|
150 |
"id": "Q6Xs1OhUfVQV"
|
151 |
},
|
152 |
+
"execution_count": null,
|
153 |
"outputs": []
|
154 |
},
|
155 |
{
|
|
|
164 |
"id": "3cNdJNi2g1ly",
|
165 |
"outputId": "f5184c15-6b55-47ee-98ee-646a06290a4c"
|
166 |
},
|
167 |
+
"execution_count": null,
|
168 |
"outputs": [
|
169 |
{
|
170 |
"output_type": "execute_result",
|
|
|
192 |
"id": "WleP60A3gkQM",
|
193 |
"outputId": "8c79ab53-e47b-4227-eb6f-0286b8ba2d15"
|
194 |
},
|
195 |
+
"execution_count": null,
|
196 |
"outputs": [
|
197 |
{
|
198 |
"output_type": "execute_result",
|
|
|
226 |
"metadata": {
|
227 |
"id": "TOJ3K-CBfVDR"
|
228 |
},
|
229 |
+
"execution_count": null,
|
230 |
"outputs": []
|
231 |
},
|
232 |
{
|
|
|
335 |
{
|
336 |
"cell_type": "code",
|
337 |
"source": [
|
338 |
+
"url = \"https://api.usescraper.com/crawler/jobs/{}/data\".format(response['id'])\n",
|
339 |
"\n",
|
340 |
"data_res = requests.request(\"GET\", url, headers=headers)\n",
|
341 |
"\n",
|
|
|
350 |
"id": "J4dUn4cmGGab",
|
351 |
"outputId": "15717b0d-dac6-4a67-e13f-1330623d4ced"
|
352 |
},
|
353 |
+
"execution_count": null,
|
354 |
"outputs": [
|
355 |
{
|
356 |
"output_type": "stream",
|
|
|
375 |
"id": "F8VEQvJkITLJ",
|
376 |
"outputId": "b54ec108-7221-4230-8b61-d0a4be503a66"
|
377 |
},
|
378 |
+
"execution_count": null,
|
379 |
"outputs": [
|
380 |
{
|
381 |
"output_type": "stream",
|
|
|
413 |
"metadata": {
|
414 |
"id": "YEieGzSFSXas"
|
415 |
},
|
416 |
+
"execution_count": null,
|
417 |
"outputs": []
|
418 |
},
|
419 |
{
|
|
|
435 |
"metadata": {
|
436 |
"id": "wxmiQDv3SXV6"
|
437 |
},
|
438 |
+
"execution_count": null,
|
439 |
"outputs": []
|
440 |
},
|
441 |
{
|
|
|
448 |
"metadata": {
|
449 |
"id": "tCVhv4OkSXTV"
|
450 |
},
|
451 |
+
"execution_count": null,
|
452 |
"outputs": []
|
453 |
},
|
454 |
{
|
|
|
476 |
"metadata": {
|
477 |
"id": "6KpeCRMBUgup"
|
478 |
},
|
479 |
+
"execution_count": null,
|
480 |
"outputs": []
|
481 |
},
|
482 |
{
|
|
|
489 |
"metadata": {
|
490 |
"id": "nWTBidwoZSO0"
|
491 |
},
|
492 |
+
"execution_count": null,
|
493 |
"outputs": []
|
494 |
},
|
495 |
{
|
|
|
500 |
"metadata": {
|
501 |
"id": "RUuJO0IIYSeU"
|
502 |
},
|
503 |
+
"execution_count": null,
|
504 |
"outputs": []
|
505 |
},
|
506 |
{
|
|
|
511 |
"metadata": {
|
512 |
"id": "6_s2LkH6YX1V"
|
513 |
},
|
514 |
+
"execution_count": null,
|
515 |
"outputs": []
|
516 |
},
|
517 |
{
|
|
|
527 |
"id": "02zdJNqIZKep",
|
528 |
"outputId": "76340610-0d98-4fd0-d237-ddb9f1752391"
|
529 |
},
|
530 |
+
"execution_count": null,
|
531 |
"outputs": [
|
532 |
{
|
533 |
"output_type": "execute_result",
|
|
|
562 |
"id": "PuCcgP0nZSIl",
|
563 |
"outputId": "e136cdbb-2ee4-4dfb-f532-f6c9365e519e"
|
564 |
},
|
565 |
+
"execution_count": null,
|
566 |
"outputs": [
|
567 |
{
|
568 |
"output_type": "stream",
|