Spaces:
Running
Running
File size: 10,442 Bytes
1151f26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Root URLs handed to the link extractor in the next cell.\n",
"websites = [\"https://docs.wized.com/\", \"https://v1.wized.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from rag_scraper.link_extractor import LinkExtractor\n",
"\n",
"# Collect the links reported for every root site into one flat list.\n",
"page_urls = []\n",
"for url in websites:\n",
"    page_urls.extend(LinkExtractor.scrape_url(url))\n",
"\n",
"# The same URL can turn up under more than one root site, so drop repeats.\n",
"# dict.fromkeys keeps the first occurrence of each URL and preserves order.\n",
"page_urls = list(dict.fromkeys(page_urls))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['https://docs.wized.com/requests/firebase/get-user/',\n",
" 'https://docs.wized.com/changelog/',\n",
" 'https://docs.wized.com/actions/after-actions/set-variable/',\n",
" 'https://docs.wized.com/actions/after-actions/perform-request/',\n",
" 'https://docs.wized.com/requests/firebase/sign-in-email-password/',\n",
" 'https://docs.wized.com/requests/supabase/sign-in-password/',\n",
" 'https://docs.wized.com/actions/after-actions/navigate-to/',\n",
" 'https://docs.wized.com/actions/element-actions/',\n",
" 'https://docs.wized.com/requests/firebase/',\n",
" 'https://docs.wized.com/requests/firebase/sign-up-email-password/',\n",
" 'https://docs.wized.com/requests/supabase/sign-in-oauth/',\n",
" 'https://docs.wized.com/',\n",
" 'https://docs.wized.com/requests/firebase/sign-out/',\n",
" 'https://docs.wized.com/function-editor/',\n",
" 'https://docs.wized.com/configurator/backups/',\n",
" 'https://docs.wized.com/requests/supabase/delete-item/',\n",
" 'https://docs.wized.com/embed-versions-comparison/',\n",
" 'https://docs.wized.com/function-editor/common-patterns/',\n",
" 'https://docs.wized.com/actions/element-actions/render-list/',\n",
" 'https://docs.wized.com/function-editor/parameters/',\n",
" 'https://docs.wized.com/actions/reactivity/',\n",
" 'https://docs.wized.com/configurator/settings/',\n",
" 'https://docs.wized.com/requests/rest/',\n",
" 'https://docs.wized.com/requests/firebase/send-password-reset/',\n",
" 'https://docs.wized.com/configurator/publishing/',\n",
" 'https://docs.wized.com/actions/after-actions/set-cookie/',\n",
" 'https://docs.wized.com/configurator/',\n",
" 'https://docs.wized.com/requests/supabase/sign-out/',\n",
" 'https://docs.wized.com/requests/supabase/get-session/',\n",
" 'https://docs.wized.com/actions/event-actions/page-starts-loading/',\n",
" 'https://docs.wized.com/actions/element-actions/set-visibility/',\n",
" 'https://docs.wized.com/data-store/navigation/',\n",
" 'https://docs.wized.com/actions/element-actions/set-form-values/',\n",
" 'https://docs.wized.com/requests/firebase/delete-item/',\n",
" 'https://docs.wized.com/configurator/canvas/',\n",
" 'https://docs.wized.com/actions/element-actions/set-html-attribute/',\n",
" 'https://docs.wized.com/actions/element-actions/add-param-to-link/',\n",
" 'https://docs.wized.com/requests/supabase/create-item/',\n",
" 'https://docs.wized.com/actions/after-actions/',\n",
" 'https://docs.wized.com/requests/firebase/get-list/',\n",
" 'https://docs.wized.com/requests/firebase/set-item/',\n",
" 'https://docs.wized.com/actions/event-actions/page-finishes-loading/',\n",
" 'https://docs.wized.com/requests/firebase/create-item/',\n",
" 'https://docs.wized.com/requests/firebase/update-email/',\n",
" 'https://docs.wized.com/actions/event-actions/attribute-present/',\n",
" 'https://docs.wized.com/actions/event-actions/custom-condition/',\n",
" 'https://docs.wized.com/requests/supabase/sign-in-magic-link/',\n",
" 'https://docs.wized.com/actions/event-actions/',\n",
" 'https://docs.wized.com/actions/element-actions/on-event/',\n",
" 'https://docs.wized.com/actions/',\n",
" 'https://docs.wized.com/data-store/input-fields/',\n",
" 'https://docs.wized.com/data-store/forms/',\n",
" 'https://docs.wized.com/data-store/variables/',\n",
" 'https://docs.wized.com/javascript-api/',\n",
" 'https://docs.wized.com/actions/element-actions/set-text/',\n",
" 'https://docs.wized.com/requests/supabase/get-item/',\n",
" 'https://docs.wized.com/requests/supabase/get-list/',\n",
" 'https://docs.wized.com/actions/after-actions/run-function/',\n",
" 'https://docs.wized.com/requests/firebase/update-item/',\n",
" 'https://docs.wized.com/actions/element-actions/set-input-value/',\n",
" 'https://docs.wized.com/requests/firebase/get-item/',\n",
" 'https://docs.wized.com/requests/supabase/send-password-reset/',\n",
" 'https://docs.wized.com/requests/firebase/sign-in-provider/',\n",
" 'https://v1.wized.com/',\n",
" 'https://docs.wized.com/data-store/requests-data/',\n",
" 'https://docs.wized.com/requests/supabase/',\n",
" 'https://docs.wized.com/requests/supabase/update-item/',\n",
" 'https://docs.wized.com/actions/event-actions/request-finishes/',\n",
" 'https://docs.wized.com/requests/supabase/sign-up/',\n",
" 'https://docs.wized.com/actions/element-actions/set-style/',\n",
" 'https://docs.wized.com/data-store/',\n",
" 'https://docs.wized.com/requests/',\n",
" 'https://docs.wized.com/actions/element-actions/set-class/',\n",
" 'https://docs.wized.com/requests/firebase/unsubscribe-real-time/',\n",
" 'https://docs.wized.com/requests/supabase/get-user/',\n",
" 'https://docs.wized.com/requests/supabase/update-user/',\n",
" 'https://docs.wized.com/requests/supabase/unsubscribe-real-time/',\n",
" 'https://docs.wized.com/data-store/cookies/',\n",
" 'https://docs.wized.com/requests/rest/file-uploads/',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/data-out',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/intro-to-web-applications',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-projects/build-a-weather-app',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/booleans',\n",
" 'https://v1.wized.com/naming-convention/variable-naming',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/social-sign-in-with-wized-and-xano',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/filtering-request-data',\n",
" 'https://v1.wized.com/guides/general/pages-from-the-website-arent-showing-up-in-the-configurator',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/configurator',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/authentication',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/master-data-types-in-wized',\n",
" 'https://v1.wized.com/',\n",
" 'https://v1.wized.com/guides/general/previewing-array-data-with-index-variables-doesnt-work',\n",
" 'https://v1.wized.com/naming-convention/action-naming',\n",
" 'https://v1.wized.com/cheat-sheets/prevent-content-flashing',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/operators',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/filtering-and-sorting-with-finsweet-attributes',\n",
" 'https://v1.wized.com/javascript-api/js-api-documentation',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/conditional-logic',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/magic-link-authentication',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-bulk-ecommerce-store',\n",
" 'https://v1.wized.com/faq/frequently-asked-questions',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/data-in',\n",
" 'https://v1.wized.com/guides/general/how-to-render-a-list-of-loaded-items-in-wized',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/my-apps',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/elements',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/dashboard',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/file-upload',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-custom-referral-program',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-projects/build-a-classified-ads-app',\n",
" 'https://v1.wized.com/cheat-sheets/debugging-wized-applications',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/stripe-checkout',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-lessons/storing-data-on-the-front-end',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-projects',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons',\n",
" 'https://v1.wized.com/updates/',\n",
" 'https://v1.wized.com/naming-convention/request-naming',\n",
" 'https://v1.wized.com/guides/general/third-party-login-with-xano',\n",
" 'https://v1.wized.com/naming-convention/element-naming',\n",
" 'https://v1.wized.com/advanced-learning-path/advanced-projects',\n",
" 'https://v1.wized.com/beginner-learning-path/beginner-lessons/actions',\n",
" 'https://v1.wized.com/cheat-sheets/formulas']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"page_urls"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "rag-scraper-L88jsp71-py3.10",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|