Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,7 @@ app.add_middleware(
|
|
24 |
|
25 |
# Function to crawl all URLs from a domain
|
26 |
def get_all_links_from_domain(domain_url):
|
|
|
27 |
visited_urls = set()
|
28 |
domain_links = set()
|
29 |
parsed_initial_url = urlparse(domain_url)
|
@@ -33,6 +34,8 @@ def get_all_links_from_domain(domain_url):
|
|
33 |
|
34 |
# Function to crawl links from a page within the same domain
|
35 |
def get_links_from_page(url, visited_urls, all_links, base_domain):
|
|
|
|
|
36 |
if not url.startswith(base_domain):
|
37 |
return
|
38 |
|
|
|
24 |
|
25 |
# Function to crawl all URLs from a domain
|
26 |
def get_all_links_from_domain(domain_url):
|
27 |
+ print("domain url " + domain_url)
|
28 |
visited_urls = set()
|
29 |
domain_links = set()
|
30 |
parsed_initial_url = urlparse(domain_url)
|
|
|
34 |
|
35 |
# Function to crawl links from a page within the same domain
|
36 |
def get_links_from_page(url, visited_urls, all_links, base_domain):
|
37 |
+ print("url " + url)
|
38 |
+ print("base_domain " + base_domain)
|
39 |
if not url.startswith(base_domain):
|
40 |
return
|
41 |
|