Chris4K committed on
Commit
856f17d
·
verified ·
1 Parent(s): b558c53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -24,6 +24,7 @@ app.add_middleware(
24
 
25
  # Function to crawl all URLs from a domain
26
  def get_all_links_from_domain(domain_url):
 
27
  visited_urls = set()
28
  domain_links = set()
29
  parsed_initial_url = urlparse(domain_url)
@@ -33,6 +34,8 @@ def get_all_links_from_domain(domain_url):
33
 
34
  # Function to crawl links from a page within the same domain
35
  def get_links_from_page(url, visited_urls, all_links, base_domain):
 
 
36
  if not url.startswith(base_domain):
37
  return
38
 
 
24
 
25
  # Function to crawl all URLs from a domain
26
  def get_all_links_from_domain(domain_url):
27
+ print("domain url " + domain_url)
28
  visited_urls = set()
29
  domain_links = set()
30
  parsed_initial_url = urlparse(domain_url)
 
34
 
35
  # Function to crawl links from a page within the same domain
36
  def get_links_from_page(url, visited_urls, all_links, base_domain):
37
+ print("url " + url)
38
+ print("base_domain " + base_domain)
39
  if not url.startswith(base_domain):
40
  return
41