cyberandy committed on
Commit
d469446
·
verified ·
1 Parent(s): fd8ba41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -231,7 +231,7 @@ class WebsiteCrawler:
231
  desc = self.clean_text(desc) if desc else ""
232
 
233
  # Skip if it's duplicate content
234
- if self.is_duplicate_content(desc, title):
235
  return []
236
 
237
  # Determine category and importance
@@ -294,8 +294,8 @@ class WebsiteCrawler:
294
  self.homepage_metadata = {
295
  "site_name": urlparse(url).netloc.split('.')[0].capitalize(),
296
  "description": None
297
- }
298
-
299
  async def crawl_website(self, start_url):
300
  """Crawl website starting from the given URL"""
301
  # First process the homepage
 
231
  desc = self.clean_text(desc) if desc else ""
232
 
233
  # Skip if it's duplicate content
234
+ if self.is_duplicate_content(desc, title, url):
235
  return []
236
 
237
  # Determine category and importance
 
294
  self.homepage_metadata = {
295
  "site_name": urlparse(url).netloc.split('.')[0].capitalize(),
296
  "description": None
297
+ }
298
+
299
  async def crawl_website(self, start_url):
300
  """Crawl website starting from the given URL"""
301
  # First process the homepage