Spaces:

WordLift
/

create-llms-txt

Running

cyberandy committed on Nov 25, 2024

Commit

d469446

verified ·

1 Parent(s): fd8ba41

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -231,7 +231,7 @@ class WebsiteCrawler:
             desc = self.clean_text(desc) if desc else ""
             # Skip if it's duplicate content
-            if self.is_duplicate_content(desc, title):
                 return []
             # Determine category and importance
@@ -294,8 +294,8 @@ class WebsiteCrawler:
             self.homepage_metadata = {
                 "site_name": urlparse(url).netloc.split('.')[0].capitalize(),
                 "description": None
-            }
     async def crawl_website(self, start_url):
         """Crawl website starting from the given URL"""
         # First process the homepage

             desc = self.clean_text(desc) if desc else ""
             # Skip if it's duplicate content
+            if self.is_duplicate_content(desc, title, url):
                 return []
             # Determine category and importance
             self.homepage_metadata = {
                 "site_name": urlparse(url).netloc.split('.')[0].capitalize(),
                 "description": None
+            }
     async def crawl_website(self, start_url):
         """Crawl website starting from the given URL"""
         # First process the homepage