Spaces:

drift-ai
/

faq-website

Runtime error

vincentclaes committed on Mar 31, 2023

Commit

0125da1

•

1 Parent(s): 5505694

implement a cut off

Files changed (1) hide show

scrape_website.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import requests
 from bs4 import BeautifulSoup
 def process_webpage(url:str):
     # A set to keep track of visited pages
@@ -40,11 +41,11 @@ def process_webpage(url:str):
     # make main page as first item
     text_list.reverse()
-    page_content = "\n".join(text_list)
     # Print the text content of the landing page and all child pages
     print(page_content)
-    return page_content
 if __name__ == '__main__':

 import requests
 from bs4 import BeautifulSoup
+TOKEN_CUT_OFF = 2500
 def process_webpage(url:str):
     # A set to keep track of visited pages
     # make main page as first item
     text_list.reverse()
+    text_list_cut_off = text_list[:TOKEN_CUT_OFF]
+    page_content = "\n".join(text_list_cut_off)
     # Print the text content of the landing page and all child pages
     print(page_content)
+    return
 if __name__ == '__main__':