Spaces:
Sleeping
Sleeping
import logging | |
import os | |
from scrapy.crawler import CrawlerProcess | |
from scrapy.utils.project import get_project_settings | |
from rtd_scraper.tutorial.spiders.docs_spider import DocsSpider, sanitize_url | |
# Scrapy appears to switch every logger in the process to DEBUG, which floods
# the output, so pin every logger registered so far to INFO.
# Iterate over a snapshot of the names: getLogger() may rewrite registry
# entries (e.g. placeholder entries for parent names) while we walk them.
for name in list(logging.root.manager.loggerDict):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
def run_spider(homepage_url, save_directory, target_version=None) -> None:
    """Run a ``DocsSpider`` crawl in a new Scrapy ``CrawlerProcess``.

    The process is configured from the Scrapy project settings, one
    ``DocsSpider`` crawl is scheduled on it, and the process is started.

    Args:
        homepage_url: Forwarded to the spider as ``homepage_url``.
        save_directory: Forwarded to the spider as ``save_dir``.
        target_version: Forwarded to the spider as ``target_version``;
            defaults to ``None``.

    NOTE(review): ``CrawlerProcess.start()`` runs the Twisted reactor, which
    presumably cannot be restarted within the same interpreter, so this
    function is likely single-use per process -- confirm against the Scrapy
    documentation.
    """
    process = CrawlerProcess(settings=get_project_settings())
    process.crawl(
        DocsSpider,
        homepage_url=homepage_url,
        save_dir=save_directory,
        target_version=target_version,
    )

    # Start crawling. NOTE(review): start() is documented to block until the
    # crawl has finished -- confirm for the Scrapy version in use.
    process.start()

    # Ask the process to stop any crawl that is still running.
    # NOTE(review): if start() only returns once crawling is over, this call
    # is likely a no-op; it is kept so behaviour matches the original.
    process.stop()