Spaces:
Sleeping
Sleeping
File size: 818 Bytes
ac493ec df044c6 db19951 ac493ec 75f72d8 ac493ec db19951 ac493ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import logging
import os
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from rtd_scraper.tutorial.spiders.docs_spider import DocsSpider, sanitize_url
# Scrapy appears to leave every logger in the process at DEBUG, which is very
# noisy. Raise each logger that is already registered when this module is
# imported to INFO. Loggers created after this point are not affected.
for registered_name in logging.root.manager.loggerDict:
    logging.getLogger(registered_name).setLevel(logging.INFO)
def run_spider(homepage_url, save_directory, target_version=None):
    """Crawl a documentation site with ``DocsSpider``.

    A ``CrawlerProcess`` is created from the Scrapy project settings, the
    spider is scheduled with the given arguments, and the crawl is started.

    Args:
        homepage_url: Forwarded to the spider as ``homepage_url``
            (presumably the root URL of the docs site to crawl).
        save_directory: Forwarded to the spider as ``save_dir``
            (presumably where scraped pages are written).
        target_version: Forwarded to the spider as ``target_version``;
            defaults to ``None``.

    Returns:
        None.

    Note:
        Per the Scrapy documentation, ``CrawlerProcess.start()`` runs the
        Twisted reactor and, by default, returns only once the scheduled
        crawls have finished. A Twisted reactor cannot be restarted, so this
        function is expected to be called at most once per Python process.
    """
    spider_kwargs = {
        "homepage_url": homepage_url,
        "save_dir": save_directory,
        "target_version": target_version,
    }

    crawler_process = CrawlerProcess(settings=get_project_settings())
    crawler_process.crawl(DocsSpider, **spider_kwargs)

    # Run the crawl.
    crawler_process.start()

    # Ask any crawler that is still registered to shut down gracefully.
    crawler_process.stop()
|