File size: 818 Bytes
ac493ec
 
 
 
 
 
df044c6
db19951
ac493ec
 
 
 
 
 
75f72d8
ac493ec
db19951
 
 
 
 
 
ac493ec
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import logging
import os

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from rtd_scraper.tutorial.spiders.docs_spider import DocsSpider, sanitize_url

# Scrapy seems to leave every registered logger at DEBUG. Walk the logging
# manager's registry and set each named logger to INFO so that debug-level
# output is suppressed across the whole application.
for name in logging.root.manager.loggerDict:
    logging.getLogger(name).setLevel(logging.INFO)


def run_spider(homepage_url, save_directory, target_version=None):
    """Run ``DocsSpider`` inside a Scrapy ``CrawlerProcess``.

    The process is configured from the Scrapy project settings, the spider is
    scheduled with the given arguments, and the process is then started and
    stopped.

    Args:
        homepage_url: Forwarded to the spider as ``homepage_url``.
        save_directory: Forwarded to the spider as ``save_dir``.
        target_version: Forwarded to the spider as ``target_version``;
            defaults to ``None``.
    """
    # Keyword arguments handed to DocsSpider when the crawl is scheduled.
    spider_kwargs = {
        "homepage_url": homepage_url,
        "save_dir": save_directory,
        "target_version": target_version,
    }

    crawler_process = CrawlerProcess(settings=get_project_settings())
    crawler_process.crawl(DocsSpider, **spider_kwargs)

    # Kick off the crawl.
    # NOTE(review): per the Scrapy docs, start() blocks until crawling ends.
    crawler_process.start()

    # Ask the process to stop any crawl jobs that are still running.
    crawler_process.stop()