File size: 1,231 Bytes
5469918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import scrapy

class QuotesSpider(scrapy.Spider):
    name = "devgan"
    allowed_domains = ["devgan.in"]

    def start_requests(self):
        urls = [
            'http://devgan.in/all_sections_ipc.php',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse_mainpage)

    def parse_mainpage(self, response):
        # identify the links to the individual section pages
        sections = response.css('div#content').css('a')#.getall()
        # for each section
        for section in sections:
            # loc var
            loc = {
                'title' : section.xpath('@title').extract(),
                'link' : 'http://devgan.in' + section.xpath('@href').extract()[0],
                'section': section.css('span.sectionlink::text').extract(),
            }
            # traverse again and extract the description
            yield scrapy.Request(loc['link'], callback=self.parse_section, 
                    cb_kwargs=dict(meta=loc))

    def parse_section(self, response, meta):
        # extract the description
        meta['description'] = " ".join(response.css('tr.mys-desc').css('::text').extract())
        # return
        return meta