Spaces:
Runtime error
Runtime error
import scrapy | |
class QuotesSpider(scrapy.Spider):
    """Crawl the devgan.in section index and emit one item per section.

    The spider requests the index page, follows every anchor found inside
    ``div#content`` to its section page, and yields a dict with the keys
    ``title``, ``link``, ``section`` and ``description``.
    """

    name = "devgan"
    allowed_domains = ["devgan.in"]

    def start_requests(self):
        """Yield the initial request(s) for the section index page."""
        urls = [
            'http://devgan.in/all_sections_ipc.php',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse_mainpage)

    def parse_mainpage(self, response):
        """Yield one request per section link found on the index page.

        Args:
            response: The downloaded index page.

        Yields:
            A ``scrapy.Request`` for each anchor that has an ``href``; the
            partially-filled item is forwarded to ``parse_section`` through
            ``cb_kwargs`` under the ``meta`` keyword.
        """
        # identify the links to the individual section pages
        sections = response.css('div#content').css('a')

        for section in sections:
            href = section.xpath('@href').get()
            if not href:
                # Anchors without an href cannot be followed; skipping them
                # avoids an IndexError that would abort the whole callback.
                continue

            loc = {
                'title': section.xpath('@title').getall(),
                # urljoin resolves relative and absolute hrefs alike against
                # the page URL instead of blindly prefixing the host name.
                'link': response.urljoin(href),
                'section': section.css('span.sectionlink::text').getall(),
            }

            # traverse again and extract the description
            yield scrapy.Request(
                loc['link'],
                callback=self.parse_section,
                cb_kwargs=dict(meta=loc),
            )

    def parse_section(self, response, meta):
        """Complete the item with the description text of a section page.

        Args:
            response: The downloaded section page.
            meta: The dict built in ``parse_mainpage`` (``title``, ``link``,
                ``section``).

        Returns:
            A new dict holding the entries of ``meta`` plus ``description``,
            which is every text node under ``tr.mys-desc`` joined by single
            spaces (an empty string when no such node exists).
        """
        description = " ".join(response.css('tr.mys-desc').css('::text').getall())
        # Build a fresh dict rather than mutating the one that is still
        # referenced by the request's cb_kwargs.
        return {**meta, 'description': description}