# 爬取图库

发表于 2018-05-28 | 分类于 Scrapy框架

## 目标

爬取cosplay图片

## 实现

### items.py代码

```python
class GetImagesItem(scrapy.Item):
    # 名称不能变
    image_urls = scrapy.Field()
    images=scrapy.Field()
    pass
```

### settings.py代码

```python
ITEM_PIPELINES = {
    # 添加Scrapy内置下载器
    'scrapy.pipelines.images.ImagesPipeline':1,
}
IMAGES_STORE = '/Users/mintaoyu/Desktop/images'
IMAGES_MIN_WIDTH = 600
IMAGES_MIN_HEIGHT = 400
```

### images.py代码

```python
import scrapy
from scrapy.linkextractors import LinkExtractor
from ..items import GetImagesItem

class ImagesSpider(scrapy.Spider):
    name = 'images'
    allowed_domains = ['moe.005.tv']
    start_urls = ['http://moe.005.tv/cosplay/']

    def parse(self, response):
        # 例子页面
        le = LinkExtractor(restrict_xpaths='//div[@class="zhuti_w_list"]')
        for link in le.extract_links(response):
            yield scrapy.Request(link.url,callback=self.parse_images)

        # 下一页
        le = LinkExtractor(restrict_xpaths='//a[@class="n"]')
        links = le.extract_links(response)
        if links:
            next_url = links[0].url
            yield scrapy.Request(next_url,callback=self.parse)

    # 点进页面链接后要获取相关信息的逻辑代码
    def parse_images(self, response):
        example = GetImagesItem()
        example['image_urls'] = response.xpath('//div[@class="content_nr"]//img/@src').extract()
        return example
```

赏个🍗吧

打赏 微信支付 支付宝

- 本文作者: Keeep
- 本文链接: http://Keeep.coding.me/blog/爬取360图库/
- 版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 3.0 许可协议。转载请注明出处!