Files
crawlab/backend/template/scrapy/config_spider/spiders/spider.py
2019-11-24 19:45:21 +08:00

19 lines
444 B
Python

# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin
def get_real_url(response, url):
    """Return ``url`` in absolute form, resolving it against the current page.

    Args:
        response: Object exposing the address of the fetched page as
            ``response.url``; no other attribute is read here.
        url: Link string taken from that page.

    Returns:
        ``url`` unchanged when it already starts with ``http://``,
        ``https://`` or ``//``; otherwise ``url`` joined onto
        ``response.url`` with ``urljoin``.
    """
    # The "://" part is required on purpose: testing only for an "http"
    # prefix also matches relative paths such as "http-guide.html" and would
    # hand them back unresolved.
    # NOTE(review): scheme-relative links ("//host/path") are still returned
    # without a scheme, as before -- confirm that callers can handle them.
    if re.match(r'(?:https?:)?//', url):
        return url
    return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
    """Scrapy spider skeleton with template slots for its crawl configuration.

    Three hash-delimited markers appear in this class: START_URL (inside the
    request URL string), START_STAGE (the attribute name used as the first
    callback) and PARSERS (after ``start_requests``).
    NOTE(review): presumably a code generator substitutes all three before
    this module is imported -- confirm against the generator.
    """
    # Name under which Scrapy registers this spider.
    name = 'config_spider'

    def start_requests(self):
        """Yield the single initial request that starts the crawl."""
        # Until the START_STAGE marker is substituted, Python reads everything
        # from the first "#" after "self." as a comment, so the call's closing
        # parenthesis is lost and this line does not parse. Keep the markers
        # exactly as written.
        yield scrapy.Request(url='###START_URL###', callback=self.###START_STAGE###)

# NOTE(review): the marker below is intentionally left at column 0; the
# text injected in its place presumably carries its own method-level
# indentation -- confirm before re-indenting it.
###PARSERS###