Update spider.py

修复以 “//” 开头的 URL 报错：ValueError('Missing scheme in request url: %s' % self._url)
This commit is contained in:
Seven2Nine
2020-07-08 11:06:52 +08:00
committed by GitHub
parent d75a993121
commit 142367a5f0

View File

@@ -9,7 +9,7 @@ def get_real_url(response, url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + url
return u.scheme + ":" + url
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):