Pagination change: resolve relative next-page URLs with urljoin

This commit is contained in:
Marvin Zhang
2019-06-09 17:11:42 +08:00
parent ada606afbb
commit 8796070339

View File

@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import os
import sys
from urllib.parse import urlparse
from urllib.parse import urlparse, urljoin
import scrapy
@@ -72,11 +72,8 @@ def get_next_url(response):
# found next url
if next_url is not None:
if not next_url.startswith('http') and not next_url.startswith('//'):
u = urlparse(response.url)
if next_url.startswith('/'):
next_url = f'{u.scheme}://{u.netloc}{next_url}'
else:
next_url = f'{u.scheme}://{u.netloc}{u.path}/{next_url}'
return urljoin(response.url, next_url)
else:
return next_url
return None