From 87960703399669092e172471c968f7bb5e1001b0 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 9 Jun 2019 17:11:42 +0800 Subject: [PATCH] pagination change --- crawlab/spiders/spiders/spiders/config_spider.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crawlab/spiders/spiders/spiders/config_spider.py b/crawlab/spiders/spiders/spiders/config_spider.py index 70693c0d..77e65862 100644 --- a/crawlab/spiders/spiders/spiders/config_spider.py +++ b/crawlab/spiders/spiders/spiders/config_spider.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import os import sys -from urllib.parse import urlparse +from urllib.parse import urlparse, urljoin import scrapy @@ -72,11 +72,8 @@ def get_next_url(response): # found next url if next_url is not None: if not next_url.startswith('http') and not next_url.startswith('//'): - u = urlparse(response.url) - if next_url.startswith('/'): - next_url = f'{u.scheme}://{u.netloc}{next_url}' - else: - next_url = f'{u.scheme}://{u.netloc}{u.path}/{next_url}' + return urljoin(response.url, next_url) + else: return next_url return None