From 558b7472f8c645131d60f6b83ae479e4159dd2bf Mon Sep 17 00:00:00 2001
From: Marvin Zhang <tikazyq@gmail.com>
Date: Wed, 29 May 2019 19:34:51 +0800
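Subject: [PATCH] Resolve relative detail-page URLs; declare obey_robots_txt as bool

- Declare the 'obey_robots_txt' spider argument as bool instead of str.
- When a detail-page URL does not start with 'http' or '//', prefix it
  with the scheme and host of the spider's start_url before fetching.
- Import urlparse from urllib.parse for the above.
- Re-wrap the long href condition and drop a stray blank line
  (formatting only, no behavior change).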

---
 crawlab/routes/spiders.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py
index d4f99a3a..0bfe8e4c 100644
--- a/crawlab/routes/spiders.py
+++ b/crawlab/routes/spiders.py
@@ -4,6 +4,7 @@ import shutil
 import subprocess
 from datetime import datetime
 from random import random
+from urllib.parse import urlparse
 
 import gevent
 import requests
@@ -101,7 +102,7 @@ class SpiderApi(BaseApi):
         ('pagination_selector_type', str),
 
         # whether to obey robots.txt
-        ('obey_robots_txt', str),
+        ('obey_robots_txt', bool),
     )
 
     def get(self, id=None, action=None):
@@ -523,7 +524,8 @@ class SpiderApi(BaseApi):
         tags = []
         for tag in sel.iter():
             if tag.tag == 'a':
-                if tag.get('href') is not None and not tag.get('href').startswith('#') and not tag.get('href').startswith('javascript'):
+                if tag.get('href') is not None and not tag.get('href').startswith('#') and not tag.get(
+                        'href').startswith('javascript'):
                     tags.append(tag)
 
         return tags
@@ -566,6 +568,9 @@ class SpiderApi(BaseApi):
                     if f.get('is_detail'):
                         url = d.get(f['name'])
                         if url is not None:
+                            if not url.startswith('http') and not url.startswith('//'):
+                                u = urlparse(spider['start_url'])
+                                url = f'{u.scheme}://{u.netloc}{url}'
                             ev_list.append(gevent.spawn(get_detail_page_data, url, spider, idx, data))
                         break
 
@@ -657,7 +662,6 @@ class SpiderApi(BaseApi):
                         'query': f'{tag.tag}.{cls_str}',
                     })
 
-
         return {
             'status': 'ok',
             'item_selector': item_selector,