From 1cd2fd47f381ee69d2bd8f382119fa3c6b5893a0 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 10 May 2019 13:26:58 +0800 Subject: [PATCH] link site to spider --- crawlab/routes/sites.py | 3 ++- crawlab/routes/spiders.py | 3 +++ spiders/chinaz/chinaz/pipelines.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/crawlab/routes/sites.py b/crawlab/routes/sites.py index 443cecd5..d9bd4593 100644 --- a/crawlab/routes/sites.py +++ b/crawlab/routes/sites.py @@ -43,7 +43,8 @@ class SiteApi(BaseApi): if keyword is not None: filter_['$or'] = [ {'description': {'$regex': keyword}}, - {'name': {'$regex': keyword}} + {'name': {'$regex': keyword}}, + {'domain': {'$regex': keyword}} ] items = db_manager.list( diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py index 157218ee..5473d824 100644 --- a/crawlab/routes/spiders.py +++ b/crawlab/routes/spiders.py @@ -61,6 +61,9 @@ class SpiderApi(BaseApi): # spider schedule cron enabled ('envs', str), + + # spider site + ('site', str), ) def get(self, id=None, action=None): diff --git a/spiders/chinaz/chinaz/pipelines.py b/spiders/chinaz/chinaz/pipelines.py index ad3fbb23..747de355 100644 --- a/spiders/chinaz/chinaz/pipelines.py +++ b/spiders/chinaz/chinaz/pipelines.py @@ -23,6 +23,6 @@ class MongoPipeline(object): def process_item(self, item, spider): item['task_id'] = os.environ.get('CRAWLAB_TASK_ID') item['_id'] = item['domain'] - if self.col.find_one({'_id': item['_id']}) is not None: + if self.col.find_one({'_id': item['_id']}) is None: self.col.save(item) return item