diff --git a/crawlab/routes/sites.py b/crawlab/routes/sites.py index 443cecd5..d9bd4593 100644 --- a/crawlab/routes/sites.py +++ b/crawlab/routes/sites.py @@ -43,7 +43,8 @@ class SiteApi(BaseApi): if keyword is not None: filter_['$or'] = [ {'description': {'$regex': keyword}}, - {'name': {'$regex': keyword}} + {'name': {'$regex': keyword}}, + {'domain': {'$regex': keyword}} ] items = db_manager.list( diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py index 157218ee..5473d824 100644 --- a/crawlab/routes/spiders.py +++ b/crawlab/routes/spiders.py @@ -61,6 +61,9 @@ class SpiderApi(BaseApi): # spider schedule cron enabled ('envs', str), + + # spider site + ('site', str), ) def get(self, id=None, action=None): diff --git a/spiders/chinaz/chinaz/pipelines.py b/spiders/chinaz/chinaz/pipelines.py index ad3fbb23..747de355 100644 --- a/spiders/chinaz/chinaz/pipelines.py +++ b/spiders/chinaz/chinaz/pipelines.py @@ -23,6 +23,6 @@ class MongoPipeline(object): def process_item(self, item, spider): item['task_id'] = os.environ.get('CRAWLAB_TASK_ID') item['_id'] = item['domain'] - if self.col.find_one({'_id': item['_id']}) is not None: + if self.col.find_one({'_id': item['_id']}) is None: self.col.save(item) return item