link site to spider

This commit is contained in:
Marvin Zhang
2019-05-10 13:26:58 +08:00
parent 69e2e2f3c5
commit 1cd2fd47f3
3 changed files with 6 additions and 2 deletions

View File

@@ -43,7 +43,8 @@ class SiteApi(BaseApi):
if keyword is not None:
filter_['$or'] = [
{'description': {'$regex': keyword}},
{'name': {'$regex': keyword}}
{'name': {'$regex': keyword}},
{'domain': {'$regex': keyword}}
]
items = db_manager.list(

View File

@@ -61,6 +61,9 @@ class SpiderApi(BaseApi):
# spider environment variables
('envs', str),
# spider site
('site', str),
)
def get(self, id=None, action=None):

View File

@@ -23,6 +23,6 @@ class MongoPipeline(object):
def process_item(self, item, spider):
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
item['_id'] = item['domain']
if self.col.find_one({'_id': item['_id']}) is not None:
if self.col.find_one({'_id': item['_id']}) is None:
self.col.save(item)
return item