mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
link site to spider
This commit is contained in:
@@ -43,7 +43,8 @@ class SiteApi(BaseApi):
|
||||
if keyword is not None:
|
||||
filter_['$or'] = [
|
||||
{'description': {'$regex': keyword}},
|
||||
{'name': {'$regex': keyword}}
|
||||
{'name': {'$regex': keyword}},
|
||||
{'domain': {'$regex': keyword}}
|
||||
]
|
||||
|
||||
items = db_manager.list(
|
||||
|
||||
@@ -61,6 +61,9 @@ class SpiderApi(BaseApi):
|
||||
|
||||
# spider environment variables
|
||||
('envs', str),
|
||||
|
||||
# spider site
|
||||
('site', str),
|
||||
)
|
||||
|
||||
def get(self, id=None, action=None):
|
||||
|
||||
@@ -23,6 +23,6 @@ class MongoPipeline(object):
|
||||
def process_item(self, item, spider):
|
||||
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
|
||||
item['_id'] = item['domain']
|
||||
if self.col.find_one({'_id': item['_id']}) is not None:
|
||||
if self.col.find_one({'_id': item['_id']}) is None:
|
||||
self.col.save(item)
|
||||
return item
|
||||
|
||||
Reference in New Issue
Block a user