updated execute spider logic

Marvin Zhang
2019-03-01 12:07:28 +08:00
parent d8766f2a97
commit f3ff0a464a
6 changed files with 70 additions and 29 deletions

@@ -6,8 +6,6 @@ FROM ubuntu:latest
 ADD . /opt/crawlab

 # add dns
-#RUN echo -e "nameserver 180.76.76.76" >> /etc/resolv.conf
-#ADD ./resolv.conf /etc
 RUN cat /etc/resolv.conf

 # install python

app.py

@@ -1,7 +1,8 @@
-from flask import Flask
+from flask import Flask, logging
 from flask_cors import CORS
 from flask_restful import Api
+from config import FLASK_HOST, FLASK_PORT

 from routes.deploys import DeployApi
 from routes.files import FileApi
 from routes.nodes import NodeApi
@@ -45,4 +46,4 @@ api.add_resource(StatsApi,
                  '/api/stats/<string:action>')

 if __name__ == '__main__':
-    app.run()
+    app.run(host=FLASK_HOST, port=FLASK_PORT)

View File

@@ -24,4 +24,6 @@ MONGO_DB = 'crawlab_test'
# flask variables
DEBUG = True
SERVER_NAME = '0.0.0.0:5000'
FLASK_HOST = '0.0.0.0'
FLASK_PORT = 5000
# SERVER_NAME = '0.0.0.0:5000'
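
Together with the app.py change above, this replaces Flask's SERVER_NAME setting with an explicit bind address: the host and port now live in the config module and are passed straight to app.run(). A minimal sketch of the pattern, assuming a plain config.py next to app.py (the ping route is only there to make the sketch runnable):

# config.py -- values taken from the diff
FLASK_HOST = '0.0.0.0'  # listen on all interfaces so the API is reachable from outside the container
FLASK_PORT = 5000

# app.py -- simplified entry point
from flask import Flask, jsonify
from config import FLASK_HOST, FLASK_PORT

app = Flask(__name__)

@app.route('/api/ping')
def ping():
    # trivial endpoint used only for illustration
    return jsonify(status='ok')

if __name__ == '__main__':
    # bind address comes from config instead of the SERVER_NAME setting
    app.run(host=FLASK_HOST, port=FLASK_PORT)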

@@ -1,7 +1,7 @@
 import json
 import requests
 from bson import ObjectId
+from flask import current_app

 from config import FLOWER_API_ENDPOINT
 from constants.node import NodeType
@@ -39,33 +39,40 @@ class NodeApi(BaseApi):
         # TODO: use query "?status=1" to get status of nodes
         # get status for each node
-        status_data = check_nodes_status()
+        status_data = {}
+        try:
+            status_data = check_nodes_status()
+        except Exception as err:
+            current_app.logger.error(err)

         # get a list of items
-        res = requests.get('%s/workers' % FLOWER_API_ENDPOINT)
         online_node_ids = []
-        for k, v in json.loads(res.content.decode('utf-8')).items():
-            node_name = k
-            node_celery = v
-            node = db_manager.get('nodes', id=node_name)
+        try:
+            res = requests.get('%s/workers' % FLOWER_API_ENDPOINT)
+            for k, v in json.loads(res.content.decode('utf-8')).items():
+                node_name = k
+                node_celery = v
+                node = db_manager.get('nodes', id=node_name)

-            # new node
-            if node is None:
-                node = {}
-                for _k, _v in node_celery.items():
-                    node[_k] = _v
-                node['_id'] = node_name
-                node['name'] = node_name
-                db_manager.save('nodes', node)
+                # new node
+                if node is None:
+                    node = {}
+                    for _k, _v in node_celery.items():
+                        node[_k] = _v
+                    node['_id'] = node_name
+                    node['name'] = node_name
+                    db_manager.save('nodes', node)

-            # existing node
-            else:
-                for _k, _v in v.items():
-                    node[_k] = _v
-                node['name'] = node_name
-                db_manager.save('nodes', node)
+                # existing node
+                else:
+                    for _k, _v in v.items():
+                        node[_k] = _v
+                    node['name'] = node_name
+                    db_manager.save('nodes', node)

-            online_node_ids.append(node_name)
+                online_node_ids.append(node_name)
+        except Exception as err:
+            current_app.logger.error(err)

         # iterate db nodes to update status
         nodes = []
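
The node listing now treats its two external dependencies as fallible: both the status check and the call to Flower's /workers endpoint are wrapped in try/except, errors go to current_app.logger, and the handler falls back to empty data instead of raising. A stripped-down sketch of that pattern, assuming it is called from inside a Flask request handler (the Flower address and the timeout are illustrative; db_manager and check_nodes_status from the diff are left out):

import json
import requests
from flask import current_app

FLOWER_API_ENDPOINT = 'http://localhost:5555/api'  # assumed Flower address, not taken from the diff

def fetch_online_workers():
    """Return Flower's worker map, or {} if Flower is unreachable."""
    workers = {}
    try:
        res = requests.get('%s/workers' % FLOWER_API_ENDPOINT, timeout=5)
        workers = json.loads(res.content.decode('utf-8'))
    except Exception as err:
        # log and degrade gracefully instead of letting the request fail with a 500
        current_app.logger.error(err)
    return workers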

@@ -93,10 +93,43 @@ class SpiderApi(BaseApi):
         node_id = args.get('node_id')
         if node_id is None:
-            return {}, 400
+            return {
+                'code': 400,
+                'status': 400,
+                'error': 'node_id cannot be empty'
+            }, 400
+
+        # get node from db
+        node = db_manager.get('nodes', id=node_id)
+
+        # validate ip and port
+        if node.get('ip') is None or node.get('port') is None:
+            return {
+                'code': 400,
+                'status': 'ok',
+                'error': 'node ip and port should not be empty'
+            }, 400
+
+        # dispatch crawl task
+        res = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
+            node.get('ip'),
+            node.get('port'),
+            id
+        ), {'node_id', node_id})
+        data = json.loads(res)
+        return {
+            'code': res.status_code,
+            'status': 'ok',
+            'error': data.get('error'),
+            'task': data.get('task')
+        }
+
+    def on_crawl(self, id):
+        args = self.parser.parse_args()
+        node_id = args.get('node_id')
+
         job = execute_spider.delay(id, node_id)
         # print('crawl: %s' % id)

         return {
             'code': 200,
             'status': 'ok',
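
With this change the master's crawl endpoint no longer queues the task itself: it validates node_id, looks up the target node's ip and port, and forwards the request to that node's /api/spiders/<id>/on_crawl route, where execute_spider is queued through Celery. A rough sketch of the forwarding step; note that requests expects query parameters as a dict via params= (the diff passes a set literal, {'node_id', node_id}) and that json.loads needs the response body rather than the Response object:

import json
import requests

def dispatch_crawl(node, spider_id, node_id):
    # build the worker-side URL from the node record, mirroring the diff
    url = 'http://%s:%s/api/spiders/%s/on_crawl' % (
        node.get('ip'),
        node.get('port'),
        spider_id,
    )
    # query parameters go in a dict, e.g. ?node_id=...
    res = requests.get(url, params={'node_id': node_id})
    data = json.loads(res.content.decode('utf-8'))
    return {
        'code': res.status_code,
        'status': 'ok',
        'error': data.get('error'),
        'task': data.get('task'),
    }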