Mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-21 17:21:09 +01:00)
updated execute spider logic
Dockerfile
@@ -6,8 +6,6 @@ FROM ubuntu:latest
ADD . /opt/crawlab

# add dns
#RUN echo -e "nameserver 180.76.76.76" >> /etc/resolv.conf
#ADD ./resolv.conf /etc
RUN cat /etc/resolv.conf

# install python
app.py
@@ -1,7 +1,8 @@
-from flask import Flask
+from flask import Flask, logging
from flask_cors import CORS
from flask_restful import Api

+from config import FLASK_HOST, FLASK_PORT
from routes.deploys import DeployApi
from routes.files import FileApi
from routes.nodes import NodeApi
@@ -45,4 +46,4 @@ api.add_resource(StatsApi,
                 '/api/stats/<string:action>')

if __name__ == '__main__':
-    app.run()
+    app.run(host=FLASK_HOST, port=FLASK_PORT)
config.py
@@ -24,4 +24,6 @@ MONGO_DB = 'crawlab_test'

# flask variables
DEBUG = True
-SERVER_NAME = '0.0.0.0:5000'
+FLASK_HOST = '0.0.0.0'
+FLASK_PORT = 5000
+# SERVER_NAME = '0.0.0.0:5000'
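Note: together with the app.py change above, this swaps Flask's SERVER_NAME config for an explicit host/port pair passed to app.run(). A minimal standalone sketch of the resulting behaviour (the stats route stub is illustrative, not the project's actual app.py):

from flask import Flask

# values introduced in config.py by this commit
FLASK_HOST = '0.0.0.0'  # bind on all interfaces
FLASK_PORT = 5000

app = Flask(__name__)


@app.route('/api/stats/<string:action>')
def stats(action):
    # illustrative stub standing in for the flask_restful resources
    return 'ok: %s' % action


if __name__ == '__main__':
    # app.run(host=..., port=...) only tells the built-in dev server where to bind,
    # whereas SERVER_NAME also affects URL generation and Host-header matching,
    # which is presumably why it was commented out in favour of these variables
    app.run(host=FLASK_HOST, port=FLASK_PORT)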
routes/nodes.py
@@ -1,7 +1,7 @@
import json

import requests
from bson import ObjectId
+from flask import current_app

from config import FLOWER_API_ENDPOINT
from constants.node import NodeType
@@ -39,33 +39,40 @@ class NodeApi(BaseApi):
        # TODO: use query "?status=1" to get status of nodes

        # get status for each node
-        status_data = check_nodes_status()
+        status_data = {}
+        try:
+            status_data = check_nodes_status()
+        except Exception as err:
+            current_app.logger.error(err)

        # get a list of items
-        res = requests.get('%s/workers' % FLOWER_API_ENDPOINT)
        online_node_ids = []
-        for k, v in json.loads(res.content.decode('utf-8')).items():
-            node_name = k
-            node_celery = v
-            node = db_manager.get('nodes', id=node_name)
+        try:
+            res = requests.get('%s/workers' % FLOWER_API_ENDPOINT)
+            for k, v in json.loads(res.content.decode('utf-8')).items():
+                node_name = k
+                node_celery = v
+                node = db_manager.get('nodes', id=node_name)

-            # new node
-            if node is None:
-                node = {}
-                for _k, _v in node_celery.items():
-                    node[_k] = _v
-                node['_id'] = node_name
-                node['name'] = node_name
-                db_manager.save('nodes', node)
+                # new node
+                if node is None:
+                    node = {}
+                    for _k, _v in node_celery.items():
+                        node[_k] = _v
+                    node['_id'] = node_name
+                    node['name'] = node_name
+                    db_manager.save('nodes', node)

-            # existing node
-            else:
-                for _k, _v in v.items():
-                    node[_k] = _v
-                node['name'] = node_name
-                db_manager.save('nodes', node)
+                # existing node
+                else:
+                    for _k, _v in v.items():
+                        node[_k] = _v
+                    node['name'] = node_name
+                    db_manager.save('nodes', node)

-            online_node_ids.append(node_name)
+                online_node_ids.append(node_name)
+        except Exception as err:
+            current_app.logger.error(err)

        # iterate db nodes to update status
        nodes = []
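The rewritten handler above wraps both external dependencies, check_nodes_status() and Flower's /workers endpoint, in try/except so an unreachable Flower instance is logged via current_app.logger instead of turning the whole node-list request into a 500; the handler then falls through to the DB iteration with whatever data it managed to collect. A small standalone sketch of the same pattern (fetch_workers and the endpoint value are illustrative, not from the repo):

import json
import logging

import requests

logger = logging.getLogger(__name__)

FLOWER_API_ENDPOINT = 'http://localhost:5555/api'  # assumption: local Flower instance


def fetch_workers():
    # return Flower worker info, or an empty dict if Flower is unreachable
    workers = {}
    try:
        res = requests.get('%s/workers' % FLOWER_API_ENDPOINT, timeout=5)
        workers = json.loads(res.content.decode('utf-8'))
    except Exception as err:
        # degrade gracefully instead of propagating the error to the API caller
        logger.error(err)
    return workers


if __name__ == '__main__':
    print(fetch_workers())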
routes/spiders.py
@@ -93,10 +93,43 @@ class SpiderApi(BaseApi):
        node_id = args.get('node_id')

        if node_id is None:
-            return {}, 400
+            return {
+                'code': 400,
+                'status': 400,
+                'error': 'node_id cannot be empty'
+            }, 400
+
+        # get node from db
+        node = db_manager.get('nodes', id=node_id)
+
+        # validate ip and port
+        if node.get('ip') is None or node.get('port') is None:
+            return {
+                'code': 400,
+                'status': 'ok',
+                'error': 'node ip and port should not be empty'
+            }, 400
+
+        # dispatch crawl task
+        res = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
+            node.get('ip'),
+            node.get('port'),
+            id
+        ), {'node_id', node_id})
+        data = json.loads(res)
+        return {
+            'code': res.status_code,
+            'status': 'ok',
+            'error': data.get('error'),
+            'task': data.get('task')
+        }
+
+    def on_crawl(self, id):
+        args = self.parser.parse_args()
+        node_id = args.get('node_id')

        job = execute_spider.delay(id, node_id)
        # print('crawl: %s' % id)

        return {
            'code': 200,
            'status': 'ok',
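Two details in the new dispatch code look unintended: {'node_id', node_id} builds a set rather than a query-parameter dict, and json.loads(res) receives the requests Response object instead of its body. A hedged sketch of what the call presumably means to do (URL format and field names are taken from the diff; dispatch_crawl and the spider_id rename are mine, not the commit's):

import json

import requests


def dispatch_crawl(node, spider_id, node_id):
    # illustrative corrected version of the dispatch block in SpiderApi.crawl
    url = 'http://%s:%s/api/spiders/%s/on_crawl' % (
        node.get('ip'),
        node.get('port'),
        spider_id,
    )
    # pass node_id as a query parameter (a dict, not a set)
    res = requests.get(url, params={'node_id': node_id})
    # parse the response body, not the Response object itself
    data = json.loads(res.content.decode('utf-8'))
    return {
        'code': res.status_code,
        'status': 'ok',
        'error': data.get('error'),
        'task': data.get('task'),
    }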