diff --git a/crawlab/app.py b/crawlab/app.py
index d72aa40f..7a3f60b9 100644
--- a/crawlab/app.py
+++ b/crawlab/app.py
@@ -15,15 +15,14 @@
 file_dir = os.path.dirname(os.path.realpath(__file__))
 root_path = os.path.abspath(os.path.join(file_dir, '.'))
 sys.path.append(root_path)
 
-from config import FLASK_HOST, FLASK_PORT, PROJECT_LOGS_FOLDER, BROKER_URL
-from constants.manage import ActionType
+from config import FLASK_HOST, FLASK_PORT, PROJECT_LOGS_FOLDER
 from routes.deploys import DeployApi
 from routes.files import FileApi
 from routes.nodes import NodeApi
 from routes.spiders import SpiderApi, SpiderImportApi, SpiderManageApi
 from routes.stats import StatsApi
 from routes.tasks import TaskApi
-from tasks.celery import celery_app
+from tasks.scheduler import scheduler
 
 # flask app instance
 app = Flask(__name__)
@@ -72,5 +71,8 @@ if __name__ == '__main__':
     if not os.path.exists(PROJECT_LOGS_FOLDER):
         os.makedirs(PROJECT_LOGS_FOLDER)
 
+    # run scheduler as a separate process
+    scheduler.run()
+
     # run app instance
     app.run(host=FLASK_HOST, port=FLASK_PORT, threaded=True)
diff --git a/crawlab/config.py b/crawlab/config.py
index 5b1dee95..a3f09ba4 100644
--- a/crawlab/config.py
+++ b/crawlab/config.py
@@ -29,5 +29,5 @@ MONGO_DB = 'crawlab_test'
 
 # flask variables
 DEBUG = True
-FLASK_HOST = '0.0.0.0'
-FLASK_PORT = 5000
+FLASK_HOST = '127.0.0.1'
+FLASK_PORT = 8000
diff --git a/crawlab/requirements.txt b/crawlab/requirements.txt
deleted file mode 100644
index 95fc504c..00000000
--- a/crawlab/requirements.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-amqp==2.4.1
-apscheduler==3.5.3
-aniso8601==4.1.0
-asn1crypto==0.24.0
-attrs==18.2.0
-Automat==0.7.0
-Babel==2.6.0
-billiard==3.5.0.5
-celery==4.2.1
-certifi==2018.11.29
-cffi==1.11.5
-chardet==3.0.4
-Click==7.0
-constantly==15.1.0
-cryptography==2.5
-cssselect==1.0.3
-dnspython==1.16.0
-docopt==0.6.2
-eventlet==0.24.1
-Flask==1.0.2
-Flask-Cors==3.0.7
-Flask-RESTful==0.3.7
-flower==0.9.2
-greenlet==0.4.15
-hyperlink==18.0.0
-idna==2.8
-incremental==17.5.0
-itsdangerous==1.1.0
-Jinja2==2.10
-kombu==4.3.0
-lxml==4.3.1
-MarkupSafe==1.1.0
-mongoengine==0.16.3
-monotonic==1.5
-parsel==1.5.1
-pyasn1==0.4.5
-pyasn1-modules==0.2.4
-pycparser==2.19
-PyDispatcher==2.0.5
-PyHamcrest==1.9.0
-pymongo==3.7.2
-PyMySQL==0.9.3
-pyOpenSSL==19.0.0
-python-scrapyd-api==2.1.2
-pytz==2018.9
-queuelib==1.5.0
-redis==3.1.0
-requests==2.21.0
-Scrapy==1.6.0
-scrapy-redis==0.6.8
-scrapy-splash==0.7.2
-service-identity==18.1.0
-six==1.12.0
-tornado==5.1.1
-Twisted==18.9.0
-urllib3==1.24.1
-vine==1.2.0
-w3lib==1.20.0
-Werkzeug==0.14.1
-zope.interface==4.6.0
diff --git a/crawlab/routes/base.py b/crawlab/routes/base.py
index 4106cae1..0df9d9b5 100644
--- a/crawlab/routes/base.py
+++ b/crawlab/routes/base.py
@@ -105,6 +105,10 @@ class BaseApi(Resource):
             if k not in DEFAULT_ARGS:
                 values[k] = args.get(k)
         item = db_manager.update_one(col_name=self.col_name, id=id, values=values)
+
+        # execute after_update hook
+        self.after_update(id)
+
         return item
 
     def post(self, id=None, action=None):
@@ -122,3 +126,6 @@
 
     def delete(self, id=None):
         db_manager.remove_one(col_name=self.col_name, id=id)
+
+    def after_update(self, id=None):
+        pass
diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py
index aaeca318..af6fdbea 100644
--- a/crawlab/routes/spiders.py
+++ b/crawlab/routes/spiders.py
@@ -16,6 +16,7 @@ from constants.node import NodeStatus
 from constants.task import TaskStatus
 from db.manager import db_manager
 from routes.base import BaseApi
+from tasks.scheduler import scheduler
 from tasks.spider import execute_spider
 from utils import jsonify
 from utils.deploy import zip_file, unzip_file
@@ -25,6 +26,10 @@ from utils.spider import get_lang_by_stats
 parser = reqparse.RequestParser()
 parser.add_argument('file', type=FileStorage, location='files')
 
+IGNORE_DIRS = [
+    '.idea'
+]
+
 
 class SpiderApi(BaseApi):
     col_name = 'spiders'
@@ -75,6 +80,9 @@ class SpiderApi(BaseApi):
         items = []
         dirs = os.listdir(PROJECT_SOURCE_FILE_FOLDER)
         for _dir in dirs:
+            if _dir in IGNORE_DIRS:
+                continue
+
             dir_path = os.path.join(PROJECT_SOURCE_FILE_FOLDER, _dir)
             dir_name = _dir
             spider = db_manager.get_one_by_key('spiders', key='src', value=dir_path)
@@ -274,6 +282,9 @@ class SpiderApi(BaseApi):
             'items': items
         })
 
+    def after_update(self, id=None):
+        scheduler.update()
+
 
 class SpiderImportApi(Resource):
     parser = reqparse.RequestParser()
diff --git a/crawlab/tasks/scheduler.py b/crawlab/tasks/scheduler.py
index f227dce8..540b7158 100644
--- a/crawlab/tasks/scheduler.py
+++ b/crawlab/tasks/scheduler.py
@@ -1,9 +1,9 @@
 import requests
-from apscheduler.schedulers.background import BlockingScheduler
+from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.jobstores.mongodb import MongoDBJobStore
 from pymongo import MongoClient
 
-from config import MONGO_DB, MONGO_HOST, MONGO_PORT
+from config import MONGO_DB, MONGO_HOST, MONGO_PORT, FLASK_HOST, FLASK_PORT
 from constants.spider import CronEnabled
 from db.manager import db_manager
 
@@ -17,17 +17,24 @@ class Scheduler(object):
                                  client=mongo)
     }
 
-    scheduler = BlockingScheduler(jobstores=jobstores)
+    scheduler = BackgroundScheduler(jobstores=jobstores)
 
     def execute_spider(self, id: str):
-        r = requests.get('http://localhost:5000/api/spiders/%s/on_crawl' % id)
+        r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
+            FLASK_HOST,
+            FLASK_PORT,
+            id
+        ))
 
     def restart(self):
         self.scheduler.shutdown()
         self.scheduler.start()
 
     def update(self):
+        # remove all existing periodic jobs
         self.scheduler.remove_all_jobs()
+
+        # add new periodic jobs from database
        spiders = db_manager.list('spiders', {'cron_enabled': CronEnabled.ON})
         for spider in spiders:
             cron = spider.get('cron')
@@ -38,7 +45,7 @@
                 day = cron_arr[3]
                 month = cron_arr[4]
                 day_of_week = cron_arr[5]
-                self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(spider['_id']),),
+                self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(spider['_id']),), jobstore='mongo',
                                        day_of_week=day_of_week, month=month, day=day, hour=hour, minute=minute,
                                        second=second)
 
diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js
index a7570fea..297fb6bf 100644
--- a/frontend/src/api/request.js
+++ b/frontend/src/api/request.js
@@ -1,6 +1,7 @@
 import axios from 'axios'
 
-const baseUrl = 'http://localhost:5000/api'
+const baseUrl = 'http://localhost:8000/api'
+// const baseUrl = 'http://139.129.230.98:8000/api'
 
 const request = (method, path, params, data) => {
   return new Promise((resolve, reject) => {
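
Note on scheduler startup: app.py now calls scheduler.run(), but this patch neither shows nor modifies that method. A minimal sketch of what run() presumably does, given the switch to BackgroundScheduler (the body below is an assumption, not part of this diff):

    class Scheduler(object):
        ...
        def run(self):
            # hypothetical: BackgroundScheduler.start() spawns a daemon thread
            # and returns immediately, so the app.run() call that follows it
            # in app.py is not blocked (the old BlockingScheduler would never
            # have returned control to Flask)
            self.scheduler.start()
            # hypothetical: load periodic jobs for all cron-enabled spiders
            self.update()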
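
Note on the cron format: update() splits spider['cron'] on spaces and reads six fields in the order second minute hour day month day_of_week (cron_arr[0] through cron_arr[5]), i.e. a seconds field prepended to the usual five-field crontab layout. A spider scheduled for 03:30:00 every day would therefore be stored roughly like this (the 'cron' and 'cron_enabled' keys come from the diff; the rest of the document shape is illustrative):

    from constants.spider import CronEnabled

    spider = {
        'cron_enabled': CronEnabled.ON,  # picked up by Scheduler.update()
        'cron': '0 30 3 * * *',          # second minute hour day month day_of_week
    }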
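
Note on job execution: the scheduler never runs spiders in-process; each cron tick issues a GET against the Flask API, which is why FLASK_HOST and FLASK_PORT are now imported into tasks/scheduler.py and the hard-coded localhost:5000 URL had to go. The equivalent manual trigger under the new config defaults (the spider id is a placeholder):

    import requests

    # same endpoint Scheduler.execute_spider() hits on each cron tick
    r = requests.get('http://127.0.0.1:8000/api/spiders/<spider_id>/on_crawl')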