mirror of https://github.com/crawlab-team/crawlab.git
code cleanup
@@ -1,125 +0,0 @@
import os
import subprocess
import sys
from multiprocessing import Process

import click
from flask import Flask
from flask_cors import CORS
from flask_restful import Api

from routes.schedules import ScheduleApi
from tasks.scheduler import scheduler

file_dir = os.path.dirname(os.path.realpath(__file__))
root_path = os.path.abspath(os.path.join(file_dir, '.'))
sys.path.append(root_path)

from config import FLASK_HOST, FLASK_PORT, PROJECT_LOGS_FOLDER, BROKER_URL
from constants.manage import ActionType
from routes.deploys import DeployApi
from routes.files import FileApi
from routes.nodes import NodeApi
from routes.spiders import SpiderApi, SpiderImportApi, SpiderManageApi
from routes.stats import StatsApi
from routes.tasks import TaskApi
from tasks.celery import celery_app
from utils.log import other

# flask app instance
app = Flask(__name__)
app.config.from_object('config')

# init flask api instance
api = Api(app)

# cors support
CORS(app, supports_credentials=True)
# register api routes
api.add_resource(NodeApi,
                 '/api/nodes',
                 '/api/nodes/<string:id>',
                 '/api/nodes/<string:id>/<string:action>')
api.add_resource(SpiderImportApi,
                 '/api/spiders/import/<string:platform>')
api.add_resource(SpiderManageApi,
                 '/api/spiders/manage/<string:action>')
api.add_resource(SpiderApi,
                 '/api/spiders',
                 '/api/spiders/<string:id>',
                 '/api/spiders/<string:id>/<string:action>')
api.add_resource(DeployApi,
                 '/api/deploys',
                 '/api/deploys/<string:id>',
                 '/api/deploys/<string:id>/<string:action>')
api.add_resource(TaskApi,
                 '/api/tasks',
                 '/api/tasks/<string:id>',
                 '/api/tasks/<string:id>/<string:action>')
api.add_resource(FileApi,
                 '/api/files',
                 '/api/files/<string:action>')
api.add_resource(StatsApi,
                 '/api/stats',
                 '/api/stats/<string:action>')
api.add_resource(ScheduleApi,
                 '/api/schedules',
                 '/api/schedules/<string:id>')


def run_app():
    # create log folder if it does not exist
    if not os.path.exists(PROJECT_LOGS_FOLDER):
        os.makedirs(PROJECT_LOGS_FOLDER)

    # run app instance
    app.run(host=FLASK_HOST, port=FLASK_PORT)
def run_flower():
    # run flower as a subprocess and forward its output to the app logger
    p = subprocess.Popen(['celery', 'flower', '-b', BROKER_URL], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    for line in iter(p.stdout.readline, b''):
        if line.decode('utf-8') != '':
            other.info(line.decode('utf-8'))


def run_worker():
    # the default prefork pool is not available on Windows, so use eventlet there
    if sys.platform.startswith('win'):
        celery_app.start(argv=['tasks', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
    else:
        celery_app.start(argv=['tasks', 'worker', '-E', '-l', 'INFO'])
def run_scheduler():
    scheduler.run()


@click.command()
@click.argument('action', type=click.Choice([ActionType.APP,
                                             ActionType.FLOWER,
                                             ActionType.WORKER,
                                             ActionType.SCHEDULER,
                                             ActionType.RUN_ALL]))
def main(action):
    if action == ActionType.APP:
        run_app()
    elif action == ActionType.FLOWER:
        run_flower()
    elif action == ActionType.WORKER:
        run_worker()
    elif action == ActionType.SCHEDULER:
        run_scheduler()
    elif action == ActionType.RUN_ALL:
        p_flower = Process(target=run_flower)
        p_flower.start()
        p_app = Process(target=run_app)
        p_app.start()
        p_worker = Process(target=run_worker)
        p_worker.start()
        p_scheduler = Process(target=run_scheduler)
        p_scheduler.start()


if __name__ == '__main__':
    main()
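The deleted file above is a click-based entry point. Assuming it lived at the backend root as app.py (the file path is not shown in these hunks) and that the ActionType constants resolve to the literal strings used below, it would have been started roughly like this:

    python app.py app        # Flask API only
    python app.py worker     # Celery worker only
    python app.py scheduler  # task scheduler only
    python app.py all        # spawn flower, app, worker and scheduler as separate processes

The actual action strings are defined in constants.manage.ActionType, which this commit does not show.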
@@ -1,6 +0,0 @@
from mongoengine import *
import datetime


class BaseModel(Document):
    create_ts = DateTimeField(default=datetime.datetime.utcnow)
@@ -1,10 +0,0 @@
from mongoengine import *

from model.base import BaseModel


class Deploy(BaseModel):
    _id = ObjectIdField()
    spider_id = ObjectIdField()
    version = IntField()
    node_id = ObjectIdField()
@@ -1,12 +0,0 @@
from mongoengine import *

from model.base import BaseModel


class Node(BaseModel):
    _id = ObjectIdField()
    ip = StringField()
    port = IntField()
    name = StringField()
    description = StringField()
    status = IntField()
@@ -1,12 +0,0 @@
from mongoengine import *

from model.base import BaseModel


class Spider(BaseModel):
    _id = ObjectIdField()
    name = StringField()
    cmd = StringField()
    src = StringField()
    type = IntField()
    lang = IntField()
@@ -1,9 +0,0 @@
from mongoengine import *

from model.base import BaseModel


class Task(BaseModel):
    _id = ObjectIdField()
    deploy_id = ObjectIdField()
    file_path = StringField()
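The deleted model files above all extend BaseModel. A minimal usage sketch follows; the database name, host, and the model.node import path are assumptions rather than values from this commit, and on recent mongoengine releases BaseModel would also need meta = {'allow_inheritance': True} before it could be subclassed this way:

    from mongoengine import connect
    from model.node import Node  # assumed module path

    # placeholder connection settings, not taken from this commit
    connect(db='crawlab', host='localhost', port=27017)

    node = Node(ip='127.0.0.1', port=8000, name='local', status=1)
    node.save()
    print(Node.objects(name='local').count())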
@@ -1,24 +0,0 @@
from setuptools import setup, find_packages

with open("README.md", "r") as fh:
    long_description = fh.read()

with open('requirements.txt') as f:
    requirements = [l for l in f.read().splitlines() if l]

setup(
    name='crawlab-server',
    version='0.0.1',
    url='https://github.com/tikazyq/crawlab',
    install_requires=requirements,
    license='BSD',
    author='Marvin Zhang',
    author_email='tikazyq@163.com',
    description='Celery-based web crawler admin platform for managing distributed web spiders regardless of languages and frameworks.',
    long_description=long_description,
    long_description_content_type="text/markdown",
    download_url="https://github.com/tikazyq/crawlab/archive/master.zip",
    packages=find_packages(),
    keywords=['celery', 'python', 'webcrawler', 'crawl', 'scrapy', 'admin'],
    zip_safe=True,
)
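The setup.py removed above packaged the backend as crawlab-server; presumably it was installed for development with something like:

    pip install -e .    # run from the directory containing setup.py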
@@ -79,6 +79,7 @@
<el-row class="button-group-container">
  <div class="button-group">
    <el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
    <el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">{{$t('Extract Fields')}}</el-button>
    <el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
    <el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
  </div>
@@ -126,6 +127,7 @@ export default {
        { value: 'detail', label: 'Detail Only' },
        { value: 'list-detail', label: 'List + Detail' }
      ],
      extractFieldsLoading: false,
      previewLoading: false,
      saveLoading: false,
      dialogVisible: false
@@ -210,6 +212,8 @@ export default {
          this.$message.success(this.$t(`Spider task has been scheduled`))
        })
      })
    },
    onExtractFields () {
    }
  },
  created () {
@@ -48,13 +48,14 @@ export default {
  Submit: '提交',
  'Import Spiders': '导入爬虫',
  'Deploy All': '部署所有爬虫',
  Refresh: '刷新',
  View: '查看',
  Edit: '编辑',
  Remove: '删除',
  Confirm: '确认',
  Stop: '停止',
  Preview: '预览',
  'Refresh': '刷新',
  'View': '查看',
  'Edit': '编辑',
  'Remove': '删除',
  'Confirm': '确认',
  'Stop': '停止',
  'Preview': '预览',
  'Extract Fields': '提取字段',

  // Home page
  'Total Tasks': '总任务数',