From c052d8c6d8d30bd629105d7d1cdd4708dcc498d3 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 1 Jun 2019 16:55:44 +0800
Subject: [PATCH 1/2] allow user to run server with one command

---
 CHANGELOG.md                             |  4 ++
 README-zh.md                             |  2 +-
 README.md                                |  2 +-
 crawlab/__init__.py                      |  0
 crawlab/{bin/run_flower.py => flower.py} |  0
 crawlab/manage.py                        | 74 ++++++++++++++++++++++++
 crawlab/{bin/run_worker.py => worker.py} |  0
 frontend/package.json                    |  2 +-
 8 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 crawlab/__init__.py
 rename crawlab/{bin/run_flower.py => flower.py} (100%)
 create mode 100644 crawlab/manage.py
 rename crawlab/{bin/run_worker.py => worker.py} (100%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d102705..8c08a065 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# 0.2.3 (unreleased)
+### Features / Enhancement
+- **CLI**: Allow users to run Crawlab programs from the command-line interface.
+
 # 0.2.2 (2019-05-30)
 ### Features / Enhancement
 - **Automatic Extract Fields**: Automatically extracting data fields in list pages for configurable spider.
diff --git a/README-zh.md b/README-zh.md
index 88d83abc..94530fef 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -1,7 +1,7 @@
 # Crawlab
 
 ![](http://114.67.75.98:8081/buildStatus/icon?job=crawlab%2Fdevelop)
-![](https://img.shields.io/badge/版本-v0.2.1-blue.svg)
+![](https://img.shields.io/badge/版本-v0.2.3-blue.svg)
diff --git a/README.md b/README.md
index 9bffb6ed..29433857 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Crawlab
 
 ![](http://114.67.75.98:8081/buildStatus/icon?job=crawlab%2Fdevelop)
-![](https://img.shields.io/badge/version-v0.2.1-blue.svg)
+![](https://img.shields.io/badge/version-v0.2.3-blue.svg)
diff --git a/crawlab/__init__.py b/crawlab/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/crawlab/bin/run_flower.py b/crawlab/flower.py
similarity index 100%
rename from crawlab/bin/run_flower.py
rename to crawlab/flower.py
diff --git a/crawlab/manage.py b/crawlab/manage.py
new file mode 100644
index 00000000..4e2eb613
--- /dev/null
+++ b/crawlab/manage.py
@@ -0,0 +1,74 @@
+import argparse
+import os
+import subprocess
+from multiprocessing import Process
+import sys
+
+BASE_DIR = os.path.dirname(__file__)
+
+APP_DESC = """
+Crawlab CLI tool.
+"""
+ACTION_LIST = [
+    'serve',
+    'app',
+    'worker',
+    'flower',
+    'frontend',
+]
+if len(sys.argv) == 1:
+    sys.argv.append('--help')
+parser = argparse.ArgumentParser()
+parser.add_argument('action', type=str)
+# parser.add_argument('-q', '--quality', type=int, default=0,
+#                     help="download video quality : 1 for the standard-definition; 3 for the super-definition")
+args = parser.parse_args()
+
+
+def run_app():
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'app.py')])
+    p.communicate()
+
+
+def run_flower():
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'flower.py')])
+    p.communicate()
+
+
+def run_worker():
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'worker.py')])
+    p.communicate()
+
+
+def run_frontend():
+    p = subprocess.Popen(['npm', 'run', 'serve'],
+                         cwd=os.path.abspath(os.path.join(BASE_DIR, '..', 'frontend')))
+    p.communicate()
+
+
+def main():
+    p_app = Process(target=run_app)
+    p_flower = Process(target=run_flower)
+    p_worker = Process(target=run_worker)
+    p_frontend = Process(target=run_frontend)
+    if args.action == 'serve':
+        p_app.start()
+        p_flower.start()
+        p_worker.start()
+        p_frontend.start()
+    elif args.action == 'app':
+        p_app.start()
+        p_flower.start()
+    elif args.action == 'worker':
+        p_app.start()
+        p_worker.start()
+    elif args.action == 'flower':
+        p_flower.start()
+    elif args.action == 'frontend':
+        p_frontend.start()
+    else:
+        print(f'Invalid action: {args.action}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/crawlab/bin/run_worker.py b/crawlab/worker.py
similarity index 100%
rename from crawlab/bin/run_worker.py
rename to crawlab/worker.py
diff --git a/frontend/package.json b/frontend/package.json
index 701463a3..e751c9f9 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "crawlab",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "private": true,
   "scripts": {
     "serve": "cross-env NODE_ENV=development vue-cli-service serve --ip=0.0.0.0",

From e65feaeaebbf9e3f02d08f9d6544958071d390f4 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 1 Jun 2019 17:05:46 +0800
Subject: [PATCH 2/2] added manage.py as CLI tool

---
 README-zh.md                   | 15 +------------
 README.md                      | 16 +-------------
 crawlab/manage.py => manage.py | 20 +++++++++++------
 setup.cfg                      |  2 ++
 setup.py                       | 39 ++++++++++++++++++++++++++++++++++
 5 files changed, 57 insertions(+), 35 deletions(-)
 rename crawlab/manage.py => manage.py (74%)
 create mode 100644 setup.cfg
 create mode 100644 setup.py

diff --git a/README-zh.md b/README-zh.md
index 94530fef..2f407f00 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -37,20 +37,7 @@ npm install
 ## 快速开始
 
 ```bash
-# 启动后端API
-python app.py
-
-# 启动Flower服务
-python ./bin/run_flower.py
-
-# 启动worker
-python ./bin/run_worker.py
-```
-
-```bash
-# 运行前端
-cd frontend
-npm run serve
+python manage.py serve
 ```
 
 ## 截图
diff --git a/README.md b/README.md
index 29433857..4eb0e9ca 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,6 @@ Celery-based web crawler admin platform for managing distributed web spiders reg
 [Demo](http://114.67.75.98:8080) | [Documentation](https://tikazyq.github.io/crawlab)
 
-
 ## Pre-requisite
 - Python 3.6+
 - Node.js 8.12+
@@ -38,20 +37,7 @@ Please edit configuration file `config.py` to configure api and database connect
 ## Quick Start
 
 ```bash
-# Start backend API
-python app.py
-
-# Start Flower service
-python ./bin/run_flower.py
-
-# Start worker
-python ./bin/run_worker.py
-```
-
-```bash
-# run frontend client
-cd frontend
-npm run serve
+python manage.py serve
 ```
 
 ## Screenshot
diff --git a/crawlab/manage.py b/manage.py
similarity index 74%
rename from crawlab/manage.py
rename to manage.py
index 4e2eb613..e9806a79 100644
--- a/crawlab/manage.py
+++ b/manage.py
@@ -8,6 +8,15 @@ BASE_DIR = os.path.dirname(__file__)
 
 APP_DESC = """
 Crawlab CLI tool.
+
+usage: python manage.py [action]
+
+action:
+    serve: start all services needed to run Crawlab. Intended for quick start; please check out the Deployment guide for production environments.
+    app: start the app + flower services, normally run on the master node.
+    worker: start the app + worker services, normally run on worker nodes.
+    flower: start the flower service only.
+    frontend: start the frontend/client service only.
 """
 ACTION_LIST = [
     'serve',
@@ -17,32 +26,31 @@ ACTION_LIST = [
     'frontend',
 ]
 if len(sys.argv) == 1:
+    print(APP_DESC)
     sys.argv.append('--help')
 parser = argparse.ArgumentParser()
 parser.add_argument('action', type=str)
-# parser.add_argument('-q', '--quality', type=int, default=0,
-#                     help="download video quality : 1 for the standard-definition; 3 for the super-definition")
 args = parser.parse_args()
 
 
 def run_app():
-    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'app.py')])
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'crawlab', 'app.py')])
     p.communicate()
 
 
 def run_flower():
-    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'flower.py')])
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'crawlab', 'flower.py')])
     p.communicate()
 
 
 def run_worker():
-    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'worker.py')])
+    p = subprocess.Popen([sys.executable, os.path.join(BASE_DIR, 'crawlab', 'worker.py')])
     p.communicate()
 
 
 def run_frontend():
     p = subprocess.Popen(['npm', 'run', 'serve'],
-                         cwd=os.path.abspath(os.path.join(BASE_DIR, '..', 'frontend')))
+                         cwd=os.path.abspath(os.path.join(BASE_DIR, 'frontend')))
     p.communicate()
 
 
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..224a7795
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[metadata]
+description-file = README.md
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..ce239009
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+from setuptools import setup, find_packages
+
+VERSION = '0.2.3'
+
+with open('README.md') as fp:
+    readme = fp.read()
+
+setup(name='crawlab-server',
+      version=VERSION,
+      description="Celery-based web crawler admin platform for managing distributed web spiders regardless of languages and frameworks.",
+      long_description=readme,
+      classifiers=['Programming Language :: Python :: 3', 'Programming Language :: JavaScript'],  # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+      keywords='python crawlab celery crawler spider platform scrapy',
+      author='tikazyq',
+      author_email='tikazyq@163.com',
+      url='https://github.com/tikazyq/crawlab',
+      license='BSD',
+      packages=find_packages(),
+      include_package_data=True,
+      zip_safe=True,
+      install_requires=[
+          'celery',
+          'flower',
+          'requests',
+          'pymongo',
+          'flask',
+          'flask_cors',
+          'flask_restful',
+          'lxml',
+          'gevent',
+          'scrapy',
+      ],
+      entry_points={
+          'console_scripts': [
+              'crawlab = manage:main'
+          ]
+      },
+)
\ No newline at end of file
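
With both patches applied, the quick-start flow from the updated READMEs reduces to a single command. Below is a minimal sketch of the expected usage, assuming the repository root as the working directory and that the Python requirements and frontend `npm install` have already been done (neither step is changed by this series):

```bash
# With no action given, manage.py prints APP_DESC and the argparse help
python manage.py

# Start everything at once: backend app, flower, worker, and the frontend dev server
python manage.py serve

# Or start role-specific subsets on different nodes
python manage.py app       # app + flower services (master node)
python manage.py worker    # app + worker services (worker nodes)
python manage.py flower    # flower service only
python manage.py frontend  # frontend/client dev server only
```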
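The new `setup.py` additionally wires `main()` up as a `crawlab` console script via `entry_points`. Since `manage.py` now lives at the repository root rather than inside the `crawlab` package, the sketch below assumes an editable install, which keeps the project root importable so the script can resolve `manage:main`:

```bash
# From the repository root, install in development (editable) mode
pip install -e .

# The console script now forwards to manage.py's main()
crawlab serve
```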