From 04c5d69c9f6c8b7e7b86b0ca9c35382f8a82dfd6 Mon Sep 17 00:00:00 2001 From: Yeqing Zhang Date: Tue, 12 Feb 2019 06:58:54 +0800 Subject: [PATCH] added base class for api handling --- app.py | 8 ++-- bin/start_flower.sh | 2 +- db/manager.py | 10 +++- routes/__init__.py | 4 +- routes/base.py | 80 +++++++++++++++++++++----------- routes/{spider.py => spiders.py} | 7 +-- routes/task.py | 80 -------------------------------- routes/tasks.py | 13 ++++++ routes/test.py | 25 ++++++++++ test/__init__.py | 0 test/test.http | 15 ++++++ utils/__init__.py | 9 ++++ 12 files changed, 136 insertions(+), 117 deletions(-) mode change 100644 => 100755 bin/start_flower.sh rename routes/{spider.py => spiders.py} (89%) delete mode 100644 routes/task.py create mode 100644 routes/tasks.py create mode 100644 routes/test.py create mode 100644 test/__init__.py create mode 100644 test/test.http diff --git a/app.py b/app.py index 3dccd20c..1eccc7cc 100644 --- a/app.py +++ b/app.py @@ -10,8 +10,10 @@ app.config['DEBUG'] = True api = Api(app) # reference api routes -import routes.task -import routes.spider +import routes.tasks +import routes.spiders +import routes.test # start flask app -app.run() +if __name__ == '__main__': + app.run() diff --git a/bin/start_flower.sh b/bin/start_flower.sh old mode 100644 new mode 100755 index 59570c24..335e0170 --- a/bin/start_flower.sh +++ b/bin/start_flower.sh @@ -1 +1 @@ -celery flower --broker=redis://localhost:6379/0 --backend=redis://localhost:6379/1 \ No newline at end of file +celery flower --broker=mongodb://localhost:27017 --backend=redis://localhost:6379/1 \ No newline at end of file diff --git a/db/manager.py b/db/manager.py index 906fb5ed..54d26c10 100644 --- a/db/manager.py +++ b/db/manager.py @@ -1,6 +1,10 @@ from bson import ObjectId +from mongoengine import connect from pymongo import MongoClient from config.db import MONGO_HOST, MONGO_PORT, MONGO_DB +from utils import is_object_id + +connect(db=MONGO_DB, host=MONGO_HOST, port=MONGO_PORT) class DbManager(object): @@ -32,8 +36,12 @@ class DbManager(object): return data def get(self, col_name: str, id: str): + if is_object_id(id): + _id = ObjectId(id) + else: + _id = id col = self.db[col_name] - return col.find_one({'_id': ObjectId(id)}) + return col.find_one({'_id': _id}) def count(self, col_name: str, cond): col = self.db[col_name] diff --git a/routes/__init__.py b/routes/__init__.py index 480b415e..ccebe5a0 100644 --- a/routes/__init__.py +++ b/routes/__init__.py @@ -1,7 +1,7 @@ # print('routes') -from routes import spider -from routes import task +from routes import spiders +from routes import tasks print('routes') diff --git a/routes/base.py b/routes/base.py index c4e315af..686467da 100644 --- a/routes/base.py +++ b/routes/base.py @@ -1,42 +1,68 @@ -import json - -from celery.utils.log import get_logger from flask_restful import reqparse, Resource -from app import api from db.manager import db_manager - -logger = get_logger('tasks') -parser = reqparse.RequestParser() -parser.add_argument('task_name', type=str) - -# collection name -COL_NAME = 'test' +from utils import jsonify class BaseApi(Resource): - col_name = 'base' + col_name = 'tmp' + parser = reqparse.RequestParser() + arguments = [] + + def __init__(self): + super(BaseApi).__init__() + self.parser.add_argument('page', type=int) + self.parser.add_argument('page_size', type=int) + self.parser.add_argument('filter', type=dict) def get(self, id=None): - args = parser.parse_args() - cond = {} - if args.filter is not None: - cond = json.loads(args.filter) - if id is None: - return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) - else: - return db_manager.get(col_name=self.col_name, id=id) + args = self.parser.parse_args() - def list(self): - args = parser.parse_args() - cond = {} - if args.filter is not None: - cond = json.loads(args.filter) - return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) + # get item by id + if id is None: + # filter + cond = {} + if args.get('filter') is not None: + cond = args.filter + # cond = json.loads(args.filter) + + # page number + page = 1 + if args.get('page') is not None: + page = args.page + # page = int(args.page) + + # page size + page_size = 10 + if args.get('page_size') is not None: + page_size = args.page_size + # page = int(args.page_size) + + # TODO: sort functionality + + # total count + total_count = db_manager.count(col_name=self.col_name, cond=cond) + + # items + items = db_manager.list(col_name=self.col_name, + cond=cond, + skip=(page - 1) * page_size, + limit=page_size) + + return jsonify({ + 'status': 'ok', + 'total_count': total_count, + 'page': page, + 'page_size': page_size, + 'items': items + }) + + # list items + else: + return jsonify(db_manager.get(col_name=self.col_name, id=id)) def update(self, id=None): pass def remove(self, id=None): pass - diff --git a/routes/spider.py b/routes/spiders.py similarity index 89% rename from routes/spider.py rename to routes/spiders.py index d5041b61..a282c528 100644 --- a/routes/spider.py +++ b/routes/spiders.py @@ -55,6 +55,7 @@ class SpiderExecutorApi(Resource): } -api.add_resource(SpiderExecutorApi, '/api/spider/:id/crawl') -api.add_resource(SpiderApi, '/api/spider/:id') -api.add_resource(SpiderApi, '/api/spiders') +api.add_resource(SpiderExecutorApi, '/api/spiders//crawl') +api.add_resource(SpiderApi, + '/api/spiders', + '/api/spiders/') diff --git a/routes/task.py b/routes/task.py deleted file mode 100644 index 4faebaed..00000000 --- a/routes/task.py +++ /dev/null @@ -1,80 +0,0 @@ -import json - -from celery.utils.log import get_logger -from flask import jsonify -from flask_restful import reqparse, Resource - -from app import api -from db.manager import db_manager - -logger = get_logger('tasks') -parser = reqparse.RequestParser() - -# collection name -COL_NAME = 'tasks' - - -class TaskApi(Resource): - col_name = COL_NAME - parser = reqparse.RequestParser() - - def __init__(self): - super(TaskApi).__init__() - self.parser.add_argument('page') - self.parser.add_argument('page_size') - self.parser.add_argument('filter') - - def get(self, id=None): - args = self.parser.parse_args() - - # get item by id - if id is None: - # filter - cond = {} - if args.get('filter') is not None: - cond = json.loads(args.filter) - - # page number - page = 0 - if args.get('page') is not None: - page = int(args.page) - else: - print(args) - - # page size - page_size = 10 - if args.get('page_size') is not None: - page = int(args.page_size) - - # total count - total_count = db_manager.count(col_name=self.col_name, cond=cond) - - # items - items = db_manager.list(col_name=self.col_name, - cond=cond, - skip=page * page_size, - limit=page_size) - return jsonify({ - 'status': 'ok', - 'total_count': total_count, - 'page': page, - 'page_size': page_size, - 'items': items - }) - - # list items - else: - return jsonify(db_manager.get(col_name=self.col_name, id=id)) - - def update(self, id=None): - pass - - def remove(self, id=None): - pass - - -# api.add_resource(TaskApi, '/api/task/:id') -api.add_resource(TaskApi, - '/api/tasks', - '/api/task/:id' - ) diff --git a/routes/tasks.py b/routes/tasks.py new file mode 100644 index 00000000..26019821 --- /dev/null +++ b/routes/tasks.py @@ -0,0 +1,13 @@ +from app import api +from routes.base import BaseApi + + +class TaskApi(BaseApi): + col_name = 'tasks_celery' + + +# add api to resources +api.add_resource(TaskApi, + '/api/tasks', + '/api/tasks/' + ) diff --git a/routes/test.py b/routes/test.py new file mode 100644 index 00000000..a017f537 --- /dev/null +++ b/routes/test.py @@ -0,0 +1,25 @@ +from app import api +from routes.base import BaseApi +from tasks.spider import get_baidu_html + + +class TestApi(BaseApi): + col_name = 'test' + + def __init__(self): + super(TestApi).__init__() + self.parser.add_argument('keyword', type=str) + + def get(self, id=None): + args = self.parser.parse_args() + for i in range(100): + get_baidu_html.delay(args.keyword) + return { + 'status': 'ok' + } + + +# add api to resources +api.add_resource(TestApi, + '/api/test', + ) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/test.http b/test/test.http new file mode 100644 index 00000000..81f51885 --- /dev/null +++ b/test/test.http @@ -0,0 +1,15 @@ +# For a quick start check out our HTTP Requests collection (Tools|HTTP Client|Open HTTP Requests Collection). +# +# Following HTTP Request Live Templates are available: +# * 'gtrp' and 'gtr' create a GET request with or without query parameters; +# * 'ptr' and 'ptrp' create a POST request with a simple or parameter-like body; +# * 'mptr' and 'fptr' create a POST request to submit a form with a text or file field (multipart/form-data); + +### Send POST request with json body +POST http://localhost:5000/api/test +Content-Type: application/json + +{ + "id": 999, + "value": "content" +} \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py index 2a54fbdb..feab944e 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,5 +1,14 @@ +import json import re +from bson import json_util + def is_object_id(id): return re.search('^[a-zA-Z0-9]{24}$', id) is not None + + +def jsonify(obj: dict): + dump_str = json_util.dumps(obj) + converted_obj = json.loads(dump_str) + return converted_obj