diff --git a/app.py b/app.py index 5e99dc3f..3dccd20c 100644 --- a/app.py +++ b/app.py @@ -9,5 +9,9 @@ app.config['DEBUG'] = True # init flask api instance api = Api(app) +# reference api routes +import routes.task +import routes.spider + # start flask app app.run() diff --git a/config/db.py b/config/db.py index f66752bf..ff628e95 100644 --- a/config/db.py +++ b/config/db.py @@ -1,6 +1,6 @@ # 数据库 MONGO_HOST = 'localhost' -MONGO_PORT = '27017' +MONGO_PORT = 27017 # MONGO_USER = 'test' # MONGO_PASS = 'test' MONGO_DB = 'crawlab_test' diff --git a/db/manager.py b/db/manager.py index dccb4619..906fb5ed 100644 --- a/db/manager.py +++ b/db/manager.py @@ -1,3 +1,4 @@ +from bson import ObjectId from pymongo import MongoClient from config.db import MONGO_HOST, MONGO_PORT, MONGO_DB @@ -7,7 +8,6 @@ class DbManager(object): self.mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT) self.db = self.mongo[MONGO_DB] - # TODO: CRUD def save(self, col_name: str, item, **kwargs): col = self.db[col_name] col.save(item, **kwargs) @@ -20,12 +20,24 @@ class DbManager(object): col = self.db[col_name] col.update(cond, {'$set': values}, **kwargs) - def list(self, col_name: str, cond: dict, skip: int, limit: int, **kwargs): - if kwargs.get('page') is not None: - try: - page = int(kwargs.get('page')) - skip = page * limit - except Exception as err: - pass - # TODO: list logic - # TODO: pagination + def update_one(self, col_name: str, id: str, values: dict, **kwargs): + col = self.db[col_name] + col.find_one_and_update({'_id': ObjectId(id)}, {'$set': values}) + + def list(self, col_name: str, cond: dict, skip: int = 0, limit: int = 10, **kwargs): + col = self.db[col_name] + data = [] + for item in col.find(cond).skip(skip).limit(limit): + data.append(item) + return data + + def get(self, col_name: str, id: str): + col = self.db[col_name] + return col.find_one({'_id': ObjectId(id)}) + + def count(self, col_name: str, cond): + col = self.db[col_name] + return col.count(cond) + + +db_manager = DbManager() diff --git a/route.py b/route.py index 12aedf7a..e8020ab1 100644 --- a/route.py +++ b/route.py @@ -1,4 +1,4 @@ -from app import api -from api.spider import SpiderApi, SpiderExecutorApi +# from app import api +# from api.spider import SpiderApi, SpiderExecutorApi -api.add_resource(SpiderExecutorApi, '/spider') +# api.add_resource(SpiderExecutorApi, '/spider') diff --git a/routes/__init__.py b/routes/__init__.py index f0f71b0d..480b415e 100644 --- a/routes/__init__.py +++ b/routes/__init__.py @@ -1,5 +1,7 @@ -from app import app +# print('routes') +from routes import spider +from routes import task + +print('routes') -# api.add_resource(SpiderApi, '/spider') -# print(SpiderExecutorApi) diff --git a/routes/base.py b/routes/base.py new file mode 100644 index 00000000..c4e315af --- /dev/null +++ b/routes/base.py @@ -0,0 +1,42 @@ +import json + +from celery.utils.log import get_logger +from flask_restful import reqparse, Resource + +from app import api +from db.manager import db_manager + +logger = get_logger('tasks') +parser = reqparse.RequestParser() +parser.add_argument('task_name', type=str) + +# collection name +COL_NAME = 'test' + + +class BaseApi(Resource): + col_name = 'base' + + def get(self, id=None): + args = parser.parse_args() + cond = {} + if args.filter is not None: + cond = json.loads(args.filter) + if id is None: + return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) + else: + return db_manager.get(col_name=self.col_name, id=id) + + def list(self): + args = parser.parse_args() + cond = {} + if args.filter is not None: + cond = json.loads(args.filter) + return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) + + def update(self, id=None): + pass + + def remove(self, id=None): + pass + diff --git a/routes/spider.py b/routes/spider.py index fbd0c10a..d5041b61 100644 --- a/routes/spider.py +++ b/routes/spider.py @@ -1,18 +1,50 @@ -from celery.utils.log import get_logger +import json +# from celery.utils.log import get_logger from flask_restful import reqparse, Resource + +from app import api +from db.manager import db_manager from tasks.spider import execute_spider -logger = get_logger('tasks') +# logger = get_logger('tasks') parser = reqparse.RequestParser() parser.add_argument('spider_name', type=str) +# collection name +COL_NAME = 'spiders' + class SpiderApi(Resource): - pass + col_name = COL_NAME + + def get(self, id=None): + args = parser.parse_args() + cond = {} + if args.filter is not None: + cond = json.loads(args.filter) + if id is None: + return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) + else: + return db_manager.get(col_name=self.col_name, id=id) + + def list(self): + args = parser.parse_args() + cond = {} + if args.filter is not None: + cond = json.loads(args.filter) + return db_manager.list(col_name=self.col_name, cond=cond, page=args.page, page_size=args.page_size) + + def update(self, id=None): + pass + + def remove(self, id=None): + pass class SpiderExecutorApi(Resource): - def get(self): + col_name = COL_NAME + + def post(self, id): args = parser.parse_args() job = execute_spider.delay(args.spider_name) return { @@ -21,3 +53,8 @@ class SpiderExecutorApi(Resource): 'spider_name': args.spider_name, 'result': job.get(timeout=5) } + + +api.add_resource(SpiderExecutorApi, '/api/spider/:id/crawl') +api.add_resource(SpiderApi, '/api/spider/:id') +api.add_resource(SpiderApi, '/api/spiders') diff --git a/routes/task.py b/routes/task.py new file mode 100644 index 00000000..4faebaed --- /dev/null +++ b/routes/task.py @@ -0,0 +1,80 @@ +import json + +from celery.utils.log import get_logger +from flask import jsonify +from flask_restful import reqparse, Resource + +from app import api +from db.manager import db_manager + +logger = get_logger('tasks') +parser = reqparse.RequestParser() + +# collection name +COL_NAME = 'tasks' + + +class TaskApi(Resource): + col_name = COL_NAME + parser = reqparse.RequestParser() + + def __init__(self): + super(TaskApi).__init__() + self.parser.add_argument('page') + self.parser.add_argument('page_size') + self.parser.add_argument('filter') + + def get(self, id=None): + args = self.parser.parse_args() + + # get item by id + if id is None: + # filter + cond = {} + if args.get('filter') is not None: + cond = json.loads(args.filter) + + # page number + page = 0 + if args.get('page') is not None: + page = int(args.page) + else: + print(args) + + # page size + page_size = 10 + if args.get('page_size') is not None: + page = int(args.page_size) + + # total count + total_count = db_manager.count(col_name=self.col_name, cond=cond) + + # items + items = db_manager.list(col_name=self.col_name, + cond=cond, + skip=page * page_size, + limit=page_size) + return jsonify({ + 'status': 'ok', + 'total_count': total_count, + 'page': page, + 'page_size': page_size, + 'items': items + }) + + # list items + else: + return jsonify(db_manager.get(col_name=self.col_name, id=id)) + + def update(self, id=None): + pass + + def remove(self, id=None): + pass + + +# api.add_resource(TaskApi, '/api/task/:id') +api.add_resource(TaskApi, + '/api/tasks', + '/api/task/:id' + ) diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 00000000..2a54fbdb --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,5 @@ +import re + + +def is_object_id(id): + return re.search('^[a-zA-Z0-9]{24}$', id) is not None