diff --git a/.idea/httpRequests/http-requests-log.http b/.idea/httpRequests/http-requests-log.http index cb020385..f2e44011 100644 --- a/.idea/httpRequests/http-requests-log.http +++ b/.idea/httpRequests/http-requests-log.http @@ -1,3 +1,46 @@ +PUT http://localhost:5000/api/spiders +Content-Type: application/json + +{ + "spider_name": "baidu spider", + "cmd": "python /Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "src": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "spider_type": 1, + "lang_type": 1 +} + +<> 2019-02-13T083950.200.json + +### + +PUT http://localhost:5000/api/spiders +Content-Type: application/json + +{ + "spider_name": "baidu spider", + "cmd": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "src": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "spider_type": 1, + "lang_type": 1 +} + +<> 2019-02-13T083921.200.json + +### + +PUT http://localhost:5000/api/spiders +Content-Type: application/json + +{ + "spider_name": "baidu spider", + "cmd": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "src": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "spider_type": 1, + "lang_type": 1 +} + +### + POST http://localhost:5000/api/spiders/5c63a2ddb65d151bee71d76b/crawl Content-Type: application/json diff --git a/config/__init__.py b/config/__init__.py index e51a3ffb..ca07cdb2 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -1,2 +1 @@ -MONGO_HOST = 'localhost' -MONGO_DATABASE = 'test' +PROJECT_FILE_FOLDER = '/var/crawlab' diff --git a/db/manager.py b/db/manager.py index 7c68b5d3..c3cea2a7 100644 --- a/db/manager.py +++ b/db/manager.py @@ -1,10 +1,9 @@ from bson import ObjectId from mongoengine import connect -from pymongo import MongoClient +from pymongo import MongoClient, DESCENDING from config.db import MONGO_HOST, MONGO_PORT, MONGO_DB from utils import is_object_id - connect(db=MONGO_DB, host=MONGO_HOST, port=MONGO_PORT) @@ -48,5 +47,11 @@ class DbManager(object): col = self.db[col_name] return col.count(cond) + def get_latest_version(self, spider_id): + col = self.db['deploys'] + for item in col.find({'spider_id': ObjectId(spider_id)}).sort('version', DESCENDING): + return item.version + return None + db_manager = DbManager() diff --git a/model/spider.py b/model/spider.py index 0f36e686..12b810d3 100644 --- a/model/spider.py +++ b/model/spider.py @@ -6,7 +6,7 @@ from model.base import BaseModel class Spider(BaseModel): _id = ObjectIdField() spider_name = StringField() + cmd = StringField() + src = StringField() spider_type = IntField() lang_type = IntField() - execute_cmd = StringField() - src_file_path = StringField() diff --git a/routes/deploys.py b/routes/deploys.py new file mode 100644 index 00000000..efa16a08 --- /dev/null +++ b/routes/deploys.py @@ -0,0 +1,15 @@ +from app import api +from routes.base import BaseApi + + +class DeployApi(BaseApi): + col_name = 'deploys' + + arguments = ( + ) + + +api.add_resource(DeployApi, + '/api/deploys', + '/api/deploys/', + '/api/deploys//') diff --git a/routes/spiders.py b/routes/spiders.py index 38e42c29..0a42f01a 100644 --- a/routes/spiders.py +++ b/routes/spiders.py @@ -1,33 +1,40 @@ import json # from celery.utils.log import get_logger +import os +import shutil + from flask_restful import reqparse, Resource from app import api +from config import PROJECT_FILE_FOLDER from db.manager import db_manager from routes.base import BaseApi from tasks.spider import execute_spider -# logger = get_logger('tasks') -parser = reqparse.RequestParser() -parser.add_argument('spider_name', type=str) - class SpiderApi(BaseApi): col_name = 'spiders' arguments = ( ('spider_name', str), + ('cmd', str), + ('src', str), ('spider_type', int), ('lang_type', int), - ('execute_cmd', str), - ('src_file_path', str), ) def crawl(self, id): print('crawl: %s' % id) def deploy(self, id): - print('deploy: %s' % id) + args = self.parser.parse_args() + spider = db_manager.get(col_name=self.col_name, id=id) + latest_version = db_manager.get_latest_version(id=id) + src = args.get('src') + dst = os.path.join(PROJECT_FILE_FOLDER, str(spider._id), latest_version + 1) + if not os.path.exists(dst): + os.mkdir(dst) + shutil.copytree(src=src, dst=dst) api.add_resource(SpiderApi, diff --git a/spiders/baidu/baidu.py b/spiders/baidu/baidu.py new file mode 100644 index 00000000..f0d7bba3 --- /dev/null +++ b/spiders/baidu/baidu.py @@ -0,0 +1,6 @@ +from time import sleep +import requests + +for i in range(10): + r = requests.get('http://www.baidu.com') + sleep(0.1) diff --git a/test/test.http b/test/test.http index c5603dc8..124b3134 100644 --- a/test/test.http +++ b/test/test.http @@ -10,7 +10,11 @@ PUT http://localhost:5000/api/spiders Content-Type: application/json { - "spider_name": "a spider" + "spider_name": "baidu spider", + "cmd": "python /Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "src": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "spider_type": 1, + "lang_type": 1 } ### Send POST request with json body @@ -18,7 +22,11 @@ POST http://localhost:5000/api/spiders/5c63a2ddb65d151bee71d76b Content-Type: application/json { - "spider_name": "b spider" + "spider_name": "baidu spider", + "cmd": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "src": "/Users/yeqing/projects/crawlab/spiders/baidu/baidu.py", + "spider_type": 1, + "lang_type": 1 } ### Send POST request with json body by path