updated API docs

This commit is contained in:
Marvin Zhang
2019-04-07 19:07:00 +08:00
parent d56fb30127
commit b6ea079ad3
8 changed files with 73 additions and 9 deletions

View File

@@ -11,7 +11,12 @@ class DeployApi(BaseApi):
('node_id', str),
)
def get(self, id=None, action=None):
def get(self, id: str = None, action: str = None) -> (dict, tuple):
"""
GET method of DeployAPI.
:param id: deploy_id
:param action: action
"""
# action by id
if action is not None:
if not hasattr(self, action):

View File

@@ -15,6 +15,10 @@ class FileApi(Resource):
self.parser.add_argument('path', type=str)
def get(self, action=None):
"""
GET method of FileAPI.
:param action: action
"""
args = self.parser.parse_args()
path = args.get('path')

View File

@@ -8,7 +8,11 @@ from utils import jsonify
class StatsApi(Resource):
def get(self, action=None):
def get(self, action: str = None) -> (dict, tuple):
"""
GET method of StatsApi.
:param action: action
"""
# action
if action is not None:
if not hasattr(self, action):
@@ -23,6 +27,9 @@ class StatsApi(Resource):
return {}
def get_home_stats(self):
"""
Get stats for home page
"""
# overview stats
task_count = db_manager.count('tasks', {})
spider_count = db_manager.count('spiders', {})

View File

@@ -2,7 +2,7 @@ import os
from datetime import datetime
from bson import ObjectId
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER,PYTHON_ENV_PATH
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH
from constants.task import TaskStatus
from db.manager import db_manager
from .celery import celery_app
@@ -12,12 +12,17 @@ from utils.log import other as logger
@celery_app.task(bind=True)
def execute_spider(self, id: str):
"""
Execute spider task.
:param self:
:param id: task_id
"""
task_id = self.request.id
hostname = self.request.hostname
spider = db_manager.get('spiders', id=id)
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env","")
command = PYTHON_ENV_PATH + command.replace("env", "")
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))

View File

@@ -1,8 +1,13 @@
import os, zipfile
from utils.log import other
# 打包目录为zip文件未压缩
def zip_file(source_dir, output_filename):
"""
Pack a directory into a zip file (stored uncompressed)
:param source_dir: source directory
:param output_filename: output file name
"""
zipf = zipfile.ZipFile(output_filename, 'w')
pre_len = len(os.path.dirname(source_dir))
for parent, dirnames, filenames in os.walk(source_dir):
@@ -14,6 +19,11 @@ def zip_file(source_dir, output_filename):
def unzip_file(zip_src, dst_dir):
"""
Unzip file
:param zip_src: source zip file
:param dst_dir: destination directory
"""
r = zipfile.is_zipfile(zip_src)
if r:
fz = zipfile.ZipFile(zip_src, 'r')

View File

@@ -14,7 +14,12 @@ SUFFIX_LANG_MAPPING = {
}
def get_file_suffix(file_name: str):
def get_file_suffix(file_name: str) -> (str, None):
"""
Get suffix of a file
:param file_name:
:return:
"""
file_name = file_name.lower()
m = suffix_regex.search(file_name)
if m is not None:
@@ -23,7 +28,11 @@ def get_file_suffix(file_name: str):
return None
def get_file_list(path):
def get_file_list(path: str) -> list:
"""
Get a list of files of given directory path
:param path: directory path
"""
for root, dirs, file_names in os.walk(path):
# print(root) # 当前目录路径
# print(dirs) # 当前路径下所有子目录
@@ -35,6 +44,10 @@ def get_file_list(path):
def get_file_suffix_stats(path) -> dict:
"""
Get file-suffix stats for the files under the given directory
:param path: directory path
"""
stats = defaultdict(int)
for file_path in get_file_list(path):
suffix = get_file_suffix(file_path)
@@ -44,6 +57,10 @@ def get_file_suffix_stats(path) -> dict:
def get_file_content(path) -> dict:
"""
Get file content
:param path: file path
"""
with open(path) as f:
suffix = get_file_suffix(path)
lang = SUFFIX_LANG_MAPPING.get(suffix)

View File

@@ -8,11 +8,18 @@ from db.manager import db_manager
def check_nodes_status():
"""
Fetch node (worker) status from the Flower API and return the decoded JSON.
"""
res = requests.get('%s/workers?status=1' % FLOWER_API_ENDPOINT)
return json.loads(res.content.decode('utf-8'))
def update_nodes_status(refresh=False):
"""
Update all nodes status
:param refresh:
"""
online_node_ids = []
url = '%s/workers?status=1' % FLOWER_API_ENDPOINT
if refresh:

View File

@@ -3,8 +3,10 @@ import os
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
from db.manager import db_manager
def get_lang_by_stats(stats: dict) -> LangType:
"""
Get the programming language from the provided suffix stats
:param stats: stats is generated by utils.file.get_file_suffix_stats
:return:
"""
@@ -20,14 +22,21 @@ def get_lang_by_stats(stats: dict) -> LangType:
pass
def get_spider_type(path: str) -> SpiderType:
"""
Get spider type
:param path: spider directory path
"""
for file_name in os.listdir(path):
if file_name == 'scrapy.cfg':
return SpiderType.SCRAPY
def get_spider_col_fields(col_name):
def get_spider_col_fields(col_name: str) -> list:
"""
Get spider collection fields
:param col_name: collection name
"""
items = db_manager.list(col_name, {}, limit=100, sort_key='_id')
fields = set()
for item in items: