diff --git a/crawlab/routes/deploys.py b/crawlab/routes/deploys.py index 9cf4bf7c..885173c1 100644 --- a/crawlab/routes/deploys.py +++ b/crawlab/routes/deploys.py @@ -11,7 +11,12 @@ class DeployApi(BaseApi): ('node_id', str), ) - def get(self, id=None, action=None): + def get(self, id: str = None, action: str = None) -> (dict, tuple): + """ + GET method of DeployAPI. + :param id: deploy_id + :param action: action + """ # action by id if action is not None: if not hasattr(self, action): diff --git a/crawlab/routes/files.py b/crawlab/routes/files.py index 4f18f0ee..ef80e20b 100644 --- a/crawlab/routes/files.py +++ b/crawlab/routes/files.py @@ -15,6 +15,10 @@ class FileApi(Resource): self.parser.add_argument('path', type=str) def get(self, action=None): + """ + GET method of FileAPI. + :param action: action + """ args = self.parser.parse_args() path = args.get('path') diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index f637039f..aa7432c6 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -8,7 +8,11 @@ from utils import jsonify class StatsApi(Resource): - def get(self, action=None): + def get(self, action: str = None) -> (dict, tuple): + """ + GET method of StatsApi. 
+ :param action: action + """ # action if action is not None: if not hasattr(self, action): @@ -23,6 +27,9 @@ class StatsApi(Resource): return {} def get_home_stats(self): + """ + Get stats for home page + """ # overview stats task_count = db_manager.count('tasks', {}) spider_count = db_manager.count('spiders', {}) diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py index 681ddab2..589fe9b1 100644 --- a/crawlab/tasks/spider.py +++ b/crawlab/tasks/spider.py @@ -2,7 +2,7 @@ import os from datetime import datetime from bson import ObjectId -from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER,PYTHON_ENV_PATH +from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH from constants.task import TaskStatus from db.manager import db_manager from .celery import celery_app @@ -12,12 +12,17 @@ from utils.log import other as logger @celery_app.task(bind=True) def execute_spider(self, id: str): + """ + Execute spider task. + :param self: + :param id: task_id + """ task_id = self.request.id hostname = self.request.hostname spider = db_manager.get('spiders', id=id) command = spider.get('cmd') if command.startswith("env"): - command = PYTHON_ENV_PATH + command.replace("env","") + command = PYTHON_ENV_PATH + command.replace("env", "") current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id'))) diff --git a/crawlab/utils/deploy.py b/crawlab/utils/deploy.py index f7879a68..e04c7da7 100644 --- a/crawlab/utils/deploy.py +++ b/crawlab/utils/deploy.py @@ -1,8 +1,13 @@ import os, zipfile from utils.log import other -# 打包目录为zip文件(未压缩) + def zip_file(source_dir, output_filename): + """ + 打包目录为zip文件(未压缩) + :param source_dir: source directory + :param output_filename: output file name + """ zipf = zipfile.ZipFile(output_filename, 'w') pre_len = len(os.path.dirname(source_dir)) for parent, dirnames, filenames in os.walk(source_dir): @@ -14,6 +19,11 @@ def zip_file(source_dir, output_filename): def 
unzip_file(zip_src, dst_dir): + """ + Unzip file + :param zip_src: source zip file + :param dst_dir: destination directory + """ r = zipfile.is_zipfile(zip_src) if r: fz = zipfile.ZipFile(zip_src, 'r') diff --git a/crawlab/utils/file.py b/crawlab/utils/file.py index cec15ad5..d549c62d 100644 --- a/crawlab/utils/file.py +++ b/crawlab/utils/file.py @@ -14,7 +14,12 @@ SUFFIX_LANG_MAPPING = { } -def get_file_suffix(file_name: str): +def get_file_suffix(file_name: str) -> (str, None): + """ + Get suffix of a file + :param file_name: + :return: + """ file_name = file_name.lower() m = suffix_regex.search(file_name) if m is not None: @@ -23,7 +28,11 @@ def get_file_suffix(file_name: str): return None -def get_file_list(path): +def get_file_list(path: str) -> list: + """ + Get a list of files of given directory path + :param path: directory path + """ for root, dirs, file_names in os.walk(path): # print(root) # 当前目录路径 # print(dirs) # 当前路径下所有子目录 @@ -35,6 +44,10 @@ def get_file_list(path): def get_file_suffix_stats(path) -> dict: + """ + Get suffix stats of files under a given directory + :param path: directory path + """ stats = defaultdict(int) for file_path in get_file_list(path): suffix = get_file_suffix(file_path) @@ -44,6 +57,10 @@ def get_file_suffix_stats(path) -> dict: def get_file_content(path) -> dict: + """ + Get file content + :param path: file path + """ with open(path) as f: suffix = get_file_suffix(path) lang = SUFFIX_LANG_MAPPING.get(suffix) diff --git a/crawlab/utils/node.py b/crawlab/utils/node.py index c6cd47be..07a45c01 100644 --- a/crawlab/utils/node.py +++ b/crawlab/utils/node.py @@ -8,11 +8,18 @@ from db.manager import db_manager def check_nodes_status(): + """ + Check nodes status from Flower API (read-only; performs no update). 
+ """ res = requests.get('%s/workers?status=1' % FLOWER_API_ENDPOINT) return json.loads(res.content.decode('utf-8')) def update_nodes_status(refresh=False): + """ + Update all nodes status + :param refresh: + """ online_node_ids = [] url = '%s/workers?status=1' % FLOWER_API_ENDPOINT if refresh: diff --git a/crawlab/utils/spider.py b/crawlab/utils/spider.py index 61616790..0a45d28f 100644 --- a/crawlab/utils/spider.py +++ b/crawlab/utils/spider.py @@ -3,8 +3,10 @@ import os from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType from db.manager import db_manager + def get_lang_by_stats(stats: dict) -> LangType: """ + Get programming language provided suffix stats :param stats: stats is generated by utils.file.get_file_suffix_stats :return: """ @@ -20,14 +22,21 @@ def get_lang_by_stats(stats: dict) -> LangType: pass - def get_spider_type(path: str) -> SpiderType: + """ + Get spider type + :param path: spider directory path + """ for file_name in os.listdir(path): if file_name == 'scrapy.cfg': return SpiderType.SCRAPY -def get_spider_col_fields(col_name): +def get_spider_col_fields(col_name: str) -> list: + """ + Get spider collection fields + :param col_name: collection name + """ items = db_manager.list(col_name, {}, limit=100, sort_key='_id') fields = set() for item in items: