Files
crawlab/utils/spider.py
2019-02-26 12:42:12 +08:00

20 lines
640 B
Python

from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
def get_lang_by_stats(stats: dict) -> LangType:
"""
:param stats: stats is generated by utils.file.get_file_suffix_stats
:return:
"""
data = stats.items()
data = sorted(data, key=lambda item: item[1])
data = list(filter(lambda item: item[0] not in SUFFIX_IGNORE, data))
top_suffix = data[-1][0]
if FILE_SUFFIX_LANG_MAPPING.get(top_suffix) is not None:
return FILE_SUFFIX_LANG_MAPPING.get(top_suffix)
return LangType.OTHER
def get_spider_type(path: str) -> SpiderType:
return SpiderType.SCRAPY