mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
24 lines
736 B
Python
24 lines
736 B
Python
import os
|
|
|
|
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
|
|
|
|
|
|
def get_lang_by_stats(stats: dict) -> LangType:
    """
    Pick the language of the most frequent file suffix.

    Suffixes listed in SUFFIX_IGNORE are skipped. The remaining suffix with
    the highest value in ``stats`` is looked up in FILE_SUFFIX_LANG_MAPPING.

    :param stats: mapping of file suffix to a comparable value (presumably a
        file count); stats is generated by utils.file.get_file_suffix_stats
    :return: the mapped language, or LangType.OTHER when ``stats`` has no
        usable suffix or the top suffix has no mapping
    """
    candidates = [
        (suffix, count)
        for suffix, count in stats.items()
        if suffix not in SUFFIX_IGNORE
    ]
    # Nothing left to rank (empty stats, or only ignored suffixes): fall back
    # instead of raising IndexError on the lookup below.
    if not candidates:
        return LangType.OTHER

    # sorted() is stable, so among equal values the suffix that appears last
    # in ``stats`` wins.
    top_suffix = sorted(candidates, key=lambda item: item[1])[-1][0]

    lang = FILE_SUFFIX_LANG_MAPPING.get(top_suffix)
    if lang is not None:
        return lang
    return LangType.OTHER
|
|
|
|
|
|
def get_spider_type(path: str) -> SpiderType:
    """
    Detect the spider type from the entries directly inside a directory.

    :param path: directory to inspect; it is listed with os.listdir, so only
        its immediate entries are considered
    :return: SpiderType.SCRAPY when an entry named ``scrapy.cfg`` exists,
        otherwise None
    """
    if 'scrapy.cfg' in os.listdir(path):
        return SpiderType.SCRAPY
    # No marker file found: no spider type is reported.
    return None
|