mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-21 17:21:09 +01:00)
fixed sorting issue
@@ -40,10 +40,13 @@ class DbManager(object):
         col = self.db[col_name]
         col.remove({'_id': ObjectId(id)})
 
-    def list(self, col_name: str, cond: dict, skip: int = 0, limit: int = 100, **kwargs):
+    def list(self, col_name: str, cond: dict, sort_key=None, sort_direction=DESCENDING, skip: int = 0, limit: int = 100,
+             **kwargs):
+        if sort_key is None:
+            sort_key = '_id'
         col = self.db[col_name]
         data = []
-        for item in col.find(cond).skip(skip).limit(limit):
+        for item in col.find(cond).sort(sort_key, sort_direction).skip(skip).limit(limit):
             data.append(item)
         return data
 
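The hunk above relies on `DESCENDING` being imported from pymongo at the top of the module, which this diff does not show. A minimal, self-contained sketch of the new behavior follows; the database name, connection details, and the two example call sites are illustrative assumptions, not code from the commit:

    from pymongo import MongoClient, DESCENDING, ASCENDING

    class DbManager(object):
        def __init__(self):
            # connection details are assumptions for this sketch
            self.db = MongoClient()['crawlab']

        def list(self, col_name, cond, sort_key=None, sort_direction=DESCENDING,
                 skip=0, limit=100, **kwargs):
            if sort_key is None:
                sort_key = '_id'  # ObjectIds are time-ordered, so this means newest first
            col = self.db[col_name]
            # sort() runs server-side before skip/limit, so pagination stays stable
            return [item for item in
                    col.find(cond).sort(sort_key, sort_direction).skip(skip).limit(limit)]

    db_manager = DbManager()
    latest_tasks = db_manager.list('tasks', {}, sort_key='finish_ts')  # newest first
    oldest_deploys = db_manager.list('deploys', {}, sort_key='create_ts',
                                     sort_direction=ASCENDING)         # oldest first

Keeping `sort_direction=DESCENDING` as the default means every existing caller that only passes a `sort_key` gets "most recent first" for free.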
@@ -142,7 +142,7 @@ class SpiderApi(BaseApi):
             })
 
     def get_deploys(self, id):
-        items = db_manager.list('deploys', {'spider_id': ObjectId(id)}, limit=10)
+        items = db_manager.list('deploys', {'spider_id': ObjectId(id)}, limit=10, sort_key='create_ts')
         deploys = []
         for item in items:
             spider_id = item['spider_id']
@@ -155,7 +155,7 @@ class SpiderApi(BaseApi):
             })
 
     def get_tasks(self, id):
-        items = db_manager.list('tasks', {'spider_id': ObjectId(id)}, limit=10)
+        items = db_manager.list('tasks', {'spider_id': ObjectId(id)}, limit=10, sort_key='finish_ts')
         for item in items:
             spider_id = item['spider_id']
             spider = db_manager.get('spiders', id=str(spider_id))
 
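Both call sites pick up the new `DESCENDING` default, so `get_deploys` and `get_tasks` now return the ten most recent records rather than ten arbitrary ones. For reference, the equivalent raw pymongo query for the deploys case; the connection and the placeholder id are assumptions:

    from bson import ObjectId
    from pymongo import MongoClient, DESCENDING

    db = MongoClient()['crawlab']
    spider_id = ObjectId('0123456789ab0123456789ab')  # placeholder, 24 hex chars
    cursor = (db['deploys']
              .find({'spider_id': spider_id})
              .sort('create_ts', DESCENDING)
              .limit(10))
    deploys = list(cursor)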
@@ -1,4 +1,5 @@
 import os
+from datetime import datetime, timedelta
 
 from flask_restful import reqparse, Resource
 
@@ -54,9 +55,20 @@ class StatsApi(Resource):
                 }
             }
         ])
-        daily_tasks = []
+        date_cache = {}
         for item in cur:
-            daily_tasks.append(item)
+            date_cache[item['_id']] = item['count']
+        start_date = datetime.now() - timedelta(31)
+        end_date = datetime.now() - timedelta(1)
+        date = start_date
+        daily_tasks = []
+        while date < end_date:
+            date = date + timedelta(1)
+            date_str = date.strftime('%Y-%m-%d')
+            daily_tasks.append({
+                'date': date_str,
+                'count': date_cache.get(date_str) or 0,
+            })
 
         return {
             'status': 'ok',
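The point of this rewrite: the aggregation only emits a row for days that had at least one task, so the daily chart had gaps. The new code caches the sparse counts by date string, then walks a fixed window over the last month and emits an explicit zero for every missing day. A self-contained sketch of just the back-fill, where the `cur` rows are fabricated stand-ins for the aggregation output:

    from datetime import datetime, timedelta

    # stand-in for the aggregation result; '_id' holds the day string, as in the diff
    cur = [
        {'_id': (datetime.now() - timedelta(3)).strftime('%Y-%m-%d'), 'count': 5},
        {'_id': (datetime.now() - timedelta(1)).strftime('%Y-%m-%d'), 'count': 2},
    ]

    date_cache = {item['_id']: item['count'] for item in cur}

    start_date = datetime.now() - timedelta(31)
    end_date = datetime.now() - timedelta(1)
    date = start_date
    daily_tasks = []
    while date < end_date:
        date = date + timedelta(1)
        date_str = date.strftime('%Y-%m-%d')
        # missing days get an explicit zero instead of disappearing from the series
        daily_tasks.append({'date': date_str, 'count': date_cache.get(date_str) or 0})

    print(daily_tasks)  # one entry per day in the window, zeros where nothing ran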
@@ -33,7 +33,7 @@ class TaskApi(BaseApi):
                     task['log'] = f.read()
             return jsonify(task)
 
-        tasks = db_manager.list('tasks', {}, limit=1000)
+        tasks = db_manager.list('tasks', {}, limit=1000, sort_key='finish_ts')
         items = []
         for task in tasks:
             _task = db_manager.get('tasks_celery', id=task['_id'])
 
@@ -10,5 +10,4 @@ import scrapy
 
 class TaobaoItem(scrapy.Item):
     # define the fields for your item here like:
-    # name = scrapy.Field()
-    pass
+    name = scrapy.Field()
@@ -8,4 +8,5 @@
 
 class TaobaoPipeline(object):
     def process_item(self, item, spider):
+        print('task_id: %s' % spider.task_id)
         return item
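The pipeline prints `spider.task_id`, which implies the runner injects a task id onto the spider; that wiring is not shown in this commit. One plausible setup is passing it as a spider argument (e.g. `scrapy crawl taobao_spider -a task_id=...`), since Scrapy turns `-a` arguments into spider attributes. Under that assumption, a sketch of a pipeline that stamps items instead of just printing:

    import scrapy

    class TaobaoItem(scrapy.Item):
        name = scrapy.Field()
        task_id = scrapy.Field()  # hypothetical extra field, added for this sketch

    class TaobaoPipeline(object):
        def process_item(self, item, spider):
            # -a arguments become spider attributes; default to None if absent
            item['task_id'] = getattr(spider, 'task_id', None)
            return item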
@@ -19,7 +19,8 @@ NEWSPIDER_MODULE = 'taobao.spiders'
 #USER_AGENT = 'taobao (+http://www.yourdomain.com)'
 
 # Obey robots.txt rules
-ROBOTSTXT_OBEY = True
+# ROBOTSTXT_OBEY = True
+ROBOTSTXT_OBEY = False
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
 #CONCURRENT_REQUESTS = 32
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
+import os
+
 import scrapy
+
+from ..items import TaobaoItem
 
 
 class TaobaoSpiderSpider(scrapy.Spider):
     name = 'taobao_spider'
@@ -8,4 +12,4 @@ class TaobaoSpiderSpider(scrapy.Spider):
     start_urls = ['http://taobao.com/']
 
     def parse(self, response):
-        pass
+        yield TaobaoItem()
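`parse` now yields a `TaobaoItem` instead of `pass`, though the item is still empty. A sketch of what a populated version might look like; the CSS selector is an illustrative assumption, not part of the commit:

    import scrapy

    from ..items import TaobaoItem  # resolves inside the taobao package


    class TaobaoSpiderSpider(scrapy.Spider):
        name = 'taobao_spider'
        start_urls = ['http://taobao.com/']

        def parse(self, response):
            # assumed selector: treat every link text on the page as a name
            for text in response.css('a::text').getall():
                item = TaobaoItem()
                item['name'] = text.strip()
                yield item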
@@ -46,6 +46,7 @@ def execute_spider(self, id: str, node_id: str):
         'node_id': node_id,
         'hostname': hostname,
         'log_file_path': log_file_path,
+        'spider_version': latest_version
     })
 
     # execute the command