mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
modified task status update logic
This commit is contained in:
@@ -34,9 +34,10 @@ class TaskApi(BaseApi):
|
||||
_task = db_manager.get('tasks_celery', id=task['_id'])
|
||||
_spider = db_manager.get('spiders', id=str(task['spider_id']))
|
||||
if _task:
|
||||
task['status'] = _task['status']
|
||||
else:
|
||||
task['status'] = TaskStatus.UNAVAILABLE
|
||||
if not task.get('status'):
|
||||
task['status'] = _task['status']
|
||||
# else:
|
||||
# task['status'] = TaskStatus.UNAVAILABLE
|
||||
task['result'] = _task['result']
|
||||
task['spider_name'] = _spider['name']
|
||||
try:
|
||||
|
||||
@@ -5,6 +5,7 @@ from bson import ObjectId
|
||||
from celery.utils.log import get_logger
|
||||
|
||||
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER
|
||||
from constants.task import TaskStatus
|
||||
from db.manager import db_manager
|
||||
from .celery import celery_app
|
||||
import subprocess
|
||||
@@ -44,6 +45,7 @@ def execute_spider(self, id: str):
|
||||
'node_id': 'celery@%s' % hostname,
|
||||
'hostname': hostname,
|
||||
'log_file_path': log_file_path,
|
||||
'status': TaskStatus.PENDING
|
||||
})
|
||||
|
||||
# execute the command
|
||||
@@ -61,9 +63,17 @@ def execute_spider(self, id: str):
|
||||
# get output from the process
|
||||
_stdout, _stderr = p.communicate()
|
||||
|
||||
# get return code
|
||||
code = p.poll()
|
||||
if code == 0:
|
||||
status = TaskStatus.SUCCESS
|
||||
else:
|
||||
status = TaskStatus.FAILURE
|
||||
|
||||
# save task when the task is finished
|
||||
db_manager.update_one('tasks', id=task_id, values={
|
||||
'finish_ts': datetime.now(),
|
||||
'status': status
|
||||
})
|
||||
task = db_manager.get('tasks', id=id)
|
||||
|
||||
|
||||
81
spiders/segmentfault/package-lock.json
generated
Normal file
81
spiders/segmentfault/package-lock.json
generated
Normal file
@@ -0,0 +1,81 @@
|
||||
{
|
||||
"name": "segmentfault",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
"bson": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "http://registry.npm.taobao.org/bson/download/bson-1.1.1.tgz",
|
||||
"integrity": "sha1-QzD16ZEExOdR5zUYWeLUCCefLxM="
|
||||
},
|
||||
"memory-pager": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "http://registry.npm.taobao.org/memory-pager/download/memory-pager-1.5.0.tgz",
|
||||
"integrity": "sha1-2HUWVdItOEaCdByXLyw9bfo+ZrU=",
|
||||
"optional": true
|
||||
},
|
||||
"mongodb": {
|
||||
"version": "3.1.13",
|
||||
"resolved": "http://registry.npm.taobao.org/mongodb/download/mongodb-3.1.13.tgz",
|
||||
"integrity": "sha1-+M3Ls2rXoItXC9EnHIUldT91+fQ=",
|
||||
"requires": {
|
||||
"mongodb-core": "3.1.11",
|
||||
"safe-buffer": "^5.1.2"
|
||||
}
|
||||
},
|
||||
"mongodb-core": {
|
||||
"version": "3.1.11",
|
||||
"resolved": "http://registry.npm.taobao.org/mongodb-core/download/mongodb-core-3.1.11.tgz",
|
||||
"integrity": "sha1-slMDjbtNcynz0cLuVAC7DJIh/eU=",
|
||||
"requires": {
|
||||
"bson": "^1.1.0",
|
||||
"require_optional": "^1.0.1",
|
||||
"safe-buffer": "^5.1.2",
|
||||
"saslprep": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"require_optional": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "http://registry.npm.taobao.org/require_optional/download/require_optional-1.0.1.tgz",
|
||||
"integrity": "sha1-TPNaQkf2TKPfjC7yCMxJSxyo/C4=",
|
||||
"requires": {
|
||||
"resolve-from": "^2.0.0",
|
||||
"semver": "^5.1.0"
|
||||
}
|
||||
},
|
||||
"resolve-from": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "http://registry.npm.taobao.org/resolve-from/download/resolve-from-2.0.0.tgz",
|
||||
"integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
|
||||
},
|
||||
"safe-buffer": {
|
||||
"version": "5.1.2",
|
||||
"resolved": "http://registry.npm.taobao.org/safe-buffer/download/safe-buffer-5.1.2.tgz",
|
||||
"integrity": "sha1-mR7GnSluAxN0fVm9/St0XDX4go0="
|
||||
},
|
||||
"saslprep": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "http://registry.npm.taobao.org/saslprep/download/saslprep-1.0.2.tgz",
|
||||
"integrity": "sha1-2lq5NubqC7rpEf/sd1NL43DJ9S0=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"sparse-bitfield": "^3.0.3"
|
||||
}
|
||||
},
|
||||
"semver": {
|
||||
"version": "5.6.0",
|
||||
"resolved": "http://registry.npm.taobao.org/semver/download/semver-5.6.0.tgz",
|
||||
"integrity": "sha1-fnQlb7qknHWqfHogXMInmcrIAAQ="
|
||||
},
|
||||
"sparse-bitfield": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "http://registry.npm.taobao.org/sparse-bitfield/download/sparse-bitfield-3.0.3.tgz",
|
||||
"integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=",
|
||||
"optional": true,
|
||||
"requires": {
|
||||
"memory-pager": "^1.0.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
14
spiders/segmentfault/package.json
Normal file
14
spiders/segmentfault/package.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "segmentfault",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "segmentfault_spider.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"mongodb": "^3.1.13"
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Before: Width: (not shown) | Height: (not shown) | Size: 129 KiB
After: Width: (not shown) | Height: (not shown) | Size: 144 KiB
@@ -1,20 +1,27 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
const MongoClient = require('mongodb').MongoClient;
|
||||
|
||||
(async () => {
|
||||
// browser
|
||||
const browser = await (puppeteer.launch({
|
||||
timeout: 15000
|
||||
}));
|
||||
|
||||
// define start url
|
||||
const url = 'https://segmentfault.com/newest';
|
||||
|
||||
// start a new page
|
||||
const page = await browser.newPage();
|
||||
|
||||
// navigate to url
|
||||
await page.goto(url);
|
||||
await page.waitFor(2000);
|
||||
|
||||
// take a screenshot
|
||||
await page.screenshot({path: 'screenshot.png'});
|
||||
|
||||
const titles = await page.evaluate(sel => {
|
||||
// scrape data
|
||||
const results = await page.evaluate(() => {
|
||||
let results = [];
|
||||
document.querySelectorAll('.news-list .news-item .news__item-title').forEach(el => {
|
||||
results.push({
|
||||
@@ -24,7 +31,24 @@ const puppeteer = require('puppeteer');
|
||||
return results;
|
||||
});
|
||||
|
||||
console.log(titles);
|
||||
// open database connection
|
||||
const client = await MongoClient.connect('mongodb://localhost/crawlab_test');
|
||||
let db = await client.db('test');
|
||||
const colName = process.env.CRAWLAB_COLLECTION;
|
||||
const taskId = process.env.CRAWLAB_TASK_ID;
|
||||
const col = db.collection(colName);
|
||||
|
||||
// save to database
|
||||
await results.forEach(d => {
|
||||
d.task_id = taskId;
|
||||
col.save(d);
|
||||
});
|
||||
|
||||
// close database connection
|
||||
db.close();
|
||||
|
||||
console.log(results);
|
||||
|
||||
// shutdown browser
|
||||
browser.close();
|
||||
})();
|
||||
Reference in New Issue
Block a user