From ff631e109557abede336a943b885da2d75763a9e Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Wed, 13 Mar 2019 19:27:14 +0800
Subject: [PATCH] updated README

---
 README-zh.md                           |  6 +-
 README.md                              |  6 +-
 frontend/src/store/modules/deploy.js   |  1 -
 frontend/src/store/modules/spider.js   |  3 -
 frontend/src/store/modules/task.js     |  5 --
 spiders/segmentfault/package-lock.json | 81 --------------------------
 6 files changed, 10 insertions(+), 92 deletions(-)
 delete mode 100644 spiders/segmentfault/package-lock.json

diff --git a/README-zh.md b/README-zh.md
index b6551af1..057d9cf6 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -96,12 +96,16 @@ Crawlab's architecture is very similar to Celery's, but adds components including the frontend, spiders, F
 
 The frontend is simply a single-page application based on [Vue-Element-Admin](https://github.com/PanJiaChen/vue-element-admin). It reuses many Element-UI components to support the corresponding views.
 
-## Linking Data
+## Integration with Other Frameworks
 
 Tasks are executed via `Popen` from Python's `subprocess` module. The task ID is exposed to the spider process as the environment variable `CRAWLAB_TASK_ID` and is used to associate scraped data with the task.
 
 In your spider program, you need to store the value of `CRAWLAB_TASK_ID` in the database under the key `task_id`. That way Crawlab knows how to associate a spider task with its scraped data. Currently, Crawlab only supports MongoDB.
 
+### Scrapy
+
+Below is an example of integrating Crawlab with Scrapy, using the `task_id` and `collection_name` passed in by Crawlab.
+
 ```python
 import os
 from pymongo import MongoClient
diff --git a/README.md b/README.md
index 4427d1b5..9afc64d9 100644
--- a/README.md
+++ b/README.md
@@ -96,12 +96,16 @@ Broker is the same as defined in Celery. It is the queue for running async tasks
 
 Frontend is basically a Vue SPA that inherits from [Vue-Element-Admin](https://github.com/PanJiaChen/vue-element-admin) of [PanJiaChen](https://github.com/PanJiaChen). Thanks for his awesome template.
 
-## Linking Results
+## Integration with Other Frameworks
 
 A task is triggered via `Popen` in Python's `subprocess` module. A task ID will be defined as the variable `CRAWLAB_TASK_ID` in the shell environment to link the data to the task.
 
 In your spider program, you should store the `CRAWLAB_TASK_ID` value in the database with the key `task_id`. Then Crawlab will know how to link those results to a particular task. For now, Crawlab only supports MongoDB.
 
+### Scrapy
+
+Below is an example of integrating Crawlab with Scrapy via an item pipeline.
+
 ```python
 import os
 from pymongo import MongoClient
diff --git a/frontend/src/store/modules/deploy.js b/frontend/src/store/modules/deploy.js
index 59eac948..56cfe868 100644
--- a/frontend/src/store/modules/deploy.js
+++ b/frontend/src/store/modules/deploy.js
@@ -18,7 +18,6 @@ const actions = {
     request.get('/deploys')
       .then(response => {
         commit('SET_DEPLOY_LIST', response.data.items.map(d => {
-          if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
           return d
         }).sort((a, b) => a.finish_ts < b.finish_ts ? 1 : -1))
       })
diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js
index 87060362..bdbc1ac7 100644
--- a/frontend/src/store/modules/spider.js
+++ b/frontend/src/store/modules/spider.js
@@ -103,7 +103,6 @@ const actions = {
       .then(response => {
         commit('deploy/SET_DEPLOY_LIST', response.data.items.map(d => {
-          if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
           return d
         }).sort((a, b) => a.finish_ts < b.finish_ts ? 1 : -1), { root: true })
@@ -114,8 +113,6 @@ const actions = {
       .then(response => {
         commit('task/SET_TASK_LIST', response.data.items.map(d => {
-          if (d.create_ts) d.create_ts = dayjs(d.create_ts.$date).format('YYYY-MM-DD HH:mm:ss')
-          if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
           return d
         }).sort((a, b) => a.create_ts < b.create_ts ? 1 : -1), { root: true })
diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js
index e578dd34..7b2321c7 100644
--- a/frontend/src/store/modules/task.js
+++ b/frontend/src/store/modules/task.js
@@ -61,11 +61,6 @@ const actions = {
     return request.get(`/tasks/${id}`)
      .then(response => {
         let data = response.data
-        if (data.create_ts && data.finish_ts) {
-          data.duration = dayjs(data.finish_ts.$date).diff(dayjs(data.create_ts.$date), 'second')
-        }
-        if (data.create_ts) data.create_ts = dayjs(data.create_ts.$date).format('YYYY-MM-DD HH:mm:ss')
-        if (data.finish_ts) data.finish_ts = dayjs(data.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
         commit('SET_TASK_FORM', data)
         dispatch('spider/getSpiderData', data.spider_id, { root: true })
         dispatch('node/getNodeData', data.node_id, { root: true })
diff --git a/spiders/segmentfault/package-lock.json b/spiders/segmentfault/package-lock.json
deleted file mode 100644
index 15a5cdf7..00000000
--- a/spiders/segmentfault/package-lock.json
+++ /dev/null
@@ -1,81 +0,0 @@
-{
-  "name": "segmentfault",
-  "version": "1.0.0",
-  "lockfileVersion": 1,
-  "requires": true,
-  "dependencies": {
-    "bson": {
-      "version": "1.1.1",
-      "resolved": "http://registry.npm.taobao.org/bson/download/bson-1.1.1.tgz",
-      "integrity": "sha1-QzD16ZEExOdR5zUYWeLUCCefLxM="
-    },
-    "memory-pager": {
-      "version": "1.5.0",
-      "resolved": "http://registry.npm.taobao.org/memory-pager/download/memory-pager-1.5.0.tgz",
-      "integrity": "sha1-2HUWVdItOEaCdByXLyw9bfo+ZrU=",
-      "optional": true
-    },
-    "mongodb": {
-      "version": "3.1.13",
-      "resolved": "http://registry.npm.taobao.org/mongodb/download/mongodb-3.1.13.tgz",
-      "integrity": "sha1-+M3Ls2rXoItXC9EnHIUldT91+fQ=",
-      "requires": {
-        "mongodb-core": "3.1.11",
-        "safe-buffer": "^5.1.2"
-      }
-    },
-    "mongodb-core": {
-      "version": "3.1.11",
-      "resolved": "http://registry.npm.taobao.org/mongodb-core/download/mongodb-core-3.1.11.tgz",
-      "integrity": "sha1-slMDjbtNcynz0cLuVAC7DJIh/eU=",
-      "requires": {
-        "bson": "^1.1.0",
-        "require_optional": "^1.0.1",
-        "safe-buffer": "^5.1.2",
-        "saslprep": "^1.0.0"
-      }
-    },
-    "require_optional": {
-      "version": "1.0.1",
-      "resolved": "http://registry.npm.taobao.org/require_optional/download/require_optional-1.0.1.tgz",
-      "integrity": "sha1-TPNaQkf2TKPfjC7yCMxJSxyo/C4=",
-      "requires": {
-        "resolve-from": "^2.0.0",
-        "semver": "^5.1.0"
-      }
-    },
-    "resolve-from": {
-      "version": "2.0.0",
-      "resolved": "http://registry.npm.taobao.org/resolve-from/download/resolve-from-2.0.0.tgz",
-      "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
-    },
-    "safe-buffer": {
-      "version": "5.1.2",
-      "resolved": "http://registry.npm.taobao.org/safe-buffer/download/safe-buffer-5.1.2.tgz",
-      "integrity": "sha1-mR7GnSluAxN0fVm9/St0XDX4go0="
-    },
-    "saslprep": {
-      "version": "1.0.2",
-      "resolved": "http://registry.npm.taobao.org/saslprep/download/saslprep-1.0.2.tgz",
-      "integrity": "sha1-2lq5NubqC7rpEf/sd1NL43DJ9S0=",
-      "optional": true,
-      "requires": {
-        "sparse-bitfield": "^3.0.3"
-      }
-    },
-    "semver": {
-      "version": "5.6.0",
-      "resolved": "http://registry.npm.taobao.org/semver/download/semver-5.6.0.tgz",
-      "integrity": "sha1-fnQlb7qknHWqfHogXMInmcrIAAQ="
-    },
-    "sparse-bitfield": {
-      "version": "3.0.3",
-      "resolved": "http://registry.npm.taobao.org/sparse-bitfield/download/sparse-bitfield-3.0.3.tgz",
-      "integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=",
-      "optional": true,
-      "requires": {
-        "memory-pager": "^1.0.2"
-      }
-    }
-  }
-}
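
The README hunks above stop at the first lines of the Scrapy example (`import os`, `from pymongo import MongoClient`) because the diff context ends there. For reference, a minimal sketch of the kind of item pipeline those hunks describe: it reads `CRAWLAB_TASK_ID` from the environment and stores each item under the key `task_id`, as the README requires. The `CrawlabMongoPipeline` name, the `CRAWLAB_COLLECTION` variable, the `crawlab_test` database, and the localhost connection settings are illustrative assumptions, not taken from this patch.

```python
import os

from pymongo import MongoClient


class CrawlabMongoPipeline(object):
    """Store scraped items in MongoDB, tagged with the Crawlab task ID."""

    def open_spider(self, spider):
        # Connection settings are assumptions; point them at your MongoDB.
        self.client = MongoClient('localhost', 27017)
        db = self.client['crawlab_test']
        # The collection name is assumed to arrive via the environment,
        # mirroring how Popen injects CRAWLAB_TASK_ID into the spider process.
        self.col = db[os.environ.get('CRAWLAB_COLLECTION', 'results')]

    def process_item(self, item, spider):
        data = dict(item)
        # Store the task ID under the key `task_id` so Crawlab can link
        # this record back to the task that produced it.
        data['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        self.col.insert_one(data)
        return item

    def close_spider(self, spider):
        self.client.close()
```

Such a pipeline would be enabled in the Scrapy project's `settings.py` via `ITEM_PIPELINES = {'myproject.pipelines.CrawlabMongoPipeline': 300}`, where the `myproject` module path is likewise hypothetical.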