updated README

Marvin Zhang
2019-03-13 19:27:14 +08:00
parent e22f5dc5a5
commit ff631e1095
6 changed files with 10 additions and 92 deletions

View File

@@ -96,12 +96,16 @@ Crawlab's architecture is very similar to Celery's, but adds components including the frontend, spiders, F
The frontend is essentially a single-page application based on [Vue-Element-Admin](https://github.com/PanJiaChen/vue-element-admin). It reuses many Element-UI components to support the corresponding views.
## Linking Results
## Integration with Other Frameworks
Tasks are executed via `Popen` from Python's `subprocess` module. The task ID is exposed to the spider process as the environment variable `CRAWLAB_TASK_ID` and is used to link scraped data to the task.
In your spider program, store the value of `CRAWLAB_TASK_ID` in the database under the key `task_id`. This is how Crawlab knows which scraped results belong to which spider task. For now, Crawlab only supports MongoDB.
### Scrapy
Below is an example of integrating Crawlab with Scrapy, using the `task_id` and `collection_name` passed in by Crawlab.
```python
import os
from pymongo import MongoClient
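# The original example continues beyond this hunk. What follows is a minimal
# sketch of how such a Scrapy item pipeline could look; the MONGO_* settings,
# the class name and the CRAWLAB_COLLECTION variable are illustrative
# assumptions, not taken from the original README.

MONGO_HOST = os.environ.get('MONGO_HOST', 'localhost')
MONGO_PORT = int(os.environ.get('MONGO_PORT', '27017'))
MONGO_DB = os.environ.get('MONGO_DB', 'crawlab_test')


class CrawlabMongoPipeline(object):
    def open_spider(self, spider):
        self.client = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        self.db = self.client[MONGO_DB]
        # collection name passed in by Crawlab (assumed environment variable)
        self.col = self.db[os.environ.get('CRAWLAB_COLLECTION', 'results')]

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        # tag the item with the Crawlab task ID so results can be linked to the task
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        self.col.insert_one(dict(item))
        return item

# Remember to register the pipeline in settings.py via ITEM_PIPELINES.
```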

View File

@@ -96,12 +96,16 @@ Broker is the same as defined in Celery. It is the queue for running async tasks
The frontend is basically a Vue SPA built on the [Vue-Element-Admin](https://github.com/PanJiaChen/vue-element-admin) template by [PanJiaChen](https://github.com/PanJiaChen). Thanks for his awesome template.
## Linking Results
## Integration with Other Frameworks
A task is triggered via `Popen` from Python's `subprocess` module. The task ID is defined as the environment variable `CRAWLAB_TASK_ID` in the process running the spider and is used to link scraped data to the task.
In your spider program, you should store the `CRAWLAB_TASK_ID` value in the database under the key `task_id`. Then Crawlab will know how to link those results to a particular task. For now, Crawlab only supports MongoDB.
### Scrapy
Below is an example of integrating Crawlab with Scrapy via an item pipeline.
```python
import os
from pymongo import MongoClient
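# The original example continues beyond this hunk. What follows is a minimal
# sketch of how such a Scrapy item pipeline could look; the MONGO_* settings,
# the class name and the CRAWLAB_COLLECTION variable are illustrative
# assumptions, not taken from the original README.

MONGO_HOST = os.environ.get('MONGO_HOST', 'localhost')
MONGO_PORT = int(os.environ.get('MONGO_PORT', '27017'))
MONGO_DB = os.environ.get('MONGO_DB', 'crawlab_test')


class CrawlabMongoPipeline(object):
    def open_spider(self, spider):
        self.client = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        self.db = self.client[MONGO_DB]
        # collection name passed in by Crawlab (assumed environment variable)
        self.col = self.db[os.environ.get('CRAWLAB_COLLECTION', 'results')]

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        # tag the item with the Crawlab task ID so results can be linked to the task
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        self.col.insert_one(dict(item))
        return item

# Remember to register the pipeline in settings.py via ITEM_PIPELINES.
```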

View File

@@ -18,7 +18,6 @@ const actions = {
    request.get('/deploys')
      .then(response => {
        commit('SET_DEPLOY_LIST', response.data.items.map(d => {
          if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
          return d
        }).sort((a, b) => a.finish_ts < b.finish_ts ? 1 : -1))
      })

View File

@@ -103,7 +103,6 @@ const actions = {
      .then(response => {
        commit('deploy/SET_DEPLOY_LIST',
          response.data.items.map(d => {
            if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
            return d
          }).sort((a, b) => a.finish_ts < b.finish_ts ? 1 : -1),
          { root: true })
@@ -114,8 +113,6 @@ const actions = {
      .then(response => {
        commit('task/SET_TASK_LIST',
          response.data.items.map(d => {
            if (d.create_ts) d.create_ts = dayjs(d.create_ts.$date).format('YYYY-MM-DD HH:mm:ss')
            if (d.finish_ts) d.finish_ts = dayjs(d.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
            return d
          }).sort((a, b) => a.create_ts < b.create_ts ? 1 : -1),
          { root: true })

View File

@@ -61,11 +61,6 @@ const actions = {
    return request.get(`/tasks/${id}`)
      .then(response => {
        let data = response.data
        if (data.create_ts && data.finish_ts) {
          data.duration = dayjs(data.finish_ts.$date).diff(dayjs(data.create_ts.$date), 'second')
        }
        if (data.create_ts) data.create_ts = dayjs(data.create_ts.$date).format('YYYY-MM-DD HH:mm:ss')
        if (data.finish_ts) data.finish_ts = dayjs(data.finish_ts.$date).format('YYYY-MM-DD HH:mm:ss')
        commit('SET_TASK_FORM', data)
        dispatch('spider/getSpiderData', data.spider_id, { root: true })
        dispatch('node/getNodeData', data.node_id, { root: true })

View File

@@ -1,81 +0,0 @@
{
  "name": "segmentfault",
  "version": "1.0.0",
  "lockfileVersion": 1,
  "requires": true,
  "dependencies": {
    "bson": {
      "version": "1.1.1",
      "resolved": "http://registry.npm.taobao.org/bson/download/bson-1.1.1.tgz",
      "integrity": "sha1-QzD16ZEExOdR5zUYWeLUCCefLxM="
    },
    "memory-pager": {
      "version": "1.5.0",
      "resolved": "http://registry.npm.taobao.org/memory-pager/download/memory-pager-1.5.0.tgz",
      "integrity": "sha1-2HUWVdItOEaCdByXLyw9bfo+ZrU=",
      "optional": true
    },
    "mongodb": {
      "version": "3.1.13",
      "resolved": "http://registry.npm.taobao.org/mongodb/download/mongodb-3.1.13.tgz",
      "integrity": "sha1-+M3Ls2rXoItXC9EnHIUldT91+fQ=",
      "requires": {
        "mongodb-core": "3.1.11",
        "safe-buffer": "^5.1.2"
      }
    },
    "mongodb-core": {
      "version": "3.1.11",
      "resolved": "http://registry.npm.taobao.org/mongodb-core/download/mongodb-core-3.1.11.tgz",
      "integrity": "sha1-slMDjbtNcynz0cLuVAC7DJIh/eU=",
      "requires": {
        "bson": "^1.1.0",
        "require_optional": "^1.0.1",
        "safe-buffer": "^5.1.2",
        "saslprep": "^1.0.0"
      }
    },
    "require_optional": {
      "version": "1.0.1",
      "resolved": "http://registry.npm.taobao.org/require_optional/download/require_optional-1.0.1.tgz",
      "integrity": "sha1-TPNaQkf2TKPfjC7yCMxJSxyo/C4=",
      "requires": {
        "resolve-from": "^2.0.0",
        "semver": "^5.1.0"
      }
    },
    "resolve-from": {
      "version": "2.0.0",
      "resolved": "http://registry.npm.taobao.org/resolve-from/download/resolve-from-2.0.0.tgz",
      "integrity": "sha1-lICrIOlP+h2egKgEx+oUdhGWa1c="
    },
    "safe-buffer": {
      "version": "5.1.2",
      "resolved": "http://registry.npm.taobao.org/safe-buffer/download/safe-buffer-5.1.2.tgz",
      "integrity": "sha1-mR7GnSluAxN0fVm9/St0XDX4go0="
    },
    "saslprep": {
      "version": "1.0.2",
      "resolved": "http://registry.npm.taobao.org/saslprep/download/saslprep-1.0.2.tgz",
      "integrity": "sha1-2lq5NubqC7rpEf/sd1NL43DJ9S0=",
      "optional": true,
      "requires": {
        "sparse-bitfield": "^3.0.3"
      }
    },
    "semver": {
      "version": "5.6.0",
      "resolved": "http://registry.npm.taobao.org/semver/download/semver-5.6.0.tgz",
      "integrity": "sha1-fnQlb7qknHWqfHogXMInmcrIAAQ="
    },
    "sparse-bitfield": {
      "version": "3.0.3",
      "resolved": "http://registry.npm.taobao.org/sparse-bitfield/download/sparse-bitfield-3.0.3.tgz",
      "integrity": "sha1-/0rm5oZWBWuks+eSqzM004JzyhE=",
      "optional": true,
      "requires": {
        "memory-pager": "^1.0.2"
      }
    }
  }
}