mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-23 17:31:11 +01:00
updated docs
This commit is contained in:
0
gitbook/Examples/PuppeteerIntegration.md
Normal file
0
gitbook/Examples/PuppeteerIntegration.md
Normal file
@@ -1,2 +1,4 @@
|
||||
# Examples
|
||||
## 样例
|
||||
|
||||
1. [与Scrapy集成](/Examples/ScrapyIntegration.md)
|
||||
|
||||
|
||||
26
gitbook/Examples/ScrapyIntegration.md
Normal file
26
gitbook/Examples/ScrapyIntegration.md
Normal file
@@ -0,0 +1,26 @@
|
||||
### 与Scrapy集成
|
||||
|
||||
以下是Crawlab跟`Scrapy`集成的例子,利用了Crawlab传过来的`task_id`和`collection_name`。
|
||||
|
||||
```python
|
||||
import os
|
||||
from pymongo import MongoClient
|
||||
|
||||
# MongoDB connection settings consumed by JuejinPipeline.
# Host name / IP address handed to MongoClient.
MONGO_HOST = '192.168.99.100'
|
||||
# TCP port handed to MongoClient.
MONGO_PORT = 27017
|
||||
# Name of the database selected from the client.
MONGO_DB = 'crawlab_test'
|
||||
|
||||
# Example Scrapy item pipeline that stores scraped items in MongoDB
|
||||
class JuejinPipeline(object):
    """Scrapy item pipeline that writes every item to a MongoDB collection.

    The collection name is read from the ``CRAWLAB_COLLECTION`` environment
    variable (``'test'`` when it is unset or empty), and each item is tagged
    with the value of ``CRAWLAB_TASK_ID`` before being stored.
    """

    # Class-level attributes: evaluated once when the class body runs and
    # shared by every pipeline instance.
    mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    db = mongo[MONGO_DB]

    # An unset or empty CRAWLAB_COLLECTION falls back to 'test'.
    col_name = os.environ.get('CRAWLAB_COLLECTION') or 'test'
    col = db[col_name]

    def process_item(self, item, spider):
        """Tag *item* with the current task id, store it, and return it.

        Args:
            item: The scraped item; it must accept a ``task_id`` key.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item`` object that was passed in.
        """
        # None is stored when CRAWLAB_TASK_ID is not set.
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')

        # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0.
        # Keep its semantics explicitly: upsert by _id when the item already
        # carries one, otherwise insert a new document.
        if '_id' in item:
            self.col.replace_one({'_id': item['_id']}, item, upsert=True)
        else:
            self.col.insert_one(item)
        return item
|
||||
```
|
||||
Reference in New Issue
Block a user