mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-23 17:31:11 +01:00
27 lines
680 B
Markdown
27 lines
680 B
Markdown
### 与Scrapy集成

以下是Crawlab跟`Scrapy`集成的例子,利用了Crawlab传过来的`task_id`和`collection_name`。

```python
import os

from pymongo import MongoClient

# MongoDB connection settings for the example; in a real deployment these
# would typically come from configuration rather than hard-coded values.
MONGO_HOST = '192.168.99.100'
MONGO_PORT = 27017
MONGO_DB = 'crawlab_test'


# Scrapy item pipeline example for Crawlab integration.
#
# Crawlab passes the target collection name and the current task id to the
# spider process via the CRAWLAB_COLLECTION and CRAWLAB_TASK_ID environment
# variables; this pipeline tags each item with the task id and stores it in
# the assigned collection.
class JuejinPipeline(object):
    # Client/collection are resolved once at class-definition time.
    mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    db = mongo[MONGO_DB]
    col_name = os.environ.get('CRAWLAB_COLLECTION')
    if not col_name:
        # Fall back to a default collection when not launched by Crawlab.
        col_name = 'test'
    col = db[col_name]

    def process_item(self, item, spider):
        """Tag *item* with the Crawlab task id and persist it to MongoDB."""
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        # Collection.save() was removed in PyMongo 4.0; insert_one() is the
        # modern equivalent for inserting new documents. dict() converts a
        # Scrapy Item (a mapping) into a plain dict for insertion.
        self.col.insert_one(dict(item))
        return item
```