From 2e5468e4c1c1ac223cc66e66ab26d358b320e46f Mon Sep 17 00:00:00 2001
From: marvzhang
Date: Mon, 25 Nov 2019 16:45:55 +0800
Subject: [PATCH] refactor code

---
Reviewer notes (text between the "---" line and the first "diff --git" line
is not part of the commit message):

* pipelines.py: the original hunk tested and used `col` in process_item()
  but only ever assigned `col_name`; no binding for `col` appears anywhere
  in the hunk, so process_item() would fail with NameError. This revision
  adds `col = db[col_name] if col_name else None`, which also makes the
  existing `if col is not None` guard meaningful when CRAWLAB_COLLECTION is
  unset. The hunk header (+5,24), the diffstat row and the insertion total
  are updated for the extra line; the commit id on the first line and the
  post-image blob id on the pipelines.py "index" line were NOT recomputed.
* task.go: the two comments introduced by this patch are written in
  English. Comments on context lines are untouched because they must match
  the file being patched.
* Layout: line breaks, indentation (tabs in Go, four spaces in Python) and
  blank context lines were reconstructed from the hunk line counts. Blank
  context lines consist of a single space, including the last line of the
  file.go hunk. Verify against the tree before applying.

 backend/services/task.go              |  9 ++++++-
 .../scrapy/config_spider/pipelines.py | 17 ++++++++++++
 backend/utils/file.go                 | 26 +++++++++----------
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/backend/services/task.go b/backend/services/task.go
index 9e584e82..7138f0f9 100644
--- a/backend/services/task.go
+++ b/backend/services/task.go
@@ -369,7 +369,14 @@ func ExecuteTask(id int) {
 	)
 
 	// 执行命令
-	cmd := spider.Cmd
+	var cmd string
+	if spider.Type == constants.Configurable {
+		// command for configurable spiders
+		cmd = "scrapy crawl config_spider"
+	} else {
+		// command for custom spiders
+		cmd = spider.Cmd
+	}
 
 	// 加入参数
 	if t.Param != "" {
diff --git a/backend/template/scrapy/config_spider/pipelines.py b/backend/template/scrapy/config_spider/pipelines.py
index 5a61b924..830ab0b0 100644
--- a/backend/template/scrapy/config_spider/pipelines.py
+++ b/backend/template/scrapy/config_spider/pipelines.py
@@ -5,7 +5,24 @@
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 
+import os
+from pymongo import MongoClient
+
+mongo = MongoClient(
+    host=os.environ.get('CRAWLAB_MONGO_HOST'),
+    port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
+    username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
+    password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
+    authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE')
+)
+db = mongo[os.environ.get('CRAWLAB_MONGO_DB')]
+col_name = os.environ.get('CRAWLAB_COLLECTION')
+col = db[col_name] if col_name else None
+task_id = os.environ.get('CRAWLAB_TASK_ID')
 
 class ConfigSpiderPipeline(object):
     def process_item(self, item, spider):
+        item['task_id'] = task_id
+        if col is not None:
+            col.save(item)
         return item
diff --git a/backend/utils/file.go b/backend/utils/file.go
index 681d129a..5e461038 100644
--- a/backend/utils/file.go
+++ b/backend/utils/file.go
@@ -276,40 +276,40 @@ func GetFilesFromDir(dirPath string) ([]*os.File, error) {
 // File copies a single file from src to dst
 func CopyFile(src, dst string) error {
 	var err error
-	var srcfd *os.File
-	var dstfd *os.File
-	var srcinfo os.FileInfo
+	var srcFd *os.File
+	var dstFd *os.File
+	var srcInfo os.FileInfo
 
-	if srcfd, err = os.Open(src); err != nil {
+	if srcFd, err = os.Open(src); err != nil {
 		return err
 	}
-	defer srcfd.Close()
+	defer srcFd.Close()
 
-	if dstfd, err = os.Create(dst); err != nil {
+	if dstFd, err = os.Create(dst); err != nil {
 		return err
 	}
-	defer dstfd.Close()
+	defer dstFd.Close()
 
-	if _, err = io.Copy(dstfd, srcfd); err != nil {
+	if _, err = io.Copy(dstFd, srcFd); err != nil {
 		return err
 	}
-	if srcinfo, err = os.Stat(src); err != nil {
+	if srcInfo, err = os.Stat(src); err != nil {
 		return err
 	}
-	return os.Chmod(dst, srcinfo.Mode())
+	return os.Chmod(dst, srcInfo.Mode())
 }
 
 // Dir copies a whole directory recursively
 func CopyDir(src string, dst string) error {
 	var err error
 	var fds []os.FileInfo
-	var srcinfo os.FileInfo
+	var srcInfo os.FileInfo
 
-	if srcinfo, err = os.Stat(src); err != nil {
+	if srcInfo, err = os.Stat(src); err != nil {
 		return err
 	}
 
-	if err = os.MkdirAll(dst, srcinfo.Mode()); err != nil {
+	if err = os.MkdirAll(dst, srcInfo.Mode()); err != nil {
 		return err
 	}
 