diff --git a/CHANGELOG.md b/CHANGELOG.md index 671aa8a1..aa2682ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,15 @@ -# 0.4.2 (unknown) +# 0.4.2 (2019-12-26) ### Features / Enhancement - **Disclaimer**. Added page for Disclaimer. - **Call API to fetch version**. [#371](https://github.com/crawlab-team/crawlab/issues/371) - **Configure to allow user registration**. [#346](https://github.com/crawlab-team/crawlab/issues/346) - **Allow adding new users**. +- **More Advanced File Management**. Allow users to add / edit / rename / delete files. [#286](https://github.com/crawlab-team/crawlab/issues/286) +- **Optimized Spider Creation Process**. Allow users to create an empty customized spider before uploading the zip file. +- **Better Task Management**. Allow users to filter tasks by selecting through certian criterions. [#341](https://github.com/crawlab-team/crawlab/issues/341) ### Bug Fixes +- **Duplicated nodes**. [#391](https://github.com/crawlab-team/crawlab/issues/391) - **"mongodb no reachable" error**. [#373](https://github.com/crawlab-team/crawlab/issues/373) # 0.4.1 (2019-12-13) diff --git a/Dockerfile b/Dockerfile index ddb4d47e..cf8ab174 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ WORKDIR /app # install frontend RUN npm config set unsafe-perm true -RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org +RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org # --sass_binary_site=https://npm.taobao.org/mirrors/node-sass/ RUN npm run build:prod @@ -27,6 +27,9 @@ ADD . 
/app # set as non-interactive ENV DEBIAN_FRONTEND noninteractive +# set CRAWLAB_IS_DOCKER +ENV CRAWLAB_IS_DOCKER Y + # install packages RUN apt-get update \ && apt-get install -y curl git net-tools iputils-ping ntp ntpdate python3 python3-pip \ diff --git a/Dockerfile.local b/Dockerfile.local index ddb4d47e..d99010a4 100644 --- a/Dockerfile.local +++ b/Dockerfile.local @@ -15,7 +15,7 @@ WORKDIR /app # install frontend RUN npm config set unsafe-perm true -RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org +RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org # --sass_binary_site=https://npm.taobao.org/mirrors/node-sass/ RUN npm run build:prod @@ -28,7 +28,8 @@ ADD . /app ENV DEBIAN_FRONTEND noninteractive # install packages -RUN apt-get update \ +RUN chmod 777 /tmp \ + && apt-get update \ && apt-get install -y curl git net-tools iputils-ping ntp ntpdate python3 python3-pip \ && ln -s /usr/bin/pip3 /usr/local/bin/pip \ && ln -s /usr/bin/python3 /usr/local/bin/python diff --git a/README-zh.md b/README-zh.md index 0c943c3e..5b9acf29 100644 --- a/README-zh.md +++ b/README-zh.md @@ -14,7 +14,7 @@ 基于Golang的分布式爬虫管理平台,支持Python、NodeJS、Go、Java、PHP等多种编程语言以及多种爬虫框架。 -[查看演示 Demo](http://crawlab.cn/demo) | [文档](https://tikazyq.github.io/crawlab-docs) +[查看演示 Demo](http://crawlab.cn/demo) | [文档](http://docs.crawlab.cn) ## 安装 @@ -254,6 +254,9 @@ Crawlab使用起来很方便,也很通用,可以适用于几乎任何主流 + + + ## 社区 & 赞助 diff --git a/README.md b/README.md index 11ac8383..7b7c3d2d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Golang-based distributed web crawler management platform, supporting various languages including Python, NodeJS, Go, Java, PHP and various web crawler frameworks including Scrapy, Puppeteer, Selenium. 
-[Demo](http://crawlab.cn/demo) | [Documentation](https://tikazyq.github.io/crawlab-docs) +[Demo](http://crawlab.cn/demo) | [Documentation](http://docs.crawlab.cn) ## Installation @@ -219,6 +219,9 @@ Crawlab is easy to use, general enough to adapt spiders in any language and any + + + ## Community & Sponsorship diff --git a/backend/conf/config.yml b/backend/conf/config.yml index a6522ba5..5ada78f6 100644 --- a/backend/conf/config.yml +++ b/backend/conf/config.yml @@ -32,6 +32,6 @@ task: workers: 4 other: tmppath: "/tmp" -version: 0.4.1 +version: 0.4.2 setting: allowRegister: "N" \ No newline at end of file diff --git a/backend/constants/system.go b/backend/constants/system.go index 59c39787..70d41063 100644 --- a/backend/constants/system.go +++ b/backend/constants/system.go @@ -5,3 +5,9 @@ const ( Linux = "linux" Darwin = "darwin" ) + +const ( + Python = "python" + NodeJS = "node" + Java = "java" +) diff --git a/backend/database/pubsub.go b/backend/database/pubsub.go index 7f647cda..444ce91a 100644 --- a/backend/database/pubsub.go +++ b/backend/database/pubsub.go @@ -58,9 +58,9 @@ func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...s } done <- nil case <-tick.C: - //fmt.Printf("ping message \n") if err := psc.Ping(""); err != nil { - done <- err + fmt.Printf("ping message error: %s \n", err) + //done <- err } case err := <-done: close(done) diff --git a/backend/entity/system.go b/backend/entity/system.go index dff637b7..ac3e9dec 100644 --- a/backend/entity/system.go +++ b/backend/entity/system.go @@ -13,3 +13,18 @@ type Executable struct { FileName string `json:"file_name"` DisplayName string `json:"display_name"` } + +type Lang struct { + Name string `json:"name"` + ExecutableName string `json:"executable_name"` + ExecutablePath string `json:"executable_path"` + DepExecutablePath string `json:"dep_executable_path"` + Installed bool `json:"installed"` +} + +type Dependency struct { + Name string `json:"name"` + Version string 
`json:"version"` + Description string `json:"description"` + Installed bool `json:"installed"` +} diff --git a/backend/go.mod b/backend/go.mod index d59b6d41..89bbdbbc 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -11,6 +11,7 @@ require ( github.com/go-playground/locales v0.12.1 // indirect github.com/go-playground/universal-translator v0.16.0 // indirect github.com/gomodule/redigo v2.0.0+incompatible + github.com/imroc/req v0.2.4 github.com/leodido/go-urn v1.1.0 // indirect github.com/pkg/errors v0.8.1 github.com/satori/go.uuid v1.2.0 diff --git a/backend/go.sum b/backend/go.sum index 55a56852..e4386cc9 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -66,6 +66,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/imroc/req v0.2.4 h1:8XbvaQpERLAJV6as/cB186DtH5f0m5zAOtHEaTQ4ac0= +github.com/imroc/req v0.2.4/go.mod h1:J9FsaNHDTIVyW/b5r6/Df5qKEEEq2WzZKIgKSajd1AE= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= diff --git a/backend/main.go b/backend/main.go index d14f64b7..6a807331 100644 --- a/backend/main.go +++ b/backend/main.go @@ -31,24 +31,24 @@ func main() { log.Error("init config error:" + err.Error()) panic(err) } - log.Info("初始化配置成功") + log.Info("initialized config successfully") // 初始化日志设置 logLevel := viper.GetString("log.level") if logLevel != "" { log.SetLevelFromString(logLevel) } - log.Info("初始化日志设置成功") + log.Info("initialized log config successfully") if 
viper.GetString("log.isDeletePeriodically") == "Y" { err := services.InitDeleteLogPeriodically() if err != nil { - log.Error("Init DeletePeriodically Failed") + log.Error("init DeletePeriodically failed") panic(err) } - log.Info("初始化定期清理日志配置成功") + log.Info("initialized periodically cleaning log successfully") } else { - log.Info("默认未开启定期清理日志配置") + log.Info("periodically cleaning log is switched off") } // 初始化Mongodb数据库 @@ -57,7 +57,7 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化Mongodb数据库成功") + log.Info("initialized MongoDB successfully") // 初始化Redis数据库 if err := database.InitRedis(); err != nil { @@ -65,7 +65,7 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化Redis数据库成功") + log.Info("initialized Redis successfully") if model.IsMaster() { // 初始化定时任务 @@ -74,8 +74,8 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化定时任务成功") } + log.Info("initialized schedule successfully") // 初始化任务执行器 if err := services.InitTaskExecutor(); err != nil { @@ -83,14 +83,14 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化任务执行器成功") + log.Info("initialized task executor successfully") // 初始化节点服务 if err := services.InitNodeService(); err != nil { log.Error("init node service error:" + err.Error()) panic(err) } - log.Info("初始化节点配置成功") + log.Info("initialized node service successfully") // 初始化爬虫服务 if err := services.InitSpiderService(); err != nil { @@ -98,7 +98,7 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化爬虫服务成功") + log.Info("initialized spider service successfully") // 初始化用户服务 if err := services.InitUserService(); err != nil { @@ -106,7 +106,15 @@ func main() { debug.PrintStack() panic(err) } - log.Info("初始化用户服务成功") + log.Info("initialized user service successfully") + + // 初始化依赖服务 + if err := services.InitDepsFetcher(); err != nil { + log.Error("init user service error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("initialized dependency fetcher successfully") // 以下为主节点服务 if 
model.IsMaster() { @@ -122,25 +130,34 @@ func main() { { // 路由 // 节点 - authGroup.GET("/nodes", routes.GetNodeList) // 节点列表 - authGroup.GET("/nodes/:id", routes.GetNode) // 节点详情 - authGroup.POST("/nodes/:id", routes.PostNode) // 修改节点 - authGroup.GET("/nodes/:id/tasks", routes.GetNodeTaskList) // 节点任务列表 - authGroup.GET("/nodes/:id/system", routes.GetSystemInfo) // 节点任务列表 - authGroup.DELETE("/nodes/:id", routes.DeleteNode) // 删除节点 + authGroup.GET("/nodes", routes.GetNodeList) // 节点列表 + authGroup.GET("/nodes/:id", routes.GetNode) // 节点详情 + authGroup.POST("/nodes/:id", routes.PostNode) // 修改节点 + authGroup.GET("/nodes/:id/tasks", routes.GetNodeTaskList) // 节点任务列表 + authGroup.GET("/nodes/:id/system", routes.GetSystemInfo) // 节点任务列表 + authGroup.DELETE("/nodes/:id", routes.DeleteNode) // 删除节点 + authGroup.GET("/nodes/:id/langs", routes.GetLangList) // 节点语言环境列表 + authGroup.GET("/nodes/:id/deps", routes.GetDepList) // 节点第三方依赖列表 + authGroup.GET("/nodes/:id/deps/installed", routes.GetInstalledDepList) // 节点已安装第三方依赖列表 // 爬虫 - authGroup.GET("/spiders", routes.GetSpiderList) // 爬虫列表 - authGroup.GET("/spiders/:id", routes.GetSpider) // 爬虫详情 - authGroup.POST("/spiders", routes.PutSpider) // 上传爬虫 TODO: 名称不对 - authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫 - authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫 - authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 - authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 - authGroup.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 - authGroup.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入 - authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 - authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 - authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型 + authGroup.GET("/spiders", routes.GetSpiderList) // 爬虫列表 + authGroup.GET("/spiders/:id", routes.GetSpider) // 爬虫详情 + authGroup.PUT("/spiders", routes.PutSpider) // 添加爬虫 + 
authGroup.POST("/spiders", routes.UploadSpider) // 上传爬虫 + authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫 + authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫 + authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId) // 上传爬虫(ID) + authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 + authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 + authGroup.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 + authGroup.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫文件更改 + authGroup.PUT("/spiders/:id/file", routes.PutSpiderFile) // 爬虫文件创建 + authGroup.PUT("/spiders/:id/dir", routes.PutSpiderDir) // 爬虫目录创建 + authGroup.DELETE("/spiders/:id/file", routes.DeleteSpiderFile) // 爬虫文件删除 + authGroup.POST("/spiders/:id/file/rename", routes.RenameSpiderFile) // 爬虫文件重命名 + authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 + authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 + authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型 // 可配置爬虫 authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置 authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置 @@ -178,6 +195,8 @@ func main() { authGroup.GET("/me", routes.GetMe) // 获取自己账户 // release版本 authGroup.GET("/version", routes.GetVersion) // 获取发布的版本 + // 系统 + authGroup.GET("/system/deps", routes.GetAllDepList) // 节点所有第三方依赖列表 } } diff --git a/backend/model/node.go b/backend/model/node.go index 2fe810f8..effbfbd0 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -63,7 +63,9 @@ func GetCurrentNode() (Node, error) { // 如果获取失败 if err != nil { // 如果为主节点,表示为第一次注册,插入节点信息 - if IsMaster() { + // update: 增加具体错误过滤。防止加入多个master节点,后续需要职责拆分, + //只在master节点运行的时候才检测master节点的信息是否存在 + if IsMaster() && err == mgo.ErrNotFound { // 获取本机信息 ip, mac, key, err := GetNodeBaseInfo() if err != nil { diff --git a/backend/model/spider.go b/backend/model/spider.go index 
78adc4d0..02c3aa8d 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -157,15 +157,15 @@ func GetSpiderByFileId(fileId bson.ObjectId) *Spider { } // 获取爬虫(根据名称) -func GetSpiderByName(name string) *Spider { +func GetSpiderByName(name string) Spider { s, c := database.GetCol("spiders") defer s.Close() - var result *Spider + var result Spider if err := c.Find(bson.M{"name": name}).One(&result); err != nil { log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) //debug.PrintStack() - return nil + return result } return result } diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go index e387935a..ac6a11e0 100644 --- a/backend/routes/config_spider.go +++ b/backend/routes/config_spider.go @@ -40,7 +40,7 @@ func PutConfigSpider(c *gin.Context) { } // 判断爬虫是否存在 - if spider := model.GetSpiderByName(spider.Name); spider != nil { + if spider := model.GetSpiderByName(spider.Name); spider.Name != "" { HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name)) return } diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 588811e3..a5623b67 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -7,6 +7,7 @@ import ( "crawlab/model" "crawlab/services" "crawlab/utils" + "fmt" "github.com/apex/log" "github.com/gin-gonic/gin" "github.com/globalsign/mgo" @@ -17,6 +18,7 @@ import ( "io/ioutil" "net/http" "os" + "path" "path/filepath" "runtime/debug" "strconv" @@ -117,6 +119,64 @@ func PublishSpider(c *gin.Context) { } func PutSpider(c *gin.Context) { + var spider model.Spider + if err := c.ShouldBindJSON(&spider); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + // 爬虫名称不能为空 + if spider.Name == "" { + HandleErrorF(http.StatusBadRequest, c, "spider name should not be empty") + return + } + + // 判断爬虫是否存在 + if spider := model.GetSpiderByName(spider.Name); spider.Name != "" { + HandleErrorF(http.StatusBadRequest, c, 
fmt.Sprintf("spider for '%s' already exists", spider.Name)) + return + } + + // 设置爬虫类别 + spider.Type = constants.Customized + + // 将FileId置空 + spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull) + + // 创建爬虫目录 + spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) + if utils.Exists(spiderDir) { + if err := os.RemoveAll(spiderDir); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + } + if err := os.MkdirAll(spiderDir, 0777); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + spider.Src = spiderDir + + // 添加爬虫到数据库 + if err := spider.Add(); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: spider, + }) +} + +func UploadSpider(c *gin.Context) { // 从body中获取文件 uploadFile, err := c.FormFile("file") if err != nil { @@ -125,6 +185,144 @@ func PutSpider(c *gin.Context) { return } + // 获取参数 + name := c.PostForm("name") + displayName := c.PostForm("display_name") + col := c.PostForm("col") + cmd := c.PostForm("cmd") + + // 如果不为zip文件,返回错误 + if !strings.HasSuffix(uploadFile.Filename, ".zip") { + HandleError(http.StatusBadRequest, c, errors.New("not a valid zip file")) + return + } + + // 以防tmp目录不存在 + tmpPath := viper.GetString("other.tmppath") + if !utils.Exists(tmpPath) { + if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { + log.Error("mkdir other.tmppath dir error:" + err.Error()) + debug.PrintStack() + HandleError(http.StatusBadRequest, c, errors.New("mkdir other.tmppath dir error")) + return + } + } + + // 保存到本地临时文件 + randomId := uuid.NewV4() + tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") + if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil { + log.Error("save upload file error: " + 
err.Error()) + debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 获取 GridFS 实例 + s, gf := database.GetGridFs("files") + defer s.Close() + + // 判断文件是否已经存在 + var gfFile model.GridFs + if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { + // 已经存在文件,则删除 + _ = gf.RemoveId(gfFile.Id) + } + + // 上传到GridFs + fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) + if err != nil { + log.Errorf("upload to grid fs error: %s", err.Error()) + debug.PrintStack() + return + } + + idx := strings.LastIndex(uploadFile.Filename, "/") + targetFilename := uploadFile.Filename[idx+1:] + + // 判断爬虫是否存在 + spiderName := strings.Replace(targetFilename, ".zip", "", 1) + if name != "" { + spiderName = name + } + spider := model.GetSpiderByName(spiderName) + if spider.Name == "" { + // 保存爬虫信息 + srcPath := viper.GetString("spider.path") + spider := model.Spider{ + Name: spiderName, + DisplayName: spiderName, + Type: constants.Customized, + Src: filepath.Join(srcPath, spiderName), + FileId: fid, + } + if name != "" { + spider.Name = name + } + if displayName != "" { + spider.DisplayName = displayName + } + if col != "" { + spider.Col = col + } + if cmd != "" { + spider.Cmd = cmd + } + _ = spider.Add() + } else { + if name != "" { + spider.Name = name + } + if displayName != "" { + spider.DisplayName = displayName + } + if col != "" { + spider.Col = col + } + if cmd != "" { + spider.Cmd = cmd + } + // 更新file_id + spider.FileId = fid + _ = spider.Save() + } + + // 发起同步 + services.PublishAllSpiders() + + // 获取爬虫 + spider = model.GetSpiderByName(spiderName) + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: spider, + }) +} + +func UploadSpiderFromId(c *gin.Context) { + // TODO: 与 UploadSpider 部分逻辑重复,需要优化代码 + // 爬虫ID + spiderId := c.Param("id") + + // 获取爬虫 + spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) + if err != nil { + if err == mgo.ErrNotFound { + 
HandleErrorF(http.StatusNotFound, c, "cannot find spider") + } else { + HandleError(http.StatusInternalServerError, c, err) + } + return + } + + // 从body中获取文件 + uploadFile, err := c.FormFile("file") + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + // 如果不为zip文件,返回错误 if !strings.HasSuffix(uploadFile.Filename, ".zip") { debug.PrintStack() @@ -172,28 +370,12 @@ func PutSpider(c *gin.Context) { return } - idx := strings.LastIndex(uploadFile.Filename, "/") - targetFilename := uploadFile.Filename[idx+1:] + // 更新file_id + spider.FileId = fid + _ = spider.Save() - // 判断爬虫是否存在 - spiderName := strings.Replace(targetFilename, ".zip", "", 1) - spider := model.GetSpiderByName(spiderName) - if spider == nil { - // 保存爬虫信息 - srcPath := viper.GetString("spider.path") - spider := model.Spider{ - Name: spiderName, - DisplayName: spiderName, - Type: constants.Customized, - Src: filepath.Join(srcPath, spiderName), - FileId: fid, - } - _ = spider.Add() - } else { - // 更新file_id - spider.FileId = fid - _ = spider.Save() - } + // 发起同步 + services.PublishSpider(spider) c.JSON(http.StatusOK, Response{ Status: "ok", @@ -283,6 +465,14 @@ func GetSpiderDir(c *gin.Context) { }) } +// 爬虫文件管理 + +type SpiderFileReqBody struct { + Path string `json:"path"` + Content string `json:"content"` + NewPath string `json:"new_path"` +} + func GetSpiderFile(c *gin.Context) { // 爬虫ID id := c.Param("id") @@ -311,11 +501,6 @@ func GetSpiderFile(c *gin.Context) { }) } -type SpiderFileReqBody struct { - Path string `json:"path"` - Content string `json:"content"` -} - func PostSpiderFile(c *gin.Context) { // 爬虫ID id := c.Param("id") @@ -340,6 +525,12 @@ func PostSpiderFile(c *gin.Context) { return } + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + // 返回结果 c.JSON(http.StatusOK, Response{ Status: "ok", @@ -347,6 +538,161 @@ func PostSpiderFile(c *gin.Context) { }) } +func 
PutSpiderFile(c *gin.Context) { + spiderId := c.Param("id") + var reqBody SpiderFileReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 文件路径 + filePath := path.Join(spider.Src, reqBody.Path) + + // 如果文件已存在,则报错 + if utils.Exists(filePath) { + HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, filePath)) + return + } + + // 写入文件 + if err := ioutil.WriteFile(filePath, []byte(reqBody.Content), 0777); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func PutSpiderDir(c *gin.Context) { + spiderId := c.Param("id") + var reqBody SpiderFileReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 文件路径 + filePath := path.Join(spider.Src, reqBody.Path) + + // 如果文件已存在,则报错 + if utils.Exists(filePath) { + HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, filePath)) + return + } + + // 创建文件夹 + if err := os.MkdirAll(filePath, 0777); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func DeleteSpiderFile(c *gin.Context) { + spiderId := c.Param("id") + var 
reqBody SpiderFileReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + filePath := path.Join(spider.Src, reqBody.Path) + if err := os.RemoveAll(filePath); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func RenameSpiderFile(c *gin.Context) { + spiderId := c.Param("id") + var reqBody SpiderFileReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + } + spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 原文件路径 + filePath := path.Join(spider.Src, reqBody.Path) + newFilePath := path.Join(spider.Src, reqBody.NewPath) + + // 如果新文件已存在,则报错 + if utils.Exists(newFilePath) { + HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, newFilePath)) + return + } + + // 重命名 + if err := os.Rename(filePath, newFilePath); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 删除原文件 + if err := os.RemoveAll(filePath); err != nil { + HandleError(http.StatusInternalServerError, c, err) + } + + // 同步到GridFS + if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + // 爬虫类型 func GetSpiderTypes(c *gin.Context) { types, err := model.GetSpiderTypes() diff --git a/backend/routes/system.go b/backend/routes/system.go new file mode 100644 index 
00000000..bcd186f8 --- /dev/null +++ b/backend/routes/system.go @@ -0,0 +1,110 @@ +package routes + +import ( + "crawlab/constants" + "crawlab/entity" + "crawlab/services" + "fmt" + "github.com/gin-gonic/gin" + "net/http" + "strings" +) + +func GetLangList(c *gin.Context) { + nodeId := c.Param("id") + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: services.GetLangList(nodeId), + }) +} + +func GetDepList(c *gin.Context) { + nodeId := c.Param("id") + lang := c.Query("lang") + depName := c.Query("dep_name") + + var depList []entity.Dependency + if lang == constants.Python { + list, err := services.GetPythonDepList(nodeId, depName) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + depList = list + } else { + HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: depList, + }) +} + +func GetInstalledDepList(c *gin.Context) { + nodeId := c.Param("id") + lang := c.Query("lang") + var depList []entity.Dependency + if lang == constants.Python { + list, err := services.GetPythonInstalledDepList(nodeId) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + depList = list + } else { + HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: depList, + }) +} + +func GetAllDepList(c *gin.Context) { + lang := c.Query("lang") + depName := c.Query("dep_name") + + // 获取所有依赖列表 + var list []string + if lang == constants.Python { + _list, err := services.GetPythonDepListFromRedis() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + list = _list + } else { + HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) + return + } + + // 过滤依赖列表 + var depList []string + for _, name := range 
list { + if strings.HasPrefix(strings.ToLower(name), strings.ToLower(depName)) { + depList = append(depList, name) + } + } + + // 只取前20 + var returnList []string + for i, name := range depList { + if i >= 10 { + break + } + returnList = append(returnList, name) + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: returnList, + }) +} diff --git a/backend/services/spider.go b/backend/services/spider.go index 3922d822..3515afa9 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -12,6 +12,7 @@ import ( "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" + uuid "github.com/satori/go.uuid" "github.com/spf13/viper" "os" "path/filepath" @@ -30,6 +31,48 @@ type SpiderUploadMessage struct { SpiderId string } +// 从主节点上传爬虫到GridFS +func UploadSpiderToGridFsFromMaster(spider model.Spider) error { + // 爬虫所在目录 + spiderDir := spider.Src + + // 打包为 zip 文件 + files, err := utils.GetFilesFromDir(spiderDir) + if err != nil { + return err + } + randomId := uuid.NewV4() + tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip") + spiderZipFileName := spider.Name + ".zip" + if err := utils.Compress(files, tmpFilePath); err != nil { + return err + } + + // 获取 GridFS 实例 + s, gf := database.GetGridFs("files") + defer s.Close() + + // 判断文件是否已经存在 + var gfFile model.GridFs + if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { + // 已经存在文件,则删除 + _ = gf.RemoveId(gfFile.Id) + } + + // 上传到GridFs + fid, err := UploadToGridFs(spiderZipFileName, tmpFilePath) + if err != nil { + log.Errorf("upload to grid fs error: %s", err.Error()) + return err + } + + // 保存爬虫 FileId + spider.FileId = fid + _ = spider.Save() + + return nil +} + // 上传zip文件到GridFS func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" diff --git a/backend/services/system.go b/backend/services/system.go index 92f9cf96..045ecbff 
100644 --- a/backend/services/system.go +++ b/backend/services/system.go @@ -4,28 +4,60 @@ import ( "crawlab/constants" "crawlab/database" "crawlab/entity" + "crawlab/lib/cron" "crawlab/model" "crawlab/utils" "encoding/json" + "errors" + "fmt" + "github.com/apex/log" + "github.com/imroc/req" + "os/exec" + "regexp" + "runtime/debug" + "sort" + "strings" + "sync" ) +type PythonDepJsonData struct { + Info PythonDepJsonDataInfo `json:"info"` +} + +type PythonDepJsonDataInfo struct { + Name string `json:"name"` + Summary string `json:"summary"` + Version string `json:"version"` +} + +type PythonDepNameDict struct { + Name string `json:"name"` + Weight int `json:"weight"` +} + +type PythonDepNameDictSlice []PythonDepNameDict + +func (s PythonDepNameDictSlice) Len() int { return len(s) } +func (s PythonDepNameDictSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s PythonDepNameDictSlice) Less(i, j int) bool { return s[i].Weight > s[j].Weight } + var SystemInfoChanMap = utils.NewChanMap() -func GetRemoteSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { +func GetRemoteSystemInfo(nodeId string) (sysInfo entity.SystemInfo, err error) { // 发送消息 msg := entity.NodeMessage{ Type: constants.MsgTypeGetSystemInfo, - NodeId: id, + NodeId: nodeId, } // 序列化 msgBytes, _ := json.Marshal(&msg) - if _, err := database.RedisClient.Publish("nodes:"+id, utils.BytesToString(msgBytes)); err != nil { + if _, err := database.RedisClient.Publish("nodes:"+nodeId, utils.BytesToString(msgBytes)); err != nil { return entity.SystemInfo{}, err } // 通道 - ch := SystemInfoChanMap.ChanBlocked(id) + ch := SystemInfoChanMap.ChanBlocked(nodeId) // 等待响应,阻塞 sysInfoStr := <-ch @@ -38,11 +70,242 @@ func GetRemoteSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { return sysInfo, nil } -func GetSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { - if IsMasterNode(id) { +func GetSystemInfo(nodeId string) (sysInfo entity.SystemInfo, err error) { + if IsMasterNode(nodeId) { 
sysInfo, err = model.GetLocalSystemInfo() } else { - sysInfo, err = GetRemoteSystemInfo(id) + sysInfo, err = GetRemoteSystemInfo(nodeId) } return } + +func GetLangList(nodeId string) []entity.Lang { + list := []entity.Lang{ + {Name: "Python", ExecutableName: "python", ExecutablePath: "/usr/local/bin/python", DepExecutablePath: "/usr/local/bin/pip"}, + {Name: "NodeJS", ExecutableName: "node", ExecutablePath: "/usr/local/bin/node"}, + {Name: "Java", ExecutableName: "java", ExecutablePath: "/usr/local/bin/java"}, + } + for i, lang := range list { + list[i].Installed = IsInstalledLang(nodeId, lang) + } + return list +} + +func GetLangFromLangName(nodeId string, name string) entity.Lang { + langList := GetLangList(nodeId) + for _, lang := range langList { + if lang.ExecutableName == name { + return lang + } + } + return entity.Lang{} +} + +func GetPythonDepList(nodeId string, searchDepName string) ([]entity.Dependency, error) { + var list []entity.Dependency + + // 先从 Redis 获取 + depList, err := GetPythonDepListFromRedis() + if err != nil { + return list, err + } + + // 过滤相似的依赖 + var depNameList PythonDepNameDictSlice + for _, depName := range depList { + if strings.HasPrefix(strings.ToLower(depName), strings.ToLower(searchDepName)) { + var weight int + if strings.ToLower(depName) == strings.ToLower(searchDepName) { + weight = 3 + } else if strings.HasPrefix(strings.ToLower(depName), strings.ToLower(searchDepName)) { + weight = 2 + } else { + weight = 1 + } + depNameList = append(depNameList, PythonDepNameDict{ + Name: depName, + Weight: weight, + }) + } + } + + // 获取已安装依赖 + installedDepList, err := GetPythonInstalledDepList(nodeId) + if err != nil { + return list, err + } + + // 从依赖源获取数据 + var goSync sync.WaitGroup + sort.Stable(depNameList) + for i, depNameDict := range depNameList { + if i > 10 { + break + } + goSync.Add(1) + go func(depName string, n *sync.WaitGroup) { + url := fmt.Sprintf("https://pypi.org/pypi/%s/json", depName) + res, err := req.Get(url) + if err 
!= nil { + n.Done() + return + } + var data PythonDepJsonData + if err := res.ToJSON(&data); err != nil { + n.Done() + return + } + dep := entity.Dependency{ + Name: depName, + Version: data.Info.Version, + Description: data.Info.Summary, + } + dep.Installed = IsInstalledDep(installedDepList, dep) + list = append(list, dep) + n.Done() + }(depNameDict.Name, &goSync) + } + goSync.Wait() + + return list, nil +} + +func GetPythonDepListFromRedis() ([]string, error) { + var list []string + + // 从 Redis 获取字符串 + rawData, err := database.RedisClient.HGet("system", "deps:python") + if err != nil { + return list, err + } + + // 反序列化 + if err := json.Unmarshal([]byte(rawData), &list); err != nil { + return list, err + } + + // 如果为空,则从依赖源获取列表 + if len(list) == 0 { + UpdatePythonDepList() + } + + return list, nil +} + +func IsInstalledLang(nodeId string, lang entity.Lang) bool { + sysInfo, err := GetSystemInfo(nodeId) + if err != nil { + return false + } + for _, exec := range sysInfo.Executables { + if exec.Path == lang.ExecutablePath { + return true + } + } + return false +} + +func IsInstalledDep(installedDepList []entity.Dependency, dep entity.Dependency) bool { + for _, _dep := range installedDepList { + if strings.ToLower(_dep.Name) == strings.ToLower(dep.Name) { + return true + } + } + return false +} + +func FetchPythonDepList() ([]string, error) { + // 依赖URL + url := "https://pypi.tuna.tsinghua.edu.cn/simple" + + // 输出列表 + var list []string + + // 请求URL + res, err := req.Get(url) + if err != nil { + log.Error(err.Error()) + debug.PrintStack() + return list, err + } + + // 获取响应数据 + text, err := res.ToString() + if err != nil { + log.Error(err.Error()) + debug.PrintStack() + return list, err + } + + // 从响应数据中提取依赖名 + regex := regexp.MustCompile("(.*)") + for _, line := range strings.Split(text, "\n") { + arr := regex.FindStringSubmatch(line) + if len(arr) < 2 { + continue + } + list = append(list, arr[1]) + } + + // 赋值给列表 + return list, nil +} + +func 
UpdatePythonDepList() { + // 从依赖源获取列表 + list, _ := FetchPythonDepList() + + // 序列化 + listBytes, err := json.Marshal(list) + if err != nil { + log.Error(err.Error()) + debug.PrintStack() + return + } + + // 设置Redis + if err := database.RedisClient.HSet("system", "deps:python", string(listBytes)); err != nil { + log.Error(err.Error()) + debug.PrintStack() + return + } +} + +func GetPythonInstalledDepList(nodeId string) ([]entity.Dependency, error){ + var list []entity.Dependency + + lang := GetLangFromLangName(nodeId, constants.Python) + if !IsInstalledLang(nodeId, lang) { + return list, errors.New("python is not installed") + } + cmd := exec.Command("pip", "freeze") + outputBytes, err := cmd.Output() + if err != nil { + debug.PrintStack() + return list, err + } + + for _, line := range strings.Split(string(outputBytes), "\n") { + arr := strings.Split(line, "==") + if len(arr) < 2 { + continue + } + dep := entity.Dependency{ + Name: strings.ToLower(arr[0]), + Version: arr[1], + Installed: true, + } + list = append(list, dep) + } + + return list, nil +} + +func InitDepsFetcher() error { + c := cron.New(cron.WithSeconds()) + c.Start() + if _, err := c.AddFunc("0 */5 * * * *", UpdatePythonDepList); err != nil { + return err + } + return nil +} diff --git a/backend/vendor/github.com/imroc/req/LICENSE b/backend/vendor/github.com/imroc/req/LICENSE new file mode 100644 index 00000000..8dada3ed --- /dev/null +++ b/backend/vendor/github.com/imroc/req/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/backend/vendor/github.com/imroc/req/README.md b/backend/vendor/github.com/imroc/req/README.md new file mode 100644 index 00000000..c41d5ae0 --- /dev/null +++ b/backend/vendor/github.com/imroc/req/README.md @@ -0,0 +1,302 @@ +# req +[![GoDoc](https://godoc.org/github.com/imroc/req?status.svg)](https://godoc.org/github.com/imroc/req) + +A golang http request library for humans + + + +Features +======== + +- Light weight +- Simple +- Easy play with JSON and XML +- Easy for debug and logging +- Easy file uploads and downloads +- Easy manage cookie +- Easy set up proxy +- Easy set timeout +- Easy customize http client + + +Document +======== +[中文](doc/README_cn.md) + + +Install +======= +``` sh +go get github.com/imroc/req +``` + +Overview +======= +`req` implements a friendly API over Go's existing `net/http` library. + +`Req` and `Resp` are two most important struct, you can think of `Req` as a client that initiate HTTP requests, `Resp` as a information container for the request and response. They all provide simple and convenient APIs that allows you to do a lot of things. +``` go +func (r *Req) Post(url string, v ...interface{}) (*Resp, error) +``` + +In most cases, only url is required, others are optional, like headers, params, files or body etc. + +There is a default `Req` object, all of its' public methods are wrapped by the `req` package, so you can also think of `req` package as a `Req` object +``` go +// use Req object to initiate requests. +r := req.New() +r.Get(url) + +// use req package to initiate request. 
+req.Get(url) +``` +You can use `req.New()` to create lots of `*Req` as client with independent configuration + +Examples +======= +[Basic](#Basic) +[Set Header](#Set-Header) +[Set Param](#Set-Param) +[Set Body](#Set-Body) +[Debug](#Debug) +[Output Format](#Format) +[ToJSON & ToXML](#ToJSON-ToXML) +[Get *http.Response](#Response) +[Upload](#Upload) +[Download](#Download) +[Cookie](#Cookie) +[Set Timeout](#Set-Timeout) +[Set Proxy](#Set-Proxy) +[Customize Client](#Customize-Client) + +## Basic +``` go +header := req.Header{ + "Accept": "application/json", + "Authorization": "Basic YWRtaW46YWRtaW4=", +} +param := req.Param{ + "name": "imroc", + "cmd": "add", +} +// only url is required, others are optional. +r, err = req.Post("http://foo.bar/api", header, param) +if err != nil { + log.Fatal(err) +} +r.ToJSON(&foo) // response => struct/map +log.Printf("%+v", r) // print info (try it, you may surprise) +``` + +## Set Header +Use `req.Header` (it is actually a `map[string]string`) +``` go +authHeader := req.Header{ + "Accept": "application/json", + "Authorization": "Basic YWRtaW46YWRtaW4=", +} +req.Get("https://www.baidu.com", authHeader, req.Header{"User-Agent": "V1.1"}) +``` +use `http.Header` +``` go +header := make(http.Header) +header.Set("Accept", "application/json") +req.Get("https://www.baidu.com", header) +``` + +## Set Param +Use `req.Param` (it is actually a `map[string]interface{}`) +``` go +param := req.Param{ + "id": "imroc", + "pwd": "roc", +} +req.Get("http://foo.bar/api", param) // http://foo.bar/api?id=imroc&pwd=roc +req.Post(url, param) // body => id=imroc&pwd=roc +``` +use `req.QueryParam` force to append params to the url (it is also actually a `map[string]interface{}`) +``` go +req.Post("http://foo.bar/api", req.Param{"name": "roc", "age": "22"}, req.QueryParam{"access_token": "fedledGF9Hg9ehTU"}) +/* +POST /api?access_token=fedledGF9Hg9ehTU HTTP/1.1 +Host: foo.bar +User-Agent: Go-http-client/1.1 +Content-Length: 15 +Content-Type: 
application/x-www-form-urlencoded;charset=UTF-8 +Accept-Encoding: gzip + +age=22&name=roc +*/ +``` + +## Set Body +Put `string`, `[]byte` and `io.Reader` as body directly. +``` go +req.Post(url, "id=roc&cmd=query") +``` +Put object as xml or json body (add `Content-Type` header automatically) +``` go +req.Post(url, req.BodyJSON(&foo)) +req.Post(url, req.BodyXML(&bar)) +``` + +## Debug +Set global variable `req.Debug` to true, it will print detail infomation for every request. +``` go +req.Debug = true +req.Post("http://localhost/test" "hi") +``` +![post](doc/post.png) + +## Output Format +You can use different kind of output format to log the request and response infomation in your log file in defferent scenarios. For example, use `%+v` output format in the development phase, it allows you to observe the details. Use `%v` or `%-v` output format in production phase, just log the information necessarily. + +### `%+v` or `%+s` +Output in detail +``` go +r, _ := req.Post(url, header, param) +log.Printf("%+v", r) // output the same format as Debug is enabled +``` + +### `%v` or `%s` +Output in simple way (default format) +``` go +r, _ := req.Get(url, param) +log.Printf("%v\n", r) // GET http://foo.bar/api?name=roc&cmd=add {"code":"0","msg":"success"} +log.Prinln(r) // smae as above +``` + +### `%-v` or `%-s` +Output in simple way and keep all in one line (request body or response body may have multiple lines, this format will replace `"\r"` or `"\n"` with `" "`, it's useful when doing some search in your log file) + +### Flag +You can call `SetFlags` to control the output content, decide which pieces can be output. 
+``` go +const ( + LreqHead = 1 << iota // output request head (request line and request header) + LreqBody // output request body + LrespHead // output response head (response line and response header) + LrespBody // output response body + Lcost // output time costed by the request + LstdFlags = LreqHead | LreqBody | LrespHead | LrespBody +) +``` +``` go +req.SetFlags(req.LreqHead | req.LreqBody | req.LrespHead) +``` + +### Monitoring time consuming +``` go +req.SetFlags(req.LstdFlags | req.Lcost) // output format add time costed by request +r,_ := req.Get(url) +log.Println(r) // http://foo.bar/api 3.260802ms {"code":0 "msg":"success"} +if r.Cost() > 3 * time.Second { // check cost + log.Println("WARN: slow request:", r) +} +``` + +## ToJSON & ToXML +``` go +r, _ := req.Get(url) +r.ToJSON(&foo) +r, _ = req.Post(url, req.BodyXML(&bar)) +r.ToXML(&baz) +``` + +## Get *http.Response +```go +// func (r *Req) Response() *http.Response +r, _ := req.Get(url) +resp := r.Response() +fmt.Println(resp.StatusCode) +``` + +## Upload +Use `req.File` to match files +``` go +req.Post(url, req.File("imroc.png"), req.File("/Users/roc/Pictures/*.png")) +``` +Use `req.FileUpload` to fully control +``` go +file, _ := os.Open("imroc.png") +req.Post(url, req.FileUpload{ + File: file, + FieldName: "file", // FieldName is form field name + FileName: "avatar.png", //Filename is the name of the file that you wish to upload. 
We use this to guess the mimetype as well as pass it onto the server +}) +``` +Use `req.UploadProgress` to listen upload progress +```go +progress := func(current, total int64) { + fmt.Println(float32(current)/float32(total)*100, "%") +} +req.Post(url, req.File("/Users/roc/Pictures/*.png"), req.UploadProgress(progress)) +fmt.Println("upload complete") +``` + +## Download +``` go +r, _ := req.Get(url) +r.ToFile("imroc.png") +``` +Use `req.DownloadProgress` to listen download progress +```go +progress := func(current, total int64) { + fmt.Println(float32(current)/float32(total)*100, "%") +} +r, _ := req.Get(url, req.DownloadProgress(progress)) +r.ToFile("hello.mp4") +fmt.Println("download complete") +``` + +## Cookie +By default, the underlying `*http.Client` will manage your cookie(send cookie header to server automatically if server has set a cookie for you), you can disable it by calling this function : +``` go +req.EnableCookie(false) +``` +and you can set cookie in request just using `*http.Cookie` +``` go +cookie := new(http.Cookie) +// ...... 
+req.Get(url, cookie) +``` + +## Set Timeout +``` go +req.SetTimeout(50 * time.Second) +``` + +## Set Proxy +By default, req use proxy from system environment if `http_proxy` or `https_proxy` is specified, you can set a custom proxy or disable it by set `nil` +``` go +req.SetProxy(func(r *http.Request) (*url.URL, error) { + if strings.Contains(r.URL.Hostname(), "google") { + return url.Parse("http://my.vpn.com:23456") + } + return nil, nil +}) +``` +Set a simple proxy (use fixed proxy url for every request) +``` go +req.SetProxyUrl("http://my.proxy.com:23456") +``` + +## Customize Client +Use `SetClient` to change the default underlying `*http.Client` +``` go +req.SetClient(client) +``` +Specify independent http client for some requests +``` go +client := &http.Client{Timeout: 30 * time.Second} +req.Get(url, client) +``` +Change some properties of default client you want +``` go +req.Client().Jar, _ = cookiejar.New(nil) +trans, _ := req.Client().Transport.(*http.Transport) +trans.MaxIdleConns = 20 +trans.TLSHandshakeTimeout = 20 * time.Second +trans.DisableKeepAlives = true +trans.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} +``` diff --git a/backend/vendor/github.com/imroc/req/dump.go b/backend/vendor/github.com/imroc/req/dump.go new file mode 100644 index 00000000..ce6d3a5b --- /dev/null +++ b/backend/vendor/github.com/imroc/req/dump.go @@ -0,0 +1,216 @@ +package req + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "net/http/httputil" + "net/url" + "strings" + "time" +) + +// Debug enable debug mode if set to true +var Debug bool + +// dumpConn is a net.Conn which writes to Writer and reads from Reader +type dumpConn struct { + io.Writer + io.Reader +} + +func (c *dumpConn) Close() error { return nil } +func (c *dumpConn) LocalAddr() net.Addr { return nil } +func (c *dumpConn) RemoteAddr() net.Addr { return nil } +func (c *dumpConn) SetDeadline(t time.Time) error { return nil } +func (c *dumpConn) SetReadDeadline(t 
time.Time) error { return nil } +func (c *dumpConn) SetWriteDeadline(t time.Time) error { return nil } + +// delegateReader is a reader that delegates to another reader, +// once it arrives on a channel. +type delegateReader struct { + c chan io.Reader + r io.Reader // nil until received from c +} + +func (r *delegateReader) Read(p []byte) (int, error) { + if r.r == nil { + r.r = <-r.c + } + return r.r.Read(p) +} + +type dummyBody struct { + N int + off int +} + +func (d *dummyBody) Read(p []byte) (n int, err error) { + if d.N <= 0 { + err = io.EOF + return + } + left := d.N - d.off + if left <= 0 { + err = io.EOF + return + } + + if l := len(p); l > 0 { + if l >= left { + n = left + err = io.EOF + } else { + n = l + } + d.off += n + for i := 0; i < n; i++ { + p[i] = '*' + } + } + + return +} + +func (d *dummyBody) Close() error { + return nil +} + +type dumpBuffer struct { + bytes.Buffer +} + +func (b *dumpBuffer) Write(p []byte) { + if b.Len() > 0 { + b.Buffer.WriteString("\r\n\r\n") + } + b.Buffer.Write(p) +} + +func (b *dumpBuffer) WriteString(s string) { + b.Write([]byte(s)) +} + +func (r *Resp) dumpRequest(dump *dumpBuffer) { + head := r.r.flag&LreqHead != 0 + body := r.r.flag&LreqBody != 0 + + if head { + r.dumpReqHead(dump) + } + if body { + if r.multipartHelper != nil { + dump.Write(r.multipartHelper.Dump()) + } else if len(r.reqBody) > 0 { + dump.Write(r.reqBody) + } + } +} + +func (r *Resp) dumpReqHead(dump *dumpBuffer) { + reqSend := new(http.Request) + *reqSend = *r.req + if reqSend.URL.Scheme == "https" { + reqSend.URL = new(url.URL) + *reqSend.URL = *r.req.URL + reqSend.URL.Scheme = "http" + } + + if reqSend.ContentLength > 0 { + reqSend.Body = &dummyBody{N: int(reqSend.ContentLength)} + } else { + reqSend.Body = &dummyBody{N: 1} + } + + // Use the actual Transport code to record what we would send + // on the wire, but not using TCP. 
Use a Transport with a + // custom dialer that returns a fake net.Conn that waits + // for the full input (and recording it), and then responds + // with a dummy response. + var buf bytes.Buffer // records the output + pr, pw := io.Pipe() + defer pw.Close() + dr := &delegateReader{c: make(chan io.Reader)} + + t := &http.Transport{ + Dial: func(net, addr string) (net.Conn, error) { + return &dumpConn{io.MultiWriter(&buf, pw), dr}, nil + }, + } + defer t.CloseIdleConnections() + + client := new(http.Client) + *client = *r.client + client.Transport = t + + // Wait for the request before replying with a dummy response: + go func() { + req, err := http.ReadRequest(bufio.NewReader(pr)) + if err == nil { + // Ensure all the body is read; otherwise + // we'll get a partial dump. + io.Copy(ioutil.Discard, req.Body) + req.Body.Close() + } + + dr.c <- strings.NewReader("HTTP/1.1 204 No Content\r\nConnection: close\r\n\r\n") + pr.Close() + }() + + _, err := client.Do(reqSend) + if err != nil { + dump.WriteString(err.Error()) + } else { + reqDump := buf.Bytes() + if i := bytes.Index(reqDump, []byte("\r\n\r\n")); i >= 0 { + reqDump = reqDump[:i] + } + dump.Write(reqDump) + } +} + +func (r *Resp) dumpResponse(dump *dumpBuffer) { + head := r.r.flag&LrespHead != 0 + body := r.r.flag&LrespBody != 0 + if head { + respDump, err := httputil.DumpResponse(r.resp, false) + if err != nil { + dump.WriteString(err.Error()) + } else { + if i := bytes.Index(respDump, []byte("\r\n\r\n")); i >= 0 { + respDump = respDump[:i] + } + dump.Write(respDump) + } + } + if body && len(r.Bytes()) > 0 { + dump.Write(r.Bytes()) + } +} + +// Cost return the time cost of the request +func (r *Resp) Cost() time.Duration { + return r.cost +} + +// Dump dump the request +func (r *Resp) Dump() string { + dump := new(dumpBuffer) + if r.r.flag&Lcost != 0 { + dump.WriteString(fmt.Sprint(r.cost)) + } + r.dumpRequest(dump) + l := dump.Len() + if l > 0 { + dump.WriteString("=================================") + l = 
dump.Len() + } + + r.dumpResponse(dump) + + return dump.String() +} diff --git a/backend/vendor/github.com/imroc/req/req.go b/backend/vendor/github.com/imroc/req/req.go new file mode 100644 index 00000000..d1b3e712 --- /dev/null +++ b/backend/vendor/github.com/imroc/req/req.go @@ -0,0 +1,688 @@ +package req + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "encoding/xml" + "errors" + "fmt" + "io" + "io/ioutil" + "mime/multipart" + "net/http" + "net/textproto" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" +) + +// default *Req +var std = New() + +// flags to decide which part can be outputed +const ( + LreqHead = 1 << iota // output request head (request line and request header) + LreqBody // output request body + LrespHead // output response head (response line and response header) + LrespBody // output response body + Lcost // output time costed by the request + LstdFlags = LreqHead | LreqBody | LrespHead | LrespBody +) + +// Header represents http request header +type Header map[string]string + +func (h Header) Clone() Header { + if h == nil { + return nil + } + hh := Header{} + for k, v := range h { + hh[k] = v + } + return hh +} + +// Param represents http request param +type Param map[string]interface{} + +// QueryParam is used to force append http request param to the uri +type QueryParam map[string]interface{} + +// Host is used for set request's Host +type Host string + +// FileUpload represents a file to upload +type FileUpload struct { + // filename in multipart form. 
+ FileName string + // form field name + FieldName string + // file to uplaod, required + File io.ReadCloser +} + +type DownloadProgress func(current, total int64) + +type UploadProgress func(current, total int64) + +// File upload files matching the name pattern such as +// /usr/*/bin/go* (assuming the Separator is '/') +func File(patterns ...string) interface{} { + matches := []string{} + for _, pattern := range patterns { + m, err := filepath.Glob(pattern) + if err != nil { + return err + } + matches = append(matches, m...) + } + if len(matches) == 0 { + return errors.New("req: no file have been matched") + } + uploads := []FileUpload{} + for _, match := range matches { + if s, e := os.Stat(match); e != nil || s.IsDir() { + continue + } + file, _ := os.Open(match) + uploads = append(uploads, FileUpload{ + File: file, + FileName: filepath.Base(match), + FieldName: "media", + }) + } + + return uploads +} + +type bodyJson struct { + v interface{} +} + +type bodyXml struct { + v interface{} +} + +// BodyJSON make the object be encoded in json format and set it to the request body +func BodyJSON(v interface{}) *bodyJson { + return &bodyJson{v: v} +} + +// BodyXML make the object be encoded in xml format and set it to the request body +func BodyXML(v interface{}) *bodyXml { + return &bodyXml{v: v} +} + +// Req is a convenient client for initiating requests +type Req struct { + client *http.Client + jsonEncOpts *jsonEncOpts + xmlEncOpts *xmlEncOpts + flag int +} + +// New create a new *Req +func New() *Req { + return &Req{flag: LstdFlags} +} + +type param struct { + url.Values +} + +func (p *param) getValues() url.Values { + if p.Values == nil { + p.Values = make(url.Values) + } + return p.Values +} + +func (p *param) Copy(pp param) { + if pp.Values == nil { + return + } + vs := p.getValues() + for key, values := range pp.Values { + for _, value := range values { + vs.Add(key, value) + } + } +} +func (p *param) Adds(m map[string]interface{}) { + if len(m) == 0 { + 
return + } + vs := p.getValues() + for k, v := range m { + vs.Add(k, fmt.Sprint(v)) + } +} + +func (p *param) Empty() bool { + return p.Values == nil +} + +// Do execute a http request with sepecify method and url, +// and it can also have some optional params, depending on your needs. +func (r *Req) Do(method, rawurl string, vs ...interface{}) (resp *Resp, err error) { + if rawurl == "" { + return nil, errors.New("req: url not specified") + } + req := &http.Request{ + Method: method, + Header: make(http.Header), + Proto: "HTTP/1.1", + ProtoMajor: 1, + ProtoMinor: 1, + } + resp = &Resp{req: req, r: r} + + var queryParam param + var formParam param + var uploads []FileUpload + var uploadProgress UploadProgress + var progress func(int64, int64) + var delayedFunc []func() + var lastFunc []func() + + for _, v := range vs { + switch vv := v.(type) { + case Header: + for key, value := range vv { + req.Header.Add(key, value) + } + case http.Header: + for key, values := range vv { + for _, value := range values { + req.Header.Add(key, value) + } + } + case *bodyJson: + fn, err := setBodyJson(req, resp, r.jsonEncOpts, vv.v) + if err != nil { + return nil, err + } + delayedFunc = append(delayedFunc, fn) + case *bodyXml: + fn, err := setBodyXml(req, resp, r.xmlEncOpts, vv.v) + if err != nil { + return nil, err + } + delayedFunc = append(delayedFunc, fn) + case url.Values: + p := param{vv} + if method == "GET" || method == "HEAD" { + queryParam.Copy(p) + } else { + formParam.Copy(p) + } + case Param: + if method == "GET" || method == "HEAD" { + queryParam.Adds(vv) + } else { + formParam.Adds(vv) + } + case QueryParam: + queryParam.Adds(vv) + case string: + setBodyBytes(req, resp, []byte(vv)) + case []byte: + setBodyBytes(req, resp, vv) + case bytes.Buffer: + setBodyBytes(req, resp, vv.Bytes()) + case *http.Client: + resp.client = vv + case FileUpload: + uploads = append(uploads, vv) + case []FileUpload: + uploads = append(uploads, vv...) 
+ case *http.Cookie: + req.AddCookie(vv) + case Host: + req.Host = string(vv) + case io.Reader: + fn := setBodyReader(req, resp, vv) + lastFunc = append(lastFunc, fn) + case UploadProgress: + uploadProgress = vv + case DownloadProgress: + resp.downloadProgress = vv + case func(int64, int64): + progress = vv + case context.Context: + req = req.WithContext(vv) + resp.req = req + case error: + return nil, vv + } + } + + if length := req.Header.Get("Content-Length"); length != "" { + if l, err := strconv.ParseInt(length, 10, 64); err == nil { + req.ContentLength = l + } + } + + if len(uploads) > 0 && (req.Method == "POST" || req.Method == "PUT") { // multipart + var up UploadProgress + if uploadProgress != nil { + up = uploadProgress + } else if progress != nil { + up = UploadProgress(progress) + } + multipartHelper := &multipartHelper{ + form: formParam.Values, + uploads: uploads, + uploadProgress: up, + } + multipartHelper.Upload(req) + resp.multipartHelper = multipartHelper + } else { + if progress != nil { + resp.downloadProgress = DownloadProgress(progress) + } + if !formParam.Empty() { + if req.Body != nil { + queryParam.Copy(formParam) + } else { + setBodyBytes(req, resp, []byte(formParam.Encode())) + setContentType(req, "application/x-www-form-urlencoded; charset=UTF-8") + } + } + } + + if !queryParam.Empty() { + paramStr := queryParam.Encode() + if strings.IndexByte(rawurl, '?') == -1 { + rawurl = rawurl + "?" 
+ paramStr + } else { + rawurl = rawurl + "&" + paramStr + } + } + + u, err := url.Parse(rawurl) + if err != nil { + return nil, err + } + req.URL = u + + if host := req.Header.Get("Host"); host != "" { + req.Host = host + } + + for _, fn := range delayedFunc { + fn() + } + + if resp.client == nil { + resp.client = r.Client() + } + + var response *http.Response + if r.flag&Lcost != 0 { + before := time.Now() + response, err = resp.client.Do(req) + after := time.Now() + resp.cost = after.Sub(before) + } else { + response, err = resp.client.Do(req) + } + if err != nil { + return nil, err + } + + for _, fn := range lastFunc { + fn() + } + + resp.resp = response + + if _, ok := resp.client.Transport.(*http.Transport); ok && response.Header.Get("Content-Encoding") == "gzip" && req.Header.Get("Accept-Encoding") != "" { + body, err := gzip.NewReader(response.Body) + if err != nil { + return nil, err + } + response.Body = body + } + + // output detail if Debug is enabled + if Debug { + fmt.Println(resp.Dump()) + } + return +} + +func setBodyBytes(req *http.Request, resp *Resp, data []byte) { + resp.reqBody = data + req.Body = ioutil.NopCloser(bytes.NewReader(data)) + req.ContentLength = int64(len(data)) +} + +func setBodyJson(req *http.Request, resp *Resp, opts *jsonEncOpts, v interface{}) (func(), error) { + var data []byte + switch vv := v.(type) { + case string: + data = []byte(vv) + case []byte: + data = vv + case *bytes.Buffer: + data = vv.Bytes() + default: + if opts != nil { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + enc.SetIndent(opts.indentPrefix, opts.indentValue) + enc.SetEscapeHTML(opts.escapeHTML) + err := enc.Encode(v) + if err != nil { + return nil, err + } + data = buf.Bytes() + } else { + var err error + data, err = json.Marshal(v) + if err != nil { + return nil, err + } + } + } + setBodyBytes(req, resp, data) + delayedFunc := func() { + setContentType(req, "application/json; charset=UTF-8") + } + return delayedFunc, nil +} + +func 
setBodyXml(req *http.Request, resp *Resp, opts *xmlEncOpts, v interface{}) (func(), error) { + var data []byte + switch vv := v.(type) { + case string: + data = []byte(vv) + case []byte: + data = vv + case *bytes.Buffer: + data = vv.Bytes() + default: + if opts != nil { + var buf bytes.Buffer + enc := xml.NewEncoder(&buf) + enc.Indent(opts.prefix, opts.indent) + err := enc.Encode(v) + if err != nil { + return nil, err + } + data = buf.Bytes() + } else { + var err error + data, err = xml.Marshal(v) + if err != nil { + return nil, err + } + } + } + setBodyBytes(req, resp, data) + delayedFunc := func() { + setContentType(req, "application/xml; charset=UTF-8") + } + return delayedFunc, nil +} + +func setContentType(req *http.Request, contentType string) { + if req.Header.Get("Content-Type") == "" { + req.Header.Set("Content-Type", contentType) + } +} + +func setBodyReader(req *http.Request, resp *Resp, rd io.Reader) func() { + var rc io.ReadCloser + switch r := rd.(type) { + case *os.File: + stat, err := r.Stat() + if err == nil { + req.ContentLength = stat.Size() + } + rc = r + + case io.ReadCloser: + rc = r + default: + rc = ioutil.NopCloser(rd) + } + bw := &bodyWrapper{ + ReadCloser: rc, + limit: 102400, + } + req.Body = bw + lastFunc := func() { + resp.reqBody = bw.buf.Bytes() + } + return lastFunc +} + +type bodyWrapper struct { + io.ReadCloser + buf bytes.Buffer + limit int +} + +func (b *bodyWrapper) Read(p []byte) (n int, err error) { + n, err = b.ReadCloser.Read(p) + if left := b.limit - b.buf.Len(); left > 0 && n > 0 { + if n <= left { + b.buf.Write(p[:n]) + } else { + b.buf.Write(p[:left]) + } + } + return +} + +type multipartHelper struct { + form url.Values + uploads []FileUpload + dump []byte + uploadProgress UploadProgress +} + +func (m *multipartHelper) Upload(req *http.Request) { + pr, pw := io.Pipe() + bodyWriter := multipart.NewWriter(pw) + go func() { + for key, values := range m.form { + for _, value := range values { + bodyWriter.WriteField(key, 
value) + } + } + var upload func(io.Writer, io.Reader) error + if m.uploadProgress != nil { + var total int64 + for _, up := range m.uploads { + if file, ok := up.File.(*os.File); ok { + stat, err := file.Stat() + if err != nil { + continue + } + total += stat.Size() + } + } + var current int64 + buf := make([]byte, 1024) + var lastTime time.Time + upload = func(w io.Writer, r io.Reader) error { + for { + n, err := r.Read(buf) + if n > 0 { + _, _err := w.Write(buf[:n]) + if _err != nil { + return _err + } + current += int64(n) + if now := time.Now(); now.Sub(lastTime) > 200*time.Millisecond { + lastTime = now + m.uploadProgress(current, total) + } + } + if err == io.EOF { + return nil + } + if err != nil { + return err + } + } + } + } + + i := 0 + for _, up := range m.uploads { + if up.FieldName == "" { + i++ + up.FieldName = "file" + strconv.Itoa(i) + } + fileWriter, err := bodyWriter.CreateFormFile(up.FieldName, up.FileName) + if err != nil { + continue + } + //iocopy + if upload == nil { + io.Copy(fileWriter, up.File) + } else { + if _, ok := up.File.(*os.File); ok { + upload(fileWriter, up.File) + } else { + io.Copy(fileWriter, up.File) + } + } + up.File.Close() + } + bodyWriter.Close() + pw.Close() + }() + req.Header.Set("Content-Type", bodyWriter.FormDataContentType()) + req.Body = ioutil.NopCloser(pr) +} + +func (m *multipartHelper) Dump() []byte { + if m.dump != nil { + return m.dump + } + var buf bytes.Buffer + bodyWriter := multipart.NewWriter(&buf) + for key, values := range m.form { + for _, value := range values { + m.writeField(bodyWriter, key, value) + } + } + for _, up := range m.uploads { + m.writeFile(bodyWriter, up.FieldName, up.FileName) + } + bodyWriter.Close() + m.dump = buf.Bytes() + return m.dump +} + +func (m *multipartHelper) writeField(w *multipart.Writer, fieldname, value string) error { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", + fmt.Sprintf(`form-data; name="%s"`, fieldname)) + p, err := w.CreatePart(h) + if err 
!= nil { + return err + } + _, err = p.Write([]byte(value)) + return err +} + +func (m *multipartHelper) writeFile(w *multipart.Writer, fieldname, filename string) error { + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", + fmt.Sprintf(`form-data; name="%s"; filename="%s"`, + fieldname, filename)) + h.Set("Content-Type", "application/octet-stream") + p, err := w.CreatePart(h) + if err != nil { + return err + } + _, err = p.Write([]byte("******")) + return err +} + +// Get execute a http GET request +func (r *Req) Get(url string, v ...interface{}) (*Resp, error) { + return r.Do("GET", url, v...) +} + +// Post execute a http POST request +func (r *Req) Post(url string, v ...interface{}) (*Resp, error) { + return r.Do("POST", url, v...) +} + +// Put execute a http PUT request +func (r *Req) Put(url string, v ...interface{}) (*Resp, error) { + return r.Do("PUT", url, v...) +} + +// Patch execute a http PATCH request +func (r *Req) Patch(url string, v ...interface{}) (*Resp, error) { + return r.Do("PATCH", url, v...) +} + +// Delete execute a http DELETE request +func (r *Req) Delete(url string, v ...interface{}) (*Resp, error) { + return r.Do("DELETE", url, v...) +} + +// Head execute a http HEAD request +func (r *Req) Head(url string, v ...interface{}) (*Resp, error) { + return r.Do("HEAD", url, v...) +} + +// Options execute a http OPTIONS request +func (r *Req) Options(url string, v ...interface{}) (*Resp, error) { + return r.Do("OPTIONS", url, v...) +} + +// Get execute a http GET request +func Get(url string, v ...interface{}) (*Resp, error) { + return std.Get(url, v...) +} + +// Post execute a http POST request +func Post(url string, v ...interface{}) (*Resp, error) { + return std.Post(url, v...) +} + +// Put execute a http PUT request +func Put(url string, v ...interface{}) (*Resp, error) { + return std.Put(url, v...) +} + +// Head execute a http HEAD request +func Head(url string, v ...interface{}) (*Resp, error) { + return std.Head(url, v...) 
+} + +// Options execute a http OPTIONS request +func Options(url string, v ...interface{}) (*Resp, error) { + return std.Options(url, v...) +} + +// Delete execute a http DELETE request +func Delete(url string, v ...interface{}) (*Resp, error) { + return std.Delete(url, v...) +} + +// Patch execute a http PATCH request +func Patch(url string, v ...interface{}) (*Resp, error) { + return std.Patch(url, v...) +} + +// Do execute request. +func Do(method, url string, v ...interface{}) (*Resp, error) { + return std.Do(method, url, v...) +} diff --git a/backend/vendor/github.com/imroc/req/resp.go b/backend/vendor/github.com/imroc/req/resp.go new file mode 100644 index 00000000..eb56b1bd --- /dev/null +++ b/backend/vendor/github.com/imroc/req/resp.go @@ -0,0 +1,215 @@ +package req + +import ( + "encoding/json" + "encoding/xml" + "fmt" + "io" + "io/ioutil" + "net/http" + "os" + "regexp" + "time" +) + +// Resp represents a request with it's response +type Resp struct { + r *Req + req *http.Request + resp *http.Response + client *http.Client + cost time.Duration + *multipartHelper + reqBody []byte + respBody []byte + downloadProgress DownloadProgress + err error // delayed error +} + +// Request returns *http.Request +func (r *Resp) Request() *http.Request { + return r.req +} + +// Response returns *http.Response +func (r *Resp) Response() *http.Response { + return r.resp +} + +// Bytes returns response body as []byte +func (r *Resp) Bytes() []byte { + data, _ := r.ToBytes() + return data +} + +// ToBytes returns response body as []byte, +// return error if error happend when reading +// the response body +func (r *Resp) ToBytes() ([]byte, error) { + if r.err != nil { + return nil, r.err + } + if r.respBody != nil { + return r.respBody, nil + } + defer r.resp.Body.Close() + respBody, err := ioutil.ReadAll(r.resp.Body) + if err != nil { + r.err = err + return nil, err + } + r.respBody = respBody + return r.respBody, nil +} + +// String returns response body as string +func 
(r *Resp) String() string { + data, _ := r.ToBytes() + return string(data) +} + +// ToString returns response body as string, +// return error if error happend when reading +// the response body +func (r *Resp) ToString() (string, error) { + data, err := r.ToBytes() + return string(data), err +} + +// ToJSON convert json response body to struct or map +func (r *Resp) ToJSON(v interface{}) error { + data, err := r.ToBytes() + if err != nil { + return err + } + return json.Unmarshal(data, v) +} + +// ToXML convert xml response body to struct or map +func (r *Resp) ToXML(v interface{}) error { + data, err := r.ToBytes() + if err != nil { + return err + } + return xml.Unmarshal(data, v) +} + +// ToFile download the response body to file with optional download callback +func (r *Resp) ToFile(name string) error { + //TODO set name to the suffix of url path if name == "" + file, err := os.Create(name) + if err != nil { + return err + } + defer file.Close() + + if r.respBody != nil { + _, err = file.Write(r.respBody) + return err + } + + if r.downloadProgress != nil && r.resp.ContentLength > 0 { + return r.download(file) + } + + defer r.resp.Body.Close() + _, err = io.Copy(file, r.resp.Body) + return err +} + +func (r *Resp) download(file *os.File) error { + p := make([]byte, 1024) + b := r.resp.Body + defer b.Close() + total := r.resp.ContentLength + var current int64 + var lastTime time.Time + for { + l, err := b.Read(p) + if l > 0 { + _, _err := file.Write(p[:l]) + if _err != nil { + return _err + } + current += int64(l) + if now := time.Now(); now.Sub(lastTime) > 200*time.Millisecond { + lastTime = now + r.downloadProgress(current, total) + } + } + if err != nil { + if err == io.EOF { + return nil + } + return err + } + } +} + +var regNewline = regexp.MustCompile(`\n|\r`) + +func (r *Resp) autoFormat(s fmt.State) { + req := r.req + if r.r.flag&Lcost != 0 { + fmt.Fprint(s, req.Method, " ", req.URL.String(), " ", r.cost) + } else { + fmt.Fprint(s, req.Method, " ", 
req.URL.String()) + } + + // test if it is should be outputed pretty + var pretty bool + var parts []string + addPart := func(part string) { + if part == "" { + return + } + parts = append(parts, part) + if !pretty && regNewline.MatchString(part) { + pretty = true + } + } + if r.r.flag&LreqBody != 0 { // request body + addPart(string(r.reqBody)) + } + if r.r.flag&LrespBody != 0 { // response body + addPart(r.String()) + } + + for _, part := range parts { + if pretty { + fmt.Fprint(s, "\n") + } + fmt.Fprint(s, " ", part) + } +} + +func (r *Resp) miniFormat(s fmt.State) { + req := r.req + if r.r.flag&Lcost != 0 { + fmt.Fprint(s, req.Method, " ", req.URL.String(), " ", r.cost) + } else { + fmt.Fprint(s, req.Method, " ", req.URL.String()) + } + if r.r.flag&LreqBody != 0 && len(r.reqBody) > 0 { // request body + str := regNewline.ReplaceAllString(string(r.reqBody), " ") + fmt.Fprint(s, " ", str) + } + if r.r.flag&LrespBody != 0 && r.String() != "" { // response body + str := regNewline.ReplaceAllString(r.String(), " ") + fmt.Fprint(s, " ", str) + } +} + +// Format fort the response +func (r *Resp) Format(s fmt.State, verb rune) { + if r == nil || r.req == nil { + return + } + if s.Flag('+') { // include header and format pretty. + fmt.Fprint(s, r.Dump()) + } else if s.Flag('-') { // keep all informations in one line. 
+ r.miniFormat(s) + } else { // auto + r.autoFormat(s) + } +} diff --git a/backend/vendor/github.com/imroc/req/setting.go b/backend/vendor/github.com/imroc/req/setting.go new file mode 100644 index 00000000..74235f37 --- /dev/null +++ b/backend/vendor/github.com/imroc/req/setting.go @@ -0,0 +1,236 @@ +package req + +import ( + "crypto/tls" + "errors" + "net" + "net/http" + "net/http/cookiejar" + "net/url" + "time" +) + +// create a default client +func newClient() *http.Client { + jar, _ := cookiejar.New(nil) + transport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + DialContext: (&net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + DualStack: true, + }).DialContext, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + } + return &http.Client{ + Jar: jar, + Transport: transport, + Timeout: 2 * time.Minute, + } +} + +// Client return the default underlying http client +func (r *Req) Client() *http.Client { + if r.client == nil { + r.client = newClient() + } + return r.client +} + +// Client return the default underlying http client +func Client() *http.Client { + return std.Client() +} + +// SetClient sets the underlying http.Client. +func (r *Req) SetClient(client *http.Client) { + r.client = client // use default if client == nil +} + +// SetClient sets the default http.Client for requests. 
+func SetClient(client *http.Client) { + std.SetClient(client) +} + +// SetFlags control display format of *Resp +func (r *Req) SetFlags(flags int) { + r.flag = flags +} + +// SetFlags control display format of *Resp +func SetFlags(flags int) { + std.SetFlags(flags) +} + +// Flags return output format for the *Resp +func (r *Req) Flags() int { + return r.flag +} + +// Flags return output format for the *Resp +func Flags() int { + return std.Flags() +} + +func (r *Req) getTransport() *http.Transport { + trans, _ := r.Client().Transport.(*http.Transport) + return trans +} + +// EnableInsecureTLS allows insecure https +func (r *Req) EnableInsecureTLS(enable bool) { + trans := r.getTransport() + if trans == nil { + return + } + if trans.TLSClientConfig == nil { + trans.TLSClientConfig = &tls.Config{} + } + trans.TLSClientConfig.InsecureSkipVerify = enable +} + +func EnableInsecureTLS(enable bool) { + std.EnableInsecureTLS(enable) +} + +// EnableCookieenable or disable cookie manager +func (r *Req) EnableCookie(enable bool) { + if enable { + jar, _ := cookiejar.New(nil) + r.Client().Jar = jar + } else { + r.Client().Jar = nil + } +} + +// EnableCookieenable or disable cookie manager +func EnableCookie(enable bool) { + std.EnableCookie(enable) +} + +// SetTimeout sets the timeout for every request +func (r *Req) SetTimeout(d time.Duration) { + r.Client().Timeout = d +} + +// SetTimeout sets the timeout for every request +func SetTimeout(d time.Duration) { + std.SetTimeout(d) +} + +// SetProxyUrl set the simple proxy with fixed proxy url +func (r *Req) SetProxyUrl(rawurl string) error { + trans := r.getTransport() + if trans == nil { + return errors.New("req: no transport") + } + u, err := url.Parse(rawurl) + if err != nil { + return err + } + trans.Proxy = http.ProxyURL(u) + return nil +} + +// SetProxyUrl set the simple proxy with fixed proxy url +func SetProxyUrl(rawurl string) error { + return std.SetProxyUrl(rawurl) +} + +// SetProxy sets the proxy for every request 
+func (r *Req) SetProxy(proxy func(*http.Request) (*url.URL, error)) error { + trans := r.getTransport() + if trans == nil { + return errors.New("req: no transport") + } + trans.Proxy = proxy + return nil +} + +// SetProxy sets the proxy for every request +func SetProxy(proxy func(*http.Request) (*url.URL, error)) error { + return std.SetProxy(proxy) +} + +type jsonEncOpts struct { + indentPrefix string + indentValue string + escapeHTML bool +} + +func (r *Req) getJSONEncOpts() *jsonEncOpts { + if r.jsonEncOpts == nil { + r.jsonEncOpts = &jsonEncOpts{escapeHTML: true} + } + return r.jsonEncOpts +} + +// SetJSONEscapeHTML specifies whether problematic HTML characters +// should be escaped inside JSON quoted strings. +// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e +// to avoid certain safety problems that can arise when embedding JSON in HTML. +// +// In non-HTML settings where the escaping interferes with the readability +// of the output, SetEscapeHTML(false) disables this behavior. +func (r *Req) SetJSONEscapeHTML(escape bool) { + opts := r.getJSONEncOpts() + opts.escapeHTML = escape +} + +// SetJSONEscapeHTML specifies whether problematic HTML characters +// should be escaped inside JSON quoted strings. +// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e +// to avoid certain safety problems that can arise when embedding JSON in HTML. +// +// In non-HTML settings where the escaping interferes with the readability +// of the output, SetEscapeHTML(false) disables this behavior. +func SetJSONEscapeHTML(escape bool) { + std.SetJSONEscapeHTML(escape) +} + +// SetJSONIndent instructs the encoder to format each subsequent encoded +// value as if indented by the package-level function Indent(dst, src, prefix, indent). +// Calling SetIndent("", "") disables indentation. 
+func (r *Req) SetJSONIndent(prefix, indent string) { + opts := r.getJSONEncOpts() + opts.indentPrefix = prefix + opts.indentValue = indent +} + +// SetJSONIndent instructs the encoder to format each subsequent encoded +// value as if indented by the package-level function Indent(dst, src, prefix, indent). +// Calling SetIndent("", "") disables indentation. +func SetJSONIndent(prefix, indent string) { + std.SetJSONIndent(prefix, indent) +} + +type xmlEncOpts struct { + prefix string + indent string +} + +func (r *Req) getXMLEncOpts() *xmlEncOpts { + if r.xmlEncOpts == nil { + r.xmlEncOpts = &xmlEncOpts{} + } + return r.xmlEncOpts +} + +// SetXMLIndent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func (r *Req) SetXMLIndent(prefix, indent string) { + opts := r.getXMLEncOpts() + opts.prefix = prefix + opts.indent = indent +} + +// SetXMLIndent sets the encoder to generate XML in which each element +// begins on a new indented line that starts with prefix and is followed by +// one or more copies of indent according to the nesting depth. +func SetXMLIndent(prefix, indent string) { + std.SetXMLIndent(prefix, indent) +} diff --git a/backend/vendor/github.com/json-iterator/go/Gopkg.lock b/backend/vendor/github.com/json-iterator/go/Gopkg.lock deleted file mode 100644 index c8a9fbb3..00000000 --- a/backend/vendor/github.com/json-iterator/go/Gopkg.lock +++ /dev/null @@ -1,21 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
- - -[[projects]] - name = "github.com/modern-go/concurrent" - packages = ["."] - revision = "e0a39a4cb4216ea8db28e22a69f4ec25610d513a" - version = "1.0.0" - -[[projects]] - name = "github.com/modern-go/reflect2" - packages = ["."] - revision = "4b7aa43c6742a2c18fdef89dd197aaae7dac7ccd" - version = "1.0.1" - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - inputs-digest = "ea54a775e5a354cb015502d2e7aa4b74230fc77e894f34a838b268c25ec8eeb8" - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/backend/vendor/github.com/modern-go/reflect2/Gopkg.lock b/backend/vendor/github.com/modern-go/reflect2/Gopkg.lock deleted file mode 100644 index 2a3a6989..00000000 --- a/backend/vendor/github.com/modern-go/reflect2/Gopkg.lock +++ /dev/null @@ -1,15 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. - - -[[projects]] - name = "github.com/modern-go/concurrent" - packages = ["."] - revision = "e0a39a4cb4216ea8db28e22a69f4ec25610d513a" - version = "1.0.0" - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - inputs-digest = "daee8a88b3498b61c5640056665b8b9eea062006f5e596bbb6a3ed9119a11ec7" - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/backend/vendor/modules.txt b/backend/vendor/modules.txt index 99c90a5c..c6a30b46 100644 --- a/backend/vendor/modules.txt +++ b/backend/vendor/modules.txt @@ -40,6 +40,8 @@ github.com/hashicorp/hcl/hcl/token github.com/hashicorp/hcl/json/parser github.com/hashicorp/hcl/json/scanner github.com/hashicorp/hcl/json/token +# github.com/imroc/req v0.2.4 +github.com/imroc/req # github.com/json-iterator/go v1.1.6 github.com/json-iterator/go # github.com/jtolds/gls v4.20.0+incompatible diff --git a/docker-compose.yml b/docker-compose.yml index bea50fb1..b4f36e86 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,25 +6,25 @@ services: environment: CRAWLAB_API_ADDRESS: "http://localhost:8000" # backend API address 后端 API 地址,设置为 http://<宿主机IP>:<端口>,端口为映射出来的端口 
CRAWLAB_SERVER_MASTER: "Y" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N - CRAWLAB_MONGO_HOST: "mongo1" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称 + CRAWLAB_MONGO_HOST: "mongo" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称 CRAWLAB_REDIS_ADDRESS: "redis" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称 ports: - "8080:8080" # frontend port mapping 前端端口映射 - "8000:8000" # backend port mapping 后端端口映射 depends_on: - - mongo1 + - mongo - redis worker: image: tikazyq/crawlab:latest container_name: worker environment: CRAWLAB_SERVER_MASTER: "N" - CRAWLAB_MONGO_HOST: "mongo1" + CRAWLAB_MONGO_HOST: "mongo" CRAWLAB_REDIS_ADDRESS: "redis" depends_on: - - mongo1 + - mongo - redis - mongo1: + mongo: image: mongo:latest restart: always # volumes: diff --git a/frontend/package.json b/frontend/package.json index d11b503b..32432b8f 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "crawlab", - "version": "0.4.1", + "version": "0.4.2", "private": true, "scripts": { "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", diff --git a/frontend/src/components/File/FileList.vue b/frontend/src/components/File/FileList.vue index e874c446..50b7d306 100644 --- a/frontend/src/components/File/FileList.vue +++ b/frontend/src/components/File/FileList.vue @@ -7,15 +7,57 @@ {{$t('Back')}} - + + + {{$t('Cancel')}} + + + {{$t('Confirm')}} + + + + + +
+ + {{$t('Confirm')}} + +
+ +
+ {{$t('Save')}} - - - - {{$t('New File')}} - + + +
+ + {{$t('File')}} + + + {{$t('Directory')}} + +
+ +
@@ -70,7 +112,11 @@ export default { return { code: 'var hello = \'world\'', isEdit: false, - showFile: false + showFile: false, + name: '', + isShowAdd: false, + isShowDelete: false, + isShowRename: false } }, computed: { @@ -101,6 +147,7 @@ export default { this.$store.commit('file/SET_CURRENT_PATH', item.path) this.$store.dispatch('file/getFileContent', { path: item.path }) } + this.$st.sendEv('爬虫详情', '文件', '点击') }, onBack () { const sep = '/' @@ -109,16 +156,71 @@ export default { const path = arr.join(sep) this.$store.commit('file/SET_CURRENT_PATH', path) this.$store.dispatch('file/getFileList', { path: this.currentPath }) + this.$st.sendEv('爬虫详情', '文件', '回退') }, - onFileSave () { - this.$store.dispatch('file/saveFileContent', { path: this.currentPath }) - .then(() => { - this.$message.success(this.$t('Saved file successfully')) - }) + async onFileSave () { + await this.$store.dispatch('file/saveFileContent', { path: this.currentPath }) + this.$message.success(this.$t('Saved file successfully')) + this.$st.sendEv('爬虫详情', '文件', '保存') }, onBackFile () { this.showFile = false this.onBack() + }, + onHideAdd () { + this.name = '' + }, + async onAddFile () { + if (!this.name) { + this.$message.error(this.$t('Name cannot be empty')) + return + } + const path = this.currentPath + '/' + this.name + await this.$store.dispatch('file/addFile', { path }) + await this.$store.dispatch('file/getFileList', { path: this.currentPath }) + this.isShowAdd = false + + this.showFile = true + this.$store.commit('file/SET_FILE_CONTENT', '') + this.$store.commit('file/SET_CURRENT_PATH', path) + await this.$store.dispatch('file/getFileContent', { path }) + this.$st.sendEv('爬虫详情', '文件', '添加') + }, + async onAddDir () { + if (!this.name) { + this.$message.error(this.$t('Name cannot be empty')) + return + } + await this.$store.dispatch('file/addDir', { path: this.currentPath + '/' + this.name }) + await this.$store.dispatch('file/getFileList', { path: this.currentPath }) + this.isShowAdd = 
false + this.$st.sendEv('爬虫详情', '文件', '添加') + }, + async onFileDelete () { + await this.$store.dispatch('file/deleteFile', { path: this.currentPath }) + this.$message.success(this.$t('Deleted successfully')) + this.isShowDelete = false + this.onBackFile() + this.$st.sendEv('爬虫详情', '文件', '删除') + }, + onOpenRename () { + this.isShowRename = true + const arr = this.currentPath.split('/') + this.name = arr[arr.length - 1] + }, + async onRenameFile () { + let newPath + if (this.currentPath.split('/').length === 1) { + newPath = this.name + } else { + const arr = this.currentPath.split('/') + newPath = arr[0] + '/' + this.name + } + await this.$store.dispatch('file/renameFile', { path: this.currentPath, newPath }) + this.$store.commit('file/SET_CURRENT_PATH', newPath) + this.$message.success(this.$t('Renamed successfully')) + this.isShowRename = false + this.$st.sendEv('爬虫详情', '文件', '重命名') } }, created () { @@ -236,4 +338,14 @@ export default { font-size: 14px; color: rgba(3, 47, 98, 1); } + + .add-type-list { + text-align: right; + margin-top: 10px; + } + + .add-type { + cursor: pointer; + font-weight: bolder; + } diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index f804bca1..a13b4c7a 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -21,7 +21,8 @@ - + @@ -45,20 +46,41 @@ - + - {{$t('Run')}} - {{$t('Save')}} + + + {{$t('Upload')}} + + + + {{$t('Run')}} + + + {{$t('Save')}} + + + diff --git a/frontend/src/components/ScrollView/LogView.vue b/frontend/src/components/ScrollView/LogView.vue index e07ca03b..c4c20228 100644 --- a/frontend/src/components/ScrollView/LogView.vue +++ b/frontend/src/components/ScrollView/LogView.vue @@ -67,7 +67,7 @@ export default { .log-view { margin-top: 0!important; min-height: 100%; - overflow-y: scroll; + overflow-y: scroll!important; list-style: none; color: #A9B7C6; background: #2B2B2B; diff 
--git a/frontend/src/i18n/zh.js index a960c600..7fef483c 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -40,6 +40,7 @@ export default { // 操作 Add: '添加', + Create: '创建', Run: '运行', Deploy: '部署', Save: '保存', @@ -63,6 +64,9 @@ 'Item Threshold': '子项阈值', 'Back': '返回', 'New File': '新建文件', + 'Rename': '重命名', + 'Install': '安装', + 'Uninstall': '卸载', // 主页 'Total Tasks': '总任务数', @@ -83,6 +87,8 @@ 'Node Network': '节点拓扑图', 'Master': '主节点', 'Worker': '工作节点', + 'Installation': '安装', + 'Search Dependencies': '搜索依赖', // 节点列表 'IP': 'IP地址', @@ -234,6 +240,9 @@ // 文件 'Choose Folder': '选择文件', + 'File': '文件', + 'Folder': '文件夹', + 'Directory': '目录', // 导入 'Import Spider': '导入爬虫', @@ -257,11 +266,14 @@ 'ARCH': '操作架构', 'Number of CPU': 'CPU数', 'Executables': '执行文件', + 'Latest Version': '最新版本', // 弹出框 'Notification': '提示', 'Are you sure to delete this node?': '你确定要删除该节点?', 'Are you sure to run this spider?': '你确定要运行该爬虫?', + 'Added spider successfully': '成功添加爬虫', + 'Uploaded spider files successfully': '成功上传爬虫文件', 'Node info has been saved successfully': '节点信息已成功保存', 'A task has been scheduled successfully': '已经成功派发一个任务', 'Are you sure to delete this spider?': '你确定要删除该爬虫?', @@ -277,10 +289,17 @@ 'username already exists': '用户名已存在', 'Deleted successfully': '成功删除', 'Saved successfully': '成功保存', - 'Please zip your spider files from the root directory': '爬虫文件请从根目录下开始压缩。', + 'Renamed successfully': '成功重命名', + 'You can click "Add" to create an empty spider and upload files later.': '您可以点击"添加"按钮创建空的爬虫,之后再上传文件。', + 'OR, you can also click "Upload" and upload a zip file containing your spider project.': '或者,您也可以点击"上传"按钮并上传一个包含爬虫项目的 zip 文件。', + 'NOTE: When uploading a zip file, please zip your spider files from the ROOT DIRECTORY.': '注意: 上传 zip 文件时,请从 根目录 下开始压缩爬虫文件。', 'English': 'English', 'Are you sure to delete the schedule task?': '确定删除定时任务?', + ' is not installed, 
do you want to install it?': ' 还没有安装,您是否打算安装它?', 'Disclaimer': '免责声明', + 'Please search dependencies': '请搜索依赖', + 'No Data': '暂无数据', + 'Show installed': '只看已安装', // 登录 'Sign in': '登录', diff --git a/frontend/src/store/modules/file.js b/frontend/src/store/modules/file.js index 66b84651..abdf5638 100644 --- a/frontend/src/store/modules/file.js +++ b/frontend/src/store/modules/file.js @@ -25,8 +25,9 @@ const actions = { const { path } = payload const spiderId = rootState.spider.spiderForm._id commit('SET_CURRENT_PATH', path) - request.get(`/spiders/${spiderId}/dir`, { path }) + return request.get(`/spiders/${spiderId}/dir`, { path }) .then(response => { + if (!response.data.data) response.data.data = [] commit( 'SET_FILE_LIST', response.data.data @@ -38,10 +39,35 @@ const actions = { getFileContent ({ commit, rootState }, payload) { const { path } = payload const spiderId = rootState.spider.spiderForm._id - request.get(`/spiders/${spiderId}/file`, { path }) + return request.get(`/spiders/${spiderId}/file`, { path }) .then(response => { commit('SET_FILE_CONTENT', response.data.data) }) + }, + saveFileContent ({ state, rootState }, payload) { + const { path } = payload + const spiderId = rootState.spider.spiderForm._id + return request.post(`/spiders/${spiderId}/file`, { path, content: state.fileContent }) + }, + addFile ({ rootState }, payload) { + const { path } = payload + const spiderId = rootState.spider.spiderForm._id + return request.put(`/spiders/${spiderId}/file`, { path }) + }, + addDir ({ rootState }, payload) { + const { path } = payload + const spiderId = rootState.spider.spiderForm._id + return request.put(`/spiders/${spiderId}/dir`, { path }) + }, + deleteFile ({ rootState }, payload) { + const { path } = payload + const spiderId = rootState.spider.spiderForm._id + return request.delete(`/spiders/${spiderId}/file`, { path }) + }, + renameFile ({ rootState }, payload) { + const { path, newPath } = payload + const spiderId = rootState.spider.spiderForm._id + 
return request.post(`/spiders/${spiderId}/file/rename`, { path, new_path: newPath }) } } diff --git a/frontend/src/store/modules/node.js b/frontend/src/store/modules/node.js index 5e21a222..873e53e8 100644 --- a/frontend/src/store/modules/node.js +++ b/frontend/src/store/modules/node.js @@ -3,7 +3,7 @@ import request from '../../api/request' const state = { // NodeList nodeList: [], - nodeForm: { _id: {} }, + nodeForm: {}, // spider to deploy/run activeSpider: {} diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index 7ff9324a..0b669a6d 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -174,6 +174,9 @@ const actions = { addConfigSpider ({ state }) { return request.put(`/config_spiders`, state.spiderForm) }, + addSpider ({ state }) { + return request.put(`/spiders`, state.spiderForm) + }, async getTemplateList ({ state, commit }) { const res = await request.get(`/config_spiders_templates`) commit('SET_TEMPLATE_LIST', res.data.data) diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 9de29000..95153be5 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -12,7 +12,8 @@ const state = { // filter filter: { node_id: '', - spider_id: '' + spider_id: '', + status: '' }, // pagination pageNum: 1, @@ -89,7 +90,8 @@ const actions = { page_num: state.pageNum, page_size: state.pageSize, node_id: state.filter.node_id || undefined, - spider_id: state.filter.spider_id || undefined + spider_id: state.filter.spider_id || undefined, + status: state.filter.status || undefined }) .then(response => { commit('SET_TASK_LIST', response.data.data || []) diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index e8c33e86..52960c21 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -8,9 +8,6 @@ - - - 
{{$t('Logout')}} @@ -31,7 +28,7 @@ - + {{$t('Documentation')}} diff --git a/frontend/src/views/node/NodeDetail.vue b/frontend/src/views/node/NodeDetail.vue index 527da581..10364b01 100644 --- a/frontend/src/views/node/NodeDetail.vue +++ b/frontend/src/views/node/NodeDetail.vue @@ -13,6 +13,9 @@ + + + {{$t('Deployed Spiders')}} @@ -25,11 +28,13 @@ import { mapState } from 'vuex' import NodeOverview from '../../components/Overview/NodeOverview' +import NodeInstallation from '../../components/Node/NodeInstallation' export default { name: 'NodeDetail', components: { - NodeOverview + NodeOverview, + NodeInstallation }, data () { return { @@ -43,7 +48,9 @@ export default { ]) }, methods: { - onTabClick () { + onTabClick (name) { + if (name === 'installation') { + } }, onNodeChange (id) { this.$router.push(`/nodes/${id}`) diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 74177b62..80c3fa0b 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -34,6 +34,50 @@ :visible.sync="addDialogVisible" :before-close="onAddDialogClose"> + + + + + + + + + + + + + + + + + + {{$t('Upload')}} + + + + + +

{{$t('You can click "Add" to create an empty spider and upload files later.')}}

+

{{$t('OR, you can also click "Upload" and upload a zip file containing your spider project.')}}

+

+ {{$t('NOTE: When uploading a zip file, please zip your' + + ' spider files from the ROOT DIRECTORY.')}} +

+
+
+ {{$t('Add')}} +
+
@@ -60,63 +104,10 @@ {{$t('Add')}} - - - - - {{$t('Upload')}} - - - - -
- - - - - - - - - - - - - - - - {{$t('Cancel')}} - {{$t('Add')}} - - - - - - - - - - {{$t('Import Spiders')}}
- + {{$t('Add Spider')}} @@ -299,8 +294,6 @@ export default { isEditMode: false, dialogVisible: false, addDialogVisible: false, - addConfigurableDialogVisible: false, - addCustomizedDialogVisible: false, crawlConfirmDialogVisible: false, activeSpiderId: undefined, filter: { @@ -320,7 +313,7 @@ export default { name: [{ required: true, message: 'Required Field', trigger: 'change' }] }, fileList: [], - spiderType: 'configurable' + spiderType: 'customized' } }, computed: { @@ -333,7 +326,15 @@ export default { ]), ...mapGetters('user', [ 'token' - ]) + ]), + uploadForm () { + return { + name: this.spiderForm.name, + display_name: this.spiderForm.display_name, + col: this.spiderForm.col, + cmd: this.spiderForm.cmd + } + } }, methods: { onSpiderTypeChange (val) { @@ -374,9 +375,19 @@ export default { }) }, onAddCustomized () { - this.addDialogVisible = false - this.addCustomizedDialogVisible = true - this.$st.sendEv('爬虫列表', '添加爬虫', '自定义爬虫') + this.$refs['addCustomizedForm'].validate(async res => { + if (!res) return + let res2 + try { + res2 = await this.$store.dispatch('spider/addSpider') + } catch (e) { + this.$message.error(this.$t('Something wrong happened')) + return + } + await this.$store.dispatch('spider/getSpiderList') + this.$router.push(`/spiders/${res2.data.data._id}`) + this.$st.sendEv('爬虫列表', '添加爬虫', '自定义爬虫') + }) }, onRefresh () { this.getList() @@ -510,9 +521,7 @@ export default { } }) }, - onUploadChange () { - }, - onUploadSuccess () { + onUploadSuccess (res) { // clear fileList this.fileList = [] @@ -521,8 +530,11 @@ export default { this.getList() }, 500) - // close popup - this.addCustomizedDialogVisible = false + // message + this.$message.success(this.$t('Uploaded spider files successfully')) + + // navigate to spider detail + this.$router.push(`/spiders/${res.data._id}`) }, getTime (str) { if (!str || str.match('^0001')) return 'NA' diff --git a/frontend/src/views/task/TaskList.vue b/frontend/src/views/task/TaskList.vue index becc7d0b..6cfab747 
100644 --- a/frontend/src/views/task/TaskList.vue +++ b/frontend/src/views/task/TaskList.vue @@ -4,6 +4,29 @@
+ + + + + + + + + + + + + + + + + + + + + + +
@@ -310,6 +333,10 @@ export default { }, onSelectionChange (val) { this.multipleSelection = val + }, + onFilterChange () { + this.$store.dispatch('task/getTaskList') + this.$st.sendEv('任务列表', '筛选任务') } }, created () { diff --git a/frontend/yarn.lock b/frontend/yarn.lock index a6600a96..3e7bb8a3 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -1336,7 +1336,7 @@ async@^1.5.2: version "1.5.2" resolved "http://registry.npm.taobao.org/async/download/async-1.5.2.tgz#ec6a61ae56480c0c3cb241c95618e20892f9672a" -async@^2.1.4, async@^2.5.0: +async@^2.1.4: version "2.6.2" resolved "http://registry.npm.taobao.org/async/download/async-2.6.2.tgz#18330ea7e6e313887f5d2f2a904bac6fe4dd5381" dependencies: @@ -2199,6 +2199,11 @@ commander@^2.18.0, commander@^2.19.0: version "2.19.0" resolved "http://registry.npm.taobao.org/commander/download/commander-2.19.0.tgz#f6198aa84e5b83c46054b94ddedbfed5ee9ff12a" +commander@~2.20.3: + version "2.20.3" + resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" + integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ== + commondir@^1.0.1: version "1.0.1" resolved "http://registry.npm.taobao.org/commondir/download/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b" @@ -4031,10 +4036,11 @@ handle-thing@^2.0.0: resolved "http://registry.npm.taobao.org/handle-thing/download/handle-thing-2.0.0.tgz#0e039695ff50c93fc288557d696f3c1dc6776754" handlebars@^4.0.3: - version "4.1.0" - resolved "http://registry.npm.taobao.org/handlebars/download/handlebars-4.1.0.tgz#0d6a6f34ff1f63cecec8423aa4169827bf787c3a" + version "4.5.3" + resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.5.3.tgz#5cf75bd8714f7605713511a56be7c349becb0482" + integrity sha512-3yPecJoJHK/4c6aZhSvxOyG4vJKDshV36VHp0iVCDVh7o9w2vwi3NSnL2MMPj3YdduqaBcu7cGbggJQM0br9xA== dependencies: - async "^2.5.0" + neo-async "^2.6.0" optimist "^0.6.1" source-map "^0.6.1" 
optionalDependencies: @@ -5727,7 +5733,8 @@ minimist@^1.1.1, minimist@^1.1.3, minimist@^1.2.0: minimist@~0.0.1: version "0.0.10" - resolved "http://registry.npm.taobao.org/minimist/download/minimist-0.0.10.tgz#de3f98543dbf96082be48ad1a0c7cda836301dcf" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-0.0.10.tgz#de3f98543dbf96082be48ad1a0c7cda836301dcf" + integrity sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8= minipass@^2.2.1, minipass@^2.3.4: version "2.3.5" @@ -5863,8 +5870,9 @@ negotiator@0.6.1: resolved "http://registry.npm.taobao.org/negotiator/download/negotiator-0.6.1.tgz#2b327184e8992101177b28563fb5e7102acd0ca9" neo-async@^2.5.0, neo-async@^2.6.0: - version "2.6.0" - resolved "http://registry.npm.taobao.org/neo-async/download/neo-async-2.6.0.tgz#b9d15e4d71c6762908654b5183ed38b753340835" + version "2.6.1" + resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.1.tgz#ac27ada66167fa8849a6addd837f6b189ad2081c" + integrity sha512-iyam8fBuCUpWeKPGpaNMetEocMt364qkCsfL9JuhjXX6dRnguRVOfk2GZaDpPjcOKiiXCPINZC1GczQ7iTq3Zw== nice-try@^1.0.4: version "1.0.5" @@ -6205,7 +6213,8 @@ opn@^5.1.0, opn@^5.3.0: optimist@^0.6.1: version "0.6.1" - resolved "http://registry.npm.taobao.org/optimist/download/optimist-0.6.1.tgz#da3ea74686fa21a19a111c326e90eb15a0196686" + resolved "https://registry.yarnpkg.com/optimist/-/optimist-0.6.1.tgz#da3ea74686fa21a19a111c326e90eb15a0196686" + integrity sha1-2j6nRob6IaGaERwybpDrFaAZZoY= dependencies: minimist "~0.0.1" wordwrap "~0.0.2" @@ -7745,7 +7754,8 @@ source-map@^0.5.0, source-map@^0.5.3, source-map@^0.5.6, source-map@^0.5.7: source-map@^0.6.0, source-map@^0.6.1, source-map@~0.6.0, source-map@~0.6.1: version "0.6.1" - resolved "http://registry.npm.taobao.org/source-map/download/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263" + integrity 
sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== spdx-correct@^3.0.0: version "3.1.0" @@ -8328,13 +8338,21 @@ typedarray@^0.0.6: version "0.0.6" resolved "http://registry.npm.taobao.org/typedarray/download/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777" -uglify-js@3.4.x, uglify-js@^3.1.4: +uglify-js@3.4.x: version "3.4.9" resolved "http://registry.npm.taobao.org/uglify-js/download/uglify-js-3.4.9.tgz#af02f180c1207d76432e473ed24a28f4a782bae3" dependencies: commander "~2.17.1" source-map "~0.6.1" +uglify-js@^3.1.4: + version "3.7.3" + resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.7.3.tgz#f918fce9182f466d5140f24bb0ff35c2d32dcc6a" + integrity sha512-7tINm46/3puUA4hCkKYo4Xdts+JDaVC9ZPRcG8Xw9R4nhO/gZgUM3TENq8IF4Vatk8qCig4MzP/c8G4u2BkVQg== + dependencies: + commander "~2.20.3" + source-map "~0.6.1" + unicode-canonical-property-names-ecmascript@^1.0.4: version "1.0.4" resolved "http://registry.npm.taobao.org/unicode-canonical-property-names-ecmascript/download/unicode-canonical-property-names-ecmascript-1.0.4.tgz#2619800c4c825800efdd8343af7dd9933cbe2818" @@ -8862,7 +8880,8 @@ wide-align@^1.1.0: wordwrap@~0.0.2: version "0.0.3" - resolved "http://registry.npm.taobao.org/wordwrap/download/wordwrap-0.0.3.tgz#a3d5da6cd5c0bc0008d37234bbaf1bed63059107" + resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-0.0.3.tgz#a3d5da6cd5c0bc0008d37234bbaf1bed63059107" + integrity sha1-o9XabNXAvAAI03I0u68b7WMFkQc= wordwrap@~1.0.0: version "1.0.0"