fix: missing relational models issues

This commit is contained in:
Marvin Zhang
2024-06-23 23:20:58 +08:00
parent 725ee7d9ad
commit 460c8d958a
9 changed files with 406 additions and 168 deletions

View File

@@ -8,7 +8,6 @@ import (
"github.com/crawlab-team/crawlab/core/interfaces" "github.com/crawlab-team/crawlab/core/interfaces"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/spf13/viper" "github.com/spf13/viper"
"go.mongodb.org/mongo-driver/bson/primitive"
"io" "io"
"os" "os"
"path/filepath" "path/filepath"
@@ -18,7 +17,7 @@ import (
func GetBaseFileListDir(rootPath string, c *gin.Context) { func GetBaseFileListDir(rootPath string, c *gin.Context) {
path := c.Query("path") path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -38,7 +37,7 @@ func GetBaseFileListDir(rootPath string, c *gin.Context) {
func GetBaseFileFile(rootPath string, c *gin.Context) { func GetBaseFileFile(rootPath string, c *gin.Context) {
path := c.Query("path") path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -56,7 +55,7 @@ func GetBaseFileFile(rootPath string, c *gin.Context) {
func GetBaseFileFileInfo(rootPath string, c *gin.Context) { func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
path := c.Query("path") path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -72,7 +71,7 @@ func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
} }
func PostBaseFileSaveFile(rootPath string, c *gin.Context) { func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorInternalServerError(c, err)
return return
@@ -122,7 +121,7 @@ func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
} }
func PostBaseFileSaveFiles(rootPath string, c *gin.Context) { func PostBaseFileSaveFiles(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorInternalServerError(c, err)
return return
@@ -183,7 +182,7 @@ func PostBaseFileSaveDir(rootPath string, c *gin.Context) {
return return
} }
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -207,7 +206,7 @@ func PostBaseFileRenameFile(rootPath string, c *gin.Context) {
return return
} }
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -231,7 +230,7 @@ func DeleteBaseFileFile(rootPath string, c *gin.Context) {
payload.Path = "." payload.Path = "."
} }
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -259,7 +258,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
return return
} }
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -274,7 +273,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
} }
func PostBaseFileExport(rootPath string, c *gin.Context) { func PostBaseFileExport(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath, c) fsSvc, err := getBaseFileFsSvc(rootPath)
if err != nil { if err != nil {
HandleErrorBadRequest(c, err) HandleErrorBadRequest(c, err)
return return
@@ -292,14 +291,9 @@ func PostBaseFileExport(rootPath string, c *gin.Context) {
c.File(zipFilePath) c.File(zipFilePath)
} }
func getBaseFileFsSvc(rootPath string, c *gin.Context) (svc interfaces.FsServiceV2, err error) { func getBaseFileFsSvc(rootPath string) (svc interfaces.FsServiceV2, err error) {
id, err := primitive.ObjectIDFromHex(c.Param("id"))
if err != nil {
return nil, err
}
workspacePath := viper.GetString("workspace") workspacePath := viper.GetString("workspace")
fsSvc := fs.NewFsServiceV2(filepath.Join(workspacePath, id.Hex(), rootPath)) fsSvc := fs.NewFsServiceV2(filepath.Join(workspacePath, rootPath))
return fsSvc, nil return fsSvc, nil
} }

View File

@@ -0,0 +1,89 @@
package controllers
import (
"github.com/crawlab-team/crawlab/core/errors"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/db/mongo"
"github.com/gin-gonic/gin"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
mongo2 "go.mongodb.org/mongo-driver/mongo"
)
// GetProjectList responds with a page of projects, each annotated with the
// number of spiders that reference it through their "project_id" field.
//
// When MustGetFilterAll reports true the request is delegated to the generic
// controller's getAll handler and no spider counts are computed. Otherwise the
// pagination, filter and sort options are read from the request context.
//
// Responses:
//   - success with an empty list and total 0 when no project matches;
//   - success with the annotated projects and the total match count;
//   - internal server error when a database call fails or a returned spider
//     references a project outside the requested page.
func GetProjectList(c *gin.Context) {
	// get all list
	if MustGetFilterAll(c) {
		NewControllerV2[models.ProjectV2]().getAll(c)
		return
	}

	// params
	pagination := MustGetPagination(c)
	query := MustGetFilterQuery(c)
	sort := MustGetSortOption(c)

	// get list
	projects, err := service.NewModelServiceV2[models.ProjectV2]().GetMany(query, &mongo.FindOptions{
		Sort:  sort,
		Skip:  pagination.Size * (pagination.Page - 1),
		Limit: pagination.Size,
	})
	if err != nil {
		if err.Error() != mongo2.ErrNoDocuments.Error() {
			HandleErrorInternalServerError(c, err)
			return
		}
		// "no documents" is not a failure: answer with an empty list instead
		// of returning without writing any response body.
		HandleSuccessWithListData(c, []models.ProjectV2{}, 0)
		return
	}
	if len(projects) == 0 {
		HandleSuccessWithListData(c, []models.ProjectV2{}, 0)
		return
	}

	// total count
	total, err := service.NewModelServiceV2[models.ProjectV2]().Count(query)
	if err != nil {
		HandleErrorInternalServerError(c, err)
		return
	}

	// project ids, and spider count per project (seeded with 0 so projects
	// without spiders are still present in the cache)
	ids := make([]primitive.ObjectID, 0, len(projects))
	cache := make(map[primitive.ObjectID]int, len(projects))
	for _, p := range projects {
		ids = append(ids, p.Id)
		cache[p.Id] = 0
	}

	// spiders belonging to any project on this page
	spiders, err := service.NewModelServiceV2[models.SpiderV2]().GetMany(bson.M{
		"project_id": bson.M{
			"$in": ids,
		},
	}, nil)
	if err != nil {
		HandleErrorInternalServerError(c, err)
		return
	}
	for _, s := range spiders {
		if _, ok := cache[s.ProjectId]; !ok {
			HandleErrorInternalServerError(c, errors.ErrorControllerMissingInCache)
			return
		}
		cache[s.ProjectId]++
	}

	// assign counts in place; the range value is a copy, so writing to it (or
	// appending it back onto the slice) would leave the original elements
	// without counts and duplicate every project in the response
	for i := range projects {
		projects[i].Spiders = cache[projects[i].Id]
	}

	HandleSuccessWithListData(c, projects, total)
}

View File

@@ -61,204 +61,210 @@ func InitRoutes(app *gin.Engine) (err error) {
RegisterController(groups.AuthGroup, "/nodes", NewControllerV2[models.NodeV2]()) RegisterController(groups.AuthGroup, "/nodes", NewControllerV2[models.NodeV2]())
RegisterController(groups.AuthGroup, "/notifications/settings", NewControllerV2[models.SettingV2]()) RegisterController(groups.AuthGroup, "/notifications/settings", NewControllerV2[models.SettingV2]())
RegisterController(groups.AuthGroup, "/permissions", NewControllerV2[models.PermissionV2]()) RegisterController(groups.AuthGroup, "/permissions", NewControllerV2[models.PermissionV2]())
RegisterController(groups.AuthGroup, "/projects", NewControllerV2[models.ProjectV2]()) RegisterController(groups.AuthGroup, "/projects", NewControllerV2[models.ProjectV2]([]Action{
{
Method: http.MethodGet,
Path: "",
HandlerFunc: GetProjectList,
},
}...))
RegisterController(groups.AuthGroup, "/roles", NewControllerV2[models.RoleV2]()) RegisterController(groups.AuthGroup, "/roles", NewControllerV2[models.RoleV2]())
RegisterController(groups.AuthGroup, "/schedules", NewControllerV2[models.ScheduleV2]( RegisterController(groups.AuthGroup, "/schedules", NewControllerV2[models.ScheduleV2]([]Action{
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "", Path: "",
HandlerFunc: PostSchedule, HandlerFunc: PostSchedule,
}, },
Action{ {
Method: http.MethodPut, Method: http.MethodPut,
Path: "/:id", Path: "/:id",
HandlerFunc: PutScheduleById, HandlerFunc: PutScheduleById,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/enable", Path: "/:id/enable",
HandlerFunc: PostScheduleEnable, HandlerFunc: PostScheduleEnable,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/disable", Path: "/:id/disable",
HandlerFunc: PostScheduleDisable, HandlerFunc: PostScheduleDisable,
}, },
)) }...))
RegisterController(groups.AuthGroup, "/spiders", NewControllerV2[models.SpiderV2]( RegisterController(groups.AuthGroup, "/spiders", NewControllerV2[models.SpiderV2]([]Action{
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id", Path: "/:id",
HandlerFunc: GetSpiderById, HandlerFunc: GetSpiderById,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "", Path: "",
HandlerFunc: GetSpiderList, HandlerFunc: GetSpiderList,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "", Path: "",
HandlerFunc: PostSpider, HandlerFunc: PostSpider,
}, },
Action{ {
Method: http.MethodPut, Method: http.MethodPut,
Path: "/:id", Path: "/:id",
HandlerFunc: PutSpiderById, HandlerFunc: PutSpiderById,
}, },
Action{ {
Method: http.MethodDelete, Method: http.MethodDelete,
Path: "/:id", Path: "/:id",
HandlerFunc: DeleteSpiderById, HandlerFunc: DeleteSpiderById,
}, },
Action{ {
Method: http.MethodDelete, Method: http.MethodDelete,
Path: "", Path: "",
HandlerFunc: DeleteSpiderList, HandlerFunc: DeleteSpiderList,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/files/list", Path: "/:id/files/list",
HandlerFunc: GetSpiderListDir, HandlerFunc: GetSpiderListDir,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/files/get", Path: "/:id/files/get",
HandlerFunc: GetSpiderFile, HandlerFunc: GetSpiderFile,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/files/info", Path: "/:id/files/info",
HandlerFunc: GetSpiderFileInfo, HandlerFunc: GetSpiderFileInfo,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/save", Path: "/:id/files/save",
HandlerFunc: PostSpiderSaveFile, HandlerFunc: PostSpiderSaveFile,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/save/batch", Path: "/:id/files/save/batch",
HandlerFunc: PostSpiderSaveFiles, HandlerFunc: PostSpiderSaveFiles,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/save/dir", Path: "/:id/files/save/dir",
HandlerFunc: PostSpiderSaveDir, HandlerFunc: PostSpiderSaveDir,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/rename", Path: "/:id/files/rename",
HandlerFunc: PostSpiderRenameFile, HandlerFunc: PostSpiderRenameFile,
}, },
Action{ {
Method: http.MethodDelete, Method: http.MethodDelete,
Path: "/:id/files", Path: "/:id/files",
HandlerFunc: DeleteSpiderFile, HandlerFunc: DeleteSpiderFile,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/copy", Path: "/:id/files/copy",
HandlerFunc: PostSpiderCopyFile, HandlerFunc: PostSpiderCopyFile,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/files/export", Path: "/:id/files/export",
HandlerFunc: PostSpiderExport, HandlerFunc: PostSpiderExport,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/run", Path: "/:id/run",
HandlerFunc: PostSpiderRun, HandlerFunc: PostSpiderRun,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/data-source", Path: "/:id/data-source",
HandlerFunc: GetSpiderDataSource, HandlerFunc: GetSpiderDataSource,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/data-source/:ds_id", Path: "/:id/data-source/:ds_id",
HandlerFunc: PostSpiderDataSource, HandlerFunc: PostSpiderDataSource,
}, },
)) }...))
RegisterController(groups.AuthGroup, "/tasks", NewControllerV2[models.TaskV2]( RegisterController(groups.AuthGroup, "/tasks", NewControllerV2[models.TaskV2]([]Action{
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id", Path: "/:id",
HandlerFunc: GetTaskById, HandlerFunc: GetTaskById,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "", Path: "",
HandlerFunc: GetTaskList, HandlerFunc: GetTaskList,
}, },
Action{ {
Method: http.MethodDelete, Method: http.MethodDelete,
Path: "/:id", Path: "/:id",
HandlerFunc: DeleteTaskById, HandlerFunc: DeleteTaskById,
}, },
Action{ {
Method: http.MethodDelete, Method: http.MethodDelete,
Path: "", Path: "",
HandlerFunc: DeleteList, HandlerFunc: DeleteList,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/run", Path: "/run",
HandlerFunc: PostTaskRun, HandlerFunc: PostTaskRun,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/restart", Path: "/:id/restart",
HandlerFunc: PostTaskRestart, HandlerFunc: PostTaskRestart,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/cancel", Path: "/:id/cancel",
HandlerFunc: PostTaskCancel, HandlerFunc: PostTaskCancel,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/logs", Path: "/:id/logs",
HandlerFunc: GetTaskLogs, HandlerFunc: GetTaskLogs,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/:id/data", Path: "/:id/data",
HandlerFunc: GetTaskData, HandlerFunc: GetTaskData,
}, },
)) }...))
RegisterController(groups.AuthGroup, "/tokens", NewControllerV2[models.TokenV2]( RegisterController(groups.AuthGroup, "/tokens", NewControllerV2[models.TokenV2]([]Action{
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "", Path: "",
HandlerFunc: PostToken, HandlerFunc: PostToken,
}, },
)) }...))
RegisterController(groups.AuthGroup, "/users", NewControllerV2[models.UserV2]( RegisterController(groups.AuthGroup, "/users", NewControllerV2[models.UserV2]([]Action{
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "", Path: "",
HandlerFunc: PostUser, HandlerFunc: PostUser,
}, },
Action{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/:id/change-password", Path: "/:id/change-password",
HandlerFunc: PostUserChangePassword, HandlerFunc: PostUserChangePassword,
}, },
Action{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/me", Path: "/me",
HandlerFunc: GetUserMe, HandlerFunc: GetUserMe,
}, },
Action{ {
Method: http.MethodPut, Method: http.MethodPut,
Path: "/me", Path: "/me",
HandlerFunc: PutUserById, HandlerFunc: PutUserById,
}, },
)) }...))
RegisterActions(groups.AuthGroup, "/results", []Action{ RegisterActions(groups.AuthGroup, "/results", []Action{
{ {

View File

@@ -2,7 +2,7 @@ package controllers
import ( import (
"errors" "errors"
log2 "github.com/apex/log" "github.com/apex/log"
"github.com/crawlab-team/crawlab/core/constants" "github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/fs" "github.com/crawlab-team/crawlab/core/fs"
"github.com/crawlab-team/crawlab/core/interfaces" "github.com/crawlab-team/crawlab/core/interfaces"
@@ -11,6 +11,7 @@ import (
"github.com/crawlab-team/crawlab/core/spider/admin" "github.com/crawlab-team/crawlab/core/spider/admin"
"github.com/crawlab-team/crawlab/core/utils" "github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/crawlab/db/mongo" "github.com/crawlab-team/crawlab/db/mongo"
"github.com/crawlab-team/crawlab/trace"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/spf13/viper" "github.com/spf13/viper"
"go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson"
@@ -64,12 +65,25 @@ func GetSpiderById(c *gin.Context) {
} }
func GetSpiderList(c *gin.Context) { func GetSpiderList(c *gin.Context) {
// get all list
all := MustGetFilterAll(c)
if all {
NewControllerV2[models.ProjectV2]().getAll(c)
return
}
// get list
withStats := c.Query("stats") withStats := c.Query("stats")
if withStats == "" { if withStats == "" {
NewControllerV2[models.SpiderV2]().GetList(c) NewControllerV2[models.SpiderV2]().GetList(c)
return return
} }
// get list with stats
getSpiderListWithStats(c)
}
func getSpiderListWithStats(c *gin.Context) {
// params // params
pagination := MustGetPagination(c) pagination := MustGetPagination(c)
query := MustGetFilterQuery(c) query := MustGetFilterQuery(c)
@@ -205,6 +219,7 @@ func PostSpider(c *gin.Context) {
return return
} }
// user
u := GetUserFromContextV2(c) u := GetUserFromContextV2(c)
// add // add
@@ -229,7 +244,12 @@ func PostSpider(c *gin.Context) {
} }
// create folder // create folder
err = getSpiderFsSvcById(id).CreateDir(".") fsSvc, err := getSpiderFsSvcById(id)
if err != nil {
HandleErrorInternalServerError(c, err)
return
}
err = fsSvc.CreateDir(".")
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorInternalServerError(c, err)
return return
@@ -336,7 +356,7 @@ func DeleteSpiderById(c *gin.Context) {
// delete task logs // delete task logs
logPath := filepath.Join(viper.GetString("log.path"), id) logPath := filepath.Join(viper.GetString("log.path"), id)
if err := os.RemoveAll(logPath); err != nil { if err := os.RemoveAll(logPath); err != nil {
log2.Warnf("failed to remove task log directory: %s", logPath) log.Warnf("failed to remove task log directory: %s", logPath)
} }
wg.Done() wg.Done()
}(id.Hex()) }(id.Hex())
@@ -349,6 +369,35 @@ func DeleteSpiderById(c *gin.Context) {
return return
} }
go func() {
// spider
s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(id)
if err != nil {
log.Errorf("failed to get spider: %s", err.Error())
trace.PrintError(err)
return
}
// skip spider with git
if !s.GitId.IsZero() {
return
}
// delete spider directory
fsSvc, err := getSpiderFsSvcById(id)
if err != nil {
log.Errorf("failed to get spider fs service: %s", err.Error())
trace.PrintError(err)
return
}
err = fsSvc.Delete(".")
if err != nil {
log.Errorf("failed to delete spider directory: %s", err.Error())
trace.PrintError(err)
return
}
}()
HandleSuccess(c) HandleSuccess(c)
} }
@@ -414,7 +463,7 @@ func DeleteSpiderList(c *gin.Context) {
// delete task logs // delete task logs
logPath := filepath.Join(viper.GetString("log.path"), id) logPath := filepath.Join(viper.GetString("log.path"), id)
if err := os.RemoveAll(logPath); err != nil { if err := os.RemoveAll(logPath); err != nil {
log2.Warnf("failed to remove task log directory: %s", logPath) log.Warnf("failed to remove task log directory: %s", logPath)
} }
wg.Done() wg.Done()
}(id.Hex()) }(id.Hex())
@@ -427,97 +476,136 @@ func DeleteSpiderList(c *gin.Context) {
return return
} }
// delete spider directories
go func() {
wg := sync.WaitGroup{}
wg.Add(len(payload.Ids))
for _, id := range payload.Ids {
go func(id primitive.ObjectID) {
defer wg.Done()
// spider
s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(id)
if err != nil {
log.Errorf("failed to get spider: %s", err.Error())
trace.PrintError(err)
return
}
// skip spider with git
if !s.GitId.IsZero() {
return
}
// delete spider directory
fsSvc, err := getSpiderFsSvcById(id)
if err != nil {
log.Errorf("failed to get spider fs service: %s", err.Error())
trace.PrintError(err)
return
}
err = fsSvc.Delete(".")
if err != nil {
log.Errorf("failed to delete spider directory: %s", err.Error())
trace.PrintError(err)
return
}
}(id)
}
wg.Wait()
}()
HandleSuccess(c) HandleSuccess(c)
} }
func GetSpiderListDir(c *gin.Context) { func GetSpiderListDir(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
GetBaseFileListDir(s.GitRootPath, c) GetBaseFileListDir(rootPath, c)
} }
func GetSpiderFile(c *gin.Context) { func GetSpiderFile(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
GetBaseFileFile(s.GitRootPath, c) GetBaseFileFile(rootPath, c)
} }
func GetSpiderFileInfo(c *gin.Context) { func GetSpiderFileInfo(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
GetBaseFileFileInfo(s.GitRootPath, c) GetBaseFileFileInfo(rootPath, c)
} }
func PostSpiderSaveFile(c *gin.Context) { func PostSpiderSaveFile(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileSaveFile(s.GitRootPath, c) PostBaseFileSaveFile(rootPath, c)
} }
func PostSpiderSaveFiles(c *gin.Context) { func PostSpiderSaveFiles(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileSaveFiles(s.GitRootPath, c) PostBaseFileSaveFiles(rootPath, c)
} }
func PostSpiderSaveDir(c *gin.Context) { func PostSpiderSaveDir(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileSaveDir(s.GitRootPath, c) PostBaseFileSaveDir(rootPath, c)
} }
func PostSpiderRenameFile(c *gin.Context) { func PostSpiderRenameFile(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileRenameFile(s.GitRootPath, c) PostBaseFileRenameFile(rootPath, c)
} }
func DeleteSpiderFile(c *gin.Context) { func DeleteSpiderFile(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
DeleteBaseFileFile(s.GitRootPath, c) DeleteBaseFileFile(rootPath, c)
} }
func PostSpiderCopyFile(c *gin.Context) { func PostSpiderCopyFile(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileCopyFile(s.GitRootPath, c) PostBaseFileCopyFile(rootPath, c)
} }
func PostSpiderExport(c *gin.Context) { func PostSpiderExport(c *gin.Context) {
s, err := allowSpiderGit(c) rootPath, err := getSpiderRootPath(c)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorForbidden(c, err)
return return
} }
PostBaseFileExport(s.GitRootPath, c) PostBaseFileExport(rootPath, c)
} }
func PostSpiderRun(c *gin.Context) { func PostSpiderRun(c *gin.Context) {
@@ -628,22 +716,25 @@ func PostSpiderDataSource(c *gin.Context) {
HandleSuccess(c) HandleSuccess(c)
} }
func getSpiderFsSvc(c *gin.Context) (svc interfaces.FsServiceV2, err error) { func getSpiderFsSvc(s *models.SpiderV2) (svc interfaces.FsServiceV2, err error) {
id, err := primitive.ObjectIDFromHex(c.Param("id"))
if err != nil {
return nil, err
}
workspacePath := viper.GetString("workspace") workspacePath := viper.GetString("workspace")
fsSvc := fs.NewFsServiceV2(filepath.Join(workspacePath, id.Hex())) fsSvc := fs.NewFsServiceV2(filepath.Join(workspacePath, s.Id.Hex()))
return fsSvc, nil return fsSvc, nil
} }
func getSpiderFsSvcById(id primitive.ObjectID) interfaces.FsServiceV2 { func GetSpiderFsSvcById(id primitive.ObjectID) (svc interfaces.FsServiceV2, err error) {
workspacePath := viper.GetString("workspace") return getSpiderFsSvcById(id)
fsSvc := fs.NewFsServiceV2(filepath.Join(workspacePath, id.Hex())) }
return fsSvc
func getSpiderFsSvcById(id primitive.ObjectID) (svc interfaces.FsServiceV2, err error) {
s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(id)
if err != nil {
log.Errorf("failed to get spider: %s", err.Error())
trace.PrintError(err)
return nil, err
}
return getSpiderFsSvc(s)
} }
func upsertSpiderDataCollection(s *models.SpiderV2) (err error) { func upsertSpiderDataCollection(s *models.SpiderV2) (err error) {
@@ -685,21 +776,32 @@ func upsertSpiderDataCollection(s *models.SpiderV2) (err error) {
return nil return nil
} }
func allowSpiderGit(c *gin.Context) (s models.SpiderV2, err error) { func UpsertSpiderDataCollection(s *models.SpiderV2) (err error) {
if utils.IsPro() { return upsertSpiderDataCollection(s)
return s, nil }
}
func getSpiderRootPath(c *gin.Context) (rootPath string, err error) {
// spider id
id, err := primitive.ObjectIDFromHex(c.Param("id")) id, err := primitive.ObjectIDFromHex(c.Param("id"))
if err != nil { if err != nil {
return s, err return "", err
} }
_s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(id)
// spider
s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(id)
if err != nil { if err != nil {
return s, err return "", err
} }
// check git permission
if !utils.IsPro() && !s.GitId.IsZero() {
return "", errors.New("git is not allowed in the community version")
}
// if git id is zero, return spider id as root path
if s.GitId.IsZero() { if s.GitId.IsZero() {
return s, errors.New("git is not allowed in this edition") return id.Hex(), nil
} }
s = *_s
return s, nil return filepath.Join(s.GitId.Hex(), rootPath), nil
} }

View File

@@ -31,7 +31,8 @@ type syncContext struct {
func (ctx *syncContext) scan(c *gin.Context) { func (ctx *syncContext) scan(c *gin.Context) {
id := c.Param("id") id := c.Param("id")
dir := ctx._getDir(id) path := c.Query("path")
dir := ctx._getDir(id, path)
files, err := utils.ScanDirectory(dir) files, err := utils.ScanDirectory(dir)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorInternalServerError(c, err)
@@ -43,13 +44,13 @@ func (ctx *syncContext) scan(c *gin.Context) {
func (ctx *syncContext) download(c *gin.Context) { func (ctx *syncContext) download(c *gin.Context) {
id := c.Param("id") id := c.Param("id")
filePath := c.Query("path") filePath := c.Query("path")
dir := ctx._getDir(id) dir := ctx._getDir(id, "")
c.File(filepath.Join(dir, filePath)) c.File(filepath.Join(dir, filePath))
} }
func (ctx *syncContext) _getDir(id string) string { func (ctx *syncContext) _getDir(id string, path string) string {
workspacePath := viper.GetString("workspace") workspacePath := viper.GetString("workspace")
return filepath.Join(workspacePath, id) return filepath.Join(workspacePath, id, path)
} }
func newSyncContext() syncContext { func newSyncContext() syncContext {

View File

@@ -71,8 +71,8 @@ func GetTaskList(c *gin.Context) {
query := MustGetFilterQuery(c) query := MustGetFilterQuery(c)
sort := MustGetSortOption(c) sort := MustGetSortOption(c)
// get list // get tasks
list, err := service.NewModelServiceV2[models.TaskV2]().GetMany(query, &mongo.FindOptions{ tasks, err := service.NewModelServiceV2[models.TaskV2]().GetMany(query, &mongo.FindOptions{
Sort: sort, Sort: sort,
Skip: pagination.Size * (pagination.Page - 1), Skip: pagination.Size * (pagination.Page - 1),
Limit: pagination.Size, Limit: pagination.Size,
@@ -87,15 +87,17 @@ func GetTaskList(c *gin.Context) {
} }
// check empty list // check empty list
if len(list) == 0 { if len(tasks) == 0 {
HandleSuccessWithListData(c, nil, 0) HandleSuccessWithListData(c, nil, 0)
return return
} }
// ids // ids
var ids []primitive.ObjectID var taskIds []primitive.ObjectID
for _, t := range list { var spiderIds []primitive.ObjectID
ids = append(ids, t.Id) for _, t := range tasks {
taskIds = append(taskIds, t.Id)
spiderIds = append(spiderIds, t.SpiderId)
} }
// total count // total count
@@ -106,33 +108,56 @@ func GetTaskList(c *gin.Context) {
} }
// stat list // stat list
query = bson.M{ stats, err := service.NewModelServiceV2[models.TaskStatV2]().GetMany(bson.M{
"_id": bson.M{ "_id": bson.M{
"$in": ids, "$in": taskIds,
}, },
} }, nil)
stats, err := service.NewModelServiceV2[models.TaskStatV2]().GetMany(query, nil)
if err != nil { if err != nil {
HandleErrorInternalServerError(c, err) HandleErrorInternalServerError(c, err)
return return
} }
// cache stat list to dict // cache stat list to dict
dict := map[primitive.ObjectID]models.TaskStatV2{} statsDict := map[primitive.ObjectID]models.TaskStatV2{}
for _, s := range stats { for _, s := range stats {
dict[s.Id] = s statsDict[s.Id] = s
}
// spider list
spiders, err := service.NewModelServiceV2[models.SpiderV2]().GetMany(bson.M{
"_id": bson.M{
"$in": spiderIds,
},
}, nil)
if err != nil {
HandleErrorInternalServerError(c, err)
return
}
// cache spider list to dict
spiderDict := map[primitive.ObjectID]models.SpiderV2{}
for _, s := range spiders {
spiderDict[s.Id] = s
} }
// iterate list again // iterate list again
for i, t := range list { for i, t := range tasks {
ts, ok := dict[t.Id] // task stat
ts, ok := statsDict[t.Id]
if ok { if ok {
list[i].Stat = &ts tasks[i].Stat = &ts
}
// spider
s, ok := spiderDict[t.SpiderId]
if ok {
tasks[i].Spider = &s
} }
} }
// response // response
HandleSuccessWithListData(c, list, total) HandleSuccessWithListData(c, tasks, total)
} }
func DeleteTaskById(c *gin.Context) { func DeleteTaskById(c *gin.Context) {

View File

@@ -71,16 +71,8 @@ func (r *RunnerV2) Init() (err error) {
// start grpc client // start grpc client
if !r.c.IsStarted() { if !r.c.IsStarted() {
r.c.Start() err := r.c.Start()
} if err != nil {
// working directory
workspacePath := viper.GetString("workspace")
r.cwd = filepath.Join(workspacePath, r.s.Id.Hex())
// sync files from master
if !utils.IsMaster() {
if err := r.syncFiles(); err != nil {
return err return err
} }
} }
@@ -97,6 +89,16 @@ func (r *RunnerV2) Run() (err error) {
// log task started // log task started
log.Infof("task[%s] started", r.tid.Hex()) log.Infof("task[%s] started", r.tid.Hex())
// configure working directory
r.configureCwd()
// sync files worker nodes
if !utils.IsMaster() {
if err := r.syncFiles(); err != nil {
return err
}
}
// configure cmd // configure cmd
r.configureCmd() r.configureCmd()
@@ -317,26 +319,31 @@ func (r *RunnerV2) configureEnv() {
} }
func (r *RunnerV2) syncFiles() (err error) { func (r *RunnerV2) syncFiles() (err error) {
masterURL := fmt.Sprintf("%s/sync/%s", viper.GetString("api.endpoint"), r.s.Id.Hex()) var id string
workspacePath := viper.GetString("workspace") if r.s.GitId.IsZero() {
workerDir := filepath.Join(workspacePath, r.s.Id.Hex()) id = r.s.Id.Hex()
} else {
id = r.s.GitId.Hex()
}
masterURL := fmt.Sprintf("%s/sync/%s", viper.GetString("api.endpoint"), id)
workerDir := r.cwd
// get file list from master // get file list from master
resp, err := http.Get(masterURL + "/scan") resp, err := http.Get(masterURL + "/scan?path=" + workerDir)
if err != nil { if err != nil {
fmt.Println("Error getting file list from master:", err) log.Errorf("Error getting file list from master: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
defer resp.Body.Close() defer resp.Body.Close()
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
fmt.Println("Error reading response body:", err) log.Errorf("Error reading response body: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
var masterFiles map[string]entity.FsFileInfo var masterFiles map[string]entity.FsFileInfo
err = json.Unmarshal(body, &masterFiles) err = json.Unmarshal(body, &masterFiles)
if err != nil { if err != nil {
fmt.Println("Error unmarshaling JSON:", err) log.Errorf("Error unmarshaling JSON: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
@@ -349,7 +356,7 @@ func (r *RunnerV2) syncFiles() (err error) {
// create worker directory if not exists // create worker directory if not exists
if _, err := os.Stat(workerDir); os.IsNotExist(err) { if _, err := os.Stat(workerDir); os.IsNotExist(err) {
if err := os.MkdirAll(workerDir, os.ModePerm); err != nil { if err := os.MkdirAll(workerDir, os.ModePerm); err != nil {
fmt.Println("Error creating worker directory:", err) log.Errorf("Error creating worker directory: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
} }
@@ -357,7 +364,7 @@ func (r *RunnerV2) syncFiles() (err error) {
// get file list from worker // get file list from worker
workerFiles, err := utils.ScanDirectory(workerDir) workerFiles, err := utils.ScanDirectory(workerDir)
if err != nil { if err != nil {
fmt.Println("Error scanning worker directory:", err) log.Errorf("Error scanning worker directory: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
@@ -368,10 +375,10 @@ func (r *RunnerV2) syncFiles() (err error) {
// delete files that are deleted on master node // delete files that are deleted on master node
for path, workerFile := range workerFiles { for path, workerFile := range workerFiles {
if _, exists := masterFilesMap[path]; !exists { if _, exists := masterFilesMap[path]; !exists {
fmt.Println("Deleting file:", path) log.Infof("Deleting file: %s", path)
err := os.Remove(workerFile.FullPath) err := os.Remove(workerFile.FullPath)
if err != nil { if err != nil {
fmt.Println("Error deleting file:", err) log.Errorf("Error deleting file: %v", err)
} }
} }
} }
@@ -619,7 +626,17 @@ func (r *RunnerV2) _updateSpiderStat(status string) {
return return
} }
} }
}
func (r *RunnerV2) configureCwd() {
workspacePath := viper.GetString("workspace")
if r.s.GitId.IsZero() {
// not git
r.cwd = filepath.Join(workspacePath, r.s.Id.Hex())
} else {
// git
r.cwd = filepath.Join(workspacePath, r.s.GitId.Hex(), r.s.GitRootPath)
}
} }
func NewTaskRunnerV2(id primitive.ObjectID, svc *ServiceV2) (r2 *RunnerV2, err error) { func NewTaskRunnerV2(id primitive.ObjectID, svc *ServiceV2) (r2 *RunnerV2, err error) {

View File

@@ -43,7 +43,10 @@ type ServiceV2 struct {
func (svc *ServiceV2) Start() { func (svc *ServiceV2) Start() {
// Initialize gRPC if not started // Initialize gRPC if not started
if !svc.c.IsStarted() { if !svc.c.IsStarted() {
svc.c.Start() err := svc.c.Start()
if err != nil {
return
}
} }
go svc.ReportStatus() go svc.ReportStatus()

View File

@@ -9,11 +9,12 @@ type GitOptions struct {
} }
type GitRef struct { type GitRef struct {
Type string `json:"type"` Type string `json:"type"`
Name string `json:"name"` Name string `json:"name"`
FullName string `json:"full_name"` FullName string `json:"full_name"`
Hash string `json:"hash"` Hash string `json:"hash"`
Timestamp time.Time `json:"timestamp"` Timestamp time.Time `json:"timestamp"`
RemoteTrack string `json:"remote_track"`
} }
type GitLog struct { type GitLog struct {