fix: unable to sync files and save data issues

This commit is contained in:
Marvin Zhang
2024-06-25 14:58:54 +08:00
parent 460c8d958a
commit 5daeccb87d
12 changed files with 113 additions and 31 deletions

View File

@@ -80,10 +80,11 @@ func GetProjectList(c *gin.Context) {
} }
// assign // assign
var data []models.ProjectV2
for _, p := range projects { for _, p := range projects {
p.Spiders = cache[p.Id] p.Spiders = cache[p.Id]
projects = append(projects, p) data = append(data, p)
} }
HandleSuccessWithListData(c, projects, total) HandleSuccessWithListData(c, data, total)
} }

View File

@@ -363,6 +363,18 @@ func InitRoutes(app *gin.Engine) (err error) {
HandlerFunc: PostLogout, HandlerFunc: PostLogout,
}, },
}) })
RegisterActions(groups.AnonymousGroup, "/sync", []Action{
{
Method: http.MethodGet,
Path: "/:id/scan",
HandlerFunc: GetSyncScan,
},
{
Method: http.MethodGet,
Path: "/:id/download",
HandlerFunc: GetSyncDownload,
},
})
return nil return nil
} }

View File

@@ -0,0 +1,31 @@
package controllers
import (
"github.com/crawlab-team/crawlab/core/utils"
"github.com/gin-gonic/gin"
"github.com/spf13/viper"
"net/http"
"path/filepath"
)
// resolveSyncPath maps the request parameters of a sync endpoint onto a
// filesystem location below the configured "workspace" directory.
//
// id selects the sub-directory of the workspace and path is a location
// relative to that sub-directory. Both values come straight from the request,
// and the sync routes are callable without authentication, so they are treated
// as untrusted: filepath.Join collapses ".." elements, which would otherwise
// let a request such as "?path=../../etc/passwd" escape the workspace.
//
// It returns the joined path and true when the result stays inside
// <workspace>/<id>, and "", false otherwise. The check is purely lexical;
// symbolic links inside the workspace are not resolved.
func resolveSyncPath(id, path string) (resolved string, ok bool) {
	// id must be exactly one real path element (no separators, no "." / "..").
	if id == "" || id == "." || id == ".." || filepath.Base(id) != id {
		return "", false
	}

	baseDir := filepath.Join(viper.GetString("workspace"), id)
	resolved = filepath.Join(baseDir, path)

	// Express the target relative to baseDir; a result of ".." or one that
	// starts with "../" means the target lies outside baseDir.
	rel, err := filepath.Rel(baseDir, resolved)
	if err != nil {
		return "", false
	}
	parentPrefix := ".." + string(filepath.Separator)
	if rel == ".." || (len(rel) >= len(parentPrefix) && rel[:len(parentPrefix)] == parentPrefix) {
		return "", false
	}
	return resolved, true
}

// GetSyncScan handles GET /sync/:id/scan. It scans the directory
// <workspace>/<id>/<path> (path taken from the "path" query parameter) with
// utils.ScanDirectory and writes the result as JSON with status 200.
//
// A request whose id or path would resolve outside <workspace>/<id> is
// rejected with 400 Bad Request. A scan failure is reported through
// HandleErrorInternalServerError.
func GetSyncScan(c *gin.Context) {
	dirPath, ok := resolveSyncPath(c.Param("id"), c.Query("path"))
	if !ok {
		c.AbortWithStatus(http.StatusBadRequest)
		return
	}
	files, err := utils.ScanDirectory(dirPath)
	if err != nil {
		HandleErrorInternalServerError(c, err)
		return
	}
	c.AbortWithStatusJSON(http.StatusOK, files)
}

// GetSyncDownload handles GET /sync/:id/download. It serves the file at
// <workspace>/<id>/<path> (path taken from the "path" query parameter).
//
// A request whose id or path would resolve outside <workspace>/<id> is
// rejected with 400 Bad Request.
func GetSyncDownload(c *gin.Context) {
	filePath, ok := resolveSyncPath(c.Param("id"), c.Query("path"))
	if !ok {
		c.AbortWithStatus(http.StatusBadRequest)
		return
	}
	c.File(filePath)
}

View File

@@ -83,7 +83,7 @@ func (svr NodeServerV2) Register(ctx context.Context, req *grpc.Request) (res *g
} }
node.SetCreated(primitive.NilObjectID) node.SetCreated(primitive.NilObjectID)
node.SetUpdated(primitive.NilObjectID) node.SetUpdated(primitive.NilObjectID)
_, err = service.NewModelServiceV2[models.NodeV2]().InsertOne(*nodeDb) node.Id, err = service.NewModelServiceV2[models.NodeV2]().InsertOne(node)
if err != nil { if err != nil {
return HandleError(err) return HandleError(err)
} }

View File

@@ -8,7 +8,7 @@ import (
"github.com/crawlab-team/crawlab/core/utils" "github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/crawlab/trace" "github.com/crawlab-team/crawlab/trace"
"os" "os"
"path" "path/filepath"
) )
type Service struct { type Service struct {
@@ -18,7 +18,7 @@ type Service struct {
func (svc *Service) Init() (err error) { func (svc *Service) Init() (err error) {
// check config directory path // check config directory path
configDirPath := path.Dir(svc.path) configDirPath := filepath.Dir(svc.path)
if !utils.Exists(configDirPath) { if !utils.Exists(configDirPath) {
if err := os.MkdirAll(configDirPath, os.FileMode(0766)); err != nil { if err := os.MkdirAll(configDirPath, os.FileMode(0766)); err != nil {
return trace.TraceError(err) return trace.TraceError(err)
@@ -55,13 +55,14 @@ func (svc *Service) Reload() (err error) {
} }
func (svc *Service) GetBasicNodeInfo() (res interfaces.Entity) { func (svc *Service) GetBasicNodeInfo() (res interfaces.Entity) {
return &entity.NodeInfo{ res = &entity.NodeInfo{
Key: svc.GetNodeKey(), Key: svc.GetNodeKey(),
Name: svc.GetNodeName(), Name: svc.GetNodeName(),
IsMaster: svc.IsMaster(), IsMaster: svc.IsMaster(),
AuthKey: svc.GetAuthKey(), AuthKey: svc.GetAuthKey(),
MaxRunners: svc.GetMaxRunners(), MaxRunners: svc.GetMaxRunners(),
} }
return res
} }
func (svc *Service) GetNodeKey() (res string) { func (svc *Service) GetNodeKey() (res string) {

View File

@@ -4,12 +4,18 @@
package sys_exec package sys_exec
import ( import (
"errors"
"os/exec" "os/exec"
"strings"
"syscall" "syscall"
) )
func BuildCmd(cmdStr string) *exec.Cmd { func BuildCmd(cmdStr string) (cmd *exec.Cmd, err error) {
return exec.Command("sh", "-c", cmdStr) if cmdStr == "" {
return nil, errors.New("command string is empty")
}
args := strings.Split(cmdStr, " ")
return exec.Command(args[0], args[1:]...), nil
} }
func SetPgid(cmd *exec.Cmd) { func SetPgid(cmd *exec.Cmd) {

View File

@@ -4,12 +4,18 @@
package sys_exec package sys_exec
import ( import (
"errors"
"os/exec" "os/exec"
"strings"
"syscall" "syscall"
) )
func BuildCmd(cmdStr string) *exec.Cmd { func BuildCmd(cmdStr string) (cmd *exec.Cmd, err error) {
return exec.Command("sh", "-c", cmdStr) if cmdStr == "" {
return nil, errors.New("command string is empty")
}
args := strings.Split(cmdStr, " ")
return exec.Command(args[0], args[1:]...), nil
} }
func SetPgid(cmd *exec.Cmd) { func SetPgid(cmd *exec.Cmd) {

View File

@@ -3,8 +3,16 @@
package sys_exec package sys_exec
import "os/exec" import (
"errors"
"os/exec"
"strings"
)
func BuildCmd(cmdStr string) *exec.Cmd { func BuildCmd(cmdStr string) (cmd *exec.Cmd, err error) {
return exec.Command("cmd", "/C", cmdStr) if cmdStr == "" {
return nil, errors.New("command string is empty")
}
args := strings.Split(cmdStr, " ")
return exec.Command(args[0], args[1:]...), nil
} }

View File

@@ -237,7 +237,7 @@ func (r *Runner) configureCmd() {
} }
// get cmd instance // get cmd instance
r.cmd = sys_exec.BuildCmd(cmdStr) r.cmd, _ = sys_exec.BuildCmd(cmdStr)
// set working directory // set working directory
r.cmd.Dir = r.cwd r.cmd.Dir = r.cwd

View File

@@ -95,12 +95,15 @@ func (r *RunnerV2) Run() (err error) {
// sync files worker nodes // sync files worker nodes
if !utils.IsMaster() { if !utils.IsMaster() {
if err := r.syncFiles(); err != nil { if err := r.syncFiles(); err != nil {
return err return r.updateTask(constants.TaskStatusError, err)
} }
} }
// configure cmd // configure cmd
r.configureCmd() err = r.configureCmd()
if err != nil {
return r.updateTask(constants.TaskStatusError, err)
}
// configure environment variables // configure environment variables
r.configureEnv() r.configureEnv()
@@ -205,7 +208,7 @@ func (r *RunnerV2) GetTaskId() (id primitive.ObjectID) {
return r.tid return r.tid
} }
func (r *RunnerV2) configureCmd() { func (r *RunnerV2) configureCmd() (err error) {
var cmdStr string var cmdStr string
// customized spider // customized spider
@@ -223,13 +226,17 @@ func (r *RunnerV2) configureCmd() {
} }
// get cmd instance // get cmd instance
r.cmd = sys_exec.BuildCmd(cmdStr) r.cmd, err = sys_exec.BuildCmd(cmdStr)
if err != nil {
log.Errorf("Error building command: %v", err)
trace.PrintError(err)
return err
}
// set working directory // set working directory
r.cmd.Dir = r.cwd r.cmd.Dir = r.cwd
// configure pgid to allow killing sub processes return nil
//sys_exec.SetPgid(r.cmd)
} }
func (r *RunnerV2) configureLogging() { func (r *RunnerV2) configureLogging() {
@@ -320,16 +327,18 @@ func (r *RunnerV2) configureEnv() {
func (r *RunnerV2) syncFiles() (err error) { func (r *RunnerV2) syncFiles() (err error) {
var id string var id string
var workingDir string
if r.s.GitId.IsZero() { if r.s.GitId.IsZero() {
id = r.s.Id.Hex() id = r.s.Id.Hex()
workingDir = ""
} else { } else {
id = r.s.GitId.Hex() id = r.s.GitId.Hex()
workingDir = r.s.GitRootPath
} }
masterURL := fmt.Sprintf("%s/sync/%s", viper.GetString("api.endpoint"), id) masterURL := fmt.Sprintf("%s/sync/%s", viper.GetString("api.endpoint"), id)
workerDir := r.cwd
// get file list from master // get file list from master
resp, err := http.Get(masterURL + "/scan?path=" + workerDir) resp, err := http.Get(masterURL + "/scan?path=" + workingDir)
if err != nil { if err != nil {
log.Errorf("Error getting file list from master: %v", err) log.Errorf("Error getting file list from master: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
@@ -354,15 +363,15 @@ func (r *RunnerV2) syncFiles() (err error) {
} }
// create worker directory if not exists // create worker directory if not exists
if _, err := os.Stat(workerDir); os.IsNotExist(err) { if _, err := os.Stat(r.cwd); os.IsNotExist(err) {
if err := os.MkdirAll(workerDir, os.ModePerm); err != nil { if err := os.MkdirAll(r.cwd, os.ModePerm); err != nil {
log.Errorf("Error creating worker directory: %v", err) log.Errorf("Error creating worker directory: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
} }
} }
// get file list from worker // get file list from worker
workerFiles, err := utils.ScanDirectory(workerDir) workerFiles, err := utils.ScanDirectory(r.cwd)
if err != nil { if err != nil {
log.Errorf("Error scanning worker directory: %v", err) log.Errorf("Error scanning worker directory: %v", err)
return trace.TraceError(err) return trace.TraceError(err)
@@ -391,7 +400,7 @@ func (r *RunnerV2) syncFiles() (err error) {
go func(path string, masterFile entity.FsFileInfo) { go func(path string, masterFile entity.FsFileInfo) {
defer wg.Done() defer wg.Done()
logrus.Infof("File needs to be synchronized: %s", path) logrus.Infof("File needs to be synchronized: %s", path)
err := r.downloadFile(masterURL+"/download?path="+path, filepath.Join(workerDir, path)) err := r.downloadFile(masterURL+"/download?path="+path, filepath.Join(r.cwd, path))
if err != nil { if err != nil {
logrus.Errorf("Error downloading file: %v", err) logrus.Errorf("Error downloading file: %v", err)
select { select {

View File

@@ -17,7 +17,6 @@ import (
type ServiceV2 struct { type ServiceV2 struct {
// dependencies // dependencies
nodeCfgSvc interfaces.NodeConfigService nodeCfgSvc interfaces.NodeConfigService
modelSvc service.ModelService
// internals // internals
mu sync.Mutex mu sync.Mutex
@@ -64,7 +63,7 @@ func (svc *ServiceV2) getResultService(id primitive.ObjectID) (resultSvc interfa
} }
// task // task
t, err := svc.modelSvc.GetTaskById(id) t, err := service.NewModelServiceV2[models.TaskV2]().GetById(id)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@@ -31,7 +31,10 @@ func InitializedDemo() (ok bool) {
func ImportDemo() (err error) { func ImportDemo() (err error) {
cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo import", GetApiAddress()) cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo import", GetApiAddress())
cmd := sys_exec.BuildCmd(cmdStr) cmd, err := sys_exec.BuildCmd(cmdStr)
if err != nil {
return err
}
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
trace.PrintError(err) trace.PrintError(err)
} }
@@ -40,7 +43,10 @@ func ImportDemo() (err error) {
func ReimportDemo() (err error) { func ReimportDemo() (err error) {
cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo reimport", GetApiAddress()) cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo reimport", GetApiAddress())
cmd := sys_exec.BuildCmd(cmdStr) cmd, err := sys_exec.BuildCmd(cmdStr)
if err != nil {
return err
}
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
trace.PrintError(err) trace.PrintError(err)
} }
@@ -49,7 +55,10 @@ func ReimportDemo() (err error) {
func CleanupDemo() (err error) { func CleanupDemo() (err error) {
cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo reimport", GetApiAddress()) cmdStr := fmt.Sprintf("crawlab-cli login -a %s && crawlab-demo reimport", GetApiAddress())
cmd := sys_exec.BuildCmd(cmdStr) cmd, err := sys_exec.BuildCmd(cmdStr)
if err != nil {
return err
}
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
trace.PrintError(err) trace.PrintError(err)
} }