refactor: streamline file service retrieval and enhance spider template handling

- Replaced direct calls to getBaseFileFsSvc with a new method fs.GetBaseFileFsSvc in base_file.go for improved clarity and maintainability.
- Introduced SpiderTemplateService interface and implemented registry service for managing spider templates, enhancing template handling in the spider controller.
- Added template-related fields to the Spider model to support template functionality.
- Created utility functions for string case conversions in utils/string.go to facilitate consistent formatting across the codebase.
- Updated environment configuration to retrieve the Python path dynamically, improving flexibility in the task runner's setup.
This commit is contained in:
Marvin Zhang
2025-01-06 18:09:45 +08:00
parent f5d9ccfbfc
commit 8d8b47e474
9 changed files with 110 additions and 28 deletions

View File

@@ -4,19 +4,16 @@ import (
"errors"
"fmt"
"github.com/crawlab-team/crawlab/core/fs"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/gin-gonic/gin"
"io"
"os"
"path/filepath"
"sync"
)
func GetBaseFileListDir(rootPath string, c *gin.Context) {
path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -36,7 +33,7 @@ func GetBaseFileListDir(rootPath string, c *gin.Context) {
func GetBaseFileFile(rootPath string, c *gin.Context) {
path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -54,7 +51,7 @@ func GetBaseFileFile(rootPath string, c *gin.Context) {
func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
path := c.Query("path")
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -70,7 +67,7 @@ func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
}
func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorInternalServerError(c, err)
return
@@ -120,7 +117,7 @@ func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
}
func PostBaseFileSaveFiles(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorInternalServerError(c, err)
return
@@ -181,7 +178,7 @@ func PostBaseFileSaveDir(rootPath string, c *gin.Context) {
return
}
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -205,7 +202,7 @@ func PostBaseFileRenameFile(rootPath string, c *gin.Context) {
return
}
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -229,7 +226,7 @@ func DeleteBaseFileFile(rootPath string, c *gin.Context) {
payload.Path = "."
}
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -257,7 +254,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
return
}
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -272,7 +269,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
}
func PostBaseFileExport(rootPath string, c *gin.Context) {
fsSvc, err := getBaseFileFsSvc(rootPath)
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
if err != nil {
HandleErrorBadRequest(c, err)
return
@@ -289,14 +286,3 @@ func PostBaseFileExport(rootPath string, c *gin.Context) {
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", zipFilePath))
c.File(zipFilePath)
}
// GetBaseFileFsSvc is the exported entry point for obtaining the file-system
// service for rootPath. It forwards to getBaseFileFsSvc and returns its
// result unchanged.
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
	svc, err = getBaseFileFsSvc(rootPath)
	return svc, err
}
// getBaseFileFsSvc creates a file-system service whose root is rootPath
// joined onto the workspace directory reported by utils.GetWorkspace.
// The returned error is always nil.
func getBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
	root := filepath.Join(utils.GetWorkspace(), rootPath)
	return fs.NewFsService(root), nil
}

View File

@@ -5,6 +5,7 @@ import (
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/models/models"
mongo2 "github.com/crawlab-team/crawlab/core/mongo"
"github.com/crawlab-team/crawlab/core/spider"
"math"
"os"
"path/filepath"
@@ -293,6 +294,17 @@ func PostSpider(c *gin.Context) {
return
}
// create template if available
if utils.IsPro() && s.Template != "" {
if templateSvc := spider.GetSpiderTemplateRegistryService(); templateSvc != nil {
err = templateSvc.CreateTemplate(s.Id)
if err != nil {
HandleErrorInternalServerError(c, err)
return
}
}
}
HandleSuccessWithData(c, s)
}

14
core/fs/utils.go Normal file
View File

@@ -0,0 +1,14 @@
package fs
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"path/filepath"
)
// GetBaseFileFsSvc returns a file-system service rooted at rootPath inside
// the workspace directory (utils.GetWorkspace joined with rootPath).
//
// The error result is part of the signature but is always nil here.
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
	root := filepath.Join(utils.GetWorkspace(), rootPath)
	return NewFsService(root), nil
}

View File

@@ -0,0 +1,7 @@
package interfaces
import "go.mongodb.org/mongo-driver/bson/primitive"
// SpiderTemplateService is the contract for a service that can create a
// template for a spider.
type SpiderTemplateService interface {
	// CreateTemplate creates the template for the spider identified by id
	// and returns a non-nil error on failure.
	CreateTemplate(id primitive.ObjectID) error
}

View File

@@ -20,6 +20,12 @@ type Spider struct {
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *struct {
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
Domains string `json:"domains,omitempty" bson:"domains,omitempty"`
} `json:"template_params,omitempty" bson:"template_params,omitempty"`
// stats
Stat *SpiderStat `json:"stat,omitempty" bson:"-"`

View File

@@ -0,0 +1,13 @@
package spider
import "github.com/crawlab-team/crawlab/core/interfaces"
// templateSvcInstance holds the registered spider template service.
// It is nil until SetSpiderTemplateRegistryService is called.
// NOTE(review): access is not synchronized; presumably it is set once during
// startup before any reader runs — confirm against the caller.
var templateSvcInstance interfaces.SpiderTemplateService
// SetSpiderTemplateRegistryService registers svc as the package-wide spider
// template service, replacing any previously registered value.
func SetSpiderTemplateRegistryService(svc interfaces.SpiderTemplateService) {
templateSvcInstance = svc
}
// GetSpiderTemplateRegistryService returns the currently registered spider
// template service, or nil when none has been registered. Callers should
// nil-check the result before using it.
func GetSpiderTemplateRegistryService() interfaces.SpiderTemplateService {
return templateSvcInstance
}

View File

@@ -340,6 +340,19 @@ func (r *Runner) startHealthCheck() {
}
}
// configurePythonPath exposes the pyenv installation to spawned task
// processes. It sets PYENV_ROOT to the configured pyenv directory and makes
// sure the pyenv "bin" and "shims" directories are at the front of PATH.
//
// The process-wide environment is modified (os.Setenv), and this method runs
// for every task, so directories already present in PATH are not added again;
// otherwise PATH would grow by two entries on each run.
func (r *Runner) configurePythonPath() {
	pyenvRoot := utils.GetPyenvPath()
	pyenvShimsPath := pyenvRoot + "/shims"
	pyenvBinPath := pyenvRoot + "/bin"

	// Setenv only fails for invalid keys/values; this setup is best-effort.
	_ = os.Setenv("PYENV_ROOT", pyenvRoot)

	// Prepend shims first and bin second so the resulting order is
	// bin, shims, <previous PATH>.
	envPath := os.Getenv("PATH")
	for _, dir := range []string{pyenvShimsPath, pyenvBinPath} {
		if pathListHasEntry(envPath, dir) {
			continue
		}
		envPath = dir + string(os.PathListSeparator) + envPath
	}
	_ = os.Setenv("PATH", envPath)
}

// pathListHasEntry reports whether entry is one of the elements of the
// OS path list (PATH-style string) list.
func pathListHasEntry(list, entry string) bool {
	start := 0
	for i := 0; i <= len(list); i++ {
		if i == len(list) || list[i] == os.PathListSeparator {
			if list[start:i] == entry {
				return true
			}
			start = i + 1
		}
	}
	return false
}
// configureNodePath sets up the Node.js environment paths, handling both nvm and default installations
func (r *Runner) configureNodePath() {
// Configure nvm-based Node.js paths
@@ -366,7 +379,10 @@ func (r *Runner) configureGoPath() {
// - Crawlab-specific variables
// - Global environment variables from the system
func (r *Runner) configureEnv() {
// Configure Node.js paths
// Configure Python path
r.configurePythonPath()
// Configure Node.js path
r.configureNodePath()
// Configure Go path
@@ -375,8 +391,6 @@ func (r *Runner) configureEnv() {
// Default envs
r.cmd.Env = os.Environ()
r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex())
r.cmd.Env = append(r.cmd.Env, "PYENV_ROOT="+utils.PyenvRoot)
r.cmd.Env = append(r.cmd.Env, "PATH="+os.Getenv("PATH")+":"+utils.PyenvRoot+"/shims:"+utils.PyenvRoot+"/bin")
// Global environment variables
envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil)

View File

@@ -30,7 +30,7 @@ const (
DefaultInstallRoot = "/app/install"
MetadataConfigDirName = ".crawlab"
MetadataConfigName = "config.json"
PyenvRoot = "/root/.pyenv"
DefaultPyenvPath = "/root/.pyenv"
DefaultNodeModulesPath = "/usr/lib/node_modules"
DefaultGoPath = "/root/go"
)
@@ -250,6 +250,13 @@ func GetInstallRoot() string {
return DefaultInstallRoot
}
// GetPyenvPath returns the pyenv root directory. The value of the
// "install.pyenv.path" configuration key is used when it is non-empty;
// otherwise DefaultPyenvPath is returned.
func GetPyenvPath() string {
	configured := viper.GetString("install.pyenv.path")
	if configured == "" {
		return DefaultPyenvPath
	}
	return configured
}
func GetNodeModulesPath() string {
if res := viper.GetString("install.node.path"); res != "" {
return res

23
core/utils/string.go Normal file
View File

@@ -0,0 +1,23 @@
package utils
import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
"strings"
)
// ToSnakeCase trims surrounding whitespace from s, lower-cases it, and
// replaces every space and hyphen with an underscore.
//
// Word boundaries inside camelCase input are not detected, and consecutive
// separators each produce their own underscore.
func ToSnakeCase(s string) string {
	normalized := strings.ToLower(strings.TrimSpace(s))
	return strings.NewReplacer(" ", "_", "-", "_").Replace(normalized)
}
// ToPascalCase trims surrounding whitespace from s, treats underscores as
// word separators, title-cases the words with the English title caser from
// golang.org/x/text/cases, and then removes the spaces between them.
//
// Hyphens are not replaced by this function.
func ToPascalCase(s string) string {
	words := strings.ReplaceAll(strings.TrimSpace(s), "_", " ")
	titled := cases.Title(language.English).String(words)
	return strings.ReplaceAll(titled, " ", "")
}