mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
refactor: streamline file service retrieval and enhance spider template handling
- Replaced direct calls to getBaseFileFsSvc with the new function fs.GetBaseFileFsSvc in base_file.go for improved clarity and maintainability.
- Introduced the SpiderTemplateService interface and implemented a registry service for managing spider templates, enhancing template handling in the spider controller.
- Added template-related fields to the Spider model to support template functionality.
- Created utility functions for string case conversions in utils/string.go to facilitate consistent formatting across the codebase.
- Updated the environment configuration to retrieve the Python (pyenv) path dynamically, improving flexibility in the task runner's setup.
This commit is contained in:
@@ -4,19 +4,16 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/crawlab-team/crawlab/core/fs"
|
||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||
"github.com/crawlab-team/crawlab/core/utils"
|
||||
"github.com/gin-gonic/gin"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
)
|
||||
|
||||
func GetBaseFileListDir(rootPath string, c *gin.Context) {
|
||||
path := c.Query("path")
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -36,7 +33,7 @@ func GetBaseFileListDir(rootPath string, c *gin.Context) {
|
||||
func GetBaseFileFile(rootPath string, c *gin.Context) {
|
||||
path := c.Query("path")
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -54,7 +51,7 @@ func GetBaseFileFile(rootPath string, c *gin.Context) {
|
||||
func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
|
||||
path := c.Query("path")
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -70,7 +67,7 @@ func GetBaseFileFileInfo(rootPath string, c *gin.Context) {
|
||||
}
|
||||
|
||||
func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorInternalServerError(c, err)
|
||||
return
|
||||
@@ -120,7 +117,7 @@ func PostBaseFileSaveFile(rootPath string, c *gin.Context) {
|
||||
}
|
||||
|
||||
func PostBaseFileSaveFiles(rootPath string, c *gin.Context) {
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorInternalServerError(c, err)
|
||||
return
|
||||
@@ -181,7 +178,7 @@ func PostBaseFileSaveDir(rootPath string, c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -205,7 +202,7 @@ func PostBaseFileRenameFile(rootPath string, c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -229,7 +226,7 @@ func DeleteBaseFileFile(rootPath string, c *gin.Context) {
|
||||
payload.Path = "."
|
||||
}
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -257,7 +254,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -272,7 +269,7 @@ func PostBaseFileCopyFile(rootPath string, c *gin.Context) {
|
||||
}
|
||||
|
||||
func PostBaseFileExport(rootPath string, c *gin.Context) {
|
||||
fsSvc, err := getBaseFileFsSvc(rootPath)
|
||||
fsSvc, err := fs.GetBaseFileFsSvc(rootPath)
|
||||
if err != nil {
|
||||
HandleErrorBadRequest(c, err)
|
||||
return
|
||||
@@ -289,14 +286,3 @@ func PostBaseFileExport(rootPath string, c *gin.Context) {
|
||||
c.Header("Content-Disposition", fmt.Sprintf("attachment; filename=%s", zipFilePath))
|
||||
c.File(zipFilePath)
|
||||
}
|
||||
|
||||
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
|
||||
return getBaseFileFsSvc(rootPath)
|
||||
}
|
||||
|
||||
func getBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
|
||||
workspacePath := utils.GetWorkspace()
|
||||
fsSvc := fs.NewFsService(filepath.Join(workspacePath, rootPath))
|
||||
|
||||
return fsSvc, nil
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"github.com/crawlab-team/crawlab/core/constants"
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
mongo2 "github.com/crawlab-team/crawlab/core/mongo"
|
||||
"github.com/crawlab-team/crawlab/core/spider"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -293,6 +294,17 @@ func PostSpider(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// create template if available
|
||||
if utils.IsPro() && s.Template != "" {
|
||||
if templateSvc := spider.GetSpiderTemplateRegistryService(); templateSvc != nil {
|
||||
err = templateSvc.CreateTemplate(s.Id)
|
||||
if err != nil {
|
||||
HandleErrorInternalServerError(c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HandleSuccessWithData(c, s)
|
||||
}
|
||||
|
||||
|
||||
14
core/fs/utils.go
Normal file
14
core/fs/utils.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||
"github.com/crawlab-team/crawlab/core/utils"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
func GetBaseFileFsSvc(rootPath string) (svc interfaces.FsService, err error) {
|
||||
workspacePath := utils.GetWorkspace()
|
||||
fsSvc := NewFsService(filepath.Join(workspacePath, rootPath))
|
||||
|
||||
return fsSvc, nil
|
||||
}
|
||||
7
core/interfaces/spider_template_service.go
Normal file
7
core/interfaces/spider_template_service.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package interfaces
|
||||
|
||||
import "go.mongodb.org/mongo-driver/bson/primitive"
|
||||
|
||||
type SpiderTemplateService interface {
|
||||
CreateTemplate(id primitive.ObjectID) (err error)
|
||||
}
|
||||
@@ -20,6 +20,12 @@ type Spider struct {
|
||||
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
|
||||
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
|
||||
Git *Git `json:"git,omitempty" bson:"-"`
|
||||
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
|
||||
TemplateParams *struct {
|
||||
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
|
||||
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
|
||||
Domains string `json:"domains,omitempty" bson:"domains,omitempty"`
|
||||
} `json:"template_params,omitempty" bson:"template_params,omitempty"`
|
||||
|
||||
// stats
|
||||
Stat *SpiderStat `json:"stat,omitempty" bson:"-"`
|
||||
|
||||
13
core/spider/registry_service.go
Normal file
13
core/spider/registry_service.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package spider
|
||||
|
||||
import "github.com/crawlab-team/crawlab/core/interfaces"
|
||||
|
||||
var templateSvcInstance interfaces.SpiderTemplateService
|
||||
|
||||
func SetSpiderTemplateRegistryService(svc interfaces.SpiderTemplateService) {
|
||||
templateSvcInstance = svc
|
||||
}
|
||||
|
||||
func GetSpiderTemplateRegistryService() interfaces.SpiderTemplateService {
|
||||
return templateSvcInstance
|
||||
}
|
||||
@@ -340,6 +340,19 @@ func (r *Runner) startHealthCheck() {
|
||||
}
|
||||
}
|
||||
|
||||
// configurePythonPath sets up the Python environment paths, handling both pyenv and default installations
|
||||
func (r *Runner) configurePythonPath() {
|
||||
// Configure global node_modules path
|
||||
pyenvRoot := utils.GetPyenvPath()
|
||||
pyenvShimsPath := pyenvRoot + "/shims"
|
||||
pyenvBinPath := pyenvRoot + "/bin"
|
||||
|
||||
// Configure global pyenv path
|
||||
_ = os.Setenv("PYENV_ROOT", pyenvRoot)
|
||||
_ = os.Setenv("PATH", pyenvShimsPath+":"+os.Getenv("PATH"))
|
||||
_ = os.Setenv("PATH", pyenvBinPath+":"+os.Getenv("PATH"))
|
||||
}
|
||||
|
||||
// configureNodePath sets up the Node.js environment paths, handling both nvm and default installations
|
||||
func (r *Runner) configureNodePath() {
|
||||
// Configure nvm-based Node.js paths
|
||||
@@ -366,7 +379,10 @@ func (r *Runner) configureGoPath() {
|
||||
// - Crawlab-specific variables
|
||||
// - Global environment variables from the system
|
||||
func (r *Runner) configureEnv() {
|
||||
// Configure Node.js paths
|
||||
// Configure Python path
|
||||
r.configurePythonPath()
|
||||
|
||||
// Configure Node.js path
|
||||
r.configureNodePath()
|
||||
|
||||
// Configure Go path
|
||||
@@ -375,8 +391,6 @@ func (r *Runner) configureEnv() {
|
||||
// Default envs
|
||||
r.cmd.Env = os.Environ()
|
||||
r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex())
|
||||
r.cmd.Env = append(r.cmd.Env, "PYENV_ROOT="+utils.PyenvRoot)
|
||||
r.cmd.Env = append(r.cmd.Env, "PATH="+os.Getenv("PATH")+":"+utils.PyenvRoot+"/shims:"+utils.PyenvRoot+"/bin")
|
||||
|
||||
// Global environment variables
|
||||
envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil)
|
||||
|
||||
@@ -30,7 +30,7 @@ const (
|
||||
DefaultInstallRoot = "/app/install"
|
||||
MetadataConfigDirName = ".crawlab"
|
||||
MetadataConfigName = "config.json"
|
||||
PyenvRoot = "/root/.pyenv"
|
||||
DefaultPyenvPath = "/root/.pyenv"
|
||||
DefaultNodeModulesPath = "/usr/lib/node_modules"
|
||||
DefaultGoPath = "/root/go"
|
||||
)
|
||||
@@ -250,6 +250,13 @@ func GetInstallRoot() string {
|
||||
return DefaultInstallRoot
|
||||
}
|
||||
|
||||
func GetPyenvPath() string {
|
||||
if res := viper.GetString("install.pyenv.path"); res != "" {
|
||||
return res
|
||||
}
|
||||
return DefaultPyenvPath
|
||||
}
|
||||
|
||||
func GetNodeModulesPath() string {
|
||||
if res := viper.GetString("install.node.path"); res != "" {
|
||||
return res
|
||||
|
||||
23
core/utils/string.go
Normal file
23
core/utils/string.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ToSnakeCase lower-cases s and joins its words with single underscores.
//
// Words are delimited by any run of whitespace and/or hyphens, so
// "Foo - Bar" and "foo\tbar" both become "foo_bar" instead of producing
// repeated underscores or leaving non-space whitespace in place. Leading and
// trailing separators are dropped. Existing underscores are left untouched,
// and no splitting is done on upper/lower-case boundaries ("FooBar" becomes
// "foobar").
func ToSnakeCase(s string) string {
	// Treat hyphens as whitespace so strings.Fields handles both separator
	// kinds (and collapses runs of them) in one pass.
	spaced := strings.ReplaceAll(strings.ToLower(s), "-", " ")
	return strings.Join(strings.Fields(spaced), "_")
}
|
||||
|
||||
func ToPascalCase(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
s = strings.ReplaceAll(s, "_", " ")
|
||||
s = cases.Title(language.English).String(s)
|
||||
s = strings.ReplaceAll(s, " ", "")
|
||||
return s
|
||||
}
|
||||
Reference in New Issue
Block a user