refactor: enhance Spider model and string utility functions

- Updated the Spider model to replace the anonymous TemplateParams struct with a named SpiderTemplateParams type; the new type adds a project_name field and renames domains to allowed_domains.
- Refactored string utility functions in utils/string.go to include a new replaceChars function, streamlining character replacement across multiple functions.
- Refactored ToSnakeCase and ToKebabCase to use the new replaceChars function for better maintainability and readability; both now also replace "." with the separator.
- Added a splitStringWithQuotes function, plus the SplitStringWithSingleQuotes and SplitStringWithDoubleQuotes wrappers, which wrap each comma-separated item of a string in quotes (e.g. a,b,c becomes 'a', 'b', 'c').
This commit is contained in:
Marvin Zhang
2025-01-07 13:21:16 +08:00
parent c3c629a7d7
commit c937e0f45f
2 changed files with 71 additions and 24 deletions

View File

@@ -7,25 +7,21 @@ import (
type Spider struct {
any `collection:"spiders"`
BaseModel[Spider] `bson:",inline"`
Name string `json:"name" bson:"name"` // spider name
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id (deprecated) # TODO: remove this field in the future
ColName string `json:"col_name,omitempty" bson:"col_name"` // data collection name
DbName string `json:"db_name,omitempty" bson:"db_name"` // database name
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
DataSource *Database `json:"data_source,omitempty" bson:"-"` // data source
Description string `json:"description" bson:"description"` // description
ProjectId primitive.ObjectID `json:"project_id" bson:"project_id"` // Project.Id
Mode string `json:"mode" bson:"mode"` // default Task.Mode
NodeIds []primitive.ObjectID `json:"node_ids" bson:"node_ids"` // default Task.NodeIds
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *struct {
SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
Domains string `json:"domains,omitempty" bson:"domains,omitempty"`
} `json:"template_params,omitempty" bson:"template_params,omitempty"`
Name string `json:"name" bson:"name"` // spider name
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id (deprecated) # TODO: remove this field in the future
ColName string `json:"col_name,omitempty" bson:"col_name"` // data collection name
DbName string `json:"db_name,omitempty" bson:"db_name"` // database name
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
DataSource *Database `json:"data_source,omitempty" bson:"-"` // data source
Description string `json:"description" bson:"description"` // description
ProjectId primitive.ObjectID `json:"project_id" bson:"project_id"` // Project.Id
Mode string `json:"mode" bson:"mode"` // default Task.Mode
NodeIds []primitive.ObjectID `json:"node_ids" bson:"node_ids"` // default Task.NodeIds
GitId primitive.ObjectID `json:"git_id" bson:"git_id"` // related Git.Id
GitRootPath string `json:"git_root_path" bson:"git_root_path"`
Git *Git `json:"git,omitempty" bson:"-"`
Template string `json:"template,omitempty" bson:"template,omitempty"` // spider template
TemplateParams *SpiderTemplateParams `json:"template_params,omitempty" bson:"template_params,omitempty"`
// stats
Stat *SpiderStat `json:"stat,omitempty" bson:"-"`
@@ -36,3 +32,10 @@ type Spider struct {
Priority int `json:"priority" bson:"priority"`
AutoInstall bool `json:"auto_install" bson:"auto_install"`
}
// SpiderTemplateParams holds the parameters supplied alongside a spider
// template. Every field is a plain string and is omitted from both the JSON
// and BSON encodings when empty.
type SpiderTemplateParams struct {
	// ProjectName is stored under the "project_name" key.
	ProjectName string `json:"project_name,omitempty" bson:"project_name,omitempty"`
	// SpiderName is stored under the "spider_name" key.
	SpiderName string `json:"spider_name,omitempty" bson:"spider_name,omitempty"`
	// StartUrls is a single string, not a slice.
	// NOTE(review): presumably a comma-separated list; confirm against the template renderer.
	StartUrls string `json:"start_urls,omitempty" bson:"start_urls,omitempty"`
	// AllowedDomains is a single string, not a slice.
	// NOTE(review): presumably a comma-separated list; confirm against the template renderer.
	AllowedDomains string `json:"allowed_domains,omitempty" bson:"allowed_domains,omitempty"`
}

View File

@@ -6,12 +6,29 @@ import (
"strings"
)
// replaceChars replaces every occurrence of each target string in s with r.
//
// Parameters:
//   - s: the string to replace characters in
//   - o: the targets to replace; each entry may be one or more characters long
//   - r: the replacement string
//
// Targets are applied one after another in slice order, so a later target can
// match text produced by an earlier replacement. Empty targets are skipped:
// strings.ReplaceAll with an empty old string would otherwise insert r at
// every rune boundary of s.
//
// Example:
//
//	replaceChars("a-b-c", []string{"-"}, "_") => "a_b_c"
//
// Returns:
//   - the string with all targets replaced
func replaceChars(s string, o []string, r string) string {
	for _, c := range o {
		if c == "" {
			// Nothing to match; avoid scattering r between every rune.
			continue
		}
		s = strings.ReplaceAll(s, c, r)
	}
	return s
}
// ToSnakeCase converts s to a snake_case identifier.
//
// Leading and trailing whitespace is trimmed, the result is lower-cased, and
// every space, hyphen and dot is replaced with an underscore. Word boundaries
// inside camelCase input are not detected, so "myName" becomes "myname".
//
// Example:
//
//	ToSnakeCase(" My Spider-v1.0 ") => "my_spider_v1_0"
func ToSnakeCase(s string) string {
	s = strings.ToLower(strings.TrimSpace(s))
	return replaceChars(s, []string{" ", "-", "."}, "_")
}
func ToPascalCase(s string) string {
@@ -25,7 +42,34 @@ func ToPascalCase(s string) string {
// ToKebabCase converts s to a kebab-case identifier.
//
// Leading and trailing whitespace is trimmed, the result is lower-cased, and
// every space, underscore and dot is replaced with a hyphen. Word boundaries
// inside camelCase input are not detected, so "myName" becomes "myname".
//
// Example:
//
//	ToKebabCase(" My Spider_v1.0 ") => "my-spider-v1-0"
func ToKebabCase(s string) string {
	s = strings.ToLower(strings.TrimSpace(s))
	return replaceChars(s, []string{" ", "_", "."}, "-")
}
// splitStringWithQuotes rewrites a delimited list so that every item is quoted.
//
// Parameters:
//   - s: the delimited string to convert
//   - q: the quote string placed around every item
//   - d: the delimiter separating items in s
//   - r: the separator written between quoted items in the result
//
// Surrounding whitespace is trimmed and every space character inside s is
// removed before the delimiters are rewritten, so items cannot contain
// spaces. An empty s yields a single empty quoted item (q + q).
//
// Example:
//
//	splitStringWithQuotes("a,b,c", "'", ",", ", ") => "'a', 'b', 'c'"
//
// Returns:
//   - the quoted items joined by r
func splitStringWithQuotes(s, q, d, r string) string {
	s = strings.TrimSpace(s)
	s = strings.ReplaceAll(s, " ", "")
	// Each delimiter closes the current item, emits the separator, and opens
	// the next item; the outer quotes open the first item and close the last.
	return q + strings.ReplaceAll(s, d, q+r+q) + q
}

// SplitStringWithSingleQuotes converts a comma-separated list into
// single-quoted items separated by ", ".
//
// Example:
//
//	SplitStringWithSingleQuotes("a,b,c") => "'a', 'b', 'c'"
func SplitStringWithSingleQuotes(s string) string {
	return splitStringWithQuotes(s, "'", ",", ", ")
}

// SplitStringWithDoubleQuotes converts a comma-separated list into
// double-quoted items separated by ", ".
//
// Example:
//
//	SplitStringWithDoubleQuotes("a,b,c") => "\"a\", \"b\", \"c\""
func SplitStringWithDoubleQuotes(s string) string {
	// The separator must not carry its own quotes: splitStringWithQuotes
	// already wraps it in q on both sides.
	return splitStringWithQuotes(s, "\"", ",", ", ")
}