diff --git a/backend/model/config_spider/scrapy.go b/backend/model/config_spider/scrapy.go index 5f4870e4..bf52d42a 100644 --- a/backend/model/config_spider/scrapy.go +++ b/backend/model/config_spider/scrapy.go @@ -225,7 +225,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string { // 如果为XPath if f.Attr == "" { // 文本 - return fmt.Sprintf(`xpath('%s/text()')`, f.Xpath) + return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath) } else { // 属性 return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr) diff --git a/backend/template/spiderfile/Spiderfile.baidu b/backend/template/spiderfile/Spiderfile.baidu index 86388621..c97e87af 100644 --- a/backend/template/spiderfile/Spiderfile.baidu +++ b/backend/template/spiderfile/Spiderfile.baidu @@ -11,9 +11,9 @@ stages: page_attr: "href" fields: - name: "title" - css: "h3 > a" + xpath: ".//h3/a" - name: "url" - css: "h3 > a" + xpath: ".//h3/a" attr: "href" - name: "abstract" css: ".c-abstract"