From 40f6675ce893b9dd54fed0ec8e6432ef41789584 Mon Sep 17 00:00:00 2001 From: marvzhang Date: Tue, 3 Dec 2019 14:25:54 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0Spiderfile=E6=A8=A1=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/config_spider/scrapy.go | 2 +- backend/template/spiderfile/Spiderfile.baidu | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/model/config_spider/scrapy.go b/backend/model/config_spider/scrapy.go index 5f4870e4..bf52d42a 100644 --- a/backend/model/config_spider/scrapy.go +++ b/backend/model/config_spider/scrapy.go @@ -225,7 +225,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string { // 如果为XPath if f.Attr == "" { // 文本 - return fmt.Sprintf(`xpath('%s/text()')`, f.Xpath) + return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath) } else { // 属性 return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr) diff --git a/backend/template/spiderfile/Spiderfile.baidu b/backend/template/spiderfile/Spiderfile.baidu index 86388621..c97e87af 100644 --- a/backend/template/spiderfile/Spiderfile.baidu +++ b/backend/template/spiderfile/Spiderfile.baidu @@ -11,9 +11,9 @@ stages: page_attr: "href" fields: - name: "title" - css: "h3 > a" + xpath: ".//h3/a" - name: "url" - css: "h3 > a" + xpath: ".//h3/a" attr: "href" - name: "abstract" css: ".c-abstract"