mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
added spider for segmentfault
This commit is contained in:
BIN
spiders/segmentfault/screenshot.png
Normal file
BIN
spiders/segmentfault/screenshot.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 129 KiB |
30
spiders/segmentfault/segmentfault_spider.js
Normal file
30
spiders/segmentfault/segmentfault_spider.js
Normal file
@@ -0,0 +1,30 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
/**
 * Spider entry point for SegmentFault.
 *
 * Launches a browser through Puppeteer, opens the "newest" listing page,
 * saves a screenshot to `screenshot.png` in the current working directory,
 * collects the text of every article title on the page and prints the
 * resulting array of `{ title }` objects to stdout.
 *
 * On any failure the error is printed to stderr and the process exit code is
 * set to 1; the browser is closed in every case.
 */
(async () => {
  const url = 'https://segmentfault.com/newest';

  // `timeout` is handed to puppeteer.launch(), i.e. it bounds browser start-up.
  const browser = await puppeteer.launch({
    timeout: 15000,
  });

  try {
    const page = await browser.newPage();

    await page.goto(url);

    // Fixed 2 s pause after navigation before capturing the page. A plain
    // timer is used instead of the deprecated `page.waitFor(ms)` so the script
    // does not depend on a particular Puppeteer release.
    await new Promise((resolve) => setTimeout(resolve, 2000));

    await page.screenshot({path: 'screenshot.png'});

    // Runs inside the page context: only serialisable data may be returned,
    // so each title element is reduced to a plain `{ title }` record.
    const titles = await page.evaluate(() => {
      const nodes = document.querySelectorAll('.news-list .news-item .news__item-title');
      return Array.from(nodes, (el) => ({
        title: el.innerText,
      }));
    });

    console.log(titles);
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user