Merge pull request #100 from tikazyq/develop

Develop
This commit is contained in:
Marvin Zhang
2019-08-01 13:29:04 +08:00
committed by GitHub
12 changed files with 120 additions and 7 deletions

9
Jenkinsfile vendored
View File

@@ -43,5 +43,14 @@ pipeline {
"""
}
}
// Cleanup stage: removes images listed as '<none>' and containers in the Exited state.
stage('Cleanup') {
    steps {
        echo 'Cleanup...'
        // `|| true` keeps the stage from failing when there is nothing to remove.
        // A single `|` would pipe docker's output into `true`, which exits immediately,
        // so docker could hit a broken pipe before it finishes removing everything.
        sh """
        docker rmi `docker images | grep '<none>' | grep -v IMAGE | awk '{ print \$3 }' | xargs` || true
        docker rm `docker ps -a | grep Exited | awk '{ print \$1 }' | xargs` || true
        """
    }
}
}
}

View File

@@ -12,7 +12,7 @@
基于Golang的分布式爬虫管理平台，支持Python、NodeJS、Go、Java、PHP等多种编程语言以及多种爬虫框架。
[查看演示 Demo](http://114.67.75.98:8080) | [文档](https://tikazyq.github.io/crawlab-docs)
[查看演示 Demo](http://crawlab.cn:8080) | [文档](https://tikazyq.github.io/crawlab-docs)
## 安装

View File

@@ -12,7 +12,7 @@
Golang-based distributed web crawler management platform, supporting various languages including Python, NodeJS, Go, Java, PHP and various web crawler frameworks including Scrapy, Puppeteer, Selenium.
[Demo](http://114.67.75.98:8080) | [Documentation](https://tikazyq.github.io/crawlab-docs)
[Demo](http://crawlab.cn:8080) | [Documentation](https://tikazyq.github.io/crawlab-docs)
## Installation

View File

@@ -99,7 +99,7 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) {
defer s.Close()
// 获取爬虫列表
var spiders []Spider
spiders := []Spider{}
if err := c.Find(filter).Skip(skip).Limit(limit).All(&spiders); err != nil {
debug.PrintStack()
return spiders, err

View File

@@ -0,0 +1,24 @@
# Worker image: bundles the crawlab binary plus a Python 3 environment and
# starts the binary as the container entrypoint.
FROM ubuntu:latest
# Keep apt from prompting for input during the build.
ENV DEBIAN_FRONTEND=noninteractive
# Add the dependency manifest (requirements.txt).
ADD requirements.txt /opt/crawlab/
# Add the crawlab binary and make it executable.
ADD crawlab /usr/local/bin/
RUN chmod +x /usr/local/bin/crawlab
# Install the base environment, then expose pip3/python3 as pip/python.
RUN apt-get update \
&& apt-get install -y curl git net-tools iputils-ping ntp python3 python3-pip \
&& apt-get clean \
&& ln -s /usr/bin/pip3 /usr/local/bin/pip \
&& ln -s /usr/bin/python3 /usr/local/bin/python
# Install the Python dependencies, using the Tsinghua PyPI mirror as the index.
RUN pip install -r /opt/crawlab/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /opt/crawlab
ENTRYPOINT ["crawlab"]

23
examples/worker/README.md Normal file
View File

@@ -0,0 +1,23 @@
# worker节点
由于master和worker节点的存储信息是在redis上，并且使用节点所在的mac地址作为key，所以在本地开发时启动master和worker节点会比较麻烦。
这里是一个运行worker节点的例子。
基本思路是：把worker节点所需的依赖制作成一个镜像，然后把crawlab编译成二进制包，接着把配置文件和二进制包通过volumes的形式挂载到容器内部。
这样就可以正常地运行worker节点了。之后对于容器编排的worker节点，可以直接把该镜像当成worker节点的基础镜像。
### 制作二进制包
在`backend`目录下执行以下命令，生成二进制包：
```
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
```
### 构建worker镜像
```
docker build -t crawlab:worker .
```
### 运行worker节点
```
docker-compose up -d
```

View File

@@ -0,0 +1,25 @@
mongo:
host: 127.0.0.1
port: 27017
db: crawlab_test
username: ""
password: ""
redis:
address: 127.0.0.1
password: ""
database: 1
port: 6379
log:
level: info
path: "/opt/crawlab/logs"
server:
host: 0.0.0.0
port: 8000
master: "N"
secret: "crawlab"
spider:
path: "/opt/crawlab/spiders"
task:
workers: 4
other:
tmppath: "/tmp"

BIN
examples/worker/crawlab Executable file

Binary file not shown.

View File

@@ -0,0 +1,8 @@
version: '3'
services:
worker:
image: crawlab:worker
container_name: crawlab-worker
volumes:
- $PWD/conf/config.yml:/opt/crawlab/conf/config.yml
- $PWD/crawlab:/usr/local/bin/crawlab

View File

@@ -0,0 +1,7 @@
geohash2==1.1
Scrapy==1.5.0
APScheduler==3.5.1
fonttools==3.34.2
elasticsearch==5.5.3
requests==2.22.0
pymysql==0.9.3

View File

@@ -2,7 +2,7 @@
<div class="navbar">
<hamburger :toggle-click="toggleSideBar" :is-active="sidebar.opened" class="hamburger-container"/>
<breadcrumb class="breadcrumb"/>
<el-dropdown class="avatar-container" trigger="click">
<el-dropdown class="avatar-container right" trigger="click">
<span class="el-dropdown-link">
{{username}}
<i class="el-icon-arrow-down el-icon--right"></i>
@@ -13,7 +13,7 @@
</el-dropdown-item>
</el-dropdown-menu>
</el-dropdown>
<el-dropdown class="lang-list" trigger="click">
<el-dropdown class="lang-list right" trigger="click">
<span class="el-dropdown-link">
{{$t($store.getters['lang/lang'])}}
<i class="el-icon-arrow-down el-icon--right"></i>
@@ -27,6 +27,12 @@
</el-dropdown-item>
</el-dropdown-menu>
</el-dropdown>
<el-dropdown class="documentation right">
<a href="https://tikazyq.github.io/crawlab-docs" target="_blank">
<font-awesome-icon :icon="['far', 'question-circle']"/>
<span style="margin-left: 5px;">文档</span>
</a>
</el-dropdown>
</div>
</template>
@@ -86,7 +92,6 @@ export default {
.lang-list {
cursor: pointer;
display: inline-block;
float: right;
margin-right: 35px;
/*position: absolute;*/
/*right: 80px;*/
@@ -103,10 +108,21 @@ export default {
cursor: pointer;
height: 50px;
display: inline-block;
float: right;
margin-right: 35px;
/*position: absolute;*/
/*right: 35px;*/
}
/* Documentation link in the navbar. */
.documentation {
  margin-right: 35px;

  /* Element selector: the label is a plain <span>; a `.span` class selector never matched it. */
  span {
    margin-left: 5px;
  }
}
.right {
float: right
}
}
</style>

View File

@@ -8,6 +8,7 @@ services:
CRAWLAB_SERVER_MASTER: "Y"
CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_REDIS_ADDRESS: "redis"
CRAWLAB_LOG_PATH: "/var/logs/crawlab"
ports:
- "8080:8080" # frontend
- "8000:8000" # backend