refactor: cleanup code

This commit is contained in:
Marvin Zhang
2024-10-25 11:18:57 +08:00
parent 5af81a4a60
commit 1485dd85e9
91 changed files with 323 additions and 4604 deletions

View File

@@ -0,0 +1,57 @@
# Base image: Ubuntu 22.04 with the tool chains installed by the scripts under ./install.
FROM ubuntu:22.04

# Keep apt/dpkg from prompting. Declared with ENV (not ARG), so the value also
# persists into containers started from this image.
ENV DEBIAN_FRONTEND=noninteractive

# Copy the install scripts; every RUN step below executes one of them.
COPY ./install /app/install

# install deps (also refreshes the apt package lists that later scripts rely on)
RUN bash /app/install/deps/deps.sh

# install python
RUN bash /app/install/python/python.sh

# install golang
RUN bash /app/install/golang/golang.sh

# install node
RUN bash /app/install/node/node.sh

# install java (currently disabled)
#RUN bash /app/install/java/java.sh

# install seaweedfs
RUN bash /app/install/seaweedfs/seaweedfs.sh

# install chromedriver
RUN bash /app/install/chromedriver/chromedriver.sh

# install rod (needs the Go tool chain installed above)
RUN bash /app/install/rod/rod.sh

# working directory
WORKDIR /app/backend

# node path: where globally installed npm modules are resolved from
ENV NODE_PATH=/usr/lib/node_modules

# timezone environment
ENV TZ=Asia/Shanghai

# language environment
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8

# docker: flag telling the application it runs inside a container
ENV CRAWLAB_DOCKER=Y

# goproxy
ENV GOPROXY=goproxy.io,direct

# frontend port
EXPOSE 8080

# backend port
EXPOSE 8000

View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Installs a pinned Google Chrome build and the chromedriver release with the
# same version number, then removes the downloaded archives.

# Abort on the first failing command so a broken download or install fails
# the image build instead of being silently skipped.
set -euo pipefail

# version (shared by the Chrome package and the chromedriver archive)
version="106.0.5249.61"
chrome_deb="google-chrome-stable_${version}-1_amd64.deb"
driver_zip="chromedriver_linux64.zip"

# deps
apt-get install -y unzip xvfb libxi6 libgconf-2-4

# chrome (fetched over https rather than plain http)
wget -q "https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/${chrome_deb}"
apt-get -y install "./${chrome_deb}"
# Informational only: print the installed version, never fail the build on it.
google-chrome --version || true
rm -f "./${chrome_deb}"

# chromedriver
wget "https://chromedriver.storage.googleapis.com/${version}/${driver_zip}"
unzip -o "${driver_zip}"
# Remove the archive so it does not end up in the image layer.
rm -f "${driver_zip}"
mv chromedriver /usr/local/bin/chromedriver
chown root:root /usr/local/bin/chromedriver
chmod +x /usr/local/bin/chromedriver

View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Installs the base OS packages used by the other install scripts and by the
# running container.

# Stop at the first failing command; otherwise only the exit status of the
# last apt-get call would decide whether the build step succeeds.
set -e

# ensure directory mode of /tmp: world-writable WITH the sticky bit (1777),
# so users cannot delete or rename each other's temporary files
chmod 1777 /tmp

# update
apt-get update

# common deps
apt-get install -y curl git net-tools iputils-ping ntp ntpdate nginx wget dumb-init cloc unzip build-essential gnupg2 libc6

# chromedriver deps
apt-get install -y libglib2.0-0 libnss3 libx11-6

View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Downloads the Go 1.22.4 linux/amd64 tarball, unpacks it into /usr/local and
# exposes the `go` binary through a symlink in /usr/local/bin.
go_tarball="go1.22.4.linux-amd64.tar.gz"

# -f makes curl fail on HTTP errors instead of saving the error page as the
# tarball; the archive is deleted after extraction so it does not stay in the
# image; ln -sf keeps the script re-runnable when the link already exists.
curl -fOL "https://golang.org/dl/${go_tarball}" \
	&& tar -C /usr/local -xzf "${go_tarball}" \
	&& rm -f "${go_tarball}" \
	&& ln -sf /usr/local/go/bin/go /usr/local/bin/go

View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Installs the OpenJDK 11 JDK package through apt-get.
jdk_package="openjdk-11-jdk"
apt-get install -y "${jdk_package}"

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Installs Node.js 16.x from the NodeSource apt repository, then installs the
# globally available npm packages.

# install node
# -f makes curl fail on HTTP errors, so an error page is never executed as the
# setup script.
nodesource_setup="/tmp/nodesource_setup.sh"
curl -fsL https://deb.nodesource.com/setup_16.x -o "${nodesource_setup}" \
	&& bash "${nodesource_setup}" \
	&& apt-get install -y nodejs \
	|| { echo "ERROR: failed to install nodejs" >&2; rm -f "${nodesource_setup}"; exit 1; }
# the setup script is only needed once; do not leave it in the image
rm -f "${nodesource_setup}"

# install node dependencies
npm install -g \
	yarn \
	pnpm \
	crawlab-sdk@latest \
	puppeteer \
	playwright \
	playwright-chromium \
	crawlee

View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Installs Python 3.10 and pip, points the `python` and `pip` commands in
# /usr/local/bin at them, verifies both versions and installs the packages
# listed in /app/install/python/requirements.txt.

# install python
# get-pip.py is downloaded to a file first: piping curl straight into the
# interpreter would hide a failed download (python exits 0 on empty input).
get_pip="/tmp/get-pip.py"
apt-get update \
	&& apt-get install software-properties-common -y \
	&& add-apt-repository ppa:deadsnakes/ppa -y \
	&& apt-get install python3.10 -y \
	&& curl -fsS https://bootstrap.pypa.io/get-pip.py -o "${get_pip}" \
	&& python3.10 "${get_pip}"
rm -f "${get_pip}"

# alias
# rm -f ignores missing files without printing errors; ln -sf replaces any
# link that is already there.
rm -f /usr/local/bin/pip /usr/local/bin/python
ln -sf /usr/local/bin/pip3.10 /usr/local/bin/pip
ln -sf /usr/bin/python3.10 /usr/local/bin/python

# verify
python_version=$(python -V)
if [[ ! $python_version =~ "Python 3.10" ]]; then
	echo "ERROR: python version does not match. expect \"Python 3.10\", but actual is \"${python_version}\""
	exit 1
fi

pip_version=$(pip -V)
if [[ ! $pip_version =~ "python 3.10" ]]; then
	echo "ERROR: pip version does not match. expected: \"python 3.10\", but actual is \"${pip_version}\""
	exit 1
fi

# install python dependencies
pip install -r /app/install/python/requirements.txt

View File

@@ -0,0 +1,9 @@
# Python packages for the image.
# NOTE(review): presumably the file python.sh installs via
# `pip install -r /app/install/python/requirements.txt` - confirm the path.
# Most entries are unpinned, so each build resolves whatever version is current.
scrapy>=2.9.0
pymongo
bs4
crawlab-sdk>=0.6.0
crawlab-demo<=0.1.0
selenium
pyopenssl
playwright
feapder

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Writes a throwaway Go module into the current directory, runs a tiny program
# that connects through go-rod, then deletes the scratch files again.
# NOTE(review): presumably run so that rod fetches its browser while the image
# is built - confirm.

# scratch module definition, pinned to rod v0.107.1
cat > go.mod <<'EOF'
module rod_github
go 1.16
require github.com/go-rod/rod v0.107.1
EOF

# minimal program: create a rod browser handle and connect
cat > main.go <<'EOF'
package main
import "github.com/go-rod/rod"
func main() {
_ = rod.New().MustConnect()
}
EOF

# resolve dependencies, then execute the program once
go mod tidy
go run main.go

# clean up everything the steps above may have left behind
rm -f go.mod
rm -f go.sum
rm -f main.go
rm -f screenshot.png

View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Downloads the SeaweedFS 3.47 linux/amd64 release archive and installs the
# `weed` binary into /usr/local/bin.
weed_tarball="linux_amd64.tar.gz"

# The archive and the extracted binary are removed from the working directory
# afterwards, so only the installed copy remains in the image.
wget "https://github.com/seaweedfs/seaweedfs/releases/download/3.47/${weed_tarball}" \
	&& tar -zxf "${weed_tarball}" \
	&& cp weed /usr/local/bin \
	&& rm -f weed "${weed_tarball}"

View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Container start-up script: patches the API address in the built frontend
# files, then starts the Crawlab server.

# replace default api path to new one
python /app/bin/update_docker_js_api_address.py

# exec replaces this shell with the server process, so signals sent to the
# script's PID (e.g. SIGTERM on container stop) reach the server directly.
exec crawlab-server server

4
docker/bin/gen-ver.sh Normal file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Prints a version string of the form v0.0.0-<timestamp>-<commit hash>, built
# from the current git HEAD and the current local time (YYYYmmddHHMMSS).
head_commit=$(git rev-parse HEAD)
build_stamp=$(date +%Y%m%d%H%M%S)
printf 'v0.0.0-%s-%s\n' "${build_stamp}" "${head_commit}"

View File

@@ -0,0 +1,24 @@
import os

# Directory containing the built frontend assets that get patched.
ASSETS_DIR = '/app/dist/assets'
# Absolute backend address that appears in the built JavaScript files.
API_URL = 'http://localhost:8000'
# Relative path that replaces the absolute address.
API_PATH = '/api'


def replace_api_url(dir_path=ASSETS_DIR, api_url=API_URL, replacement=API_PATH):
    """Rewrite ``api_url`` to ``replacement`` in every ``.js`` file of ``dir_path``.

    Only files directly inside ``dir_path`` are inspected; files that do not
    contain ``api_url`` are left untouched. Files are read and written as
    UTF-8 explicitly so the result does not depend on the process locale.

    Args:
        dir_path: directory to scan.
        api_url: text to search for.
        replacement: text written in place of ``api_url``.

    Returns:
        The names of the files that were rewritten.

    Raises:
        OSError: if ``dir_path`` cannot be listed or a file cannot be
            read or written.
    """
    updated = []
    for file_name in os.listdir(dir_path):
        if not file_name.endswith('.js'):
            continue

        file_path = os.path.join(dir_path, file_name)
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Skip the write entirely when there is nothing to replace.
        if api_url not in content:
            continue

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content.replace(api_url, replacement))
        print(f'replaced api url in file: {file_name}')
        updated.append(file_name)
    return updated


if __name__ == '__main__':
    replace_api_url()

18
docker/nginx/crawlab.conf Normal file
View File

@@ -0,0 +1,18 @@
# Serves the static frontend from /app/dist on port 8080 and forwards
# requests under /api/ to the backend on localhost:8000.
server {
    # listener and static content
    listen 8080;
    root /app/dist;
    index index.html;

    # maximum accepted request body size
    client_max_body_size 200m;

    # response compression
    gzip on;
    gzip_min_length 1k;
    gzip_buffers 4 16k;
    gzip_comp_level 2;
    gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png image/x-icon;
    gzip_vary off;
    gzip_disable "MSIE [1-6]\.";

    # backend API: strip the /api/ prefix, then proxy to the backend
    location /api/ {
        rewrite /api/(.*) /$1 break;
        proxy_pass http://localhost:8000/;
    }
}