This commit is contained in:
2022-04-18 11:21:20 +08:00
commit 45a7af638f
210 changed files with 8997 additions and 0 deletions

112
datax/ADD/ccmd Executable file
View File

@@ -0,0 +1,112 @@
#!/bin/bash
##################################################
# Container entry point: run every DataX job in  #
# /opt/datax/job with bounded parallelism.       #
# Mount dir                                      #
#   - /opt/datax/log       job logs              #
#   - /opt/datax/log_perf  performance logs      #
#   - /opt/datax/job       job json files        #
# ENV                                            #
#   - JAVA_OPTS   extra jvm options              #
#   - TIMEOUT     per-job timeout, default 10m   #
#   - MAX_PROCS   max parallel jobs, default 1   #
##################################################
set -euo pipefail
export LANG=en_US.UTF-8
trap Quit EXIT                # Quit is defined below; resolved when the trap fires
GOT_SIGTERM=                  # set to 1 by the SIGTERM trap installed in Main
TIMEOUT="${TIMEOUT:-10m}"     # passed to timeout(1); exit 124 marks a timed-out job
MAX_PROCS=${MAX_PROCS:-1}     # size of the fd-1022 job-slot semaphore
function Print {
    # Print a timestamped message to stdout, optionally appending it to a file.
    # Usage: Print [-f FILE] MESSAGE...
    local file=/dev/null
    [ '-f' = "$1" ] && file=$2 && shift && shift
    # Keep the message out of the date format string so '%' sequences in the
    # message (e.g. "50%M") are printed literally instead of expanded by date;
    # quote "$file" so paths with spaces still work.
    printf '%s %s\n' "$(date +'[%F %T]')" "$*" | tee -a "$file"
}
function Quit {
    # EXIT trap: reap any java job processes still running, close the
    # semaphore fd and report. The final test makes the container exit
    # non-zero unless it was stopped by SIGTERM.
    while pkill -f java; do
        Print killing java ...
        sleep 1
    done
    exec 1022<&-
    Print Container stopped.
    test -n "$GOT_SIGTERM"
}
function Usage {
    # Print the runtime requirements of this container (env vars and
    # expected bind mounts) once at startup.
    Print 'This container should run with
**env TIMEOUT, default 10m(ten minutes)**
**env MAX_PROCS, default 1**
**/opt/datax/{log,log_perf,job} mounted from host**
'
}
function InitPipe {
    # Build a counting semaphore on fd 1022: a FIFO pre-loaded with
    # MAX_PROCS bytes. StartProc consumes one byte per launched job and
    # StartJob writes one back when a job finishes.
    Print Init named pipe ...
    rm -rf pool.pipe
    mkfifo pool.pipe
    exec 1022<> pool.pipe               # open read-write so the open never blocks
    rm -rf pool.pipe                    # the fd keeps the FIFO alive; no name needed on disk
    printf "%${MAX_PROCS}s" '' >&1022   # seed MAX_PROCS "slot" bytes (spaces)
}
function StartJob {
    # Run one DataX job (job/<name>.json) under timeout(1), log the outcome,
    # then release this job's slot back to the fd-1022 semaphore.
    # $1 - job name without the .json suffix
    local job="$1"
    local code=0
    Print Start job $job with timeout $TIMEOUT ...
    timeout "$TIMEOUT" java \
        -server \
        -Xms1g \
        -Xmx1g \
        -Duser.timezone=GMT+08 \
        -XX:+HeapDumpOnOutOfMemoryError \
        -XX:HeapDumpPath="$PWD/log" \
        ${JAVA_OPTS:-} \
        -Dfile.encoding=UTF-8 \
        -Dlogback.statusListenerClass=ch.qos.logback.core.status.NopStatusListener \
        -Djava.security.egd=file:///dev/urandom \
        -Ddatax.home="$PWD" \
        -Dlogback.configurationFile="$PWD/conf/logback.xml" \
        -classpath "$PWD/lib/*:." \
        -Dlog.file.name="$job" \
        com.alibaba.datax.core.Engine \
        -mode standalone \
        -jobid -1 \
        -job "$PWD/job/$job.json" \
        >/dev/null \
        2>"log/$job.error" \
        || code=$?
    # NOTE: ${JAVA_OPTS:-} is deliberately unquoted so it word-splits into
    # multiple jvm options.
    if [ 0 -eq $code ]; then
        Print Job $job finished.
    elif [ 124 -eq $code ]; then
        # timeout(1) exits 124 when the command was killed for running too long.
        Print Job $job timeout!
    else
        Print Job $job stopped unexpectedly!
    fi
    # Return the slot so StartProc can launch the next queued job.
    echo >&1022
}
function StartProc {
    # Launch every job/*.json through StartJob with at most MAX_PROCS
    # running at once; each launch consumes one semaphore byte from fd 1022.
    Print Start datax with max $MAX_PROCS parallel jobs ...
    local path job=
    # Glob instead of parsing `ls | grep`, which breaks on unusual filenames.
    for path in job/*.json; do
        [ -e "$path" ] || continue      # glob matched nothing
        job=${path##*/}
        read -r -n 1 -u 1022            # block until a job slot is free
        StartJob "${job%.json}" &
    done
    wait
    [ -n "$job" ] || Print Not found any job!
}
function Main {
    # Entry point: move into the DataX home, show usage, create the
    # job-slot semaphore, then run all jobs. SIGTERM marks a clean stop
    # for the EXIT trap (Quit) via GOT_SIGTERM.
    cd /opt/datax
    Usage
    InitPipe
    trap 'GOT_SIGTERM=1; Print Got SIGTERM ...; exit' SIGTERM
    StartProc
}
# Start here
Main

View File

@@ -0,0 +1,27 @@
# 部署多进程 datax
- 两组 job
- 第一组每天 5 点执行一次,每次最多并行 3 个 job,每个 job 超时时间十五分钟
- 第二组每天 6 点执行一次,每次最多并行 4 个 job,每个 job 超时时间一小时
- 根据实际环境修改
- docker-compose.yml
- datax/job/xxxx.json
- 创建目录
```
grep '\<source:' docker-compose.yml | cut -d: -f2 | xargs mkdir -p
```
- 把第一组任务的 job 配置文件(xxxx.json)放在 /compose/datax1/job/ 下
- 把第二组任务的 job 配置文件(xxxx.json)放在 /compose/datax2/job/ 下
- 启动
```
docker-compose up -d
```
- 添加定时任务
```
0 5 * * * docker-compose -f /compose/docker-compose.yml up -d datax1
0 6 * * * docker-compose -f /compose/docker-compose.yml up -d datax2
```

View File

@@ -0,0 +1,43 @@
version: "3.7"
services:
  # Group 1: up to 3 parallel jobs, 15-minute per-job timeout (see ccmd).
  datax1:
    image: harbor.colben.cn/general/datax
    container_name: datax1
    restart: "no"
    stop_grace_period: 1m      # give ccmd time to kill java jobs on stop
    environment:
      TIMEOUT: 15m             # per-job timeout
      MAX_PROCS: 3             # max parallel jobs
    network_mode: "host"
    volumes:
      - type: bind
        source: ./datax1/job
        target: /opt/datax/job
      - type: bind
        source: ./datax1/log
        target: /opt/datax/log
      - type: bind
        source: ./datax1/log_perf
        target: /opt/datax/log_perf
  # Group 2: up to 4 parallel jobs, 1-hour per-job timeout.
  datax2:
    image: harbor.colben.cn/general/datax
    container_name: datax2
    restart: "no"
    stop_grace_period: 1m
    environment:
      TIMEOUT: 1h
      MAX_PROCS: 4
    network_mode: "host"
    volumes:
      - type: bind
        source: ./datax2/job
        target: /opt/datax/job
      - type: bind
        source: ./datax2/log
        target: /opt/datax/log
      - type: bind
        source: ./datax2/log_perf
        target: /opt/datax/log_perf

View File

@@ -0,0 +1,23 @@
# 部署单进程 datax
- 每天 6 点执行一次,逐个执行 job,每个 job 超时时间十分钟
- 根据实际环境修改
- docker-compose.yml
- datax/job/xxxx.json
- 创建目录
```
grep '\<source:' docker-compose.yml | cut -d: -f2 | xargs mkdir -p
```
- 上传需要的 job 文件到 datax/job/ 下
- 启动
```
docker-compose up -d
```
- 添加定时任务
```
0 6 * * * docker-compose -f /compose/docker-compose.yml up -d
```

View File

@@ -0,0 +1,20 @@
version: "3.7"
services:
  # Single-process deployment: jobs run one at a time with the image
  # defaults (TIMEOUT=10m, MAX_PROCS=1 — see ccmd).
  datax:
    image: harbor.colben.cn/general/datax
    container_name: datax
    restart: "no"
    stop_grace_period: 1m      # give ccmd time to kill java jobs on stop
    network_mode: "host"
    volumes:
      - type: bind
        source: ./datax/job
        target: /opt/datax/job
      - type: bind
        source: ./datax/log
        target: /opt/datax/log
      - type: bind
        source: ./datax/log_perf
        target: /opt/datax/log_perf

6
datax/Dockerfile Normal file
View File

@@ -0,0 +1,6 @@
# ARCH is "" on x86_64 or "-<machine>" elsewhere (set by datax.sh).
ARG ARCH
FROM harbor.colben.cn/general/jdk$ARCH:8u202
# MAINTAINER is deprecated; LABEL is the supported replacement.
LABEL maintainer="Colben colbenlee@gmail.com"
ADD --chown=root:root /ADD/ /opt/
CMD ["/opt/ccmd"]

23
datax/README.md Normal file
View File

@@ -0,0 +1,23 @@
# 构建 datax 镜像
## 导入文件
- [下载 datax.tar.gz](https://github.com/alibaba/DataX)
## 定制
- 删除自带的 mysql-connector-java-5,换成 mysql-connector-java-8
- 启动脚本换成 shell,弃用原来的 python 脚本
## 外挂目录和文件
- /opt/datax/log: job 日志目录
- /opt/datax/log_perf: 性能日志目录
- /opt/datax/job: job 文件目录
## 引入环境变量
- JAVA_OPTS: jdk 参数,默认 "-Xms1g -Xmx1g"
- TIMEOUT: 每个任务的超时时间,默认 "10m"(10 分钟)
- MAX_PROCS: 最大并行任务数量,默认 1,即串行
## 案例
- [Demo/SingleProc/](Demo/SingleProc/): 每次执行单个 job
- [Demo/MultiProc/](Demo/MultiProc/): 并行多个 job

77
datax/datax.sh Executable file
View File

@@ -0,0 +1,77 @@
#!/bin/bash
#=========================================
# Author : colben
# Build and push the datax docker image.
#=========================================
set -euo pipefail
export LANG=en_US.UTF-8
trap Quit EXIT
# Non-x86_64 hosts get an arch-suffixed image name, e.g. "datax-aarch64".
[ 'x86_64' == "$(uname -m)" ] && ARCH='' || ARCH="-$(uname -m)"
ROOT_DIR="$(cd $(dirname $0) && pwd)"
IMAGE="harbor.colben.cn/general/$(basename ${0%.sh})$ARCH:latest"
# Colorize log helpers only when running interactively (stdin is a tty).
if [ -t 0 ]; then
    function Print { echo -e "\033[36;1m$(date +'[%F %T]')\033[32;1m $*\033[0m"; }
    function Warn { echo -e "\033[36;1m$(date +'[%F %T]')\033[33;1m $*\033[0m"; }
    function Error { echo -e "\033[36;1m$(date +'[%F %T]')\033[31;1m $*\033[0m"; exit 1; }
else
    function Print { echo -e "$(date +'[%F %T INFO]') $*"; }
    function Warn { echo -e "$(date +'[%F %T WARN]') $*"; }
    function Error { echo -e "$(date +'[%F %T ERROR]') $*"; exit 1; }
fi
function Quit {
    # EXIT trap: report the overall result. A non-zero exit status means
    # the build/push failed; an unset END means we never reached the end
    # of Main, i.e. the run was interrupted.
    local exitCode=$?
    if [ 0 -ne $exitCode ]; then
        Error Failed to build or push image!
    fi
    if [ -z "${END:-}" ]; then
        echo
        Error Interrupted manually!
    fi
    Print Succeeded to build and push image.
}
function YesOrNo {
    # Ask an interactive yes/no question.
    # Returns 0 for yes, 1 for no; exits the script cleanly on quit.
    Warn "$*"
    local sw=
    while :; do
        # -r: don't let a typed backslash start an escape sequence.
        read -r -p '(Yes/No/Quit) ' -n1 sw
        # The original regexes '^Y|y$' etc. relied on alternation binding
        # looser than anchors; glob character classes say what is meant.
        [[ "$sw" == [Yy] ]] && echo && return 0
        [[ "$sw" == [Nn] ]] && echo && return 1
        [[ "$sw" == [Qq] ]] && echo && exit 0
        [ -n "$sw" ] && echo
    done
}
function Update {
Warn Preparing datax ...
cd $ROOT_DIR/ADD
rm -rf $(ls | grep -v ccmd || true)
tar zxf /release/RUNTIME/datax.tar.gz -C .
rm -rf datax/tmp datax/job/*.json
rm -f datax/plugin/writer/mysqlwriter/libs/mysql-connector-java-5.1.34.jar
rm -f datax/plugin/reader/mysqlreader/libs/mysql-connector-java-5.1.34.jar
cp /release/RUNTIME/mysql-connector-java-8.0.27.jar datax/plugin/reader/mysqlreader/libs/
cp /release/RUNTIME/mysql-connector-java-8.0.27.jar datax/plugin/writer/mysqlwriter/libs/
find datax/ -type f | xargs chmod 0644
mkdir datax/{hook,log,log_perf}
}
function Build {
    # (Re)build the image and optionally push it. Declining the push must
    # not be treated as a failure.
    cd "$ROOT_DIR"
    # -Fx: fixed-string whole-line match, so dots in the image name are
    # literal (the old "^$IMAGE$" regex treated them as wildcards).
    if docker images --format='{{.Repository}}:{{.Tag}}' | grep -Fx -- "$IMAGE"; then
        Warn Removing image $IMAGE ...
        docker rmi "$IMAGE"
    fi
    Warn Building image: $IMAGE ...
    docker build --force-rm --build-arg ARCH="$ARCH" -t "$IMAGE" .
    # An `if` instead of `YesOrNo ... && docker push`: with the &&-list as
    # the function's last command, answering "No" made Build return 1,
    # tripping `set -e` and reporting a bogus build failure via the trap.
    if YesOrNo Push image: $IMAGE?; then
        docker push "$IMAGE"
    fi
}
function Main {
    # Refresh the build context, then build/push the image. END=1 tells
    # the EXIT trap (Quit) that we finished cleanly.
    Update
    Build
    END=1
}
# Start here
Main