253 lines
6.1 KiB
Bash
Executable File
253 lines
6.1 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
##################################################
# ENV                                            #
#   - PROMETHEUS_OPTS                            #
#   - ALERTMANAGER_OPTS                          #
#   - GRAFANA_OPTS                               #
#   - LOKI_OPTS                                  #
# Mount dir                                      #
#   - /etc/prometheus                            #
#   - /var/log/prometheus                        #
#   - /var/lib/prometheus                        #
##################################################
|
|
|
|
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
export LANG=en_US.UTF-8

# Quit runs whenever this shell exits.
trap Quit EXIT

# Space-separated PIDs of the background jobs; StartProc appends to it and
# Main iterates over it.
PIDS=''
# Empty until Main's SIGTERM handler sets it to 1.
GOT_SIGTERM=''
LOG_DIR=/var/log/prometheus
DATA_DIR=/var/lib/prometheus
CONF_DIR=/etc/prometheus
|
|
|
|
# Write a timestamped message to stdout and optionally append it to a file.
# Usage:   Print [-f FILE] WORD...
# Output:  "[YYYY-MM-DD HH:MM:SS] WORD..." on stdout; the same line is
#          appended to FILE when -f is given (otherwise to /dev/null).
# Returns: status of the printf | tee pipeline; 2 if -f lacks its FILE.
function Print {
    local file=/dev/null
    # ${1:-} keeps an argument-less call from tripping `set -u`.
    if [ "${1:-}" = '-f' ]; then
        if [ "$#" -lt 2 ]; then
            printf 'Print: -f requires a file argument\n' >&2
            return 2
        fi
        file=$2
        shift 2
    fi
    # The message is passed as printf data, never as part of the date format,
    # so a literal '%' in it is printed verbatim.
    printf '[%s] %s\n' "$(date '+%F %T')" "$*" | tee -a -- "$file"
}
|
|
|
|
# Shutdown handler, registered with `trap Quit EXIT`.
# Repeatedly signals every process whose command line matches one of the
# component names (pkill -f, default signal) and waits one second between
# rounds, until a round finds nothing left to signal.
# Returns: 0 when GOT_SIGTERM is non-empty, non-zero otherwise.
# NOTE(review): the final test presumably exists to turn an exit that was not
# requested via SIGTERM into a failure status - confirm.
function Quit {
    local name alive
    while true; do
        alive=
        for name in sleep grafana-server prometheus alertmanager loki; do
            if pkill -f "$name"; then
                alive=1
                Print killing "$name" ...
            fi
        done
        # Nothing matched in this round: everything is gone.
        [ -n "$alive" ] || break
        sleep 1
    done
    Print Container stopped.
    [ -n "$GOT_SIGTERM" ]
}
|
|
|
|
# Print one digest covering the content of every *.yml / *.tmpl regular file
# directly inside $CONF_DIR (sub-directories are not searched).
# find hands the names to md5sum itself, so names containing quotes,
# backslashes or blanks are hashed like any other file.
function ConfChecksum {
    find "$CONF_DIR" -maxdepth 1 -type f \
        -regex ".*\.yml\|.*\.tmpl" -exec md5sum {} + | md5sum
}

# Configuration watcher: every 10 seconds recompute the checksum of the
# config files and, when it differs from the previous one, send SIGHUP to
# prometheus and alertmanager.
# Globals: CONF_DIR (read).
# Runs until `sleep` itself fails (for example because it was killed).
function SideCar {
    local md5 last_md5
    # Declared and assigned separately so a failing pipeline is not masked
    # by the exit status of `local`.
    last_md5=$(ConfChecksum)
    while sleep 10; do
        md5=$(ConfChecksum)
        if [ "$md5" != "$last_md5" ]; then
            last_md5=$md5
            # alertmanager is only signalled when the prometheus pkill
            # matched at least one process.
            # NOTE(review): -f matches the whole command line, so every
            # process whose arguments mention "prometheus" (for example a
            # path under /etc/prometheus) receives the HUP as well - confirm
            # that this is intended.
            Print Reloading conf ... \
                && pkill -HUP -f prometheus \
                && pkill -HUP -f alertmanager
        fi
    done
}
|
|
|
|
# Prepare the directory layout and write default configuration files.
# Globals: CONF_DIR, DATA_DIR (read).
# Creates $CONF_DIR and the tsdb/alertmanager/grafana/loki directories under
# $DATA_DIR, then writes prometheus.yml, alertmanager.yml and loki.yml and
# copies Grafana's sample.ini and provisioning directory into $CONF_DIR.
# A configuration file or directory that already exists is never overwritten.
function Init {
    mkdir -p "$CONF_DIR" "$DATA_DIR"/{tsdb,alertmanager,grafana,loki}

    if [ ! -f "$CONF_DIR/prometheus.yml" ]; then
        cat > "$CONF_DIR/prometheus.yml" <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  scrape_timeout: 10s

alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 127.0.0.1:9093

rule_files:
# - node_rules.yml

scrape_configs:
#- job_name: prometheus
#  honor_labels: true
#  static_configs:
#  - targets:
#    - 127.0.0.1:9100
#    labels:
#      host: 127.0.0.1

#- job_name: nodes
#  static_configs:
#  - targets:
#    - ip_1:9100
#    - ip_2:9100
#  metric_relabel_configs:
#  - source_labels: [instance]
#    target_label: host
#    regex: "([^:]+):.+"
EOF
    fi

    if [ ! -f "$CONF_DIR/alertmanager.yml" ]; then
        cat > "$CONF_DIR/alertmanager.yml" <<'EOF'
global:
  resolve_timeout: 10m

templates:
# - xxxx.tmpl

route:
  group_by: [alertname]
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: alert

receivers:
- name: alert

inhibit_rules:
- source_match:
    severity: emergency
  target_match_re:
    severity: "warning|critical"
  equal: [host]
EOF
    fi

    [ -f "$CONF_DIR/grafana.ini" ] \
        || cp -af /usr/share/grafana/conf/sample.ini "$CONF_DIR/grafana.ini"
    [ -d "$CONF_DIR/provisioning" ] \
        || cp -af /usr/share/grafana/conf/provisioning "$CONF_DIR/provisioning"

    # Unquoted here-doc delimiter: $DATA_DIR is expanded into the file.
    if [ ! -f "$CONF_DIR/loki.yml" ]; then
        cat > "$CONF_DIR/loki.yml" <<EOF
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096
  log_level: warn

ingester:
  wal:
    enabled: true
    dir: $DATA_DIR/loki/db/wal
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_target_size: 1048576
  chunk_retain_period: 30s
  max_transfer_retries: 0

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

storage_config:
  boltdb_shipper:
    active_index_directory: $DATA_DIR/loki/db/boltdb-shipper-active
    cache_location: $DATA_DIR/loki/db/boltdb-shipper-cache
    cache_ttl: 24h
    shared_store: filesystem
  filesystem:
    directory: $DATA_DIR/loki/db/chunks

compactor:
  working_directory: $DATA_DIR/loki/db/boltdb-shipper-compactor
  shared_store: filesystem

limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  max_look_back_period: 0s

table_manager:
  retention_deletes_enabled: false
  retention_period: 0s

ruler:
  storage:
    type: local
    local:
      directory: $DATA_DIR/loki/db/rules
  rule_path: $DATA_DIR/loki/db/rules-temp
  alertmanager_url: http://127.0.0.1:9093
  ring:
    kvstore:
      store: inmemory
  enable_api: true
EOF
    fi
}
|
|
|
|
# Launch alertmanager, prometheus, grafana-server, loki and the SideCar
# watcher as background jobs, in that order, pausing two seconds after each
# of the four servers.
# Globals: CONF_DIR, DATA_DIR, LOG_DIR (read); PIDS (each job's PID appended);
#          ALERTMANAGER_OPTS, PROMETHEUS_OPTS, GRAFANA_OPTS, LOKI_OPTS
#          (optional extra command-line flags).
# stdout and stderr of every server are appended to $LOG_DIR/<name>.out.
function StartProc {
    # Every redirection below fails if the log directory is missing.
    mkdir -p "$LOG_DIR"

    # The *_OPTS variables are intentionally left unquoted so that a value
    # holding several flags is split into separate arguments.

    Print Starting alertmanager ...
    # shellcheck disable=SC2086
    alertmanager \
        --config.file="$CONF_DIR/alertmanager.yml" \
        --storage.path="$DATA_DIR/alertmanager" \
        --web.external-url=http://0.0.0.0:9093/alertmanager/ \
        ${ALERTMANAGER_OPTS:-} >> "$LOG_DIR/alertmanager.out" 2>&1 &
    PIDS+=" $!"
    sleep 2

    Print Starting prometheus ...
    # shellcheck disable=SC2086
    prometheus \
        --config.file="$CONF_DIR/prometheus.yml" \
        --web.external-url=prometheus \
        --web.console.templates=/usr/share/prometheus/consoles \
        --web.console.libraries=/usr/share/prometheus/console_libraries \
        --storage.tsdb.path="$DATA_DIR/tsdb" \
        ${PROMETHEUS_OPTS:-} >> "$LOG_DIR/prometheus.out" 2>&1 &
    PIDS+=" $!"
    sleep 2

    Print Starting grafana-server ...
    # shellcheck disable=SC2086
    grafana-server \
        -homepath /usr/share/grafana \
        -config "$CONF_DIR/grafana.ini" \
        ${GRAFANA_OPTS:-} web >> "$LOG_DIR/grafana.out" 2>&1 &
    PIDS+=" $!"
    sleep 2

    Print Starting loki ...
    # shellcheck disable=SC2086
    loki \
        --config.file="$CONF_DIR/loki.yml" \
        ${LOKI_OPTS:-} >> "$LOG_DIR/loki.out" 2>&1 &
    PIDS+=" $!"
    sleep 2

    Print Starting sidecar ...
    SideCar &
    PIDS+=" $!"

    Print All components started.
}
|
|
|
|
# Supervisor loop: initialise, start every component, then poll once per
# second until SIGTERM arrives or one of the started jobs disappears.
# Globals: PIDS (read), GOT_SIGTERM (read; set by the SIGTERM handler).
# Returns: 0 after a SIGTERM-requested stop.
# Exits:   with status 1 as soon as a PID listed in PIDS has no /proc entry.
function Main {
    local pid
    Init
    # Installed before StartProc so that a SIGTERM delivered during the
    # start-up pauses is still recorded instead of being missed.
    trap 'GOT_SIGTERM=1; Print Got SIGTERM ...' SIGTERM
    StartProc
    while [ -z "$GOT_SIGTERM" ] && sleep 1; do
        # PIDS is a space-separated list; the unquoted expansion splits it.
        for pid in $PIDS; do
            if [ ! -e "/proc/$pid" ]; then
                Print Unexpected error!
                # Explicit failure status; a bare `exit` would pass on the
                # status of the Print call above.
                exit 1
            fi
        done
    done
    # Without this, the function would return the status of the last
    # liveness check made inside the loop.
    return 0
}
|
|
|
|
# Script entry point: hand control to Main.
Main
|
|
|