2023-04-16 21:38:32 +08:00

253 lines
6.1 KiB
Bash
Executable File

#!/bin/bash
#
# Container entrypoint: starts alertmanager, prometheus, grafana-server and
# loki, then supervises them until SIGTERM arrives or one of them dies.
#
# Environment (optional, each value is word-split into extra CLI options):
#   PROMETHEUS_OPTS
#   ALERTMANAGER_OPTS
#   GRAFANA_OPTS
#   LOKI_OPTS
#
# Mount points:
#   /etc/prometheus      configuration
#   /var/log/prometheus  component logs
#   /var/lib/prometheus  component data
#
set -o errexit -o nounset -o pipefail

LANG='en_US.UTF-8'
export LANG

# Filesystem layout shared by every function below.
CONF_DIR="/etc/prometheus"
DATA_DIR="/var/lib/prometheus"
LOG_DIR="/var/log/prometheus"

# Runtime state: space-separated PIDs of started jobs, and a flag that is
# set to a non-empty value once SIGTERM has been received.
PIDS=''
GOT_SIGTERM=''

# Quit is defined further down; the name is only resolved when the trap fires.
trap 'Quit' EXIT
#######################################
# Print a timestamped log line to stdout, optionally appending it to a file.
# Usage:     Print [-f FILE] MESSAGE...
# Arguments: -f FILE   also append the line to FILE (default: /dev/null)
#            MESSAGE   remaining words, joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] MESSAGE" on stdout
#######################################
function Print {
  local file=/dev/null
  # ${1-} keeps `set -u` from aborting when Print is called with no arguments.
  if [[ "${1-}" == '-f' && $# -ge 2 ]]; then
    file=$2
    shift 2
  fi
  # The message must stay out of date's format string: otherwise every '%'
  # in it would be expanded as a date conversion.
  printf '%s %s\n' "$(date +'[%F %T]')" "$*" | tee -a "$file"
}
#######################################
# EXIT-trap handler: stop everything this script started.
# Once per second, runs `pkill -f` for each known command-line pattern and
# repeats until a full pass matches no process at all.
# Globals:   GOT_SIGTERM (read)
# Returns:   0 if GOT_SIGTERM is non-empty, non-zero otherwise
#######################################
function Quit {
  local alive target
  while true; do
    alive=
    # "sleep" comes first: killing the pending sleeps ends the polling loops
    # that are built on `while sleep N`.
    for target in sleep grafana-server prometheus alertmanager loki; do
      pkill -f "$target" && alive=1 && Print killing "$target" ...
    done
    [ -n "$alive" ] || break
    sleep 1
  done
  Print Container stopped.
  test -n "$GOT_SIGTERM"
}
#######################################
# Print one checksum covering the top-level *.yml and *.tmpl files of CONF_DIR.
# Globals:   CONF_DIR (read)
# Outputs:   md5sum line on stdout
# Returns:   non-zero if any stage of the pipeline fails (with pipefail set)
#######################################
function _ConfChecksum {
  # -exec ... + is safe for any file name; sorting keeps the result
  # independent of the order in which find lists the directory.
  find "$CONF_DIR" -maxdepth 1 -type f \
    \( -name '*.yml' -o -name '*.tmpl' \) -exec md5sum {} + \
    | LC_ALL=C sort \
    | md5sum
}

#######################################
# Poll the configuration every 10 seconds and send SIGHUP to prometheus and
# alertmanager whenever the checksum of the watched files changes.
# Globals:   CONF_DIR (read, via _ConfChecksum)
# Returns:   when `sleep` fails (Quit kills pending sleeps to end this loop)
#######################################
function SideCar {
  local md5 last_md5
  last_md5=$(_ConfChecksum) || last_md5=
  while sleep 10; do
    # A transient checksum failure must not terminate the watcher.
    md5=$(_ConfChecksum) || continue
    [[ "$md5" != "$last_md5" ]] || continue
    last_md5=$md5
    Print Reloading conf ...
    # -x matches the process name exactly. Matching with -f against the full
    # command line would also hit every process started with an
    # /etc/prometheus/... argument (alertmanager, grafana-server, loki).
    # The two signals are independent, and "no such process" is deliberately
    # not fatal here: the main loop is what detects a dead component.
    pkill -HUP -x prometheus || true
    pkill -HUP -x alertmanager || true
  done
}
#######################################
# Prepare directories and write default configuration files.
# Creates CONF_DIR, LOG_DIR and the per-component data directories, then
# writes a default for each configuration file that does not exist yet.
# Existing files are never overwritten.
# Globals:   CONF_DIR, DATA_DIR, LOG_DIR (read)
# Returns:   non-zero if a directory or file cannot be created
#######################################
function Init {
  # CONF_DIR and LOG_DIR are normally mounted, but creating them keeps the
  # redirections here and in StartProc from failing when they are not.
  mkdir -p "$CONF_DIR" "$LOG_DIR" \
    "$DATA_DIR"/{tsdb,alertmanager,grafana,loki}

  if [[ ! -f "$CONF_DIR/prometheus.yml" ]]; then
    cat > "$CONF_DIR/prometheus.yml" <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  scrape_timeout: 10s
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 127.0.0.1:9093
rule_files:
# - node_rules.yml
scrape_configs:
#- job_name: prometheus
#  honor_labels: true
#  static_configs:
#  - targets:
#    - 127.0.0.1:9100
#    labels:
#      host: 127.0.0.1
#- job_name: nodes
#  static_configs:
#  - targets:
#    - ip_1:9100
#    - ip_2:9100
#  metric_relabel_configs:
#  - source_labels: [instance]
#    target_label: host
#    regex: "([^:]+):.+"
EOF
  fi

  if [[ ! -f "$CONF_DIR/alertmanager.yml" ]]; then
    cat > "$CONF_DIR/alertmanager.yml" <<'EOF'
global:
  resolve_timeout: 10m
templates:
# - xxxx.tmpl
route:
  group_by: [alertname]
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: alert
receivers:
- name: alert
inhibit_rules:
- source_match:
    severity: emergency
  target_match_re:
    severity: "warning|critical"
  equal: [host]
EOF
  fi

  # Grafana defaults are copied from the files shipped under /usr/share/grafana.
  if [[ ! -f "$CONF_DIR/grafana.ini" ]]; then
    cp -af /usr/share/grafana/conf/sample.ini "$CONF_DIR/grafana.ini"
  fi
  if [[ ! -d "$CONF_DIR/provisioning" ]]; then
    cp -af /usr/share/grafana/conf/provisioning "$CONF_DIR/provisioning"
  fi

  # Unquoted delimiter: ${DATA_DIR} is expanded while the file is written.
  if [[ ! -f "$CONF_DIR/loki.yml" ]]; then
    cat > "$CONF_DIR/loki.yml" <<EOF
auth_enabled: false
server:
  http_listen_port: 3100
  grpc_listen_port: 9096
  log_level: warn
ingester:
  wal:
    enabled: true
    dir: ${DATA_DIR}/loki/db/wal
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_target_size: 1048576
  chunk_retain_period: 30s
  max_transfer_retries: 0
schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h
storage_config:
  boltdb_shipper:
    active_index_directory: ${DATA_DIR}/loki/db/boltdb-shipper-active
    cache_location: ${DATA_DIR}/loki/db/boltdb-shipper-cache
    cache_ttl: 24h
    shared_store: filesystem
  filesystem:
    directory: ${DATA_DIR}/loki/db/chunks
compactor:
  working_directory: ${DATA_DIR}/loki/db/boltdb-shipper-compactor
  shared_store: filesystem
limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h
chunk_store_config:
  max_look_back_period: 0s
table_manager:
  retention_deletes_enabled: false
  retention_period: 0s
ruler:
  storage:
    type: local
    local:
      directory: ${DATA_DIR}/loki/db/rules
  rule_path: ${DATA_DIR}/loki/db/rules-temp
  alertmanager_url: http://127.0.0.1:9093
  ring:
    kvstore:
      store: inmemory
  enable_api: true
EOF
  fi
}
#######################################
# Launch every component as a background job and record its PID.
# Start order: alertmanager, prometheus, grafana-server, loki (each followed
# by a 2 second pause), then the SideCar config watcher.
# Globals:   CONF_DIR, DATA_DIR, LOG_DIR (read); PIDS (appended to);
#            ALERTMANAGER_OPTS, PROMETHEUS_OPTS, GRAFANA_OPTS, LOKI_OPTS
#            (optional; left unquoted on purpose so they split into options)
# Outputs:   each component's stdout and stderr are appended to
#            $LOG_DIR/<component>.out
#######################################
function StartProc {
  Print Starting alertmanager ...
  alertmanager \
    --config.file="$CONF_DIR/alertmanager.yml" \
    --storage.path="$DATA_DIR/alertmanager" \
    --web.external-url=http://0.0.0.0:9093/alertmanager/ \
    ${ALERTMANAGER_OPTS:-} >> "$LOG_DIR/alertmanager.out" 2>&1 &
  PIDS+=" $!"
  sleep 2

  Print Starting prometheus ...
  prometheus \
    --config.file="$CONF_DIR/prometheus.yml" \
    --web.external-url=prometheus \
    --web.console.templates=/usr/share/prometheus/consoles \
    --web.console.libraries=/usr/share/prometheus/console_libraries \
    --storage.tsdb.path="$DATA_DIR/tsdb" \
    ${PROMETHEUS_OPTS:-} >> "$LOG_DIR/prometheus.out" 2>&1 &
  PIDS+=" $!"
  sleep 2

  Print Starting grafana-server ...
  grafana-server \
    -homepath /usr/share/grafana \
    -config "$CONF_DIR/grafana.ini" \
    ${GRAFANA_OPTS:-} web >> "$LOG_DIR/grafana.out" 2>&1 &
  PIDS+=" $!"
  sleep 2

  Print Starting loki ...
  loki \
    --config.file="$CONF_DIR/loki.yml" \
    ${LOKI_OPTS:-} >> "$LOG_DIR/loki.out" 2>&1 &
  PIDS+=" $!"
  sleep 2

  # The config watcher is tracked in PIDS like any other component.
  Print Starting sidecar ...
  SideCar &
  PIDS+=" $!"

  Print All components started.
}
#######################################
# Initialise, start all components, then supervise them.
# Once per second, checks that every PID recorded in PIDS still has a /proc
# entry. The loop ends when SIGTERM has been received.
# Globals:   PIDS (read), GOT_SIGTERM (read; set by the SIGTERM trap)
# Returns:   0 after a SIGTERM-initiated shutdown
# Exits:     with status 1 as soon as a supervised process has disappeared
#######################################
function Main {
  local pid
  Init
  StartProc
  trap 'GOT_SIGTERM=1; Print Got SIGTERM ...' SIGTERM
  while [[ -z "$GOT_SIGTERM" ]] && sleep 1; do
    # PIDS is a space-separated list; word splitting is intended here.
    for pid in $PIDS; do
      if [[ ! -e "/proc/$pid" ]]; then
        Print Unexpected error!
        # Explicit status: a bare `exit` would reuse Print's status (0).
        exit 1
      fi
    done
  done
  # Without this, the status of the last liveness test would leak out as the
  # function's return value and abort the script under `set -e`.
  return 0
}
# Script entry point: everything above only defines functions and globals.
# Main initialises, starts and supervises the components; the EXIT trap
# installed at the top of the file runs Quit once the script exits.
Main