#!/bin/bash
##################################################
# Container entrypoint: starts alertmanager,     #
# prometheus, grafana-server and loki, plus a    #
# sidecar that reloads configuration on change.  #
#                                                #
# ENV                                            #
#   - PROMETHEUS_OPTS                            #
#   - ALERTMANAGER_OPTS                          #
#   - GRAFANA_OPTS                               #
#   - LOKI_OPTS                                  #
# Mount dir                                      #
#   - /etc/prometheus                            #
#   - /var/log/prometheus                        #
#   - /var/lib/prometheus                        #
##################################################

set -euo pipefail
export LANG=en_US.UTF-8

# Quit is defined further down; the trap resolves the name when it fires.
trap Quit EXIT

# PIDs of every background job started by StartProc.
PIDS=()
# Set to a non-empty value by the SIGTERM trap installed in Main.
GOT_SIGTERM=
readonly LOG_DIR='/var/log/prometheus'
readonly DATA_DIR='/var/lib/prometheus'
readonly CONF_DIR='/etc/prometheus'

#######################################
# Print a timestamped message on stdout, optionally appending it to a file.
# Arguments:
#   -f FILE  (optional, must come first) file the line is also appended to
#   ...      words of the message, joined with single spaces
# Outputs:
#   "[YYYY-MM-DD HH:MM:SS] message" on stdout (and in FILE when -f is given)
#######################################
function Print {
    local file=/dev/null
    if [ "${1:-}" = '-f' ]; then
        file=$2
        shift 2
    fi
    # The message is passed as printf data, never as a date(1) format, so a
    # literal '%' in the message is printed unchanged.
    printf '[%s] %s\n' "$(date +'%F %T')" "$*" | tee -a "$file"
}

#######################################
# EXIT trap: keep killing the helper sleeps and every component until none
# of them is left, then report that the container stopped.
# Globals:
#   GOT_SIGTERM (read)
# Returns:
#   0 when shutdown was requested through SIGTERM, non-zero otherwise.
#   NOTE(review): the failing final test is presumably what turns an
#   unexpected stop into a non-zero exit status under `set -e` - confirm.
#######################################
function Quit {
    local running name
    while true; do
        running=
        # -f matches against the whole command line; over-matching is
        # harmless here because everything is being shut down anyway.
        # Killing `sleep` is also what ends the SideCar loop.
        for name in sleep grafana-server prometheus alertmanager loki; do
            if pkill -f "$name"; then
                running=1
                Print killing "$name" ...
            fi
        done
        if [ -z "$running" ]; then
            break
        fi
        sleep 1
    done
    Print Container stopped.
    test -n "$GOT_SIGTERM"
}

#######################################
# Checksum over the contents of every *.yml / *.tmpl file located directly
# in CONF_DIR.
# Globals:
#   CONF_DIR (read)
# Outputs:
#   one md5sum line on stdout
#######################################
function ConfChecksum {
    # Sorted, NUL-delimited file list: the result does not depend on the
    # directory listing order and file names with blanks are handled.
    find "$CONF_DIR" -maxdepth 1 -type f \
        -regex ".*\.yml\|.*\.tmpl" -print0 \
        | LC_ALL=C sort -z \
        | xargs -0 -r md5sum \
        | md5sum
}

#######################################
# Poll the configuration every 10 seconds and send SIGHUP to prometheus and
# alertmanager whenever the checksum changes.
# The loop ends when its `sleep` is killed (see Quit).
#######################################
function SideCar {
    local md5 last_md5
    last_md5=$(ConfChecksum)
    while sleep 10; do
        md5=$(ConfChecksum)
        if [ "$md5" != "$last_md5" ]; then
            last_md5=$md5
            Print Reloading conf ...
            # Match the exact process name (-x). Every component is started
            # with arguments containing "/etc/prometheus", so a command-line
            # match (-f prometheus) would also signal alertmanager,
            # grafana-server and loki.
            # Each signal is sent independently so that a failure for one
            # target does not skip the other.
            pkill -HUP -x prometheus \
                || Print "prometheus is not running, reload skipped."
            pkill -HUP -x alertmanager \
                || Print "alertmanager is not running, reload skipped."
        fi
    done
}

#######################################
# Create the working directories and write default configuration files for
# every component that does not have one yet. Existing files are untouched.
# Globals:
#   CONF_DIR, DATA_DIR, LOG_DIR (read)
#######################################
function Init {
    mkdir -p "$LOG_DIR" "$CONF_DIR" \
        "$DATA_DIR"/{tsdb,alertmanager,grafana,loki}

    if [ ! -f "$CONF_DIR/prometheus.yml" ]; then
        cat > "$CONF_DIR/prometheus.yml" <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  scrape_timeout: 10s

alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 127.0.0.1:9093

rule_files:
# - node_rules.yml

scrape_configs:
#- job_name: prometheus
#  honor_labels: true
#  static_configs:
#  - targets:
#    - 127.0.0.1:9100
#    labels:
#      host: 127.0.0.1
#- job_name: nodes
#  static_configs:
#  - targets:
#    - ip_1:9100
#    - ip_2:9100
#  metric_relabel_configs:
#  - source_labels: [instance]
#    target_label: host
#    regex: "([^:]+):.+"
EOF
    fi

    if [ ! -f "$CONF_DIR/alertmanager.yml" ]; then
        cat > "$CONF_DIR/alertmanager.yml" <<'EOF'
global:
  resolve_timeout: 10m

templates:
# - xxxx.tmpl

route:
  group_by: [alertname]
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: alert

receivers:
- name: alert

inhibit_rules:
- source_match:
    severity: emergency
  target_match_re:
    severity: "warning|critical"
  equal: [host]
EOF
    fi

    [ -f "$CONF_DIR/grafana.ini" ] \
        || cp -af /usr/share/grafana/conf/sample.ini "$CONF_DIR/grafana.ini"
    [ -d "$CONF_DIR/provisioning" ] \
        || cp -af /usr/share/grafana/conf/provisioning "$CONF_DIR/provisioning"

    if [ ! -f "$CONF_DIR/loki.yml" ]; then
        # Unquoted delimiter on purpose: $DATA_DIR is expanded into the file.
        cat > "$CONF_DIR/loki.yml" <<EOF
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096
  log_level: warn

ingester:
  wal:
    enabled: true
    dir: $DATA_DIR/loki/db/wal
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h
  max_chunk_age: 1h
  chunk_target_size: 1048576
  chunk_retain_period: 30s
  max_transfer_retries: 0

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

storage_config:
  boltdb_shipper:
    active_index_directory: $DATA_DIR/loki/db/boltdb-shipper-active
    cache_location: $DATA_DIR/loki/db/boltdb-shipper-cache
    cache_ttl: 24h
    shared_store: filesystem
  filesystem:
    directory: $DATA_DIR/loki/db/chunks

compactor:
  working_directory: $DATA_DIR/loki/db/boltdb-shipper-compactor
  shared_store: filesystem

limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  max_look_back_period: 0s

table_manager:
  retention_deletes_enabled: false
  retention_period: 0s

ruler:
  storage:
    type: local
    local:
      directory: $DATA_DIR/loki/db/rules
  rule_path: $DATA_DIR/loki/db/rules-temp
  alertmanager_url: http://127.0.0.1:9093
  ring:
    kvstore:
      store: inmemory
  enable_api: true
EOF
    fi
}

#######################################
# Start every component in the background, two seconds apart, then start
# the configuration sidecar. Each PID is recorded in PIDS.
# Globals:
#   PIDS (written); CONF_DIR, DATA_DIR, LOG_DIR (read)
#   ALERTMANAGER_OPTS, PROMETHEUS_OPTS, GRAFANA_OPTS, LOKI_OPTS (read)
#######################################
function StartProc {
    # The *_OPTS variables are left unquoted on purpose: each one may carry
    # several whitespace-separated command-line options.

    Print Starting alertmanager ...
    # shellcheck disable=SC2086
    alertmanager \
        --config.file="$CONF_DIR/alertmanager.yml" \
        --storage.path="$DATA_DIR/alertmanager" \
        --web.external-url=http://0.0.0.0:9093/alertmanager/ \
        ${ALERTMANAGER_OPTS:-} &>> "$LOG_DIR/alertmanager.out" &
    PIDS+=("$!")
    sleep 2

    Print Starting prometheus ...
    # shellcheck disable=SC2086
    prometheus \
        --config.file="$CONF_DIR/prometheus.yml" \
        --web.external-url=prometheus \
        --web.console.templates=/usr/share/prometheus/consoles \
        --web.console.libraries=/usr/share/prometheus/console_libraries \
        --storage.tsdb.path="$DATA_DIR/tsdb" \
        ${PROMETHEUS_OPTS:-} &>> "$LOG_DIR/prometheus.out" &
    PIDS+=("$!")
    sleep 2

    Print Starting grafana-server ...
    # shellcheck disable=SC2086
    grafana-server \
        -homepath /usr/share/grafana \
        -config "$CONF_DIR/grafana.ini" \
        ${GRAFANA_OPTS:-} web &>> "$LOG_DIR/grafana.out" &
    PIDS+=("$!")
    sleep 2

    Print Starting loki ...
    # shellcheck disable=SC2086
    loki \
        --config.file="$CONF_DIR/loki.yml" \
        ${LOKI_OPTS:-} &>> "$LOG_DIR/loki.out" &
    PIDS+=("$!")
    sleep 2

    Print Starting sidecar ...
    SideCar &
    PIDS+=("$!")

    Print All components started.
}

#######################################
# Entry point: initialise, start everything, then supervise once per second
# until SIGTERM arrives or one of the started processes disappears.
# Globals:
#   PIDS (read), GOT_SIGTERM (written by the SIGTERM trap)
#######################################
function Main {
    local pid

    Init
    StartProc

    trap 'GOT_SIGTERM=1; Print Got SIGTERM ...' SIGTERM

    while [ -z "$GOT_SIGTERM" ] && sleep 1; do
        for pid in "${PIDS[@]}"; do
            if [ ! -e "/proc/$pid" ]; then
                Print Unexpected error!
                # Explicit failure status; the EXIT trap (Quit) then stops
                # the remaining components.
                exit 1
            fi
        done
    done
}

# Start here
Main