redis_exporter
# Create the working directory for the Redis launcher script.
# (The path must be /data/redis, the same directory the heredoc below writes into.)
mkdir -p /data/redis
# Write the launcher. The quoted 'EOF' keeps the body literal (no shell expansion).
cat > /data/redis/start.sh << 'EOF'
docker run -d \
--name redis \
-p 6379:6379 \
redis redis-server \
--appendonly yes \
--requirepass password \
--maxmemory 20M
EOF
# Start the Redis container.
bash /data/redis/start.sh
# Create the working directory for the redis_exporter launcher script.
mkdir -p /data/redis_exporter/
# Write the launcher. Every option line needs a trailing backslash so the whole
# thing is one `docker run` command; without them each line runs as its own command.
cat > /data/redis_exporter/start.sh << 'EOF'
docker run -d \
--name redis_exporter \
--restart=always \
-p 9121:9121 \
-e REDIS_ADDR=192.168.11.221:6379 \
-e REDIS_PASSWORD=password \
-e REDIS_EXPORTER_INCL_SYSTEM_METRICS=true \
-v /etc/localtime:/etc/localtime:ro \
oliver006/redis_exporter
EOF
# Start the exporter container.
bash /data/redis_exporter/start.sh
# Write the Redis alerting rules for Prometheus.
# Notes on the generated YAML:
#   - Nesting is restored (groups -> rules -> alert fields); Prometheus rejects a flat file.
#   - Inner quotes of `printf "%.Nf"` are escaped as \" because the annotations are
#     double-quoted YAML scalars (double quotes are needed for the \n escape).
#   - The "no master" rule uses `or vector(0)`: count() over an empty vector returns
#     nothing, so a bare `count(...) < 1` could never fire.
#   - The Maxmemory rule puts the percentage on the left of `and`, because the result
#     of `A and B` carries A's value and the annotation prints $value as a percentage.
# The quoted 'EOF' keeps `$labels` / `$value` literal (no shell expansion).
mkdir -p /data/prometheus/conf/rules
cat > /data/prometheus/conf/rules/redis.rules << 'EOF'
groups:
  - name: Redis-监控告警
    rules:
      - alert: 警报!Redis应用不可用
        expr: redis_up == 0
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis应用不可用"
          description: "Redis应用不可达\n 当前值 = {{ $value }}"
      - alert: 警报!丢失Master节点
        expr: (count(redis_instance_info{role="master"}) or vector(0)) < 1
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} 丢失Redis master"
          description: "Redis集群当前没有主节点\n 当前值 = {{ $value }}"
      - alert: 警报!脑裂,主节点太多
        expr: count(redis_instance_info{role="master"}) > 1
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis脑裂,主节点太多"
          description: "{{ $labels.instance }} 主节点太多\n 当前值 = {{ $value }}"
      - alert: 警报!Slave连接不可达
        expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis丢失slave节点"
          description: "Redis slave不可达.请确认主从同步状态\n 当前值 = {{ $value }}"
      - alert: 警报!Redis副本不一致
        expr: delta(redis_connected_slaves[1m]) < 0
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis 副本不一致"
          description: "Redis集群丢失一个slave节点\n 当前值 = {{ $value }}"
      - alert: 警报!Redis集群抖动
        expr: changes(redis_connected_slaves[1m]) > 1
        for: 2m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis集群抖动"
          description: "Redis集群抖动,请检查.\n 当前值 = {{ $value }}"
      - alert: 警报!持久化失败
        expr: (time() - redis_rdb_last_save_timestamp_seconds) / 3600 > 24
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} Redis持久化失败"
          description: "Redis持久化失败(>24小时)\n 当前值 = {{ printf \"%.1f\" $value }}小时"
      - alert: 警报!内存不足
        expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
        for: 2m
        labels:
          severity: 一般告警
        annotations:
          summary: "{{ $labels.instance }}系统内存不足"
          description: "Redis占用系统内存(> 90%)\n 当前值 = {{ printf \"%.2f\" $value }}%"
      - alert: 警报!Maxmemory不足
        expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 80 and redis_config_maxmemory != 0
        for: 2m
        labels:
          severity: 一般告警
        annotations:
          summary: "{{ $labels.instance }} Maxmemory设置太小"
          description: "超出设置最大内存(> 80%)\n 当前值 = {{ printf \"%.2f\" $value }}%"
      - alert: 警报!连接数太多
        expr: redis_connected_clients > 200
        for: 2m
        labels:
          severity: 一般告警
        annotations:
          summary: "{{ $labels.instance }} 实时连接数太多"
          description: "连接数太多(>200)\n 当前值 = {{ $value }}"
      - alert: 警报!连接数太少
        expr: redis_connected_clients < 1
        for: 2m
        labels:
          severity: 一般告警
        annotations:
          summary: "{{ $labels.instance }} 实时连接数太少"
          description: "连接数(<1)\n 当前值 = {{ $value }}"
      - alert: 警报!拒绝连接数
        expr: increase(redis_rejected_connections_total[1m]) > 0
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} 拒绝连接"
          description: "Redis有拒绝连接,请检查连接数配置\n 当前值 = {{ printf \"%.0f\" $value }}"
      - alert: 警报!执行命令数大于1000
        expr: rate(redis_commands_processed_total[1m]) > 1000
        for: 0m
        labels:
          severity: 严重告警
        annotations:
          summary: "{{ $labels.instance }} 执行命令次数太多"
          description: "Redis执行命令次数太多\n 当前值 = {{ printf \"%.0f\" $value }}"
EOF
# Append the Redis scrape job (file-based service discovery) to prometheus.yml.
# The relabeling follows the exporter's multi-target pattern: the discovered Redis
# address becomes the `target` URL parameter and the `instance` label, and the real
# scrape address is rewritten to the exporter (192.168.11.221:9121, path /scrape).
# The special labels must be spelled __address__ / __param_target.
# NOTE(review): assumes `scrape_configs:` is the last section of prometheus.yml and
# that its entries are indented by two spaces — confirm before appending.
cat >> /data/prometheus/conf/prometheus.yml << 'EOF'
  # redis自动发现
  - job_name: 'redis'
    file_sd_configs:
      - files:
          - /etc/prometheus/sd_config/redis.yaml
        refresh_interval: 5s
    metrics_path: /scrape
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 192.168.11.221:9121
EOF
# Write the file_sd target list read by the 'redis' scrape job.
# NOTE(review): presumably /data/prometheus/conf is mounted into the Prometheus
# container as /etc/prometheus — confirm against the Prometheus start script.
# `>>` appends, so running this step twice duplicates the target group.
mkdir -p /data/prometheus/conf/sd_config
cat >> /data/prometheus/conf/sd_config/redis.yaml << 'EOF'
# redis自动发现
- labels:
    project: 民生redis
  targets:
    - '192.168.11.221:6379'
    - '192.168.11.222:6379'
    - '192.168.11.223:6379'
EOF
grafana_id: 2751
grafana_id: 14615 (cluster)
监控redis_cluster
http://t.zoukankan.com/fsckzy-p-12053604.html
</article>