From b901ea81921b41ed26ae481dbea7e9d2c995f573 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Fri, 9 Sep 2016 14:48:35 +0200 Subject: [PATCH] add HA documentation to README.md --- README.md | 32 ++++++++++++++++++++++++++++++++ examples/ha/alertmanager.yaml | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 54bf94c8..ef54420c 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,38 @@ receivers: - service_key: ``` +## High Availability + +> Warning: High Availability is under active development + +To create a highly available cluster of the Alertmanager the instances need to +be configured to communicate with each other. This is configured using the +`-mesh.*` flags. + +- `-mesh.hardware-address` string: MAC address, i.e. mesh peer ID (default "<hardware-mac-address>") +- `-mesh.listen-address` string: mesh listen address (default "0.0.0.0:6783") +- `-mesh.nickname` string: peer nickname (default "<machine-hostname>") +- `-mesh.peer` value: initial peers (may be repeated) + +The `mesh.hardware-address` flag is used as a unique ID among the peers. It +defaults to the MAC address, therefore the default value should typically be a +good option. The same applies to the default of the `mesh.nickname` flag, as it +defaults to the hostname. The chosen port in the `mesh.listen-address` flag is +the port that needs to be specified in the `mesh.peer` flag of the other peers. + +To start a cluster of three peers on your local machine use `goreman` and the +Procfile within this repository. + + goreman start + +To point your Prometheus instance to multiple Alertmanagers use the +`-alertmanager.url` parameter. It allows passing in a comma-separated list. 
+Start Prometheus like this, for example: + + ./prometheus -config.file=prometheus.yml -alertmanager.url http://localhost:9095,http://localhost:9094,http://localhost:9093 + +> Note: make sure to have a valid `prometheus.yml` in your current directory + ## Architecture ![](https://raw.githubusercontent.com/prometheus/alertmanager/4e6695682acd2580773a904e4aa2e3b927ee27b7/doc/arch.jpg) diff --git a/examples/ha/alertmanager.yaml b/examples/ha/alertmanager.yaml index aa49f195..0db750a8 100644 --- a/examples/ha/alertmanager.yaml +++ b/examples/ha/alertmanager.yaml @@ -1,5 +1,5 @@ global: - resolve_timeout: 1m + resolve_timeout: 5m route: group_by: ['alertname']