Author: Pedro Lucas Porcellis <porcellis@eletrotupi.com>
Flesh out initial metrics bits
README.md | 4 ++ alertmanager/alertmanager-irc-relay.yml | 51 +++++++++++++++++++++++++++ alertmanager/alertmanager.yml | 16 ++++++++ prometheus/prometheus.yml | 41 +++++++++++++++++++++ rules/service_rules.yml | 10 +++++
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..26be3543f5dcba523d654690dcacca1a794a2c7a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# metricas.eletrotupi.com
+
+Arquivos de alertas e configurações do Prometheus, Alert Manager, Alert Manager
+IRC Relay.
diff --git a/alertmanager/alertmanager-irc-relay.yml b/alertmanager/alertmanager-irc-relay.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7ecace3b17c9a19158fae4cbf1eb9e3ad2cda9d5
--- /dev/null
+++ b/alertmanager/alertmanager-irc-relay.yml
@@ -0,0 +1,51 @@
+# Start the HTTP server receiving alerts from Prometheus Webhook binding to
+# this host/port.
+#
+http_host: localhost
+http_port: 9094  # NOTE(review): Alertmanager's default cluster port is also 9094 - confirm no clash
+
+# Connect to this IRC host/port.
+#
+# Note: SSL is enabled by default, use "irc_use_ssl: false" to disable.
+irc_host: irc.libera.chat
+irc_port: 7000  # TLS port on Libera.Chat, so irc_use_ssl must stay enabled
+irc_use_ssl: true
+
+# Use this IRC nickname.
+irc_nickname: inaie
+# Password used to identify with NickServ
+#irc_nickname_password:
+# Use this IRC real name
+irc_realname: inaie
+
+# Optionally pre-join certain channels.
+#
+# Note: If an alert is sent to a non pre-joined channel the bot will join
+# that channel anyway before sending the notice. Of course this cannot work
+# with password-protected channels.
+irc_channels:
+  - name: "#eletrotupiniquim"
+
+# Define how IRC messages should be sent.
+#
+# Send only one notice when webhook data is received.
+# Note: By default a notice is sent for each alert in the webhook data.
+notice_once_per_alert_group: false
+use_privmsg: true
+
+# Define how IRC messages should be formatted.
+#
+# The formatting is based on golang's text/template .
+msg_template: "Alert {{ .Labels.alertname }} for {{ .Labels.instance }} is {{ .Status }}"
+# Note: When sending only one notice per alert group the default
+# notice_template is set to
+# "Alert {{ .GroupLabels.alertname }} for {{ .GroupLabels.job }} is {{ .Status }}"
+
+# Patterns used to guess whether NickServ is asking us to IDENTIFY
+# Note: If you need to change this because the bot is not catching a request
+# from a rather common NickServ, please consider sending a PR to update the
+# default config instead.
+nickserv_identify_patterns:
+  - "identify via /msg NickServ identify <password>"
+  - "type /msg NickServ IDENTIFY password"
+  - "authenticate yourself to services with the IDENTIFY command"
diff --git a/alertmanager/alertmanager.yml b/alertmanager/alertmanager.yml
new file mode 100644
index 0000000000000000000000000000000000000000..916898045953f740dfa3a1b6bdeacb12a0c77ce2
--- /dev/null
+++ b/alertmanager/alertmanager.yml
@@ -0,0 +1,16 @@
+route:
+  group_by: ['alertname']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 1h
+  receiver: 'web.hook'
+receivers:
+  - name: 'web.hook'
+    webhook_configs:
+      - url: 'http://localhost:9094/eletrotupiniquim'  # alertmanager-irc-relay listener (http_port: 9094)
+inhibit_rules:
+  - source_match:
+      severity: 'critical'
+    target_match:
+      severity: 'warning'
+    equal: ['alertname', 'dev', 'instance']
diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
new file mode 100644
index 0000000000000000000000000000000000000000..722ce49a79d658f5ab3797bfdde3798efe364ae5
--- /dev/null
+++ b/prometheus/prometheus.yml
@@ -0,0 +1,41 @@
+# my global config
+global:
+  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
+  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+  # scrape_timeout is set to the global default (10s).
+
+# Alertmanager configuration
+alerting:
+  alertmanagers:
+    - follow_redirects: true
+      static_configs:
+        - targets:
+            - localhost:9093
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+  - "/etc/prometheus/service_rules.yml"
+  # - "second_rules.yml"
+
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'service'
+    honor_timestamps: true
+    static_configs:
+      - targets:
+          - rss.eletrotupi.com
+
+  - job_name: 'node'
+    static_configs:
+      - targets:
+          - marreco.eletrotupi.com
+          - guara.eletrotupi.com
+
diff --git a/rules/service_rules.yml b/rules/service_rules.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aac69846257396e3f8f189cd03115a69f65c69d9
--- /dev/null
+++ b/rules/service_rules.yml
@@ -0,0 +1,10 @@
+groups:
+  - name: node
+    rules:
+      - alert: Instance down
+        expr: up == 0
+        for: 30s
+        labels:
+          severity: urgent  # NOTE(review): alertmanager.yml inhibit_rules only match 'critical'/'warning'
+        annotations:
+          summary: "Instance {{ $labels.instance }} is down"