metricas.eletrotupi.com

commit 335a8c19365559c8b292aa3b7f97c62ff98cf418

Author: Pedro Lucas Porcellis <porcellis@eletrotupi.com>

Flesh out initial metrics bits

 README.md | 4 ++
 alertmanager/alertmanager-irc-relay.yml | 51 +++++++++++++++++++++++++++
 alertmanager/alertmanager.yml | 16 ++++++++
 prometheus/prometheus.yml | 41 +++++++++++++++++++++
 rules/service_rules.yml | 10 +++++


diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..26be3543f5dcba523d654690dcacca1a794a2c7a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+# metricas.eletrotupi.com
+
+Arquivos de alertas e de configuração do Prometheus, do Alertmanager e do
+Alertmanager IRC Relay.




diff --git a/alertmanager/alertmanager-irc-relay.yml b/alertmanager/alertmanager-irc-relay.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7ecace3b17c9a19158fae4cbf1eb9e3ad2cda9d5
--- /dev/null
+++ b/alertmanager/alertmanager-irc-relay.yml
@@ -0,0 +1,51 @@
+# Address the relay's HTTP server binds to. Alertmanager's webhook receiver
+# must post its alerts to this host/port.
+#
+http_host: "localhost"
+http_port: 9094
+
+# Connect to this IRC host/port. 7000 is one of Libera Chat's TLS ports, so
+# SSL must stay enabled (plain-text connections need e.g. port 6667).
+# Note: SSL is enabled by default, use "irc_use_ssl: false" to disable.
+irc_host: irc.libera.chat
+irc_port: 7000
+irc_use_ssl: true
+
+# Nickname the bot uses on IRC.
+irc_nickname: inaie
+# Password used to identify with NickServ; left commented out, so none is set.
+#irc_nickname_password:
+# Real name the bot reports on IRC.
+irc_realname: inaie
+
+# Optionally pre-join certain channels.
+#
+# Note: If an alert is sent to a channel that was not pre-joined, the bot
+# will join that channel anyway before sending the notice. Of course this
+# cannot work with password-protected channels.
+irc_channels:
+  - name: "#eletrotupiniquim"
+
+# Define how IRC messages should be sent.
+#
+# When true, only one notice is sent per received webhook payload; left at
+# false here, so a notice is sent for each alert in the webhook data.
+notice_once_per_alert_group: false
+use_privmsg: true  # presumably PRIVMSG instead of NOTICE — confirm upstream
+
+# Define how IRC messages should be formatted.
+#
+# The formatting is based on Go's text/template package.
+msg_template: "Alert {{ .Labels.alertname }} for {{ .Labels.instance }} is {{ .Status }}"
+# Note: When sending only one notice per alert group, the default
+# notice_template is set to
+# "Alert {{ .GroupLabels.alertname }} for {{ .GroupLabels.job }} is {{ .Status }}"
+
+# Patterns used to guess whether NickServ is asking us to IDENTIFY.
+# Note: If you need to change this because the bot is not catching a request
+# from a rather common NickServ, please consider sending a PR to update the
+# default config instead.
+nickserv_identify_patterns:
+  - "identify via /msg NickServ identify <password>"
+  - "type /msg NickServ IDENTIFY password"
+  - "authenticate yourself to services with the IDENTIFY command"




diff --git a/alertmanager/alertmanager.yml b/alertmanager/alertmanager.yml
new file mode 100644
index 0000000000000000000000000000000000000000..916898045953f740dfa3a1b6bdeacb12a0c77ce2
--- /dev/null
+++ b/alertmanager/alertmanager.yml
@@ -0,0 +1,16 @@
+route:
+  group_by: [alertname]
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 1h
+  receiver: web.hook  # the only receiver defined below; no sub-routes
+receivers:
+  - name: web.hook
+    webhook_configs:
+      - url: 'http://localhost:9094/eletrotupiniquim'  # the IRC relay's host/port
+inhibit_rules:
+  - source_match:
+      severity: critical
+    target_match:
+      severity: warning
+    equal: [alertname, dev, instance]




diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
new file mode 100644
index 0000000000000000000000000000000000000000..722ce49a79d658f5ab3797bfdde3798efe364ae5
--- /dev/null
+++ b/prometheus/prometheus.yml
@@ -0,0 +1,41 @@
+# Global settings; no scrape job in this file overrides them.
+global:
+  scrape_interval: 15s  # Scrape targets every 15 seconds (default: 1 minute).
+  evaluation_interval: 15s  # Evaluate rules every 15 seconds (default: 1 minute).
+  # scrape_timeout is left at the global default (10s).
+
+# Alertmanager configuration: a single endpoint entry targeting localhost:9093.
+alerting:
+  alertmanagers:
+  - follow_redirects: true
+    static_configs:
+    - targets:
+      - localhost:9093
+
+# Rule files are loaded once and evaluated every global 'evaluation_interval'.
+rule_files:
+  - "/etc/prometheus/service_rules.yml"
+  # NOTE(review): kept in this repo as rules/service_rules.yml — confirm deploy path.
+
+scrape_configs:
+  # Each job's name is attached to its scraped series as the `job=<job_name>` label.
+  # metrics_path defaults to '/metrics' and scheme to 'http'; no job overrides them.
+  - job_name: prometheus
+    static_configs:
+      - targets:
+          - 'localhost:9090'
+
+  - job_name: service
+    honor_timestamps: true
+    static_configs:
+      - targets:
+          - rss.eletrotupi.com
+
+  # NOTE(review): the 'service' and 'node' targets name no port — confirm
+  # which port actually gets scraped on these hosts.
+  - job_name: node
+    static_configs:
+      - targets:
+          - marreco.eletrotupi.com
+          - guara.eletrotupi.com
+




diff --git a/rules/service_rules.yml b/rules/service_rules.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aac69846257396e3f8f189cd03115a69f65c69d9
--- /dev/null
+++ b/rules/service_rules.yml
@@ -0,0 +1,10 @@
+groups:
+  - name: node
+    rules:
+      - alert: "Instance down"
+        expr: "up == 0"  # no label matchers, so not limited to the 'node' job
+        for: 30s
+        labels:
+          severity: urgent  # NOTE(review): inhibit_rules only match critical/warning
+        annotations:
+          summary: 'Instance {{ $labels.instance }} is down'