Reference

https://www.psvmc.cn/article/2022-12-27-prometheus-install.html

Background

Just doing this to kill some time.

Deploying Prometheus Server

Installation and Configuration

Download
**Prefer the LTS (long-term support) release**
https://prometheus.io/download/
wget https://github.com/prometheus/prometheus/releases/download/v2.53.3/prometheus-2.53.3.linux-amd64.tar.gz
tar -zxvf prometheus-2.53.3.linux-amd64.tar.gz -C /opt/
ln -s /opt/prometheus-2.53.3.linux-amd64 /opt/prometheus
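As an optional sanity check, both binaries should report their version once unpacked:

/opt/prometheus/prometheus --version
/opt/prometheus/promtool --version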
Set up the user and group
groupadd prometheus
useradd -g prometheus -s /sbin/nologin -M prometheus
chown -R prometheus:prometheus /opt/prometheus-2.53.3.linux-amd64
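A quick optional check that the account exists and cannot log in interactively:

id prometheus
getent passwd prometheus    # the shell field should show /sbin/nologin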
Modify the configuration
vi /opt/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Remote Grafana monitor
remote_write:
  - url: https://localhost:12345/agent/api/v1/metrics/instance/hosted-prometheus/write

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      # Prometheus self-monitoring
      - targets: ["localhost:9090"]
        labels:
          area: "ShangHai"

  # node_exporter metrics collection
  - job_name: "node"
    static_configs:
      - targets:
          - "localhost:9100"
        labels:
          area: "SGP"

      - targets:
          - "host1:9100"
        labels:
          area: "HK"
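The custom area label is attached to every series scraped from those targets, so it can be used directly in PromQL. A couple of illustrative queries, assuming the node_exporter targets above are up:

up{job="node"}           # 1 if a node target is reachable, 0 otherwise
node_load1{area="HK"}    # 1-minute load average reported by the HK host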
Check the configuration file
/opt/prometheus/promtool check config /opt/prometheus/prometheus.yml
Create the systemd unit
vi /usr/lib/systemd/system/prometheus.service
[Unit]
Description=prometheus
After=network.target

[Service]
User=prometheus
Group=prometheus
WorkingDirectory=/opt/prometheus
ExecStart=/opt/prometheus/prometheus
ExecReload=/bin/kill -HUP $MAINPID
SuccessExitStatus=3 4
Restart=on-failure

[Install]
WantedBy=multi-user.target
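Because WorkingDirectory is /opt/prometheus, the server picks up ./prometheus.yml and ./data by default. If you prefer explicit paths, a common variant of the ExecStart line looks like the sketch below (the data directory is just an example location):

ExecStart=/opt/prometheus/prometheus \
  --config.file=/opt/prometheus/prometheus.yml \
  --storage.tsdb.path=/opt/prometheus/data \
  --web.listen-address=:9090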
Start
systemctl daemon-reload 
systemctl start prometheus.service
systemctl enable prometheus.service
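To verify the server is actually up, check the unit status and the built-in health endpoint:

systemctl status prometheus.service
curl http://localhost:9090/-/healthy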

Deploying Node-Exporter

All-in-one setup
sudo groupadd -g 9100 monitor
sudo useradd -g 9100 -u 9100 -s /sbin/nologin -M monitor

wget https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz

tar -zxvf node_exporter-1.8.2.linux-amd64.tar.gz

sudo mv node_exporter-1.8.2.linux-amd64 /opt/node_exporter

sudo chown -R monitor:monitor /opt/node_exporter


sudo tee /etc/systemd/system/node-exporter.service > /dev/null << 'EOF'
[Unit]
Description=node-exporter service
After=network.target

[Service]
User=monitor
Group=monitor
KillMode=control-group
Restart=on-failure
RestartSec=60
ExecStart=/opt/node_exporter/node_exporter --collector.cpu --collector.meminfo --collector.cpu.info --collector.diskstats --collector.ipvs --collector.loadavg --collector.netclass --web.listen-address=:9100

[Install]
WantedBy=multi-user.target
EOF
Restart the service
systemctl daemon-reload 
systemctl start node-exporter.service
systemctl enable node-exporter.service
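A quick way to verify the exporter is responding (node_cpu_seconds_total comes from the cpu collector enabled above):

curl -s http://localhost:9100/metrics | grep ^node_cpu_seconds_total | head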

Security configuration

sudo iptables -A INPUT -p tcp -m tcp -s 1.1.1.1/32 --dport 9100 -j ACCEPT
sudo iptables -A INPUT -p tcp -m tcp --dport 9100 -j DROP
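The ACCEPT rule presumably whitelists the Prometheus server's address (1.1.1.1/32 here is only an example); everything else hitting port 9100 is dropped. These rules live only in memory and are lost on reboot; one way to persist them, assuming the usual persistence tooling is installed:

sudo sh -c 'iptables-save > /etc/sysconfig/iptables'   # RHEL/CentOS with iptables-services
sudo netfilter-persistent save                         # Debian/Ubuntu with iptables-persistent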

Using Grafana Cloud

See https://grafana.com/docs/grafana-cloud/
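The remote_write block in prometheus.yml above can point at a Grafana Cloud Prometheus endpoint instead of a local agent. A minimal sketch of that configuration; the URL, username (instance ID) and password (access token) are placeholders to be replaced with the values from your own Grafana Cloud stack:

remote_write:
  - url: "https://prometheus-prod-xx-xxx.grafana.net/api/prom/push"   # placeholder push URL
    basic_auth:
      username: "123456"          # placeholder: your Prometheus instance ID
      password: "glc_xxxxxxxx"    # placeholder: your Grafana Cloud access token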