1 安装

# Working directory for the downloaded chart, and the pinned chart version.
var_base=/root/helm
var_app="$var_base/prometheus"
var_version=22.6.2

# Every expansion is quoted so a path with spaces or glob characters stays a
# single word. The directory is entered only if it could be created.
mkdir -p "$var_app" && cd "$var_app"

# Register the community chart repository, refresh the index, then download
# and unpack the pinned chart archive. The install commands further down
# reference the unpacked chart as ./prometheus.
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm pull prometheus-community/prometheus --version "$var_version"
tar xf "prometheus-$var_version.tgz"

# deploy_prometheus [EXTRA_HELM_FLAGS...]
#
# Runs `helm upgrade --install` for release "prometheus" in namespace
# "monitoring" from the unpacked chart in ./prometheus. Any arguments are
# inserted directly after `--install`, so the dry run and the real rollout
# share a single value list and cannot drift apart.
#
# Values set below:
#   - server: nginx ingress for prometheus.example.com with TLS, a 50Gi
#     persistent volume on storage class rook-cephfs, CPU/memory requests and
#     limits, retention of 30d.
#   - alertmanager: enabled, 10Gi persistence on rook-cephfs, configmap
#     reload, nginx ingress for alertmanager.example.com with TLS, CPU/memory
#     requests and limits.
#   - kube-state-metrics (with an image override), prometheus-node-exporter
#     and prometheus-pushgateway enabled.
#
# Arguments that contain [N] are single-quoted as a whole so the shell never
# treats the brackets as a glob pattern (zsh aborts with "no matches found";
# bash would substitute a matching file name if one existed).
#
# NOTE(review): kube-state-metrics.image.repository is overridden without a
# registry. If the bundled subchart prepends its own image.registry, the
# rendered image may not be the intended bitnami one - check the dry-run
# output before rolling out.
deploy_prometheus() {
  helm upgrade --install "$@" \
    --namespace monitoring \
    --create-namespace \
    --set server.ingress.enabled=true \
    --set server.ingress.ingressClassName=nginx \
    --set-string server.ingress.annotations."kubernetes\.io/tls-acme"=true \
    --set-string server.ingress.annotations."nginx\.ingress\.kubernetes\.io/ssl-redirect"=true \
    --set-string server.ingress.annotations."cert-manager\.io/cluster-issuer"='letsencrypt-prod' \
    --set 'server.ingress.hosts[0]=prometheus.example.com' \
    --set server.ingress.path='/' \
    --set server.ingress.pathType='Prefix' \
    --set 'server.ingress.tls[0].secretName=prometheus-example-com-tls' \
    --set 'server.ingress.tls[0].hosts[0]=prometheus.example.com' \
    --set server.persistentVolume.enabled=true \
    --set server.persistentVolume.size=50Gi \
    --set server.persistentVolume.storageClass=rook-cephfs \
    --set server.resources.limits.cpu=2000m \
    --set server.resources.limits.memory=2048Mi \
    --set server.resources.requests.cpu=500m \
    --set server.resources.requests.memory=1024Mi \
    --set server.retention="30d" \
    --set alertmanager.enabled=true \
    --set alertmanager.persistence.size=10Gi \
    --set alertmanager.persistence.storageClass=rook-cephfs \
    --set alertmanager.configmapReload.enabled=true \
    --set alertmanager.ingress.enabled=true \
    --set alertmanager.ingress.className=nginx \
    --set-string alertmanager.ingress.annotations."kubernetes\.io/tls-acme"=true \
    --set-string alertmanager.ingress.annotations."nginx\.ingress\.kubernetes\.io/ssl-redirect"=true \
    --set-string alertmanager.ingress.annotations."cert-manager\.io/cluster-issuer"='letsencrypt-prod' \
    --set 'alertmanager.ingress.hosts[0].host=alertmanager.example.com' \
    --set 'alertmanager.ingress.hosts[0].paths[0].path=/' \
    --set 'alertmanager.ingress.hosts[0].paths[0].pathType=ImplementationSpecific' \
    --set 'alertmanager.ingress.tls[0].secretName=alertmanager-example-com-tls' \
    --set 'alertmanager.ingress.tls[0].hosts[0]=alertmanager.example.com' \
    --set alertmanager.resources.limits.cpu=500m \
    --set alertmanager.resources.limits.memory=512Mi \
    --set alertmanager.resources.requests.cpu=100m \
    --set alertmanager.resources.requests.memory=128Mi \
    --set kube-state-metrics.enabled=true \
    --set kube-state-metrics.image.repository='bitnami/kube-state-metrics' \
    --set kube-state-metrics.image.tag='2.9.2-debian-11-r0' \
    --set prometheus-node-exporter.enabled=true \
    --set prometheus-pushgateway.enabled=true \
    prometheus ./prometheus
}

# Step 1: simulate the upgrade and print the rendered manifests for review.
deploy_prometheus --dry-run --debug

# Step 2: perform the actual install/upgrade with the same values.
deploy_prometheus

# Keep re-listing the pods in the monitoring namespace (stop with Ctrl-C).
watch kubectl --namespace monitoring get pods
# Trigger a rolling restart of the Prometheus server deployment.
kubectl --namespace monitoring rollout restart deployment prometheus-server
# Test access through the ingress: request the Prometheus virtual host by
# sending its Host header to 192.168.1.211
# (NOTE(review): presumably the ingress controller address - confirm).
curl --header 'Host:prometheus.example.com' http://192.168.1.211

# Remove the release from the monitoring namespace.
helm uninstall prometheus --namespace monitoring

2 修改配置

# Modify the configuration

# Edit the values file of the unpacked chart.
vim prometheus/values.yaml

# Open a shell in the running Prometheus server. The Deployment is addressed
# instead of a generated pod name, because the pod-name suffix changes on
# every rollout (for example after `kubectl rollout restart`) and a copied
# pod name stops resolving.
kubectl -n monitoring exec -it deploy/prometheus-server -- /bin/sh

kubectl -n monitoring delete -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: monitoring
  name: test-prometheus
data:
  test_prometheus_token.yaml: |
    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
EOF

# Append one "IP hostname" line per test-cluster node to /etc/hosts.
# `>>` appends, so running this twice leaves duplicate entries.
# NOTE(review): presumably meant for wherever the <node>.test.com:9100 scrape
# targets have to be resolved - confirm which machine/container this runs in.
printf '%s\n' \
  '192.168.1.101 controller-1-01.test.com' \
  '192.168.1.102 worker-1-01.test.com' \
  '192.168.1.103 controller-2-01.test.com' \
  '192.168.1.104 worker-2-01.test.com' \
  '192.168.1.105 controller-3-01.test.com' \
  '192.168.1.106 worker-3-01.test.com' \
  >> /etc/hosts

# Create or update the objects defined in the local manifest file.
# No namespace flag is passed, so the namespace comes from the manifest itself
# or from the current kubectl context.
kubectl apply --filename test-prometheus-configmap.yaml

      # Node metrics of the test cluster: discover nodes through the API
      # server at 192.168.1.101:6443, then scrape each one at
      # <node-name>.test.com:9100.
      #
      # The bearer token and tls_config are set only under
      # kubernetes_sd_configs, where they authenticate discovery against the
      # HTTPS API server. The scrape itself sets no `scheme`, so it uses the
      # Prometheus default (http): a job-level bearer_token_file would send
      # the API token unencrypted to every node's :9100 endpoint, and a
      # job-level tls_config has no effect on plain-HTTP scrapes.
      - job_name: test-kubernetes-nodes
        kubernetes_sd_configs:
          - api_server: https://192.168.1.101:6443
            role: node
            bearer_token_file: /etc/config/test_prometheus_token.yaml
            tls_config:
              # Certificate verification of the API server is disabled.
              insecure_skip_verify: true
        relabel_configs:
          # Copy every Kubernetes node label onto the target, using the part
          # of the meta label name captured by the regex as the label name.
          - separator: ;
            regex: __meta_kubernetes_node_label_(.+)
            replacement: $1
            action: labelmap
          # Rewrite the scrape address to <node-name>.test.com:9100.
          - source_labels: [__meta_kubernetes_node_name]
            separator: ;
            regex: (.+)
            replacement: '${1}.test.com:9100'
            target_label: __address__
            action: replace

      # cAdvisor metrics of the test cluster, fetched through the API server:
      # every discovered node is scraped at 192.168.1.101:6443 over HTTPS
      # under the path /api/v1/nodes/<node-name>/proxy/metrics/cadvisor.
      # The same token file is used for discovery and for the scrape;
      # certificate verification is disabled for both.
      - job_name: 'test-kubernetes-nodes-cadvisor'
        scheme: 'https'
        bearer_token_file: '/etc/config/test_prometheus_token.yaml'
        tls_config:
          insecure_skip_verify: true
        kubernetes_sd_configs:
          - role: 'node'
            api_server: 'https://192.168.1.101:6443'
            bearer_token_file: '/etc/config/test_prometheus_token.yaml'
            tls_config:
              insecure_skip_verify: true
        relabel_configs:
          # Copy every Kubernetes node label onto the target.
          - action: 'labelmap'
            regex: '__meta_kubernetes_node_label_(.+)'
            replacement: '$1'
            separator: ';'
          # Send every scrape to the API server address.
          - action: 'replace'
            regex: '(.*)'
            replacement: '192.168.1.101:6443'
            target_label: '__address__'
            separator: ';'
          # Build the per-node proxy path from the node name.
          - action: 'replace'
            source_labels:
              - '__meta_kubernetes_node_name'
            regex: '(.+)'
            replacement: '/api/v1/nodes/$1/proxy/metrics/cadvisor'
            target_label: '__metrics_path__'
            separator: ';'

      # kube-state-metrics of the test cluster, scraped at one fixed address.
      # NOTE(review): port 32622 looks like a NodePort - confirm it is stable.
      - job_name: 'test-kube-state-metrics'
        static_configs:
          - targets:
              - '192.168.1.101:32622'