1. Environment
Relevant software versions:
kubernetes: v1.22.2
kube-prometheus: release-0.9
2. Deployment
Before deploying, check the compatibility between kube-prometheus and your Kubernetes cluster version on the official kube-prometheus repository: https://github.com/prometheus-operator/kube-prometheus
Download the repository on the master node:
root@master01:~/monitor# git clone https://github.com/prometheus-operator/kube-prometheus.git
Cloning into 'kube-prometheus'...
remote: Enumerating objects: 17447, done.
remote: Counting objects: 100% (353/353), done.
remote: Compressing objects: 100% (129/129), done.
remote: Total 17447 (delta 262), reused 279 (delta 211), pack-reused 17094
Receiving objects: 100% (17447/17447), 9.12 MiB | 5.59 MiB/s, done.
Resolving deltas: 100% (11455/11455), done.
root@master01:~/monitor# ls
kube-prometheus
Deploy kube-prometheus. Note that the resources under manifests/setup/ must be created with kubectl create, not kubectl apply; otherwise you will hit the error “The CustomResourceDefinition "prometheuses.monitoring.coreos.com" is invalid: metadata.annotations: Too long: must have at most 262144 bytes”. The contents of the manifests/ directory can then be created with kubectl apply.
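As an aside, newer versions of the upstream README describe an equivalent sequence that avoids the annotation-size error by using server-side apply and that waits for the CRDs to be established before the rest of the manifests are applied. A sketch, assuming it is run from the repository root:

# Server-side apply avoids the "metadata.annotations: Too long" error
kubectl apply --server-side -f manifests/setup/
# Wait until all CRDs are established before creating resources that use them
kubectl wait --for condition=Established --all CustomResourceDefinition
# Then create the remaining resources
kubectl apply -f manifests/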
root@master01:~/monitor/kube-prometheus# kubectl create -f manifests/setup/
customresourcedefinition.apiextensions.k8s.io/alertmanagerconfigs.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com created
namespace/monitoring created
root@master01:~/monitor/kube-prometheus# kubectl apply -f manifests/
alertmanager.monitoring.coreos.com/main created
networkpolicy.networking.k8s.io/alertmanager-main created
poddisruptionbudget.policy/alertmanager-main created
prometheusrule.monitoring.coreos.com/alertmanager-main-rules created
secret/alertmanager-main created
service/alertmanager-main created
serviceaccount/alertmanager-main created
servicemonitor.monitoring.coreos.com/alertmanager-main created
clusterrole.rbac.authorization.k8s.io/blackbox-exporter created
clusterrolebinding.rbac.authorization.k8s.io/blackbox-exporter created
configmap/blackbox-exporter-configuration created
deployment.apps/blackbox-exporter created
networkpolicy.networking.k8s.io/blackbox-exporter created
service/blackbox-exporter created
serviceaccount/blackbox-exporter created
servicemonitor.monitoring.coreos.com/blackbox-exporter created
secret/grafana-config created
secret/grafana-datasources created
configmap/grafana-dashboard-alertmanager-overview created
configmap/grafana-dashboard-apiserver created
configmap/grafana-dashboard-cluster-total created
configmap/grafana-dashboard-controller-manager created
configmap/grafana-dashboard-grafana-overview created
configmap/grafana-dashboard-k8s-resources-cluster created
configmap/grafana-dashboard-k8s-resources-namespace created
configmap/grafana-dashboard-k8s-resources-node created
configmap/grafana-dashboard-k8s-resources-pod created
configmap/grafana-dashboard-k8s-resources-workload created
configmap/grafana-dashboard-k8s-resources-workloads-namespace created
configmap/grafana-dashboard-kubelet created
configmap/grafana-dashboard-namespace-by-pod created
configmap/grafana-dashboard-namespace-by-workload created
configmap/grafana-dashboard-node-cluster-rsrc-use created
configmap/grafana-dashboard-node-rsrc-use created
configmap/grafana-dashboard-nodes-darwin created
configmap/grafana-dashboard-nodes created
configmap/grafana-dashboard-persistentvolumesusage created
configmap/grafana-dashboard-pod-total created
configmap/grafana-dashboard-prometheus-remote-write created
configmap/grafana-dashboard-prometheus created
configmap/grafana-dashboard-proxy created
configmap/grafana-dashboard-scheduler created
configmap/grafana-dashboard-workload-total created
configmap/grafana-dashboards created
deployment.apps/grafana created
networkpolicy.networking.k8s.io/grafana created
prometheusrule.monitoring.coreos.com/grafana-rules created
service/grafana created
serviceaccount/grafana created
servicemonitor.monitoring.coreos.com/grafana created
prometheusrule.monitoring.coreos.com/kube-prometheus-rules created
clusterrole.rbac.authorization.k8s.io/kube-state-metrics created
clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics created
deployment.apps/kube-state-metrics created
networkpolicy.networking.k8s.io/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kube-state-metrics-rules created
service/kube-state-metrics created
serviceaccount/kube-state-metrics created
servicemonitor.monitoring.coreos.com/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kubernetes-monitoring-rules created
servicemonitor.monitoring.coreos.com/kube-apiserver created
servicemonitor.monitoring.coreos.com/coredns created
servicemonitor.monitoring.coreos.com/kube-controller-manager created
servicemonitor.monitoring.coreos.com/kube-scheduler created
servicemonitor.monitoring.coreos.com/kubelet created
clusterrole.rbac.authorization.k8s.io/node-exporter created
clusterrolebinding.rbac.authorization.k8s.io/node-exporter created
daemonset.apps/node-exporter created
networkpolicy.networking.k8s.io/node-exporter created
prometheusrule.monitoring.coreos.com/node-exporter-rules created
service/node-exporter created
serviceaccount/node-exporter created
servicemonitor.monitoring.coreos.com/node-exporter created
clusterrole.rbac.authorization.k8s.io/prometheus-k8s created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-k8s created
networkpolicy.networking.k8s.io/prometheus-k8s created
poddisruptionbudget.policy/prometheus-k8s created
prometheus.monitoring.coreos.com/k8s created
prometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s-config created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
service/prometheus-k8s created
serviceaccount/prometheus-k8s created
servicemonitor.monitoring.coreos.com/prometheus-k8s created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
clusterrole.rbac.authorization.k8s.io/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-adapter created
clusterrolebinding.rbac.authorization.k8s.io/resource-metrics:system:auth-delegator created
clusterrole.rbac.authorization.k8s.io/resource-metrics-server-resources created
configmap/adapter-config created
deployment.apps/prometheus-adapter created
networkpolicy.networking.k8s.io/prometheus-adapter created
poddisruptionbudget.policy/prometheus-adapter created
rolebinding.rbac.authorization.k8s.io/resource-metrics-auth-reader created
service/prometheus-adapter created
serviceaccount/prometheus-adapter created
servicemonitor.monitoring.coreos.com/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/prometheus-operator created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator created
deployment.apps/prometheus-operator created
networkpolicy.networking.k8s.io/prometheus-operator created
prometheusrule.monitoring.coreos.com/prometheus-operator-rules created
service/prometheus-operator created
serviceaccount/prometheus-operator created
servicemonitor.monitoring.coreos.com/prometheus-operator created
Verify the deployment: all Pods in the newly created monitoring namespace start up normally and the related resources are created as expected.
root@master01:~/monitor/kube-prometheus# kubectl get po -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 2m18s
alertmanager-main-1 2/2 Running 0 2m18s
alertmanager-main-2 2/2 Running 0 2m18s
blackbox-exporter-58d99cfb6d-5f44f 3/3 Running 0 2m30s
grafana-5cfbb9b4c5-7qw6c 1/1 Running 0 2m29s
kube-state-metrics-c9f8b947b-xrhbd 3/3 Running 0 2m29s
node-exporter-gzspv 2/2 Running 0 2m28s
node-exporter-jz588 2/2 Running 0 2m28s
node-exporter-z8vps 2/2 Running 0 2m28s
prometheus-adapter-5bf8d6f7c6-5jf6n 1/1 Running 0 2m26s
prometheus-adapter-5bf8d6f7c6-sz4ns 1/1 Running 0 2m27s
prometheus-k8s-0 2/2 Running 0 2m16s
prometheus-k8s-1 2/2 Running 0 2m16s
prometheus-operator-6cbd5c84fb-8xslw 2/2 Running 0 2m26s
root@master01:~# kubectl get crd -n monitoring
NAME CREATED AT
alertmanagerconfigs.monitoring.coreos.com 2023-01-04T14:54:38Z
alertmanagers.monitoring.coreos.com 2023-01-04T14:54:38Z
podmonitors.monitoring.coreos.com 2023-01-04T14:54:38Z
probes.monitoring.coreos.com 2023-01-04T14:54:38Z
prometheuses.monitoring.coreos.com 2023-01-04T14:54:38Z
prometheusrules.monitoring.coreos.com 2023-01-04T14:54:38Z
servicemonitors.monitoring.coreos.com 2023-01-04T14:54:38Z
thanosrulers.monitoring.coreos.com 2023-01-04T14:54:38Z
root@master01:~# kubectl get secret -n monitoring
NAME TYPE DATA AGE
alertmanager-main Opaque 1 3d21h
alertmanager-main-generated Opaque 1 3d21h
alertmanager-main-tls-assets-0 Opaque 0 3d21h
alertmanager-main-token-6c5pj kubernetes.io/service-account-token 3 3d21h
alertmanager-main-web-config Opaque 1 3d21h
blackbox-exporter-token-22c6s kubernetes.io/service-account-token 3 3d21h
default-token-kff56 kubernetes.io/service-account-token 3 3d22h
grafana-config Opaque 1 3d21h
grafana-datasources Opaque 1 3d21h
grafana-token-7dvc9 kubernetes.io/service-account-token 3 3d21h
kube-state-metrics-token-nzhtn kubernetes.io/service-account-token 3 3d21h
node-exporter-token-8zrpf kubernetes.io/service-account-token 3 3d21h
prometheus-adapter-token-jmdch kubernetes.io/service-account-token 3 3d21h
prometheus-k8s Opaque 1 3d21h
prometheus-k8s-tls-assets-0 Opaque 0 3d21h
prometheus-k8s-token-jfwmh kubernetes.io/service-account-token 3 3d21h
prometheus-k8s-web-config Opaque 1 3d21h
prometheus-operator-token-w4xcf kubernetes.io/service-account-token 3 3d21h
3. Exposing the Services
The services can be exposed in two ways, either through an Ingress or through a NodePort Service, so that they can be reached from outside the cluster. Note that kube-prometheus creates a number of NetworkPolicies by default; if the cluster runs a CNI that enforces them, such as Calico, the services will remain unreachable from clients even after they are exposed. Either adjust those policies or simply delete them. (The policy shown in the original screenshot, not reproduced here, also needs to be deleted; see the command after the listing below.)
root@master01:~# kubectl get networkpolicies.networking.k8s.io -n monitoring
NAME POD-SELECTOR AGE
alertmanager-main app.kubernetes.io/component=alert-router,app.kubernetes.io/instance=main,app.kubernetes.io/name=alertmanager,app.kubernetes.io/part-of=kube-prometheus 14h
blackbox-exporter app.kubernetes.io/component=exporter,app.kubernetes.io/name=blackbox-exporter,app.kubernetes.io/part-of=kube-prometheus 14h
grafana app.kubernetes.io/component=grafana,app.kubernetes.io/name=grafana,app.kubernetes.io/part-of=kube-prometheus 14h
kube-state-metrics app.kubernetes.io/component=exporter,app.kubernetes.io/name=kube-state-metrics,app.kubernetes.io/part-of=kube-prometheus 14h
node-exporter app.kubernetes.io/component=exporter,app.kubernetes.io/name=node-exporter,app.kubernetes.io/part-of=kube-prometheus 14h
prometheus-adapter app.kubernetes.io/component=metrics-adapter,app.kubernetes.io/name=prometheus-adapter,app.kubernetes.io/part-of=kube-prometheus 14h
prometheus-operator app.kubernetes.io/component=controller,app.kubernetes.io/name=prometheus-operator,app.kubernetes.io/part-of=kube-prometheus 14h
root@master01:~# kubectl delete networkpolicies.networking.k8s.io grafana -n monitoring
networkpolicy.networking.k8s.io "grafana" deleted
root@master01:~# kubectl delete networkpolicies.networking.k8s.io alertmanager-main -n monitoring
networkpolicy.networking.k8s.io "alertmanager-main" deleted
3.1 Exposing services via NodePort (optional)
Edit the Prometheus Service manifest:
root@master01:~/monitor/kube-prometheus# cat manifests/prometheus-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.41.0
  name: prometheus-k8s
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: web
    port: 9090
    targetPort: web
    nodePort: 30090
  - name: reloader-web
    port: 8080
    targetPort: reloader-web
  selector:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: k8s
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: ClientIP
root@master01:~/monitor/kube-prometheus# kubectl apply -f manifests/prometheus-service.yaml
service/prometheus-k8s configured
Edit the Grafana and Alertmanager Service manifests in the same way.
root@master01:~/monitor/kube-prometheus/manifests# cat grafana-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 9.3.2
  name: grafana
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: http
    port: 3000
    targetPort: http
    nodePort: 30030
  selector:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
root@master01:~/monitor/kube-prometheus/manifests# cat alertmanager-service.yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 0.25.0
  name: alertmanager-main
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: web
    port: 9093
    targetPort: web
    nodePort: 30093
  - name: reloader-web
    port: 8080
    targetPort: reloader-web
  selector:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: ClientIP
Apply the changes:
root@master01:~/monitor/kube-prometheus/manifests# kubectl apply -f grafana-service.yaml
service/grafana configured
root@master01:~/monitor/kube-prometheus/manifests# kubectl apply -f alertmanager-service.yaml
service/alertmanager-main configured
Verify the Services after the changes:
root@master01:~# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main NodePort 10.100.23.255 <none> 9093:30093/TCP,8080:32647/TCP 14h
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 14h
blackbox-exporter ClusterIP 10.100.48.67 <none> 9115/TCP,19115/TCP 14h
grafana NodePort 10.100.153.195 <none> 3000:30030/TCP 14h
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 14h
node-exporter ClusterIP None <none> 9100/TCP 14h
prometheus-adapter ClusterIP 10.100.226.131 <none> 443/TCP 14h
prometheus-k8s NodePort 10.100.206.216 <none> 9090:30090/TCP,8080:30019/TCP 14h
prometheus-operated ClusterIP None <none> 9090/TCP 14h
prometheus-operator ClusterIP None <none> 8443/TCP 14h
Access the services from a client browser to verify.
Prometheus runs with two replicas here, and we access it through the Service, so you would normally expect requests to be balanced across the two backend instances. In practice, a given client is always routed to the same instance, because the Service is created with sessionAffinity: ClientIP, which pins sessions to a backend based on the client IP; there is therefore no need to worry about requests landing on different replicas. Running multiple replicas is the usual high-availability approach, and in theory the local data of each replica should be identical. Note, however, that Prometheus actively pulls its metrics, and since the scrape times of the replicas can never be exactly aligned (and network conditions are not guaranteed to be identical either), the data stored by different replicas will most likely differ. That is why the official manifests configure session affinity: it ensures that the data you see is always consistent across requests.
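For a quick command-line check from the client, a sketch assuming 192.168.100.101 stands in for the IP of any cluster node (substitute your own):

# Prometheus health endpoint, reached through the NodePort configured above (9090 -> 30090)
curl -s http://192.168.100.101:30090/-/healthy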
You can now log in to the Grafana UI with the default credentials admin:admin.
Pick one of the built-in dashboards to test; the monitoring data should already be visible.
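The default credentials can also be checked from the command line through Grafana's HTTP API; again, 192.168.100.101 is a placeholder node IP:

# Returns the current organization as JSON when basic auth with admin:admin is accepted
curl -s -u admin:admin http://192.168.100.101:30030/api/org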
3.2 Exposing services via Ingress (optional)
A brief note on how Ingress works: an Ingress controller is deployed first, similar to an nginx master process, listening on a port (that is, a host port is mapped to the Ingress controller). All requests reach the controller first, which holds the full configuration, and are then forwarded to the individual backends according to the Ingress rules.
This example uses ingress-nginx for testing:
root@master01:~# kubectl get svc -n ingress-nginx
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
ingress-nginx-controller NodePort 10.100.184.74 <none> 80:30080/TCP,443:30787/TCP 3d6h
ingress-nginx-controller-admission ClusterIP 10.100.216.215 <none> 443/TCP 3d6h
root@master01:~# kubectl get po -n ingress-nginx
NAME READY STATUS RESTARTS AGE
ingress-nginx-controller-69fbfb4bfd-zjs2w 1/1 Running 1 (9h ago) 9h
Next, write the Ingress manifests for accessing the monitoring stack. When writing them, be aware of your cluster version and of the changes in the Ingress API.
The Ingress API has changed several times, and the configuration fields differ slightly between versions (a quick check follows the list):
● extensions/v1beta1: used before Kubernetes 1.16
● networking.k8s.io/v1beta1: used before Kubernetes 1.19
● networking.k8s.io/v1: used from Kubernetes 1.19 onwards
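To confirm which of these API versions your cluster actually serves:

# networking.k8s.io/v1 should be listed on Kubernetes 1.19 and later
kubectl api-versions | grep networking.k8s.io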
Write a manifest for each service that needs to be exposed and create the resources.
root@master01:~/ingress/monitor-ingress# cat promethues-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus-ingress
  namespace: monitoring
spec:
  ingressClassName: nginx   # in this API version the ingress class is specified here rather than via an annotation
  rules:
  - host: prometheus.snow.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: prometheus-k8s
            port:
              number: 9090
root@master01:~/ingress/monitor-ingress# cat grafana-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana-ingress
  namespace: monitoring
spec:
  ingressClassName: nginx
  rules:
  - host: grafana.snow.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: grafana
            port:
              number: 3000
root@master01:~/ingress/monitor-ingress# cat alert-manager-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: alert-ingress
  namespace: monitoring
spec:
  ingressClassName: nginx
  rules:
  - host: alert.snow.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: alertmanager-main
            port:
              number: 9093
root@master01:~/ingress/monitor-ingress# kubectl apply -f promethues-ingress.yaml
ingress.networking.k8s.io/prometheus-ingress created
root@master01:~/ingress/monitor-ingress# kubectl apply -f grafana-ingress.yaml
ingress.networking.k8s.io/grafana-ingress created
root@master01:~/ingress/monitor-ingress# kubectl apply -f alert-manager-ingress.yaml
ingress.networking.k8s.io/alter-ingress created
Verify once the Ingress resources have been created:
root@master01:~/ingress/monitor-ingress# kubectl get ingress -n monitoring
NAME CLASS HOSTS ADDRESS PORTS AGE
alter-ingress nginx alert.snow.com 10.100.184.74 80 3m42s
grafana-ingress nginx grafana.snow.com 10.100.184.74 80 14h
prometheus-ingress nginx prometheus.snow.com 10.100.184.74 80 12h
Edit the hosts file on the client machine. Note that any proxy tool should be turned off when testing from that machine; Clash, for example, will cause the local hosts resolution to be ignored.
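A minimal example of the hosts entries, assuming 192.168.100.101 is a node IP on which the ingress-nginx controller's NodePort is reachable (replace it with your own):

192.168.100.101 prometheus.snow.com grafana.snow.com alert.snow.com

Since the ingress-nginx controller in this setup is itself exposed as a NodePort Service (80:30080), the port has to be included in the URL as well, e.g. http://grafana.snow.com:30080/.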
Then test access in a browser using the domain names.
PS: Everything above has been verified in my own environment; corrections are welcome if you spot any mistakes or unclear wording.