共计 9922 个字符,预计需要花费 25 分钟才能阅读完成。
1. 硬件要求
1、Master 主机:2 核 CPU、4G 内存、20G 硬盘
2、Node 主机:4+ 核 CPU、8G+ 内存、40G+ 硬盘
2、集群中的所有机器的网络彼此均能相互连接
3、节点之中不可以有重复的主机名、MAC 地址或 product_uuid
4、开启机器上的某些端口
5、为了保证 kubelet 正常工作,必须禁用交换分区(swap)
- 在每一台节点上 写上集群主机的 hosts
10.2.xx.215 gz-xx-gw-c7 # master
10.2.xx.128 gz-xx-node1-c7 # node
10.2.xx.246 gz-xx-node2-c7 # node
2. 服务器环境配置
2.1 关闭防火墙(所有节点)
关闭防火墙并设置开机不启动
systemctl stop firewalld
systemctl disable firewalld
2.3 关闭 swap 分区(所有节点)
修改后重启服务器生效
swapoff -a
vim /etc/fstab #永久禁用 swap, 删除或注释掉 /etc/fstab 里的 swap 设备的挂载命令即可
#/dev/mapper/centos-swap swap swap defaults 0 0
2.4 CentOS 7 内核升级(所有节点)
因为 CentOS 7.9 的系统默认内核版本是 3.10,3.10 的内核有很多 BUG,最常见的一个就是 cgroup memory leak(所有主机都要执行)
1)下载所需要的内核版本,我这里采用 rpm 安装,所以直接下载的 rpm 包
[root@localhost ~]# wget https://cbs.centos.org/kojifiles/packages/kernel/4.9.220/37.el7/x86_64/kernel-4.9.220-37.el7.x86_64.rpm
2)执行 rpm 升级即可
[root@localhost ~]# rpm -ivh kernel-4.9.220-37.el7.x86_64.rpm
#查看系统可用内核,并设置启动项
[root@gz-bjrd-devops-gw-c7 dd]# sudo awk -F\' '$1=="menuentry " {print i++ " : " $2}' /etc/grub2.cfg
0 : CentOS Linux (4.9.220-37.el7.x86_64) 7 (Core)
1 : CentOS Linux (3.10.0-1160.88.1.el7.x86_64) 7 (Core)
2 : CentOS Linux (3.10.0-1160.76.1.el7.x86_64) 7 (Core)
3 : CentOS Linux (3.10.0-1160.el7.x86_64) 7 (Core)
4 : CentOS Linux (0-rescue-1caefa67ba0d4c758d6742dfc455d487) 7 (Core)
#指定开机启动内核版本
grub2-set-default 0 # 或者按菜单项名称指定:grub2-set-default 'CentOS Linux (4.9.220-37.el7.x86_64) 7 (Core)'
#生成 grub 配置文件
grub2-mkconfig -o /boot/grub2/grub.cfg
3)升级完 reboot,然后查看内核是否成功升级 ################ 一定要重启
[root@localhost ~]# reboot
#重启系统,验证
[root@k8s-master ~]# uname -a
Linux gz-xxs-gw-c7 4.9.220-37.el7.x86_64 #1 SMP Tue Apr 28 10:14:25 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
2.5 设置主机名(所有节点)
[root@k8s-master ~]# cat /etc/hosts
10.2.xx.215 gz-xx-gw-c7 # master
10.2.xx.128 gz-xx-node1-c7 # node
10.2.xx.246 gz-xx-node2-c7 # node
2.6 时间同步(所有节点)
ntpdate cn.pool.ntp.org
2.7 配置 iptables 规则
注意自己原有的规则:下面的命令会清空现有的 iptables 规则(谨慎点,朋友)
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat && iptables -P FORWARD ACCEPT
设置系统参数
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
3、安装 docker(所有节点)
[root@gz-xx-gw-c7 ~]# yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
[root@gz-xx-gw-c7 ~]# yum install docker-ce-23.0.6-1.el7 -y
[root@gz-xx-gw-c7 ~]# mkdir -pv /opt/docker
[root@gz-xx-gw-c7 ~]# cat /etc/docker/daemon.json
{"registry-mirrors": ["https://zd6lf0p4.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"data-root":"/opt/docker"
}
[root@gz-xx-gw-c7 ~]# systemctl start docker
[root@gz-xx-gw-c7 ~]# systemctl enable docker
[root@gz-xx-gw-c7 ~]#
配置镜像加速和 cgroup
没有则自己创建文件
[root@k8s-master ~]# cat /etc/docker/daemon.json
{"registry-mirrors": ["https://zd6lf0p4.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
#"exec-opts": ["native.cgroupdriver=systemd"]为 docker 使用 cgroup 的方式,k8s 使用的方式也是 systemd,两边要一致
#加载配置
systemctl restart docker
systemctl enable docker
4. 安装 cri-dockerd(所有节点)
# 从 https://github.com/Mirantis/cri-dockerd/releases 中下载最新的 rpm 包, 手动下载后上传到服务器里
rpm -ivh cri-dockerd-0.3.1-3.el7.x86_64.rpm
#修改 /usr/lib/systemd/system/cri-docker.service 文件中的 ExecStart 配置
vim /usr/lib/systemd/system/cri-docker.service
ExecStart=/usr/bin/cri-dockerd --network-plugin=cni --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7
systemctl daemon-reload
systemctl enable --now cri-docker
5.yum 安装 kubeadm、kubelet、kubectl(所有节点)
配置 yum 源
vim /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
yum install -y kubelet kubeadm kubectl
systemctl enable kubelet --now
master pull images (master)
kubeadm config images pull --kubernetes-version=v1.27.3 --image-repository registry.aliyuncs.com/google_containers --cri-socket unix:///var/run/cri-dockerd.sock
6. 初始化 master 节点的控制平面(master 节点)
kubeadm init \
--apiserver-advertise-address=10.2.xx.215 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.27.3 \
--service-cidr=172.18x.0.0/12 \
--pod-network-cidr=172.17x.0.0/16 \
--cri-socket unix:///var/run/cri-dockerd.sock \
--ignore-preflight-errors=all
会出现下面的内容
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.2.xx.215:6443 --token 4eozgp.xm7tfxxxxxxxxxz42y \
--discovery-token-ca-cert-hash sha256:66bfxxxxxxxxxxxxxxxx8ac68cbc927e86789a9e4e8183365ded688a1
- 在 master 节点执行
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
- 在 node 节点执行
kubeadm join 10.2.xx.215:6443 --token 4eozgp.xm7tfxxxxxxxxxz42y --discovery-token-ca-cert-hash sha256:66bfxxxxxxxxxxxxxxxx8ac68cbc927e86789a9e4e8183365ded688a1
还是根据上面 kubeadm init 输出的提示来添加节点,上面的命令是在你要加入的节点上执行,master 节点不用执行
- 在 master 节点查看
[root@gz-bjrd-devops-gw-c7 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
gz-xx-gw-c7 NotReady control-plane 22h v1.27.3
gz-xx-node1-c7 NotReady <none> 21h v1.27.3
gz-xx-node2-c7 NotReady <none> 21h v1.27.3
不要慌,网络插件搞好就 ready 了
7. 安装网络插件(flannel)
https://github.com/flannel-io…
[root@gz-bjrd-devops-gw-c7 ~]# cat flannel.yml
apiVersion: v1
kind: Namespace
metadata:
labels:
k8s-app: flannel
pod-security.kubernetes.io/enforce: privileged
name: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: flannel
name: flannel
namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: flannel
name: flannel
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
- apiGroups:
- networking.k8s.io
resources:
- clustercidrs
verbs:
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: flannel
name: flannel
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: flannel
subjects:
- kind: ServiceAccount
name: flannel
namespace: kube-flannel
---
apiVersion: v1
data:
cni-conf.json: |
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {"portMappings": true}
}
]
}
net-conf.json: |
{
"Network": "172.17x.0.0/16", # 这里得改成你自己的 pod-network-cidr(注意:JSON 不支持注释,apply 前必须删掉这段注释)
"Backend": {"Type": "vxlan"}
}
kind: ConfigMap
metadata:
labels:
app: flannel
k8s-app: flannel
tier: node
name: kube-flannel-cfg
namespace: kube-flannel
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: flannel
k8s-app: flannel
tier: node
name: kube-flannel-ds
namespace: kube-flannel
spec:
selector:
matchLabels:
app: flannel
k8s-app: flannel
template:
metadata:
labels:
app: flannel
k8s-app: flannel
tier: node
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- args:
- --ip-masq
- --kube-subnet-mgr
command:
- /opt/bin/flanneld
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: EVENT_QUEUE_DEPTH
value: "5000"
image: docker.io/flannel/flannel:v0.21.5
name: kube-flannel
resources:
requests:
cpu: 100m
memory: 50Mi
securityContext:
capabilities:
add:
- NET_ADMIN
- NET_RAW
privileged: false
volumeMounts:
- mountPath: /run/flannel
name: run
- mountPath: /etc/kube-flannel/
name: flannel-cfg
- mountPath: /run/xtables.lock
name: xtables-lock
hostNetwork: true
initContainers:
- args:
- -f
- /flannel
- /opt/cni/bin/flannel
command:
- cp
image: docker.io/flannel/flannel-cni-plugin:v1.1.2
name: install-cni-plugin
volumeMounts:
- mountPath: /opt/cni/bin
name: cni-plugin
- args:
- -f
- /etc/kube-flannel/cni-conf.json
- /etc/cni/net.d/10-flannel.conflist
command:
- cp
image: docker.io/flannel/flannel:v0.21.5
name: install-cni
volumeMounts:
- mountPath: /etc/cni/net.d
name: cni
- mountPath: /etc/kube-flannel/
name: flannel-cfg
priorityClassName: system-node-critical
serviceAccountName: flannel
tolerations:
- effect: NoSchedule
operator: Exists
volumes:
- hostPath:
path: /run/flannel
name: run
- hostPath:
path: /opt/cni/bin
name: cni-plugin
- hostPath:
path: /etc/cni/net.d
name: cni
- configMap:
name: kube-flannel-cfg
name: flannel-cfg
- hostPath:
path: /run/xtables.lock
type: FileOrCreate
name: xtables-lock
kubectl apply -f flannel.yml
测试
可以进行一个简单的测试
[root@k8s-master /data/yaml]# cat nginx.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx-deploy
labels:
app: ceshi
spec:
replicas: 2
selector:
matchLabels:
school: qinghua
template:
metadata:
name: nginx-pod
labels:
school: qinghua
spec:
containers:
- name: nginx-web
image: nginx:1.20.2
ports:
- containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
name: nginx-service
labels:
role: leader
spec:
type: NodePort
ports:
- port: 8888
targetPort: 80
nodePort: 30000
kubectl apply -f nginx.yaml
[root@k8s-master /data/yaml]# kubectl get pods
NAME READY STATUS RESTARTS AGE
nginx-deploy-6659dbd7c4-kldxj 1/1 Running 0 79m
nginx-deploy-6659dbd7c4-qgr4v 1/1 Running 0 79m
这里因为是第一次下载镜像会很慢,等一段时间,状态变为 Running 就代表完成了
# 查看 service 状态
Endpoint 有 ip 应该就没问题了
[root@k8s-master /data/yaml]# kubectl describe svc nginx-service
Name: nginx-service
Namespace: default
Labels: role=leader
Annotations: <none>
Selector: school=qinghua
Type: NodePort
IP Family Policy: SingleStack
IP Families: IPv4
IP: 10.2.2xx.219
IPs: 10.2.2xx.219
Port: <unset> 8888/TCP
TargetPort: 80/TCP
NodePort: <unset> 30000/TCP
Endpoints: 172.17.1.12:80,172.117.1.13:80
Session Affinity: None
External Traffic Policy: Cluster
Events: <none>
注意:k8s 1.24 版本以后,NodePort(使用宿主机端口)在 node 宿主机上不会显示监听端口(如用 ss -lntup 查不到),直接在浏览器用 nodeip 加端口的方式访问即可
问题解决
Error registering network: failed to acquire lease: node “caasfaasslave1.XXXXXX.local” pod cidr not assigned
* 在 master 上查看
节点没有获得 podCIDR。我也遇到了这种情况,尽管主节点上的清单中已经配置了 cluster-cidr,但它仍然不起作用,flannel 一直处于 CrashLoopBackOff 状态
[root@gz-xxs-gw-c7 ~]# sudo cat /etc/kubernetes/manifests/kube-controller-manager.yaml | grep -i cluster-cidr
- --cluster-cidr=172.17x.0.0/16
kubectl patch node gz-xx-node2-c7 -p '{"spec":{"podCIDR":"172.17x.0.0/16"}}'
kubectl patch node gz-xx-node1-c7 -p '{"spec":{"podCIDR":"172.17x.0.0/16"}}'
https://stackoverflow.com/que…