参考文档
官方文档: 使用 kubeadm 创建集群
containerd镜像配置: containerd官方的HOST配置
镜像设置: containerd k3s 镜像加速
版本列表
- 操作系统: CentOS Linux release 7.9.2009 (Core)
- Kubernetes 版本: v1.29.12
- 无需科学上网,使用镜像源,而非代理模式
- 如无特别指出,本文所有的命令均以root执行
1. 运行环境检测
# 1.1 Verify that no two nodes share a hostname, MAC address, or product_uuid.
ifconfig -a
sudo cat /sys/class/dmi/id/product_uuid

# 1.2 Disable the firewall
# Check current status
systemctl status firewalld
# Stop the firewall now
systemctl stop firewalld
# Disable the service (stays off after reboot)
systemctl disable firewalld
# (Reference) commands to re-enable the firewall:
# systemctl enable firewalld
# systemctl start firewalld

# 1.3 Disable swap (required by kubelet)
swapoff -a
# To make this permanent, also comment out the swap entry in /etc/fstab
# Reboot to apply
reboot
# Verify swap is off: the "Swap" row of free -m should show 0
free -m

# 1.4 CentOS 7 is EOL and the default yum mirrors are unreachable; switch to the Aliyun mirror
sudo cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.bak
sudo curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
2. 安装容器运行时 CRI(containerd)
# 2.1 Prerequisites
# Forward IPv4 and let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter

# sysctl params required by Kubernetes networking; persisted across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF

# Apply sysctl params without rebooting
sudo sysctl --system

# Verify the br_netfilter and overlay modules are loaded:
lsmod | grep br_netfilter
lsmod | grep overlay
# Verify net.bridge.bridge-nf-call-iptables, net.bridge.bridge-nf-call-ip6tables
# and net.ipv4.ip_forward are all set to 1 in your sysctl config
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward

# 2.2 Add the Docker CE yum repo. On the network error
# "Network file descriptor is not." just retry (a dozen attempts usually succeed)
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
# Install containerd.io; if fetching the GPG key fails, retry a few times as well
yum install containerd.io -y
# Check the service; "dead" is expected at this point — the config file is missing
systemctl status containerd
# Generate the default config, start the service, enable it at boot;
# status should now report "running"
containerd config default > /etc/containerd/config.toml
systemctl start containerd
systemctl enable containerd
systemctl status containerd
3. 安装 kubeadm、kubelet 和 kubectl
# 3.1 Set SELinux to permissive mode (effectively disabling it)
sudo setenforce 0
sudo sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config

# 3.2 Add the Kubernetes yum repository (Aliyun mirror used here).
# This overwrites any existing config in /etc/yum.repos.d/kubernetes.repo
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.29/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.29/rpm/repodata/repomd.xml.key
EOF

# 3.3 Install kubelet, kubeadm and kubectl, and enable kubelet so it starts at boot:
yum install -y kubelet kubeadm kubectl --disableexcludes=kubernetes
systemctl enable --now kubelet

# 3.4 Verify the installation
# kubeadm is OK if it prints a version, e.g.:
# kubeadm version: &version.Info{Major:"1", Minor:"29", GitVersion:"v1.29.12", GitCommit:"9253c9bda3d8bd76848bb4a21b309c28c0aab2f7", GitTreeState:"clean", BuildDate:"2024-12-10T11:34:15Z", GoVersion:"go1.22.9", Compiler:"gc", Platform:"linux/amd64"}
kubeadm version
# kubectl is OK if it prints a version, e.g.:
# Client Version: v1.29.12 Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3
# (the "connection to the server localhost:8080 was refused" error is expected before init)
kubectl version
# kubelet is expected NOT to be running yet; this status is normal:
# activating (auto-restart) kubelet.service - kubelet: The Kubernetes Node Agent.
systemctl status kubelet
4. kubeadm 初始化
# 4.1 Prerequisite — very important
# Configure containerd registry mirrors, otherwise image pulls will fail.
# Edit /etc/containerd/config.toml and set config_path under the registry plugin:
#   [plugins."io.containerd.grpc.v1.cri".registry]
#     config_path = "/etc/containerd/certs.d"
# Restart the service afterwards
systemctl restart containerd

# Create the mirror configuration directory
cd /etc/containerd/
mkdir -p certs.d && cd certs.d/

# Docker Hub mirror
mkdir -p /etc/containerd/certs.d/docker.io
cat > /etc/containerd/certs.d/docker.io/hosts.toml << EOF
server = "https://docker.io"

[host."https://dockerproxy.com"]
  capabilities = ["pull", "resolve"]

[host."https://docker.m.daocloud.io"]
  capabilities = ["pull", "resolve"]

[host."https://reg-mirror.qiniu.com"]
  capabilities = ["pull", "resolve"]

[host."https://registry.docker-cn.com"]
  capabilities = ["pull", "resolve"]

[host."http://hub-mirror.c.163.com"]
  capabilities = ["pull", "resolve"]
EOF

# registry.k8s.io mirror
mkdir -p /etc/containerd/certs.d/registry.k8s.io
tee /etc/containerd/certs.d/registry.k8s.io/hosts.toml << 'EOF'
server = "https://registry.k8s.io"

[host."https://k8s.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# docker.elastic.co mirror
mkdir -p /etc/containerd/certs.d/docker.elastic.co
tee /etc/containerd/certs.d/docker.elastic.co/hosts.toml << 'EOF'
server = "https://docker.elastic.co"

[host."https://elastic.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# gcr.io mirror
mkdir -p /etc/containerd/certs.d/gcr.io
tee /etc/containerd/certs.d/gcr.io/hosts.toml << 'EOF'
server = "https://gcr.io"

[host."https://gcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# ghcr.io mirror
mkdir -p /etc/containerd/certs.d/ghcr.io
tee /etc/containerd/certs.d/ghcr.io/hosts.toml << 'EOF'
server = "https://ghcr.io"

[host."https://ghcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# k8s.gcr.io mirror
mkdir -p /etc/containerd/certs.d/k8s.gcr.io
tee /etc/containerd/certs.d/k8s.gcr.io/hosts.toml << 'EOF'
server = "https://k8s.gcr.io"

[host."https://k8s-gcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# mcr.microsoft.com mirror
mkdir -p /etc/containerd/certs.d/mcr.microsoft.com
tee /etc/containerd/certs.d/mcr.microsoft.com/hosts.toml << 'EOF'
server = "https://mcr.microsoft.com"

[host."https://mcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# nvcr.io mirror
mkdir -p /etc/containerd/certs.d/nvcr.io
tee /etc/containerd/certs.d/nvcr.io/hosts.toml << 'EOF'
server = "https://nvcr.io"

[host."https://nvcr.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# quay.io mirror
mkdir -p /etc/containerd/certs.d/quay.io
tee /etc/containerd/certs.d/quay.io/hosts.toml << 'EOF'
server = "https://quay.io"

[host."https://quay.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# registry.jujucharms.com mirror
mkdir -p /etc/containerd/certs.d/registry.jujucharms.com
tee /etc/containerd/certs.d/registry.jujucharms.com/hosts.toml << 'EOF'
server = "https://registry.jujucharms.com"

[host."https://jujucharms.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# rocks.canonical.com mirror
mkdir -p /etc/containerd/certs.d/rocks.canonical.com
tee /etc/containerd/certs.d/rocks.canonical.com/hosts.toml << 'EOF'
server = "https://rocks.canonical.com"

[host."https://rocks-canonical.m.daocloud.io"]
  capabilities = ["pull", "resolve", "push"]
EOF

# 4.2 (Optional) Test-pull an image to confirm the mirrors are used.
# Mirrors are rate-limited, so this may take a few minutes.
ctr --debug images pull --hosts-dir=/etc/containerd/certs.d registry.k8s.io/kube-apiserver:v1.29.12
# 4.3 Pre-pull the Kubernetes images; this is slow (about 7 minutes in my test)
kubeadm config images pull
# 4.4 Pick a pod CIDR: look at the "default via" lines and choose a range
# that does not overlap anything listed
ip route show
# 4.5 Initialize the control plane (pod CIDR 10.244.0.0/16 used here).
# Success prints: "Your Kubernetes control-plane has initialized successfully!"
kubeadm init --pod-network-cidr=10.244.0.0/16
5. 安装容器网络插件(CNI)
# 5.1 Configure the kubeconfig file.
# Running kubectl right away fails with:
# E1219 16:46:59.873640 8689 memcache.go:265] couldn't get current server API group list
kubectl get pods --all-namespaces
# kubectl needs a kubeconfig to talk to the cluster.
export KUBECONFIG=/etc/kubernetes/admin.conf
# The export only lasts for this session; copy the file to root's
# ~/.kube/config to make it permanent.
# Bug fix: ~/.kube does not exist yet after a fresh install — create it first.
mkdir -p ~/.kube
cp /etc/kubernetes/admin.conf ~/.kube/config
# Now the command should work; output looks like:
#[root@etronprobe1 certs.d]# kubectl get pods --all-namespaces
#NAMESPACE NAME READY STATUS RESTARTS AGE
#kube-system coredns-76f75df574-7grrb 0/1 Pending 0 4m19s
#kube-system coredns-76f75df574-jxp4m 0/1 Pending 0 4m19s
#kube-system etcd-etronprobe1 1/1 Running 0 4m34s
#kube-system kube-apiserver-etronprobe1 1/1 Running 0 4m34s
#kube-system kube-controller-manager-etronprobe1 1/1 Running 0 4m34s
#kube-system kube-proxy-l64gg 1/1 Running 0 4m19s
#kube-system kube-scheduler-etronprobe1 1/1 Running 0 4m34s
kubectl get pods --all-namespaces

# 5.2 Install the network plugin (from the network)
kubectl apply -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml
# If GitHub is unreachable, download the file and apply it locally
# (the file is provided as an attachment to this article)
kubectl apply -f ./kube-flannel.yml
# Wait a moment for flannel to initialize; when everything (especially
# kube-flannel) shows Running, the setup succeeded
kubectl get pods --all-namespaces

# 5.3 Check that the node is Ready
kubectl get nodes
# Deploy a test pod
kubectl run nginx --image=nginx --restart=Never
# Check it is running
kubectl get pods --all-namespaces -o wide
# Inspect events if it failed
kubectl describe pod nginx
# On a single-node cluster scheduling fails with:
# Warning FailedScheduling 61s default-scheduler 0/1 nodes are available: 1 node(s) had untolerated taint {node-role.kubernetes.io/control-plane: }. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.
# Remove the control-plane taint and retry
kubectl taint nodes etronprobe node-role.kubernetes.io/control-plane-
# Check again
kubectl get pods --all-namespaces -o wide
附件:kube-flannel.yml
apiVersion: v1
kind: Namespace
metadata:
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
  name: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
kind: ConfigMap
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-cfg
  namespace: kube-flannel
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-ds
  namespace: kube-flannel
spec:
  selector:
    matchLabels:
      app: flannel
      k8s-app: flannel
  template:
    metadata:
      labels:
        app: flannel
        k8s-app: flannel
        tier: node
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      containers:
      - args:
        - --ip-masq
        - --kube-subnet-mgr
        command:
        - /opt/bin/flanneld
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        image: docker.io/flannel/flannel:v0.26.2
        name: kube-flannel
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
            - NET_RAW
          privileged: false
        volumeMounts:
        - mountPath: /run/flannel
          name: run
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
        - mountPath: /run/xtables.lock
          name: xtables-lock
      hostNetwork: true
      initContainers:
      - args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        command:
        - cp
        image: docker.io/flannel/flannel-cni-plugin:v1.6.0-flannel1
        name: install-cni-plugin
        volumeMounts:
        - mountPath: /opt/cni/bin
          name: cni-plugin
      - args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        command:
        - cp
        image: docker.io/flannel/flannel:v0.26.2
        name: install-cni
        volumeMounts:
        - mountPath: /etc/cni/net.d
          name: cni
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
      priorityClassName: system-node-critical
      serviceAccountName: flannel
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - hostPath:
          path: /run/flannel
        name: run
      - hostPath:
          path: /opt/cni/bin
        name: cni-plugin
      - hostPath:
          path: /etc/cni/net.d
        name: cni
      - configMap:
          name: kube-flannel-cfg
        name: flannel-cfg
      - hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
        name: xtables-lock
6. 安装 Helm 和 Dashboard
# Download the official Helm 3 installer script
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
# Bug fix: curl does not set the execute bit, so ./get_helm.sh would fail
# with "Permission denied" without this chmod (matches the official Helm docs)
chmod 700 get_helm.sh
./get_helm.sh
# Add the Dashboard chart repo and install it into its own namespace
helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/
helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard \
  --create-namespace --namespace kubernetes-dashboard \
  --version 7.5.0