二进制
高可用集群3个master 2个node
192.168.1.200 long1 #master1
192.168.1.201 long2 #master2
192.168.1.202 long3 #master3
192.168.1.203 long4 #node1
192.168.1.204 long5 #node2
192.168.1.250 vip
K8s Service网段:192.168.0.0/16
K8s Pod网段:172.16.0.0/12
主机与k8s的pod和service的网段都不能有冲突
基础配置
#做long1对其他机器的免密
ssh-keygen -t rsa
for i in long{1..5};do ssh-copy-id ${i} ;done
########所有机器操作
#配置hosts
#安装cenos7 yum源
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
#必备工具安装
yum -y install wget jq psmisc vim net-tools telnet yum-utils device-mapper-persistent-data lvm2 git
#所有节点关闭firewalld 、dnsmasq、selinux(CentOS7需要关闭NetworkManager,CentOS8不需要)
systemctl disable --now firewalld
systemctl disable --now dnsmasq
systemctl disable --now NetworkManager
setenforce 0
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
#所有节点关闭swap分区,fstab注释swap
swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab
#所有节点同步时间 安装ntpdate
rpm -ivh http://mirrors.wlnmp.com/centos/wlnmp-release-centos.noarch.rpm
yum install ntpdate -y
#所有节点同步时间。时间同步配置如下:
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' >/etc/timezone
ntpdate time2.aliyun.com
(echo "*/5 * * * * /usr/sbin/ntpdate time2.aliyun.com" ; crontab -l )| crontab
#所有节点配置limit:
ulimit -SHn 65535
vim /etc/security/limits.conf
# 末尾添加如下内容
* soft nofile 65536
* hard nofile 131072
* soft nproc 65535
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
#所有节点升级系统并重启
yum update -y --exclude=kernel* && reboot
#升级系统的时候报错了 当前系统7.5 放弃升级了
#CentOS7 需要升级内核至4.18+,本次升级的版本为4.19 上传内核rpm包或去下载
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-devel-4.19.12-1.el7.elrepo.x86_64.rpm
wget http://193.49.22.109/elrepo/kernel/el7/x86_64/RPMS/kernel-ml-4.19.12-1.el7.elrepo.x86_64.rpm
#从long1传到其他机器
for i in long{2..5};do rsync -avz *.rpm ${i}:/root/ ;done
#所有机器执行安装
yum localinstall -y kernel-ml*
#所有节点更改内核启动顺序
grub2-set-default 0 && grub2-mkconfig -o /etc/grub2.cfg
grubby --args="user_namespace.enable=1" --update-kernel="$(grubby --default-kernel)"
#检查默认内核是不是4.19 然后重启
grubby --default-kernel
reboot
#重启后检查当前内核版本
uname -a
#所有节点安装ipvsadm:
yum install ipvsadm ipset sysstat conntrack libseccomp -y
#所有节点配置ipvs模块,在内核4.19+版本nf_conntrack_ipv4已经改为nf_conntrack, 4.18以下使用nf_conntrack_ipv4即可:
vim /etc/modules-load.d/ipvs.conf
# 加入以下内容
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
ip_vs_sh
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
systemctl enable --now systemd-modules-load.service
#检查是否加载: (重启检查)
lsmod | grep -e ip_vs -e nf_conntrack
#开启一些k8s集群中必须的内核参数,所有节点配置k8s内核:
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
net.ipv4.conf.all.route_localnet = 1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 65536
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF
sysctl --system
组件安装
#安装docker
yum install docker-ce-19.03.* docker-ce-cli-19.03.* -y
由于新版kubelet建议使用systemd,所以可以把docker的CgroupDriver改成systemd
mkdir /etc/docker
cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ["https://docker.mirrors.ustc.edu.cn","https://registry.docker-cn.com","http://hub-mirror.c.163.com"],
"max-concurrent-downloads": 3,
"max-concurrent-uploads": 5,
"log-driver": "json-file",
"log-opts": {
"max-file": "2",
"max-size": "300m"
},
"storage-driver": "overlay2",
"storage-opts": ["overlay2.override_kernel_check=true"],
"live-restore": true
}
EOF
#"data-root": "/app/sinova/docker",
#"exec-root": "/app/sinova/docker/exec",
#"insecure-registries": ["10.127.13.232:90"],
备选配置 数据存放目录 镜像仓库
log-opts是容器输出到终端的日志存储, 容器长时间不重启 日志会非常大 做限制 300m轮转 最多2次
#所有节点设置开机自启动Docker:
systemctl daemon-reload && systemctl enable --now docker
#安装k8s 及 etcd
#Master01下载kubernetes安装包(1.22.0需要更改为你看到的最新版本)
# wget https://dl.k8s.io/v1.22.0/kubernetes-server-linux-amd64.tar.gz
#注意目前版本是1.22.0安装时需要下载最新的1.22.x版本:
#https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md
#下载etcd安装包
#wget https://github.com/etcd-io/etcd/releases/download/v3.5.0/etcd-v3.5.0-linux-amd64.tar.gz
#解压kubernetes安装文件合etcd
tar -xf kubernetes-server-linux-amd64.tar.gz --strip-components=3 -C /usr/local/bin kubernetes/server/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy}
tar -zxvf etcd-v3.5.0-linux-amd64.tar.gz --strip-components=1 -C /usr/local/bin etcd-v3.5.0-linux-amd64/etcd{,ctl}
#将解压后得文件拷至其他节点
for i in long{2,3};do rsync -avz /usr/local/bin/kube{let,ctl,-apiserver,-controller-manager,-scheduler,-proxy} ${i}:/usr/local/bin ;done
for i in long{2,3};do rsync -avz /usr/local/bin/etcd* ${i}:/usr/local/bin ;done
#把kubelet和kube-proxy 拷到从
for i in long{4,5};do rsync -avz /usr/local/bin/kube{let,-proxy} ${i}:/usr/local/bin ;done
#所有节点创建目录
mkdir -p /opt/cni/bin
#long1下载文件
git clone https://gitee.com/dukuan/k8s-ha-install.git
cd /root/k8s-ha-install
#查看分支
[root@long1 k8s-ha-install]# git branch -a
* master
remotes/origin/HEAD -> origin/master
remotes/origin/manual-installation
remotes/origin/manual-installation-v1.16.x
remotes/origin/manual-installation-v1.17.x
remotes/origin/manual-installation-v1.18.x
remotes/origin/manual-installation-v1.19.x
remotes/origin/manual-installation-v1.20.x
remotes/origin/manual-installation-v1.20.x-csi-hostpath
remotes/origin/manual-installation-v1.21.x
remotes/origin/manual-installation-v1.22.x
remotes/origin/manual-installation-v1.23.x
remotes/origin/master
#切换分支
git checkout manual-installation-v1.22.x
[root@long1 k8s-ha-install]# ls
bootstrap calico CoreDNS csi-hostpath dashboard kubeadm-metrics-server metrics-server pki snapshotter
生成证书
#long1下载生成证书工具
#wget "https://pkg.cfssl.org/R1.2/cfssl_linux-amd64" -O /usr/local/bin/cfssl
#wget "https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64" -O /usr/local/bin/cfssljson
chmod +x /usr/local/bin/cfssl /usr/local/bin/cfssljson
#所有Master节点创建etcd证书目录
mkdir /etc/etcd/ssl -p
#所有节点创建kubernetes相关目录
mkdir -p /etc/kubernetes/pki
#抽空看下数字证数原理...
#Master01节点生成etcd证书
#生成证书的CSR文件:证书签名请求文件,配置了一些域名、公司、单位
cd /root/k8s-ha-install/pki
# 生成etcd CA证书和CA证书的key
cfssl gencert -initca etcd-ca-csr.json | cfssljson -bare /etc/etcd/ssl/etcd-ca
cfssl gencert \
-ca=/etc/etcd/ssl/etcd-ca.pem \
-ca-key=/etc/etcd/ssl/etcd-ca-key.pem \
-config=ca-config.json \
-hostname=127.0.0.1,long1,long2,long3,192.168.1.200,192.168.1.201,192.168.1.202 \
-profile=kubernetes \
etcd-csr.json | cfssljson -bare /etc/etcd/ssl/etcd
#将证书复制到其他master节点(long2,long3)
for i in etcd-ca-key.pem etcd-ca.pem etcd-key.pem etcd.pem; do rsync -avz /etc/etcd/ssl/${i} long2:/etc/etcd/ssl/ ;done
#生成k8s组件的根证书,创建所有证书
cfssl gencert -initca ca-csr.json | cfssljson -bare /etc/kubernetes/pki/ca
# 192.168.0.是k8s service的网段,如果说需要更改k8s service网段,那就需要更改192.168.0.1
# 如果不是高可用集群,192.168.1.250为Master01的IP
cfssl gencert -ca=/etc/kubernetes/pki/ca.pem -ca-key=/etc/kubernetes/pki/ca-key.pem -config=ca-config.json -hostname=192.168.0.1,192.168.1.250,127.0.0.1,kubernetes,kubernetes.default,kubernetes.default.svc,kubernetes.default.svc.cluster,kubernetes.default.svc.cluster.local,192.168.1.200,192.168.1.201,192.168.1.202 -profile=kubernetes apiserver-csr.json | cfssljson -bare /etc/kubernetes/pki/apiserver
#生成apiserver的聚合证书。(有告警 忽略)
cfssl gencert -initca front-proxy-ca-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-ca
cfssl gencert -ca=/etc/kubernetes/pki/front-proxy-ca.pem -ca-key=/etc/kubernetes/pki/front-proxy-ca-key.pem -config=ca-config.json -profile=kubernetes front-proxy-client-csr.json | cfssljson -bare /etc/kubernetes/pki/front-proxy-client
# 生成controller-manage的证书
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
manager-csr.json | cfssljson -bare /etc/kubernetes/pki/controller-manager
# 注意,如果不是高可用集群,192.168.1.250:8443改为master01的地址,8443改为apiserver的端口,默认是6443
# set-cluster:设置一个集群项,
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.1.250:8443 \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
# 设置一个环境项,一个上下文
kubectl config set-context system:kube-controller-manager@kubernetes \
--cluster=kubernetes \
--user=system:kube-controller-manager \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
# set-credentials 设置一个用户项
kubectl config set-credentials system:kube-controller-manager \
--client-certificate=/etc/kubernetes/pki/controller-manager.pem \
--client-key=/etc/kubernetes/pki/controller-manager-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
# 使用某个环境当做默认环境
kubectl config use-context system:kube-controller-manager@kubernetes \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
scheduler-csr.json | cfssljson -bare /etc/kubernetes/pki/scheduler
########
kubectl config set-cluster kubernetes \
--certificate-authority=/etc/kubernetes/pki/ca.pem \
--embed-certs=true \
--server=https://192.168.1.250:8443 \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
kubectl config set-credentials system:kube-scheduler \
--client-certificate=/etc/kubernetes/pki/scheduler.pem \
--client-key=/etc/kubernetes/pki/scheduler-key.pem \
--embed-certs=true \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
kubectl config set-context system:kube-scheduler@kubernetes \
--cluster=kubernetes \
--user=system:kube-scheduler \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
kubectl config use-context system:kube-scheduler@kubernetes \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
##########
cfssl gencert \
-ca=/etc/kubernetes/pki/ca.pem \
-ca-key=/etc/kubernetes/pki/ca-key.pem \
-config=ca-config.json \
-profile=kubernetes \
admin-csr.json | cfssljson -bare /etc/kubernetes/pki/admin
kubectl config set-cluster kubernetes --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://192.168.1.250:8443 --kubeconfig=/etc/kubernetes/admin.kubeconfig
kubectl config set-credentials kubernetes-admin --client-certificate=/etc/kubernetes/pki/admin.pem --client-key=/etc/kubernetes/pki/admin-key.pem --embed-certs=true --kubeconfig=/etc/kubernetes/admin.kubeconfig
kubectl config set-context kubernetes-admin@kubernetes --cluster=kubernetes --user=kubernetes-admin --kubeconfig=/etc/kubernetes/admin.kubeconfig
kubectl config use-context kubernetes-admin@kubernetes --kubeconfig=/etc/kubernetes/admin.kubeconfig
#创建ServiceAccount Key secret
openssl genrsa -out /etc/kubernetes/pki/sa.key 2048
openssl rsa -in /etc/kubernetes/pki/sa.key -pubout -out /etc/kubernetes/pki/sa.pub
#发送证书至其他节点
for NODE in long2 long3;do for FILE in $(ls /etc/kubernetes/pki | grep -v etcd);do scp /etc/kubernetes/pki/${FILE} $NODE:/etc/kubernetes/pki/${FILE};done;for FILE in admin.kubeconfig controller-manager.kubeconfig scheduler.kubeconfig;do scp /etc/kubernetes/${FILE} $NODE:/etc/kubernetes/${FILE};done; done
#检查其他master的证书 共23个证书
ls /etc/kubernetes/pki/ |wc -l
Kubernetes系统组件配置
#etcd配置大致相同,注意修改每个Master节点的etcd配置的主机名和IP地址
#注意修改data-dir 数据目录
#Master01
vim /etc/etcd/etcd.config.yml
name: 'long1'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.1.200:2380'
listen-client-urls: 'https://192.168.1.200:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.1.200:2380'
advertise-client-urls: 'https://192.168.1.200:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'long1=https://192.168.1.200:2380,long2=https://192.168.1.201:2380,long3=https://192.168.1.202:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
##############Master02################
name: 'long2'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.1.201:2380'
listen-client-urls: 'https://192.168.1.201:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.1.201:2380'
advertise-client-urls: 'https://192.168.1.201:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'long1=https://192.168.1.200:2380,long2=https://192.168.1.201:2380,long3=https://192.168.1.202:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
##############Master03################
name: 'long3'
data-dir: /var/lib/etcd
wal-dir: /var/lib/etcd/wal
snapshot-count: 5000
heartbeat-interval: 100
election-timeout: 1000
quota-backend-bytes: 0
listen-peer-urls: 'https://192.168.1.202:2380'
listen-client-urls: 'https://192.168.1.202:2379,http://127.0.0.1:2379'
max-snapshots: 3
max-wals: 5
cors:
initial-advertise-peer-urls: 'https://192.168.1.202:2380'
advertise-client-urls: 'https://192.168.1.202:2379'
discovery:
discovery-fallback: 'proxy'
discovery-proxy:
discovery-srv:
initial-cluster: 'long1=https://192.168.1.200:2380,long2=https://192.168.1.201:2380,long3=https://192.168.1.202:2380'
initial-cluster-token: 'etcd-k8s-cluster'
initial-cluster-state: 'new'
strict-reconfig-check: false
enable-v2: true
enable-pprof: true
proxy: 'off'
proxy-failure-wait: 5000
proxy-refresh-interval: 30000
proxy-dial-timeout: 1000
proxy-write-timeout: 5000
proxy-read-timeout: 0
client-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
peer-transport-security:
cert-file: '/etc/kubernetes/pki/etcd/etcd.pem'
key-file: '/etc/kubernetes/pki/etcd/etcd-key.pem'
peer-client-cert-auth: true
trusted-ca-file: '/etc/kubernetes/pki/etcd/etcd-ca.pem'
auto-tls: true
debug: false
log-package-levels:
log-outputs: [default]
force-new-cluster: false
#所有Master节点创建etcd service并启动
vim /usr/lib/systemd/system/etcd.service
[Unit]
Description=Etcd Service
Documentation=https://coreos.com/etcd/docs/latest/
After=network.target
[Service]
Type=notify
ExecStart=/usr/local/bin/etcd --config-file=/etc/etcd/etcd.config.yml
Restart=on-failure
RestartSec=10
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
Alias=etcd3.service
#所有Master节点创建etcd的证书目录
mkdir /etc/kubernetes/pki/etcd
ln -s /etc/etcd/ssl/* /etc/kubernetes/pki/etcd/
systemctl daemon-reload
systemctl enable --now etcd
#查看etcd状态
export ETCDCTL_API=3
etcdctl --endpoints="192.168.1.202:2379,192.168.1.201:2379,192.168.1.200:2379" --cacert=/etc/kubernetes/pki/etcd/etcd-ca.pem --cert=/etc/kubernetes/pki/etcd/etcd.pem --key=/etc/kubernetes/pki/etcd/etcd-key.pem endpoint status --write-out=table
高可用配置
#所有Master节点安装keepalived和haproxy
yum install keepalived haproxy -y
#所有Master配置HAProxy,配置一样
vim /etc/haproxy/haproxy.cfg
global
maxconn 2000
ulimit-n 16384
log 127.0.0.1 local0 err
stats timeout 30s
defaults
log global
mode http
option httplog
timeout connect 5000
timeout client 50000
timeout server 50000
timeout http-request 15s
timeout http-keep-alive 15s
frontend k8s-master
bind 0.0.0.0:8443
bind 127.0.0.1:8443
mode tcp
option tcplog
tcp-request inspect-delay 5s
default_backend k8s-master
backend k8s-master
mode tcp
option tcplog
option tcp-check
balance roundrobin
default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
server long1 192.168.1.200:6443 check
server long2 192.168.1.201:6443 check
server long3 192.168.1.202:6443 check
#所有Master节点配置KeepAlived,配置不一样,注意区分
vim /etc/keepalived/keepalived.conf
#master01 注意网卡和广播id interface ens33 和 virtual_router_id 51
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state MASTER
interface ens33
mcast_src_ip 192.168.1.200
virtual_router_id 51
priority 101
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.1.250
}
track_script {
chk_apiserver
} }
#master02
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP
interface ens33
mcast_src_ip 192.168.1.201
virtual_router_id 51
priority 100
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.1.250
}
track_script {
chk_apiserver
} }
#master03
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script chk_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 5
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP
interface ens33
mcast_src_ip 192.168.1.202
virtual_router_id 51
priority 100
nopreempt
advert_int 2
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH
}
virtual_ipaddress {
192.168.1.250
}
track_script {
chk_apiserver
} }
#健康检查脚本 所有master配置
vim /etc/keepalived/check_apiserver.sh
#!/bin/bash
err=0
for k in $(seq 1 3)
do
check_code=$(pgrep haproxy)
if [[ $check_code == "" ]]; then
err=$(expr $err + 1)
sleep 1
continue
else
err=0
break
fi
done
if [[ $err != "0" ]]; then
echo "systemctl stop keepalived"
/usr/bin/systemctl stop keepalived
exit 1
else
exit 0
fi
chmod +x /etc/keepalived/check_apiserver.sh
#所有master节点启动haproxy和keepalived
systemctl daemon-reload
systemctl enable --now haproxy
systemctl enable --now keepalived
#测试
ip a
ping 192.168.1.250
telnet 192.168.1.250 8443
Kubernetes组件配置
#所有节点创建相关目录
mkdir -p /etc/kubernetes/manifests/ /etc/systemd/system/kubelet.service.d /var/lib/kubelet /var/log/kubernetes
#所有Master节点创建kube-apiserver service
#k8s service网段为192.168.0.0/16,该网段不能和宿主机的网段、Pod网段的重复,请按需修改
#三个master节点的唯一区别就是 --advertise-address的ip注意改下
vim /usr/lib/systemd/system/kube-apiserver.service
[Unit]
Description=Kubernetes API Server
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-apiserver \
--v=2 \
--logtostderr=true \
--allow-privileged=true \
--bind-address=0.0.0.0 \
--secure-port=6443 \
--insecure-port=0 \
--advertise-address=192.168.1.200 \
--service-cluster-ip-range=192.168.0.0/16 \
--service-node-port-range=30000-32767 \
--etcd-servers=https://192.168.1.200:2379,https://192.168.1.201:2379,https://192.168.1.202:2379 \
--etcd-cafile=/etc/etcd/ssl/etcd-ca.pem \
--etcd-certfile=/etc/etcd/ssl/etcd.pem \
--etcd-keyfile=/etc/etcd/ssl/etcd-key.pem \
--client-ca-file=/etc/kubernetes/pki/ca.pem \
--tls-cert-file=/etc/kubernetes/pki/apiserver.pem \
--tls-private-key-file=/etc/kubernetes/pki/apiserver-key.pem \
--kubelet-client-certificate=/etc/kubernetes/pki/apiserver.pem \
--kubelet-client-key=/etc/kubernetes/pki/apiserver-key.pem \
--service-account-key-file=/etc/kubernetes/pki/sa.pub \
--service-account-signing-key-file=/etc/kubernetes/pki/sa.key \
--service-account-issuer=https://kubernetes.default.svc.cluster.local \
--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname \
--enable-admission-plugins=NamespaceLifecycle,LimitRanger,ServiceAccount,DefaultStorageClass,DefaultTolerationSeconds,NodeRestriction,ResourceQuota \
--authorization-mode=Node,RBAC \
--enable-bootstrap-token-auth=true \
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
--proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.pem \
--proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client-key.pem \
--requestheader-allowed-names=aggregator \
--requestheader-group-headers=X-Remote-Group \
--requestheader-extra-headers-prefix=X-Remote-Extra- \
--requestheader-username-headers=X-Remote-User
# --token-auth-file=/etc/kubernetes/token.csv
Restart=on-failure
RestartSec=10s
LimitNOFILE=65535
[Install]
WantedBy=multi-user.target
#所有Master节点开启kube-apiserver
systemctl daemon-reload && systemctl enable --now kube-apiserver
systemctl status kube-apiserver
#所有Master节点配置kube-controller-manager service(所有master节点配置一样)
#k8s Pod网段为172.16.0.0/12,该网段不能和宿主机的网段、k8s Service网段的重复,请按需修改
vim /usr/lib/systemd/system/kube-controller-manager.service
[Unit]
Description=Kubernetes Controller Manager
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-controller-manager \
--v=2 \
--logtostderr=true \
--address=127.0.0.1 \
--root-ca-file=/etc/kubernetes/pki/ca.pem \
--cluster-signing-cert-file=/etc/kubernetes/pki/ca.pem \
--cluster-signing-key-file=/etc/kubernetes/pki/ca-key.pem \
--service-account-private-key-file=/etc/kubernetes/pki/sa.key \
--kubeconfig=/etc/kubernetes/controller-manager.kubeconfig \
--leader-elect=true \
--use-service-account-credentials=true \
--node-monitor-grace-period=40s \
--node-monitor-period=5s \
--pod-eviction-timeout=2m0s \
--controllers=*,bootstrapsigner,tokencleaner \
--allocate-node-cidrs=true \
--cluster-cidr=172.16.0.0/12 \
--requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.pem \
--node-cidr-mask-size=24
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
#所有Master节点启动kube-controller-manager
systemctl daemon-reload && systemctl enable --now kube-controller-manager
systemctl status kube-controller-manager
#所有Master节点配置kube-scheduler service(所有master节点配置一样)
vim /usr/lib/systemd/system/kube-scheduler.service
[Unit]
Description=Kubernetes Scheduler
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-scheduler \
--v=2 \
--logtostderr=true \
--address=127.0.0.1 \
--leader-elect=true \
--kubeconfig=/etc/kubernetes/scheduler.kubeconfig
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
#所有Master节点启动kube-scheduler
systemctl daemon-reload && systemctl enable --now kube-scheduler
systemctl status kube-scheduler
TLS Bootstrapping配置
#自动给kubelet颁发证书的工具,便于增减node节点时自动管理证书
#只需要在Master01创建bootstrap
cd /root/k8s-ha-install/bootstrap
kubectl config set-cluster kubernetes --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://192.168.1.250:8443 --kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
kubectl config set-credentials tls-bootstrap-token-user --token=c8ad9c.2e4d610cf3e7426e --kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
kubectl config set-context tls-bootstrap-token-user@kubernetes --cluster=kubernetes --user=tls-bootstrap-token-user --kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
kubectl config use-context tls-bootstrap-token-user@kubernetes --kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig
mkdir -p /root/.kube ; cp /etc/kubernetes/admin.kubeconfig /root/.kube/config
#可以正常查询集群状态,才可以继续往下,否则不行,需要排查k8s组件是否有故障
[root@long1 bootstrap]# kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
controller-manager Healthy ok
scheduler Healthy ok
etcd-1 Healthy {"health":"true","reason":""}
etcd-0 Healthy {"health":"true","reason":""}
etcd-2 Healthy {"health":"true","reason":""}
kubectl create -f bootstrap.secret.yaml
Node节点配置
#Master01节点复制证书至Node节点
for NODE in long2 long3 long4 long5; do
ssh $NODE mkdir -p /etc/kubernetes/pki /etc/etcd/ssl /etc/etcd/ssl
for FILE in etcd-ca.pem etcd.pem etcd-key.pem; do
scp /etc/etcd/ssl/$FILE $NODE:/etc/etcd/ssl/
done
for FILE in pki/ca.pem pki/ca-key.pem pki/front-proxy-ca.pem bootstrap-kubelet.kubeconfig; do
scp /etc/kubernetes/$FILE $NODE:/etc/kubernetes/${FILE}
done
done
#所有节点创建相关目录
mkdir -p /var/lib/kubelet /var/log/kubernetes /etc/systemd/system/kubelet.service.d /etc/kubernetes/manifests/
#所有节点配置kubelet service
vim /usr/lib/systemd/system/kubelet.service
[Unit]
Description=Kubernetes Kubelet
Documentation=https://github.com/kubernetes/kubernetes
After=docker.service
Requires=docker.service
[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
vim /etc/systemd/system/kubelet.service.d/10-kubelet.conf
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig"
Environment="KUBELET_SYSTEM_ARGS=--network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin"
Environment="KUBELET_CONFIG_ARGS=--config=/etc/kubernetes/kubelet-conf.yml --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.5"
Environment="KUBELET_EXTRA_ARGS=--node-labels=node.kubernetes.io/node='' "
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_SYSTEM_ARGS $KUBELET_EXTRA_ARGS
#创建kubelet的配置文件
#注意:如果更改了k8s的service网段,需要更改kubelet-conf.yml 的clusterDNS:配置,改成k8s Service网段的第十个地址,比如192.168.0.10
vim /etc/kubernetes/kubelet-conf.yml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
authentication:
anonymous:
enabled: false
webhook:
cacheTTL: 2m0s
enabled: true
x509:
clientCAFile: /etc/kubernetes/pki/ca.pem
authorization:
mode: Webhook
webhook:
cacheAuthorizedTTL: 5m0s
cacheUnauthorizedTTL: 30s
cgroupDriver: systemd
cgroupsPerQOS: true
clusterDNS:
- 192.168.0.10
clusterDomain: cluster.local
containerLogMaxFiles: 5
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
enableDebuggingHandlers: true
enforceNodeAllocatable:
- pods
eventBurst: 10
eventRecordQPS: 5
evictionHard:
imagefs.available: 15%
memory.available: 100Mi
nodefs.available: 10%
nodefs.inodesFree: 5%
evictionPressureTransitionPeriod: 5m0s
failSwapOn: true
fileCheckFrequency: 20s
hairpinMode: promiscuous-bridge
healthzBindAddress: 127.0.0.1
healthzPort: 10248
httpCheckFrequency: 20s
imageGCHighThresholdPercent: 85
imageGCLowThresholdPercent: 80
imageMinimumGCAge: 2m0s
iptablesDropBit: 15
iptablesMasqueradeBit: 14
kubeAPIBurst: 10
kubeAPIQPS: 5
makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
nodeStatusUpdateFrequency: 10s
oomScoreAdj: -999
podPidsLimit: -1
registryBurst: 10
registryPullQPS: 5
resolvConf: /etc/resolv.conf
rotateCertificates: true
runtimeRequestTimeout: 2m0s
serializeImagePulls: true
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 4h0m0s
syncFrequency: 1m0s
volumeStatsAggPeriod: 1m0s
#启动所有节点kubelet
systemctl daemon-reload
systemctl enable --now kubelet
#此时系统日志/var/log/messages显示只有如下信息为正常
Unable to update cni config: no networks found in /etc/cni/net.d
kubectl get node
#能看到所有节点且NotReady就可以了,因为calico没配置所以是NotReady
#kube-proxy配置
cd /root/k8s-ha-install
kubectl -n kube-system create serviceaccount kube-proxy
kubectl create clusterrolebinding system:kube-proxy --clusterrole system:node-proxier --serviceaccount kube-system:kube-proxy
SECRET=$(kubectl -n kube-system get sa/kube-proxy \
--output=jsonpath='{.secrets[0].name}')
JWT_TOKEN=$(kubectl -n kube-system get secret/$SECRET \
--output=jsonpath='{.data.token}' | base64 -d)
PKI_DIR=/etc/kubernetes/pki
K8S_DIR=/etc/kubernetes
kubectl config set-cluster kubernetes --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://192.168.1.250:8443 --kubeconfig=${K8S_DIR}/kube-proxy.kubeconfig
kubectl config set-credentials kubernetes --token=${JWT_TOKEN} --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
kubectl config set-context kubernetes --cluster=kubernetes --user=kubernetes --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
kubectl config use-context kubernetes --kubeconfig=/etc/kubernetes/kube-proxy.kubeconfig
#将kubeconfig发送至其他节点
for NODE in long2 long3 long4 long5; do
scp /etc/kubernetes/kube-proxy.kubeconfig $NODE:/etc/kubernetes/kube-proxy.kubeconfig
done
#所有节点添加kube-proxy的配置和service文件:
vim /usr/lib/systemd/system/kube-proxy.service
[Unit]
Description=Kubernetes Kube Proxy
Documentation=https://github.com/kubernetes/kubernetes
After=network.target
[Service]
ExecStart=/usr/local/bin/kube-proxy \
--config=/etc/kubernetes/kube-proxy.yaml \
--v=2
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
#如果更改了集群Pod的网段,需要更改kube-proxy.yaml的clusterCIDR为自己的Pod网段:
vim /etc/kubernetes/kube-proxy.yaml
apiVersion: kubeproxy.config.k8s.io/v1alpha1
bindAddress: 0.0.0.0
clientConnection:
acceptContentTypes: ""
burst: 10
contentType: application/vnd.kubernetes.protobuf
kubeconfig: /etc/kubernetes/kube-proxy.kubeconfig
qps: 5
clusterCIDR: 172.16.0.0/12
configSyncPeriod: 15m0s
conntrack:
max: null
maxPerCore: 32768
min: 131072
tcpCloseWaitTimeout: 1h0m0s
tcpEstablishedTimeout: 24h0m0s
enableProfiling: false
healthzBindAddress: 0.0.0.0:10256
hostnameOverride: ""
iptables:
masqueradeAll: false
masqueradeBit: 14
minSyncPeriod: 0s
syncPeriod: 30s
ipvs:
masqueradeAll: true
minSyncPeriod: 5s
scheduler: "rr"
syncPeriod: 30s
kind: KubeProxyConfiguration
metricsBindAddress: 127.0.0.1:10249
mode: "ipvs"
nodePortAddresses: null
oomScoreAdj: -999
portRange: ""
udpIdleTimeout: 250ms
#所有节点启动kube-proxy
systemctl daemon-reload
systemctl enable --now kube-proxy
安装Calico
#以下步骤只在master01执行
cd /root/k8s-ha-install/calico/
#更改calico的网段,主要需要将红色部分的网段,改为自己的Pod网段
sed -i "s#POD_CIDR#172.16.0.0/12#g" calico.yaml
kubectl apply -f calico.yaml
#查看容器状态
kubectl get po -n kube-system
安装CoreDNS
#安装官方推荐版本(推荐)
cd /root/k8s-ha-install/
#如果更改了k8s service的网段需要将coredns的serviceIP改成k8s service网段的第十个IP
#COREDNS_SERVICE_IP=`kubectl get svc | grep kubernetes | awk '{print $3}'`0
#sed -i "s#192.168.0.10#${COREDNS_SERVICE_IP}#g" CoreDNS/coredns.yaml
这里用的就是192.168.0.10
安装coredns
kubectl create -f CoreDNS/coredns.yaml
安装Metrics Server
#安装metrics server
cd /root/k8s-ha-install/metrics-server
kubectl create -f .
kubectl top pod -A
集群验证
#启动一个测试pod
cat<<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: busybox
namespace: default
spec:
containers:
- name: busybox
image: busybox:latest
command:
- sleep
- "3600"
imagePullPolicy: IfNotPresent
restartPolicy: Always
EOF
1.Pod必须能解析Service
2.Pod必须能解析跨namespace的Service
3.每个节点都必须要能访问Kubernetes的kubernetes svc 443和kube-dns的service 53
4.Pod和Pod之前要能通
a)同namespace能通信
b)跨namespace能通信
c)跨机器能通信
#1-2 进入容器尝试解析域名
kubectl exec busybox -n default -- nslookup kubernetes
kubectl exec busybox -n default -- nslookup kube-dns.kube-system
#3 所有机器执行
telnet 192.168.0.10 53
telnet 192.168.0.1 443
#4 在测试容器内ping一下其他pod的ip
安装dashboard
cd /root/k8s-ha-install/dashboard/
kubectl create -f .
kubectl edit svc kubernetes-dashboard -n kubernetes-dashboard
将ClusterIP更改为NodePort(如果已经为NodePort忽略此步骤)
kubectl get svc -n kubernetes-dashboard
#浏览器打开页面 如果打不开
#浏览器右键属性 在chrome.exe"后添加参数 --test-type --ignore-certificate-errors
查看token
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}')
配置优化
controller-manager
vim /usr/lib/systemd/system/kube-controller-manager.service
# --feature-gates=RotateKubeletClientCertificate=true,RotateKubeletServerCertificate=true \ 新版本默认就是开启的了 不需要加了
--cluster-signing-duration=876000h0m0s \
#controller-manager的证书有效期,不一定生效,最长有效期可能是五年 ,过期了bootstrapping会自动再颁发一个
systemctl daemon-reload && systemctl restart kube-controller-manager.service
kubelet
vim /etc/systemd/system/kubelet.service.d/10-kubelet.conf
Environment="KUBELET_EXTRA_ARGS=--tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 --image-pull-progress-deadline=30m"
#修改加密方式 防止扫出漏洞
#延长kubelet从公网下载镜像等待时间
#新版本的配置都推荐放在这个配置文件中 包括上面配置的这两条
vim /etc/kubernetes/kubelet-conf.yml
添加如下配置
rotateServerCertificates: true
allowedUnsafeSysctls:
- "net.core*"
- "net.ipv4.*"
kubeReserved:
cpu: "10m"
memory: 1Gi
ephemeral-storage: 10Gi
systemReserved:
cpu: "10m"
memory: 1Gi
ephemeral-storage: 10Gi
#allowedUnsafeSysctls 默认是不允许容器修改内核参数的,通过这个允许容器修改一些内核参数,涉及安全问题
#kubeReserved 在系统上给k8s组件预留足够的资源,不让容器按照 机器总资源去使用
#systemReserved cpu 可以m 内存单位可以Mi Gi 需要计算合适的值进行预留!!!!!!!!!
kubectl get node
NAME STATUS ROLES AGE VERSION
long1 Ready <none> 4d22h v1.22.0
long2 Ready <none> 4d22h v1.22.0
long3 Ready <none> 4d22h v1.22.0
long4 Ready <none> 4d22h v1.22.0
long5 Ready <none> 4d22h v1.22.0
添加 ROLES
[root@long1 dashboard]# kubectl label node long1 node-role.kubernetes.io/master=''
node/long1 labeled
[root@long1 dashboard]# kubectl label node long2 node-role.kubernetes.io/666=''
node/long2 labeled
[root@long1 dashboard]# kubectl get node
NAME STATUS ROLES AGE VERSION
long1 Ready master 4d22h v1.22.0
long2 Ready 666 4d22h v1.22.0
long3 Ready <none> 4d22h v1.22.0
long4 Ready <none> 4d22h v1.22.0
long5 Ready <none> 4d22h v1.22.0
1)生产环境中etcd一定要和系统盘分开,一定要用ssd硬盘。
2)Docker数据盘也要和系统盘分开,有条件的话可以使用ssd硬盘
echo “source <(kubectl completion bash)” >> ~/.bashrc