This manual describes the steps I followed to create an on-premises Kubernetes cluster with:
- Kubernetes 1.9 (using kubeadm)
- CentOS 7
- etcd outside k8s
- ha-proxy as the master load balancer
SELinux is disabled on all hosts.
Network diagram:
              +--------------+
              |    NODE 1    | k8s
              |  10.11.12.2  |
              +--------------+
                     |
                     |
             +-------v--------+
             |  LOAD BALANCER | ha-proxy
             |   10.11.12.9   |
             +----------------+
                     |
        +------------+------------+
        |                         |
+-------v------+          +-------v------+
|   MASTER 1   | k8s      |   MASTER 2   | k8s
|  10.11.12.3  | etcd     |  10.11.12.4  | etcd
+--------------+          +--------------+
Download and compile ha-proxy 1.8.3
yum install gcc pcre-static pcre-devel -y
wget http://www.haproxy.org/download/1.8/src/haproxy-1.8.3.tar.gz -O ~/haproxy.tar.gz
tar xzvf ~/haproxy.tar.gz -C ~/
cd ~/haproxy-1.8.3
make TARGET=linux2628
make install
mkdir -p /etc/haproxy
mkdir -p /var/lib/haproxy
touch /var/lib/haproxy/stats
ln -s /usr/local/sbin/haproxy /usr/sbin/haproxy
cp ./examples/haproxy.init /etc/init.d/haproxy
chmod 755 /etc/init.d/haproxy
systemctl daemon-reload
systemctl enable haproxy
useradd -r haproxy
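You can check that the build succeeded and the expected version is installed:
haproxy -v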
Open the Kubernetes API server (6443) and kubelet (10250) ports
firewall-cmd --permanent --zone=public --add-port=6443/tcp
firewall-cmd --zone=public --add-port=6443/tcp
firewall-cmd --permanent --zone=public --add-port=10250/tcp
firewall-cmd --zone=public --add-port=10250/tcp
Open HAProxy status port (optional)
firewall-cmd --permanent --zone=public --add-port=8080/tcp
firewall-cmd --zone=public --add-port=8080/tcp
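To double-check which ports are now open in the public zone:
firewall-cmd --zone=public --list-ports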
Configure and start HAProxy
cat << EOF > /etc/haproxy/haproxy.cfg
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

defaults
    mode tcp
    log global
    option httplog
    option dontlognull
    option http-server-close
    option redispatch
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout http-keep-alive 10s
    timeout check 10s
    maxconn 3000

frontend http_stats
    bind *:8080
    mode http
    stats uri /haproxy?stats

frontend haproxy_kube
    bind *:6443
    mode tcp
    option tcplog
    timeout client 10800s
    default_backend masters

backend masters
    mode tcp
    option tcplog
    balance leastconn
    timeout server 10800s
    server master1 10.11.12.3:6443 check
    server master2 10.11.12.4:6443 check
EOF
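Before starting the service, you can validate the configuration file; -c runs HAProxy in check mode and reports syntax errors without starting the daemon:
haproxy -c -f /etc/haproxy/haproxy.cfg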
systemctl start haproxy
If you opened port 8080, you can check the HAProxy stats page at: http://10.11.12.9:8080/haproxy?stats
Install the external etcd cluster on the master nodes. On master 1:
yum -y install etcd
cat <<EOF > /etc/etcd/etcd.conf
# [member]
ETCD_NAME=etcd1
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://10.11.12.3:2380"
ETCD_LISTEN_CLIENT_URLS="http://10.11.12.3:2379,http://127.0.0.1:2379"
# [cluster]
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.11.12.3:2380"
ETCD_INITIAL_CLUSTER="etcd1=http://10.11.12.3:2380,etcd2=http://10.11.12.4:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="KUBE-ETCD-CLUSTER"
ETCD_ADVERTISE_CLIENT_URLS="http://10.11.12.3:2379"
EOF
systemctl enable etcd
systemctl start etcd
On master 2:
yum -y install etcd
cat <<EOF > /etc/etcd/etcd.conf
# [member]
ETCD_NAME=etcd2
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://10.11.12.4:2380"
ETCD_LISTEN_CLIENT_URLS="http://10.11.12.4:2379,http://127.0.0.1:2379"
# [cluster]
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.11.12.4:2380"
ETCD_INITIAL_CLUSTER="etcd1=http://10.11.12.3:2380,etcd2=http://10.11.12.4:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="KUBE-ETCD-CLUSTER"
ETCD_ADVERTISE_CLIENT_URLS="http://10.11.12.4:2379"
EOF
systemctl enable etcd
systemctl start etcd
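Before moving on, it is worth verifying that both members see each other. A minimal check, assuming the etcdctl v2 API that ships with the CentOS etcd package:
etcdctl --endpoints "http://10.11.12.3:2379,http://10.11.12.4:2379" cluster-health
etcdctl --endpoints "http://10.11.12.3:2379,http://10.11.12.4:2379" member list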
Now the etcd cluster is ready. I recommend using at least three etcd servers (and Kubernetes masters) to ensure real HA. I changed the firewall on master1 and master2 to trust all incoming traffic from the cluster network (Kubernetes traffic runs over a VPN, managed with Pritunl):
firewall-cmd --permanent --zone=trusted --add-source=10.11.12.0/24
firewall-cmd --zone=trusted --add-source=10.11.12.0/24
firewall-cmd --zone=trusted --add-interface=tun0 --permanent
Docker installation (in production environments, configure Docker to use an LVM volume directly for its storage):
yum install -y docker
systemctl enable docker
systemctl start docker
Kubectl installation
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
chmod +x ./kubectl
mv ./kubectl /usr/local/bin/kubectl
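Verify the binary is on the PATH and executable:
kubectl version --client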
Kubelet installation
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg
       https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
EOF
yum install -y kubelet
systemctl enable kubelet
Kubeadm installation
yum install -y kubeadm
As of Kubernetes 1.9, swap must be disabled. Turn swap off and remove the swap entry from /etc/fstab:
swapoff -a
vi /etc/fstab
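Editing /etc/fstab by hand works; if you prefer a non-interactive way, this sketch comments out any line containing a swap entry (double-check the file afterwards):
sed -i '/ swap / s/^/#/' /etc/fstab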
Send bridged packets to iptables:
echo '1' > /proc/sys/net/bridge/bridge-nf-call-iptables
echo "net.bridge.bridge-nf-call-iptables = 1" >> /etc/sysctl.d/99-sysctl.conf
sysctl -p /etc/sysctl.conf
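You can confirm that the setting is active:
sysctl net.bridge.bridge-nf-call-iptables
# expected output: net.bridge.bridge-nf-call-iptables = 1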
Create kube_adm.yaml configuration file for kubeadm
Note that we include the load balancer IP and domain name (master.yourdomain.com) in the API server certificate SANs, and that advertiseAddress is set to the load balancer.
cat <<EOF > kube_adm.yaml
apiVersion: kubeadm.k8s.io/v1alpha1
kind: MasterConfiguration
etcd:
  endpoints:
  - "http://10.11.12.3:2379"
  - "http://10.11.12.4:2379"
apiServerCertSANs:
- "10.11.12.3"
- "10.11.12.4"
- "10.11.12.9"
- "127.0.0.1"
- "master.yourdomain.com"
token: "9aeb42.99b7540a5833866a"
tokenTTL: "0s"
api:
  advertiseAddress: "10.11.12.9"
networking:
  podSubnet: 10.244.0.0/16
EOF
Initialize the first cluster master:
kubeadm init --config kube_adm.yaml
If the first master starts successfully, kubeadm prints a join command; save it, it will be used later. If you lose it, the tokens can be retrieved from the etcd cluster, or regenerated as shown in the worker node section.
Troubleshooting: if a "could not determine external etcd version" error is thrown, ensure that the etcd service is running.
Configure kubectl to use this cluster's credentials.
Be careful! The first command deletes any existing kubeconfig and with it access to other clusters:
rm -rf $HOME/.kube
mkdir -p $HOME/.kube
cp /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
Copy this configuration to every machine (e.g. your desktop) that needs admin access to the cluster. Test it with:
kubectl get nodes
The node will show a NotReady status because the CNI plugin is still missing.
Installation of the CNI plugin (we used Canal: a mix of flannel & Calico).
kubectl apply -f https://raw.githubusercontent.com/projectcalico/canal/master/k8s-install/1.7/rbac.yaml
kubectl apply -f https://raw.githubusercontent.com/projectcalico/canal/master/k8s-install/1.7/canal.yaml
EDIT: In my case, the network interfaces are managed by OpenVPN (Pritunl). If the VPN restarts or shuts down, the tun0 interface disappears and the flannel.1 device goes with it; flannel was not able to recover from this. I had to switch the flanneld image to a modified one, published on Docker Hub, that recovers the flannel.1 interface.
The final CNI manifest I used is this modified flannel, forked from 0.9.1:
cat <<EOF > kube_cni.yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: flannel
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      hostNetwork: true
      nodeSelector:
        beta.kubernetes.io/arch: amd64
      tolerations:
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni
        image: indiketa/flanneld-autorestart:0.9.1
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: indiketa/flanneld-autorestart:0.9.1
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        - --iface=tun0
        - --backend-healthz-interval=30
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: true
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: run
          mountPath: /run
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      volumes:
      - name: run
        hostPath:
          path: /run
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
  updateStrategy:
    type: RollingUpdate
EOF
kubectl apply -f kube_cni.yaml
Check the node status again; the node should now be Ready.
kubectl get nodes
The first Kubernetes master is now complete. Before installing add-ons such as the dashboard, Heapster, etc., I recommend setting up the other master nodes.
Dashboard installation. Run on a master node (with kubectl configured)
kubectl apply -f https://raw.githubusercontent.com/kubernetes/dashboard/master/src/deploy/recommended/kubernetes-dashboard.yaml
We want to run Heapster & Grafana on the master nodes. To allow this, we need to add tolerations to the InfluxDB/Grafana/Heapster definitions, so we download and modify the specs:
yum install -y wget unzip
wget https://github.com/kubernetes/heapster/archive/master.zip
unzip master.zip
cd heapster-master/deploy/kube-config/influxdb
Edit all the YAML files (grafana.yaml, heapster.yaml, influxdb.yaml), inserting a toleration that allows them to run on master nodes. For example, the relevant part of grafana.yaml becomes:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: monitoring-grafana
  namespace: kube-system
spec:
  replicas: 1
  template:
    metadata:
      labels:
        task: monitoring
        k8s-app: grafana
    spec:
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      containers:
      - name: grafana
        image: k8s.gcr.io/heapster-grafana-amd64:v4.4.3
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - mountPath: /etc/ssl/certs
          name: ca-certificates
Then create the RBAC rules and the Heapster, Grafana & InfluxDB services from your edited files:
kubectl create -f heapster-master/deploy/kube-config/rbac/heapster-rbac.yaml
kubectl create -f heapster-master/deploy/kube-config/influxdb/
Create a service account for accessing the dashboard UI:
cat <<EOF > master_account.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kube-system
EOF
kubectl apply -f master_account.yaml
Get a bearer token to log in to the dashboard UI:
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin-user | awk '{print $1}')
(copy the token)
Open a proxy from your desktop:
kubectl proxy
Access the dashboard UI through the proxy (with the recommended deployment it is typically reachable at http://localhost:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/) and paste the token obtained above.
Now, for Canal (flanneld) to work correctly, I had to specify which network interface flanneld should use. This is done through the canal-config ConfigMap: in the dashboard, select the kube-system namespace, open Config Maps, and edit canal-config. Find the "canal_iface" key in the data field and set it to your interface name, in my case "tun0":
"data": {
    "canal_iface": "tun0",
    "cni_network_config": "{ ... }"
}
Adding the remaining master nodes: first install Docker, kubelet and kubeadm as shown in the first master node chapter, and remember to turn swap off. Create /etc/kubernetes/pki and copy all the certificate files from the first master into this directory:
mkdir -p /etc/kubernetes/pki
scp -r root@10.11.12.3:/etc/kubernetes/pki /etc/kubernetes
Copy kubeadm config file too:
scp root@10.11.12.3:kube_adm.yaml .
Init master node:
kubeadm init --config kube_adm.yaml
Check in the output that kubeadm finds the existing certificates and reuses them on this master:
...
[preflight] Starting the kubelet service
[certificates] Using the existing ca certificate and key.
[certificates] Using the existing apiserver certificate and key.
[certificates] Using the existing apiserver-kubelet-client certificate and key.
[certificates] Using the existing sa key.
[certificates] Using the existing front-proxy-ca certificate and key.
[certificates] Using the existing front-proxy-client certificate and key.
[certificates] Valid certificates and keys now exist in "/etc/kubernetes/pki"
...
Repeat this process for each additional master node (two masters in this example) and check that everything is running:
kubectl get nodes
Adding worker nodes: first install Docker, kubelet and kubeadm as shown in the first master node chapter, and remember to turn swap off. Enable and start the kubelet and docker services. Then join the cluster with the command we obtained earlier on the master nodes (note that we register against the load balancer IP):
kubeadm join --token 9aeb42.99b7540a5833866a 10.11.12.9:6443 --discovery-token-ca-cert-hash sha256:5ea4d794cbd1285d486a70bb37d2d6250908fa33cbfc4103b2d1f957ca204476
If the join command was lost, you can retrieve the token by running the following on a master node:
kubeadm token list
You can regenerate the CA certificate's sha256 digest with:
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
With these two values you can reconstruct the join command:
kubeadm join --token <token from kubeadm token list> --discovery-token-ca-cert-hash sha256:<value from openssl command> 10.11.12.9:6443
Label this node (from master):
kubectl label node node.name.com name=piolin
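The label can then be used to pin workloads to that node through a nodeSelector. A minimal sketch (the deployment name and image are placeholders):
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: example-pinned
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: example-pinned
    spec:
      nodeSelector:
        name: piolin
      containers:
      - name: example
        image: nginx:1.13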
Rotate container logs so they do not fill up the disk:
cat << EOF > /etc/logrotate.d/containers
/var/lib/docker/containers/*/*-json.log {
    rotate 5
    copytruncate
    missingok
    notifempty
    compress
    maxsize 10M
    daily
    create 0644 root root
}
EOF
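You can dry-run the new rule to make sure logrotate parses it; -d prints what would be done without rotating anything:
logrotate -d /etc/logrotate.d/containers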
My images are pushed to an ECR registry on AWS, so I deploy registry-creds to keep the image pull secrets up to date:
cat <<EOF > registryCreds.yaml
apiVersion: v1
kind: Secret
metadata:
  name: registry-creds-ecr
  namespace: kube-system
  labels:
    app: registry-creds
    cloud: ecr
data:
  AWS_ACCESS_KEY_ID: <-- PUT YOUR BASE64 ENCODED AWS IAM ACCESS KEY ID -->
  AWS_SECRET_ACCESS_KEY: <-- PUT YOUR BASE64 ENCODED AWS IAM ACCESS KEY -->
  aws-account: <-- PUT YOUR BASE64 ENCODED AWS ACCOUNT -->
  aws-region: <-- PUT YOUR BASE64 ENCODED AWS REGION -->
type: Opaque
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: registry-creds
rules:
- apiGroups:
  - ""
  resources:
  - namespaces
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - create
  - get
  - update
- apiGroups:
  - ""
  resources:
  - serviceaccounts
  verbs:
  - get
  - update
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: registry-creds
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: registry-creds
subjects:
- kind: ServiceAccount
  name: registry-creds
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: registry-creds
  namespace: kube-system
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  labels:
    version: v1.7
  name: registry-creds
  namespace: kube-system
spec:
  replicas: 1
  template:
    metadata:
      labels:
        name: registry-creds
        version: v1.7
    spec:
      serviceAccountName: registry-creds
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      containers:
      - image: upmcenterprises/registry-creds:1.7
        name: registry-creds
        imagePullPolicy: Always
        env:
        - name: AWS_ACCESS_KEY_ID
          valueFrom:
            secretKeyRef:
              name: registry-creds-ecr
              key: AWS_ACCESS_KEY_ID
        - name: AWS_SECRET_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: registry-creds-ecr
              key: AWS_SECRET_ACCESS_KEY
        - name: awsaccount
          valueFrom:
            secretKeyRef:
              name: registry-creds-ecr
              key: aws-account
        - name: awsregion
          valueFrom:
            secretKeyRef:
              name: registry-creds-ecr
              key: aws-region
EOF
kubectl create -f registryCreds.yaml
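To check that the controller is running and creating the pull secrets (awsecr-cred is the default secret name used by the registry-creds project; verify against its documentation):
kubectl -n kube-system get pods -l name=registry-creds
kubectl get secrets --all-namespaces | grep awsecr-cred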