#!/bin/bash 
#
# check_k8s_global
#
# Author        : Nohaj
# Contact       : johan@slashroot.fr
# Date          : 19/02/19
# Version       : 1.0
# Description   : Basic script to check if a k8s cluster is OK
# Requires      : kubectl utility with configured access to a k8s cluster
#

#
# Variables and checks
#

# This script needs kubectl.
# The message is written to stdout, like every other status line of this
# script: Nagios-compatible schedulers take the plugin status text from stdout.
# Exit code 3 is the UNKNOWN state.
if ! command -v kubectl > /dev/null 2>&1 ; then
    echo "UNKNOWN: This script need the kubectl utility to run."
    exit 3
fi

# Check kubectl command: it must be able to list the nodes of the cluster,
# otherwise no meaningful state can be computed.
if ! kubectl get nodes > /dev/null 2>&1 ; then
    echo "UNKNOWN: Kubectl command doesn't seems to be working"
    exit 3
fi

# usage
#
# Print the help text on stdout, then terminate the script with exit code 3.
# Never returns to the caller.
usage() {
    local -a help_lines=(
        ""
        "Usage : check_k8s_global -w running_ETCD,running_CONTROLPLANE,running_WORKER -c running_ETCD,running_CONTROLPLANE,running_WORKER"
        ""
        "Options:"
        "    -h                      Print help"
        "    -w <WARN_THRESOLDS>     Warning thresolds for number of running components"
        "    -c <CRIT_THRESOLDS>     Critical thresolds for number of running components"
        ""
        "Example :"
        ""
        "    check_k8s_global -w 2,2,10 -c 1,1,8"
        "        Warning state if only 2 etcd, 2 controlplane or 10 workers are up and running"
        "        Critical state if only 1 etcd, 1 controlplanes or 8 workers are up and running"
        ""
    )

    # One array element per output line
    printf '%s\n' "${help_lines[@]}"
    exit 3
}

# Script state, everything starts at zero.
#   exit_code        : status returned at the end of the script
#   node_*           : nodes in / not in the Ready state
#   <role>_ok / _err : per-role count of ready / not ready nodes
exit_code=0
node_ready=0 node_failed=0
worker_ok=0 worker_err=0
controlplane_ok=0 controlplane_err=0
etcd_ok=0 etcd_err=0

#
# Let's go
#

# parse_thresholds <warn|crit> <etcd,controlplane,worker>
#
# Validate a threshold triplet (three comma-separated unsigned integers) and
# store it in the globals etcd_<level>_thres, cp_<level>_thres and
# worker_<level>_thres.
# Returns 0 on success; returns 1 and leaves the globals untouched when the
# triplet is malformed.
parse_thresholds() {
    local level="$1" spec="$2"
    local triplet_re='^([0-9]+),([0-9]+),([0-9]+)$'

    [[ $spec =~ $triplet_re ]] || return 1

    # 10# forces base 10, so a zero-padded value such as "08" is stored as 8
    # instead of being rejected as invalid octal by later arithmetic tests.
    printf -v "etcd_${level}_thres" '%d' "$((10#${BASH_REMATCH[1]}))"
    printf -v "cp_${level}_thres" '%d' "$((10#${BASH_REMATCH[2]}))"
    printf -v "worker_${level}_thres" '%d' "$((10#${BASH_REMATCH[3]}))"
}

# The leading ':' selects getopts' silent error reporting.
# -w and -c take an argument, -h does not.
while getopts ":hw:c:" opt; do
    case "$opt" in
        w)
            # A malformed value is ignored here: the thresholds stay unset.
            parse_thresholds warn "$OPTARG"
            ;;
        c)
            parse_thresholds crit "$OPTARG"
            ;;
        h)
            usage
            ;;
        *)
            # Unknown option or missing option argument
            usage
            ;;
    esac
done

# We check that we have all the thresholds we need: each of the six values
# must be a non-empty string made of digits only.
# A regex is used on purpose: a glob such as [0-9]* only constrains the first
# character and would accept a value like "1abc".
for threshold in "$etcd_warn_thres" "$cp_warn_thres" "$worker_warn_thres" \
                 "$etcd_crit_thres" "$cp_crit_thres" "$worker_crit_thres" ; do
    if [[ ! $threshold =~ ^[0-9]+$ ]] ; then
        # usage prints the help text and exits
        usage
    fi
done

# count_node_states
#
# Read node lines on stdin (format of `kubectl get nodes --no-headers`:
# name, status, comma-separated roles, ...) and update the global counters:
#   node_ready / node_failed      nodes whose status is / is not "Ready"
#   <role>_ok / <role>_err        same split for each of the roles
#                                 worker, etcd and controlplane
# Blank lines are ignored. Roles other than the three above are not counted.
count_node_states() {
    local line state roles role suffix
    local -a role_list

    # "|| [[ -n $line ]]" keeps a last line that has no trailing newline
    while IFS= read -r line || [[ -n $line ]] ; do
        [[ -n ${line//[[:space:]]/} ]] || continue

        # Field 1 (node name) and everything after the roles are not needed
        IFS=$' \t\n' read -r _ state roles _ <<< "$line"

        # NOTE(review): the match is exact, so a combined status such as
        # "Ready,SchedulingDisabled" (presumably a cordoned node) is counted
        # as not ready. Confirm this is the intended behaviour.
        if [[ $state == "Ready" ]] ; then
            node_ready=$((node_ready+1))
            suffix=ok
        else
            node_failed=$((node_failed+1))
            suffix=err
        fi

        IFS=',' read -r -a role_list <<< "$roles"
        for role in "${role_list[@]}" ; do
            case "$role" in
                worker|etcd|controlplane)
                    # Increments worker_ok, etcd_err, controlplane_ok, ...
                    # The case pattern guarantees a known variable name.
                    (( ${role}_${suffix} += 1 ))
                    ;;
            esac
        done
    done
}

# Retrieve states of the nodes
count_node_states < <(kubectl get nodes --no-headers)

# Totals per category: members in ready state plus members that are not
total_node=$((node_failed+node_ready))
total_worker=$((worker_err+worker_ok))
total_etcd=$((etcd_err+etcd_ok))
total_controlplane=$((controlplane_err+controlplane_ok))

# Status line. A category is in trouble when its number of ready members is
# lower than or equal to the matching threshold; critical is tested first so
# it takes precedence over warning. exit_code keeps its previous value in the
# OK branch.
if [[ $worker_ok -le $worker_crit_thres || $etcd_ok -le $etcd_crit_thres || $controlplane_ok -le $cp_crit_thres ]] ; then
    printf '%s\n' "CRITICAL: The Kubernetes cluster is in a CRITICAL state"
    exit_code=2
elif [[ $worker_ok -le $worker_warn_thres || $etcd_ok -le $etcd_warn_thres || $controlplane_ok -le $cp_warn_thres ]] ; then
    printf '%s\n' "WARNING: The Kubernetes cluster is in a WARNING state"
    exit_code=1
else
    printf '%s\n' "OK: The Kubernetes cluster is in a OK state"
fi

# Detail lines: the format is reused for each (label, ready, total) triplet
printf -- '- %s : %s/%s are in ready state\n' \
    "Nodes" "$node_ready" "$total_node" \
    "Controlplane" "$controlplane_ok" "$total_controlplane" \
    "Etcd" "$etcd_ok" "$total_etcd" \
    "Worker" "$worker_ok" "$total_worker"
exit $exit_code
