Dica: As consultas do Prometheus são separadas em scripts individuais. Cada script inclui consultas que coletam dados de monitoramento relacionados.
Utilização dos recursos do cluster
#!/bin/bash
#######################################
# Print the command-line help text to stdout and end the script.
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
print_usage() {
  printf '%s\n' \
    "" \
    "cluster_resource_utilization: Prometheus API calls to show OpenShift cluster resource use" \
    "" \
    "options:" \
    "--csv write output sections to CSV files" \
    "-q, --quiet quiet the display of results in the terminal" \
    "-h, --help show help"
  exit 0
}
#######################################
# Write one result section to a CSV file and report the file name.
# Arguments:
#   $1 - CSV header text (callers pass it with its trailing newline)
#   $2 - result rows
#   $3 - path of the file to create or overwrite
# Outputs:
#   " - <file>" on stdout; header directly followed by the rows and a final
#   newline in the file named by $3
#######################################
write_csv() {
  local header=$1
  local rows=$2
  local outfile=$3

  # printf rather than echo: echo would interpret content such as "-n" or
  # "-e" as an option instead of writing it.
  printf ' - %s\n' "${outfile}"
  printf '%s%s\n' "${header}" "${rows}" > "${outfile}"
}
#######################################
# Print one result section to stdout: a three-line banner, the result data,
# then a blank line.
# Arguments:
#   $1 - banner text
#   $2 - result data
#######################################
display_result() {
  local banner=$1
  local result=$2
  local -r rule="# ------------------------------------------------------------------------------"

  # printf rather than echo so result data that looks like an echo option
  # (for example "-n") is printed verbatim.
  printf '%s\n' "${rule}" "# ${banner}" "${rule}" "${result}" ""
}
# Output switches and their defaults: results are shown in the terminal and
# no CSV files are written unless the options below say otherwise.
WRITE_CSV="false"
SHOW_DISPLAY="true"

#######################################
# Apply the command-line options to the output switches.
# Globals:   WRITE_CSV, SHOW_DISPLAY (written)
# Arguments: the script's command-line arguments
# Outputs:   a warning on stderr for each unrecognised argument
# Notes:     -h/--help calls print_usage, which ends the script.
#######################################
parse_args() {
  local arg
  # The for loop already walks a snapshot of the arguments, so no shift is
  # needed (and the positional parameters are left untouched).
  for arg in "$@"; do
    case "${arg}" in
      --csv)
        WRITE_CSV="true"
        ;;
      -q|--quiet)
        SHOW_DISPLAY="false"
        ;;
      -h|--help)
        print_usage
        ;;
      *)
        printf 'warning: ignoring unknown option: %s\n' "${arg}" >&2
        ;;
    esac
  done
}

parse_args "$@"
# Log in to OpenShift when credentials are supplied through the environment:
# OCP_TOKEN takes precedence, otherwise OCP_USERNAME/OCP_PASSWORD are used.
# When neither is set, no login is attempted and the current oc session is
# queried as-is.
if [[ -n "${OCP_TOKEN}" ]]; then
  oc login --token="${OCP_TOKEN}" --server="${OCP_URL}" > /dev/null 2>&1
elif [[ -n "${OCP_PASSWORD}" ]]; then
  # NOTE(review): the password is passed as a command-line argument and TLS
  # verification is skipped on this path; prefer OCP_TOKEN where possible.
  oc login "${OCP_URL}" -u="${OCP_USERNAME}" -p="${OCP_PASSWORD}" --insecure-skip-tls-verify > /dev/null 2>&1
fi

# The bearer token of the active session is what the Prometheus calls use.
TOKEN=$(oc whoami -t)
if [[ -z "${TOKEN}" || -z "${PROJECT_CPD_INST_OPERANDS}" ]]; then
  echo "OpenShift login unsuccessful. Please verify the credentials stored in your environment (PROJECT_CPD_INST_OPERANDS, OCP_URL, OCP_USERNAME, OCP_PASSWORD/OCP_TOKEN)." >&2
  # Explicit non-zero status: a bare "exit" would return the status of the
  # echo above (0) and make the failure look like success to callers.
  exit 1
fi
#######################################
# Reduce an API server URL to the cluster name used in report banners.
# Strips a leading "http://" or "https://", then a leading literal "api.",
# then every ":6443" occurrence.
# Arguments: $1 - server URL
# Outputs:   the reduced name on stdout
#######################################
cluster_name_from_url() {
  local name=$1
  name=${name#http://}
  name=${name#https://}
  # Glob pattern, so the dot is literal (a regex "api." would also eat "apis").
  name=${name#api.}
  printf '%s\n' "${name//:6443/}"
}

CLUSTER_NAME=$(cluster_name_from_url "$(oc whoami --show-server)")

# Ask oc for the route host directly instead of scraping the table output.
PROM_OCP_ROUTE=$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath='{.spec.host}')
if [[ -z "${PROM_OCP_ROUTE}" ]]; then
  printf 'warning: could not read the prometheus-k8s route host; Prometheus queries will return no data\n' >&2
fi
PROM_URL="https://${PROM_OCP_ROUTE}"
#######################################
# Run one instant query against the Prometheus HTTP API and format the
# JSON response with jq.
# Globals:   TOKEN (bearer token), PROM_URL (base URL) - both read
# Arguments: $1 - PromQL expression
#            $2 - jq filter applied to the response
# Outputs:   whatever the jq filter emits, on stdout
#######################################
prom_query() {
  local promql=$1
  local jq_filter=$2
  # --globoff keeps curl from interpreting {} and [] itself; -k skips TLS
  # certificate verification; -s silences progress output.
  curl --globoff -s -k -X POST -H "Authorization: Bearer ${TOKEN}" \
    "${PROM_URL}/api/v1/query" \
    --data-urlencode "query=${promql}" \
    | jq -r "${jq_filter}"
}

# Each section below: banner text, PromQL expression, then the formatted rows
# ("<label>, <value rounded to 2 decimals><unit>").

TOP10_MEM_BANNER="Top 10 memory-consuming pods, ${PROJECT_CPD_INST_OPERANDS} namespace: <pod>, <memory GB>"
TOP10_MEM_QUERY="topk(10, max(container_memory_working_set_bytes{namespace=\"${PROJECT_CPD_INST_OPERANDS}\",container!=\"\",pod!=\"\"}) by (pod) ) / 10^9"
TOP10=$(prom_query "${TOP10_MEM_QUERY}" \
  '.data.result[] | .metric.pod + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "GB"')

# NOTE(review): the banner says "cpu seconds" but the expression multiplies an
# averaged irate by 100, which looks like a percentage - confirm the label.
CPU_PER_NODE_BANNER="CPU utilization per node, 5min interval: <node name>, <node cpu seconds>"
CPU_PER_NODE_QUERY='(avg by (instance, nodename)(irate(node_cpu_seconds_total{mode!="idle"}[5m]))) *100 * on (instance) group_left (nodename) node_uname_info'
CPU_PER_NODE=$(prom_query "${CPU_PER_NODE_QUERY}" \
  '.data.result[] | .metric.nodename + ", " + (((.value[1]|tonumber)*100|round/100)|tostring)')

MEM_PER_NODE_BANNER="Memory utilization per node: <node name>, <memory usage %>"
MEM_PER_NODE_QUERY='(100 * ((node_memory_MemTotal_bytes -(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes) * on (instance) group_left (nodename) node_uname_info)'
MEM_PER_NODE=$(prom_query "${MEM_PER_NODE_QUERY}" \
  '.data.result[] | .metric.nodename + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "%"')

NET_IO_PER_NODE_BANNER="Network I/O per node, 5min interval: <node name>, <I/O KB>"
NET_IO_PER_NODE_QUERY='(avg by (instance) ((irate(node_network_receive_bytes_total[5m]) + irate(node_network_transmit_bytes_total[5m])) ) * on (instance) group_left (nodename) node_uname_info / 10^3)'
NET_IO_PER_NODE=$(prom_query "${NET_IO_PER_NODE_QUERY}" \
  '.data.result[] | .metric.nodename + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "KB"')

# NOTE(review): the banner promises a count over 30 minutes, but rate() is
# documented as a per-second average; increase() may be what was intended -
# confirm before changing the query.
OCP_API_HTTP_STATS_BANNER="OpenShift API call statuses: <HTTP code>, <count over last 30min>"
OCP_API_HTTP_STATS_QUERY='sum by (code)(rate(apiserver_request_total{verb=~"POST|PUT|DELETE|PATCH|GET|LIST|WATCH"}[30m]))'
OCP_API_HTTP_STATS=$(prom_query "${OCP_API_HTTP_STATS_QUERY}" \
  '.data.result[] | .metric.code + ", " + (((.value[1]|tonumber)|round)|tostring)')
#######################################
# Print the complete terminal report: a title block naming the cluster,
# followed by one display_result section per collected metric.
# Globals:   CLUSTER_NAME and the *_BANNER / result variables (read)
# Outputs:   the report on stdout
#######################################
print_report() {
  printf '%s\n' \
    "" \
    "#===============================================================================" \
    "# Cluster resource utilization: ${CLUSTER_NAME}" \
    "#===============================================================================" \
    ""
  display_result "${TOP10_MEM_BANNER}" "${TOP10}"
  display_result "${CPU_PER_NODE_BANNER}" "${CPU_PER_NODE}"
  display_result "${MEM_PER_NODE_BANNER}" "${MEM_PER_NODE}"
  display_result "${NET_IO_PER_NODE_BANNER}" "${NET_IO_PER_NODE}"
  display_result "${OCP_API_HTTP_STATS_BANNER}" "${OCP_API_HTTP_STATS}"
  printf '\n'
}

# Skipped when -q/--quiet was given.
if [[ "${SHOW_DISPLAY}" == "true" ]]; then
  print_report
fi
#######################################
# Write every collected result section to its own CSV file in the current
# directory, announcing the target directory first.
# Globals:   result variables (read), WORKING_DIR (written)
# Outputs:   a status line, the per-file lines printed by write_csv, and a
#            trailing blank line on stdout
#######################################
write_report_files() {
  WORKING_DIR=${PWD}
  printf '%s\n' "# Writing cluster resource utilization result files to: ${WORKING_DIR}"
  write_csv $'pod,mem_gb\n' "${TOP10}" "cluster_mem_top10_pods.csv"
  write_csv $'node,cpu_seconds\n' "${CPU_PER_NODE}" "cluster_cpu_seconds_per_node.csv"
  write_csv $'node,mem_usage_pct\n' "${MEM_PER_NODE}" "cluster_mem_usage_per_node.csv"
  write_csv $'node,net_io_kb\n' "${NET_IO_PER_NODE}" "cluster_net_io_per_node.csv"
  write_csv $'http_code,count\n' "${OCP_API_HTTP_STATS}" "cluster_api_http_stats.csv"
  printf '\n'
}

# Only when --csv was given.
if [[ "${WRITE_CSV}" == "true" ]]; then
  write_report_files
fi
Velocidade e utilização do disco
#!/bin/bash
#######################################
# Print the command-line help text to stdout and end the script.
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
print_usage() {
  printf '%s\n' \
    "" \
    "disk_speed_utilization: Prometheus API calls to show OpenShift cluster disk speed and use" \
    "" \
    "options:" \
    "--csv write output sections to CSV files" \
    "-q, --quiet quiet the display of results in the terminal" \
    "-h, --help show help"
  exit 0
}
#######################################
# Write one result section to a CSV file and report the file name.
# Arguments:
#   $1 - CSV header text (callers pass it with its trailing newline)
#   $2 - result rows
#   $3 - path of the file to create or overwrite
# Outputs:
#   " - <file>" on stdout; header directly followed by the rows and a final
#   newline in the file named by $3
#######################################
write_csv() {
  local header=$1
  local rows=$2
  local outfile=$3

  # printf rather than echo: echo would interpret content such as "-n" or
  # "-e" as an option instead of writing it.
  printf ' - %s\n' "${outfile}"
  printf '%s%s\n' "${header}" "${rows}" > "${outfile}"
}
#######################################
# Print one result section to stdout: a three-line banner, the result data,
# then a blank line.
# Arguments:
#   $1 - banner text
#   $2 - result data
#######################################
display_result() {
  local banner=$1
  local result=$2
  local -r rule="# ------------------------------------------------------------------------------"

  # printf rather than echo so result data that looks like an echo option
  # (for example "-n") is printed verbatim.
  printf '%s\n' "${rule}" "# ${banner}" "${rule}" "${result}" ""
}
# Output switches and their defaults: results are shown in the terminal and
# no CSV files are written unless the options below say otherwise.
WRITE_CSV="false"
SHOW_DISPLAY="true"

#######################################
# Apply the command-line options to the output switches.
# Globals:   WRITE_CSV, SHOW_DISPLAY (written)
# Arguments: the script's command-line arguments
# Outputs:   a warning on stderr for each unrecognised argument
# Notes:     -h/--help calls print_usage, which ends the script.
#######################################
parse_args() {
  local arg
  # The for loop already walks a snapshot of the arguments, so no shift is
  # needed (and the positional parameters are left untouched).
  for arg in "$@"; do
    case "${arg}" in
      --csv)
        WRITE_CSV="true"
        ;;
      -q|--quiet)
        SHOW_DISPLAY="false"
        ;;
      -h|--help)
        print_usage
        ;;
      *)
        printf 'warning: ignoring unknown option: %s\n' "${arg}" >&2
        ;;
    esac
  done
}

parse_args "$@"
# Log in to OpenShift when credentials are supplied through the environment:
# OCP_TOKEN takes precedence, otherwise OCP_USERNAME/OCP_PASSWORD are used.
# When neither is set, no login is attempted and the current oc session is
# queried as-is.
if [[ -n "${OCP_TOKEN}" ]]; then
  oc login --token="${OCP_TOKEN}" --server="${OCP_URL}" > /dev/null 2>&1
elif [[ -n "${OCP_PASSWORD}" ]]; then
  # NOTE(review): the password is passed as a command-line argument and TLS
  # verification is skipped on this path; prefer OCP_TOKEN where possible.
  oc login "${OCP_URL}" -u="${OCP_USERNAME}" -p="${OCP_PASSWORD}" --insecure-skip-tls-verify > /dev/null 2>&1
fi

# The bearer token of the active session is what the Prometheus calls use.
TOKEN=$(oc whoami -t)
if [[ -z "${TOKEN}" || -z "${PROJECT_CPD_INST_OPERANDS}" ]]; then
  echo "OpenShift login unsuccessful. Please verify the credentials stored in your environment (PROJECT_CPD_INST_OPERANDS, OCP_URL, OCP_USERNAME, OCP_PASSWORD/OCP_TOKEN)." >&2
  # Explicit non-zero status: a bare "exit" would return the status of the
  # echo above (0) and make the failure look like success to callers.
  exit 1
fi
#######################################
# Reduce an API server URL to the cluster name used in report banners.
# Strips a leading "http://" or "https://", then a leading literal "api.",
# then every ":6443" occurrence.
# Arguments: $1 - server URL
# Outputs:   the reduced name on stdout
#######################################
cluster_name_from_url() {
  local name=$1
  name=${name#http://}
  name=${name#https://}
  # Glob pattern, so the dot is literal (a regex "api." would also eat "apis").
  name=${name#api.}
  printf '%s\n' "${name//:6443/}"
}

CLUSTER_NAME=$(cluster_name_from_url "$(oc whoami --show-server)")

# Ask oc for the route host directly instead of scraping the table output.
PROM_OCP_ROUTE=$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath='{.spec.host}')
if [[ -z "${PROM_OCP_ROUTE}" ]]; then
  printf 'warning: could not read the prometheus-k8s route host; Prometheus queries will return no data\n' >&2
fi
PROM_URL="https://${PROM_OCP_ROUTE}"
#######################################
# Run one instant query against the Prometheus HTTP API and format the
# JSON response with jq.
# Globals:   TOKEN (bearer token), PROM_URL (base URL) - both read
# Arguments: $1 - PromQL expression
#            $2 - jq filter applied to the response
# Outputs:   whatever the jq filter emits, on stdout
#######################################
prom_query() {
  local promql=$1
  local jq_filter=$2
  # --globoff keeps curl from interpreting {} and [] itself; -k skips TLS
  # certificate verification; -s silences progress output.
  curl --globoff -s -k -X POST -H "Authorization: Bearer ${TOKEN}" \
    "${PROM_URL}/api/v1/query" \
    --data-urlencode "query=${promql}" \
    | jq -r "${jq_filter}"
}

# Each section below: banner text, PromQL expression, then the formatted rows.

# This section prints the raw sample value (no rounding, no unit suffix).
DISK_IO_PER_NODE_BANNER="Disk I/O per node: <node name>, <disk I/O over last 5min>"
DISK_IO_PER_NODE_QUERY="(avg by (instance) (irate(node_disk_io_time_seconds_total[5m])/1000) * on (instance) group_left (nodename) node_uname_info)"
DISK_IO_PER_NODE=$(prom_query "${DISK_IO_PER_NODE_QUERY}" \
  '.data.result[] | .metric.nodename + ", " + .value[1]')

# NOTE(review): this query and the next are keyed by the "instance" label
# rather than joined to node_uname_info, so the first column is the instance
# value although the banners say <node name> - confirm that is intended.
DISK_WRITE_SPEED_BANNER="Disk write speed: <node name>, <I/O MB over last 2min>"
DISK_WRITE_SPEED_QUERY="(sum by (instance) (irate(node_disk_written_bytes_total[2m])) / 1024 / 1024)"
DISK_WRITE_SPEED=$(prom_query "${DISK_WRITE_SPEED_QUERY}" \
  '.data.result[] | .metric.instance + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "MB"')

DISK_SPACE_FREE_BANNER="Free disk space: <node name>, <free space %>"
DISK_SPACE_FREE_QUERY='(node_filesystem_free_bytes{mountpoint ="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100)'
DISK_SPACE_FREE=$(prom_query "${DISK_SPACE_FREE_QUERY}" \
  '.data.result[] | .metric.instance + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "%"')
#######################################
# Print the complete terminal report: a title block naming the cluster,
# followed by one display_result section per collected metric.
# Globals:   CLUSTER_NAME and the *_BANNER / result variables (read)
# Outputs:   the report on stdout
#######################################
print_report() {
  printf '%s\n' \
    "" \
    "#===============================================================================" \
    "# Disk speed and utilization: ${CLUSTER_NAME}" \
    "#===============================================================================" \
    ""
  display_result "${DISK_IO_PER_NODE_BANNER}" "${DISK_IO_PER_NODE}"
  display_result "${DISK_WRITE_SPEED_BANNER}" "${DISK_WRITE_SPEED}"
  display_result "${DISK_SPACE_FREE_BANNER}" "${DISK_SPACE_FREE}"
  printf '\n'
}

# Skipped when -q/--quiet was given.
if [[ "${SHOW_DISPLAY}" == "true" ]]; then
  print_report
fi
#######################################
# Write every collected result section to its own CSV file in the current
# directory, announcing the target directory first.
# Globals:   result variables (read), WORKING_DIR (written)
# Outputs:   a status line, the per-file lines printed by write_csv, and a
#            trailing blank line on stdout
#######################################
write_report_files() {
  WORKING_DIR=${PWD}
  printf '%s\n' "# Writing disk speed and utilization result files to: ${WORKING_DIR}"
  write_csv $'node,disk_io_time_seconds_5min\n' "${DISK_IO_PER_NODE}" "disk_io_per_node_5min.csv"
  write_csv $'node,io_mb_2min\n' "${DISK_WRITE_SPEED}" "disk_write_speed_per_node_2min.csv"
  write_csv $'node,free_space_pct\n' "${DISK_SPACE_FREE}" "disk_free_space_per_node.csv"
  printf '\n'
}

# Only when --csv was given.
if [[ "${WRITE_CSV}" == "true" ]]; then
  write_report_files
fi
Utilização de recursos do IBM Software Hub
#!/bin/bash
#######################################
# Print the command-line help text to stdout and end the script.
# The first help line is built from the name the script was invoked with, so
# it no longer carries the name and description of the cluster-resource
# script this text was copied from.
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
print_usage() {
  printf '%s\n' \
    "" \
    "${0##*/}: Prometheus API calls to show IBM Software Hub resource use" \
    "" \
    "options:" \
    "--csv write output sections to CSV files" \
    "-q, --quiet quiet the display of results in the terminal" \
    "-h, --help show help"
  exit 0
}
#######################################
# Write one result section to a CSV file and report the file name.
# Arguments:
#   $1 - CSV header text (callers pass it with its trailing newline)
#   $2 - result rows
#   $3 - path of the file to create or overwrite
# Outputs:
#   " - <file>" on stdout; header directly followed by the rows and a final
#   newline in the file named by $3
#######################################
write_csv() {
  local header=$1
  local rows=$2
  local outfile=$3

  # printf rather than echo: echo would interpret content such as "-n" or
  # "-e" as an option instead of writing it.
  printf ' - %s\n' "${outfile}"
  printf '%s%s\n' "${header}" "${rows}" > "${outfile}"
}
#######################################
# Print one result section to stdout: a three-line banner, the result data,
# then a blank line.
# Arguments:
#   $1 - banner text
#   $2 - result data
#######################################
display_result() {
  local banner=$1
  local result=$2
  local -r rule="# ------------------------------------------------------------------------------"

  # printf rather than echo so result data that looks like an echo option
  # (for example "-n") is printed verbatim.
  printf '%s\n' "${rule}" "# ${banner}" "${rule}" "${result}" ""
}
# Output switches and their defaults: results are shown in the terminal and
# no CSV files are written unless the options below say otherwise.
WRITE_CSV="false"
SHOW_DISPLAY="true"

#######################################
# Apply the command-line options to the output switches.
# Globals:   WRITE_CSV, SHOW_DISPLAY (written)
# Arguments: the script's command-line arguments
# Outputs:   a warning on stderr for each unrecognised argument
# Notes:     -h/--help calls print_usage, which ends the script.
#######################################
parse_args() {
  local arg
  # The for loop already walks a snapshot of the arguments, so no shift is
  # needed (and the positional parameters are left untouched).
  for arg in "$@"; do
    case "${arg}" in
      --csv)
        WRITE_CSV="true"
        ;;
      -q|--quiet)
        SHOW_DISPLAY="false"
        ;;
      -h|--help)
        print_usage
        ;;
      *)
        printf 'warning: ignoring unknown option: %s\n' "${arg}" >&2
        ;;
    esac
  done
}

parse_args "$@"
# Log in to OpenShift when credentials are supplied through the environment:
# OCP_TOKEN takes precedence, otherwise OCP_USERNAME/OCP_PASSWORD are used.
# When neither is set, no login is attempted and the current oc session is
# queried as-is.
if [[ -n "${OCP_TOKEN}" ]]; then
  oc login --token="${OCP_TOKEN}" --server="${OCP_URL}" > /dev/null 2>&1
elif [[ -n "${OCP_PASSWORD}" ]]; then
  # NOTE(review): the password is passed as a command-line argument and TLS
  # verification is skipped on this path; prefer OCP_TOKEN where possible.
  oc login "${OCP_URL}" -u="${OCP_USERNAME}" -p="${OCP_PASSWORD}" --insecure-skip-tls-verify > /dev/null 2>&1
fi

# The bearer token of the active session is what the Prometheus calls use.
TOKEN=$(oc whoami -t)
if [[ -z "${TOKEN}" || -z "${PROJECT_CPD_INST_OPERANDS}" ]]; then
  echo "OpenShift login unsuccessful. Please verify the credentials stored in your environment (PROJECT_CPD_INST_OPERANDS, OCP_URL, OCP_USERNAME, OCP_PASSWORD/OCP_TOKEN)." >&2
  # Explicit non-zero status: a bare "exit" would return the status of the
  # echo above (0) and make the failure look like success to callers.
  exit 1
fi
#######################################
# Reduce an API server URL to the cluster name used in report banners.
# Strips a leading "http://" or "https://", then a leading literal "api.",
# then every ":6443" occurrence.
# Arguments: $1 - server URL
# Outputs:   the reduced name on stdout
#######################################
cluster_name_from_url() {
  local name=$1
  name=${name#http://}
  name=${name#https://}
  # Glob pattern, so the dot is literal (a regex "api." would also eat "apis").
  name=${name#api.}
  printf '%s\n' "${name//:6443/}"
}

CLUSTER_NAME=$(cluster_name_from_url "$(oc whoami --show-server)")

# Ask oc for the route host directly instead of scraping the table output.
PROM_OCP_ROUTE=$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath='{.spec.host}')
if [[ -z "${PROM_OCP_ROUTE}" ]]; then
  printf 'warning: could not read the prometheus-k8s route host; Prometheus queries will return no data\n' >&2
fi
PROM_URL="https://${PROM_OCP_ROUTE}"
#######################################
# Run one instant query against the Prometheus HTTP API and format the
# JSON response with jq.
# Globals:   TOKEN (bearer token), PROM_URL (base URL) - both read
# Arguments: $1 - PromQL expression
#            $2 - jq filter applied to the response
# Outputs:   whatever the jq filter emits, on stdout
#######################################
prom_query() {
  local promql=$1
  local jq_filter=$2
  # --globoff keeps curl from interpreting {} and [] itself; -k skips TLS
  # certificate verification; -s silences progress output.
  curl --globoff -s -k -X POST -H "Authorization: Bearer ${TOKEN}" \
    "${PROM_URL}/api/v1/query" \
    --data-urlencode "query=${promql}" \
    | jq -r "${jq_filter}"
}

# Each section below: banner text, PromQL expression scoped to the
# PROJECT_CPD_INST_OPERANDS namespace, then the formatted rows
# ("<label>, <value rounded to 2 decimals><unit>").

# The limit side of the two "compared to limit" queries only selects pods
# whose name starts with spark, zen, rstudio, spawner, portal-main or ax-.
CPU_PCT_LIMIT_BANNER="Percentage of CPU utilization compared to limit: <pod name>, <cpu %>"
CPU_LIMIT_QUERY="sort_desc(100*sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"${PROJECT_CPD_INST_OPERANDS}\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",namespace=\"${PROJECT_CPD_INST_OPERANDS}\",pod=~\"(spark|zen|rstudio|spawner|portal-main|ax-).*\"}) by (pod))"
CPU_PCT_LIMIT=$(prom_query "${CPU_LIMIT_QUERY}" \
  '.data.result[] | .metric.pod + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "%"')

MEM_PCT_LIMIT_BANNER="Percentage of memory utilization compared to limit: <pod name>, <mem %>"
MEM_LIMIT_QUERY="sort_desc(100 * sum(container_memory_usage_bytes{namespace=\"${PROJECT_CPD_INST_OPERANDS}\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{resource=\"memory\",unit=\"byte\",namespace=\"${PROJECT_CPD_INST_OPERANDS}\",pod=~\"(spark|zen|rstudio|spawner|portal-main|ax-).*\"}) by (pod))"
MEM_PCT_LIMIT=$(prom_query "${MEM_LIMIT_QUERY}" \
  '.data.result[] | .metric.pod + ", " + (((.value[1]|tonumber)*100|round/100)|tostring) + "%"')

# NOTE(review): the banner says "CPU utilization" but the expression sums
# kube_pod_container_resource_limits (configured CPU limits) per add-on id,
# not measured usage - confirm which one the report should show.
CPU_CPD_ADDON_BANNER="CPU utilization per IBM Software Hub product: <cp4d product label>, <cpu cores>"
CPU_CPD_ADDON_QUERY="sort_desc(sum(max(kube_pod_labels{namespace=\"${PROJECT_CPD_INST_OPERANDS}\", label_icpdsupport_add_on_id!=\"\" }) by (label_icpdsupport_add_on_id,pod) * on(pod) group_right(label_icpdsupport_add_on_id)max(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",namespace=\"${PROJECT_CPD_INST_OPERANDS}\"}) by (pod)) by (label_icpdsupport_add_on_id))"
CPU_CPD_ADDON=$(prom_query "${CPU_CPD_ADDON_QUERY}" \
  '.data.result[] | .metric.label_icpdsupport_add_on_id + ", " + (((.value[1]|tonumber)*100|round/100)|tostring)')
#######################################
# Print the complete terminal report: a title block naming the cluster and
# the namespace, followed by one display_result section per collected metric.
# Globals:   CLUSTER_NAME, PROJECT_CPD_INST_OPERANDS and the *_BANNER /
#            result variables (read)
# Outputs:   the report on stdout
#######################################
print_report() {
  printf '%s\n' \
    "" \
    "#===============================================================================" \
    "# IBM Software Hub resource utilization: ${CLUSTER_NAME}" \
    "# namespace: ${PROJECT_CPD_INST_OPERANDS}" \
    "#===============================================================================" \
    ""
  display_result "${CPU_PCT_LIMIT_BANNER}" "${CPU_PCT_LIMIT}"
  display_result "${MEM_PCT_LIMIT_BANNER}" "${MEM_PCT_LIMIT}"
  display_result "${CPU_CPD_ADDON_BANNER}" "${CPU_CPD_ADDON}"
  printf '\n'
}

# Skipped when -q/--quiet was given.
if [[ "${SHOW_DISPLAY}" == "true" ]]; then
  print_report
fi
#######################################
# Write every collected result section to its own CSV file in the current
# directory, announcing the target directory first.
# Globals:   result variables (read), WORKING_DIR (written)
# Outputs:   a status line, the per-file lines printed by write_csv, and a
#            trailing blank line on stdout
#######################################
write_report_files() {
  WORKING_DIR=${PWD}
  printf '%s\n' "# Writing IBM Software Hub resource utilization result files to: ${WORKING_DIR}"
  write_csv $'pod,cpu_pct\n' "${CPU_PCT_LIMIT}" "cp4d_cpu_limit_pct.csv"
  write_csv $'pod,mem_pct\n' "${MEM_PCT_LIMIT}" "cp4d_mem_limit_pct.csv"
  write_csv $'cp4d_product,cpu_cores\n' "${CPU_CPD_ADDON}" "cp4d_product_cpu.csv"
  printf '\n'
}

# Only when --csv was given.
if [[ "${WRITE_CSV}" == "true" ]]; then
  write_report_files
fi