From d4fbc7cd784cee210538dce5ca26d0243202e817 Mon Sep 17 00:00:00 2001
From: Liliia Butorina
Date: Wed, 28 Aug 2019 17:44:08 +0300
Subject: [PATCH] Repatch extended resource cmk.intel.com/exclusive-cores after
kubelet restart.
Since Kubernetes 1.10, the kubelet resets extended resource capacity to
zero after it restarts. To re-patch the extended resource
cmk.intel.com/exclusive-cores, a "rediscover" container (which re-runs
`cmk discover`) is added to the reconcile-nodereport daemonset.
Signed-off-by: Liliia Butorina
---
cmk.py | 2 +-
docs/cli.md | 2 +-
docs/html/docs/cli.html | 2 +-
docs/html/docs/operator.html | 2 +-
docs/operator.md | 4 ++--
intel/clusterinit.py | 20 +++++++++++++-------
intel/k8s.py | 4 +++-
intel/uninstall.py | 2 +-
8 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/cmk.py b/cmk.py
index eeae16a8..8008f156 100755
--- a/cmk.py
+++ b/cmk.py
@@ -47,7 +47,7 @@
software.
--cmk-cmd-list= Comma seperated list of CMK sub-commands to run
on each host
- [default: init,reconcile,install,discover,nodereport].
+ [default: init,install,discover,rediscover,reconcile,nodereport].
--cmk-img=
CMK Docker image [default: cmk:v1.3.1].
--cmk-img-pol= Image pull policy for the CMK Docker image
[default: IfNotPresent].
diff --git a/docs/cli.md b/docs/cli.md
index e00ae2d9..f1a7f330 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -1030,7 +1030,7 @@ $ docker run -it --volume=/etc/cmk:/etc/cmk:rw \
### `cmk uninstall`
Removes `cmk` from a node. Uninstall process reverts `cmk cluster-init`:
- - deletes `cmk-reconcile-nodereport-pod-{node}` if present
+ - deletes `cmk-rediscover-reconcile-nodereport-pod-{node}` if present
- removes `NodeReport` from Kubernetes ThirdPartyResources if present
- removes `ReconcileReport` from Kubernetes ThirdPartyResources if present
- removes cmk node label if present
diff --git a/docs/html/docs/cli.html b/docs/html/docs/cli.html
index 939f30d6..fcadb9c2 100644
--- a/docs/html/docs/cli.html
+++ b/docs/html/docs/cli.html
@@ -1027,7 +1027,7 @@
Removes cmk from a node. Uninstall process reverts cmk cluster-init:
-- deletes
cmk-reconcile-nodereport-pod-{node} if present
+- deletes
cmk-rediscover-reconcile-nodereport-pod-{node} if present
- removes
NodeReport from Kubernetes ThirdPartyResources if present
- removes
ReconcileReport from Kubernetes ThirdPartyResources if present
- removes cmk node label if present
diff --git a/docs/html/docs/operator.html b/docs/html/docs/operator.html
index 653e8792..8590d356 100644
--- a/docs/html/docs/operator.html
+++ b/docs/html/docs/operator.html
@@ -617,7 +617,7 @@
the recommended way to start troubleshooting is to look at the logs using kubectl logs POD_NAME [CONTAINER_NAME] -f.
For example, assuming you ran the cmk-cluster-init-pod template with default options, it
should create two pods on each node named cmk-init-install-discover-pod-<node-name> and
-cmk-reconcile-nodereport-<node-name>, where <node-name> should be replaced with the name of the node.
+cmk-rediscover-reconcile-nodereport-<node-name>, where <node-name> should be replaced with the name of the node.
If you want to look at the logs from the container which ran the discover subcommand in the pod, you can use
kubectl logs -f cmk-init-install-discover-pod-<node-name> discover
If you want to look at the logs from the container which ran the reconcile subcommand in the pod, you can use
diff --git a/docs/operator.md b/docs/operator.md
index 34f6224e..4af17760 100644
--- a/docs/operator.md
+++ b/docs/operator.md
@@ -567,13 +567,13 @@ the recommended way to start troubleshooting is to look at the logs using `kubec
For example, assuming you ran the [cmk-cluster-init-pod template][cluster-init-template] with default options, it
should create two pods on each node named `cmk-init-install-discover-pod-` and
-`cmk-reconcile-nodereport-`, where `` should be replaced with the name of the node.
+`cmk-rediscover-reconcile-nodereport-`, where `` should be replaced with the name of the node.
If you want to look at the logs from the container which ran the `discover` subcommand in the pod, you can use
`kubectl logs -f cmk-init-install-discover-pod- discover`
If you want to look at the logs from the container which ran the `reconcile` subcommand in the pod, you can use
-`kubectl logs -f cmk-reconcile-nodereport-pod- reconcile`
+`kubectl logs -f cmk-rediscover-reconcile-nodereport-pod- reconcile`
If you want to remove `cmk` use `cmk-uninstall-pod.yaml`. [nodeSelector](https://kubernetes.io/docs/user-guide/node-selection)
can help to fine-grain the deletion for specific node.
diff --git a/intel/clusterinit.py b/intel/clusterinit.py
index 7a7cccb9..daa627f8 100644
--- a/intel/clusterinit.py
+++ b/intel/clusterinit.py
@@ -36,7 +36,7 @@ def cluster_init(host_list, all_hosts, cmd_list, cmk_img, cmk_img_pol,
# Check if all the flag values passed are valid.
# Check if cmk_cmd_list is valid.
- valid_cmd_list = ["init", "discover", "install", "reconcile", "nodereport"]
+ valid_cmd_list = ["init", "discover", "install", "rediscover", "reconcile", "nodereport"]
for cmk_cmd in cmk_cmd_list:
if cmk_cmd not in valid_cmd_list:
raise RuntimeError("CMK command should be one of {}"
@@ -147,6 +147,8 @@ def run_cmd_pods(cmd_list, cmd_init_list, cmk_img, cmk_img_pol, conf_dir,
args = "/cmk/cmk.py isolate --pool=infra /cmk/cmk.py -- reconcile --interval=5 --publish" # noqa: E501
elif cmd == "nodereport":
args = "/cmk/cmk.py isolate --pool=infra /cmk/cmk.py -- node-report --interval=5 --publish" # noqa: E501
+ elif cmd == "rediscover":
+ args = "/cmk/cmk.py isolate --pool=infra /cmk/cmk.py -- discover; sleep infinity" # noqa: E501
update_pod_with_container(pod, cmd, cmk_img, cmk_img_pol, args)
elif cmd_init_list:
@@ -178,10 +180,10 @@ def run_cmd_pods(cmd_list, cmd_init_list, cmk_img, cmk_img_pol, conf_dir,
for node_name in cmk_node_list:
if cmd_list:
- update_pod_with_node_details(pod, node_name, cmd_list)
+ update_pod_with_node_details(pod, node_name, cmd_list, "ds")
daemon_set = k8s.ds_from(pod=pod)
elif cmd_init_list:
- update_pod_with_node_details(pod, node_name, cmd_init_list)
+ update_pod_with_node_details(pod, node_name, cmd_init_list, "pod")
try:
if cmd_list:
@@ -315,8 +317,9 @@ def wait_for_pod_phase(pod_name, phase_name):
sys.exit(1)
for pod in pod_list_resp["items"]:
- if ("metadata" in pod) and ("name" in pod["metadata"]) \
- and pod_name in pod["metadata"]["name"]:
+ if ("metadata" in pod) and ("labels" in pod["metadata"]) \
+ and ("podname" in pod["metadata"]["labels"]) \
+ and (pod_name == pod["metadata"]["labels"]["podname"]):
if pod["status"]["phase"] == phase_name:
wait = False
break
@@ -333,10 +336,13 @@ def update_pod(pod, restart_pol, conf_dir, install_dir, serviceaccount):
pod["spec"]["volumes"][2]["hostPath"]["path"] = install_dir
-def update_pod_with_node_details(pod, node_name, cmd_list):
+def update_pod_with_node_details(pod, node_name, cmd_list, res_type):
pod["spec"]["nodeName"] = node_name
- pod_name = "cmk-{}-pod-{}".format("-".join(cmd_list), node_name)
+ pod_name = "cmk-{}-{}-{}".format("-".join(cmd_list), res_type, node_name)
pod["metadata"]["name"] = pod_name
+ # name max length is 63, so move to labels key-value
+ pod["metadata"]["labels"] = {"podname": pod_name}
+ logging.info("Created pod name: {}".format(pod_name))
def update_pod_with_pull_secret(pod, pull_secret):
diff --git a/intel/k8s.py b/intel/k8s.py
index de053d41..a71b13c1 100644
--- a/intel/k8s.py
+++ b/intel/k8s.py
@@ -69,7 +69,9 @@ def ds_from(pod):
"metadata": {
"labels": {
"app":
- pod["metadata"]["name"].replace("pod", "ds")
+ pod["metadata"]["name"].replace("pod", "ds"),
+ "podname":
+ pod["metadata"]["labels"]["podname"]
}
},
"spec": pod["spec"]
diff --git a/intel/uninstall.py b/intel/uninstall.py
index 00d54260..731c122a 100644
--- a/intel/uninstall.py
+++ b/intel/uninstall.py
@@ -32,7 +32,7 @@
def uninstall(install_dir, conf_dir, namespace):
delete_cmk_pod("cmk-init-install-discover-pod", namespace,
postfix=os.getenv("NODE_NAME"))
- delete_cmk_pod("cmk-reconcile-nodereport-ds", namespace,
+ delete_cmk_pod("cmk-rediscover-reconcile-nodereport-ds", namespace,
postfix=os.getenv("NODE_NAME"))
delete_cmk_pod("cmk-node-report-ds-all", namespace)