Skip to content

SAGE-1486: Auto renew certificates after a specified number of days. #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bk-api/bk_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,8 @@ def deploy_wes(node_id, this_debug, force=False):

logger.debug("(deploy_wes) using beehive %s", beehive_id)

# NOTE(sean): It is important to keep this step before certificate generation. We want to make
# sure that, if a node is not ready, we do not wastefully generate certificates.
logger.debug("(deploy_wes) checking if kubernetes is running on node %s", node_id)
try:
result_stdout_str ,result_stderr_str, exit_code = node_ssh(node_id, "kubectl get nodes")
Expand All @@ -696,7 +698,6 @@ def deploy_wes(node_id, this_debug, force=False):
if exit_code != 0:
raise Exception(f"ssh failed or kubectl is not yet ready ({result_stderr_str})")


logger.debug("calling create_ssh_upload_cert")
try:
create_ssh_upload_cert(bee_db, node_id, beehive_obj, force=force )
Expand Down Expand Up @@ -1062,7 +1063,6 @@ def post(self, node_id):
logger.error(e)
raise ErrorResponse(f"add_vsn returned: { type(e).__name__ }: {str(e)} {ShowException()}" , status_code=HTTPStatus.INTERNAL_SERVER_ERROR)


return jsonify({"success": True})


Expand Down
122 changes: 59 additions & 63 deletions bk-deploy-manager/deploy_manager.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,76 @@
#!/usr/bin/env python3

import datetime
import logging
import os
import sys
import time

import dateutil.parser
import requests

logging.basicConfig(level=logging.INFO)
from datetime import datetime, timedelta

BEEKEEPER_URL = os.getenv("BEEKEEPER_URL", "http://localhost:5000")
BEEKEEPER_RENEW_DAYS = int(os.getenv("BEEKEEPER_RENEW_DAYS", "7"))


# Example input: 2021-11-19T02:07:22
# Returns a datetime.datetime, or None when the input is None or an empty string.
def parseTime(timestamp):
def parseTime(s: str):
if s in [None, ""]:
return None
# dateutil.parser.isoparse('2008-09-03T20:56:35.450686')
return dateutil.parser.isoparse(timestamp)
return dateutil.parser.isoparse(s)


def get_candidates():
def age(t: datetime) -> timedelta:
    """Return the time elapsed between ``t`` and now.

    "Now" is taken in the same timezone context as ``t`` so the subtraction
    is always well-defined:

    * naive ``t``  -> compared against the naive local clock;
    * aware ``t``  -> compared against an aware "now" in ``t``'s timezone.

    Mixing a naive ``datetime.now()`` with an aware ``t`` raises
    ``TypeError``, which would happen for any ISO 8601 timestamp that
    carries a ``Z`` suffix or a UTC offset.

    NOTE(review): for naive timestamps the local wall clock is used; this is
    only accurate if the timestamps were recorded in the same local time as
    this process — confirm against the producer of the timestamps.
    """
    # A datetime is only "aware" when its tzinfo actually yields an offset;
    # otherwise treat it as naive and use the plain local clock.
    tz = t.tzinfo if t.utcoffset() is not None else None
    return datetime.now(tz=tz) - t

if BEEKEEPER_URL == "":
logging.error(f"BEEKEEPER_URL not defined")
sys.exit(1)

logging.info(f"BEEKEEPER_URL: {BEEKEEPER_URL}")
def get_node_list_from_beekeeper():
logging.info("BEEKEEPER_URL: %s", BEEKEEPER_URL)
url = f"{BEEKEEPER_URL}/state"
logging.info(f"url: {url}")

try:
resp = requests.get(url)
except Exception as e:
raise Exception(f"GET request to {url} failed: {str(e)}")
logging.info("url: %s", url)
resp = requests.get(url)
resp.raise_for_status()
return resp.json()["data"]

if resp.status_code != 200:
raise Exception(f"status_code: {resp.status_code} body: {resp.text}")

nodes = resp.json()
def get_deploy_wes_candidates():
nodes = get_node_list_from_beekeeper()

candidates = []

if not "data" in nodes:
raise Exception("Field data missing")

for n in nodes["data"]:
node_id = n["id"]
registration_event = n.get("registration_event")
wes_deploy_event = n.get("wes_deploy_event")
# print("id: "+node_id)
# print("wes_deploy_event: "+n["wes_deploy_event"])
if registration_event in ["", None]:
logging.info("node %s is not registered", node_id)
continue

if n.get("beehive") in ["", None]:
logging.info(f"node {node_id} does not belong to a beehive")
continue

if wes_deploy_event in ["", None] or parseTime(registration_event) >= parseTime(wes_deploy_event):
logging.info(
f"scheduling node {node_id} for wes deployment (reason: no previous deployment or re-registered node)"
)
candidates.append(n)
continue

logging.info(f"node {node_id} needs no deployment")
for node in nodes:
registration_time = parseTime(node.get("registration_event"))
wes_deploy_time = parseTime(node.get("wes_deploy_event"))

if registration_time is None:
logging.info("node %s is not registered", node["id"])
elif node.get("beehive") in ["", None]:
logging.info("node %s does not belong to a beehive", node["id"])
elif wes_deploy_time is None:
logging.info("scheduling node %s for wes deployment: no existing deployment", node["id"])
candidates.append((node, False))
elif registration_time >= wes_deploy_time:
logging.info("scheduling node %s for wes deployment: node reregistered", node["id"])
candidates.append((node, False))
elif age(wes_deploy_time) >= timedelta(days=BEEKEEPER_RENEW_DAYS):
logging.info("scheduling node %s for wes deployment: renewing node credentials", node["id"])
candidates.append((node, True))
else:
logging.info("node %s needs no deployment", node["id"])

return candidates


def try_wes_deployment(candidates):
if len(candidates) == 0:
logging.info("no candidates required deployment")
return

success_count = 0

for candidate in candidates:
for candidate, force in candidates:
try:
deploy_wes_to_candidate(candidate)
deploy_wes_to_candidate(candidate, force=force)
success_count += 1
except KeyboardInterrupt:
return
Expand All @@ -89,9 +82,14 @@ def try_wes_deployment(candidates):
logging.info("done")


def deploy_wes_to_candidate(candidate):
def deploy_wes_to_candidate(candidate, force):
node_id = candidate["id"]
url = f"{BEEKEEPER_URL}/node/{node_id}"

if force:
url = f"{BEEKEEPER_URL}/node/{node_id}?force=true"
else:
url = f"{BEEKEEPER_URL}/node/{node_id}"

resp = requests.post(url, json={"deploy_wes": True})
resp.raise_for_status()
result = resp.json()
Expand All @@ -100,24 +98,22 @@ def deploy_wes_to_candidate(candidate):


def main():
logging.basicConfig(level=logging.INFO)

logging.info("Starting...")
while True:

candidates = []
while True:
try:
candidates = get_candidates()
except Exception as e:
logging.error(f"error: get_candidates returned: {str(e)}")
candidates = get_deploy_wes_candidates()
except Exception:
logging.exception("get_deploy_wes_candidates raised an exception. will retry in 10s")
time.sleep(10)
continue

if len(candidates) == 0:
logging.info("no candidates for wes deployment found")
else:
logging.info("candidates:")
logging.info(candidates)
try_wes_deployment(candidates)
try_wes_deployment(candidates)

logging.info("waiting 5 minutes...")
time.sleep(5 * 60)
logging.info("done. will recheck in 5min...")
time.sleep(5*60)


if __name__ == "__main__":
Expand Down