Skip to content
This repository was archived by the owner on Feb 23, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ env26/
env27/
env32/
env33/
.tox/
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Imagine you want to take down the server `web01` for maintenance. Just SSH to it
* `mysql_username`: username to use when logging into mysql for checks
* `mysql_password`: password to use when logging into mysql for checks
* `rlimit_nofile`: set the NOFILE rlimit. If the value is the string "max", the rlimit is set to the hard limit; otherwise the value is interpreted as an integer and the rlimit is set to that value.
* `allow_remote_spool_changes`: whether to allow spool files to be changed remotely by sending an HTTP POST to a service's spool URL (disabled by default).

### Monitoring

Expand Down
6 changes: 5 additions & 1 deletion hacheck/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,13 @@ def timed_out(*args, **kwargs):
# Do not cache spool checks
@tornado.concurrent.return_future
def check_spool(service_name, port, query, io_loop, callback, query_params, headers):
up, extra_info = spool.is_up(service_name)
up, extra_info = spool.is_up(service_name, port=port)
if not up:
info_string = 'Service %s in down state' % (extra_info['service'],)
if extra_info.get('creation') is not None:
info_string += ' since %f' % extra_info['creation']
if extra_info.get('expiration') is not None:
info_string += ' until %f' % extra_info['expiration']
if extra_info.get('reason', ''):
info_string += ": %s" % extra_info['reason']
callback((503, info_string))
Expand Down
1 change: 1 addition & 0 deletions hacheck/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def nested3(*managers):
def bchr3(c):
    """Build a ``bytes`` value from the one-element tuple ``(c,)``.

    With Python 3 semantics this yields a single byte whose value is *c*.
    """
    single = (c,)
    return bytes(single)


def bchr2(c):
    """Return the result of the builtin ``chr`` for the integer *c*."""
    character = chr(c)
    return character

Expand Down
3 changes: 2 additions & 1 deletion hacheck/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def max_or_int(some_str_value):
'log_path': (str, 'stderr'),
'mysql_username': (str, None),
'mysql_password': (str, None),
'rlimit_nofile': (max_or_int, None)
'rlimit_nofile': (max_or_int, None),
'allow_remote_spool_changes': (bool, False),
}


Expand Down
30 changes: 30 additions & 0 deletions hacheck/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

from . import cache
from . import checker
from . import config
from . import spool

log = logging.getLogger('hacheck')

Expand Down Expand Up @@ -105,6 +107,34 @@ def get(self, service_name, port, query):
class SpoolServiceHandler(BaseServiceHandler):
    """Handler for a service's spool URL.

    ``POST`` lets a remote client mark a service up or down in the spool,
    provided the ``allow_remote_spool_changes`` config option is enabled.
    """

    # NOTE(review): presumably consumed by BaseServiceHandler to decide which
    # checks to run for this URL -- confirm against the base class.
    CHECKERS = [checker.check_spool]

    def post(self, service_name, port, query):
        """Mark a service up or down on behalf of a remote client.

        Request arguments:

        * ``status`` (required): ``up`` or ``down``.
        * ``reason`` (required when ``status`` is ``down``): text stored with
          the down state.
        * ``expiration`` and ``creation`` (optional when ``status`` is
          ``down``): numbers, handed to ``spool.down`` as floats.

        A ``port`` of 0 is treated as "no specific port" and passed on as
        ``None``.

        Responds with 403 when remote changes are disabled, 400 when
        ``status`` is neither ``up`` nor ``down`` or when ``expiration`` /
        ``creation`` is not a valid number, and 200 with an empty body on
        success.
        """
        if not config.config['allow_remote_spool_changes']:
            self.set_status(403)
            self.write('remote spool changes are not enabled')
            return

        # Port 0 stands for "the service as a whole" rather than one port.
        port = int(port) or None
        status = self.get_argument('status')

        if status == 'up':
            spool.up(service_name, port=port)
        elif status == 'down':
            reason = self.get_argument('reason')
            try:
                expiration = self._optional_float_argument('expiration')
                creation = self._optional_float_argument('creation')
            except ValueError:
                # Malformed client input is a client error, not a server error.
                self.set_status(400)
                self.write("expiration and creation must be numbers")
                return
            spool.down(service_name, reason=reason, port=port, expiration=expiration, creation=creation)
        else:
            self.set_status(400)
            self.write("status must be up or down")
            return

        self.set_status(200)
        self.write("")

    def _optional_float_argument(self, name):
        """Return request argument *name* as a float, or None when absent.

        Raises ValueError when the argument is present but not a number.
        """
        raw_value = self.get_argument(name, None)
        if raw_value is None:
            return None
        return float(raw_value)


class HTTPServiceHandler(BaseServiceHandler):
CHECKERS = [checker.check_spool, checker.check_http]
Expand Down
44 changes: 35 additions & 9 deletions hacheck/haupdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ def print_s(fmt_string, *formats):
print(fmt_string % formats)


def print_status(service_name, port, is_up, info_dict):
    """Write one tab-separated status line for a service.

    An up service is reported as ``UP`` followed by the service name.  A down
    service is reported as ``DOWN`` followed by its expiration time (infinity
    when none is recorded), the service name (suffixed with ``:port`` when a
    port is given) and the reason taken from *info_dict*.
    """
    if is_up:
        print_s('UP\t%s', service_name)
        return

    expiration = info_dict.get('expiration')
    if expiration is None:
        # No recorded expiration means the service stays down indefinitely.
        expiration = float('Inf')
    reason = info_dict.get('reason', '')

    if port is None:
        print_s('DOWN\t%f\t%s\t%s', expiration, service_name, reason)
    else:
        print_s('DOWN\t%f\t%s:%d\t%s', expiration, service_name, port, reason)


def main(default_action='list'):
ACTIONS = ('up', 'down', 'status', 'status_downed', 'list')
parser = optparse.OptionParser(usage='%prog [options] service_name(s)')
Expand All @@ -63,6 +77,20 @@ def main(default_action='list'):
default="",
help='Reason string when setting down'
)
parser.add_option(
'-e',
'--expiration',
type=float,
default=None,
help='Expiration time (unix time) when setting down',
)
parser.add_option(
'-P',
'--service-port',
type=int,
default=None,
help='Port to check/set status for',
)
parser.add_option(
'-p',
'--port',
Expand Down Expand Up @@ -116,27 +144,25 @@ def main(default_action='list'):
elif opts.action == 'up':
hacheck.spool.configure(opts.spool_root, needs_write=True)
for service_name in service_names:
hacheck.spool.up(service_name)
hacheck.spool.up(service_name, port=opts.service_port)
return 0
elif opts.action == 'down':
hacheck.spool.configure(opts.spool_root, needs_write=True)
for service_name in service_names:
hacheck.spool.down(service_name, opts.reason)
hacheck.spool.down(service_name, opts.reason, expiration=opts.expiration, port=opts.service_port)
return 0
elif opts.action == 'status_downed':
hacheck.spool.configure(opts.spool_root, needs_write=False)
for service_name, info in hacheck.spool.status_all_down():
print_s('DOWN\t%s\t%s', service_name, info.get('reason', ''))
for service_name, port, info in hacheck.spool.status_all_down():
print_status(service_name, port, False, info)
return 0
else:
hacheck.spool.configure(opts.spool_root, needs_write=False)
rv = 0
for service_name in service_names:
status, info = hacheck.spool.status(service_name)
if status:
print_s('UP\t%s', service_name)
else:
print_s('DOWN\t%s\t%s', service_name, info.get('reason', ''))
status, info = hacheck.spool.status(service_name, port=opts.service_port)
print_status(service_name, opts.service_port, status, info)
if not status:
rv = 1
return rv

Expand Down
95 changes: 80 additions & 15 deletions hacheck/spool.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,54 @@
import json
import os
import time

config = {
'spool_root': None,
}


def spool_file_path(service_name, port):
    """Return the spool file path for a service, optionally for one port.

    The file lives directly under the configured spool root and is named
    ``service_name`` when *port* is None, otherwise ``service_name:port``.
    """
    file_name = service_name if port is None else "%s:%s" % (service_name, port)
    return os.path.join(config['spool_root'], file_name)


def parse_spool_file_path(path):
    """Split a spool file path into ``(service_name, port)``.

    Only the final path component is examined.  A name of the form
    ``service:port`` with an integer port yields ``(service, port)``.  Any
    other name -- no colon, or a colon whose trailing part is not an integer
    -- is treated as a plain service name with ``port`` set to None, so one
    unexpected file in the spool directory cannot abort a directory listing.

    :returns: tuple of (service name, port as int or None)
    """
    base_name = os.path.basename(path)

    service_name, separator, port_text = base_name.rpartition(':')
    if separator:
        try:
            return service_name, int(port_text)
        except ValueError:
            # Not a "service:port" name after all; fall through and treat
            # the whole file name as the service name.
            pass

    return base_name, None


def serialize_spool_file_contents(reason, expiration=None, creation=None):
    """Encode the down-state details of a service as a JSON string.

    The JSON object carries the keys ``reason``, ``expiration`` and
    ``creation``.  When *creation* is None the current time
    (``time.time()``) is recorded instead.
    """
    if creation is None:
        creation = time.time()

    payload = {
        "reason": reason,
        "expiration": expiration,
        "creation": creation,
    }
    return json.dumps(payload)


def deserialize_spool_file_contents(contents):
    """Decode the contents of a spool file into an info dict.

    Current spool files hold a JSON object, which is returned as parsed.
    Anything else is treated as a file written by an earlier version of
    hacheck, where the whole file was the reason text.  That covers text that
    is not JSON at all and also text that happens to parse as a non-object
    JSON value (for example ``123``, ``true`` or ``null``).  In those cases a
    dict is built with *contents* as the reason and no expiration or creation
    time, so callers can always rely on getting a dict back.

    :returns: dict of down-state information
    """
    try:
        parsed = json.loads(contents)
    except ValueError:
        parsed = None

    if isinstance(parsed, dict):
        return parsed

    # in case we're looking at a file created by earlier versions of hacheck
    return {
        "reason": contents,
        "expiration": None,
        "creation": None,
    }


def configure(spool_root, needs_write=False):
access_required = os.W_OK | os.R_OK if needs_write else os.R_OK
if os.path.exists(spool_root):
Expand All @@ -15,51 +59,72 @@ def configure(spool_root, needs_write=False):
config['spool_root'] = spool_root


def is_up(service_name):
def is_up(service_name, port=None):
"""Check whether a service is asserted to be up or down. Includes the logic
for checking system-wide all state

:returns: (bool of service status, dict of extra information)
"""
all_up, all_info = status("all")
if all_up:
return status(service_name)
# Check with port=None first, because if service foo is down, then service foo on port 123 should be down too.
service_up, service_info = status(service_name, port=None)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth commenting why you first check with port=None?

if service_up:
return status(service_name, port=port)
else:
return service_up, service_info
else:
return all_up, all_info


def status(service_name):
def status(service_name, port=None):
"""Check whether a service is asserted to be up or down, without checking
the system-wide 'all' state.

:returns: (bool of service status, dict of extra information)
"""
happy_retval = (True, {'service': service_name, 'reason': '', 'expiration': None})
path = spool_file_path(service_name, port)
try:
with open(os.path.join(config['spool_root'], service_name), 'r') as f:
reason = f.read()
return False, {'service': service_name, 'reason': reason}
with open(path, 'r') as f:
info_dict = deserialize_spool_file_contents(f.read())
info_dict['service'] = service_name
expiration = info_dict.get('expiration')
if expiration is not None and expiration < time.time():
os.remove(path)
return happy_retval
return False, info_dict
except IOError:
return True, {'service': service_name, 'reason': ''}
return happy_retval


def status_all_down():
"""List all down services

:returns: Iterable of pairs of (service name, dict of extra information)
"""
for service_name in os.listdir(config['spool_root']):
up, info = status(service_name)
for filename in os.listdir(config['spool_root']):
service_name, port = parse_spool_file_path(filename)
up, info = status(service_name, port=port)
if not up:
yield service_name, info
yield service_name, port, info


def up(service_name):
def up(service_name, port=None):
try:
os.unlink(os.path.join(config['spool_root'], service_name))
os.unlink(spool_file_path(service_name, port))
except OSError:
pass


def down(service_name, reason=""):
with open(os.path.join(config['spool_root'], service_name), 'w') as f:
f.write(reason)
def down(service_name, reason="", port=None, expiration=None, creation=None):
currently_up, info = status(service_name, port=port)

# If we already downed the service for the same reason, leave the creation time alone. This allows a user to
# repeatedly down a service to refresh its expiration time, and we will keep track of how long it has been down
# for.
if creation is None and (not currently_up) and reason == info['reason']:
creation = info.get('creation', creation)

with open(spool_file_path(service_name, port), 'w') as f:
f.write(serialize_spool_file_contents(reason, expiration=expiration, creation=creation))
35 changes: 35 additions & 0 deletions tests/test_application.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from hacheck import main
from hacheck import spool
from hacheck import cache
from hacheck import config
from hacheck import handlers


Expand Down Expand Up @@ -80,6 +81,14 @@ def test_spool_checker(self):
response = self.fetch('/spool/foo/1/status')
self.assertEqual(response.code, 503)
self.assertEqual(response.body, b'Service any in down state: just because')
with mock.patch.object(
spool,
'is_up',
return_value=(False, {"service": "any", "reason": "reason", "expiration": 5, "creation": 4})
):
response = self.fetch('/spool/foo/1/status')
self.assertEqual(response.code, 503)
self.assertRegexpMatches(response.body, b'^Service any in down state since 4\.0+ until 5\.0+: reason$')

def test_calls_all_checkers(self):
rv1 = tornado.concurrent.Future()
Expand Down Expand Up @@ -176,3 +185,29 @@ def test_show_recent(self):
'seen_services': [['foo', {'code': 200, 'ts': mock.ANY, 'remote_ip': '127.0.0.1'}]],
'threshold_seconds': 20
})

def test_remote_spool_check_forbidden(self):
    """A POST to a spool URL gets a 403 while remote spool changes are disabled."""
    remote_changes_disabled = mock.patch.dict(config.config, {'allow_remote_spool_changes': False})
    with remote_changes_disabled:
        reply = self.fetch('/spool/foo/1/status', method='POST', body="")
        self.assertEqual(reply.code, 403)

def test_spool_post(self):
    """POSTs to a spool URL are forwarded to ``spool.up`` / ``spool.down``.

    Covers an ``up`` request on port 0 (forwarded as ``port=None``), a
    ``down`` request with only a reason, and a ``down`` request that also
    carries expiration and creation times.
    """
    remote_changes_enabled = mock.patch.dict(config.config, {'allow_remote_spool_changes': True})
    with nested(
        remote_changes_enabled,
        mock.patch.object(spool, 'up'),
        mock.patch.object(spool, 'down'),
    ) as (_unused, fake_up, fake_down):

        # Port 0 in the URL means "no specific port".
        reply = self.fetch('/spool/foo/0/status', method='POST', body="status=up")
        self.assertEqual(reply.code, 200)
        fake_up.assert_called_once_with('foo', port=None)

        # Down with a reason only: optional fields are forwarded as None.
        reply = self.fetch('/spool/foo/1234/status', method='POST', body="status=down&reason=because")
        self.assertEqual(reply.code, 200)
        fake_down.assert_called_once_with('foo', reason='because', port=1234, expiration=None, creation=None)

        # Down with explicit expiration and creation times.
        fake_down.reset_mock()
        reply = self.fetch(
            '/spool/foo/1234/status',
            method='POST',
            body="status=down&reason=because&expiration=1&creation=2",
        )
        self.assertEqual(reply.code, 200)
        fake_down.assert_called_once_with('foo', reason='because', port=1234, expiration=1, creation=2)
Loading