Notify more relations when cluster is bootstrapped

Currently mon_relation only calls notify_rbd_mirrors when the cluster
is already bootstrapped, which in some cases leaves broker requests
for other relations unhandled.
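
For context, a condensed sketch of the pre-change flow (simplified
from the first hunk below; the real hook also checks the leader secret
and the monitor count first):

    def mon_relation():
        if ceph.is_bootstrapped():
            # Only rbd-mirror was nudged on this path; the other
            # notify_* calls lived on the bootstrap path below and
            # never ran again once the cluster was up.
            notify_rbd_mirrors()
        else:
            # bootstrap attempt, then notify_osds(), notify_radosgws(),
            # notify_client(), notify_rbd_mirrors(), notify_prometheus()
            ...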

The change also moves the bootstrap attempt code into a separate
function, attempt_mon_cluster_bootstrap, and adds unit tests covering
the different branches of mon_relation for various inputs.
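
Condensed shape of the result (helper names are taken verbatim from
the diff below):

    def mon_relation():
        if ceph.is_bootstrapped():
            notify_relations()
        elif attempt_mon_cluster_bootstrap():
            # the helper returns False if the keyring cannot be added yet
            notify_relations()

    def notify_relations():
        notify_osds()
        notify_radosgws()
        notify_client()
        notify_rbd_mirrors()
        notify_prometheus()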

Closes-Bug: #1942224
Change-Id: Id9b611d128acb7d49a9a9ad9c096b232fefd6c68
Author: Dmitrii Shcherbakov
Date: 2021-09-01 23:19:53 +03:00
parent 93e9bb39af
commit 82743ab7e5
2 changed files with 151 additions and 65 deletions


@@ -482,79 +482,88 @@ def mon_relation():
         if ceph.is_bootstrapped():
             # The ceph-mon unit chosen for handling broker requests is based on
             # internal Ceph MON leadership and not Juju leadership. To update
-            # the rbd-mirror relation on all ceph-mon units after pool creation
+            # the relations on all ceph-mon units after pool creation
             # the unit handling the broker request will update a nonce on the
             # mon relation.
-            notify_rbd_mirrors()
+            notify_relations()
         else:
-            status_set('maintenance', 'Bootstrapping MON cluster')
-            # the following call raises an exception
-            # if it can't add the keyring
-            try:
-                ceph.bootstrap_monitor_cluster(leader_get('monitor-secret'))
-            except FileNotFoundError as e:  # NOQA -- PEP8 is still PY2
-                log("Couldn't bootstrap the monitor yet: {}".format(str(e)))
-                exit(0)
-            ceph.wait_for_bootstrap()
-            ceph.wait_for_quorum()
-            ceph.create_keyrings()
-            if cmp_pkgrevno('ceph', '12.0.0') >= 0:
-                status_set('maintenance', 'Bootstrapping Ceph MGR')
-                ceph.bootstrap_manager()
-            if ceph.monitor_key_exists('admin', 'autotune'):
-                autotune = ceph.monitor_key_get('admin', 'autotune')
-            else:
-                ceph.wait_for_manager()
-                autotune = config('pg-autotune')
-                if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and
-                        (autotune == 'true' or
-                         autotune == 'auto')):
-                    ceph.monitor_key_set('admin', 'autotune', 'true')
-                else:
-                    ceph.monitor_key_set('admin', 'autotune', 'false')
-            if ceph.monitor_key_get('admin', 'autotune') == 'true':
-                try:
-                    mgr_enable_module('pg_autoscaler')
-                except subprocess.CalledProcessError:
-                    log("Failed to initialize autoscaler, it must be "
-                        "initialized on the last monitor", level='info')
-            # If we can and want to
-            if is_leader() and config('customize-failure-domain'):
-                # But only if the environment supports it
-                if os.environ.get('JUJU_AVAILABILITY_ZONE'):
-                    cmds = [
-                        "ceph osd getcrushmap -o /tmp/crush.map",
-                        "crushtool -d /tmp/crush.map| "
-                        "sed 's/step chooseleaf firstn 0 type host/step "
-                        "chooseleaf firstn 0 type rack/' > "
-                        "/tmp/crush.decompiled",
-                        "crushtool -c /tmp/crush.decompiled -o /tmp/crush.map",
-                        "crushtool -i /tmp/crush.map --test",
-                        "ceph osd setcrushmap -i /tmp/crush.map"
-                    ]
-                    for cmd in cmds:
-                        try:
-                            subprocess.check_call(cmd, shell=True)
-                        except subprocess.CalledProcessError as e:
-                            log("Failed to modify crush map:", level='error')
-                            log("Cmd: {}".format(cmd), level='error')
-                            log("Error: {}".format(e.output), level='error')
-                            break
-                else:
-                    log(
-                        "Your Juju environment doesn't"
-                        "have support for Availability Zones"
-                    )
-            notify_osds()
-            notify_radosgws()
-            notify_client()
-            notify_rbd_mirrors()
-            notify_prometheus()
+            if attempt_mon_cluster_bootstrap():
+                notify_relations()
     else:
         log('Not enough mons ({}), punting.'
             .format(len(get_mon_hosts())))
+
+
+def attempt_mon_cluster_bootstrap():
+    status_set('maintenance', 'Bootstrapping MON cluster')
+    # the following call raises an exception
+    # if it can't add the keyring
+    try:
+        ceph.bootstrap_monitor_cluster(leader_get('monitor-secret'))
+    except FileNotFoundError as e:  # NOQA -- PEP8 is still PY2
+        log("Couldn't bootstrap the monitor yet: {}".format(str(e)))
+        return False
+    ceph.wait_for_bootstrap()
+    ceph.wait_for_quorum()
+    ceph.create_keyrings()
+    if cmp_pkgrevno('ceph', '12.0.0') >= 0:
+        status_set('maintenance', 'Bootstrapping Ceph MGR')
+        ceph.bootstrap_manager()
+    if ceph.monitor_key_exists('admin', 'autotune'):
+        autotune = ceph.monitor_key_get('admin', 'autotune')
+    else:
+        ceph.wait_for_manager()
+        autotune = config('pg-autotune')
+        if (cmp_pkgrevno('ceph', '14.2.0') >= 0 and
+                (autotune == 'true' or
+                 autotune == 'auto')):
+            ceph.monitor_key_set('admin', 'autotune', 'true')
+        else:
+            ceph.monitor_key_set('admin', 'autotune', 'false')
+    if ceph.monitor_key_get('admin', 'autotune') == 'true':
+        try:
+            mgr_enable_module('pg_autoscaler')
+        except subprocess.CalledProcessError:
+            log("Failed to initialize autoscaler, it must be "
+                "initialized on the last monitor", level='info')
+    # If we can and want to
+    if is_leader() and config('customize-failure-domain'):
+        # But only if the environment supports it
+        if os.environ.get('JUJU_AVAILABILITY_ZONE'):
+            cmds = [
+                "ceph osd getcrushmap -o /tmp/crush.map",
+                "crushtool -d /tmp/crush.map| "
+                "sed 's/step chooseleaf firstn 0 type host/step "
+                "chooseleaf firstn 0 type rack/' > "
+                "/tmp/crush.decompiled",
+                "crushtool -c /tmp/crush.decompiled -o /tmp/crush.map",
+                "crushtool -i /tmp/crush.map --test",
+                "ceph osd setcrushmap -i /tmp/crush.map"
+            ]
+            for cmd in cmds:
+                try:
+                    subprocess.check_call(cmd, shell=True)
+                except subprocess.CalledProcessError as e:
+                    log("Failed to modify crush map:", level='error')
+                    log("Cmd: {}".format(cmd), level='error')
+                    log("Error: {}".format(e.output), level='error')
+                    break
+        else:
+            log(
+                "Your Juju environment doesn't"
+                "have support for Availability Zones"
+            )
+    return True
+
+
+def notify_relations():
+    notify_osds()
+    notify_radosgws()
+    notify_client()
+    notify_rbd_mirrors()
+    notify_prometheus()


 def notify_prometheus():
     if relation_ids('prometheus') and ceph.is_bootstrapped():
         prometheus_permitted = cmp_pkgrevno('ceph', '12.2.0') >= 0


@@ -470,6 +470,83 @@ class CephHooksTestCase(test_utils.CharmTestCase):
         mgr_enable_module.assert_not_called()


+class CephMonRelationTestCase(test_utils.CharmTestCase):
+
+    def setUp(self):
+        super(CephMonRelationTestCase, self).setUp(ceph_hooks, [
+            'config',
+            'is_leader',
+            'is_relation_made',
+            'leader_get',
+            'leader_set',
+            'log',
+            'relation_ids',
+            'related_units',
+            'relation_get',
+            'relations_of_type',
+            'status_set',
+            'get_mon_hosts',
+            'notify_relations',
+            'emit_cephconf',
+        ])
+        self.config.side_effect = self.test_config.get
+        self.leader_get.side_effect = self.test_leader_settings.get
+        self.leader_set.side_effect = self.test_leader_settings.set
+        self.relation_get.side_effect = self.test_relation.get
+        self.test_config.set('monitor-count', 3)
+        self.test_leader_settings.set({'monitor-secret': '42'})
+        self.get_mon_hosts.return_value = ['foo', 'bar', 'baz']
+
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    def test_mon_relation_bootstrapped(self, _is_bootstrapped):
+        _is_bootstrapped.return_value = True
+        ceph_hooks.mon_relation()
+        self.notify_relations.assert_called_with()
+
+    @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap')
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    def test_mon_relation_attempt_bootstrap_success(self, _is_bootstrapped,
+                                                    _attempt_bootstrap):
+        _is_bootstrapped.return_value = False
+        _attempt_bootstrap.return_value = True
+        ceph_hooks.mon_relation()
+        self.notify_relations.assert_called_with()
+
+    @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap')
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    def test_mon_relation_attempt_bootstrap_failure(self, _is_bootstrapped,
+                                                    _attempt_bootstrap):
+        _is_bootstrapped.return_value = False
+        _attempt_bootstrap.return_value = False
+        ceph_hooks.mon_relation()
+        self.notify_relations.assert_not_called()
+
+    @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap')
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    def test_mon_relation_no_enough_mons(self, _is_bootstrapped,
+                                         _attempt_bootstrap):
+        _is_bootstrapped.return_value = False
+        _attempt_bootstrap.return_value = False
+        self.get_mon_hosts.return_value = ['foo', 'bar']
+        ceph_hooks.mon_relation()
+        self.notify_relations.assert_not_called()
+        self.log.assert_called_once_with('Not enough mons (2), punting.')
+
+    @patch.object(ceph_hooks, 'attempt_mon_cluster_bootstrap')
+    @patch.object(ceph_hooks.ceph, 'is_bootstrapped')
+    def test_mon_relation_no_secret(self, _is_bootstrapped,
+                                    _attempt_bootstrap):
+        _is_bootstrapped.return_value = False
+        _attempt_bootstrap.return_value = False
+        self.get_mon_hosts.return_value = ['foo', 'bar']
+        self.test_leader_settings.set({'monitor-secret': None})
+        ceph_hooks.mon_relation()
+        self.notify_relations.assert_not_called()
+        _attempt_bootstrap.assert_not_called()
+        self.log.assert_called_once_with(
+            'still waiting for leader to setup keys')
+
+
 class RelatedUnitsTestCase(unittest.TestCase):

     _units = {