From 7bf9c879a04867fa030c407ceda9f76742b8d97f Mon Sep 17 00:00:00 2001 From: Hemanth Nakkina Date: Tue, 2 Feb 2021 14:16:29 +0530 Subject: [PATCH] optimizations to reduce charm upgrade time charm upgrades takes longer time for ceph-mon to get into idle state when there are more number of OSDs and ceph clients whenever there is change in osd-relation data. In these cases osd_relation triggers notify_client() that takes significant amount of time as the client_relation() is executed for every related unit on each client relation. Some of the function calls and ceph commands can be reduced to be executed for every relation or once per notify_client instead of executing them for every related unit. ceph.get_named_key() is currently executed for every related unit and also each execution takes longer time as it is supposed to run minimum of 2 ceph commands. This patch tries to reduce the number of calls for ceph.get_named_key to once per relation. Partial-Bug: #1913992 Change-Id: Ic455cd7c4876efafee221bc6e7a5ec61fee5643f --- hooks/ceph_hooks.py | 87 +++++++++++++++++++++++++------- unit_tests/test_ceph_hooks.py | 94 ++++++++++++++++++++++++++++------- 2 files changed, 147 insertions(+), 34 deletions(-) diff --git a/hooks/ceph_hooks.py b/hooks/ceph_hooks.py index 60c021dc..381c887e 100755 --- a/hooks/ceph_hooks.py +++ b/hooks/ceph_hooks.py @@ -580,10 +580,72 @@ def notify_rbd_mirrors(): rbd_mirror_relation(relid=relid, unit=unit, recurse=False) +def _get_ceph_info_from_configs(): + """Create dictionary of ceph information required to set client relation. + + :returns: Dictionary of ceph configurations needed for client relation + :rtpe: dict + """ + public_addr = get_public_addr() + rbd_features = get_rbd_features() + data = { + 'auth': config('auth-supported'), + 'ceph-public-address': public_addr + } + if rbd_features: + data['rbd-features'] = rbd_features + return data + + +def _handle_client_relation(relid, unit, data=None): + """Handle broker request and set the relation data + + :param relid: Relation ID + :type relid: str + :param unit: Unit name + :type unit: str + :param data: Initial relation data + :type data: dict + """ + if data is None: + data = {} + + if is_unsupported_cmr(unit): + return + data.update( + handle_broker_request(relid, unit, add_legacy_response=True)) + relation_set(relation_id=relid, relation_settings=data) + + def notify_client(): + send_osd_settings() + if not ready_for_service(): + log("mon cluster is not in quorum, skipping notify_client", + level=WARNING) + return + for relid in relation_ids('client'): + data = _get_ceph_info_from_configs() + + service_name = None + # Loop through all related units until client application name is found + # This is done in seperate loop to avoid calling ceph to retreive named + # key for every unit for unit in related_units(relid): - client_relation(relid, unit) + service_name = get_client_application_name(relid, unit) + if service_name: + data.update({'key': ceph.get_named_key(service_name)}) + break + + if not service_name: + log('Unable to determine remote service name, deferring processing' + ' of broker requests for relation {} '.format(relid)) + # continue with next relid + continue + + for unit in related_units(relid): + _handle_client_relation(relid, unit, data) + for relid in relation_ids('admin'): admin_relation_joined(relid) for relid in relation_ids('mds'): @@ -993,26 +1055,17 @@ def client_relation(relid=None, unit=None): if ready_for_service(): log('mon cluster in quorum and osds bootstrapped ' '- providing client with keys, processing broker requests') + if not unit: + unit = remote_unit() service_name = get_client_application_name(relid, unit) if not service_name: log('Unable to determine remote service name, deferring ' - 'processing of broker requests') + 'processing of broker requests for relation {} ' + 'remote unit {}'.format(relid, unit)) return - public_addr = get_public_addr() - data = {'key': ceph.get_named_key(service_name), - 'auth': config('auth-supported'), - 'ceph-public-address': public_addr} - rbd_features = get_rbd_features() - if rbd_features: - data['rbd-features'] = rbd_features - if not unit: - unit = remote_unit() - if is_unsupported_cmr(unit): - return - data.update( - handle_broker_request(relid, unit, add_legacy_response=True)) - relation_set(relation_id=relid, - relation_settings=data) + data = _get_ceph_info_from_configs() + data.update({'key': ceph.get_named_key(service_name)}) + _handle_client_relation(relid, unit, data) @hooks.hook('upgrade-charm.real') diff --git a/unit_tests/test_ceph_hooks.py b/unit_tests/test_ceph_hooks.py index 52ac16b2..99a712c3 100644 --- a/unit_tests/test_ceph_hooks.py +++ b/unit_tests/test_ceph_hooks.py @@ -237,28 +237,81 @@ class CephHooksTestCase(test_utils.CharmTestCase): mock_notify_prometheus.assert_called_once_with() mock_service_pause.assert_called_with('ceph-create-keys') + @patch.object(ceph_hooks, 'relation_get') @patch.object(ceph_hooks, 'mds_relation_joined') @patch.object(ceph_hooks, 'admin_relation_joined') - @patch.object(ceph_hooks, 'client_relation') + @patch.object(ceph_hooks, 'relation_set') + @patch.object(ceph_hooks, 'handle_broker_request') + @patch.object(ceph_hooks, 'config') @patch.object(ceph_hooks, 'related_units') + @patch.object(ceph_hooks.ceph, 'get_named_key') + @patch.object(ceph_hooks.hookenv, 'remote_service_name') @patch.object(ceph_hooks, 'relation_ids') - def test_notify_client(self, mock_relation_ids, mock_related_units, - mock_client_relation, - mock_admin_relation_joined, - mock_mds_relation_joined): - mock_relation_ids.return_value = ['arelid'] - mock_related_units.return_value = ['aunit'] + @patch.object(ceph_hooks.ceph, 'is_leader') + @patch.object(ceph_hooks, 'get_rbd_features') + @patch.object(ceph_hooks, 'get_public_addr') + @patch.object(ceph_hooks, 'ready_for_service') + @patch.object(ceph_hooks, 'send_osd_settings') + def test_notify_client(self, + _send_osd_settings, + _ready_for_service, + _get_public_addr, + _get_rbd_features, + _is_leader, + _relation_ids, + _remote_service_name, + _get_named_key, + _related_units, + _config, + _handle_broker_request, + _relation_set, + _admin_relation_joined, + _mds_relation_joined, + _relation_get): + _relation_ids.return_value = ['arelid'] + _related_units.return_value = ['aunit/0'] + _relation_get.return_value = {'application-name': 'aunit'} + _remote_service_name.return_value = 'aunit' + _is_leader.return_value = True + config = copy.deepcopy(CHARM_CONFIG) + _config.side_effect = lambda key: config[key] + _handle_broker_request.return_value = {} + _get_rbd_features.return_value = None + ceph_hooks.notify_client() - mock_relation_ids.assert_has_calls([ - call('client'), + _send_osd_settings.assert_called_once_with() + _ready_for_service.assert_called_once_with() + _get_public_addr.assert_called_once_with() + _get_named_key.assert_called_once_with('aunit') + _handle_broker_request.assert_called_once_with( + 'arelid', 'aunit/0', add_legacy_response=True) + _relation_set.assert_called_once_with( + relation_id='arelid', + relation_settings={ + 'key': _get_named_key(), + 'auth': False, + 'ceph-public-address': _get_public_addr() + }) + + _relation_ids.assert_has_calls([ call('admin'), call('mds'), ]) - mock_related_units.assert_called_with('arelid') - mock_client_relation.assert_called_once_with('arelid', 'aunit') - mock_admin_relation_joined.assert_called_once_with('arelid') - mock_mds_relation_joined.assert_called_once_with(relid='arelid', - unit='aunit') + _admin_relation_joined.assert_called_once_with('arelid') + _mds_relation_joined.assert_called_once_with(relid='arelid', + unit='aunit/0') + + _get_rbd_features.return_value = 42 + _relation_set.reset_mock() + ceph_hooks.notify_client() + _relation_set.assert_called_once_with( + relation_id='arelid', + relation_settings={ + 'key': _get_named_key(), + 'auth': False, + 'ceph-public-address': _get_public_addr(), + 'rbd-features': 42, + }) @patch.object(ceph_hooks, 'rbd_mirror_relation') @patch.object(ceph_hooks, 'related_units') @@ -307,6 +360,7 @@ class CephHooksTestCase(test_utils.CharmTestCase): ceph_hooks.get_client_application_name('rel:1', None), 'glance') + @patch.object(ceph_hooks, 'notify_client') @patch.object(ceph_hooks.ceph, 'list_pools') @patch.object(ceph_hooks, 'mgr_enable_module') @patch.object(ceph_hooks, 'emit_cephconf') @@ -323,12 +377,14 @@ class CephHooksTestCase(test_utils.CharmTestCase): create_sysctl, emit_ceph_conf, mgr_enable_module, - list_pools): + list_pools, + notify_client): relations_of_type.return_value = False self.test_config.set('pg-autotune', 'false') ceph_hooks.config_changed() mgr_enable_module.assert_not_called() + @patch.object(ceph_hooks, 'notify_client') @patch.object(ceph_hooks.ceph, 'monitor_key_set') @patch.object(ceph_hooks.ceph, 'list_pools') @patch.object(ceph_hooks, 'mgr_enable_module') @@ -348,7 +404,9 @@ class CephHooksTestCase(test_utils.CharmTestCase): create_sysctl, emit_ceph_conf, mgr_enable_module, - list_pools, monitor_key_set): + list_pools, + monitor_key_set, + notify_client): relations_of_type.return_value = False cmp_pkgrevno.return_value = 1 self.test_config.set('pg-autotune', 'true') @@ -356,6 +414,7 @@ class CephHooksTestCase(test_utils.CharmTestCase): mgr_enable_module.assert_called_once_with('pg_autoscaler') monitor_key_set.assert_called_once_with('admin', 'autotune', 'true') + @patch.object(ceph_hooks, 'notify_client') @patch.object(ceph_hooks.ceph, 'list_pools') @patch.object(ceph_hooks, 'mgr_enable_module') @patch.object(ceph_hooks, 'emit_cephconf') @@ -374,7 +433,8 @@ class CephHooksTestCase(test_utils.CharmTestCase): create_sysctl, emit_ceph_conf, mgr_enable_module, - list_pools): + list_pools, + notify_client): relations_of_type.return_value = False cmp_pkgrevno.return_value = 1 self.test_config.set('pg-autotune', 'auto')