author | Adam King <47704447+adk3798@users.noreply.github.com> | 2023-12-13 18:15:05 +0100
---|---|---
committer | GitHub <noreply@github.com> | 2023-12-13 18:15:05 +0100
commit | df7773bec7ce47930ee88f71a234d2c392cdb1a9 |
tree | 0698f520e7966ad9b7b92392bdeb9cd5cb8ad5fe /src |
parent | Merge pull request #54536 from phlogistonjohn/jjm-cephadm-units-n-scripts |
parent | python-common/drive_selection: fix limit with existing devices |
Merge pull request #54681 from adk3798/device-limit-testing
python-common/drive_selection: fix limit with existing devices
Reviewed-by: Michael Fritch <mfritch@suse.com>
Reviewed-by: Guillaume Abrioux <gabrioux@ibm.com>
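
The substance of the fix is the limit accounting in `DriveSelection._limit_reached`: a device that already backs an OSD can still match the filter (ceph-volume still needs it in the lvm batch command), but it is already represented by `existing_daemons`, so counting it again made the limit appear reached one device too early. A minimal sketch of the corrected counting, using illustrative stand-ins (`Device` and `limit_reached` below are simplified models, not the real `ceph.deployment` classes):

```python
from dataclasses import dataclass
from typing import List


@dataclass
class Device:
    """Illustrative stand-in for ceph.deployment.inventory.Device."""
    path: str
    ceph_device: bool = False  # True if the device already backs an OSD


def limit_reached(limit: int, devices: List[Device], existing_daemons: int) -> bool:
    """Simplified model of the fixed DriveSelection._limit_reached.

    Devices that already back an OSD stay in `devices` (ceph-volume still
    needs them in the lvm batch command) but are excluded from the count,
    since they are already represented by `existing_daemons`.
    """
    if limit <= 0:
        return False
    non_ceph_devices = [d for d in devices if not d.ceph_device]
    return len(non_ceph_devices) + existing_daemons >= limit


# /dev/sda already backs an OSD, so it only counts via existing_daemons.
sda = Device('/dev/sda', ceph_device=True)
sdb = Device('/dev/sdb')
assert not limit_reached(2, [sda], existing_daemons=1)   # room for one new device
assert limit_reached(2, [sda, sdb], existing_daemons=1)  # limit of 2 now reached
```
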
Diffstat (limited to 'src')
-rw-r--r-- | src/pybind/mgr/cephadm/services/osd.py | 9
-rw-r--r-- | src/pybind/mgr/cephadm/tests/test_cephadm.py | 7
-rw-r--r-- | src/python-common/ceph/deployment/drive_selection/selector.py | 24
-rw-r--r-- | src/python-common/ceph/tests/test_disk_selector.py | 41
4 files changed, 69 insertions, 12 deletions
diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py
index 3616d42deee..75b3fc58c76 100644
--- a/src/pybind/mgr/cephadm/services/osd.py
+++ b/src/pybind/mgr/cephadm/services/osd.py
@@ -319,11 +319,16 @@ class OSDService(CephService):
                 logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
                 concat_out = {}
             notes = []
-            if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
+            if (
+                osdspec.data_devices is not None
+                and osdspec.data_devices.limit
+                and (len(concat_out) + ds.existing_daemons) < osdspec.data_devices.limit
+            ):
                 found = len(concat_out)
                 limit = osdspec.data_devices.limit
                 notes.append(
-                    f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
+                    f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit\n'
+                    f'(New Devices: {found} | Existing Matching Daemons: {ds.existing_daemons} | Limit: {limit})')
             ret_all.append({'data': concat_out,
                             'osdspec': osdspec.service_id,
                             'host': host,
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index ebdf7eea5aa..2477de13e00 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1193,7 +1193,8 @@ class TestCephadm(object):
     @mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
     @mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
     @mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
-    def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+    @mock.patch('cephadm.inventory.HostCache.get_daemons_by_service')
+    def test_limit_not_reached(self, _get_daemons_by_service, d_to_cv, _run_cv_cmd, cephadm_module):
         with with_host(cephadm_module, 'test'):
             dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
                                 data_devices=DeviceSelection(limit=5, rotational=1),
@@ -1203,12 +1204,14 @@ class TestCephadm(object):
                 '[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
             d_to_cv.return_value = 'foo'
             _run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+            _get_daemons_by_service.return_value = [DaemonDescription(daemon_type='osd', hostname='test', service_name='not_enough')]
             preview = cephadm_module.osd_service.generate_previews([dg], 'test')

             for osd in preview:
                 assert 'notes' in osd
                 assert osd['notes'] == [
-                    'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+                    ('NOTE: Did not find enough disks matching filter on host test to reach '
+                     'data device limit\n(New Devices: 2 | Existing Matching Daemons: 1 | Limit: 5)')]

     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     def test_prepare_drivegroup(self, cephadm_module):
diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py
index 1b3bfbb4ee3..31e330432cd 100644
--- a/src/python-common/ceph/deployment/drive_selection/selector.py
+++ b/src/python-common/ceph/deployment/drive_selection/selector.py
@@ -53,9 +53,12 @@ class DriveSelection(object):
         # type: () -> List[Device]
         return self._journal

-    def _limit_reached(self, device_filter, len_devices,
-                       disk_path):
-        # type: (DeviceSelection, int, str) -> bool
+    def _limit_reached(
+        self,
+        device_filter: DeviceSelection,
+        devices: List[Device],
+        disk_path: str
+    ) -> bool:
         """ Check for the <limit> property and apply logic
             If a limit is set in 'device_attrs' we have to stop adding
@@ -63,14 +66,21 @@ class DriveSelection(object):
             If limit is set (>0) and len(devices) >= limit

-        :param int len_devices: Length of the already populated device set/list
+        :param List[Device] devices: Already populated device set/list
         :param str disk_path: The disk identifier (for logging purposes)
         :return: True/False if the device should be added to the list of devices
         :rtype: bool
         """
         limit = device_filter.limit or 0
-
-        if limit > 0 and (len_devices + self.existing_daemons >= limit):
+        # If device A is being used for an OSD already, it can still
+        # match the filter (this is necessary as we still want the
+        # device in the resulting ceph-volume lvm batch command).
+        # If that is the case, we don't want to count the device
+        # towards the limit as it will already be counted through the
+        # existing daemons
+        non_ceph_devices = [d for d in devices if not d.ceph_device]
+
+        if limit > 0 and (len(non_ceph_devices) + self.existing_daemons >= limit):
             logger.debug("Refuse to add {} due to limit policy of <{}>".format(
                 disk_path, limit))
             return True
@@ -147,7 +157,7 @@ class DriveSelection(object):
                 continue

             # break on this condition.
-            if self._limit_reached(device_filter, len(devices), disk.path):
+            if self._limit_reached(device_filter, devices, disk.path):
                 logger.debug("Ignoring disk {}. Limit reached".format(
                     disk.path))
                 break
diff --git a/src/python-common/ceph/tests/test_disk_selector.py b/src/python-common/ceph/tests/test_disk_selector.py
index b08236130e0..03bfcbe16c9 100644
--- a/src/python-common/ceph/tests/test_disk_selector.py
+++ b/src/python-common/ceph/tests/test_disk_selector.py
@@ -557,4 +557,43 @@ class TestDriveSelection(object):
         inventory = _mk_inventory(_mk_device(rotational=True)*2)
         m = 'Failed to validate OSD spec "foobar.data_devices": No filters applied'
         with pytest.raises(DriveGroupValidationError, match=m):
-            drive_selection.DriveSelection(spec, inventory)
\ No newline at end of file
+            drive_selection.DriveSelection(spec, inventory)
+
+
+class TestDeviceSelectionLimit:
+
+    def test_limit_existing_devices(self):
+        # Initial setup for this test is meant to be that /dev/sda
+        # is already being used for an OSD, hence it being marked
+        # as a ceph_device. /dev/sdb and /dev/sdc are not being used
+        # for OSDs yet. The limit will be set to 2 and the DriveSelection
+        # is set to have 1 pre-existing device (corresponding to /dev/sda)
+        dev_a = Device('/dev/sda', ceph_device=True, available=False)
+        dev_b = Device('/dev/sdb', ceph_device=False, available=True)
+        dev_c = Device('/dev/sdc', ceph_device=False, available=True)
+        all_devices: List[Device] = [dev_a, dev_b, dev_c]
+        processed_devices: List[Device] = []
+        filter = DeviceSelection(all=True, limit=2)
+        dgs = DriveGroupSpec(data_devices=filter)
+        ds = drive_selection.DriveSelection(dgs, all_devices, existing_daemons=1)
+
+        # Check /dev/sda. It's already being used for an OSD and will
+        # be counted in existing_daemons. This check should return False
+        # as we are not over the limit.
+        assert not ds._limit_reached(filter, processed_devices, '/dev/sda')
+        processed_devices.append(dev_a)
+
+        # We should still not be over the limit here with /dev/sdb since
+        # we will have only one pre-existing daemon and /dev/sdb itself. This
+        # case previously failed as devices that contributed to existing_daemons
+        # would be double counted both as devices and daemons.
+        assert not ds._limit_reached(filter, processed_devices, '/dev/sdb')
+        processed_devices.append(dev_b)
+
+        # Now, with /dev/sdb and the pre-existing daemon taking up the 2
+        # slots, /dev/sdc should be rejected for us being over the limit.
+        assert ds._limit_reached(filter, processed_devices, '/dev/sdc')
+
+        # DriveSelection does device assignment on initialization. Let's check
+        # it picked up the expected devices
+        assert ds._data == [dev_a, dev_b]
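
For contrast, the pre-fix check compared `len_devices + self.existing_daemons` against the limit, so a matched device that already backs an OSD was counted twice. A sketch of how the scenario exercised by the new regression test trips the old logic (again with simplified stand-ins, self-contained here):

```python
from dataclasses import dataclass
from typing import List


@dataclass
class Device:
    """Illustrative stand-in; only the fields used by the check are modeled."""
    path: str
    ceph_device: bool = False


def limit_reached_old(limit: int, devices: List[Device], existing_daemons: int) -> bool:
    # Pre-fix behaviour: every matched device counts, even one that already
    # backs an OSD and is therefore also reflected in existing_daemons.
    return limit > 0 and len(devices) + existing_daemons >= limit


# Scenario from the new regression test: /dev/sda already backs an OSD
# (so existing_daemons == 1) but still matches the filter; limit is 2.
processed = [Device('/dev/sda', ceph_device=True)]
# /dev/sda is double counted (1 device + 1 daemon >= 2), so the old check
# wrongly refuses /dev/sdb even though only one OSD exists so far.
assert limit_reached_old(2, processed, existing_daemons=1)
```
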