author     Adam King <47704447+adk3798@users.noreply.github.com>  2023-12-13 18:15:05 +0100
committer  GitHub <noreply@github.com>  2023-12-13 18:15:05 +0100
commit     df7773bec7ce47930ee88f71a234d2c392cdb1a9 (patch)
tree       0698f520e7966ad9b7b92392bdeb9cd5cb8ad5fe /src
parent     Merge pull request #54536 from phlogistonjohn/jjm-cephadm-units-n-scripts (diff)
parent     python-common/drive_selection: fix limit with existing devices (diff)
Merge pull request #54681 from adk3798/device-limit-testing
python-common/drive_selection: fix limit with existing devices

Reviewed-by: Michael Fritch <mfritch@suse.com>
Reviewed-by: Guillaume Abrioux <gabrioux@ibm.com>
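For orientation before the hunks: the merge changes how the data_devices limit is counted when some matching devices already carry OSDs. The following is a minimal, self-contained sketch of that counting rule; Device here is a simplified stand-in (not the real ceph.deployment.inventory.Device) and limit_reached is a hypothetical helper, while the names ceph_device and existing_daemons mirror the diff below. It illustrates the rule, it is not the actual implementation.

    # Minimal sketch of the counting rule (hypothetical stand-ins, not the real ceph classes).
    from dataclasses import dataclass
    from typing import List

    @dataclass
    class Device:
        path: str
        ceph_device: bool = False  # True when an OSD already lives on the device

    def limit_reached(limit: int, devices: List[Device], existing_daemons: int) -> bool:
        # Devices that already host an OSD stay in the selection (ceph-volume
        # still needs them on its command line) but they are represented by
        # existing_daemons, so counting them again would double count.
        if limit <= 0:
            return False
        non_ceph_devices = [d for d in devices if not d.ceph_device]
        return len(non_ceph_devices) + existing_daemons >= limit

    # limit=2, one OSD already on /dev/sda: /dev/sdb still fits, /dev/sdc does not.
    picked = [Device('/dev/sda', ceph_device=True)]
    assert not limit_reached(2, picked, existing_daemons=1)   # room for /dev/sdb
    picked.append(Device('/dev/sdb'))
    assert limit_reached(2, picked, existing_daemons=1)       # next device refused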
Diffstat (limited to 'src')
-rw-r--r--  src/pybind/mgr/cephadm/services/osd.py                           9
-rw-r--r--  src/pybind/mgr/cephadm/tests/test_cephadm.py                     7
-rw-r--r--  src/python-common/ceph/deployment/drive_selection/selector.py   24
-rw-r--r--  src/python-common/ceph/tests/test_disk_selector.py              41
4 files changed, 69 insertions(+), 12 deletions(-)
diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py
index 3616d42deee..75b3fc58c76 100644
--- a/src/pybind/mgr/cephadm/services/osd.py
+++ b/src/pybind/mgr/cephadm/services/osd.py
@@ -319,11 +319,16 @@ class OSDService(CephService):
logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
concat_out = {}
notes = []
- if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
+ if (
+ osdspec.data_devices is not None
+ and osdspec.data_devices.limit
+ and (len(concat_out) + ds.existing_daemons) < osdspec.data_devices.limit
+ ):
found = len(concat_out)
limit = osdspec.data_devices.limit
notes.append(
- f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
+ f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit\n'
+ f'(New Devices: {found} | Existing Matching Daemons: {ds.existing_daemons} | Limit: {limit})')
ret_all.append({'data': concat_out,
'osdspec': osdspec.service_id,
'host': host,
diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py
index ebdf7eea5aa..2477de13e00 100644
--- a/src/pybind/mgr/cephadm/tests/test_cephadm.py
+++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py
@@ -1193,7 +1193,8 @@ class TestCephadm(object):
@mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
@mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
@mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
- def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+ @mock.patch('cephadm.inventory.HostCache.get_daemons_by_service')
+ def test_limit_not_reached(self, _get_daemons_by_service, d_to_cv, _run_cv_cmd, cephadm_module):
with with_host(cephadm_module, 'test'):
dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
data_devices=DeviceSelection(limit=5, rotational=1),
@@ -1203,12 +1204,14 @@ class TestCephadm(object):
'[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
d_to_cv.return_value = 'foo'
_run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+ _get_daemons_by_service.return_value = [DaemonDescription(daemon_type='osd', hostname='test', service_name='not_enough')]
preview = cephadm_module.osd_service.generate_previews([dg], 'test')
for osd in preview:
assert 'notes' in osd
assert osd['notes'] == [
- 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+ ('NOTE: Did not find enough disks matching filter on host test to reach '
+ 'data device limit\n(New Devices: 2 | Existing Matching Daemons: 1 | Limit: 5)')]
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_prepare_drivegroup(self, cephadm_module):
diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py
index 1b3bfbb4ee3..31e330432cd 100644
--- a/src/python-common/ceph/deployment/drive_selection/selector.py
+++ b/src/python-common/ceph/deployment/drive_selection/selector.py
@@ -53,9 +53,12 @@ class DriveSelection(object):
# type: () -> List[Device]
return self._journal
- def _limit_reached(self, device_filter, len_devices,
- disk_path):
- # type: (DeviceSelection, int, str) -> bool
+ def _limit_reached(
+ self,
+ device_filter: DeviceSelection,
+ devices: List[Device],
+ disk_path: str
+ ) -> bool:
""" Check for the <limit> property and apply logic
If a limit is set in 'device_attrs' we have to stop adding
@@ -63,14 +66,21 @@ class DriveSelection(object):
If limit is set (>0) and len(devices) >= limit
- :param int len_devices: Length of the already populated device set/list
+ :param List[Device] devices: Already populated device set/list
:param str disk_path: The disk identifier (for logging purposes)
:return: True/False if the device should be added to the list of devices
:rtype: bool
"""
limit = device_filter.limit or 0
-
- if limit > 0 and (len_devices + self.existing_daemons >= limit):
+ # If device A is being used for an OSD already, it can still
+ # match the filter (this is necessary as we still want the
+ # device in the resulting ceph-volume lvm batch command).
+ # If that is the case, we don't want to count the device
+ # towards the limit as it will already be counted through the
+ # existing daemons
+ non_ceph_devices = [d for d in devices if not d.ceph_device]
+
+ if limit > 0 and (len(non_ceph_devices) + self.existing_daemons >= limit):
logger.debug("Refuse to add {} due to limit policy of <{}>".format(
disk_path, limit))
return True
@@ -147,7 +157,7 @@ class DriveSelection(object):
continue
# break on this condition.
- if self._limit_reached(device_filter, len(devices), disk.path):
+ if self._limit_reached(device_filter, devices, disk.path):
logger.debug("Ignoring disk {}. Limit reached".format(
disk.path))
break
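To make the double counting this hunk removes concrete, here is a hedged before/after comparison reusing the simplified stand-ins from the sketch near the top; old_limit_reached is an illustrative name, and its body is lifted from the removed line above.

    # Old rule (removed above): every selected device counts toward the limit,
    # so a device that already hosts an OSD was counted once as a device and
    # once more via existing_daemons.
    def old_limit_reached(limit: int, devices: list, existing_daemons: int) -> bool:
        return limit > 0 and len(devices) + existing_daemons >= limit

    # With /dev/sda already an OSD (existing_daemons=1) and limit=2, the old
    # check already reports the limit as reached while deciding on /dev/sdb,
    # even though only one new OSD would actually be created:
    #   old: len([sda]) + 1 = 2 >= 2  -> refuse /dev/sdb (the bug)
    #   new: len([])    + 1 = 1 <  2  -> accept /dev/sdb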
diff --git a/src/python-common/ceph/tests/test_disk_selector.py b/src/python-common/ceph/tests/test_disk_selector.py
index b08236130e0..03bfcbe16c9 100644
--- a/src/python-common/ceph/tests/test_disk_selector.py
+++ b/src/python-common/ceph/tests/test_disk_selector.py
@@ -557,4 +557,43 @@ class TestDriveSelection(object):
inventory = _mk_inventory(_mk_device(rotational=True)*2)
m = 'Failed to validate OSD spec "foobar.data_devices": No filters applied'
with pytest.raises(DriveGroupValidationError, match=m):
- drive_selection.DriveSelection(spec, inventory)
\ No newline at end of file
+ drive_selection.DriveSelection(spec, inventory)
+
+
+class TestDeviceSelectionLimit:
+
+ def test_limit_existing_devices(self):
+ # Initial setup for this test is meant to be that /dev/sda
+ # is already being used for an OSD, hence it being marked
+ # as a ceph_device. /dev/sdb and /dev/sdc are not being used
+ # for OSDs yet. The limit will be set to 2 and the DriveSelection
+ # is set to have 1 pre-existing device (corresponding to /dev/sda)
+ dev_a = Device('/dev/sda', ceph_device=True, available=False)
+ dev_b = Device('/dev/sdb', ceph_device=False, available=True)
+ dev_c = Device('/dev/sdc', ceph_device=False, available=True)
+ all_devices: List[Device] = [dev_a, dev_b, dev_c]
+ processed_devices: List[Device] = []
+ filter = DeviceSelection(all=True, limit=2)
+ dgs = DriveGroupSpec(data_devices=filter)
+ ds = drive_selection.DriveSelection(dgs, all_devices, existing_daemons=1)
+
+ # Check /dev/sda. It's already being used for an OSD and will
+ # be counted in existing_daemons. This check should return False
+ # as we are not over the limit.
+ assert not ds._limit_reached(filter, processed_devices, '/dev/sda')
+ processed_devices.append(dev_a)
+
+ # We should still not be over the limit here with /dev/sdb since
+ # we will have only the one pre-existing daemon and /dev/sdb itself.
+ # This case previously failed as devices that contributed to
+ # existing_daemons were double counted as both devices and daemons.
+ assert not ds._limit_reached(filter, processed_devices, '/dev/sdb')
+ processed_devices.append(dev_b)
+
+ # Now, with /dev/sdb and the pre-existing daemon taking up the 2
+ # slots, /dev/sdc should be rejected because the limit is reached.
+ assert ds._limit_reached(filter, processed_devices, '/dev/sdc')
+
+ # DriveSelection does device assignment on initialization. Let's check
+ # it picked up the expected devices
+ assert ds._data == [dev_a, dev_b]
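
As a closing usage note (the rendering below is an assumption about how the preview surfaces, not part of the patch): with the spec from test_limit_not_reached above, the OSD preview for host test, e.g. from a dry-run apply of the spec, would carry the reworded note split across two lines:

    NOTE: Did not find enough disks matching filter on host test to reach data device limit
    (New Devices: 2 | Existing Matching Daemons: 1 | Limit: 5)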