summaryrefslogtreecommitdiffstats
path: root/src/python-common
diff options
context:
space:
mode:
authorAdam King <adking@redhat.com>2023-11-27 21:04:42 +0100
committerAdam King <adking@redhat.com>2023-11-29 19:57:00 +0100
commitd3f1a0e1c0b98b9f1251837ecc8edc367e590dad (patch)
tree6b94ea8bf27e51401804e894440e968f1eb09c07 /src/python-common
parentMerge pull request #54578 from ronen-fr/wip-rf-dedup-clang (diff)
downloadceph-d3f1a0e1c0b98b9f1251837ecc8edc367e590dad.tar.xz
ceph-d3f1a0e1c0b98b9f1251837ecc8edc367e590dad.zip
python-common/drive_selection: fix limit with existing devices
When devices have already been used for OSDs, they are still allowed to pass filtering as they are still needed for the resulting ceph-volume lvm batch command. This was causing an issue with limit however. Limit adds the devices we've found that match the filter and existing OSD daemons tied to the spec. This allows double counting of devices that hae been used for OSDs, as they're counted in terms of being an existing device and that they match the filter. To avoid this issue, devices should only be counted towards the limit if they are not already part of an OSD. An additional note: The limit feature is only applied for data devices, so there is no need to worry about the effect of this change on selection of db, wal, or journal devices. Also, we would still want to not count these devices if they did end up passing the data device filter but had been used for a db/wal/journal device previously. Fixes: https://tracker.ceph.com/issues/63525 Signed-off-by: Adam King <adking@redhat.com>
Diffstat (limited to 'src/python-common')
-rw-r--r--src/python-common/ceph/deployment/drive_selection/selector.py24
-rw-r--r--src/python-common/ceph/tests/test_disk_selector.py41
2 files changed, 57 insertions, 8 deletions
diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py
index 1b3bfbb4ee3..31e330432cd 100644
--- a/src/python-common/ceph/deployment/drive_selection/selector.py
+++ b/src/python-common/ceph/deployment/drive_selection/selector.py
@@ -53,9 +53,12 @@ class DriveSelection(object):
# type: () -> List[Device]
return self._journal
- def _limit_reached(self, device_filter, len_devices,
- disk_path):
- # type: (DeviceSelection, int, str) -> bool
+ def _limit_reached(
+ self,
+ device_filter: DeviceSelection,
+ devices: List[Device],
+ disk_path: str
+ ) -> bool:
""" Check for the <limit> property and apply logic
If a limit is set in 'device_attrs' we have to stop adding
@@ -63,14 +66,21 @@ class DriveSelection(object):
If limit is set (>0) and len(devices) >= limit
- :param int len_devices: Length of the already populated device set/list
+ :param List[Device] devices: Already populated device set/list
:param str disk_path: The disk identifier (for logging purposes)
:return: True/False if the device should be added to the list of devices
:rtype: bool
"""
limit = device_filter.limit or 0
-
- if limit > 0 and (len_devices + self.existing_daemons >= limit):
+ # If device A is being used for an OSD already, it can still
+ # match the filter (this is necessary as we still want the
+ # device in the resulting ceph-volume lvm batch command).
+ # If that is the case, we don't want to count the device
+ # towards the limit as it will already be counted through the
+ # existing daemons
+ non_ceph_devices = [d for d in devices if not d.ceph_device]
+
+ if limit > 0 and (len(non_ceph_devices) + self.existing_daemons >= limit):
logger.debug("Refuse to add {} due to limit policy of <{}>".format(
disk_path, limit))
return True
@@ -147,7 +157,7 @@ class DriveSelection(object):
continue
# break on this condition.
- if self._limit_reached(device_filter, len(devices), disk.path):
+ if self._limit_reached(device_filter, devices, disk.path):
logger.debug("Ignoring disk {}. Limit reached".format(
disk.path))
break
diff --git a/src/python-common/ceph/tests/test_disk_selector.py b/src/python-common/ceph/tests/test_disk_selector.py
index b08236130e0..03bfcbe16c9 100644
--- a/src/python-common/ceph/tests/test_disk_selector.py
+++ b/src/python-common/ceph/tests/test_disk_selector.py
@@ -557,4 +557,43 @@ class TestDriveSelection(object):
inventory = _mk_inventory(_mk_device(rotational=True)*2)
m = 'Failed to validate OSD spec "foobar.data_devices": No filters applied'
with pytest.raises(DriveGroupValidationError, match=m):
- drive_selection.DriveSelection(spec, inventory) \ No newline at end of file
+ drive_selection.DriveSelection(spec, inventory)
+
+
+class TestDeviceSelectionLimit:
+
+ def test_limit_existing_devices(self):
+ # Initial setup for this test is meant to be that /dev/sda
+ # is already being used for an OSD, hence it being marked
+ # as a ceph_device. /dev/sdb and /dev/sdc are not being used
+ # for OSDs yet. The limit will be set to 2 and the DriveSelection
+ # is set to have 1 pre-existing device (corresponding to /dev/sda)
+ dev_a = Device('/dev/sda', ceph_device=True, available=False)
+ dev_b = Device('/dev/sdb', ceph_device=False, available=True)
+ dev_c = Device('/dev/sdc', ceph_device=False, available=True)
+ all_devices: List[Device] = [dev_a, dev_b, dev_c]
+ processed_devices: List[Device] = []
+ filter = DeviceSelection(all=True, limit=2)
+ dgs = DriveGroupSpec(data_devices=filter)
+ ds = drive_selection.DriveSelection(dgs, all_devices, existing_daemons=1)
+
+ # Check /dev/sda. It's already being used for an OSD and will
+ # be counted in existing_daemons. This check should return False
+ # as we are not over the limit.
+ assert not ds._limit_reached(filter, processed_devices, '/dev/sda')
+ processed_devices.append(dev_a)
+
+ # We should still not be over the limit here with /dev/sdb since
+ # we will have only one pre-existing daemon /dev/sdb itself. This
+ # case previously failed as devices that contributed to existing_daemons
+ # would be double counted both as devices and daemons.
+ assert not ds._limit_reached(filter, processed_devices, '/dev/sdb')
+ processed_devices.append(dev_b)
+
+ # Now, with /dev/sdb and the pre-existing daemon taking up the 2
+ # slots, /dev/sdc should be rejected for us being over the limit.
+ assert ds._limit_reached(filter, processed_devices, '/dev/sdc')
+
+ # DriveSelection does device assignment on initialization. Let's check
+ # it picked up the expected devices
+ assert ds._data == [dev_a, dev_b]