Diffstat (limited to 'src')
703 files changed, 25129 insertions, 8555 deletions
diff --git a/src/BLAKE3 b/src/BLAKE3
new file mode 160000
+Subproject 92e4cd71be48fdf9a79e88ef37b8f415ec5ac21
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5b21a2db04f..79b45ef171f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -196,6 +196,9 @@ if(HAS_GLIBCXX_ASSERTIONS AND CMAKE_BUILD_TYPE STREQUAL Debug)
  add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_ASSERTIONS>)
endif()
+# add BLAKE3 before we clobber CMAKE_ASM_COMPILER
+add_subdirectory(BLAKE3/c EXCLUDE_FROM_ALL)
+
include(SIMDExt)
if(HAVE_INTEL)
  if(APPLE)
diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h
index 6c55646fc76..a2bb7ab86a7 100644
--- a/src/blk/BlockDevice.h
+++ b/src/blk/BlockDevice.h
@@ -233,6 +233,7 @@ public:
  uint64_t get_size() const { return size; }
  uint64_t get_block_size() const { return block_size; }
  uint64_t get_optimal_io_size() const { return optimal_io_size; }
+ bool is_discard_supported() const { return support_discard; }
  /// hook to provide utilization of thinly-provisioned device
  virtual int get_ebd_state(ExtBlkDevState &state) const {
@@ -286,7 +287,7 @@ public:
  virtual int flush() = 0;
  virtual bool try_discard(interval_set<uint64_t> &to_release, bool async=true) { return false; }
  virtual void discard_drain() { return; }
-
+ virtual void swap_discard_queued(interval_set<uint64_t>& other) { other.clear(); }
  // for managing buffered readers/writers
  virtual int invalidate_cache(uint64_t off, uint64_t len) = 0;
  virtual int open(const std::string& path) = 0;
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc
index 6337292f5de..086828b3a9e 100644
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -591,7 +591,7 @@ void KernelDevice::discard_drain()
{
  dout(10) << __func__ << dendl;
  std::unique_lock l(discard_lock);
- while (!discard_queued.empty() || discard_running) {
+ while (!discard_queued.empty() || (discard_running > 0)) {
    discard_cond.wait(l);
  }
}
@@ -731,6 +731,12 @@ void KernelDevice::_aio_thread()
  dout(10) << __func__ << " end" << dendl;
}
+void KernelDevice::swap_discard_queued(interval_set<uint64_t>& other)
+{
+  std::unique_lock l(discard_lock);
+  discard_queued.swap(other);
+}
+
void KernelDevice::_discard_thread(uint64_t tid)
{
  dout(10) << __func__ << " thread " << tid << " start" << dendl;
@@ -755,13 +761,21 @@ void KernelDevice::_discard_thread(uint64_t tid)
      discard_cond.wait(l);
      dout(20) << __func__ << " wake" << dendl;
    } else {
-      // Swap the queued discards for a local list we'll process here
-      // without caring about thread fairness. This allows the current
-      // thread to wait on the discard running while other threads pick
-      // up the next-in-queue, and do the same, ultimately issuing more
-      // discards in parallel, which is the goal.
-      discard_processing.swap(discard_queued);
-      discard_running = true;
+      // Limit local processing to MAX_LOCAL_DISCARD items.
+      // This will allow threads to work in parallel
+      // instead of a single thread taking over the whole discard_queued.
+      // It will also allow threads to finish in a timely manner.
+      constexpr unsigned MAX_LOCAL_DISCARD = 32;
+      unsigned count = 0;
+      for (auto p = discard_queued.begin();
+           p != discard_queued.end() && count < MAX_LOCAL_DISCARD;
+           ++p, ++count) {
+        discard_processing.insert(p.get_start(), p.get_len());
+        discard_queued.erase(p);
+      }
+
+      // there are multiple active threads -> must use a counter instead of a flag
+      discard_running ++;
      l.unlock();
      dout(20) << __func__ << " finishing" << dendl;
      for (auto p = discard_processing.begin(); p != discard_processing.end(); ++p) {
@@ -771,7 +785,8 @@
      discard_callback(discard_callback_priv, static_cast<void*>(&discard_processing));
      discard_processing.clear();
      l.lock();
-      discard_running = false;
+      discard_running --;
+      ceph_assert(discard_running >= 0);
    }
  }
@@ -1116,8 +1131,8 @@ int KernelDevice::_discard(uint64_t offset, uint64_t len)
    return 0;
  }
  dout(10) << __func__
-          << " 0x" << std::hex << offset << "~" << len << std::dec
-          << dendl;
+           << " 0x" << std::hex << offset << "~" << len << std::dec
+           << dendl;
  r = BlkDev{fd_directs[WRITE_LIFE_NOT_SET]}.discard((int64_t)offset, (int64_t)len);
  return r;
}
diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h
index 914f05e64c4..70962117403 100644
--- a/src/blk/kernel/KernelDevice.h
+++ b/src/blk/kernel/KernelDevice.h
@@ -55,7 +55,7 @@ private:
  ceph::mutex discard_lock = ceph::make_mutex("KernelDevice::discard_lock");
  ceph::condition_variable discard_cond;
- bool discard_running = false;
+ int discard_running = 0;
  interval_set<uint64_t> discard_queued;
  struct AioCompletionThread : public Thread {
@@ -124,7 +124,7 @@ public:
  void aio_submit(IOContext *ioc) override;
  void discard_drain() override;
-
+ void swap_discard_queued(interval_set<uint64_t>& other) override;
  int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const override;
  int get_devname(std::string *s) const override {
    if (devname.empty()) {
diff --git a/src/boost_redis b/src/boost_redis
deleted file mode 160000
-Subproject 78792199efad17f1756603f929daa2930c45694
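The KernelDevice change above replaces a single boolean `discard_running` with a counter and caps each worker's batch at MAX_LOCAL_DISCARD extents, so several discard threads can drain the queue concurrently while discard_drain() still waits for both an empty queue and a zero in-flight counter. The following is a minimal, runnable Python sketch of that same pattern (bounded batches plus a running counter behind one condition variable); the names are illustrative and this is not Ceph code:

    import threading
    from collections import deque

    MAX_LOCAL_BATCH = 32          # mirrors MAX_LOCAL_DISCARD above

    cond = threading.Condition()
    queued = deque(range(100))    # stands in for discard_queued
    running = 0                   # a counter, not a bool: several workers may hold a batch

    def drain():
        # equivalent of discard_drain(): wait until nothing is queued or in flight
        with cond:
            cond.wait_for(lambda: not queued and running == 0)

    def worker():
        global running
        while True:
            with cond:
                if not queued:
                    cond.notify_all()
                    return
                # take a bounded batch so other workers can proceed in parallel
                n = min(MAX_LOCAL_BATCH, len(queued))
                batch = [queued.popleft() for _ in range(n)]
                running += 1
            for _ in batch:
                pass              # the slow discard would happen here, outside the lock
            with cond:
                running -= 1
                assert running >= 0
                cond.notify_all()

    workers = [threading.Thread(target=worker) for _ in range(4)]
    for t in workers:
        t.start()
    drain()
    for t in workers:
        t.join()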
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
index 474b479dee9..83ed16845e7 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py
@@ -10,7 +10,7 @@ from ceph_volume.util import encryption as encryption_utils
from ceph_volume import decorators, terminal, process
from ceph_volume.api import lvm as api
from ceph_volume.systemd import systemctl
-
+from ceph_volume.devices.lvm import zap
logger = logging.getLogger(__name__)
mlogger = terminal.MultiLogger(__name__)
@@ -174,6 +174,7 @@ class VolumeTagTracker(object):
            remaining_devices.append(self.wal_device)
        outdated_tags = []
+        removed_devices = []
        for device, type in source_devices:
            if type == "block" or type == target_type:
                continue
@@ -182,10 +183,13 @@ class VolumeTagTracker(object):
            outdated_tags.append("ceph.{}_uuid".format(type))
            outdated_tags.append("ceph.{}_device".format(type))
            device.lv_api.clear_tags()
+            removed_devices.append(device)
+
        if len(outdated_tags) > 0:
            for d in remaining_devices:
                if d and d.is_lv:
                    d.lv_api.clear_tags(outdated_tags)
+        return removed_devices
    def replace_lvs(self, source_devices, target_type):
        remaining_devices = [self.data_device]
@@ -195,6 +199,7 @@ class VolumeTagTracker(object):
            remaining_devices.append(self.wal_device)
        outdated_tags = []
+        removed_devices = []
        for device, type in source_devices:
            if type == "block":
                continue
@@ -203,6 +208,7 @@ class VolumeTagTracker(object):
            outdated_tags.append("ceph.{}_uuid".format(type))
            outdated_tags.append("ceph.{}_device".format(type))
            device.lv_api.clear_tags()
+            removed_devices.append(device)
        new_tags = {}
        new_tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
@@ -224,6 +230,7 @@ class VolumeTagTracker(object):
        tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid
        tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path
        self.target_lv.set_tags(tags)
+        return removed_devices
    def undo(self):
        mlogger.info(
@@ -339,7 +346,7 @@ class Migrate(object):
        # ceph-bluestore-tool removes source volume(s) other than block one
        # and attaches target one after successful migration
-        tag_tracker.replace_lvs(source_devices, target_type)
+        removed_devices = tag_tracker.replace_lvs(source_devices, target_type)
        osd_path = get_osd_path(osd_id, osd_fsid)
        source_args = self.get_source_args(osd_path, source_devices)
@@ -364,6 +371,9 @@ class Migrate(object):
                        target_type)))
            if tag_tracker.data_device.lv_api.encrypted:
                self.close_encrypted(source_devices)
+            for d in removed_devices:
+                if d and d.is_lv:
+                    zap.Zap([d.lv_api.lv_path]).main()
            terminal.success('Migration successful.')
        except:
@@ -395,7 +405,7 @@ class Migrate(object):
        try:
            # ceph-bluestore-tool removes source volume(s) other than
            # block and target ones after successful migration
-            tag_tracker.remove_lvs(source_devices, target_type)
+            removed_devices = tag_tracker.remove_lvs(source_devices, target_type)
            source_args = self.get_source_args(osd_path, source_devices)
            mlogger.info("Migrate to existing, Source: {} Target: {}".format(
                source_args, target_path))
@@ -415,6 +425,9 @@ class Migrate(object):
                'Failed to migrate to : {}'.format(self.args.target))
            if tag_tracker.data_device.lv_api.encrypted:
                self.close_encrypted(source_devices)
+            for d in removed_devices:
+                if d and d.is_lv:
+                    zap.Zap([d.lv_api.lv_path]).main()
            terminal.success('Migration successful.')
        except:
            tag_tracker.undo()
diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py
index 7e4d963c8b4..062ea511a8e 100644
--- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py
+++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py
@@ -1,6 +1,7 @@
import pytest
from ceph_volume.devices import lvm
from ceph_volume.api import lvm as api
+from mock import patch, Mock
# TODO: add tests for following commands -
# ceph-volume list
@@ -68,6 +69,7 @@ class TestList(object):
        stdout, stderr = capsys.readouterr()
        assert stdout == '{}\n'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_empty_device_json_zero_exit_status(self, is_root,factory,capsys):
        args = factory(format='json', device='/dev/sda1')
        lvm.listing.List([]).list(args)
@@ -79,6 +81,7 @@ class TestList(object):
        with pytest.raises(SystemExit):
            lvm.listing.List([]).list(args)
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_empty_device_zero_exit_status(self, is_root, factory):
        args = factory(format='pretty', device='/dev/sda1')
        with pytest.raises(SystemExit):
@@ -86,6 +89,7 @@
class TestFullReport(object):
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_no_ceph_lvs(self, monkeypatch):
        # ceph lvs are detected by looking into its tags
        osd = api.Volume(lv_name='volume1', lv_path='/dev/VolGroup/lv',
@@ -98,6 +102,7 @@ class TestFullReport(object):
        result = lvm.listing.List([]).full_report()
        assert result == {}
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_ceph_data_lv_reported(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
        pv = api.PVolume(pv_name='/dev/sda1', pv_tags={}, pv_uuid="0000",
@@ -113,6 +118,7 @@ class TestFullReport(object):
        result = lvm.listing.List([]).full_report()
        assert result['0'][0]['name'] == 'volume1'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_ceph_journal_lv_reported(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
        journal_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=journal'
@@ -134,6 +140,7 @@ class TestFullReport(object):
        assert result['0'][0]['name'] == 'volume1'
        assert result['0'][1]['name'] == 'journal'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_ceph_wal_lv_reported(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.wal_uuid=x,ceph.type=data'
        wal_tags = 'ceph.osd_id=0,ceph.wal_uuid=x,ceph.type=wal'
@@ -151,6 +158,7 @@ class TestFullReport(object):
        assert result['0'][0]['name'] == 'volume1'
        assert result['0'][1]['name'] == 'wal'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    @pytest.mark.parametrize('type_', ['journal', 'db', 'wal'])
    def test_physical_2nd_device_gets_reported(self, type_, monkeypatch):
        tags = ('ceph.osd_id=0,ceph.{t}_uuid=x,ceph.type=data,'
@@ -168,6 +176,7 @@
class TestSingleReport(object):
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_not_a_ceph_lv(self, monkeypatch):
        # ceph lvs are detected by looking into its tags
        lv = api.Volume(lv_name='lv', lv_tags={}, lv_path='/dev/VolGroup/lv',
@@ -178,6 +187,7 @@ class TestSingleReport(object):
        result = lvm.listing.List([]).single_report('VolGroup/lv')
        assert result == {}
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_a_ceph_lv(self, monkeypatch):
        # ceph lvs are detected by looking into its tags
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
@@ -194,6 +204,7 @@ class TestSingleReport(object):
        assert result['0'][0]['path'] == '/dev/VolGroup/lv'
        assert result['0'][0]['devices'] == []
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_a_ceph_journal_device(self, monkeypatch):
        # ceph lvs are detected by looking into its tags
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data,' + \
@@ -242,6 +253,7 @@ class TestSingleReport(object):
        assert result['0'][0]['path'] == '/dev/VolGroup/lv'
        assert result['0'][0]['devices'] == ['/dev/sda1', '/dev/sdb1']
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_by_osd_id_for_just_block_dev(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=block'
        lvs = [ api.Volume(lv_name='lv1', lv_tags=tags, lv_path='/dev/vg/lv1',
@@ -256,6 +268,7 @@ class TestSingleReport(object):
        assert result['0'][0]['lv_path'] == '/dev/vg/lv1'
        assert result['0'][0]['vg_name'] == 'vg'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_by_osd_id_for_just_data_dev(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
        lvs = [ api.Volume(lv_name='lv1', lv_tags=tags, lv_path='/dev/vg/lv1',
@@ -270,6 +283,7 @@ class TestSingleReport(object):
        assert result['0'][0]['lv_path'] == '/dev/vg/lv1'
        assert result['0'][0]['vg_name'] == 'vg'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_by_osd_id_for_just_block_wal_and_db_dev(self, monkeypatch):
        tags1 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=block'
        tags2 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=wal'
@@ -298,7 +312,7 @@ class TestSingleReport(object):
        assert result['0'][2]['lv_path'] == '/dev/vg/lv3'
        assert result['0'][2]['vg_name'] == 'vg'
-
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_by_osd_id_for_data_and_journal_dev(self, monkeypatch):
        tags1 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
        tags2 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=journal'
@@ -320,6 +334,7 @@ class TestSingleReport(object):
        assert result['0'][1]['lv_path'] == '/dev/vg/lv2'
        assert result['0'][1]['vg_name'] == 'vg'
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_by_nonexistent_osd_id(self, monkeypatch):
        lv = api.Volume(lv_name='lv', lv_tags={}, lv_path='/dev/VolGroup/lv',
                        vg_name='VolGroup')
@@ -329,6 +344,7 @@ class TestSingleReport(object):
        result = lvm.listing.List([]).single_report('1')
        assert result == {}
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_report_a_ceph_lv_with_no_matching_devices(self, monkeypatch):
        tags = 'ceph.osd_id=0,ceph.type=data'
        lv = api.Volume(lv_name='lv', vg_name='VolGroup', lv_uuid='aaaa',
diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py
index 072d4f1ef35..19e61f7e371 100644
--- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py
+++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py
@@ -1,11 +1,13 @@
import pytest
-from mock.mock import patch
+from mock.mock import patch, Mock
from ceph_volume import process
from ceph_volume.api import lvm as api
from ceph_volume.devices.lvm import migrate
from ceph_volume.util.device import Device
from ceph_volume.util import system
from ceph_volume.util import encryption as encryption_utils
+from ceph_volume.devices.lvm.zap import Zap
+
class TestGetClusterName(object):
@@ -533,6 +535,7 @@ class TestNew(object):
        expected = 'This command needs to be executed with sudo or as root'
        assert expected in str(error.value)
+    @patch('ceph_volume.api.lvm.get_lv_by_fullname', Mock(return_value=None))
    def test_newdb_not_target_lvm(self, is_root, capsys):
        with pytest.raises(SystemExit) as error:
            migrate.NewDB(argv=[
@@ -1214,7 +1217,8 @@ Example calls for supported scenarios:
        assert not stderr
-    def test_migrate_data_db_to_new_db(self, is_root, monkeypatch):
+    @patch.object(Zap, 'main')
+    def test_migrate_data_db_to_new_db(self, m_zap, is_root, monkeypatch):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev'
@@ -1312,9 +1316,11 @@ Example calls for supported scenarios:
            '--command', 'bluefs-bdev-migrate',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db']
+        m_zap.assert_called_once()
+
+    @patch.object(Zap, 'main')
    @patch('os.getuid')
-    def test_migrate_data_db_to_new_db_encrypted(self, m_getuid, monkeypatch):
+    def test_migrate_data_db_to_new_db_encrypted(self, m_getuid, m_zap, monkeypatch):
        m_getuid.return_value = 0
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
@@ -1427,6 +1433,8 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db']
+        m_zap.assert_called_once()
+
    def test_migrate_data_db_to_new_db_active_systemd(self, is_root, monkeypatch, capsys):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev'
@@ -1490,7 +1498,8 @@ Example calls for supported scenarios:
        assert '--> OSD is running, stop it with: systemctl stop ceph-osd@2' == stderr.rstrip()
        assert not stdout
-    def test_migrate_data_db_to_new_db_no_systemd(self, is_root, monkeypatch):
+    @patch.object(Zap, 'main')
+    def test_migrate_data_db_to_new_db_no_systemd(self, m_zap, is_root, monkeypatch):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev'
        source_db_tags = 'ceph.osd_id=2,ceph.type=db,ceph.osd_fsid=1234,' \
@@ -1586,7 +1595,10 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db']
-    def test_migrate_data_db_to_new_db_skip_wal(self, is_root, monkeypatch):
+        m_zap.assert_called_once()
+
+    @patch.object(Zap, 'main')
+    def test_migrate_data_db_to_new_db_skip_wal(self, m_zap, is_root, monkeypatch):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev'
        source_db_tags = 'ceph.osd_id=2,ceph.type=db,ceph.osd_fsid=1234,' \
@@ -1705,7 +1717,10 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db']
-    def test_migrate_data_db_wal_to_new_db(self, is_root, monkeypatch):
+        m_zap.assert_called_once()
+
+    @patch.object(Zap, 'main')
+    def test_migrate_data_db_wal_to_new_db(self, m_zap, is_root, monkeypatch):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \
            'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev'
@@ -1829,8 +1844,11 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+        assert len(m_zap.mock_calls) == 2
+
+    @patch.object(Zap, 'main')
    @patch('os.getuid')
-    def test_migrate_data_db_wal_to_new_db_encrypted(self, m_getuid, monkeypatch):
+    def test_migrate_data_db_wal_to_new_db_encrypted(self, m_getuid, m_zap, monkeypatch):
        m_getuid.return_value = 0
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
@@ -1970,6 +1988,8 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+        assert len(m_zap.mock_calls) == 2
+
    @patch('os.getuid')
    def test_dont_migrate_data_db_wal_to_new_data(self,
                                                  m_getuid,
@@ -2334,7 +2354,9 @@ Example calls for supported scenarios:
            '--command', 'bluefs-bdev-migrate',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block']
+    @patch.object(Zap, 'main')
    def test_migrate_data_wal_to_db(self,
+                                    m_zap,
                                    is_root,
                                    monkeypatch,
                                    capsys):
@@ -2436,11 +2458,15 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+        m_zap.assert_called_once()
+
+    @patch.object(Zap, 'main')
    @patch('os.getuid')
    def test_migrate_wal_to_db(self,
-                               m_getuid,
-                               monkeypatch,
-                               capsys):
+                               m_getuid,
+                               m_zap,
+                               monkeypatch,
+                               capsys):
        m_getuid.return_value = 0
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
@@ -2523,9 +2549,13 @@ Example calls for supported scenarios:
            '--command', 'bluefs-bdev-migrate',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+        m_zap.assert_called_once()
+
+    @patch.object(Zap, 'main')
    @patch('os.getuid')
    def test_migrate_data_wal_to_db_encrypted(self,
                                              m_getuid,
+                                              m_zap,
                                              monkeypatch,
                                              capsys):
        m_getuid.return_value = 0
@@ -2637,6 +2667,8 @@ Example calls for supported scenarios:
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+        m_zap.assert_called_once()
+
    def test_migrate_data_wal_to_db_active_systemd(self, is_root, monkeypatch, capsys):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \
@@ -2709,7 +2741,8 @@ Example calls for supported scenarios:
        assert '--> OSD is running, stop it with: systemctl stop ceph-osd@2' == stderr.rstrip()
        assert not stdout
-    def test_migrate_data_wal_to_db_no_systemd(self, is_root, monkeypatch):
+    @patch.object(Zap, 'main')
+    def test_migrate_data_wal_to_db_no_systemd(self, m_zap, is_root, monkeypatch):
        source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \
            'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \
            'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev'
@@ -2805,3 +2838,5 @@ Example calls for supported scenarios:
            '--command', 'bluefs-bdev-migrate',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block',
            '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal']
+
+        m_zap.assert_called_once()
\ No newline at end of file
diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py
index 51f66abfc78..d630a7a6bf8 100644
--- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py
+++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py
@@ -1,7 +1,7 @@
import os
import pytest
from copy import deepcopy
-from mock.mock import patch, call
+from mock.mock import patch, call, Mock
from ceph_volume import process
from ceph_volume.api import lvm as api
from ceph_volume.devices.lvm import zap
@@ -100,6 +100,7 @@ class TestFindAssociatedDevices(object):
class TestEnsureAssociatedLVs(object):
+    @patch('ceph_volume.devices.lvm.zap.api', Mock(return_value=[]))
    def test_nothing_is_found(self):
        volumes = []
        result = zap.ensure_associated_lvs(volumes)
@@ -148,6 +149,7 @@ class TestEnsureAssociatedLVs(object):
        result = zap.ensure_associated_lvs(volumes)
        assert result == ['/dev/VolGroup/lv']
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_multiple_journals_are_found(self):
        tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=journal'
        volumes = []
@@ -160,6 +162,7 @@ class TestEnsureAssociatedLVs(object):
        assert '/dev/VolGroup/lv1' in result
        assert '/dev/VolGroup/lv2' in result
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_multiple_dbs_are_found(self):
        tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=db'
        volumes = []
@@ -172,6 +175,7 @@ class TestEnsureAssociatedLVs(object):
        assert '/dev/VolGroup/lv1' in result
        assert '/dev/VolGroup/lv2' in result
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_multiple_wals_are_found(self):
        tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.wal_uuid=x,ceph.type=wal'
        volumes = []
@@ -184,6 +188,7 @@ class TestEnsureAssociatedLVs(object):
        assert '/dev/VolGroup/lv1' in result
        assert '/dev/VolGroup/lv2' in result
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    def test_multiple_backing_devs_are_found(self):
        volumes = []
        for _type in ['journal', 'db', 'wal']:
diff --git a/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py
index 45fbd3005b6..346e5f45563 100644
--- a/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py
+++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py
@@ -477,6 +477,7 @@ class TestLvmBlueStore:
        assert "Was unable to find any OSDs to activate" in stderr
        assert "Verify OSDs are present with" in stderr
+    @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0)))
    @patch('ceph_volume.systemd.systemctl.osd_is_active', return_value=True)
    def test_activate_all_osd_is_active(self,
                                        mock_lvm_direct_report,
diff --git a/src/ceph-volume/ceph_volume/tests/util/test_encryption.py b/src/ceph-volume/ceph_volume/tests/util/test_encryption.py
index 4a720241dd9..745230659af 100644
--- a/src/ceph-volume/ceph_volume/tests/util/test_encryption.py
+++ b/src/ceph-volume/ceph_volume/tests/util/test_encryption.py
@@ -1,6 +1,44 @@
from ceph_volume.util import encryption
-from mock.mock import patch
+from mock.mock import patch, Mock
import base64
+import pytest
+
+
+class TestNoWorkqueue:
+    def setup_method(self):
+        encryption.conf.dmcrypt_no_workqueue = None
+
+    @patch('ceph_volume.util.encryption.process.call',
+           Mock(return_value=(['cryptsetup 2.7.2 flags: UDEV BLKID KEYRING' \
+                               'FIPS KERNEL_CAPI PWQUALITY '], [''], 0)))
+    def test_set_dmcrypt_no_workqueue_true(self):
+        encryption.set_dmcrypt_no_workqueue()
+        assert encryption.conf.dmcrypt_no_workqueue
+
+    @patch('ceph_volume.util.encryption.process.call',
+           Mock(return_value=(['cryptsetup 2.0.0'], [''], 0)))
+    def test_set_dmcrypt_no_workqueue_false(self):
+        encryption.set_dmcrypt_no_workqueue()
+        assert encryption.conf.dmcrypt_no_workqueue is None
+
+    @patch('ceph_volume.util.encryption.process.call',
+           Mock(return_value=([''], ['fake error'], 1)))
+    def test_set_dmcrypt_no_workqueue_cryptsetup_version_fails(self):
+        with pytest.raises(RuntimeError):
+            encryption.set_dmcrypt_no_workqueue()
+
+    @patch('ceph_volume.util.encryption.process.call',
+           Mock(return_value=(['unexpected output'], [''], 0)))
+    def test_set_dmcrypt_no_workqueue_pattern_not_found(self):
+        with pytest.raises(RuntimeError):
+            encryption.set_dmcrypt_no_workqueue()
+
+    @patch('ceph_volume.util.encryption.process.call',
+           Mock(return_value=([], [''], 0)))
+    def test_set_dmcrypt_no_workqueue_index_error(self):
+        with pytest.raises(RuntimeError):
+            encryption.set_dmcrypt_no_workqueue()
+
class TestGetKeySize(object):
    def test_get_size_from_conf_default(self, conf_ceph_stub):
diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py
index 844a81620d2..15a31315645 100644
--- a/src/ceph-volume/ceph_volume/util/encryption.py
+++ b/src/ceph-volume/ceph_volume/util/encryption.py
@@ -1,6 +1,7 @@
import base64
import os
import logging
+import re
from ceph_volume import process, conf, terminal
from ceph_volume.util import constants, system
from ceph_volume.util.device import Device
@@ -12,14 +13,49 @@ logger = logging.getLogger(__name__)
mlogger = terminal.MultiLogger(__name__)
def set_dmcrypt_no_workqueue(target_version: str = '2.3.4') -> None:
-    """
-    set `conf.dmcrypt_no_workqueue` to `True` if the available
-    version of `cryptsetup` is greater or equal to `version`
+    """Set `conf.dmcrypt_no_workqueue` to `True` if the installed version
+    of `cryptsetup` is greater than or equal to the specified `target_version`.
+
+    Depending on the cryptsetup version, `cryptsetup --version` output can be different.
+    E.g.:
+
+    CentOS Stream9:
+    $ cryptsetup --version
+    cryptsetup 2.6.0 flags: UDEV BLKID KEYRING FIPS KERNEL_CAPI PWQUALITY
+
+    CentOS Stream8:
+    $ cryptsetup --version
+    cryptsetup 2.3.7
+
+    Args:
+        target_version (str, optional): The minimum version required for setting
+            `conf.dmcrypt_no_workqueue` to `True`. Defaults to '2.3.4'.
+
+    Raises:
+        RuntimeError: If failed to retrieve the cryptsetup version.
+        RuntimeError: If failed to parse the cryptsetup version.
+        RuntimeError: If failed to compare the cryptsetup version with the target version.
""" command = ["cryptsetup", "--version"] out, err, rc = process.call(command) + + # This regex extracts the version number from + # the `cryptsetup --version` output + pattern: str = r'(\d+\.?)+' + + if rc: + raise RuntimeError(f"Can't retrieve cryptsetup version: {err}") + try: - if version.parse(out[0]) >= version.parse(f'cryptsetup {target_version}'): + cryptsetup_version = re.search(pattern, out[0]) + + if cryptsetup_version is None: + _output: str = "\n".join(out) + raise RuntimeError('Error while checking cryptsetup version.\n', + '`cryptsetup --version` output:\n', + f'{_output}') + + if version.parse(cryptsetup_version.group(0)) >= version.parse(target_version): conf.dmcrypt_no_workqueue = True except IndexError: mlogger.debug(f'cryptsetup version check: rc={rc} out={out} err={err}') diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index db2f2423351..fd9de7fe3e3 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -6,9 +6,8 @@ import json import sys import host import osd -from multiprocessing import Process, Pool +from multiprocessing import Pool from util import ( - BoxType, Config, Target, ensure_inside_container, @@ -19,12 +18,9 @@ from util import ( run_dc_shell_commands, get_container_engine, run_shell_command, - run_shell_commands, - ContainerEngine, DockerEngine, PodmanEngine, colored, - engine, engine_compose, Colors, get_seed_name diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py index aae16d07f45..6b49def2318 100644 --- a/src/cephadm/box/host.py +++ b/src/cephadm/box/host.py @@ -12,7 +12,6 @@ from util import ( run_dc_shell_command, run_shell_command, engine, - BoxType ) diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index 827a4de36c0..3e559b2fe8c 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -5,7 +5,6 @@ import re from typing import Dict from util import ( - BoxType, Config, Target, ensure_inside_container, diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index 7dcf883f8a3..4aa5645b26b 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -417,5 +417,4 @@ class PodmanEngine(ContainerEngine): def get_container_engine() -> ContainerEngine: if engine() == 'docker': return DockerEngine() - else: - return PodmanEngine() + return PodmanEngine() diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 3fb3148cba9..5deaec55949 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -18,7 +18,7 @@ import tempfile import time import errno import ssl -from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast, Iterable +from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast import re import uuid @@ -96,6 +96,7 @@ from cephadmlib.data_utils import ( try_convert_datetime, read_config, with_units_to_int, + _extract_host_info_from_applied_spec, ) from cephadmlib.file_utils import ( get_file_timestamp, @@ -175,6 +176,7 @@ from cephadmlib.daemons import ( NFSGanesha, SMB, SNMPGateway, + MgmtGateway, Tracing, NodeProxy, ) @@ -226,6 +228,7 @@ def get_supported_daemons(): supported_daemons.append(Keepalived.daemon_type) supported_daemons.append(CephadmAgent.daemon_type) supported_daemons.append(SNMPGateway.daemon_type) + supported_daemons.append(MgmtGateway.daemon_type) supported_daemons.extend(Tracing.components) supported_daemons.append(NodeProxy.daemon_type) supported_daemons.append(SMB.daemon_type) @@ -462,6 +465,8 @@ def update_default_image(ctx: CephadmContext) -> None: ctx.image = 
diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py
index db2f2423351..fd9de7fe3e3 100755
--- a/src/cephadm/box/box.py
+++ b/src/cephadm/box/box.py
@@ -6,9 +6,8 @@ import json
import sys
import host
import osd
-from multiprocessing import Process, Pool
+from multiprocessing import Pool
from util import (
-    BoxType,
    Config,
    Target,
    ensure_inside_container,
@@ -19,12 +18,9 @@ from util import (
    run_dc_shell_commands,
    get_container_engine,
    run_shell_command,
-    run_shell_commands,
-    ContainerEngine,
    DockerEngine,
    PodmanEngine,
    colored,
-    engine,
    engine_compose,
    Colors,
    get_seed_name
diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py
index aae16d07f45..6b49def2318 100644
--- a/src/cephadm/box/host.py
+++ b/src/cephadm/box/host.py
@@ -12,7 +12,6 @@ from util import (
    run_dc_shell_command,
    run_shell_command,
    engine,
-    BoxType
)
diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py
index 827a4de36c0..3e559b2fe8c 100644
--- a/src/cephadm/box/osd.py
+++ b/src/cephadm/box/osd.py
@@ -5,7 +5,6 @@ import re
from typing import Dict
from util import (
-    BoxType,
    Config,
    Target,
    ensure_inside_container,
diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py
index 7dcf883f8a3..4aa5645b26b 100644
--- a/src/cephadm/box/util.py
+++ b/src/cephadm/box/util.py
@@ -417,5 +417,4 @@ class PodmanEngine(ContainerEngine):
def get_container_engine() -> ContainerEngine:
    if engine() == 'docker':
        return DockerEngine()
-    else:
-        return PodmanEngine()
+    return PodmanEngine()
diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py
index 3fb3148cba9..5deaec55949 100755
--- a/src/cephadm/cephadm.py
+++ b/src/cephadm/cephadm.py
@@ -18,7 +18,7 @@ import tempfile
import time
import errno
import ssl
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast, Iterable
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast
import re
import uuid
@@ -96,6 +96,7 @@ from cephadmlib.data_utils import (
    try_convert_datetime,
    read_config,
    with_units_to_int,
+    _extract_host_info_from_applied_spec,
)
from cephadmlib.file_utils import (
    get_file_timestamp,
@@ -175,6 +176,7 @@ from cephadmlib.daemons import (
    NFSGanesha,
    SMB,
    SNMPGateway,
+    MgmtGateway,
    Tracing,
    NodeProxy,
)
@@ -226,6 +228,7 @@ def get_supported_daemons():
        supported_daemons.append(Keepalived.daemon_type)
        supported_daemons.append(CephadmAgent.daemon_type)
        supported_daemons.append(SNMPGateway.daemon_type)
+        supported_daemons.append(MgmtGateway.daemon_type)
        supported_daemons.extend(Tracing.components)
        supported_daemons.append(NodeProxy.daemon_type)
        supported_daemons.append(SMB.daemon_type)
@@ -462,6 +465,8 @@ def update_default_image(ctx: CephadmContext) -> None:
            ctx.image = Keepalived.default_image
        if type_ == SNMPGateway.daemon_type:
            ctx.image = SNMPGateway.default_image
+        if type_ == MgmtGateway.daemon_type:
+            ctx.image = MgmtGateway.default_image
        if type_ == CephNvmeof.daemon_type:
            ctx.image = CephNvmeof.default_image
        if type_ in Tracing.components:
@@ -854,6 +859,10 @@ def create_daemon_dirs(
        sg = SNMPGateway.init(ctx, fsid, ident.daemon_id)
        sg.create_daemon_conf()
+    elif daemon_type == MgmtGateway.daemon_type:
+        cg = MgmtGateway.init(ctx, fsid, ident.daemon_id)
+        cg.create_daemon_dirs(data_dir, uid, gid)
+
    elif daemon_type == NodeProxy.daemon_type:
        node_proxy = NodeProxy.init(ctx, fsid, ident.daemon_id)
        node_proxy.create_daemon_dirs(data_dir, uid, gid)
@@ -2465,6 +2474,14 @@ def prepare_bootstrap_config(
    ):
        cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
+    if not cp.has_section('osd'):
+        cp.add_section('osd')
+    if (
+        not cp.has_option('osd', 'osd_memory_target_autotune')
+        and not cp.has_option('osd', 'osd memory target autotune')
+    ):
+        cp.set('osd', 'osd_memory_target_autotune', 'true')
+
    if ctx.single_host_defaults:
        logger.info('Adjusting default settings to suit single-host cluster...')
        # replicate across osds, not hosts
@@ -2556,6 +2573,12 @@ def finish_bootstrap_config(
    if ipv6 or ipv6_cluster_network:
        logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
        cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
+        # note: Ceph does not fully support dual stack.
+        # kernel clients: https://tracker.ceph.com/issues/49581
+        # if we do not disable ipv4 binding, daemons will bind
+        # to 0.0.0.0 and clients will misbehave.
+        logger.info('Disabling IPv4 (ms_bind_ipv4) binding')
+        cli(['config', 'set', 'global', 'ms_bind_ipv4', 'false'])
    with open(ctx.output_config, 'w') as f:
        f.write(config)
@@ -2563,88 +2586,6 @@ def finish_bootstrap_config(
    pass
-def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
-    # overall goal of this function is to go through an applied spec and find
-    # the hostname (and addr is provided) for each host spec in the applied spec.
-    # Generally, we should be able to just pass the spec to the mgr module where
-    # proper yaml parsing can happen, but for host specs in particular we want to
-    # be able to distribute ssh keys, which requires finding the hostname (and addr
-    # if possible) for each potential host spec in the applied spec.
-
-    specs: List[List[str]] = []
-    current_spec: List[str] = []
-    for line in f:
-        if re.search(r'^---\s+', line):
-            if current_spec:
-                specs.append(current_spec)
-            current_spec = []
-        else:
-            line = line.strip()
-            if line:
-                current_spec.append(line)
-    if current_spec:
-        specs.append(current_spec)
-
-    host_specs: List[List[str]] = []
-    for spec in specs:
-        for line in spec:
-            if 'service_type' in line:
-                try:
-                    _, type = line.split(':')
-                    type = type.strip()
-                    if type == 'host':
-                        host_specs.append(spec)
-                except ValueError as e:
-                    spec_str = '\n'.join(spec)
-                    logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
-                break
-            spec_str = '\n'.join(spec)
-            logger.error(f'Failed to find service_type within spec:\n{spec_str}')
-
-    host_dicts = []
-    for s in host_specs:
-        host_dict = _extract_host_info_from_spec(s)
-        # if host_dict is empty here, we failed to pull the hostname
-        # for the host from the spec. This should have already been logged
-        # so at this point we just don't want to include it in our output
-        if host_dict:
-            host_dicts.append(host_dict)
-
-    return host_dicts
-
-
-def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
-    # note:for our purposes here, we only really want the hostname
-    # and address of the host from each of these specs in order to
-    # be able to distribute ssh keys. We will later apply the spec
-    # through the mgr module where proper yaml parsing can be done
-    # The returned dicts from this function should only contain
-    # one or two entries, one (required) for hostname, one (optional) for addr
-    # {
-    #   hostname: <hostname>
-    #   addr: <ip-addr>
-    # }
-    # if we fail to find the hostname, an empty dict is returned
-
-    host_dict = {}  # type: Dict[str, str]
-    for line in host_spec:
-        for field in ['hostname', 'addr']:
-            if field in line:
-                try:
-                    _, field_value = line.split(':')
-                    field_value = field_value.strip()
-                    host_dict[field] = field_value
-                except ValueError as e:
-                    spec_str = '\n'.join(host_spec)
-                    logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}')
-
-    if 'hostname' not in host_dict:
-        spec_str = '\n'.join(host_spec)
-        logger.error(f'Could not find hostname in host spec:\n{spec_str}')
-        return {}
-    return host_dict
-
-
def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
    # copy ssh key to hosts in host spec (used for apply spec)
    ssh_key = CEPH_DEFAULT_PUBKEY
@@ -2792,7 +2733,7 @@ def command_bootstrap(ctx):
        if not os.path.isfile(ctx.custom_prometheus_alerts):
            raise Error(f'No custom prometheus alerts file found at {ctx.custom_prometheus_alerts}')
-    (user_conf, _) = get_config_and_keyring(ctx)
+    _, _ = get_config_and_keyring(ctx)
    if ctx.ssh_user != 'root':
        check_ssh_connectivity(ctx)
@@ -2892,18 +2833,17 @@ def command_bootstrap(ctx):
    # create mgr
    create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
-    if user_conf:
-        # user given config settings were already assimilated earlier
-        # but if the given settings contained any attributes in
-        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
-        # they don't seem to be stored if there isn't a mgr yet.
-        # Since re-assimilating the same conf settings should be
-        # idempotent we can just do it again here.
-        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
-            tmp.write(user_conf.encode('utf-8'))
-            cli(['config', 'assimilate-conf',
-                 '-i', '/var/lib/ceph/user.conf'],
-                {tmp.name: '/var/lib/ceph/user.conf:z'})
+    # user given config settings were already assimilated earlier
+    # but if the given settings contained any attributes in
+    # the mgr (e.g. mgr/cephadm/container_image_prometheus)
+    # they don't seem to be stored if there isn't a mgr yet.
+    # Since re-assimilating the same conf settings should be
+    # idempotent we can just do it again here.
+    with tempfile.NamedTemporaryFile(buffering=0) as tmp:
+        tmp.write(config.encode('utf-8'))
+        cli(['config', 'assimilate-conf',
+             '-i', '/var/lib/ceph/user.conf'],
+            {tmp.name: '/var/lib/ceph/user.conf:z'})
    if getattr(ctx, 'log_dest', None):
        ldkey = 'mgr/cephadm/cephadm_log_destination'
@@ -2952,6 +2892,10 @@ def command_bootstrap(ctx):
        cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])
+    if ctx.no_cgroups_split:
+        logger.info('Setting mgr/cephadm/cgroups_split to false')
+        cli(['config', 'set', 'mgr', 'mgr/cephadm/cgroups_split', 'false', '--force'])
+
    if not ctx.skip_dashboard:
        prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
@@ -2985,10 +2929,6 @@ def command_bootstrap(ctx):
    save_cluster_config(ctx, uid, gid, fsid)
-    # enable autotune for osd_memory_target
-    logger.info('Enabling autotune for osd_memory_target')
-    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
-
    # Notify the Dashboard to show the 'Expand cluster' page on first log in.
    cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
@@ -3643,6 +3583,9 @@ def list_daemons(
            elif daemon_type == SNMPGateway.daemon_type:
                version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                seen_versions[image_id] = version
+            elif daemon_type == MgmtGateway.daemon_type:
+                version = MgmtGateway.get_version(ctx, container_id)
+                seen_versions[image_id] = version
            else:
                logger.warning('version for unknown daemon type %s' % daemon_type)
        else:
diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py
index a6cf4389ff6..b0a989df620 100644
--- a/src/cephadm/cephadmlib/constants.py
+++ b/src/cephadm/cephadmlib/constants.py
@@ -19,6 +19,7 @@ DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
DEFAULT_SMB_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64'
+DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1'
DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
# ------------------------------------------------------------------------------
@@ -49,3 +50,5 @@ DEFAULT_RETRY = 15
DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
QUIET_LOG_LEVEL = 9  # DEBUG is 10, so using 9 to be lower level than DEBUG
NO_DEPRECATED = False
+UID_NOBODY = 65534
+GID_NOGROUP = 65534
diff --git a/src/cephadm/cephadmlib/daemons/__init__.py b/src/cephadm/cephadmlib/daemons/__init__.py
index 1a9d2d568bc..279f6f1a898 100644
--- a/src/cephadm/cephadmlib/daemons/__init__.py
+++ b/src/cephadm/cephadmlib/daemons/__init__.py
@@ -9,6 +9,7 @@ from .smb import SMB
from .snmp import SNMPGateway
from .tracing import Tracing
from .node_proxy import NodeProxy
+from .mgmt_gateway import MgmtGateway
__all__ = [
    'Ceph',
@@ -25,4 +26,5 @@ __all__ = [
    'SNMPGateway',
    'Tracing',
    'NodeProxy',
+    'MgmtGateway',
]
diff --git a/src/cephadm/cephadmlib/daemons/ceph.py b/src/cephadm/cephadmlib/daemons/ceph.py
index e6392876cc6..55a92835129 100644
--- a/src/cephadm/cephadmlib/daemons/ceph.py
+++ b/src/cephadm/cephadmlib/daemons/ceph.py
@@ -292,8 +292,8 @@ class CephExporter(ContainerDaemonForm):
        self.image = image
        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
-        ipv4_addrs, _ = get_ip_addresses(get_hostname())
-        addrs = '0.0.0.0' if ipv4_addrs else '::'
+        _, ipv6_addrs = get_ip_addresses(get_hostname())
+        addrs = '::' if ipv6_addrs else '0.0.0.0'
        self.addrs = config_json.get('addrs', addrs)
        self.port = config_json.get('port', self.DEFAULT_PORT)
        self.prio_limit = config_json.get('prio-limit', 5)
diff --git a/src/cephadm/cephadmlib/daemons/custom.py b/src/cephadm/cephadmlib/daemons/custom.py
index e833c80c9a5..76b4162e289 100644
--- a/src/cephadm/cephadmlib/daemons/custom.py
+++ b/src/cephadm/cephadmlib/daemons/custom.py
@@ -15,6 +15,7 @@ from ..deploy import DeploymentType
from ..deployment_utils import to_deployment_container
from ..file_utils import write_new, makedirs
from ..net_utils import EndPoint
+from ..constants import UID_NOBODY, GID_NOGROUP
logger = logging.getLogger()
@@ -43,8 +44,8 @@ class CustomContainer(ContainerDaemonForm):
        # config-json options
        self.entrypoint = dict_get(config_json, 'entrypoint')
-        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
-        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
+        self.uid = dict_get(config_json, 'uid', UID_NOBODY)
+        self.gid = dict_get(config_json, 'gid', GID_NOGROUP)
        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
        self.args = dict_get(config_json, 'args', [])
        self.envs = dict_get(config_json, 'envs', [])
diff --git a/src/cephadm/cephadmlib/daemons/iscsi.py b/src/cephadm/cephadmlib/daemons/iscsi.py
index ade88a90af0..c4b60f4a771 100644
--- a/src/cephadm/cephadmlib/daemons/iscsi.py
+++ b/src/cephadm/cephadmlib/daemons/iscsi.py
@@ -119,22 +119,31 @@ class CephIscsi(ContainerDaemonForm):
    @staticmethod
    def get_version(ctx, container_id):
        # type: (CephadmContext, str) -> Optional[str]
-        version = None
-        out, err, code = call(
-            ctx,
-            [
-                ctx.container_engine.path,
-                'exec',
-                container_id,
-                '/usr/bin/python3',
-                '-c',
-                "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)",
-            ],
-            verbosity=CallVerbosity.QUIET,
+        def python(s: str) -> Tuple[str, str, int]:
+            return call(
+                ctx,
+                [
+                    ctx.container_engine.path,
+                    'exec',
+                    container_id,
+                    '/usr/bin/python3',
+                    '-c',
+                    s,
+                ],
+                verbosity=CallVerbosity.QUIET,
+            )
+
+        out, _, code = python(
+            "from importlib.metadata import version; print(version('ceph_iscsi'))"
+        )
+        if code == 0:
+            return out.strip()
+        out, _, code = python(
+            "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"
        )
        if code == 0:
-            version = out.strip()
-        return version
+            return out.strip()
+        return None
    def validate(self):
        # type: () -> None
diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py
new file mode 100644
index 00000000000..93dfc275c41
--- /dev/null
+++ b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py
@@ -0,0 +1,174 @@
+import logging
+import os
+from typing import Dict, List, Tuple, Optional
+import re
+
+from ..call_wrappers import call, CallVerbosity
+from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
+from ..container_types import CephContainer
+from ..context import CephadmContext
+from ..context_getters import fetch_configs
+from ..daemon_form import register as register_daemon_form
+from ..daemon_identity import DaemonIdentity
+from ..deployment_utils import to_deployment_container
+from ..constants import DEFAULT_NGINX_IMAGE, UID_NOBODY, GID_NOGROUP
+from ..data_utils import dict_get, is_fsid
+from ..file_utils import populate_files, makedirs, recursive_chown
+from ..exceptions import Error
+
+logger = logging.getLogger()
+
+
+@register_daemon_form
+class MgmtGateway(ContainerDaemonForm):
+    """Defines an MgmtGateway container"""
+
+    daemon_type = 'mgmt-gateway'
+    required_files = [
+        'nginx.conf',
+        'nginx_external_server.conf',
+        'nginx_internal_server.conf',
+        'nginx_internal.crt',
+        'nginx_internal.key',
+    ]
+
+    default_image = DEFAULT_NGINX_IMAGE
+
+    @classmethod
+    def for_daemon_type(cls, daemon_type: str) -> bool:
+        return cls.daemon_type == daemon_type
+
+    def __init__(
+        self,
+        ctx: CephadmContext,
+        fsid: str,
+        daemon_id: str,
+        config_json: Dict,
+        image: str = DEFAULT_NGINX_IMAGE,
+    ):
+        self.ctx = ctx
+        self.fsid = fsid
+        self.daemon_id = daemon_id
+        self.image = image
+        self.files = dict_get(config_json, 'files', {})
+        self.validate()
+
+    @classmethod
+    def init(
+        cls, ctx: CephadmContext, fsid: str, daemon_id: str
+    ) -> 'MgmtGateway':
+        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
+
+    @classmethod
+    def create(
+        cls, ctx: CephadmContext, ident: DaemonIdentity
+    ) -> 'MgmtGateway':
+        return cls.init(ctx, ident.fsid, ident.daemon_id)
+
+    @property
+    def identity(self) -> DaemonIdentity:
+        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
+
+    def validate(self) -> None:
+        if not is_fsid(self.fsid):
+            raise Error(f'not an fsid: {self.fsid}')
+        if not self.daemon_id:
+            raise Error(f'invalid daemon_id: {self.daemon_id}')
+        if not self.image:
+            raise Error(f'invalid image: {self.image}')
+
+        # check for the required files
+        if self.required_files:
+            for fname in self.required_files:
+                if fname not in self.files:
+                    raise Error(
+                        'required file missing from config-json: %s' % fname
+                    )
+
+    def container(self, ctx: CephadmContext) -> CephContainer:
+        ctr = daemon_to_container(ctx, self)
+        return to_deployment_container(ctx, ctr)
+
+    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
+        return UID_NOBODY, GID_NOGROUP
+
+    def get_daemon_args(self) -> List[str]:
+        return []
+
+    def default_entrypoint(self) -> str:
+        return ''
+
+    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
+        """Create files under the container data dir"""
+        if not os.path.isdir(data_dir):
+            raise OSError('data_dir is not a directory: %s' % (data_dir))
+        logger.info('Writing mgmt-gateway config...')
+        config_dir = os.path.join(data_dir, 'etc/')
+        makedirs(config_dir, uid, gid, 0o755)
+        recursive_chown(config_dir, uid, gid)
+        populate_files(config_dir, self.files, uid, gid)
+
+    def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
+        mounts: Dict[str, str] = {}
+        mounts[
+            os.path.join(data_dir, 'nginx.conf')
+        ] = '/etc/nginx/nginx.conf:Z'
+        return mounts
+
+    @staticmethod
+    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
+        """Return the version of the Nginx container"""
+        version = None
+        out, err, code = call(
+            ctx,
+            [
+                ctx.container_engine.path,
+                'exec',
+                container_id,
+                'nginx',
+                '-v',
+            ],
+            verbosity=CallVerbosity.QUIET,
+        )
+        if code == 0:
+            # nginx is using stderr to print the version!!
+            match = re.search(r'nginx version:\s*nginx\/(.+)', err)
+            if match:
+                version = match.group(1)
+        return version
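As the comment in get_version() notes, `nginx -v` writes its banner to stderr rather than stdout, which is why the function parses `err`. A tiny standalone illustration of the same regex (the banner string here is made up to match the nginx format):

    import re

    err = 'nginx version: nginx/1.26.1'   # what `nginx -v` prints on stderr

    match = re.search(r'nginx version:\s*nginx\/(.+)', err)
    print(match.group(1) if match else None)   # -> 1.26.1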
+
+    def customize_container_mounts(
+        self, ctx: CephadmContext, mounts: Dict[str, str]
+    ) -> None:
+        data_dir = self.identity.data_dir(ctx.data_dir)
+        mounts.update(
+            {
+                os.path.join(
+                    data_dir, 'etc/nginx.conf'
+                ): '/etc/nginx/nginx.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal_server.conf'
+                ): '/etc/nginx_internal_server.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_external_server.conf'
+                ): '/etc/nginx_external_server.conf:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal.crt'
+                ): '/etc/nginx/ssl/nginx_internal.crt:Z',
+                os.path.join(
+                    data_dir, 'etc/nginx_internal.key'
+                ): '/etc/nginx/ssl/nginx_internal.key:Z',
+            }
+        )
+
+        if 'nginx.crt' in self.files:
+            mounts.update(
+                {
+                    os.path.join(
+                        data_dir, 'etc/nginx.crt'
+                    ): '/etc/nginx/ssl/nginx.crt:Z',
+                    os.path.join(
+                        data_dir, 'etc/nginx.key'
+                    ): '/etc/nginx/ssl/nginx.key:Z',
+                }
+            )
diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
index aa93ebe7305..fca4da406e5 100644
--- a/src/cephadm/cephadmlib/daemons/monitoring.py
+++ b/src/cephadm/cephadmlib/daemons/monitoring.py
@@ -10,6 +10,8 @@ from ..constants import (
    DEFAULT_NODE_EXPORTER_IMAGE,
    DEFAULT_PROMETHEUS_IMAGE,
    DEFAULT_PROMTAIL_IMAGE,
+    UID_NOBODY,
+    GID_NOGROUP,
)
from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
from ..container_types import CephContainer, extract_uid_gid
@@ -170,7 +172,7 @@ class Monitoring(ContainerDaemonForm):
        if daemon_type == 'prometheus':
            uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
        elif daemon_type == 'node-exporter':
-            uid, gid = 65534, 65534
+            uid, gid = UID_NOBODY, GID_NOGROUP
        elif daemon_type == 'grafana':
            uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
        elif daemon_type == 'loki':
@@ -260,6 +262,7 @@ class Monitoring(ContainerDaemonForm):
            retention_size = config.get(
                'retention_size', '0'
            )  # default to disabled
+            use_url_prefix = config.get('use_url_prefix', False)
            r += [f'--storage.tsdb.retention.time={retention_time}']
            r += [f'--storage.tsdb.retention.size={retention_size}']
            scheme = 'http'
@@ -271,10 +274,17 @@ class Monitoring(ContainerDaemonForm):
            # use the first ipv4 (if any) otherwise use the first ipv6
            addr = next(iter(ipv4_addrs or ipv6_addrs), None)
            host = wrap_ipv6(addr) if addr else host
-            r += [f'--web.external-url={scheme}://{host}:{port}']
+            if use_url_prefix:
+                r += [
+                    f'--web.external-url={scheme}://{host}:{port}/prometheus'
+                ]
+                r += ['--web.route-prefix=/prometheus/']
+            else:
+                r += [f'--web.external-url={scheme}://{host}:{port}']
        r += [f'--web.listen-address={ip}:{port}']
        if daemon_type == 'alertmanager':
            config = fetch_configs(ctx)
+            use_url_prefix = config.get('use_url_prefix', False)
            peers = config.get('peers', list())  # type: ignore
            for peer in peers:
                r += ['--cluster.peer={}'.format(peer)]
@@ -284,6 +294,8 @@ class Monitoring(ContainerDaemonForm):
                pass
            # some alertmanager, by default, look elsewhere for a config
            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
+            if use_url_prefix:
+                r += ['--web.route-prefix=/alertmanager']
        if daemon_type == 'promtail':
            r += ['--config.expand-env']
        if daemon_type == 'prometheus':
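With `use_url_prefix` set, Prometheus is told both its externally visible URL and an internal route prefix, so the mgmt-gateway can reverse-proxy it under /prometheus; Alertmanager only needs the route prefix. A short sketch of the flags this produces (host and port values are invented for illustration):

    scheme, host, port = 'http', 'ceph-node-0', 9095   # illustrative values
    use_url_prefix = True

    args = []
    if use_url_prefix:
        # serve under /prometheus so a reverse proxy can route to it
        args += [f'--web.external-url={scheme}://{host}:{port}/prometheus',
                 '--web.route-prefix=/prometheus/']
    else:
        args += [f'--web.external-url={scheme}://{host}:{port}']
    print(args)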
diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py
index f22147c775c..7e8ab251636 100644
--- a/src/cephadm/cephadmlib/daemons/nvmeof.py
+++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
@@ -76,12 +76,30 @@ class CephNvmeof(ContainerDaemonForm):
        mounts[log_dir] = '/var/log/ceph:z'
        return mounts
+    def _get_tls_cert_key_mounts(
+        self, data_dir: str, files: Dict[str, str]
+    ) -> Dict[str, str]:
+        mounts = dict()
+        for fn in [
+            'server_cert',
+            'server_key',
+            'client_cert',
+            'client_key',
+            'root_ca_cert',
+        ]:
+            if fn in files:
+                mounts[
+                    os.path.join(data_dir, fn)
+                ] = f'/{fn.replace("_", ".")}'
+        return mounts
+
    def customize_container_mounts(
        self, ctx: CephadmContext, mounts: Dict[str, str]
    ) -> None:
        data_dir = self.identity.data_dir(ctx.data_dir)
        log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
        mounts.update(self._get_container_mounts(data_dir, log_dir))
+        mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files))
    def customize_container_binds(
        self, ctx: CephadmContext, binds: List[List[str]]
diff --git a/src/cephadm/cephadmlib/daemons/tracing.py b/src/cephadm/cephadmlib/daemons/tracing.py
index 4d4fecacbb0..365458a9c57 100644
--- a/src/cephadm/cephadmlib/daemons/tracing.py
+++ b/src/cephadm/cephadmlib/daemons/tracing.py
@@ -15,6 +15,7 @@ from ..context_getters import fetch_configs
from ..daemon_form import register as register_daemon_form
from ..daemon_identity import DaemonIdentity
from ..deployment_utils import to_deployment_container
+from ..constants import UID_NOBODY, GID_NOGROUP
logger = logging.getLogger()
@@ -87,7 +88,7 @@ class Tracing(ContainerDaemonForm):
        return to_deployment_container(ctx, ctr)
    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
-        return 65534, 65534
+        return UID_NOBODY, GID_NOGROUP
    def get_daemon_args(self) -> List[str]:
        return self.components[self.identity.daemon_type].get(
diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py
index 9493a37d00f..2f4674752cc 100644
--- a/src/cephadm/cephadmlib/data_utils.py
+++ b/src/cephadm/cephadmlib/data_utils.py
@@ -4,15 +4,20 @@ import datetime
import os
import re
import uuid
+import yaml
+import logging
from configparser import ConfigParser
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, Iterable, List
from .constants import DATEFMT, DEFAULT_REGISTRY
from .exceptions import Error
+logger = logging.getLogger()
+
+
def dict_get(
    d: Dict, key: str, default: Any = None, require: bool = False
) -> Any:
@@ -197,3 +202,78 @@ def get_legacy_config_fsid(cluster, legacy_dir=None):
    ):
        return config.get('global', 'fsid')
    return None
+
+
+def _extract_host_info_from_applied_spec(
+    f: Iterable[str],
+) -> List[Dict[str, str]]:
+    # overall goal of this function is to go through an applied spec and find
+    # the hostname (and addr if provided) for each host spec in the applied spec.
+    # Generally, we should be able to just pass the spec to the mgr module where
+    # proper yaml parsing can happen, but for host specs in particular we want to
+    # be able to distribute ssh keys, which requires finding the hostname (and addr
+    # if possible) for each potential host spec in the applied spec.
+
+    specs: List[str] = []
+    current_spec: str = ''
+    for line in f:
+        if re.search(r'^---\s+', line):
+            if current_spec:
+                specs.append(current_spec)
+            current_spec = ''
+        else:
+            if line:
+                current_spec += line
+    if current_spec:
+        specs.append(current_spec)
+
+    host_specs: List[Dict[str, Any]] = []
+    for spec in specs:
+        yaml_data = yaml.safe_load(spec)
+        if 'service_type' in yaml_data.keys():
+            if yaml_data['service_type'] == 'host':
+                host_specs.append(yaml_data)
+        else:
+            spec_str = yaml.safe_dump(yaml_data)
+            logger.error(
+                f'Failed to pull service_type from spec:\n{spec_str}.'
+            )
+
+    host_dicts = []
+    for s in host_specs:
+        host_dict = _extract_host_info_from_spec(s)
+        # if host_dict is empty here, we failed to pull the hostname
+        # for the host from the spec. This should have already been logged
+        # so at this point we just don't want to include it in our output
+        if host_dict:
+            host_dicts.append(host_dict)
+
+    return host_dicts
+
+
+def _extract_host_info_from_spec(host_spec: Dict[str, Any]) -> Dict[str, str]:
+    # note: for our purposes here, we only really want the hostname
+    # and address of the host from each of these specs in order to
+    # be able to distribute ssh keys. We will later apply the spec
+    # through the mgr module where proper yaml parsing can be done
+    # The returned dicts from this function should only contain
+    # one or two entries, one (required) for hostname, one (optional) for addr
+    # {
+    #   hostname: <hostname>
+    #   addr: <ip-addr>
+    # }
+    # if we fail to find the hostname, an empty dict is returned
+
+    host_dict = {}  # type: Dict[str, str]
+    for field in ['hostname', 'addr']:
+        try:
+            host_dict[field] = host_spec[field]
+        except KeyError as e:
+            logger.error(
+                f'Error trying to pull {field} from host spec:\n{host_spec}. Got error: {e}'
+            )
+
+    if 'hostname' not in host_dict:
+        logger.error(f'Could not find hostname in host spec:\n{host_spec}')
+        return {}
+    return host_dict
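The YAML-based rewrite above replaces the old line-by-line field scraping: each `---`-separated chunk is parsed with `yaml.safe_load`, and only documents with `service_type: host` contribute a hostname/addr pair. A condensed, runnable sketch of that flow (the spec text is an example, and error logging is omitted):

    import yaml

    applied_spec = """\
    service_type: host
    hostname: node-1
    addr: 10.0.0.1
    ---
    service_type: osd
    placement:
      host_pattern: '*'
    """

    host_dicts = []
    for chunk in applied_spec.split('---'):
        data = yaml.safe_load(chunk)
        if data and data.get('service_type') == 'host':
            # keep just the fields needed to distribute ssh keys
            host_dicts.append({k: data[k] for k in ('hostname', 'addr') if k in data})

    print(host_dicts)   # -> [{'hostname': 'node-1', 'addr': '10.0.0.1'}]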
+            # In versions that old, there is instead a "provider" attribute
+            # pointing to an IResourceProvider object that seems to itself
+            # have a loader that we can use. See the changes in
+            # https://github.com/pallets/jinja/pull/1082 to get a feel for
+            # the before and after we're expecting from the PackageLoader.
+            # Because of this special case, mypy will complain about
+            # accessing the provider attribute when run with newer versions
+            # of Jinja2 that no longer have the attribute. As we generally
+            # expect to be running unit tests on versions where this is
+            # true, this additional assertion is needed to make mypy happy.
+            assert hasattr(self, 'provider')
+            self._loader = self.provider.loader
         if isinstance(self._loader, zipimport.zipimporter):
             return self._get_archive_source(template)
         return super().get_source(environment, template)
 
     def _get_archive_source(self, template: str) -> Tuple[str, str, None]:
         assert isinstance(self._loader, zipimport.zipimporter)
+        if not hasattr(self, 'package_name'):
+            self.package_name = self._original_package_name
         arelpath = posixpath.join(
             self.package_name, self.package_path, template
         )
diff --git a/src/cephadm/tests/test_enclosure.py b/src/cephadm/tests/test_enclosure.py
deleted file mode 100644
index 48d05cf8318..00000000000
--- a/src/cephadm/tests/test_enclosure.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import pytest
-
-from unittest import mock
-from tests.fixtures import host_sysfs, import_cephadm
-
-from cephadmlib.host_facts import Enclosure
-
-_cephadm = import_cephadm()
-
-
-@pytest.fixture
-def enclosure(host_sysfs):
-    e = Enclosure(
-        enc_id='1',
-        enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0',
-        dev_path='/sys/class/scsi_generic/sg2')
-    yield e
-
-
-class TestEnclosure:
-
-    def test_enc_metadata(self, enclosure):
-        """Check metadata for the enclosure e.g.
vendor and model""" - - assert enclosure.vendor == "EnclosuresInc" - assert enclosure.components == '12' - assert enclosure.model == "D12" - assert enclosure.enc_id == '1' - - assert enclosure.ses_paths == ['sg2'] - assert enclosure.path_count == 1 - - def test_enc_slots(self, enclosure): - """Check slot count""" - - assert len(enclosure.slot_map) == 12 - - def test_enc_slot_format(self, enclosure): - """Check the attributes of a slot are as expected""" - - assert all(k in ['fault', 'locate', 'serial', 'status'] - for k, _v in enclosure.slot_map['0'].items()) - - def test_enc_slot_status(self, enclosure): - """Check the number of occupied slots is correct""" - - occupied_slots = [slot_id for slot_id in enclosure.slot_map - if enclosure.slot_map[slot_id].get('status').upper() == 'OK'] - - assert len(occupied_slots) == 6 - - def test_enc_disk_count(self, enclosure): - """Check the disks found matches the slot info""" - - assert len(enclosure.device_lookup) == 6 - assert enclosure.device_count == 6 - - def test_enc_device_serial(self, enclosure): - """Check the device serial numbers are as expected""" - - assert all(fake_serial in enclosure.device_lookup.keys() - for fake_serial in [ - 'fake000', - 'fake001', - 'fake002', - 'fake003', - 'fake004', - 'fake005']) - - def test_enc_slot_to_serial(self, enclosure): - """Check serial number to slot matches across slot_map and device_lookup""" - - for serial, slot in enclosure.device_lookup.items(): - assert enclosure.slot_map[slot].get('serial') == serial diff --git a/src/cephadm/tests/test_host_facts.py b/src/cephadm/tests/test_host_facts.py new file mode 100644 index 00000000000..a48089f77f6 --- /dev/null +++ b/src/cephadm/tests/test_host_facts.py @@ -0,0 +1,117 @@ +import pytest + +from unittest import mock +from tests.fixtures import host_sysfs, import_cephadm, cephadm_fs + +from cephadmlib.host_facts import Enclosure + +_cephadm = import_cephadm() + + +@pytest.fixture +def enclosure(host_sysfs): + e = Enclosure( + enc_id='1', + enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0', + dev_path='/sys/class/scsi_generic/sg2', + ) + yield e + + +class TestEnclosure: + + def test_enc_metadata(self, enclosure): + """Check metadata for the enclosure e.g. 
vendor and model"""
+
+        assert enclosure.vendor == "EnclosuresInc"
+        assert enclosure.components == '12'
+        assert enclosure.model == "D12"
+        assert enclosure.enc_id == '1'
+
+        assert enclosure.ses_paths == ['sg2']
+        assert enclosure.path_count == 1
+
+    def test_enc_slots(self, enclosure):
+        """Check slot count"""
+
+        assert len(enclosure.slot_map) == 12
+
+    def test_enc_slot_format(self, enclosure):
+        """Check the attributes of a slot are as expected"""
+
+        assert all(
+            k in ['fault', 'locate', 'serial', 'status']
+            for k, _v in enclosure.slot_map['0'].items()
+        )
+
+    def test_enc_slot_status(self, enclosure):
+        """Check the number of occupied slots is correct"""
+
+        occupied_slots = [
+            slot_id
+            for slot_id in enclosure.slot_map
+            if enclosure.slot_map[slot_id].get('status').upper() == 'OK'
+        ]
+
+        assert len(occupied_slots) == 6
+
+    def test_enc_disk_count(self, enclosure):
+        """Check the disks found matches the slot info"""
+
+        assert len(enclosure.device_lookup) == 6
+        assert enclosure.device_count == 6
+
+    def test_enc_device_serial(self, enclosure):
+        """Check the device serial numbers are as expected"""
+
+        assert all(
+            fake_serial in enclosure.device_lookup.keys()
+            for fake_serial in [
+                'fake000',
+                'fake001',
+                'fake002',
+                'fake003',
+                'fake004',
+                'fake005',
+            ]
+        )
+
+    def test_enc_slot_to_serial(self, enclosure):
+        """Check serial number to slot matches across slot_map and device_lookup"""
+
+        for serial, slot in enclosure.device_lookup.items():
+            assert enclosure.slot_map[slot].get('serial') == serial
+
+
+def test_host_facts_security(cephadm_fs):
+    cephadm_fs.create_file('/sys/kernel/security/lsm', contents='apparmor\n')
+    cephadm_fs.create_file('/etc/apparmor', contents='foo\n')
+    # List from https://tracker.ceph.com/issues/66389
+    profiles_lines = [
+        'foo (complain)',
+        '/usr/bin/man (enforce)',
+        '1password (unconfined)',
+        'Discord (unconfined)',
+        'MongoDB Compass (unconfined)',
+        'profile name with spaces (enforce)',
+    ]
+    cephadm_fs.create_file(
+        '/sys/kernel/security/apparmor/profiles',
+        contents='\n'.join(profiles_lines),
+    )
+
+    from cephadmlib.host_facts import HostFacts
+
+    class TestHostFacts(HostFacts):
+        def _populate_sysctl_options(self):
+            return {}
+
+    ctx = mock.MagicMock()
+    hfacts = TestHostFacts(ctx)
+    ksec = hfacts.kernel_security
+    assert ksec
+    assert ksec['type'] == 'AppArmor'
+    assert ksec['complain'] == 0
+    assert ksec['enforce'] == 1
+    assert ksec['unconfined'] == 2
diff --git a/src/client/Client.cc b/src/client/Client.cc
index fffbd11f600..f8412139e62 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -9405,6 +9405,12 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
       int r = _getattr(dn->inode, mask, dirp->perms);
       if (r < 0)
         return r;
+
+      /* fix https://tracker.ceph.com/issues/56288 */
+      if (dirp->inode->dir == NULL) {
+        ldout(cct, 0) << " dir is closed, so we should return" << dendl;
+        return -CEPHFS_EAGAIN;
+      }
 
       // the content of readdir_cache may change after _getattr(), so pd may be invalid iterator
       pd = dir->readdir_cache.begin() + idx;
@@ -13868,7 +13874,7 @@ int Client::_do_setxattr(Inode *in, const char *name, const void *value,
 
   int xattr_flags = 0;
   if (!value)
-    xattr_flags |= CEPH_XATTR_REMOVE;
+    xattr_flags |= CEPH_XATTR_REMOVE | CEPH_XATTR_REMOVE2;
   if (flags & XATTR_CREATE)
     xattr_flags |= CEPH_XATTR_CREATE;
   if (flags & XATTR_REPLACE)
@@ -13926,6 +13932,7 @@ int Client::_setxattr(Inode *in, const char *name, const void *value,
     mode_t new_mode =
in->mode; if (value) { int ret = posix_acl_equiv_mode(value, size, &new_mode); + ldout(cct, 3) << __func__ << "(" << in->ino << ", \"" << name << "\") = " << ret << dendl; if (ret < 0) return ret; if (ret == 0) { @@ -13975,6 +13982,11 @@ int Client::_setxattr(Inode *in, const char *name, const void *value, ret = -CEPHFS_EOPNOTSUPP; } + if ((!strcmp(name, ACL_EA_ACCESS) || + !strcmp(name, ACL_EA_DEFAULT)) && + ret == -CEPHFS_ENODATA) + ret = 0; + return ret; } @@ -14063,7 +14075,7 @@ int Client::ll_setxattr(Inode *in, const char *name, const void *value, vinodeno_t vino = _get_vino(in); - ldout(cct, 3) << __func__ << " " << vino << " " << name << " size " << size << dendl; + ldout(cct, 3) << __func__ << " " << vino << " " << name << " size " << size << " value " << !!value << dendl; tout(cct) << __func__ << std::endl; tout(cct) << vino.ino.val << std::endl; tout(cct) << name << std::endl; diff --git a/src/cls/rbd/cls_rbd_types.h b/src/cls/rbd/cls_rbd_types.h index c8d2cb871e4..c1d64805ae4 100644 --- a/src/cls/rbd/cls_rbd_types.h +++ b/src/cls/rbd/cls_rbd_types.h @@ -374,6 +374,7 @@ struct GroupImageSpec { std::string image_key(); + bool operator==(const GroupImageSpec&) const = default; }; WRITE_CLASS_ENCODER(GroupImageSpec); diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 9ad1320e38a..27a484dd51e 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -1887,9 +1887,6 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in, } cls_rgw_obj_key dest_key = op.key; - if (dest_key.instance == "null") { - dest_key.instance.clear(); - } BIVerObjEntry obj(hctx, dest_key); BIOLHEntry olh(hctx, dest_key); diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index ac87c9a4a40..e65dedf14e4 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -535,10 +535,11 @@ void cls_rgw_bucket_link_olh(librados::ObjectWriteOperation& op, const cls_rgw_o int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const string& oid, const cls_rgw_obj_key& key, const string& op_tag, - const string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace) + const string& olh_tag, uint64_t olh_epoch, bool log_op, + uint16_t bilog_flags, const rgw_zone_set& zones_trace) { librados::ObjectWriteOperation op; - cls_rgw_bucket_unlink_instance(op, key, op_tag, olh_tag, olh_epoch, log_op, zones_trace); + cls_rgw_bucket_unlink_instance(op, key, op_tag, olh_tag, olh_epoch, log_op, bilog_flags, zones_trace); int r = io_ctx.operate(oid, &op); if (r < 0) return r; @@ -548,7 +549,8 @@ int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const string& oid, void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& key, const string& op_tag, - const string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace) + const string& olh_tag, uint64_t olh_epoch, bool log_op, + uint16_t bilog_flags, const rgw_zone_set& zones_trace) { bufferlist in, out; rgw_cls_unlink_instance_op call; @@ -558,6 +560,7 @@ void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op, call.olh_tag = olh_tag; call.log_op = log_op; call.zones_trace = zones_trace; + call.bilog_flags = bilog_flags; encode(call, in); op.exec(RGW_CLASS, RGW_BUCKET_UNLINK_INSTANCE, in); } diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index ac062b987ae..365a51fb5d5 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -381,7 +381,7 @@ void 
cls_rgw_bucket_link_olh(librados::ObjectWriteOperation& op, uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, bool log_op, const rgw_zone_set& zones_trace); void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& key, const std::string& op_tag, - const std::string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace); + const std::string& olh_tag, uint64_t olh_epoch, bool log_op, uint16_t bilog_flags, const rgw_zone_set& zones_trace); void cls_rgw_get_olh_log(librados::ObjectReadOperation& op, const cls_rgw_obj_key& olh, uint64_t ver_marker, const std::string& olh_tag, rgw_cls_read_olh_log_ret& log_ret, int& op_ret); void cls_rgw_trim_olh_log(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& olh, uint64_t ver, const std::string& olh_tag); void cls_rgw_clear_olh(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& olh, const std::string& olh_tag); @@ -395,7 +395,8 @@ int cls_rgw_bucket_link_olh(librados::IoCtx& io_ctx, const std::string& oid, uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, bool log_op, const rgw_zone_set& zones_trace); int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const std::string& oid, const cls_rgw_obj_key& key, const std::string& op_tag, - const std::string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace); + const std::string& olh_tag, uint64_t olh_epoch, bool log_op, + uint16_t bilog_flags, const rgw_zone_set& zones_trace); int cls_rgw_get_olh_log(librados::IoCtx& io_ctx, std::string& oid, const cls_rgw_obj_key& olh, uint64_t ver_marker, const std::string& olh_tag, rgw_cls_read_olh_log_ret& log_ret); int cls_rgw_clear_olh(librados::IoCtx& io_ctx, std::string& oid, const cls_rgw_obj_key& olh, const std::string& olh_tag); diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index 07f05bc5be4..3ef1555873d 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -111,6 +111,7 @@ inline std::ostream& operator<<(std::ostream& out, RGWModifyOp op) { enum RGWBILogFlags { RGW_BILOG_FLAG_VERSIONED_OP = 0x1, + RGW_BILOG_NULL_VERSION = 0X2, }; enum RGWCheckMTimeType { @@ -660,6 +661,11 @@ struct rgw_bi_log_entry { bool is_versioned() { return ((bilog_flags & RGW_BILOG_FLAG_VERSIONED_OP) != 0); } + + bool is_null_verid() { + return ((bilog_flags & RGW_BILOG_NULL_VERSION) != 0); + } + }; WRITE_CLASS_ENCODER(rgw_bi_log_entry) @@ -1363,7 +1369,7 @@ struct cls_rgw_reshard_entry } void decode(ceph::buffer::list::const_iterator& bl) { - DECODE_START(2, bl); + DECODE_START(3, bl); decode(time, bl); decode(tenant, bl); decode(bucket_name, bl); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index a5dcdd8ace6..2a5fb7cac88 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -215,6 +215,7 @@ if(HAVE_INTEL) set(CMAKE_ASM_FLAGS "-i ${PROJECT_SOURCE_DIR}/src/isa-l/include/ ${CMAKE_ASM_FLAGS}") list(APPEND crc32_srcs ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_00.asm + ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm crc32c_intel_fast_zero_asm.s) endif(HAVE_NASM_X64) elseif(HAVE_POWER8) diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index 8de8c7d1ff4..2e6a08ce818 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -339,12 +339,15 @@ bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs, for (const auto sdata : sharded_in_flight_list) { ceph_assert(sdata); std::lock_guard 
locker(sdata->ops_in_flight_lock_sharded); - if (!sdata->ops_in_flight_sharded.empty()) { - utime_t oldest_op_tmp = - sdata->ops_in_flight_sharded.front().get_initiated(); + for (auto& op : sdata->ops_in_flight_sharded) { + if (!op.warn_interval_multiplier || op.is_continuous()) + continue; + + utime_t oldest_op_tmp = op.get_initiated(); if (oldest_op_tmp < oldest_op) { oldest_op = oldest_op_tmp; } + break; } std::transform(std::begin(sdata->ops_in_flight_sharded), std::end(sdata->ops_in_flight_sharded), diff --git a/src/common/ceph_crypto.h b/src/common/ceph_crypto.h index ed93d09e6e2..6b2fa50dc2a 100644 --- a/src/common/ceph_crypto.h +++ b/src/common/ceph_crypto.h @@ -91,7 +91,6 @@ namespace TOPNSPC::crypto { SHA512 () : OpenSSLDigest(EVP_sha512()) { } }; - # if OPENSSL_VERSION_NUMBER < 0x10100000L class HMAC { private: diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc index e36df170feb..6f45999eafb 100644 --- a/src/common/ceph_strings.cc +++ b/src/common/ceph_strings.cc @@ -153,7 +153,15 @@ uint64_t ceph_release_features(int r) return req; req |= CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS; // and overlaps - if (r <= CEPH_RELEASE_LUMINOUS) + if (r <= CEPH_RELEASE_QUINCY) + return req; + + req |= CEPH_FEATUREMASK_SERVER_REEF; // upmap-primary + if (r <= CEPH_RELEASE_REEF) + return req; + + req |= CEPH_FEATURE_CRUSH_MSR; + if (r <= CEPH_RELEASE_SQUID) return req; return req; diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc index e4a77ae99ae..2793432fdd4 100644 --- a/src/common/crc32c.cc +++ b/src/common/crc32c.cc @@ -24,6 +24,9 @@ ceph_crc32c_func_t ceph_choose_crc32(void) // use that. #if defined(__i386__) || defined(__x86_64__) if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) { + if (ceph_arch_intel_pclmul) { + return ceph_crc32c_intel_fast_pclmul; + } return ceph_crc32c_intel_fast; } #elif defined(__arm__) || defined(__aarch64__) diff --git a/src/common/crc32c_intel_fast.c b/src/common/crc32c_intel_fast.c index 28bd9341651..3fbb63e2812 100644 --- a/src/common/crc32c_intel_fast.c +++ b/src/common/crc32c_intel_fast.c @@ -2,10 +2,25 @@ #include "common/crc32c_intel_baseline.h" extern unsigned int crc32_iscsi_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_00"); +extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01"); extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00"); #ifdef HAVE_NASM_X64 +uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len) +{ + if (!buffer) + { + return crc32_iscsi_zero_00(buffer, len, crc); + } + + /* Unlike crc32_iscsi_00, crc32_iscsi_01 handles the case where the + * input buffer is less than 8 bytes in its prelude, and does not + * prefetch beyond said buffer. 
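+	 *
+	 * Note: per the ceph_choose_crc32() change above, this variant is
+	 * only selected at runtime when the CPU reports PCLMULQDQ support
+	 * (ceph_arch_intel_pclmul) in addition to SSE4.2; otherwise the
+	 * existing crc32_iscsi_00 path in ceph_crc32c_intel_fast() is used.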
+ */ + return crc32_iscsi_01(buffer, len, crc); +} + uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len) { uint32_t v; @@ -43,6 +58,11 @@ int ceph_crc32c_intel_fast_exists(void) return 0; } +uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len) +{ + return 0; +} + uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len) { return 0; diff --git a/src/common/crc32c_intel_fast.h b/src/common/crc32c_intel_fast.h index 26a444f6061..81c6e494f0c 100644 --- a/src/common/crc32c_intel_fast.h +++ b/src/common/crc32c_intel_fast.h @@ -10,10 +10,16 @@ extern int ceph_crc32c_intel_fast_exists(void); #ifdef __x86_64__ +extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len); extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len); #else +static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len) +{ + return 0; +} + static inline uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len) { return 0; diff --git a/src/common/dout_fmt.h b/src/common/dout_fmt.h new file mode 100644 index 00000000000..c22fdf30cfe --- /dev/null +++ b/src/common/dout_fmt.h @@ -0,0 +1,56 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <iosfwd> +#include <iterator> +#include <fmt/ostream.h> +#include "dout.h" + +/// \file dout_fmt.h +/// +/// \brief dout macros to format log statements with libfmt +/// +/// A set of dout macros taking a format string and its corresponding argument +/// list. Log output is written directly to the underlying std::ostream by +/// fmt::print() rather than exposing the stream for ostream operator +/// chaining. + +// work around "warning: value computed is not used" with default dout_prefix +inline void dout_fmt_use_prefix(std::ostream&) {} + +#define lsubdout_fmt(cct, sub, v, ...) \ + dout_impl(cct, ceph_subsys_##sub, v) \ + dout_fmt_use_prefix(dout_prefix); \ + fmt::print(*_dout, __VA_ARGS__); \ + *_dout << dendl + +#define ldout_fmt(cct, v, ...) \ + dout_impl(cct, dout_subsys, v) \ + dout_fmt_use_prefix(dout_prefix); \ + fmt::print(*_dout, __VA_ARGS__); \ + *_dout << dendl + +#define dout_fmt(v, ...) \ + ldout_fmt((dout_context), v, __VA_ARGS__) + +#define ldpp_dout_fmt(dpp, v, ...) 
\
+  if (decltype(auto) pdpp = (dpp); pdpp) { /* workaround -Wnonnull-compare for 'this' */ \
+    dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \
+    pdpp->gen_prefix(*_dout); \
+    fmt::print(*_dout, __VA_ARGS__); \
+    *_dout << dendl; \
+  }
diff --git a/src/common/map_cacher.hpp b/src/common/map_cacher.hpp
index a83f924b622..4d843be75dc 100644
--- a/src/common/map_cacher.hpp
+++ b/src/common/map_cacher.hpp
@@ -85,6 +85,10 @@ private:
 public:
   MapCacher(StoreDriver<K, V> *driver) : driver(driver) {}
 
+  void reset() {
+    in_progress.reset();
+  }
+
   /// Fetch first key/value std::pair after specified key
   int get_next(
     K key, ///< [in] key after which to get next
diff --git a/src/common/options/ceph-exporter.yaml.in b/src/common/options/ceph-exporter.yaml.in
index 798a185e96b..c4b24ee43d4 100644
--- a/src/common/options/ceph-exporter.yaml.in
+++ b/src/common/options/ceph-exporter.yaml.in
@@ -25,6 +25,20 @@ options:
   default: 9926
   services:
   - ceph-exporter
+- name: exporter_cert_file
+  type: str
+  level: advanced
+  desc: Certificate file for TLS.
+  default:
+  services:
+  - ceph-exporter
+- name: exporter_key_file
+  type: str
+  level: advanced
+  desc: Private key file for TLS.
+  default:
+  services:
+  - ceph-exporter
 - name: exporter_prio_limit
   type: int
   level: advanced
diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in
index 0c25c3f6140..73f4fdd6b2d 100644
--- a/src/common/options/crimson.yaml.in
+++ b/src/common/options/crimson.yaml.in
@@ -31,6 +31,15 @@ options:
   desc: CPU cores on which alienstore threads will run in cpuset(7) format
   flags:
   - startup
+- name: crimson_seastar_num_threads
+  type: uint
+  level: advanced
+  default: 0
+  desc: The number of threads for serving seastar reactors without CPU pinning, overridden if crimson_seastar_cpu_cores is set
+  flags:
+  - startup
+  min: 0
+  max: 32
 - name: crimson_osd_stat_interval
   type: int
   level: advanced
diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
index 7366bbb31c6..1b355d6e03a 100644
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -104,8 +104,8 @@ options:
 - name: public_network_interface
   type: str
   level: advanced
-  desc: Interface name(s) from which to choose an address from a public_network to
-    bind to; public_network must also be specified.
+  desc: Interface name(s) from which to choose an address from a ``public_network`` to
+    bind to; ``public_network`` must also be specified.
   tags:
   - network
   services:
@@ -135,8 +135,8 @@ options:
 - name: cluster_network_interface
   type: str
   level: advanced
-  desc: Interface name(s) from which to choose an address from a cluster_network to
-    bind to; cluster_network must also be specified.
+  desc: Interface name(s) from which to choose an address from a ``cluster_network`` to
+    bind to; ``cluster_network`` must also be specified.
   tags:
   - network
   services:
@@ -1299,6 +1299,23 @@ options:
   desc: Inject a network congestions that stuck with N times operations
   default: 0
   with_legacy: true
+- name: ms_time_events_min_wait_interval
+  type: uint
+  level: dev
+  desc: In microseconds, the msgr-worker's minimum wait time for time events,
+    used as the epoll_wait timeout
+  default: 1000
+  min: 0
+  max: 60000000
+  with_legacy: true
+- name: ms_client_throttle_retry_time_interval
+  type: uint
+  level: dev
+  desc: In microseconds, the interval a user client waits before the next retry
+    when the throttle's get_or_fail rejects a request.
+ default: 5000 + min: 1000 + max: 60000000 + with_legacy: true - name: ms_blackhole_osd type: bool level: dev @@ -5049,10 +5066,17 @@ options: [hash_begin..hash_end) defines characters to use for hash calculation. Recommended hash ranges: O(0-13) P(0-8) m(0-16). Sharding of S,T,C,M,B prefixes is inadvised' fmt_desc: Definition of BlueStore's RocksDB sharding. - The optimal value depends on multiple factors, and modification is invadvisable. + The optimal value depends on multiple factors, and modification is inadvisable. This setting is used only when OSD is doing ``--mkfs``. Next runs of OSD retrieve sharding from disk. default: m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L=min_write_buffer_number_to_merge=32 P=min_write_buffer_number_to_merge=32 +- name: bluestore_async_db_compaction + type: bool + level: dev + desc: Perform DB compaction requests asynchronously + long_desc: 'How to perform DB compactions triggered either through async socket or + by OSD initialization procedure on start.' + default: true - name: bluestore_qfsck_on_mount type: bool level: dev diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index cf85729e6e2..dcf3eaac0d6 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -633,7 +633,8 @@ options: default: true services: - mds - with_legacy: true + flags: + - runtime - name: mds_export_ephemeral_random type: bool level: advanced @@ -690,7 +691,8 @@ options: default: 3 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_replicate_threshold type: float level: advanced @@ -700,7 +702,8 @@ options: default: 8000 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_unreplicate_threshold type: float level: advanced @@ -710,7 +713,8 @@ options: default: 0 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_split_size type: int level: advanced @@ -720,7 +724,8 @@ options: default: 10000 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_split_rd type: float level: advanced @@ -730,7 +735,8 @@ options: default: 25000 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_split_wr type: float level: advanced @@ -740,7 +746,8 @@ options: default: 10000 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_split_bits type: int level: advanced @@ -749,9 +756,10 @@ options: default: 3 services: - mds + flags: + - runtime min: 1 max: 24 - with_legacy: true - name: mds_bal_merge_size type: int level: advanced @@ -761,7 +769,8 @@ options: default: 50 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_interval type: int level: advanced @@ -770,6 +779,8 @@ options: default: 10 services: - mds + flags: + - runtime - name: mds_bal_fragment_interval type: int level: advanced @@ -779,6 +790,8 @@ options: default: 5 services: - mds + flags: + - runtime # order of magnitude higher than split size - name: mds_bal_fragment_size_max type: int @@ -800,7 +813,8 @@ options: default: 1.5 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_fragment_dirs type: bool level: advanced @@ -813,6 +827,8 @@ options: default: true services: - mds + flags: + - runtime - name: mds_bal_idle_threshold type: float level: advanced @@ -822,7 +838,8 @@ options: default: 0 services: - mds - with_legacy: true + flags: + - runtime - name: mds_bal_max type: int level: dev @@ -831,7 +848,8 @@ options: - mds fmt_desc: The number of iterations to run balancer before Ceph stops. 
(used for testing purposes only) - with_legacy: true + flags: + - runtime - name: mds_bal_max_until type: int level: dev @@ -840,7 +858,8 @@ options: - mds fmt_desc: The number of seconds to run balancer before Ceph stops. (used for testing purposes only) - with_legacy: true + flags: + - runtime - name: mds_bal_mode type: int level: dev @@ -853,7 +872,8 @@ options: - ``0`` = Hybrid. - ``1`` = Request rate and latency. - ``2`` = CPU load. - with_legacy: true + flags: + - runtime # must be this much above average before we export anything - name: mds_bal_min_rebalance type: float @@ -863,7 +883,8 @@ options: default: 0.1 services: - mds - with_legacy: true + flags: + - runtime # must be overloaded for more than these epochs before we export anything - name: mds_bal_overload_epochs type: int @@ -882,7 +903,8 @@ options: services: - mds fmt_desc: The minimum subtree temperature before Ceph searches a subtree. - with_legacy: true + flags: + - runtime # take within this range of what we need - name: mds_bal_need_min type: float @@ -891,7 +913,8 @@ options: services: - mds fmt_desc: The minimum fraction of target subtree size to accept. - with_legacy: true + flags: + - runtime - name: mds_bal_need_max type: float level: dev @@ -899,7 +922,8 @@ options: services: - mds fmt_desc: The maximum fraction of target subtree size to accept. - with_legacy: true + flags: + - runtime # any sub bigger than this taken in full - name: mds_bal_midchunk type: float @@ -909,7 +933,8 @@ options: - mds fmt_desc: Ceph will migrate any subtree that is larger than this fraction of the target subtree size. - with_legacy: true + flags: + - runtime # never take anything smaller than this - name: mds_bal_minchunk type: float @@ -919,7 +944,8 @@ options: - mds fmt_desc: Ceph will ignore any subtree that is smaller than this fraction of the target subtree size. - with_legacy: true + flags: + - runtime # target decay half-life in MDSMap (2x larger is approx. 2x slower) - name: mds_bal_target_decay type: float @@ -928,7 +954,8 @@ options: default: 10 services: - mds - with_legacy: true + flags: + - runtime - name: mds_oft_prefetch_dirfrags type: bool level: advanced @@ -1106,6 +1133,14 @@ options: fmt_desc: Ceph will inject MDS failure in the subtree import code (for developers only). with_legacy: true +- name: mds_kill_dirfrag_at + type: int + level: dev + default: 0 + services: + - mds + flags: + - runtime - name: mds_kill_link_at type: int level: dev diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index 075b335a08f..1ec9871b6a8 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -778,6 +778,18 @@ options: services: - mon with_legacy: true +- name: mon_fsmap_prune_threshold + type: secs + level: advanced + desc: prune fsmap older than this threshold in seconds + fmt_desc: The monitors keep historical fsmaps in memory to optimize asking + when an MDS daemon was last seen in the FSMap. This option controls + how far back in time the monitors will look. + default: 300 + flags: + - runtime + services: + - mon - name: mds_beacon_mon_down_grace type: secs level: advanced diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index bc5d5357705..268a89154de 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -520,16 +520,6 @@ options: stats (inc. scrub/block duration) every this many seconds. 
default: 120 with_legacy: false -- name: osd_scrub_reservation_timeout - type: millisecs - level: advanced - desc: Maximum wait (milliseconds) for replicas' response to scrub reservation requests - long_desc: Maximum wait (milliseconds) for all replicas to respond to - scrub reservation requests, before the scrub session is aborted. Disable by setting - to a very large value. - default: 300000 - min: 2000 - with_legacy: false - name: osd_scrub_disable_reservation_queuing type: bool level: advanced @@ -1244,6 +1234,11 @@ options: level: advanced default: false with_legacy: true +- name: osd_ec_partial_reads + type: bool + level: advanced + default: true + with_legacy: true - name: osd_recovery_delay_start type: float level: advanced diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 5cb7aee81f1..583fe4bcaf6 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -51,6 +51,14 @@ options: services: - rgw with_legacy: true +- name: rgw_disable_s3select + type: bool + level: advanced + desc: disable the s3select operation; RGW will report an error and will return ERR_INVALID_REQUEST. + default: false + services: + - rgw + with_legacy: true - name: rgw_rados_tracing type: bool level: advanced @@ -2230,6 +2238,14 @@ options: services: - rgw with_legacy: true +- name: rgw_asio_assert_yielding + type: bool + level: dev + desc: Trigger an assertion failure if an operation would block an asio thread + default: false + services: + - rgw + with_legacy: true - name: rgw_user_quota_bucket_sync_interval type: int level: advanced diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp index 3b3cf01bb28..8c0db6c24a7 100644 --- a/src/common/sharedptr_registry.hpp +++ b/src/common/sharedptr_registry.hpp @@ -18,6 +18,7 @@ #include <map> #include <memory> #include "common/ceph_mutex.h" +#include "include/ceph_assert.h" /** * Provides a registry of shared_ptr<V> indexed by K while @@ -61,6 +62,11 @@ public: waiting(0) {} + void reset() { + ceph_assert(!waiting); + contents.clear(); + } + bool empty() { std::lock_guard l(lock); return contents.empty(); diff --git a/src/compressor/CMakeLists.txt b/src/compressor/CMakeLists.txt index 3da2710dab7..5a33f97eeb4 100644 --- a/src/compressor/CMakeLists.txt +++ b/src/compressor/CMakeLists.txt @@ -8,6 +8,7 @@ if(HAVE_QATZIP AND HAVE_QAT) QAT::qat QAT::usdm QAT::zip + legacy-option-headers ) endif() diff --git a/src/crimson/common/interruptible_future.h b/src/crimson/common/interruptible_future.h index 405d9c3c05d..59bb0be3087 100644 --- a/src/crimson/common/interruptible_future.h +++ b/src/crimson/common/interruptible_future.h @@ -1230,6 +1230,17 @@ public: }; } + template <typename Lock, typename Func> + [[gnu::always_inline]] + static auto with_lock(Lock& lock, Func&& func) { + return seastar::with_lock( + lock, + [func=std::move(func), + interrupt_condition=interrupt_cond<InterruptCond>.interrupt_cond]() mutable { + return call_with_interruption(interrupt_condition, func); + }); + } + template <typename Iterator, InvokeReturnsInterruptibleFuture<typename Iterator::reference> AsyncAction> [[gnu::always_inline]] diff --git a/src/crimson/common/operation.h b/src/crimson/common/operation.h index 94294b9a6a6..b38b9af31bf 100644 --- a/src/crimson/common/operation.h +++ b/src/crimson/common/operation.h @@ -137,17 +137,21 @@ struct TimeEvent : Event<T> { template <typename T> class BlockerT : public Blocker { public: - struct BlockingEvent : Event<typename T::BlockingEvent> { + struct 
BlockingEvent : Event<BlockingEvent>, + boost::intrusive::list_base_hook<> { using Blocker = std::decay_t<T>; + struct ExitBarrierEvent : TimeEvent<ExitBarrierEvent> { + } exit_barrier_event; + struct Backend { // `T` is based solely to let implementations to discriminate // basing on the type-of-event. - virtual void handle(typename T::BlockingEvent&, const Operation&, const T&) = 0; + virtual void handle(BlockingEvent&, const Operation&, const T&) = 0; }; struct InternalBackend : Backend { - void handle(typename T::BlockingEvent&, + void handle(BlockingEvent&, const Operation&, const T& blocker) override { this->timestamp = ceph_clock_now(); @@ -165,7 +169,7 @@ public: TriggerI(BlockingEvent& event) : event(event) {} template <class FutureT> - auto maybe_record_blocking(FutureT&& fut, const T& blocker) { + auto maybe_record_blocking(FutureT&& fut, T& blocker) { if (!fut.available()) { // a full blown call via vtable. that's the cost for templatization // avoidance. anyway, most of the things actually have the type @@ -183,10 +187,13 @@ public: virtual ~TriggerI() = default; protected: // it's for the sake of erasing the OpT type - virtual void record_blocking(const T& blocker) = 0; + virtual void record_blocking(T& blocker) = 0; - static void record_unblocking(BlockingEvent& event, const T& blocker) { - assert(event.internal_backend.blocker == &blocker); + static void record_unblocking(BlockingEvent& event, T& blocker) { + if (event.internal_backend.blocker) { + assert(event.internal_backend.blocker == &blocker); + blocker.delete_event(event); + } event.internal_backend.blocker = nullptr; } @@ -198,7 +205,7 @@ public: Trigger(BlockingEvent& event, const OpT& op) : TriggerI(event), op(op) {} template <class FutureT> - auto maybe_record_blocking(FutureT&& fut, const T& blocker) { + auto maybe_record_blocking(FutureT&& fut, T& blocker) { if (!fut.available()) { // no need for the dynamic dispatch! if we're lucky, a compiler // should collapse all these abstractions into a bunch of movs. @@ -213,12 +220,22 @@ public: const OpT &get_op() { return op; } + template <class FutureT> + decltype(auto) maybe_record_exit_barrier(FutureT&& fut) { + if (!fut.available()) { + this->event.exit_barrier_event.trigger(this->op); + } + return std::forward<FutureT>(fut); + } + protected: - void record_blocking(const T& blocker) override { + void record_blocking(T& blocker) override { this->event.trigger(op, blocker); + blocker.add_event(this->event); } const OpT& op; + }; void dump(ceph::Formatter *f) const { @@ -228,20 +245,34 @@ public: internal_backend.timestamp, internal_backend.blocker, f); + exit_barrier_event.dump(f); } }; - virtual ~BlockerT() = default; + virtual ~BlockerT() { + for (auto &event : event_list) { + event.internal_backend.blocker = nullptr; + } + event_list.clear(); + } template <class TriggerT, class... Args> decltype(auto) track_blocking(TriggerT&& trigger, Args&&... 
args) { return std::forward<TriggerT>(trigger).maybe_record_blocking( - std::forward<Args>(args)..., static_cast<const T&>(*this)); + std::forward<Args>(args)..., *(static_cast<T*>(this))); } private: const char *get_type_name() const final { return static_cast<const T*>(this)->type_name; } + using event_list_t = boost::intrusive::list<BlockingEvent>; + event_list_t event_list; + void add_event(BlockingEvent& event) { + event_list.push_back(event); + } + void delete_event(BlockingEvent& event) { + event_list.erase(event_list_t::s_iterator_to(event)); + } }; template <class T> @@ -258,7 +289,7 @@ struct AggregateBlockingEvent { public: template <class FutureT> auto maybe_record_blocking(FutureT&& fut, - const typename T::Blocker& blocker) { + typename T::Blocker& blocker) { // AggregateBlockingEvent is supposed to be used on relatively cold // paths (recovery), so we don't need to worry about the dynamic // polymothps / dynamic memory's overhead. @@ -692,29 +723,6 @@ private: */ template <class T> class OrderedConcurrentPhaseT : public PipelineStageIT<T> { - using base_t = PipelineStageIT<T>; -public: - struct BlockingEvent : base_t::BlockingEvent { - using base_t::BlockingEvent::BlockingEvent; - - struct ExitBarrierEvent : TimeEvent<ExitBarrierEvent> {}; - - template <class OpT> - struct Trigger : base_t::BlockingEvent::template Trigger<OpT> { - using base_t::BlockingEvent::template Trigger<OpT>::Trigger; - - template <class FutureT> - decltype(auto) maybe_record_exit_barrier(FutureT&& fut) { - if (!fut.available()) { - exit_barrier_event.trigger(this->op); - } - return std::forward<FutureT>(fut); - } - - ExitBarrierEvent exit_barrier_event; - }; - }; - private: void dump_detail(ceph::Formatter *f) const final {} diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc index f79d2566885..f6aabc8fd7e 100644 --- a/src/crimson/common/tri_mutex.cc +++ b/src/crimson/common/tri_mutex.cc @@ -5,6 +5,9 @@ #include <seastar/util/later.hh> +SET_SUBSYS(osd); +//TODO: SET_SUBSYS(crimson_tri_mutex); + seastar::future<> read_lock::lock() { return static_cast<tri_mutex*>(this)->lock_for_read(); @@ -35,67 +38,51 @@ void excl_lock::unlock() static_cast<tri_mutex*>(this)->unlock_for_excl(); } -void excl_lock_from_read::lock() -{ - static_cast<tri_mutex*>(this)->promote_from_read(); -} - -void excl_lock_from_read::unlock() -{ - static_cast<tri_mutex*>(this)->demote_to_read(); -} - -void excl_lock_from_write::lock() -{ - static_cast<tri_mutex*>(this)->promote_from_write(); -} - -void excl_lock_from_write::unlock() -{ - static_cast<tri_mutex*>(this)->demote_to_write(); -} - tri_mutex::~tri_mutex() { + LOG_PREFIX(tri_mutex::~tri_mutex()); + DEBUGDPP("", *this); assert(!is_acquired()); } seastar::future<> tri_mutex::lock_for_read() { + LOG_PREFIX(tri_mutex::lock_for_read()); + DEBUGDPP("", *this); if (try_lock_for_read()) { + DEBUGDPP("lock_for_read successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_read, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::read); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_read() noexcept { + LOG_PREFIX(tri_mutex::try_lock_for_read()); + DEBUGDPP("", *this); if (!writers && !exclusively_used && waiters.empty()) { ++readers; return true; - } else { - return false; } + return false; } void tri_mutex::unlock_for_read() { + LOG_PREFIX(tri_mutex::unlock_for_read()); + DEBUGDPP("", *this); assert(readers > 0); if (--readers == 0) { wake(); } } -void tri_mutex::promote_from_read() -{ - 
assert(readers == 1); - --readers; - exclusively_used = true; -} - void tri_mutex::demote_to_read() { + LOG_PREFIX(tri_mutex::demote_to_read()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; ++readers; @@ -103,41 +90,42 @@ void tri_mutex::demote_to_read() seastar::future<> tri_mutex::lock_for_write() { + LOG_PREFIX(tri_mutex::lock_for_write()); + DEBUGDPP("", *this); if (try_lock_for_write()) { + DEBUGDPP("lock_for_write successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_write, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::write); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_write() noexcept { - if (!readers && !exclusively_used) { - if (waiters.empty()) { - ++writers; - return true; - } + LOG_PREFIX(tri_mutex::try_lock_for_write()); + DEBUGDPP("", *this); + if (!readers && !exclusively_used && waiters.empty()) { + ++writers; + return true; } return false; } void tri_mutex::unlock_for_write() { + LOG_PREFIX(tri_mutex::unlock_for_write()); + DEBUGDPP("", *this); assert(writers > 0); if (--writers == 0) { wake(); } } -void tri_mutex::promote_from_write() -{ - assert(writers == 1); - --writers; - exclusively_used = true; -} - void tri_mutex::demote_to_write() { + LOG_PREFIX(tri_mutex::demote_to_write()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; ++writers; @@ -146,15 +134,21 @@ void tri_mutex::demote_to_write() // for exclusive users seastar::future<> tri_mutex::lock_for_excl() { + LOG_PREFIX(tri_mutex::lock_for_excl()); + DEBUGDPP("", *this); if (try_lock_for_excl()) { + DEBUGDPP("lock_for_excl, successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_excl, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::exclusive); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_excl() noexcept { + LOG_PREFIX(tri_mutex::try_lock_for_excl()); + DEBUGDPP("", *this); if (readers == 0u && writers == 0u && !exclusively_used) { exclusively_used = true; return true; @@ -165,6 +159,8 @@ bool tri_mutex::try_lock_for_excl() noexcept void tri_mutex::unlock_for_excl() { + LOG_PREFIX(tri_mutex::unlock_for_excl()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; wake(); @@ -172,6 +168,8 @@ void tri_mutex::unlock_for_excl() bool tri_mutex::is_acquired() const { + LOG_PREFIX(tri_mutex::is_acquired()); + DEBUGDPP("", *this); if (readers != 0u) { return true; } else if (writers != 0u) { @@ -185,6 +183,8 @@ bool tri_mutex::is_acquired() const void tri_mutex::wake() { + LOG_PREFIX(tri_mutex::wake()); + DEBUGDPP("", *this); assert(!readers && !writers && !exclusively_used); type_t type = type_t::none; while (!waiters.empty()) { @@ -210,7 +210,9 @@ void tri_mutex::wake() default: assert(0); } + DEBUGDPP("waking up", *this); waiter.pr.set_value(); waiters.pop_front(); } + DEBUGDPP("no waiters", *this); } diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h index d1c215be27e..ae7119985bf 100644 --- a/src/crimson/common/tri_mutex.h +++ b/src/crimson/common/tri_mutex.h @@ -6,6 +6,9 @@ #include <seastar/core/future.hh> #include <seastar/core/circular_buffer.hh> +#include "common/hobject.h" +#include "crimson/common/log.h" + class read_lock { public: seastar::future<> lock(); @@ -24,20 +27,6 @@ public: void unlock(); }; -// promote from read to excl -class excl_lock_from_read { -public: - void lock(); - void unlock(); -}; - -// promote from write to excl -class 
excl_lock_from_write { -public: - void lock(); - void unlock(); -}; - /// shared/exclusive mutual exclusion /// /// Unlike reader/write lock, tri_mutex does not enforce the exclusive access @@ -51,17 +40,17 @@ public: /// - readers /// - writers /// - exclusive users -/// -/// For lock promotion, a read or a write lock is only allowed to be promoted -/// atomically upon the first locking. class tri_mutex : private read_lock, write_lock, - excl_lock, - excl_lock_from_read, - excl_lock_from_write + excl_lock { public: tri_mutex() = default; +#ifdef NDEBUG + tri_mutex(const hobject_t &obj_name) : name() {} +#else + tri_mutex(const hobject_t &obj_name) : name(obj_name) {} +#endif ~tri_mutex(); read_lock& for_read() { @@ -73,18 +62,11 @@ public: excl_lock& for_excl() { return *this; } - excl_lock_from_read& excl_from_read() { - return *this; - } - excl_lock_from_write& excl_from_write() { - return *this; - } // for shared readers seastar::future<> lock_for_read(); bool try_lock_for_read() noexcept; void unlock_for_read(); - void promote_from_read(); void demote_to_read(); unsigned get_readers() const { return readers; @@ -94,7 +76,6 @@ public: seastar::future<> lock_for_write(); bool try_lock_for_write() noexcept; void unlock_for_write(); - void promote_from_write(); void demote_to_write(); unsigned get_writers() const { return writers; @@ -120,6 +101,10 @@ public: } } + const hobject_t &get_name() const{ + return name; + } + private: void wake(); unsigned readers = 0; @@ -139,10 +124,22 @@ private: type_t type; }; seastar::circular_buffer<waiter_t> waiters; + const hobject_t name; friend class read_lock; friend class write_lock; friend class excl_lock; - friend class excl_lock_from_read; - friend class excl_lock_from_write; - friend class excl_lock_from_excl; + friend std::ostream& operator<<(std::ostream &lhs, const tri_mutex &rhs); }; + +inline std::ostream& operator<<(std::ostream& os, const tri_mutex& tm) +{ + os << fmt::format("tri_mutex {} writers {} readers {}" + " exclusively_used {} waiters: {}", + tm.get_name(), tm.get_writers(), tm.get_readers(), + tm.exclusively_used, tm.waiters.size()); + return os; +} + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<tri_mutex> : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/alienstore/alien_store.cc b/src/crimson/os/alienstore/alien_store.cc index 2139a2dd258..2fac9a89ee4 100644 --- a/src/crimson/os/alienstore/alien_store.cc +++ b/src/crimson/os/alienstore/alien_store.cc @@ -101,19 +101,23 @@ seastar::future<> AlienStore::start() if (!store) { ceph_abort_msgf("unsupported objectstore type: %s", type.c_str()); } - auto cpu_cores = seastar::resource::parse_cpuset( - get_conf<std::string>("crimson_alien_thread_cpu_cores")); - // crimson_alien_thread_cpu_cores are assigned to alien threads. - if (!cpu_cores.has_value()) { - // no core isolation by default, seastar_cpu_cores will be - // shared between both alien and seastar reactor threads. - cpu_cores = seastar::resource::parse_cpuset( - get_conf<std::string>("crimson_seastar_cpu_cores")); - ceph_assert(cpu_cores.has_value()); + /* + * crimson_alien_thread_cpu_cores must be set for optimal performance. + * Otherwise, no CPU pinning will take place. 
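+   *
+   * For example (the core numbers here are illustrative only), dedicating
+   * four cores to alien threads could look like:
+   *
+   *   crimson_alien_thread_cpu_cores = 4-7
+   *
+   * using the cpuset(7) list format parsed by
+   * seastar::resource::parse_cpuset() below.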
+ */ + std::optional<seastar::resource::cpuset> alien_thread_cpu_cores; + + if (std::string conf_cpu_cores = + get_conf<std::string>("crimson_alien_thread_cpu_cores"); + !conf_cpu_cores.empty()) { + logger().debug("{} using crimson_alien_thread_cpu_cores", __func__); + alien_thread_cpu_cores = + seastar::resource::parse_cpuset(conf_cpu_cores); } + const auto num_threads = get_conf<uint64_t>("crimson_alien_op_num_threads"); - tp = std::make_unique<crimson::os::ThreadPool>(num_threads, 128, cpu_cores); + tp = std::make_unique<crimson::os::ThreadPool>(num_threads, 128, alien_thread_cpu_cores); return tp->start(); } diff --git a/src/crimson/os/futurized_store.h b/src/crimson/os/futurized_store.h index 95d0d9b2d29..fe09cc54510 100644 --- a/src/crimson/os/futurized_store.h +++ b/src/crimson/os/futurized_store.h @@ -186,6 +186,8 @@ public: virtual seastar::future<store_statfs_t> pool_statfs(int64_t pool_id) const = 0; + virtual seastar::future<> report_stats() { return seastar::now(); } + virtual uuid_d get_fsid() const = 0; virtual seastar::future<> write_meta(const std::string& key, diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index a9d22b1269c..00e7fa38956 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -349,18 +349,18 @@ void JournalTrimmerImpl::config_t::validate() const JournalTrimmerImpl::config_t JournalTrimmerImpl::config_t::get_default( - std::size_t roll_size, journal_type_t type) + std::size_t roll_size, backend_type_t type) { assert(roll_size); std::size_t target_dirty_bytes = 0; std::size_t target_alloc_bytes = 0; std::size_t max_journal_bytes = 0; - if (type == journal_type_t::SEGMENTED) { + if (type == backend_type_t::SEGMENTED) { target_dirty_bytes = 12 * roll_size; target_alloc_bytes = 2 * roll_size; max_journal_bytes = 16 * roll_size; } else { - assert(type == journal_type_t::RANDOM_BLOCK); + assert(type == backend_type_t::RANDOM_BLOCK); target_dirty_bytes = roll_size / 4; target_alloc_bytes = roll_size / 4; max_journal_bytes = roll_size / 2; @@ -376,18 +376,18 @@ JournalTrimmerImpl::config_t::get_default( JournalTrimmerImpl::config_t JournalTrimmerImpl::config_t::get_test( - std::size_t roll_size, journal_type_t type) + std::size_t roll_size, backend_type_t type) { assert(roll_size); std::size_t target_dirty_bytes = 0; std::size_t target_alloc_bytes = 0; std::size_t max_journal_bytes = 0; - if (type == journal_type_t::SEGMENTED) { + if (type == backend_type_t::SEGMENTED) { target_dirty_bytes = 2 * roll_size; target_alloc_bytes = 2 * roll_size; max_journal_bytes = 4 * roll_size; } else { - assert(type == journal_type_t::RANDOM_BLOCK); + assert(type == backend_type_t::RANDOM_BLOCK); target_dirty_bytes = roll_size / 36; target_alloc_bytes = roll_size / 4; max_journal_bytes = roll_size / 2; @@ -404,12 +404,12 @@ JournalTrimmerImpl::config_t::get_test( JournalTrimmerImpl::JournalTrimmerImpl( BackrefManager &backref_manager, config_t config, - journal_type_t type, + backend_type_t type, device_off_t roll_start, device_off_t roll_size) : backref_manager(backref_manager), config(config), - journal_type(type), + backend_type(type), roll_start(roll_start), roll_size(roll_size), reserved_usage(0) @@ -507,7 +507,7 @@ journal_seq_t JournalTrimmerImpl::get_tail_limit() const { assert(background_callback->is_ready()); auto ret = journal_head.add_offset( - journal_type, + backend_type, -static_cast<device_off_t>(config.max_journal_bytes), roll_start, roll_size); @@ -518,7 +518,7 @@ 
journal_seq_t JournalTrimmerImpl::get_dirty_tail_target() const { assert(background_callback->is_ready()); auto ret = journal_head.add_offset( - journal_type, + backend_type, -static_cast<device_off_t>(config.target_journal_dirty_bytes), roll_start, roll_size); @@ -529,7 +529,7 @@ journal_seq_t JournalTrimmerImpl::get_alloc_tail_target() const { assert(background_callback->is_ready()); auto ret = journal_head.add_offset( - journal_type, + backend_type, -static_cast<device_off_t>(config.target_journal_alloc_bytes), roll_start, roll_size); @@ -542,7 +542,7 @@ std::size_t JournalTrimmerImpl::get_dirty_journal_size() const return 0; } auto ret = journal_head.relative_to( - journal_type, + backend_type, journal_dirty_tail, roll_start, roll_size); @@ -556,7 +556,7 @@ std::size_t JournalTrimmerImpl::get_alloc_journal_size() const return 0; } auto ret = journal_head.relative_to( - journal_type, + backend_type, journal_alloc_tail, roll_start, roll_size); @@ -598,7 +598,14 @@ JournalTrimmerImpl::trim_alloc() { LOG_PREFIX(JournalTrimmerImpl::trim_alloc); assert(background_callback->is_ready()); - return repeat_eagain([this, FNAME] { + + auto& shard_stats = extent_callback->get_shard_stats(); + ++(shard_stats.trim_alloc_num); + ++(shard_stats.pending_bg_num); + + return repeat_eagain([this, FNAME, &shard_stats] { + ++(shard_stats.repeat_trim_alloc_num); + return extent_callback->with_transaction_intr( Transaction::src_t::TRIM_ALLOC, "trim_alloc", @@ -622,8 +629,11 @@ JournalTrimmerImpl::trim_alloc() return seastar::now(); }); }); - }).safe_then([this, FNAME] { + }).finally([this, FNAME, &shard_stats] { DEBUG("finish, alloc_tail={}", journal_alloc_tail); + + assert(shard_stats.pending_bg_num); + --(shard_stats.pending_bg_num); }); } @@ -632,7 +642,14 @@ JournalTrimmerImpl::trim_dirty() { LOG_PREFIX(JournalTrimmerImpl::trim_dirty); assert(background_callback->is_ready()); - return repeat_eagain([this, FNAME] { + + auto& shard_stats = extent_callback->get_shard_stats(); + ++(shard_stats.trim_dirty_num); + ++(shard_stats.pending_bg_num); + + return repeat_eagain([this, FNAME, &shard_stats] { + ++(shard_stats.repeat_trim_dirty_num); + return extent_callback->with_transaction_intr( Transaction::src_t::TRIM_DIRTY, "trim_dirty", @@ -662,8 +679,11 @@ JournalTrimmerImpl::trim_dirty() return extent_callback->submit_transaction_direct(t); }); }); - }).safe_then([this, FNAME] { + }).finally([this, FNAME, &shard_stats] { DEBUG("finish, dirty_tail={}", journal_dirty_tail); + + assert(shard_stats.pending_bg_num); + --(shard_stats.pending_bg_num); }); } @@ -1073,6 +1093,14 @@ SegmentCleaner::do_reclaim_space( std::size_t &reclaimed, std::size_t &runs) { + auto& shard_stats = extent_callback->get_shard_stats(); + if (is_cold) { + ++(shard_stats.cleaner_cold_num); + } else { + ++(shard_stats.cleaner_main_num); + } + ++(shard_stats.pending_bg_num); + // Extents satisfying any of the following requirements // are considered DEAD: // 1. can't find the corresponding mapping in both the @@ -1082,13 +1110,17 @@ SegmentCleaner::do_reclaim_space( // tree doesn't match the extent's paddr // 3. 
the extent is physical and doesn't exist in the // lba tree, backref tree or backref cache; - return repeat_eagain([this, &backref_extents, + return repeat_eagain([this, &backref_extents, &shard_stats, &pin_list, &reclaimed, &runs] { reclaimed = 0; runs++; - auto src = Transaction::src_t::CLEANER_MAIN; + transaction_type_t src; if (is_cold) { src = Transaction::src_t::CLEANER_COLD; + ++(shard_stats.repeat_cleaner_cold_num); + } else { + src = Transaction::src_t::CLEANER_MAIN; + ++(shard_stats.repeat_cleaner_main_num); } return extent_callback->with_transaction_intr( src, @@ -1167,6 +1199,9 @@ SegmentCleaner::do_reclaim_space( return extent_callback->submit_transaction_direct(t); }); }); + }).finally([&shard_stats] { + assert(shard_stats.pending_bg_num); + --(shard_stats.pending_bg_num); }); } @@ -1202,6 +1237,7 @@ SegmentCleaner::clean_space_ret SegmentCleaner::clean_space() std::pair<std::vector<CachedExtentRef>, backref_pin_list_t>(), [this](auto &weak_read_ret) { return repeat_eagain([this, &weak_read_ret] { + // Note: not tracked by shard_stats_t intentionally. return extent_callback->with_transaction_intr( Transaction::src_t::READ, "retrieve_from_backref_tree", diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 6ff1065c4e6..adf9fb177ad 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -277,6 +277,8 @@ public: virtual ~ExtentCallbackInterface() = default; + virtual shard_stats_t& get_shard_stats() = 0; + /// Creates empty transaction /// weak transaction should be type READ virtual TransactionRef create_transaction( @@ -491,16 +493,16 @@ public: void validate() const; static config_t get_default( - std::size_t roll_size, journal_type_t type); + std::size_t roll_size, backend_type_t type); static config_t get_test( - std::size_t roll_size, journal_type_t type); + std::size_t roll_size, backend_type_t type); }; JournalTrimmerImpl( BackrefManager &backref_manager, config_t config, - journal_type_t type, + backend_type_t type, device_off_t roll_start, device_off_t roll_size); @@ -538,8 +540,8 @@ public: config.rewrite_dirty_bytes_per_cycle; } - journal_type_t get_journal_type() const { - return journal_type; + backend_type_t get_backend_type() const { + return backend_type; } void set_extent_callback(ExtentCallbackInterface *cb) { @@ -564,7 +566,7 @@ public: bool should_block_io_on_trim() const { return get_tail_limit() > get_journal_tail().add_offset( - journal_type, reserved_usage, roll_start, roll_size); + backend_type, reserved_usage, roll_start, roll_size); } bool try_reserve_inline_usage(std::size_t usage) final { @@ -587,7 +589,7 @@ public: static JournalTrimmerImplRef create( BackrefManager &backref_manager, config_t config, - journal_type_t type, + backend_type_t type, device_off_t roll_start, device_off_t roll_size) { return std::make_unique<JournalTrimmerImpl>( @@ -627,7 +629,7 @@ private: BackrefManager &backref_manager; config_t config; - journal_type_t journal_type; + backend_type_t backend_type; device_off_t roll_start; device_off_t roll_size; diff --git a/src/crimson/os/seastore/btree/btree_range_pin.cc b/src/crimson/os/seastore/btree/btree_range_pin.cc index 1e72f3da75f..12e078814cc 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.cc +++ b/src/crimson/os/seastore/btree/btree_range_pin.cc @@ -11,8 +11,7 @@ get_child_ret_t<LogicalCachedExtent> BtreeNodeMapping<key_t, val_t>::get_logical_extent( Transaction &t) { - assert(parent); - assert(parent->is_valid()); + 
ceph_assert(is_parent_valid()); assert(pos != std::numeric_limits<uint16_t>::max()); ceph_assert(t.get_trans_id() == ctx.trans.get_trans_id()); auto &p = (FixedKVNode<key_t>&)*parent; @@ -29,21 +28,25 @@ BtreeNodeMapping<key_t, val_t>::get_logical_extent( template <typename key_t, typename val_t> bool BtreeNodeMapping<key_t, val_t>::is_stable() const { - assert(parent); - assert(parent->is_valid()); + assert(!this->parent_modified()); assert(pos != std::numeric_limits<uint16_t>::max()); auto &p = (FixedKVNode<key_t>&)*parent; - return p.is_child_stable(ctx, pos); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_stable(ctx, pos, k); } template <typename key_t, typename val_t> bool BtreeNodeMapping<key_t, val_t>::is_data_stable() const { - assert(parent); - assert(parent->is_valid()); + assert(!this->parent_modified()); assert(pos != std::numeric_limits<uint16_t>::max()); auto &p = (FixedKVNode<key_t>&)*parent; - return p.is_child_data_stable(ctx, pos); + auto k = this->is_indirect() + ? this->get_intermediate_base() + : get_key(); + return p.is_child_data_stable(ctx, pos, k); } template class BtreeNodeMapping<laddr_t, paddr_t>; diff --git a/src/crimson/os/seastore/btree/btree_range_pin.h b/src/crimson/os/seastore/btree/btree_range_pin.h index a2d74558733..7e450f90a84 100644 --- a/src/crimson/os/seastore/btree/btree_range_pin.h +++ b/src/crimson/os/seastore/btree/btree_range_pin.h @@ -194,9 +194,37 @@ public: return parent->has_been_invalidated(); } + bool is_unviewable_by_trans(CachedExtent& extent, Transaction &t) const { + if (!extent.is_valid()) { + return true; + } + if (extent.is_pending()) { + assert(extent.is_pending_in_trans(t.get_trans_id())); + return false; + } + auto &pendings = extent.mutation_pendings; + auto trans_id = t.get_trans_id(); + bool unviewable = (pendings.find(trans_id, trans_spec_view_t::cmp_t()) != + pendings.end()); + if (!unviewable) { + auto &trans = extent.retired_transactions; + unviewable = (trans.find(trans_id, trans_spec_view_t::cmp_t()) != + trans.end()); + assert(unviewable == t.is_retired(extent.get_paddr(), extent.get_length())); + } + return unviewable; + } + get_child_ret_t<LogicalCachedExtent> get_logical_extent(Transaction&) final; bool is_stable() const final; bool is_data_stable() const final; + bool is_parent_valid() const final { + ceph_assert(parent); + if (!parent->is_valid()) { + return false; + } + return !is_unviewable_by_trans(*parent, ctx.trans); + } }; } diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index 5000b2e94de..d5f8ecb8f42 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -511,6 +511,8 @@ public: &child_node); } else { if (i->get_val().pladdr.is_laddr()) { + assert(!node->children[i->get_offset()] || + is_reserved_ptr(node->children[i->get_offset()])); continue; } ret = c.trans.get_extent( @@ -586,7 +588,7 @@ public: : true); } } - if (child == get_reserved_ptr()) { + if (is_reserved_ptr(child)) { if constexpr( !std::is_base_of_v<typename internal_node_t::base_t, child_node_t>) { @@ -1491,9 +1493,9 @@ private: // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { - return v.get_child_fut().safe_then( - [on_found=std::move(on_found), node_iter, c, - parent_entry](auto child) mutable { + return trans_intr::make_interruptible(std::move(v.get_child_fut()) + ).si_then([on_found=std::move(on_found), 
node_iter, c, + parent_entry](auto child) { LOG_PREFIX(FixedKVBtree::lookup_internal_level); SUBTRACET(seastore_fixedkv_tree, "got child on {}, pos: {}, res: {}", @@ -1561,9 +1563,9 @@ private: // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { - return v.get_child_fut().safe_then( - [on_found=std::move(on_found), node_iter, c, - parent_entry](auto child) mutable { + return trans_intr::make_interruptible(std::move(v.get_child_fut()) + ).si_then([on_found=std::move(on_found), node_iter, c, + parent_entry](auto child) { LOG_PREFIX(FixedKVBtree::lookup_leaf); SUBTRACET(seastore_fixedkv_tree, "got child on {}, pos: {}, res: {}", @@ -2116,9 +2118,9 @@ private: // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { - return v.get_child_fut().safe_then( - [do_merge=std::move(do_merge), &pos, - donor_iter, donor_is_left, c, parent_pos](auto child) mutable { + return trans_intr::make_interruptible(std::move(v.get_child_fut()) + ).si_then([do_merge=std::move(do_merge), &pos, + donor_iter, donor_is_left, c, parent_pos](auto child) { LOG_PREFIX(FixedKVBtree::merge_level); SUBTRACET(seastore_fixedkv_tree, "got child on {}, pos: {}, res: {}", diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index 79495cb35d1..8b31d82aad5 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -130,7 +130,7 @@ struct FixedKVNode : ChildableCachedExtent { children[offset] = child; set_child_ptracker(child); } else { - // this can only happen when reserving lba spaces + // this can happen when reserving lba spaces and cloning mappings ceph_assert(is_leaf_and_has_children()); // this is to avoid mistakenly copying pointers from // copy sources when committing this lba node, because @@ -265,8 +265,14 @@ struct FixedKVNode : ChildableCachedExtent { set_child_ptracker(child); } - virtual bool is_child_stable(op_context_t<node_key_t>, uint16_t pos) const = 0; - virtual bool is_child_data_stable(op_context_t<node_key_t>, uint16_t pos) const = 0; + virtual bool is_child_stable( + op_context_t<node_key_t>, + uint16_t pos, + node_key_t key) const = 0; + virtual bool is_child_data_stable( + op_context_t<node_key_t>, + uint16_t pos, + node_key_t key) const = 0; template <typename T> get_child_ret_t<T> get_child( @@ -275,6 +281,7 @@ struct FixedKVNode : ChildableCachedExtent { node_key_t key) { assert(children.capacity()); + assert(key == get_key_from_idx(pos)); auto child = children[pos]; ceph_assert(!is_reserved_ptr(child)); if (is_valid_child_ptr(child)) { @@ -632,11 +639,17 @@ struct FixedKVInternalNode } } - bool is_child_stable(op_context_t<NODE_KEY>, uint16_t pos) const final { + bool is_child_stable( + op_context_t<NODE_KEY>, + uint16_t pos, + NODE_KEY key) const final { ceph_abort("impossible"); return false; } - bool is_child_data_stable(op_context_t<NODE_KEY>, uint16_t pos) const final { + bool is_child_data_stable( + op_context_t<NODE_KEY>, + uint16_t pos, + NODE_KEY key) const final { ceph_abort("impossible"); return false; } @@ -1004,14 +1017,29 @@ struct FixedKVLeafNode node_layout_t(this->get_bptr().c_str()) {} FixedKVLeafNode(const FixedKVLeafNode &rhs) : FixedKVNode<NODE_KEY>(rhs), - node_layout_t(this->get_bptr().c_str()) {} + node_layout_t(this->get_bptr().c_str()), + modifications(rhs.modifications) {} static constexpr bool do_has_children = has_children; + // for the stable extent, 
modifications is always 0; + // it will increase for each transaction-local change, so that + // modifications can be detected (see BtreeLBAMapping.parent_modifications) + uint64_t modifications = 0; + bool have_children() const final { return do_has_children; } + void on_modify() { + modifications++; + } + + bool modified_since(uint64_t v) const { + ceph_assert(v <= modifications); + return v != modifications; + } + bool is_leaf_and_has_children() const final { return has_children; } @@ -1025,14 +1053,25 @@ struct FixedKVLeafNode // 2. The child extent is stable // // For reserved mappings, the return values are undefined. - bool is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final { - return _is_child_stable(c, pos); + bool is_child_stable( + op_context_t<NODE_KEY> c, + uint16_t pos, + NODE_KEY key) const final { + return _is_child_stable(c, pos, key); } - bool is_child_data_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final { - return _is_child_stable(c, pos, true); + bool is_child_data_stable( + op_context_t<NODE_KEY> c, + uint16_t pos, + NODE_KEY key) const final { + return _is_child_stable(c, pos, key, true); } - bool _is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos, bool data_only = false) const { + bool _is_child_stable( + op_context_t<NODE_KEY> c, + uint16_t pos, + NODE_KEY key, + bool data_only = false) const { + assert(key == get_key_from_idx(pos)); auto child = this->children[pos]; if (is_reserved_ptr(child)) { return true; @@ -1108,6 +1147,7 @@ struct FixedKVLeafNode this->copy_sources.clear(); } } + modifications = 0; assert(this->is_initial_pending() ? this->copy_sources.empty(): true); @@ -1129,6 +1169,7 @@ struct FixedKVLeafNode } else { this->set_parent_tracker_from_prior_instance(); } + modifications = 0; } uint16_t lower_bound_offset(NODE_KEY key) const final { diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index a737b2be29c..ef1fec8766c 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -884,7 +884,7 @@ void Cache::mark_transaction_conflicted( if (t.get_src() != Transaction::src_t::READ) { io_stat_t retire_stat; for (auto &i: t.retired_set) { - retire_stat.increment(i->get_length()); + retire_stat.increment(i.extent->get_length()); } efforts.retire.increment_stat(retire_stat); @@ -1136,7 +1136,7 @@ record_t Cache::prepare_record( t.read_set.clear(); t.write_set.clear(); - record_t record(trans_src); + record_t record(record_type_t::JOURNAL, trans_src); auto commit_time = seastar::lowres_system_clock::now(); // Add new copy of mutated blocks, set_io_wait to block until written @@ -1249,18 +1249,19 @@ record_t Cache::prepare_record( alloc_delta_t rel_delta; rel_delta.op = alloc_delta_t::op_types_t::CLEAR; for (auto &i: t.retired_set) { + auto &extent = i.extent; get_by_ext(efforts.retire_by_ext, - i->get_type()).increment(i->get_length()); - retire_stat.increment(i->get_length()); - DEBUGT("retired and remove extent -- {}", t, *i); - commit_retire_extent(t, i); - if (is_backref_mapped_extent_node(i) - || is_retired_placeholder(i->get_type())) { + extent->get_type()).increment(extent->get_length()); + retire_stat.increment(extent->get_length()); + DEBUGT("retired and remove extent -- {}", t, *extent); + commit_retire_extent(t, extent); + if (is_backref_mapped_extent_node(extent) + || is_retired_placeholder(extent->get_type())) { rel_delta.alloc_blk_ranges.emplace_back( - i->get_paddr(), + extent->get_paddr(), L_ADDR_NULL, - i->get_length(), - i->get_type()); + 
extent->get_length(), + extent->get_type()); } } alloc_deltas.emplace_back(std::move(rel_delta)); @@ -1621,7 +1622,8 @@ void Cache::complete_commit( } for (auto &i: t.retired_set) { - epm.mark_space_free(i->get_paddr(), i->get_length()); + auto &extent = i.extent; + epm.mark_space_free(extent->get_paddr(), extent->get_length()); } for (auto &i: t.existing_block_list) { if (i->is_valid()) { @@ -1638,24 +1640,25 @@ void Cache::complete_commit( last_commit = start_seq; for (auto &i: t.retired_set) { - i->dirty_from_or_retired_at = start_seq; - if (is_backref_mapped_extent_node(i) - || is_retired_placeholder(i->get_type())) { + auto &extent = i.extent; + extent->dirty_from_or_retired_at = start_seq; + if (is_backref_mapped_extent_node(extent) + || is_retired_placeholder(extent->get_type())) { DEBUGT("backref_list free {} len {}", t, - i->get_paddr(), - i->get_length()); + extent->get_paddr(), + extent->get_length()); backref_list.emplace_back( std::make_unique<backref_entry_t>( - i->get_paddr(), + extent->get_paddr(), L_ADDR_NULL, - i->get_length(), - i->get_type(), + extent->get_length(), + extent->get_type(), start_seq)); - } else if (is_backref_node(i->get_type())) { - remove_backref_extent(i->get_paddr()); + } else if (is_backref_node(extent->get_type())) { + remove_backref_extent(extent->get_paddr()); } else { - ERRORT("{}", t, *i); + ERRORT("{}", t, *extent); ceph_abort("not possible"); } } diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index b2bcbcae9ff..5af65f4b9e4 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -953,10 +953,12 @@ public: NULL_GENERATION, t.get_trans_id()); + auto extent = ext->template cast<T>(); + extent->set_laddr(remap_laddr); t.add_fresh_extent(ext); SUBTRACET(seastore_cache, "allocated {} {}B, hint={}, has ptr? {} -- {}", - t, T::TYPE, remap_length, remap_laddr, original_bptr.has_value(), *ext); - return ext; + t, T::TYPE, remap_length, remap_laddr, original_bptr.has_value(), *extent); + return extent; } /** @@ -1514,9 +1516,6 @@ private: } }; - template <typename CounterT> - using counter_by_src_t = std::array<CounterT, TRANSACTION_TYPE_MAX>; - static constexpr std::size_t NUM_SRC_COMB = TRANSACTION_TYPE_MAX * (TRANSACTION_TYPE_MAX + 1) / 2; @@ -1556,14 +1555,6 @@ private: } stats; template <typename CounterT> - CounterT& get_by_src( - counter_by_src_t<CounterT>& counters_by_src, - Transaction::src_t src) { - assert(static_cast<std::size_t>(src) < counters_by_src.size()); - return counters_by_src[static_cast<std::size_t>(src)]; - } - - template <typename CounterT> CounterT& get_by_ext( counter_by_extent_t<CounterT>& counters_by_ext, extent_types_t ext) { diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 4778117c8a6..e78a0d95028 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -651,6 +651,7 @@ private: friend struct paddr_cmp; friend struct ref_paddr_cmp; friend class ExtentIndex; + friend struct trans_retired_extent_link_t; /// Pointer to containing index (or null) ExtentIndex *parent_index = nullptr; @@ -735,6 +736,7 @@ private: protected: trans_view_set_t mutation_pendings; + trans_view_set_t retired_transactions; CachedExtent(CachedExtent &&other) = delete; CachedExtent(ceph::bufferptr &&_ptr) : ptr(std::move(_ptr)) { @@ -884,17 +886,54 @@ struct paddr_cmp { } }; +// trans_retired_extent_link_t is used to link stable extents with +// the transactions that retired them. 
With this link, we can find +// out whether an extent has been retired by a specific transaction +// in a way that's more efficient than searching through the transaction's +// retired_set (Transaction::is_retired()) +struct trans_retired_extent_link_t { + CachedExtentRef extent; + // We use trans_spec_view_t instead of transaction_id_t, so that, + // when a transaction is deleted or reset, we can efficiently remove + // that transaction from the extents' extent-transaction link set. + // Otherwise, we have to search through each extent's "retired_transactions" + // to remove the transaction + trans_spec_view_t trans_view; + trans_retired_extent_link_t(CachedExtentRef extent, transaction_id_t id) + : extent(extent), trans_view{id} + { + assert(extent->is_stable()); + extent->retired_transactions.insert(trans_view); + } +}; + /// Compare extent refs by paddr struct ref_paddr_cmp { using is_transparent = paddr_t; - bool operator()(const CachedExtentRef &lhs, const CachedExtentRef &rhs) const { - return lhs->poffset < rhs->poffset; - } - bool operator()(const paddr_t &lhs, const CachedExtentRef &rhs) const { - return lhs < rhs->poffset; - } - bool operator()(const CachedExtentRef &lhs, const paddr_t &rhs) const { - return lhs->poffset < rhs; + bool operator()( + const trans_retired_extent_link_t &lhs, + const trans_retired_extent_link_t &rhs) const { + return lhs.extent->poffset < rhs.extent->poffset; + } + bool operator()( + const paddr_t &lhs, + const trans_retired_extent_link_t &rhs) const { + return lhs < rhs.extent->poffset; + } + bool operator()( + const trans_retired_extent_link_t &lhs, + const paddr_t &rhs) const { + return lhs.extent->poffset < rhs; + } + bool operator()( + const CachedExtentRef &lhs, + const trans_retired_extent_link_t &rhs) const { + return lhs->poffset < rhs.extent->poffset; + } + bool operator()( + const trans_retired_extent_link_t &lhs, + const CachedExtentRef &rhs) const { + return lhs.extent->poffset < rhs->poffset; } }; @@ -910,7 +949,7 @@ class addr_extent_set_base_t using pextent_set_t = addr_extent_set_base_t< paddr_t, - CachedExtentRef, + trans_retired_extent_link_t, ref_paddr_cmp >; @@ -1112,6 +1151,15 @@ public: bool is_zero_reserved() const { return !get_val().is_real(); } + virtual bool is_parent_valid() const = 0; + virtual bool parent_modified() const { + ceph_abort("impossible"); + return false; + }; + + virtual void maybe_fix_pos() { + ceph_abort("impossible"); + } virtual ~PhysicalNodeMapping() {} protected: diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index c5b5751bdb3..fb8bafde960 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -84,7 +84,7 @@ SegmentedOolWriter::do_write( return do_write(t, extents); }); } - record_t record(TRANSACTION_TYPE_NULL); + record_t record(record_type_t::OOL, t.get_src()); std::list<LogicalCachedExtentRef> pending_extents; auto commit_time = seastar::lowres_system_clock::now(); @@ -191,12 +191,12 @@ void ExtentPlacementManager::init( dynamic_max_rewrite_generation = MAX_REWRITE_GENERATION; } - if (trimmer->get_journal_type() == journal_type_t::SEGMENTED) { + if (trimmer->get_backend_type() == backend_type_t::SEGMENTED) { auto segment_cleaner = dynamic_cast<SegmentCleaner*>(cleaner.get()); ceph_assert(segment_cleaner != nullptr); auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1); - data_writers_by_gen.resize(num_writers, {}); + 
data_writers_by_gen.resize(num_writers, nullptr); for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) { writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>( data_category_t::DATA, gen, *segment_cleaner,
@@ -217,11 +217,11 @@ void ExtentPlacementManager::init( add_device(device); } } else { - assert(trimmer->get_journal_type() == journal_type_t::RANDOM_BLOCK); + assert(trimmer->get_backend_type() == backend_type_t::RANDOM_BLOCK); auto rb_cleaner = dynamic_cast<RBMCleaner*>(cleaner.get()); ceph_assert(rb_cleaner != nullptr); auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1); - data_writers_by_gen.resize(num_writers, {}); + data_writers_by_gen.resize(num_writers, nullptr); md_writers_by_gen.resize(num_writers, {}); writer_refs.emplace_back(std::make_unique<RandomBlockOolWriter>( rb_cleaner));
@@ -270,6 +270,161 @@ void ExtentPlacementManager::set_primary_device(Device *device) ceph_assert(devices_by_id[device->get_device_id()] == device); }
+device_stats_t +ExtentPlacementManager::get_device_stats( + const writer_stats_t &journal_stats, + bool report_detail) const +{ + LOG_PREFIX(ExtentPlacementManager::get_device_stats); + + /* + * RecordSubmitter::get_stats() isn't reentrant. + * Refer to EPM::init() for the writers. + */ + + writer_stats_t main_stats = journal_stats; + std::vector<writer_stats_t> main_writer_stats; + using enum data_category_t; + if (get_main_backend_type() == backend_type_t::SEGMENTED) { + // 0. oolmdat + main_writer_stats.emplace_back( + get_writer(METADATA, OOL_GENERATION)->get_stats()); + main_stats.add(main_writer_stats.back()); + // 1. ooldata + main_writer_stats.emplace_back( + get_writer(DATA, OOL_GENERATION)->get_stats()); + main_stats.add(main_writer_stats.back()); + // 2. mainmdat + main_writer_stats.emplace_back(); + for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) { + const auto &writer = get_writer(METADATA, gen); + ceph_assert(writer->get_type() == backend_type_t::SEGMENTED); + main_writer_stats.back().add(writer->get_stats()); + } + main_stats.add(main_writer_stats.back()); + // 3. maindata + main_writer_stats.emplace_back(); + for (rewrite_gen_t gen = MIN_REWRITE_GENERATION; gen < MIN_COLD_GENERATION; ++gen) { + const auto &writer = get_writer(DATA, gen); + ceph_assert(writer->get_type() == backend_type_t::SEGMENTED); + main_writer_stats.back().add(writer->get_stats()); + } + main_stats.add(main_writer_stats.back()); + } else { // RBM + ceph_assert(get_main_backend_type() == backend_type_t::RANDOM_BLOCK); + // In RBM, md_writer and data_writer share a single writer, so we only + // collect the md_writer's stats here. + main_writer_stats.emplace_back( + get_writer(METADATA, OOL_GENERATION)->get_stats()); + main_stats.add(main_writer_stats.back()); + } + + writer_stats_t cold_stats = {}; + std::vector<writer_stats_t> cold_writer_stats; + bool has_cold_tier = background_process.has_cold_tier(); + if (has_cold_tier) { + // 0. coldmdat + cold_writer_stats.emplace_back(); + for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + const auto &writer = get_writer(METADATA, gen); + ceph_assert(writer->get_type() == backend_type_t::SEGMENTED); + cold_writer_stats.back().add(writer->get_stats()); + } + cold_stats.add(cold_writer_stats.back()); + // 1. 
colddata + cold_writer_stats.emplace_back(); + for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + const auto &writer = get_writer(DATA, gen); + ceph_assert(writer->get_type() == backend_type_t::SEGMENTED); + cold_writer_stats.back().add(writer->get_stats()); + } + cold_stats.add(cold_writer_stats.back()); + } + + auto now = seastar::lowres_clock::now(); + if (last_tp == seastar::lowres_clock::time_point::min()) { + last_tp = now; + return {}; + } + std::chrono::duration<double> duration_d = now - last_tp; + double seconds = duration_d.count(); + last_tp = now; + + if (report_detail) { + std::ostringstream oss; + auto report_writer_stats = [seconds, &oss]( + const char* name, + const writer_stats_t& stats) { + oss << "\n" << name << ": " << writer_stats_printer_t{seconds, stats}; + }; + report_writer_stats("tier-main", main_stats); + report_writer_stats(" inline", journal_stats); + if (get_main_backend_type() == backend_type_t::SEGMENTED) { + report_writer_stats(" oolmdat", main_writer_stats[0]); + report_writer_stats(" ooldata", main_writer_stats[1]); + report_writer_stats(" mainmdat", main_writer_stats[2]); + report_writer_stats(" maindata", main_writer_stats[3]); + } else { // RBM + report_writer_stats(" ool", main_writer_stats[0]); + } + if (has_cold_tier) { + report_writer_stats("tier-cold", cold_stats); + report_writer_stats(" coldmdat", cold_writer_stats[0]); + report_writer_stats(" colddata", cold_writer_stats[1]); + } + + auto report_by_src = [seconds, has_cold_tier, &oss, + &journal_stats, + &main_writer_stats, + &cold_writer_stats](transaction_type_t src) { + auto t_stats = get_by_src(journal_stats.stats_by_src, src); + for (const auto &writer_stats : main_writer_stats) { + t_stats += get_by_src(writer_stats.stats_by_src, src); + } + for (const auto &writer_stats : cold_writer_stats) { + t_stats += get_by_src(writer_stats.stats_by_src, src); + } + if (src == transaction_type_t::READ) { + ceph_assert(t_stats.is_empty()); + return; + } + oss << "\n" << src << ": " + << tw_stats_printer_t{seconds, t_stats}; + + auto report_tw_stats = [seconds, src, &oss]( + const char* name, + const writer_stats_t& stats) { + const auto& tw_stats = get_by_src(stats.stats_by_src, src); + if (tw_stats.is_empty()) { + return; + } + oss << "\n " << name << ": " + << tw_stats_printer_t{seconds, tw_stats}; + }; + report_tw_stats("inline", journal_stats); + report_tw_stats("oolmdat", main_writer_stats[0]); + report_tw_stats("ooldata", main_writer_stats[1]); + report_tw_stats("mainmdat", main_writer_stats[2]); + report_tw_stats("maindata", main_writer_stats[3]); + if (has_cold_tier) { + report_tw_stats("coldmdat", cold_writer_stats[0]); + report_tw_stats("colddata", cold_writer_stats[1]); + } + }; + for (uint8_t _src=0; _src<TRANSACTION_TYPE_MAX; ++_src) { + auto src = static_cast<transaction_type_t>(_src); + report_by_src(src); + } + + INFO("{}", oss.str()); + } + + main_stats.add(cold_stats); + return {main_stats.io_depth_stats.num_io, + main_stats.io_depth_stats.num_io_grouped, + main_stats.get_total_bytes()}; +} + ExtentPlacementManager::open_ertr::future<> ExtentPlacementManager::open_for_write() { @@ -869,6 +1024,10 @@ RandomBlockOolWriter::do_write( bp = ceph::bufferptr(ex->get_bptr(), offset, len); } else { bp = ex->get_bptr(); + auto& trans_stats = get_by_src(w_stats.stats_by_src, t.get_src()); + ++(trans_stats.num_records); + trans_stats.data_bytes += ex->get_length(); + w_stats.data_bytes += ex->get_length(); } return trans_intr::make_interruptible( 
rbm->write(paddr + offset,
diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 458336c02e9..2985308e13b 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h
@@ -3,7 +3,8 @@ #pragma once -#include "seastar/core/gate.hh" +#include <seastar/core/gate.hh> +#include <seastar/core/lowres_clock.hh> #include "crimson/os/seastore/async_cleaner.h" #include "crimson/os/seastore/cached_extent.h"
@@ -30,6 +31,10 @@ class ExtentOolWriter { public: virtual ~ExtentOolWriter() {} + virtual backend_type_t get_type() const = 0; + + virtual writer_stats_t get_stats() const = 0; + using open_ertr = base_ertr; virtual open_ertr::future<> open() = 0;
@@ -68,6 +73,14 @@ public: SegmentProvider &sp, SegmentSeqAllocator &ssa); + backend_type_t get_type() const final { + return backend_type_t::SEGMENTED; + } + + writer_stats_t get_stats() const final { + return record_submitter.get_stats(); + } + open_ertr::future<> open() final { return record_submitter.open(false).discard_result(); }
@@ -119,8 +132,21 @@ public: RandomBlockOolWriter(RBMCleaner* rb_cleaner) : rb_cleaner(rb_cleaner) {} + backend_type_t get_type() const final { + return backend_type_t::RANDOM_BLOCK; + } + + writer_stats_t get_stats() const final { + writer_stats_t ret = w_stats; + ret.minus(last_w_stats); + last_w_stats = w_stats; + return ret; + } + using open_ertr = ExtentOolWriter::open_ertr; open_ertr::future<> open() final { + w_stats = {}; + last_w_stats = {}; return open_ertr::now(); }
@@ -170,6 +196,8 @@ private: RBMCleaner* rb_cleaner; seastar::gate write_guard; + writer_stats_t w_stats; + mutable writer_stats_t last_w_stats; }; struct cleaner_usage_t {
@@ -246,12 +274,11 @@ public: auto writer = get_writer(placement_hint_t::REWRITE, get_extent_category(extent->get_type()), OOL_GENERATION); - ceph_assert(writer); return writer->can_inplace_rewrite(t, extent); } - journal_type_t get_journal_type() const { - return background_process.get_journal_type(); + backend_type_t get_backend_type() const { + return background_process.get_backend_type(); } extent_len_t get_block_size() const {
@@ -269,6 +296,10 @@ public: return background_process.get_stat(); } + device_stats_t get_device_stats( + const writer_stats_t &journal_stats, + bool report_detail) const; + using mount_ertr = crimson::errorator< crimson::ct_error::input_output_error>; using mount_ret = mount_ertr::future<>;
@@ -323,9 +354,7 @@ public: addr = make_record_relative_paddr(0); } else { assert(category == data_category_t::METADATA); - assert(md_writers_by_gen[generation_to_writer(gen)]); - addr = md_writers_by_gen[ - generation_to_writer(gen)]->alloc_paddr(length); + addr = get_writer(hint, category, gen)->alloc_paddr(length); } assert(!(category == data_category_t::DATA));
@@ -378,9 +407,7 @@ public: { #endif assert(category == data_category_t::DATA); - assert(data_writers_by_gen[generation_to_writer(gen)]); - auto addrs = data_writers_by_gen[ - generation_to_writer(gen)]->alloc_paddrs(length); + auto addrs = get_writer(hint, category, gen)->alloc_paddrs(length); for (auto &ext : addrs) { auto left = ext.len; while (left > 0) {
@@ -593,15 +620,40 @@ private: data_category_t category, rewrite_gen_t gen) { assert(hint < placement_hint_t::NUM_HINTS); + // TODO: might be worth considering the hint + return get_writer(category, gen); + } + + ExtentOolWriter* get_writer(data_category_t category, + rewrite_gen_t gen) { + assert(is_rewrite_generation(gen)); + 
assert(gen != INLINE_GENERATION); + assert(gen <= dynamic_max_rewrite_generation); + ExtentOolWriter* ret = nullptr; + if (category == data_category_t::DATA) { + ret = data_writers_by_gen[generation_to_writer(gen)]; + } else { + assert(category == data_category_t::METADATA); + ret = md_writers_by_gen[generation_to_writer(gen)]; + } + assert(ret != nullptr); + return ret; + } + + const ExtentOolWriter* get_writer(data_category_t category, + rewrite_gen_t gen) const { assert(is_rewrite_generation(gen)); assert(gen != INLINE_GENERATION); assert(gen <= dynamic_max_rewrite_generation); + ExtentOolWriter* ret = nullptr; if (category == data_category_t::DATA) { - return data_writers_by_gen[generation_to_writer(gen)]; + ret = data_writers_by_gen[generation_to_writer(gen)]; } else { assert(category == data_category_t::METADATA); - return md_writers_by_gen[generation_to_writer(gen)]; + ret = md_writers_by_gen[generation_to_writer(gen)]; } + assert(ret != nullptr); + return ret; } /** @@ -644,8 +696,8 @@ private: } } - journal_type_t get_journal_type() const { - return trimmer->get_journal_type(); + backend_type_t get_backend_type() const { + return trimmer->get_backend_type(); } bool has_cold_tier() const { @@ -764,7 +816,7 @@ private: seastar::future<> stop_background(); backend_type_t get_main_backend_type() const { - return get_journal_type(); + return get_backend_type(); } // Testing interfaces @@ -1031,6 +1083,9 @@ private: SegmentSeqAllocatorRef ool_segment_seq_allocator; extent_len_t max_data_allocation_size = 0; + mutable seastar::lowres_clock::time_point last_tp = + seastar::lowres_clock::time_point::min(); + friend class ::transaction_manager_test_t; }; diff --git a/src/crimson/os/seastore/journal.h b/src/crimson/os/seastore/journal.h index 633aa84d7db..724b50041fd 100644 --- a/src/crimson/os/seastore/journal.h +++ b/src/crimson/os/seastore/journal.h @@ -23,6 +23,9 @@ class JournalTrimmer; class Journal { public: virtual JournalTrimmer &get_trimmer() = 0; + + virtual writer_stats_t get_writer_stats() const = 0; + /** * initializes journal for mkfs writes -- must run prior to calls * to submit_record. 
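The get_writer_stats()/get_stats() accessors introduced above all follow the same interval-delta pattern: the writer keeps a cumulative writer_stats_t plus a mutable snapshot taken at the previous read, and each read returns the difference and refreshes the snapshot (compare RandomBlockOolWriter::get_stats() and RecordSubmitter::get_stats() elsewhere in this diff). A minimal self-contained sketch of that pattern follows; the two-counter stats struct is a simplification for illustration only, as the real writer_stats_t also carries per-source breakdowns and io-depth stats.

    #include <cstdint>

    // Simplified stand-in for writer_stats_t.
    struct stats_sketch_t {
      uint64_t num_records = 0;
      uint64_t data_bytes = 0;
      void minus(const stats_sketch_t &o) {
        num_records -= o.num_records;
        data_bytes -= o.data_bytes;
      }
    };

    class writer_sketch_t {
    public:
      void account(uint64_t bytes) {
        ++stats.num_records;
        stats.data_bytes += bytes;
      }
      // Returns the stats accumulated since the previous call, so a
      // periodic reporter can turn counters into rates without keeping
      // its own baselines; last_stats is mutable because reading the
      // stats resets the baseline.
      stats_sketch_t get_stats() const {
        stats_sketch_t ret = stats;
        ret.minus(last_stats);
        last_stats = stats;
        return ret;
      }
    private:
      stats_sketch_t stats;
      mutable stats_sketch_t last_stats;
    };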
@@ -103,7 +106,7 @@ public: virtual ~Journal() {} - virtual journal_type_t get_type() = 0; + virtual backend_type_t get_type() = 0; }; using JournalRef = std::unique_ptr<Journal>; diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.h b/src/crimson/os/seastore/journal/circular_bounded_journal.h index debe535aef3..077da32c9a2 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.h +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.h @@ -66,14 +66,18 @@ public: return trimmer; } + writer_stats_t get_writer_stats() const final { + return record_submitter.get_stats(); + } + open_for_mkfs_ret open_for_mkfs() final; open_for_mount_ret open_for_mount() final; close_ertr::future<> close() final; - journal_type_t get_type() final { - return journal_type_t::RANDOM_BLOCK; + backend_type_t get_type() final { + return backend_type_t::RANDOM_BLOCK; } submit_record_ret submit_record( diff --git a/src/crimson/os/seastore/journal/record_submitter.cc b/src/crimson/os/seastore/journal/record_submitter.cc index 5c892dcee22..d784c33cfc3 100644 --- a/src/crimson/os/seastore/journal/record_submitter.cc +++ b/src/crimson/os/seastore/journal/record_submitter.cc @@ -61,8 +61,7 @@ RecordBatch::add_pending( }); } -std::pair<ceph::bufferlist, record_group_size_t> -RecordBatch::encode_batch( +ceph::bufferlist RecordBatch::encode_batch( const journal_seq_t& committed_to, segment_nonce_t segment_nonce) { @@ -72,13 +71,12 @@ RecordBatch::encode_batch( state = state_t::SUBMITTING; submitting_size = pending.get_size(); - auto gsize = pending.size; - submitting_length = gsize.get_encoded_length(); - submitting_mdlength = gsize.get_mdlength(); + submitting_length = pending.size.get_encoded_length(); + submitting_mdlength = pending.size.get_mdlength(); auto bl = encode_records(pending, committed_to, segment_nonce); // Note: pending is cleared here assert(bl.length() == submitting_length); - return std::make_pair(bl, gsize); + return bl; } void RecordBatch::set_result( @@ -103,24 +101,24 @@ void RecordBatch::set_result( io_promise.reset(); } -std::pair<ceph::bufferlist, record_group_size_t> +ceph::bufferlist RecordBatch::submit_pending_fast( - record_t&& record, + record_group_t&& group, extent_len_t block_size, const journal_seq_t& committed_to, segment_nonce_t segment_nonce) { + assert(group.get_size() == 1); + auto& record = group.records[0]; auto new_size = get_encoded_length_after(record, block_size); std::ignore = new_size; assert(state == state_t::EMPTY); assert(evaluate_submit(record.size, block_size).submit_size == new_size); - - auto group = record_group_t(std::move(record), block_size); - auto size = group.size; - assert(size == new_size); + assert(group.size == new_size); auto bl = encode_records(group, committed_to, segment_nonce); - assert(bl.length() == size.get_encoded_length()); - return std::make_pair(std::move(bl), size); + // Note: group is cleared here + assert(bl.length() == new_size.get_encoded_length()); + return bl; } RecordSubmitter::RecordSubmitter( @@ -174,6 +172,14 @@ bool RecordSubmitter::is_available() const return ret; } +writer_stats_t RecordSubmitter::get_stats() const +{ + writer_stats_t ret = stats; + ret.minus(last_stats); + last_stats = stats; + return ret; +} + RecordSubmitter::wa_ertr::future<> RecordSubmitter::wait_available() { @@ -292,14 +298,19 @@ RecordSubmitter::submit( state != state_t::FULL) { // fast path with direct write increment_io(); - auto [to_write, sizes] = p_current_batch->submit_pending_fast( - std::move(record), - 
journal_allocator.get_block_size(), + auto block_size = journal_allocator.get_block_size(); + auto rg = record_group_t(std::move(record), block_size); + account_submission(rg); + assert(stats.record_batch_stats.num_io == + stats.io_depth_stats.num_io); + record_group_size_t sizes = rg.size; + auto to_write = p_current_batch->submit_pending_fast( + std::move(rg), + block_size, get_committed_to(), journal_allocator.get_nonce()); DEBUG("{} fast submit {}, committed_to={}, outstanding_io={} ...", get_name(), sizes, get_committed_to(), num_outstanding_io); - account_submission(1, sizes); return journal_allocator.write(std::move(to_write) ).safe_then([mdlength = sizes.get_mdlength()](auto write_result) { return record_locator_t{ @@ -361,6 +372,7 @@ RecordSubmitter::open(bool is_mkfs) LOG_PREFIX(RecordSubmitter::open); DEBUG("{} register metrics", get_name()); stats = {}; + last_stats = {}; namespace sm = seastar::metrics; std::vector<sm::label_instance> label_instances; label_instances.push_back(sm::label_instance("submitter", get_name())); @@ -369,14 +381,8 @@ RecordSubmitter::open(bool is_mkfs) { sm::make_counter( "record_num", - stats.record_batch_stats.num_io, - sm::description("total number of records submitted"), - label_instances - ), - sm::make_counter( - "record_batch_num", stats.record_batch_stats.num_io_grouped, - sm::description("total number of records batched"), + sm::description("total number of records submitted"), label_instances ), sm::make_counter( @@ -405,7 +411,7 @@ RecordSubmitter::open(bool is_mkfs) ), sm::make_counter( "record_group_data_bytes", - stats.record_group_data_bytes, + stats.data_bytes, sm::description("bytes of data when write record groups"), label_instances ), @@ -478,14 +484,22 @@ void RecordSubmitter::decrement_io_with_flush() } void RecordSubmitter::account_submission( - std::size_t num, - const record_group_size_t& size) + const record_group_t& rg) { stats.record_group_padding_bytes += - (size.get_mdlength() - size.get_raw_mdlength()); - stats.record_group_metadata_bytes += size.get_raw_mdlength(); - stats.record_group_data_bytes += size.dlength; - stats.record_batch_stats.increment(num); + (rg.size.get_mdlength() - rg.size.get_raw_mdlength()); + stats.record_group_metadata_bytes += rg.size.get_raw_mdlength(); + stats.data_bytes += rg.size.dlength; + stats.record_batch_stats.increment(rg.get_size()); + + for (const record_t& r : rg.records) { + auto src = r.trans_type; + assert(is_modify_transaction(src)); + auto& trans_stats = get_by_src(stats.stats_by_src, src); + ++(trans_stats.num_records); + trans_stats.metadata_bytes += r.size.get_raw_mdlength(); + trans_stats.data_bytes += r.size.dlength; + } } void RecordSubmitter::finish_submit_batch( @@ -508,24 +522,30 @@ void RecordSubmitter::flush_current_batch() increment_io(); auto num = p_batch->get_num_records(); - auto [to_write, sizes] = p_batch->encode_batch( + const auto& rg = p_batch->get_record_group(); + assert(rg.get_size() == num); + record_group_size_t sizes = rg.size; + account_submission(rg); + assert(stats.record_batch_stats.num_io == + stats.io_depth_stats.num_io); + auto to_write = p_batch->encode_batch( get_committed_to(), journal_allocator.get_nonce()); + // Note: rg is cleared DEBUG("{} {} records, {}, committed_to={}, outstanding_io={} ...", get_name(), num, sizes, get_committed_to(), num_outstanding_io); - account_submission(num, sizes); std::ignore = journal_allocator.write(std::move(to_write) - ).safe_then([this, p_batch, FNAME, num, sizes=sizes](auto write_result) { + 
).safe_then([this, p_batch, FNAME, num, sizes](auto write_result) { TRACE("{} {} records, {}, write done with {}", get_name(), num, sizes, write_result); finish_submit_batch(p_batch, write_result); }).handle_error( - crimson::ct_error::all_same_way([this, p_batch, FNAME, num, sizes=sizes](auto e) { + crimson::ct_error::all_same_way([this, p_batch, FNAME, num, sizes](auto e) { ERROR("{} {} records, {}, got error {}", get_name(), num, sizes, e); finish_submit_batch(p_batch, std::nullopt); return seastar::now(); }) - ).handle_exception([this, p_batch, FNAME, num, sizes=sizes](auto e) { + ).handle_exception([this, p_batch, FNAME, num, sizes](auto e) { ERROR("{} {} records, {}, got exception {}", get_name(), num, sizes, e); finish_submit_batch(p_batch, std::nullopt); diff --git a/src/crimson/os/seastore/journal/record_submitter.h b/src/crimson/os/seastore/journal/record_submitter.h index eedd2dd8cfd..d69a5ac96f0 100644 --- a/src/crimson/os/seastore/journal/record_submitter.h +++ b/src/crimson/os/seastore/journal/record_submitter.h @@ -113,6 +113,10 @@ public: } } + const record_group_t& get_record_group() const { + return pending; + } + struct evaluation_t { record_group_size_t submit_size; bool is_full; @@ -150,7 +154,7 @@ public: extent_len_t block_size); // Encode the batched records for write. - std::pair<ceph::bufferlist, record_group_size_t> encode_batch( + ceph::bufferlist encode_batch( const journal_seq_t& committed_to, segment_nonce_t segment_nonce); @@ -165,8 +169,8 @@ public: // the intervention of the shared io_promise. // // Note the current RecordBatch can be reused afterwards. - std::pair<ceph::bufferlist, record_group_size_t> submit_pending_fast( - record_t&&, + ceph::bufferlist submit_pending_fast( + record_group_t&&, extent_len_t block_size, const journal_seq_t& committed_to, segment_nonce_t segment_nonce); @@ -217,16 +221,6 @@ class RecordSubmitter { // OVERFLOW: outstanding_io > io_depth_limit is impossible }; - struct grouped_io_stats { - uint64_t num_io = 0; - uint64_t num_io_grouped = 0; - - void increment(uint64_t num_grouped_io) { - ++num_io; - num_io_grouped += num_grouped_io; - } - }; - using base_ertr = crimson::errorator< crimson::ct_error::input_output_error>; @@ -248,6 +242,9 @@ public: // whether is available to submit a record bool is_available() const; + // get the stats since last_stats + writer_stats_t get_stats() const; + // wait for available if cannot submit, should check is_available() again // when the future is resolved. 
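The note above ("should check is_available() again when the future is resolved") describes the classic recheck-after-wakeup protocol: between wait_available() resolving and this caller running, another submitter may have consumed the freed capacity. The following self-contained analogy uses std::condition_variable purely to illustrate why the predicate must be re-evaluated; the real code is future-based on a seastar reactor, not threaded, and the names and depth limit here are illustrative.

    #include <condition_variable>
    #include <mutex>

    class submitter_analogy_t {
      std::mutex m;
      std::condition_variable cv;
      unsigned outstanding = 0;
      static constexpr unsigned depth_limit = 8;
    public:
      void submit_when_available() {
        std::unique_lock<std::mutex> l(m);
        // The predicate is re-evaluated on every wakeup: capacity that
        // was free when notify fired may already be taken by the time
        // this waiter runs -- the analogue of re-checking is_available().
        cv.wait(l, [this] { return outstanding < depth_limit; });
        ++outstanding;  // now safe to submit
      }
      void on_write_complete() {
        {
          std::lock_guard<std::mutex> g(m);
          --outstanding;
        }
        cv.notify_one();
      }
    };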
using wa_ertr = base_ertr; @@ -306,7 +303,7 @@ private: free_batch_ptrs.pop_front(); } - void account_submission(std::size_t, const record_group_size_t&); + void account_submission(const record_group_t&); using maybe_result_t = RecordBatch::maybe_result_t; void finish_submit_batch(RecordBatch*, maybe_result_t); @@ -334,13 +331,9 @@ private: // wait for decrement_io_with_flush() std::optional<seastar::promise<> > wait_unfull_flush_promise; - struct { - grouped_io_stats record_batch_stats; - grouped_io_stats io_depth_stats; - uint64_t record_group_padding_bytes = 0; - uint64_t record_group_metadata_bytes = 0; - uint64_t record_group_data_bytes = 0; - } stats; + writer_stats_t stats; + mutable writer_stats_t last_stats; + seastar::metrics::metric_group metrics; }; diff --git a/src/crimson/os/seastore/journal/segmented_journal.h b/src/crimson/os/seastore/journal/segmented_journal.h index 3d580817c0f..736b8c01293 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.h +++ b/src/crimson/os/seastore/journal/segmented_journal.h @@ -34,6 +34,10 @@ public: return trimmer; } + writer_stats_t get_writer_stats() const final { + return record_submitter.get_stats(); + } + open_for_mkfs_ret open_for_mkfs() final; open_for_mount_ret open_for_mount() final; @@ -52,8 +56,8 @@ public: write_pipeline = _write_pipeline; } - journal_type_t get_type() final { - return journal_type_t::SEGMENTED; + backend_type_t get_type() final { + return backend_type_t::SEGMENTED; } seastar::future<> finish_commit(transaction_type_t type) { return seastar::now(); diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 74364d6245c..bf0a8e3ec79 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -307,10 +307,27 @@ BtreeLBAManager::_alloc_extents( std::vector<alloc_mapping_info_t> &alloc_infos, extent_ref_count_t refcount) { + ceph_assert(hint != L_ADDR_NULL); extent_len_t total_len = 0; +#ifndef NDEBUG + bool laddr_null = (alloc_infos.front().key == L_ADDR_NULL); + laddr_t last_end = hint; for (auto &info : alloc_infos) { - total_len += info.len; + assert((info.key == L_ADDR_NULL) == (laddr_null)); + if (!laddr_null) { + assert(info.key >= last_end); + last_end = info.key + info.len; + } + } +#endif + if (alloc_infos.front().key == L_ADDR_NULL) { + for (auto &info : alloc_infos) { + total_len += info.len; + } + } else { + total_len = alloc_infos.back().key + alloc_infos.back().len - hint; } + struct state_t { laddr_t last_end; @@ -379,6 +396,9 @@ BtreeLBAManager::_alloc_extents( alloc_infos, [c, addr, hint, &btree, &state, FNAME, total_len, &rets, refcount](auto &alloc_info) { + if (alloc_info.key != L_ADDR_NULL) { + state.last_end = alloc_info.key; + } return btree.insert( c, *state.insert_iter, @@ -396,13 +416,23 @@ BtreeLBAManager::_alloc_extents( c.trans, addr, total_len, hint, state.last_end); if (alloc_info.extent) { ceph_assert(alloc_info.val.is_paddr()); + assert(alloc_info.val == iter.get_val().pladdr); + assert(alloc_info.len == iter.get_val().len); + if (alloc_info.extent->has_laddr()) { + assert(alloc_info.key == alloc_info.extent->get_laddr()); + assert(alloc_info.key == iter.get_key()); + } else { + alloc_info.extent->set_laddr(iter.get_key()); + } alloc_info.extent->set_laddr(iter.get_key()); } ceph_assert(inserted); rets.emplace_back(iter.get_pin(c)); return iter.next(c).si_then([&state, &alloc_info](auto it) { 
state.insert_iter = it; - state.last_end += alloc_info.len; + if (alloc_info.key == L_ADDR_NULL) { + state.last_end += alloc_info.len; + } }); }); }); diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index 43807efb5fc..ca25dc6a2a0 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -25,6 +25,8 @@ namespace crimson::os::seastore::lba_manager::btree { +struct LBALeafNode; + class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> { // To support cloning, there are two kinds of lba mappings: // 1. physical lba mapping: the pladdr in the value of which is the paddr of @@ -62,7 +64,7 @@ public: : BtreeNodeMapping(ctx) {} BtreeLBAMapping( op_context_t<laddr_t> c, - CachedExtentRef parent, + LBALeafNodeRef parent, uint16_t pos, lba_map_val_t &val, lba_node_meta_t meta) @@ -78,7 +80,8 @@ public: intermediate_key(indirect ? val.pladdr.get_laddr() : L_ADDR_NULL), intermediate_length(indirect ? val.len : 0), raw_val(val.pladdr), - map_val(val) + map_val(val), + parent_modifications(parent->modifications) {} lba_map_val_t get_map_val() const { @@ -154,6 +157,25 @@ public: len = length; } + uint64_t get_parent_modifications() const { + return parent_modifications; + } + + bool parent_modified() const final { + ceph_assert(parent); + ceph_assert(is_parent_valid()); + auto &p = static_cast<LBALeafNode&>(*parent); + return p.modified_since(parent_modifications); + } + + void maybe_fix_pos() final { + assert(is_parent_valid()); + if (!parent_modified()) { + return; + } + auto &p = static_cast<LBALeafNode&>(*parent); + p.maybe_fix_mapping_pos(*this); + } protected: std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate( op_context_t<laddr_t> ctx) const final { @@ -165,9 +187,14 @@ protected: pin->indirect = indirect; pin->raw_val = raw_val; pin->map_val = map_val; + pin->parent_modifications = parent_modifications; return pin; } private: + void _new_pos(uint16_t pos) { + this->pos = pos; + } + laddr_t key = L_ADDR_NULL; bool indirect = false; laddr_t intermediate_key = L_ADDR_NULL; @@ -175,6 +202,8 @@ private: extent_len_t intermediate_length = 0; pladdr_t raw_val; lba_map_val_t map_val; + uint64_t parent_modifications = 0; + friend struct LBALeafNode; }; using BtreeLBAMappingRef = std::unique_ptr<BtreeLBAMapping>; @@ -225,10 +254,36 @@ public: struct alloc_mapping_info_t { + laddr_t key = L_ADDR_NULL; // once assigned, the allocation to + // key must be exact and successful extent_len_t len = 0; pladdr_t val; uint32_t checksum = 0; LogicalCachedExtent* extent = nullptr; + + static alloc_mapping_info_t create_zero(extent_len_t len) { + return {L_ADDR_NULL, len, P_ADDR_ZERO, 0, nullptr}; + } + static alloc_mapping_info_t create_indirect( + laddr_t laddr, + extent_len_t len, + laddr_t intermediate_key) { + return { + laddr, + len, + intermediate_key, + 0, // crc will only be used and checked with LBA direct mappings + // also see pin_to_extent(_by_type) + nullptr}; + } + static alloc_mapping_info_t create_direct( + laddr_t laddr, + extent_len_t len, + paddr_t paddr, + uint32_t checksum, + LogicalCachedExtent *extent) { + return {laddr, len, paddr, checksum, extent}; + } }; alloc_extent_ret reserve_region( @@ -237,7 +292,7 @@ public: extent_len_t len) final { std::vector<alloc_mapping_info_t> alloc_infos = { - alloc_mapping_info_t{len, P_ADDR_ZERO, 0, nullptr}}; + alloc_mapping_info_t::create_zero(len)}; return 
seastar::do_with( std::move(alloc_infos), [&t, hint, this](auto &alloc_infos) { @@ -261,12 +316,16 @@ public: laddr_t intermediate_key, laddr_t intermediate_base) final { - return alloc_cloned_mapping( + std::vector<alloc_mapping_info_t> alloc_infos = { + alloc_mapping_info_t::create_indirect( + laddr, len, intermediate_key)}; + return alloc_cloned_mappings( t, laddr, - len, - intermediate_key - ).si_then([&t, this, intermediate_base](auto imapping) { + std::move(alloc_infos) + ).si_then([&t, this, intermediate_base](auto imappings) { + assert(imappings.size() == 1); + auto &imapping = imappings.front(); return update_refcount(t, intermediate_base, 1, false ).si_then([imapping=std::move(imapping)](auto p) mutable { auto mapping = std::move(p.mapping); @@ -293,8 +352,14 @@ public: { // The real checksum will be updated upon transaction commit assert(ext.get_last_committed_crc() == 0); - std::vector<alloc_mapping_info_t> alloc_infos = {{ - ext.get_length(), ext.get_paddr(), ext.get_last_committed_crc(), &ext}}; + assert(!ext.has_laddr()); + std::vector<alloc_mapping_info_t> alloc_infos = { + alloc_mapping_info_t::create_direct( + L_ADDR_NULL, + ext.get_length(), + ext.get_paddr(), + ext.get_last_committed_crc(), + &ext)}; return seastar::do_with( std::move(alloc_infos), [this, &t, hint, refcount](auto &alloc_infos) { @@ -319,11 +384,13 @@ public: { std::vector<alloc_mapping_info_t> alloc_infos; for (auto &extent : extents) { - alloc_infos.emplace_back(alloc_mapping_info_t{ - extent->get_length(), - pladdr_t(extent->get_paddr()), - extent->get_last_committed_crc(), - extent.get()}); + alloc_infos.emplace_back( + alloc_mapping_info_t::create_direct( + extent->has_laddr() ? extent->get_laddr() : L_ADDR_NULL, + extent->get_length(), + extent->get_paddr(), + extent->get_last_committed_crc(), + extent.get())); } return seastar::do_with( std::move(alloc_infos), @@ -363,7 +430,7 @@ public: std::move(remaps), std::move(extents), std::move(orig_mapping), - [&t, FNAME, this](auto &ret, auto &remaps, + [&t, FNAME, this](auto &ret, const auto &remaps, auto &extents, auto &orig_mapping) { return update_refcount(t, orig_mapping->get_key(), -1, false ).si_then([&ret, this, &extents, &remaps, @@ -374,68 +441,80 @@ public: ret.ruret.addr.is_paddr() && !ret.ruret.addr.get_paddr().is_zero()); } - return trans_intr::do_for_each( - boost::make_counting_iterator(size_t(0)), - boost::make_counting_iterator(remaps.size()), - [&remaps, &t, this, &orig_mapping, &extents, FNAME, &ret](auto i) { - laddr_t orig_laddr = orig_mapping->get_key(); - extent_len_t orig_len = orig_mapping->get_length(); - paddr_t orig_paddr = orig_mapping->get_val(); - laddr_t intermediate_base = orig_mapping->is_indirect() - ? orig_mapping->get_intermediate_base() - : L_ADDR_NULL; - laddr_t intermediate_key = orig_mapping->is_indirect() - ? orig_mapping->get_intermediate_key() - : L_ADDR_NULL; - auto &remap = remaps[i]; - auto remap_offset = remap.offset; - auto remap_len = remap.len; - auto remap_laddr = orig_laddr + remap_offset; - auto remap_paddr = orig_paddr.add_offset(remap_offset); - if (orig_mapping->is_indirect()) { + auto fut = alloc_extent_iertr::make_ready_future< + std::vector<LBAMappingRef>>(); + laddr_t orig_laddr = orig_mapping->get_key(); + if (orig_mapping->is_indirect()) { + std::vector<alloc_mapping_info_t> alloc_infos; + for (auto &remap : remaps) { + extent_len_t orig_len = orig_mapping->get_length(); + paddr_t orig_paddr = orig_mapping->get_val(); + laddr_t intermediate_base = orig_mapping->is_indirect() + ? 
orig_mapping->get_intermediate_base() + : L_ADDR_NULL; + laddr_t intermediate_key = orig_mapping->is_indirect() + ? orig_mapping->get_intermediate_key() + : L_ADDR_NULL; + auto remap_offset = remap.offset; + auto remap_len = remap.len; + auto remap_laddr = orig_laddr + remap_offset; ceph_assert(intermediate_base != L_ADDR_NULL); ceph_assert(intermediate_key != L_ADDR_NULL); - remap_paddr = orig_paddr; - } - ceph_assert(remap_len < orig_len); - ceph_assert(remap_offset + remap_len <= orig_len); - ceph_assert(remap_len != 0); - SUBDEBUGT(seastore_lba, - "remap laddr: {}, remap paddr: {}, remap length: {}," - " intermediate_base: {}, intermediate_key: {}", t, - remap_laddr, remap_paddr, remap_len, - intermediate_base, intermediate_key); - auto fut = alloc_extent_iertr::make_ready_future<LBAMappingRef>(); - if (orig_mapping->is_indirect()) { - assert(intermediate_base != L_ADDR_NULL - && intermediate_key != L_ADDR_NULL); + ceph_assert(remap_len < orig_len); + ceph_assert(remap_offset + remap_len <= orig_len); + ceph_assert(remap_len != 0); + SUBDEBUGT(seastore_lba, + "remap laddr: {}, remap paddr: {}, remap length: {}," + " intermediate_base: {}, intermediate_key: {}", t, + remap_laddr, orig_paddr, remap_len, + intermediate_base, intermediate_key); auto remapped_intermediate_key = intermediate_key + remap_offset; - fut = alloc_cloned_mapping( - t, - remap_laddr, - remap_len, - remapped_intermediate_key - ).si_then([&orig_mapping](auto imapping) mutable { + alloc_infos.emplace_back( + alloc_mapping_info_t::create_indirect( + remap_laddr, + remap_len, + remapped_intermediate_key)); + } + fut = alloc_cloned_mappings( + t, + remaps.front().offset + orig_laddr, + std::move(alloc_infos) + ).si_then([&orig_mapping](auto imappings) mutable { + std::vector<LBAMappingRef> mappings; + for (auto &imapping : imappings) { auto mapping = orig_mapping->duplicate(); auto bmapping = static_cast<BtreeLBAMapping*>(mapping.get()); bmapping->adjust_mutable_indirect_attrs( imapping->get_key(), imapping->get_length(), imapping->get_intermediate_key()); - return seastar::make_ready_future<LBAMappingRef>( - std::move(mapping)); - }); - } else { - fut = alloc_extent(t, remap_laddr, *extents[i]); - } - return fut.si_then([remap_laddr, remap_len, &ret, - remap_paddr](auto &&ref) { - assert(ref->get_key() == remap_laddr); - assert(ref->get_val() == remap_paddr); - assert(ref->get_length() == remap_len); - ret.remapped_mappings.emplace_back(std::move(ref)); - return seastar::now(); + mappings.emplace_back(std::move(mapping)); + } + return seastar::make_ready_future<std::vector<LBAMappingRef>>( + std::move(mappings)); }); + } else { // !orig_mapping->is_indirect() + fut = alloc_extents( + t, + remaps.front().offset + orig_laddr, + std::move(extents), + EXTENT_DEFAULT_REF_COUNT); + } + + return fut.si_then([&ret, &remaps, &orig_mapping](auto &&refs) { + assert(refs.size() == remaps.size()); +#ifndef NDEBUG + auto ref_it = refs.begin(); + auto remap_it = remaps.begin(); + for (;ref_it != refs.end(); ref_it++, remap_it++) { + auto &ref = *ref_it; + auto &remap = *remap_it; + assert(ref->get_key() == orig_mapping->get_key() + remap.offset); + assert(ref->get_length() == remap.len); + } +#endif + ret.remapped_mappings = std::move(refs); + return seastar::now(); }); }).si_then([&remaps, &t, &orig_mapping, this] { if (remaps.size() > 1 && orig_mapping->is_indirect()) { @@ -565,20 +644,16 @@ private: }); } - alloc_extent_iertr::future<BtreeLBAMappingRef> alloc_cloned_mapping( + 
alloc_extent_iertr::future<std::vector<BtreeLBAMappingRef>> alloc_cloned_mappings( Transaction &t, laddr_t laddr, - extent_len_t len, - laddr_t intermediate_key) + std::vector<alloc_mapping_info_t> alloc_infos) { - assert(intermediate_key != L_ADDR_NULL); - std::vector<alloc_mapping_info_t> alloc_infos = { - alloc_mapping_info_t{ - len, - intermediate_key, - 0, // crc will only be used and checked with LBA direct mappings - // also see pin_to_extent(_by_type) - nullptr}}; +#ifndef NDEBUG + for (auto &alloc_info : alloc_infos) { + assert(alloc_info.val.get_laddr() != L_ADDR_NULL); + } +#endif return seastar::do_with( std::move(alloc_infos), [this, &t, laddr](auto &alloc_infos) { @@ -587,12 +662,21 @@ private: laddr, alloc_infos, EXTENT_DEFAULT_REF_COUNT - ).si_then([laddr](auto mappings) { - ceph_assert(mappings.size() == 1); - auto mapping = std::move(mappings.front()); - ceph_assert(mapping->get_key() == laddr); - return std::unique_ptr<BtreeLBAMapping>( - static_cast<BtreeLBAMapping*>(mapping.release())); + ).si_then([&alloc_infos](auto mappings) { + assert(alloc_infos.size() == mappings.size()); + std::vector<BtreeLBAMappingRef> rets; + auto mit = mappings.begin(); + auto ait = alloc_infos.begin(); + for (; mit != mappings.end(); mit++, ait++) { + auto mapping = static_cast<BtreeLBAMapping*>(mit->release()); + auto &alloc_info = *ait; + assert(mapping->get_key() == alloc_info.key); + assert(mapping->get_raw_val().get_laddr() == + alloc_info.val.get_laddr()); + assert(mapping->get_length() == alloc_info.len); + rets.emplace_back(mapping); + } + return rets; }); }); } diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc index 66dc94394a9..504c346ea94 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc @@ -10,7 +10,7 @@ #include "include/buffer.h" #include "include/byteorder.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" #include "crimson/os/seastore/logging.h" SET_SUBSYS(seastore_lba); @@ -31,6 +31,7 @@ std::ostream &LBALeafNode::_print_detail(std::ostream &out) const { out << ", size=" << this->get_size() << ", meta=" << this->get_meta() + << ", modifications=" << this->modifications << ", my_tracker=" << (void*)this->my_tracker; if (this->my_tracker) { out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get(); @@ -52,4 +53,23 @@ void LBALeafNode::resolve_relative_addrs(paddr_t base) } } +void LBALeafNode::maybe_fix_mapping_pos(BtreeLBAMapping &mapping) +{ + assert(mapping.get_parent() == this); + auto key = mapping.is_indirect() + ? 
mapping.get_intermediate_base() + : mapping.get_key(); + if (key != iter_idx(mapping.get_pos()).get_key()) { + auto iter = lower_bound(key); + { + // a mapping that no longer exists or has had its value + // modified is considered outdated and + // shouldn't be used anymore + ceph_assert(iter != end()); + assert(iter.get_val() == mapping.get_map_val()); + } + mapping._new_pos(iter.get_offset()); + } +} + }
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index c5da860e24f..add464e45e6 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
@@ -26,6 +26,8 @@ namespace crimson::os::seastore::lba_manager::btree { using base_iertr = LBAManager::base_iertr; using LBANode = FixedKVNode<laddr_t>; +class BtreeLBAMapping; + /** * lba_map_val_t *
@@ -202,6 +204,7 @@ struct LBALeafNode assert(nextent->has_parent_tracker() && nextent->get_parent_node<LBALeafNode>().get() == this); } + this->on_modify(); if (val.pladdr.is_paddr()) { val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); }
@@ -222,6 +225,7 @@ struct LBALeafNode iter.get_offset(), addr, (void*)nextent); + this->on_modify(); this->insert_child_ptr(iter, nextent); if (val.pladdr.is_paddr()) { val.pladdr = maybe_generate_relative(val.pladdr.get_paddr());
@@ -241,6 +245,7 @@ struct LBALeafNode iter.get_offset(), iter.get_key()); assert(iter != this->end()); + this->on_modify(); this->remove_child_ptr(iter); return this->journal_remove( iter,
@@ -287,6 +292,8 @@ struct LBALeafNode } std::ostream &_print_detail(std::ostream &out) const final; + + void maybe_fix_mapping_pos(BtreeLBAMapping &mapping); }; using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
diff --git a/src/crimson/os/seastore/onode.cc b/src/crimson/os/seastore/onode.cc index 96b69fb7c30..f3fd6eb18a5 100644 --- a/src/crimson/os/seastore/onode.cc +++ b/src/crimson/os/seastore/onode.cc
@@ -10,6 +10,7 @@ std::ostream& operator<<(std::ostream &out, const Onode &rhs) { auto &layout = rhs.get_layout(); return out << "Onode(" + << "hobj=" << rhs.hobj << ", " << "size=" << static_cast<uint32_t>(layout.size) << ")"; }
diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index e803a2e4e74..de72a658435 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h
@@ -8,6 +8,7 @@ #include <boost/intrusive_ptr.hpp> #include <boost/smart_ptr/intrusive_ref_counter.hpp> +#include "common/hobject.h" #include "include/byteorder.h" #include "seastore_types.h"
@@ -56,10 +57,12 @@ protected: virtual laddr_t get_hint() const = 0; const uint32_t default_metadata_offset = 0; const uint32_t default_metadata_range = 0; + const hobject_t hobj; public: - Onode(uint32_t ddr, uint32_t dmr) + Onode(uint32_t ddr, uint32_t dmr, const hobject_t &hobj) : default_metadata_offset(ddr), - default_metadata_range(dmr) + default_metadata_range(dmr), + hobj(hobj) {} virtual bool is_alive() const = 0;
@@ -85,6 +88,7 @@ public: laddr_t get_data_hint() const { return get_hint(); } + friend std::ostream& operator<<(std::ostream &out, const Onode &rhs); };
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc index 9a29072fe8b..bedcff107e2 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc +++ 
b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc @@ -151,6 +151,7 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode( auto val = OnodeRef(new FLTreeOnode( default_data_reservation, default_metadata_range, + hoid.hobj, cursor.value())); return get_onode_iertr::make_ready_future<OnodeRef>( val @@ -173,6 +174,7 @@ FLTreeOnodeManager::get_or_create_onode( auto onode = new FLTreeOnode( default_data_reservation, default_metadata_range, + hoid.hobj, cursor.value()); if (created) { DEBUGT("created onode for entry for {}", trans, hoid); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index 33109e50f6a..155776046ef 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -36,13 +36,13 @@ struct FLTreeOnode final : Onode, Value { FLTreeOnode& operator=(const FLTreeOnode&) = delete; template <typename... T> - FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args) - : Onode(ddr, dmr), + FLTreeOnode(uint32_t ddr, uint32_t dmr, const hobject_t &hobj, T&&... args) + : Onode(ddr, dmr, hobj), Value(std::forward<T>(args)...) {} template <typename... T> - FLTreeOnode(T&&... args) - : Onode(0, 0), + FLTreeOnode(const hobject_t &hobj, T&&... args) + : Onode(0, 0, hobj), Value(std::forward<T>(args)...) {} struct Recorder : public ValueDeltaRecorder { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h index 7385e080c8f..68f8a55e6c7 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h @@ -92,7 +92,7 @@ class Btree { ValueImpl value() { assert(!is_end()); return p_tree->value_builder.build_value( - *p_tree->nm, p_tree->value_builder, p_cursor); + get_ghobj().hobj, *p_tree->nm, p_tree->value_builder, p_cursor); } bool operator==(const Cursor& o) const { return operator<=>(o) == 0; } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h index d9f0c231a17..f32d5f1e668 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h @@ -312,11 +312,12 @@ struct ValueBuilderImpl final : public ValueBuilder { return ret; } - ValueImpl build_value(NodeExtentManager& nm, + ValueImpl build_value(const hobject_t &hobj, + NodeExtentManager& nm, const ValueBuilder& vb, Ref<tree_cursor_t>& p_cursor) const { assert(vb.get_header_magic() == get_header_magic()); - return ValueImpl(nm, vb, p_cursor); + return ValueImpl(hobj, nm, vb, p_cursor); } }; diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 50e7eb1d4ff..68d432d5dd0 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -361,7 +361,15 @@ SeaStore::Shard::mkfs_managers() init_managers(); return transaction_manager->mount(); }).safe_then([this] { + + ++(shard_stats.io_num); + ++(shard_stats.pending_io_num); + // For TM::submit_transaction() + ++(shard_stats.processing_inlock_io_num); + return repeat_eagain([this] { + ++(shard_stats.repeat_io_num); + return transaction_manager->with_transaction_intr( Transaction::src_t::MUTATE, "mkfs_seastore", @@ -381,7 +389,12 @@ SeaStore::Shard::mkfs_managers() 
crimson::ct_error::assert_all{ "Invalid error in Shard::mkfs_managers" } - ); + ).finally([this] { + assert(shard_stats.pending_io_num); + --(shard_stats.pending_io_num); + // XXX: it's wrong to assume no failure + --(shard_stats.processing_postlock_io_num); + }); } seastar::future<> SeaStore::set_secondaries() @@ -572,6 +585,114 @@ seastar::future<store_statfs_t> SeaStore::pool_statfs(int64_t pool_id) const return SeaStore::stat(); } +seastar::future<> SeaStore::report_stats() +{ + ceph_assert(seastar::this_shard_id() == primary_core); + shard_device_stats.resize(seastar::smp::count); + shard_io_stats.resize(seastar::smp::count); + return shard_stores.invoke_on_all([this](const Shard &local_store) { + bool report_detail = false; + if (seastar::this_shard_id() == 0) { + // avoid too verbose logs, only report detail in a particular shard + report_detail = true; + } + shard_device_stats[seastar::this_shard_id()] = + local_store.get_device_stats(report_detail); + shard_io_stats[seastar::this_shard_id()] = + local_store.get_io_stats(report_detail); + }).then([this] { + LOG_PREFIX(SeaStore); + auto now = seastar::lowres_clock::now(); + if (last_tp == seastar::lowres_clock::time_point::min()) { + last_tp = now; + return seastar::now(); + } + std::chrono::duration<double> duration_d = now - last_tp; + double seconds = duration_d.count(); + last_tp = now; + + device_stats_t device_total = {}; + for (const auto &s : shard_device_stats) { + device_total.add(s); + } + constexpr const char* dfmt = "{:.2f}"; + auto device_total_num_io = static_cast<double>(device_total.num_io); + + std::ostringstream oss_iops; + auto iops = device_total.num_io/seconds; + oss_iops << "device IOPS: " + << fmt::format(dfmt, iops) + << " " + << fmt::format(dfmt, iops/seastar::smp::count) + << "("; + + std::ostringstream oss_bd; + auto bd_mb = device_total.total_bytes/seconds/(1<<20); + oss_bd << "device bandwidth(MiB): " + << fmt::format(dfmt, bd_mb) + << " " + << fmt::format(dfmt, bd_mb/seastar::smp::count) + << "("; + + for (const auto &s : shard_device_stats) { + oss_iops << fmt::format(dfmt, s.num_io/seconds) << ","; + oss_bd << fmt::format(dfmt, s.total_bytes/seconds/(1<<20)) << ","; + } + oss_iops << ")"; + oss_bd << ")"; + + INFO("{}", oss_iops.str()); + INFO("{}", oss_bd.str()); + INFO("device IO depth per writer: {:.2f}", + device_total.total_depth/device_total_num_io); + INFO("device bytes per write: {:.2f}", + device_total.total_bytes/device_total_num_io); + + shard_stats_t io_total = {}; + for (const auto &s : shard_io_stats) { + io_total.add(s); + } + INFO("trans IOPS: {:.2f},{:.2f},{:.2f},{:.2f} per-shard: {:.2f},{:.2f},{:.2f},{:.2f}", + io_total.io_num/seconds, + io_total.read_num/seconds, + io_total.get_bg_num()/seconds, + io_total.flush_num/seconds, + io_total.io_num/seconds/seastar::smp::count, + io_total.read_num/seconds/seastar::smp::count, + io_total.get_bg_num()/seconds/seastar::smp::count, + io_total.flush_num/seconds/seastar::smp::count); + auto calc_conflicts = [](uint64_t ios, uint64_t repeats) { + return (double)(repeats-ios)/ios; + }; + INFO("trans conflicts: {:.2f},{:.2f},{:.2f}", + calc_conflicts(io_total.io_num, io_total.repeat_io_num), + calc_conflicts(io_total.read_num, io_total.repeat_read_num), + calc_conflicts(io_total.get_bg_num(), io_total.get_repeat_bg_num())); + INFO("trans outstanding: {},{},{},{} per-shard: {:.2f},{:.2f},{:.2f},{:.2f}", + io_total.pending_io_num, + io_total.pending_read_num, + io_total.pending_bg_num, + io_total.pending_flush_num, + 
(double)io_total.pending_io_num/seastar::smp::count, + (double)io_total.pending_read_num/seastar::smp::count, + (double)io_total.pending_bg_num/seastar::smp::count, + (double)io_total.pending_flush_num/seastar::smp::count); + + std::ostringstream oss_pending; + for (const auto &s : shard_io_stats) { + oss_pending << s.pending_io_num + << "(" << s.starting_io_num + << "," << s.waiting_collock_io_num + << "," << s.waiting_throttler_io_num + << "," << s.processing_inlock_io_num + << "," << s.processing_postlock_io_num + << ") "; + } + INFO("details: {}", oss_pending.str()); + return seastar::now(); + }); +} + TransactionManager::read_extent_iertr::future<std::optional<unsigned>> SeaStore::Shard::get_coll_bits(CollectionRef ch, Transaction &t) const { @@ -671,6 +792,9 @@ SeaStore::Shard::list_objects(CollectionRef ch, const ghobject_t& end, uint64_t limit) const { + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + ceph_assert(start <= end); using list_iertr = OnodeManager::list_onodes_iertr; using RetType = typename OnodeManager::list_onodes_bare_ret; @@ -679,6 +803,8 @@ SeaStore::Shard::list_objects(CollectionRef ch, std::move(limit), [this, ch, start, end](auto& ret, auto& limit) { return repeat_eagain([this, ch, start, end, &limit, &ret] { + ++(shard_stats.repeat_read_num); + return transaction_manager->with_transaction_intr( Transaction::src_t::READ, "list_objects", @@ -757,6 +883,9 @@ SeaStore::Shard::list_objects(CollectionRef ch, "Invalid error in SeaStore::list_objects" } ); + }).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); }); } @@ -796,10 +925,15 @@ SeaStore::Shard::set_collection_opts(CollectionRef c, seastar::future<std::vector<coll_core_t>> SeaStore::Shard::list_collections() { + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return seastar::do_with( std::vector<coll_core_t>(), [this](auto &ret) { return repeat_eagain([this, &ret] { + ++(shard_stats.repeat_read_num); + return transaction_manager->with_transaction_intr( Transaction::src_t::READ, "list_collections", @@ -825,7 +959,10 @@ SeaStore::Shard::list_collections() crimson::ct_error::assert_all{ "Invalid error in SeaStore::list_collections" } - ); + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::read_errorator::future<ceph::bufferlist> @@ -838,6 +975,10 @@ SeaStore::Shard::read( { LOG_PREFIX(SeaStore::read); DEBUG("oid {} offset {} len {}", oid, offset, len); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return repeat_with_onode<ceph::bufferlist>( ch, oid, @@ -863,7 +1004,11 @@ SeaStore::Shard::read( }, offset, corrected_len); - }); + } + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::base_errorator::future<bool> @@ -873,6 +1018,10 @@ SeaStore::Shard::exists( { LOG_PREFIX(SeaStore::exists); DEBUG("oid {}", oid); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return repeat_with_onode<bool>( c, oid, @@ -886,7 +1035,10 @@ SeaStore::Shard::exists( return seastar::make_ready_future<bool>(false); }), crimson::ct_error::assert_all{"unexpected error"} - ); + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::read_errorator::future<ceph::bufferlist> @@ -927,6 +1079,10 @@ SeaStore::Shard::get_attr( auto c = static_cast<SeastoreCollection*>(ch.get()); LOG_PREFIX(SeaStore::get_attr); DEBUG("{} {}", 
c->get_cid(), oid); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return repeat_with_onode<ceph::bufferlist>( c, oid, @@ -954,7 +1110,11 @@ SeaStore::Shard::get_attr( ).handle_error( crimson::ct_error::input_output_error::assert_failure{ "EIO when getting attrs"}, - crimson::ct_error::pass_further_all{}); + crimson::ct_error::pass_further_all{} + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::get_attrs_ertr::future<SeaStore::Shard::attrs_t> @@ -965,6 +1125,10 @@ SeaStore::Shard::get_attrs( LOG_PREFIX(SeaStore::get_attrs); auto c = static_cast<SeastoreCollection*>(ch.get()); DEBUG("{} {}", c->get_cid(), oid); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return repeat_with_onode<attrs_t>( c, oid, @@ -997,13 +1161,20 @@ SeaStore::Shard::get_attrs( ).handle_error( crimson::ct_error::input_output_error::assert_failure{ "EIO when getting attrs"}, - crimson::ct_error::pass_further_all{}); + crimson::ct_error::pass_further_all{} + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } seastar::future<struct stat> SeaStore::Shard::stat( CollectionRef c, const ghobject_t& oid) { + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + LOG_PREFIX(SeaStore::stat); return repeat_with_onode<struct stat>( c, @@ -1025,7 +1196,10 @@ seastar::future<struct stat> SeaStore::Shard::stat( crimson::ct_error::assert_all{ "Invalid error in SeaStore::stat" } - ); + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::get_attr_errorator::future<ceph::bufferlist> @@ -1042,6 +1216,9 @@ SeaStore::Shard::omap_get_values( const ghobject_t &oid, const omap_keys_t &keys) { + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + auto c = static_cast<SeastoreCollection*>(ch.get()); return repeat_with_onode<omap_values_t>( c, @@ -1056,7 +1233,11 @@ SeaStore::Shard::omap_get_values( t, std::move(omap_root), keys); - }); + } + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); + }); } SeaStore::Shard::_omap_get_value_ret @@ -1158,6 +1339,10 @@ SeaStore::Shard::omap_get_values( auto c = static_cast<SeastoreCollection*>(ch.get()); LOG_PREFIX(SeaStore::omap_get_values); DEBUG("{} {}", c->get_cid(), oid); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + using ret_bare_t = std::tuple<bool, SeaStore::Shard::omap_values_t>; return repeat_with_onode<ret_bare_t>( c, @@ -1174,6 +1359,10 @@ SeaStore::Shard::omap_get_values( OMapManager::omap_list_config_t() .with_inclusive(false, false) .without_max()); + } + ).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); }); } @@ -1206,6 +1395,10 @@ SeaStore::Shard::fiemap( { LOG_PREFIX(SeaStore::fiemap); DEBUG("oid: {}, off: {}, len: {} ", oid, off, len); + + ++(shard_stats.read_num); + ++(shard_stats.pending_read_num); + return repeat_with_onode<std::map<uint64_t, uint64_t>>( ch, oid, @@ -1222,6 +1415,9 @@ SeaStore::Shard::fiemap( size - off: std::min(size - off, len); return _fiemap(t, onode, off, adjust_len); + }).finally([this] { + assert(shard_stats.pending_read_num); + --(shard_stats.pending_read_num); }); } @@ -1242,6 +1438,10 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks( CollectionRef _ch, ceph::os::Transaction&& _t) { + ++(shard_stats.io_num); + ++(shard_stats.pending_io_num); + ++(shard_stats.starting_io_num); + // 
repeat_with_internal_context ensures ordering via collection lock return repeat_with_internal_context( _ch, @@ -1288,12 +1488,21 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks( return transaction_manager->submit_transaction(*ctx.transaction); }); }); - }); + } + ).finally([this] { + assert(shard_stats.pending_io_num); + --(shard_stats.pending_io_num); + // XXX: it's wrong to assume no failure + --(shard_stats.processing_postlock_io_num); + }); } seastar::future<> SeaStore::Shard::flush(CollectionRef ch) { + ++(shard_stats.flush_num); + ++(shard_stats.pending_flush_num); + return seastar::do_with( get_dummy_ordering_handle(), [this, ch](auto &handle) { @@ -1302,7 +1511,11 @@ seastar::future<> SeaStore::Shard::flush(CollectionRef ch) ).then([this, &handle] { return transaction_manager->flush(handle); }); - }); + } + ).finally([this] { + assert(shard_stats.pending_flush_num); + --(shard_stats.pending_flush_num); + }); } SeaStore::Shard::tm_ret @@ -2235,10 +2448,18 @@ seastar::future<> SeaStore::Shard::write_meta( { LOG_PREFIX(SeaStore::write_meta); DEBUG("key: {}; value: {}", key, value); + + ++(shard_stats.io_num); + ++(shard_stats.pending_io_num); + // For TM::submit_transaction() + ++(shard_stats.processing_inlock_io_num); + return seastar::do_with( key, value, [this, FNAME](auto& key, auto& value) { return repeat_eagain([this, FNAME, &key, &value] { + ++(shard_stats.repeat_io_num); + return transaction_manager->with_transaction_intr( Transaction::src_t::MUTATE, "write_meta", @@ -2252,9 +2473,16 @@ seastar::future<> SeaStore::Shard::write_meta( }); }); }); - }).handle_error( - crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"} - ); + } + ).handle_error( + crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"} + ).finally([this] { + assert(shard_stats.pending_io_num); + --(shard_stats.pending_io_num); + // XXX: it's wrong to assume no failure, + // but failure leads to fatal error + --(shard_stats.processing_postlock_io_num); + }); } seastar::future<std::tuple<int, std::string>> @@ -2286,15 +2514,75 @@ void SeaStore::Shard::init_managers() transaction_manager.reset(); collection_manager.reset(); onode_manager.reset(); + shard_stats = {}; transaction_manager = make_transaction_manager( - device, secondaries, is_test); + device, secondaries, shard_stats, is_test); collection_manager = std::make_unique<collection_manager::FlatCollectionManager>( *transaction_manager); onode_manager = std::make_unique<crimson::os::seastore::onode::FLTreeOnodeManager>( *transaction_manager); } +device_stats_t SeaStore::Shard::get_device_stats(bool report_detail) const +{ + return transaction_manager->get_device_stats(report_detail); +} + +shard_stats_t SeaStore::Shard::get_io_stats(bool report_detail) const +{ + auto now = seastar::lowres_clock::now(); + if (last_tp == seastar::lowres_clock::time_point::min()) { + last_tp = now; + last_shard_stats = shard_stats; + return {}; + } + std::chrono::duration<double> duration_d = now - last_tp; + double seconds = duration_d.count(); + last_tp = now; + + shard_stats_t ret = shard_stats; + ret.minus(last_shard_stats); + last_shard_stats = shard_stats; + if (report_detail) { + LOG_PREFIX(SeaStore::get_io_stats); + auto calc_conflicts = [](uint64_t ios, uint64_t repeats) { + return (double)(repeats-ios)/ios; + }; + INFO("iops={:.2f},{:.2f},{:.2f}({:.2f},{:.2f},{:.2f},{:.2f}),{:.2f} " + "conflicts={:.2f},{:.2f},{:.2f}({:.2f},{:.2f},{:.2f},{:.2f}) " + "outstanding={}({},{},{},{},{}),{},{},{}", + // iops + 
ret.io_num/seconds, + ret.read_num/seconds, + ret.get_bg_num()/seconds, + ret.trim_alloc_num/seconds, + ret.trim_dirty_num/seconds, + ret.cleaner_main_num/seconds, + ret.cleaner_cold_num/seconds, + ret.flush_num/seconds, + // conflicts + calc_conflicts(ret.io_num, ret.repeat_io_num), + calc_conflicts(ret.read_num, ret.repeat_read_num), + calc_conflicts(ret.get_bg_num(), ret.get_repeat_bg_num()), + calc_conflicts(ret.trim_alloc_num, ret.repeat_trim_alloc_num), + calc_conflicts(ret.trim_dirty_num, ret.repeat_trim_dirty_num), + calc_conflicts(ret.cleaner_main_num, ret.repeat_cleaner_main_num), + calc_conflicts(ret.cleaner_cold_num, ret.repeat_cleaner_cold_num), + // outstanding + ret.pending_io_num, + ret.starting_io_num, + ret.waiting_collock_io_num, + ret.waiting_throttler_io_num, + ret.processing_inlock_io_num, + ret.processing_postlock_io_num, + ret.pending_read_num, + ret.pending_bg_num, + ret.pending_flush_num); + } + return ret; +} + std::unique_ptr<SeaStore> make_seastore( const std::string &device) { diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index 3de8a812e12..16c0f6185a9 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -3,14 +3,15 @@ #pragma once -#include <string> -#include <unordered_map> #include <map> +#include <optional> +#include <string> #include <typeinfo> +#include <unordered_map> #include <vector> -#include <optional> #include <seastar/core/future.hh> +#include <seastar/core/lowres_clock.hh> #include <seastar/core/metrics_types.hh> #include "include/uuid.h" @@ -203,6 +204,10 @@ public: void init_managers(); + device_stats_t get_device_stats(bool report_detail) const; + + shard_stats_t get_io_stats(bool report_detail) const; + private: struct internal_context_t { CollectionRef ch; @@ -240,18 +245,34 @@ public: const char* tname, op_type_t op_type, F &&f) { + // The below repeat_io_num requires MUTATE + assert(src == Transaction::src_t::MUTATE); return seastar::do_with( internal_context_t( ch, std::move(t), transaction_manager->create_transaction(src, tname)), std::forward<F>(f), [this, op_type](auto &ctx, auto &f) { + assert(shard_stats.starting_io_num); + --(shard_stats.starting_io_num); + ++(shard_stats.waiting_collock_io_num); + return ctx.transaction->get_handle().take_collection_lock( static_cast<SeastoreCollection&>(*(ctx.ch)).ordering_lock ).then([this] { + assert(shard_stats.waiting_collock_io_num); + --(shard_stats.waiting_collock_io_num); + ++(shard_stats.waiting_throttler_io_num); + return throttler.get(1); }).then([&, this] { + assert(shard_stats.waiting_throttler_io_num); + --(shard_stats.waiting_throttler_io_num); + ++(shard_stats.processing_inlock_io_num); + return repeat_eagain([&, this] { + ++(shard_stats.repeat_io_num); + ctx.reset_preserve_handle(*transaction_manager); return std::invoke(f, ctx); }).handle_error( @@ -284,6 +305,9 @@ public: ](auto &oid, auto &ret, auto &f) { return repeat_eagain([&, this, src, tname] { + assert(src == Transaction::src_t::READ); + ++(shard_stats.repeat_read_num); + return transaction_manager->with_transaction_intr( src, tname, @@ -477,6 +501,11 @@ public: seastar::metrics::metric_group metrics; void register_metrics(); + + mutable shard_stats_t shard_stats; + mutable seastar::lowres_clock::time_point last_tp = + seastar::lowres_clock::time_point::min(); + mutable shard_stats_t last_shard_stats; }; public: @@ -495,6 +524,8 @@ public: seastar::future<store_statfs_t> stat() const final; seastar::future<store_statfs_t> pool_statfs(int64_t 
pool_id) const final; + seastar::future<> report_stats() final; + uuid_d get_fsid() const final { ceph_assert(seastar::this_shard_id() == primary_core); return shard_stores.local().get_fsid(); @@ -547,6 +578,11 @@ private: DeviceRef device; std::vector<DeviceRef> secondaries; seastar::sharded<SeaStore::Shard> shard_stores; + + mutable seastar::lowres_clock::time_point last_tp = + seastar::lowres_clock::time_point::min(); + mutable std::vector<device_stats_t> shard_device_stats; + mutable std::vector<shard_stats_t> shard_io_stats; }; std::unique_ptr<SeaStore> make_seastore( diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 93818ba0258..25d787f0fd3 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -2,6 +2,9 @@ // vim: ts=8 sw=2 smarttab #include "crimson/os/seastore/seastore_types.h" + +#include <utility> + #include "crimson/common/log.h" namespace { @@ -130,7 +133,7 @@ std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) } journal_seq_t journal_seq_t::add_offset( - journal_type_t type, + backend_type_t type, device_off_t off, device_off_t roll_start, device_off_t roll_size) const @@ -142,10 +145,10 @@ journal_seq_t journal_seq_t::add_offset( segment_seq_t jseq = segment_seq; device_off_t joff; - if (type == journal_type_t::SEGMENTED) { + if (type == backend_type_t::SEGMENTED) { joff = offset.as_seg_paddr().get_segment_off(); } else { - assert(type == journal_type_t::RANDOM_BLOCK); + assert(type == backend_type_t::RANDOM_BLOCK); auto boff = offset.as_blk_paddr().get_device_off(); joff = boff; } @@ -160,7 +163,7 @@ journal_seq_t journal_seq_t::add_offset( ++new_jseq; joff -= roll_size; } - assert(new_jseq < MAX_SEG_SEQ); + assert(std::cmp_less(new_jseq, MAX_SEG_SEQ)); jseq = static_cast<segment_seq_t>(new_jseq); } else { device_off_t mod = (-off) / roll_size; @@ -169,7 +172,7 @@ journal_seq_t journal_seq_t::add_offset( ++mod; joff += roll_size; } - if (jseq >= mod) { + if (std::cmp_greater_equal(jseq, mod)) { jseq -= mod; } else { return JOURNAL_SEQ_MIN; @@ -181,7 +184,7 @@ journal_seq_t journal_seq_t::add_offset( } device_off_t journal_seq_t::relative_to( - journal_type_t type, + backend_type_t type, const journal_seq_t& r, device_off_t roll_start, device_off_t roll_size) const @@ -193,11 +196,11 @@ device_off_t journal_seq_t::relative_to( device_off_t ret = static_cast<device_off_t>(segment_seq) - r.segment_seq; ret *= roll_size; - if (type == journal_type_t::SEGMENTED) { + if (type == backend_type_t::SEGMENTED) { ret += (static_cast<device_off_t>(offset.as_seg_paddr().get_segment_off()) - static_cast<device_off_t>(r.offset.as_seg_paddr().get_segment_off())); } else { - assert(type == journal_type_t::RANDOM_BLOCK); + assert(type == backend_type_t::RANDOM_BLOCK); ret += offset.as_blk_paddr().get_device_off() - r.offset.as_blk_paddr().get_device_off(); } @@ -385,20 +388,32 @@ std::ostream &operator<<(std::ostream &out, const segment_tail_t &tail) extent_len_t record_size_t::get_raw_mdlength() const { + assert(record_type < record_type_t::MAX); // empty record is allowed to submit - return plain_mdlength + - ceph::encoded_sizeof_bounded<record_header_t>(); + extent_len_t ret = plain_mdlength; + if (record_type == record_type_t::JOURNAL) { + ret += ceph::encoded_sizeof_bounded<record_header_t>(); + } else { + // OOL won't contain metadata + assert(ret == 0); + } + return ret; } void record_size_t::account_extent(extent_len_t extent_len) { assert(extent_len); - plain_mdlength 
+= ceph::encoded_sizeof_bounded<extent_info_t>();
+ if (record_type == record_type_t::JOURNAL) {
+ plain_mdlength += ceph::encoded_sizeof_bounded<extent_info_t>();
+ } else {
+ // OOL won't contain metadata
+ }
dlength += extent_len;
}

void record_size_t::account(const delta_info_t& delta)
{
+ assert(record_type == record_type_t::JOURNAL);
assert(delta.bl.length());
plain_mdlength += ceph::encoded_sizeof(delta);
}
@@ -430,15 +445,32 @@ std::ostream &operator<<(std::ostream &os, transaction_type_t type)
std::ostream &operator<<(std::ostream& out, const record_size_t& rsize)
{
return out << "record_size_t("
+ << "record_type=" << rsize.record_type << ", "
<< "raw_md=" << rsize.get_raw_mdlength()
<< ", data=" << rsize.dlength
<< ")";
}
+std::ostream &operator<<(std::ostream& out, const record_type_t& type)
+{
+ switch (type) {
+ case record_type_t::JOURNAL:
+ return out << "JOURNAL";
+ case record_type_t::OOL:
+ return out << "OOL";
+ case record_type_t::MAX:
+ return out << "NULL";
+ default:
+ return out << "INVALID_RECORD_TYPE("
+ << static_cast<std::size_t>(type)
+ << ")";
+ }
+}
+
std::ostream &operator<<(std::ostream& out, const record_t& r)
{
return out << "record_t("
- << "type=" << r.type
+ << "trans_type=" << r.trans_type
<< ", num_extents=" << r.extents.size()
<< ", num_deltas=" << r.deltas.size()
<< ", modify_time=" << sea_time_point_printer_t{r.modify_time}
@@ -469,9 +501,16 @@ std::ostream& operator<<(std::ostream& out, const record_group_header_t& h)
extent_len_t record_group_size_t::get_raw_mdlength() const
{
- return plain_mdlength +
- sizeof(checksum_t) +
- ceph::encoded_sizeof_bounded<record_group_header_t>();
+ assert(record_type < record_type_t::MAX);
+ extent_len_t ret = plain_mdlength;
+ if (record_type == record_type_t::JOURNAL) {
+ ret += sizeof(checksum_t);
+ ret += ceph::encoded_sizeof_bounded<record_group_header_t>();
+ } else {
+ // OOL won't contain metadata
+ assert(ret == 0);
+ }
+ return ret;
}
void record_group_size_t::account(
@@ -482,14 +521,23 @@ void record_group_size_t::account(
assert(_block_size > 0);
assert(rsize.dlength % _block_size == 0);
assert(block_size == 0 || block_size == _block_size);
- plain_mdlength += rsize.get_raw_mdlength();
- dlength += rsize.dlength;
+ assert(record_type == RECORD_TYPE_NULL ||
+ record_type == rsize.record_type);
block_size = _block_size;
+ record_type = rsize.record_type;
+ if (record_type == record_type_t::JOURNAL) {
+ plain_mdlength += rsize.get_raw_mdlength();
+ } else {
+ // OOL won't contain metadata
+ assert(rsize.get_raw_mdlength() == 0);
+ }
+ dlength += rsize.dlength;
}
std::ostream& operator<<(std::ostream& out, const record_group_size_t& size)
{
return out << "record_group_size_t("
+ << "record_type=" << size.record_type << ", "
<< "raw_md=" << size.get_raw_mdlength()
<< ", data=" << size.dlength
<< ", block_size=" << size.block_size
@@ -523,6 +571,7 @@ ceph::bufferlist encode_records(
const journal_seq_t& committed_to,
segment_nonce_t current_segment_nonce)
{
+ assert(record_group.size.record_type < record_type_t::MAX);
assert(record_group.size.block_size > 0);
assert(record_group.records.size() > 0);
@@ -534,6 +583,15 @@ ceph::bufferlist encode_records(
}
}
+ if (record_group.size.record_type == record_type_t::OOL) {
+ // OOL won't contain metadata
+ assert(record_group.size.get_mdlength() == 0);
+ ceph_assert(data_bl.length() ==
+ record_group.size.get_encoded_length());
+ record_group.clear();
+ return data_bl;
+ }
+
+ // JOURNAL
bufferlist bl;
record_group_header_t header{ 
static_cast<extent_len_t>(record_group.records.size()),
@@ -549,7 +607,7 @@
for (auto& r: record_group.records) {
record_header_t rheader{
- r.type,
+ r.trans_type,
(extent_len_t)r.deltas.size(),
(extent_len_t)r.extents.size(),
timepoint_to_mod(r.modify_time)
@@ -875,4 +933,46 @@ std::ostream& operator<<(std::ostream& out, const scan_valid_records_cursor& c)
<< ")";
}
+std::ostream& operator<<(std::ostream& out, const tw_stats_printer_t& p)
+{
+ constexpr const char* dfmt = "{:.2f}";
+ double d_num_records = static_cast<double>(p.stats.num_records);
+ out << "rps="
+ << fmt::format(dfmt, d_num_records/p.seconds)
+ << ",bwMiB="
+ << fmt::format(dfmt, p.stats.get_total_bytes()/p.seconds/(1<<20))
+ << ",sizeB="
+ << fmt::format(dfmt, p.stats.get_total_bytes()/d_num_records)
+ << "("
+ << fmt::format(dfmt, p.stats.data_bytes/d_num_records)
+ << ","
+ << fmt::format(dfmt, p.stats.metadata_bytes/d_num_records)
+ << ")";
+ return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const writer_stats_printer_t& p)
+{
+ constexpr const char* dfmt = "{:.2f}";
+ auto d_num_io = static_cast<double>(p.stats.io_depth_stats.num_io);
+ out << "iops="
+ << fmt::format(dfmt, d_num_io/p.seconds)
+ << ",depth="
+ << fmt::format(dfmt, p.stats.io_depth_stats.average())
+ << ",batch="
+ << fmt::format(dfmt, p.stats.record_batch_stats.average())
+ << ",bwMiB="
+ << fmt::format(dfmt, p.stats.get_total_bytes()/p.seconds/(1<<20))
+ << ",sizeB="
+ << fmt::format(dfmt, p.stats.get_total_bytes()/d_num_io)
+ << "("
+ << fmt::format(dfmt, p.stats.data_bytes/d_num_io)
+ << ","
+ << fmt::format(dfmt, p.stats.record_group_metadata_bytes/d_num_io)
+ << ","
+ << fmt::format(dfmt, p.stats.record_group_padding_bytes/d_num_io)
+ << ")";
+ return out;
+}
+
}
diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index e670a43c88e..c2c6ec56882 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h
@@ -904,7 +904,6 @@ enum class backend_type_t {
};
std::ostream& operator<<(std::ostream& out, backend_type_t);
-using journal_type_t = backend_type_t;
constexpr backend_type_t get_default_backend_of_device(device_type_t dtype) {
assert(dtype != device_type_t::NONE &&
@@ -933,13 +932,13 @@ struct journal_seq_t {
// produces a pseudo journal_seq_t relative to this by offset
journal_seq_t add_offset(
- journal_type_t type,
+ backend_type_t type,
device_off_t off,
device_off_t roll_start,
device_off_t roll_size) const;
device_off_t relative_to(
- journal_type_t type,
+ backend_type_t type,
const journal_seq_t& r,
device_off_t roll_start,
device_off_t roll_size) const;
@@ -1889,7 +1888,25 @@ constexpr bool is_trim_transaction(transaction_type_t type) {
type == transaction_type_t::TRIM_ALLOC);
}
+constexpr bool is_modify_transaction(transaction_type_t type) {
+ return (type == transaction_type_t::MUTATE ||
+ is_background_transaction(type));
+}
+
+// Note: It is possible to statically introduce structs for OOL, which would be
+// more efficient, but that would require specializing the RecordSubmitter as well.
+// Let's delay this optimization until necessary. 
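For illustration, a minimal self-contained sketch (not code from this patch) of the runtime JOURNAL-vs-OOL dispatch that the record_type_t introduced just below encodes; sketch_record_type, sketch_raw_mdlength and the literal sizes are made-up stand-ins for the real types and the ceph::encoded_sizeof_bounded<> values:

// A hedged sketch of the record-type dispatch, independent of the Ceph tree.
#include <cstdint>

enum class sketch_record_type { JOURNAL, OOL };

// JOURNAL records carry a per-record header in their metadata; OOL records
// are written as raw data, so their metadata length must stay zero.
constexpr std::uint32_t sketch_raw_mdlength(sketch_record_type type,
                                            std::uint32_t plain_mdlength,
                                            std::uint32_t header_size) {
  return type == sketch_record_type::JOURNAL
      ? plain_mdlength + header_size
      : 0; // OOL won't contain metadata
}

static_assert(sketch_raw_mdlength(sketch_record_type::JOURNAL, 8, 32) == 40);
static_assert(sketch_raw_mdlength(sketch_record_type::OOL, 0, 32) == 0);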
+enum class record_type_t { + JOURNAL = 0, + OOL, // no header, no metadata, so no padding + MAX +}; +std::ostream &operator<<(std::ostream&, const record_type_t&); + +static constexpr auto RECORD_TYPE_NULL = record_type_t::MAX; + struct record_size_t { + record_type_t record_type = RECORD_TYPE_NULL; // must not be NULL in use extent_len_t plain_mdlength = 0; // mdlength without the record header extent_len_t dlength = 0; @@ -1913,22 +1930,30 @@ struct record_size_t { std::ostream &operator<<(std::ostream&, const record_size_t&); struct record_t { - transaction_type_t type = TRANSACTION_TYPE_NULL; + transaction_type_t trans_type = TRANSACTION_TYPE_NULL; std::vector<extent_t> extents; std::vector<delta_info_t> deltas; record_size_t size; sea_time_point modify_time = NULL_TIME; - record_t(transaction_type_t type) : type{type} { } + record_t(record_type_t r_type, + transaction_type_t t_type) + : trans_type{t_type} { + assert(r_type != RECORD_TYPE_NULL); + size.record_type = r_type; + } // unit test only record_t() { - type = transaction_type_t::MUTATE; + trans_type = transaction_type_t::MUTATE; + size.record_type = record_type_t::JOURNAL; } // unit test only record_t(std::vector<extent_t>&& _extents, std::vector<delta_info_t>&& _deltas) { + trans_type = transaction_type_t::MUTATE; + size.record_type = record_type_t::JOURNAL; auto modify_time = seastar::lowres_system_clock::now(); for (auto& e: _extents) { push_back(std::move(e), modify_time); @@ -1936,7 +1961,6 @@ struct record_t { for (auto& d: _deltas) { push_back(std::move(d)); } - type = transaction_type_t::MUTATE; } bool is_empty() const { @@ -1945,6 +1969,13 @@ struct record_t { } std::size_t get_delta_size() const { + assert(size.record_type < record_type_t::MAX); + if (size.record_type == record_type_t::OOL) { + // OOL won't contain metadata + assert(deltas.size() == 0); + return 0; + } + // JOURNAL auto delta_size = std::accumulate( deltas.begin(), deltas.end(), 0, [](uint64_t sum, auto& delta) { @@ -2014,6 +2045,7 @@ struct record_group_header_t { std::ostream& operator<<(std::ostream&, const record_group_header_t&); struct record_group_size_t { + record_type_t record_type = RECORD_TYPE_NULL; // must not be NULL in use extent_len_t plain_mdlength = 0; // mdlength without the group header extent_len_t dlength = 0; extent_len_t block_size = 0; @@ -2029,7 +2061,14 @@ struct record_group_size_t { extent_len_t get_mdlength() const { assert(block_size > 0); - return p2roundup(get_raw_mdlength(), block_size); + assert(record_type < record_type_t::MAX); + if (record_type == record_type_t::JOURNAL) { + return p2roundup(get_raw_mdlength(), block_size); + } else { + // OOL won't contain metadata + assert(get_raw_mdlength() == 0); + return 0; + } } extent_len_t get_encoded_length() const { @@ -2212,6 +2251,247 @@ struct scan_valid_records_cursor { }; std::ostream& operator<<(std::ostream&, const scan_valid_records_cursor&); +template <typename CounterT> +using counter_by_src_t = std::array<CounterT, TRANSACTION_TYPE_MAX>; + +template <typename CounterT> +CounterT& get_by_src( + counter_by_src_t<CounterT>& counters_by_src, + transaction_type_t src) { + assert(static_cast<std::size_t>(src) < counters_by_src.size()); + return counters_by_src[static_cast<std::size_t>(src)]; +} + +template <typename CounterT> +const CounterT& get_by_src( + const counter_by_src_t<CounterT>& counters_by_src, + transaction_type_t src) { + assert(static_cast<std::size_t>(src) < counters_by_src.size()); + return counters_by_src[static_cast<std::size_t>(src)]; +} + 
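For illustration, a hedged usage sketch (not from this patch) of the counter_by_src_t/get_by_src helpers just defined; sketch_src_t stands in for transaction_type_t and SKETCH_SRC_MAX for TRANSACTION_TYPE_MAX:

#include <array>
#include <cassert>
#include <cstdint>

// Stand-in for transaction_type_t; the real enum indexes the same way.
enum class sketch_src_t : std::size_t { READ, MUTATE, CLEANER, MAX };
constexpr std::size_t SKETCH_SRC_MAX = static_cast<std::size_t>(sketch_src_t::MAX);

template <typename CounterT>
using sketch_counter_by_src = std::array<CounterT, SKETCH_SRC_MAX>;

template <typename CounterT>
CounterT& sketch_get_by_src(sketch_counter_by_src<CounterT>& counters,
                            sketch_src_t src) {
  assert(static_cast<std::size_t>(src) < counters.size());
  return counters[static_cast<std::size_t>(src)];
}

int main() {
  // Per-source counters live in a flat array, so bumping one is an O(1)
  // index with no hashing or locking involved.
  sketch_counter_by_src<std::uint64_t> num_trans = {};
  ++sketch_get_by_src(num_trans, sketch_src_t::MUTATE);
  assert(sketch_get_by_src(num_trans, sketch_src_t::MUTATE) == 1);
  return 0;
}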
+template <typename CounterT> +void add_srcs(counter_by_src_t<CounterT>& base, + const counter_by_src_t<CounterT>& by) { + for (std::size_t i=0; i<TRANSACTION_TYPE_MAX; ++i) { + base[i] += by[i]; + } +} + +template <typename CounterT> +void minus_srcs(counter_by_src_t<CounterT>& base, + const counter_by_src_t<CounterT>& by) { + for (std::size_t i=0; i<TRANSACTION_TYPE_MAX; ++i) { + base[i] -= by[i]; + } +} + +struct grouped_io_stats { + uint64_t num_io = 0; + uint64_t num_io_grouped = 0; + + double average() const { + return static_cast<double>(num_io_grouped)/num_io; + } + + bool is_empty() const { + return num_io == 0; + } + + void add(const grouped_io_stats &o) { + num_io += o.num_io; + num_io_grouped += o.num_io_grouped; + } + + void minus(const grouped_io_stats &o) { + num_io -= o.num_io; + num_io_grouped -= o.num_io_grouped; + } + + void increment(uint64_t num_grouped_io) { + add({1, num_grouped_io}); + } +}; + +struct device_stats_t { + uint64_t num_io = 0; + uint64_t total_depth = 0; + uint64_t total_bytes = 0; + + void add(const device_stats_t& other) { + num_io += other.num_io; + total_depth += other.total_depth; + total_bytes += other.total_bytes; + } +}; + +struct trans_writer_stats_t { + uint64_t num_records = 0; + uint64_t metadata_bytes = 0; + uint64_t data_bytes = 0; + + bool is_empty() const { + return num_records == 0; + } + + uint64_t get_total_bytes() const { + return metadata_bytes + data_bytes; + } + + trans_writer_stats_t& + operator+=(const trans_writer_stats_t& o) { + num_records += o.num_records; + metadata_bytes += o.metadata_bytes; + data_bytes += o.data_bytes; + return *this; + } + + trans_writer_stats_t& + operator-=(const trans_writer_stats_t& o) { + num_records -= o.num_records; + metadata_bytes -= o.metadata_bytes; + data_bytes -= o.data_bytes; + return *this; + } +}; +struct tw_stats_printer_t { + double seconds; + const trans_writer_stats_t &stats; +}; +std::ostream& operator<<(std::ostream&, const tw_stats_printer_t&); + +struct writer_stats_t { + grouped_io_stats record_batch_stats; + grouped_io_stats io_depth_stats; + uint64_t record_group_padding_bytes = 0; + uint64_t record_group_metadata_bytes = 0; + uint64_t data_bytes = 0; + counter_by_src_t<trans_writer_stats_t> stats_by_src; + + bool is_empty() const { + return io_depth_stats.is_empty(); + } + + uint64_t get_total_bytes() const { + return record_group_padding_bytes + + record_group_metadata_bytes + + data_bytes; + } + + void add(const writer_stats_t &o) { + record_batch_stats.add(o.record_batch_stats); + io_depth_stats.add(o.io_depth_stats); + record_group_padding_bytes += o.record_group_padding_bytes; + record_group_metadata_bytes += o.record_group_metadata_bytes; + data_bytes += o.data_bytes; + add_srcs(stats_by_src, o.stats_by_src); + } + + void minus(const writer_stats_t &o) { + record_batch_stats.minus(o.record_batch_stats); + io_depth_stats.minus(o.io_depth_stats); + record_group_padding_bytes -= o.record_group_padding_bytes; + record_group_metadata_bytes -= o.record_group_metadata_bytes; + data_bytes -= o.data_bytes; + minus_srcs(stats_by_src, o.stats_by_src); + } +}; +struct writer_stats_printer_t { + double seconds; + const writer_stats_t &stats; +}; +std::ostream& operator<<(std::ostream&, const writer_stats_printer_t&); + +struct shard_stats_t { + // transaction_type_t::MUTATE + uint64_t io_num = 0; + uint64_t repeat_io_num = 0; + uint64_t pending_io_num = 0; + uint64_t starting_io_num = 0; + uint64_t waiting_collock_io_num = 0; + uint64_t waiting_throttler_io_num = 0; + uint64_t 
processing_inlock_io_num = 0; + uint64_t processing_postlock_io_num = 0; + + // transaction_type_t::READ + uint64_t read_num = 0; + uint64_t repeat_read_num = 0; + uint64_t pending_read_num = 0; + + // transaction_type_t::TRIM_DIRTY~CLEANER_COLD + uint64_t pending_bg_num = 0; + uint64_t trim_alloc_num = 0; + uint64_t repeat_trim_alloc_num = 0; + uint64_t trim_dirty_num = 0; + uint64_t repeat_trim_dirty_num = 0; + uint64_t cleaner_main_num = 0; + uint64_t repeat_cleaner_main_num = 0; + uint64_t cleaner_cold_num = 0; + uint64_t repeat_cleaner_cold_num = 0; + + uint64_t flush_num = 0; + uint64_t pending_flush_num = 0; + + uint64_t get_bg_num() const { + return trim_alloc_num + + trim_dirty_num + + cleaner_main_num + + cleaner_cold_num; + } + + uint64_t get_repeat_bg_num() const { + return repeat_trim_alloc_num + + repeat_trim_dirty_num + + repeat_cleaner_main_num + + repeat_cleaner_cold_num; + } + + void add(const shard_stats_t &o) { + io_num += o.io_num; + repeat_io_num += o.repeat_io_num; + pending_io_num += o.pending_io_num; + starting_io_num += o.starting_io_num; + waiting_collock_io_num += o.waiting_collock_io_num; + waiting_throttler_io_num += o.waiting_throttler_io_num; + processing_inlock_io_num += o.processing_inlock_io_num; + processing_postlock_io_num += o.processing_postlock_io_num; + + read_num += o.read_num; + repeat_read_num += o.repeat_read_num; + pending_read_num += o.pending_read_num; + + pending_bg_num += o.pending_bg_num; + trim_alloc_num += o.trim_alloc_num; + repeat_trim_alloc_num += o.repeat_trim_alloc_num; + trim_dirty_num += o.trim_dirty_num; + repeat_trim_dirty_num += o.repeat_trim_dirty_num; + cleaner_main_num += o.cleaner_main_num; + repeat_cleaner_main_num += o.repeat_cleaner_main_num; + cleaner_cold_num += o.cleaner_cold_num; + repeat_cleaner_cold_num += o.repeat_cleaner_cold_num; + + flush_num += o.flush_num; + pending_flush_num += o.pending_flush_num; + } + + void minus(const shard_stats_t &o) { + // realtime(e.g. 
pending) stats are point-in-time values and are not subtracted
+ io_num -= o.io_num;
+ repeat_io_num -= o.repeat_io_num;
+ read_num -= o.read_num;
+ repeat_read_num -= o.repeat_read_num;
+ trim_alloc_num -= o.trim_alloc_num;
+ repeat_trim_alloc_num -= o.repeat_trim_alloc_num;
+ trim_dirty_num -= o.trim_dirty_num;
+ repeat_trim_dirty_num -= o.repeat_trim_dirty_num;
+ cleaner_main_num -= o.cleaner_main_num;
+ repeat_cleaner_main_num -= o.repeat_cleaner_main_num;
+ cleaner_cold_num -= o.cleaner_cold_num;
+ repeat_cleaner_cold_num -= o.repeat_cleaner_cold_num;
+ flush_num -= o.flush_num;
+ }
+};
+
}
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::seastore_meta_t)
@@ -2246,6 +2526,7 @@ template <> struct fmt::formatter<crimson::os::seastore::record_group_header_t>
template <> struct fmt::formatter<crimson::os::seastore::record_group_size_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::record_header_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::record_locator_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::record_type_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::record_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::rewrite_gen_printer_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::scan_valid_records_cursor> : fmt::ostream_formatter {};
diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h
index 5e3f165560e..f6af7cfc350 100644
--- a/src/crimson/os/seastore/transaction.h
+++ b/src/crimson/os/seastore/transaction.h
@@ -92,7 +92,7 @@ public:
*out = CachedExtentRef(&*iter);
SUBTRACET(seastore_cache, "{} is present in write_set -- {}",
*this, addr, *iter);
- assert((*out)->is_valid());
+ assert(!out || (*out)->is_valid());
return get_extent_ret::PRESENT;
} else if (retired_set.count(addr)) {
return get_extent_ret::RETIRED;
@@ -126,14 +126,14 @@ public:
ref->set_invalid(*this);
write_set.erase(*ref);
assert(ref->prior_instance);
- retired_set.insert(ref->prior_instance);
+ retired_set.emplace(ref->prior_instance, trans_id);
assert(read_set.count(ref->prior_instance->get_paddr()));
ref->prior_instance.reset();
} else {
// && retired_set.count(ref->get_paddr()) == 0
// If it's already in the set, insert here will be a noop,
// which is what we want. 
- retired_set.insert(ref); + retired_set.emplace(ref, trans_id); } } @@ -262,9 +262,9 @@ public: { auto where = retired_set.find(&placeholder); assert(where != retired_set.end()); - assert(where->get() == &placeholder); + assert(where->extent.get() == &placeholder); where = retired_set.erase(where); - retired_set.emplace_hint(where, &extent); + retired_set.emplace_hint(where, &extent, trans_id); } } @@ -317,19 +317,17 @@ public: } bool is_retired(paddr_t paddr, extent_len_t len) { - if (retired_set.empty()) { + auto iter = retired_set.lower_bound(paddr); + if (iter == retired_set.end()) { return false; } - auto iter = retired_set.lower_bound(paddr); - if (iter == retired_set.end() || - (*iter)->get_paddr() > paddr) { - assert(iter != retired_set.begin()); - --iter; + auto &extent = iter->extent; + if (extent->get_paddr() != paddr) { + return false; + } else { + assert(len == extent->get_length()); + return true; } - auto retired_paddr = (*iter)->get_paddr(); - auto retired_length = (*iter)->get_length(); - return retired_paddr <= paddr && - retired_paddr.add_offset(retired_length) >= paddr.add_offset(len); } template <typename F> diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 763aae38dd4..517deb3f408 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -29,7 +29,8 @@ TransactionManager::TransactionManager( CacheRef _cache, LBAManagerRef _lba_manager, ExtentPlacementManagerRef &&_epm, - BackrefManagerRef&& _backref_manager) + BackrefManagerRef&& _backref_manager, + shard_stats_t& _shard_stats) : cache(std::move(_cache)), lba_manager(std::move(_lba_manager)), journal(std::move(_journal)), @@ -37,7 +38,8 @@ TransactionManager::TransactionManager( backref_manager(std::move(_backref_manager)), full_extent_integrity_check( crimson::common::get_conf<bool>( - "seastore_full_integrity_check")) + "seastore_full_integrity_check")), + shard_stats(_shard_stats) { epm->set_extent_callback(this); journal->set_write_pipeline(&write_pipeline); @@ -55,6 +57,12 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() journal->get_trimmer().set_journal_head(start_seq); return epm->open_for_write(); }).safe_then([this, FNAME]() { + ++(shard_stats.io_num); + ++(shard_stats.pending_io_num); + // For submit_transaction_direct() + ++(shard_stats.processing_inlock_io_num); + ++(shard_stats.repeat_io_num); + return with_transaction_intr( Transaction::src_t::MUTATE, "mkfs_tm", @@ -76,7 +84,13 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() return mkfs_ertr::now(); }), mkfs_ertr::pass_further{} - ); + ).finally([this] { + assert(shard_stats.pending_io_num); + --(shard_stats.pending_io_num); + // XXX: it's wrong to assume no failure, + // but failure leads to fatal error + --(shard_stats.processing_postlock_io_num); + }); }).safe_then([this] { return close(); }).safe_then([FNAME] { @@ -419,6 +433,10 @@ TransactionManager::do_submit_transaction( journal->get_trimmer().get_dirty_tail()); tref.get_handle().maybe_release_collection_lock(); + if (tref.get_src() == Transaction::src_t::MUTATE) { + --(shard_stats.processing_inlock_io_num); + ++(shard_stats.processing_postlock_io_num); + } SUBTRACET(seastore_t, "submitting record", tref); return journal->submit_record(std::move(record), tref.get_handle() @@ -734,6 +752,7 @@ TransactionManager::~TransactionManager() {} TransactionManagerRef make_transaction_manager( Device *primary_device, const std::vector<Device*> 
&secondary_devices, + shard_stats_t& shard_stats, bool is_test) { auto epm = std::make_unique<ExtentPlacementManager>(); @@ -775,10 +794,10 @@ TransactionManagerRef make_transaction_manager( } } - auto journal_type = p_backend_type; + auto backend_type = p_backend_type; device_off_t roll_size; device_off_t roll_start; - if (journal_type == journal_type_t::SEGMENTED) { + if (backend_type == backend_type_t::SEGMENTED) { roll_size = static_cast<SegmentManager*>(primary_device)->get_segment_size(); roll_start = 0; } else { @@ -801,17 +820,17 @@ TransactionManagerRef make_transaction_manager( cleaner_is_detailed = true; cleaner_config = SegmentCleaner::config_t::get_test(); trimmer_config = JournalTrimmerImpl::config_t::get_test( - roll_size, journal_type); + roll_size, backend_type); } else { cleaner_is_detailed = false; cleaner_config = SegmentCleaner::config_t::get_default(); trimmer_config = JournalTrimmerImpl::config_t::get_default( - roll_size, journal_type); + roll_size, backend_type); } auto journal_trimmer = JournalTrimmerImpl::create( *backref_manager, trimmer_config, - journal_type, roll_start, roll_size); + backend_type, roll_start, roll_size); AsyncCleanerRef cleaner; JournalRef journal; @@ -826,7 +845,7 @@ TransactionManagerRef make_transaction_manager( epm->get_ool_segment_seq_allocator(), cleaner_is_detailed, /* is_cold = */ true); - if (journal_type == journal_type_t::SEGMENTED) { + if (backend_type == backend_type_t::SEGMENTED) { for (auto id : cold_segment_cleaner->get_device_ids()) { segment_providers_by_id[id] = static_cast<SegmentProvider*>(cold_segment_cleaner.get()); @@ -834,7 +853,7 @@ TransactionManagerRef make_transaction_manager( } } - if (journal_type == journal_type_t::SEGMENTED) { + if (backend_type == backend_type_t::SEGMENTED) { cleaner = SegmentCleaner::create( cleaner_config, std::move(sms), @@ -873,7 +892,8 @@ TransactionManagerRef make_transaction_manager( std::move(cache), std::move(lba_manager), std::move(epm), - std::move(backref_manager)); + std::move(backref_manager), + shard_stats); } } diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 66574cc4225..8db88628ed9 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -65,7 +65,8 @@ public: CacheRef cache, LBAManagerRef lba_manager, ExtentPlacementManagerRef &&epm, - BackrefManagerRef&& backref_manager); + BackrefManagerRef&& backref_manager, + shard_stats_t& shard_stats); /// Writes initial metadata to disk using mkfs_ertr = base_ertr; @@ -79,6 +80,11 @@ public: using close_ertr = base_ertr; close_ertr::future<> close(); + device_stats_t get_device_stats(bool report_detail) const { + writer_stats_t journal_stats = journal->get_writer_stats(); + return epm->get_device_stats(journal_stats, report_detail); + } + /// Resets transaction void reset_transaction_preserve_handle(Transaction &t) { return cache->reset_transaction_preserve_handle(t); @@ -176,14 +182,27 @@ public: Transaction &t, LBAMappingRef pin) { - // checking the lba child must be atomic with creating - // and linking the absent child - auto ret = get_extent_if_linked<T>(t, std::move(pin)); - if (ret.index() == 1) { - return std::move(std::get<1>(ret)); + auto fut = base_iertr::make_ready_future<LBAMappingRef>(); + if (!pin->is_parent_valid()) { + fut = get_pin(t, pin->get_key() + ).handle_error_interruptible( + crimson::ct_error::enoent::assert_failure{"unexpected enoent"}, + 
crimson::ct_error::input_output_error::pass_further{} + ); } else { - return this->pin_to_extent<T>(t, std::move(std::get<0>(ret))); + pin->maybe_fix_pos(); + fut = base_iertr::make_ready_future<LBAMappingRef>(std::move(pin)); } + return fut.si_then([&t, this](auto npin) mutable { + // checking the lba child must be atomic with creating + // and linking the absent child + auto ret = get_extent_if_linked<T>(t, std::move(npin)); + if (ret.index() == 1) { + return std::move(std::get<1>(ret)); + } else { + return this->pin_to_extent<T>(t, std::move(std::get<0>(ret))); + } + }); } template <typename T> @@ -192,6 +211,9 @@ public: Transaction &t, LBAMappingRef pin) { + ceph_assert(pin->is_parent_valid()); + // checking the lba child must be atomic with creating + // and linking the absent child auto v = pin->get_logical_extent(t); if (v.has_child()) { return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) { @@ -215,6 +237,7 @@ public: LBAMappingRef pin, extent_types_t type) { + ceph_assert(!pin->parent_modified()); auto v = pin->get_logical_extent(t); // checking the lba child must be atomic with creating // and linking the absent child @@ -408,6 +431,7 @@ public: Transaction &t, LBAMappingRef &&pin, std::array<remap_entry, N> remaps) { + static_assert(std::is_base_of_v<LogicalCachedExtent, T>); #ifndef NDEBUG std::sort(remaps.begin(), remaps.end(), @@ -451,16 +475,31 @@ public: // The according extent might be stable or pending. auto fut = base_iertr::now(); if (!pin->is_indirect()) { - auto fut2 = base_iertr::make_ready_future<TCachedExtentRef<T>>(); - if (full_extent_integrity_check) { - fut2 = read_pin<T>(t, pin->duplicate()); + if (!pin->is_parent_valid()) { + fut = get_pin(t, pin->get_key() + ).si_then([&pin](auto npin) { + assert(npin); + pin = std::move(npin); + return seastar::now(); + }).handle_error_interruptible( + crimson::ct_error::enoent::assert_failure{"unexpected enoent"}, + crimson::ct_error::input_output_error::pass_further{} + ); } else { - auto ret = get_extent_if_linked<T>(t, pin->duplicate()); - if (ret.index() == 1) { - fut2 = std::move(std::get<1>(ret)); - } + pin->maybe_fix_pos(); } - fut = fut2.si_then([this, &t, &remaps, original_paddr, + + fut = fut.si_then([this, &t, &pin] { + if (full_extent_integrity_check) { + return read_pin<T>(t, pin->duplicate()); + } else { + auto ret = get_extent_if_linked<T>(t, pin->duplicate()); + if (ret.index() == 1) { + return std::move(std::get<1>(ret)); + } + } + return base_iertr::make_ready_future<TCachedExtentRef<T>>(); + }).si_then([this, &t, &remaps, original_paddr, original_laddr, original_len, &extents, FNAME](auto ext) mutable { ceph_assert(full_extent_integrity_check @@ -491,13 +530,14 @@ public: SUBDEBUGT(seastore_tm, "remap laddr: {}, remap paddr: {}, remap length: {}", t, remap_laddr, remap_paddr, remap_len); - extents.emplace_back(cache->alloc_remapped_extent<T>( + auto extent = cache->alloc_remapped_extent<T>( t, remap_laddr, remap_paddr, remap_len, original_laddr, - original_bptr)); + original_bptr); + extents.emplace_back(std::move(extent)); } }); } @@ -623,6 +663,10 @@ public: * ExtentCallbackInterface */ + shard_stats_t& get_shard_stats() { + return shard_stats; + } + /// weak transaction should be type READ TransactionRef create_transaction( Transaction::src_t src, @@ -795,6 +839,8 @@ private: bool full_extent_integrity_check = true; + shard_stats_t& shard_stats; + rewrite_extent_ret rewrite_logical_extent( Transaction& t, LogicalCachedExtentRef extent); @@ -971,5 +1017,6 @@ using TransactionManagerRef 
= std::unique_ptr<TransactionManager>; TransactionManagerRef make_transaction_manager( Device *primary_device, const std::vector<Device*> &secondary_devices, + shard_stats_t& shard_stats, bool is_test); } diff --git a/src/crimson/osd/main_config_bootstrap_helpers.cc b/src/crimson/osd/main_config_bootstrap_helpers.cc index cbb22ec0e6b..c4e7fb72e47 100644 --- a/src/crimson/osd/main_config_bootstrap_helpers.cc +++ b/src/crimson/osd/main_config_bootstrap_helpers.cc @@ -150,16 +150,32 @@ _get_early_config(int argc, const char *argv[]) std::end(early_args), [](auto* arg) { return "--cpuset"sv == arg; }); found == std::end(early_args)) { - auto smp_config = crimson::common::get_conf<std::string>("crimson_seastar_cpu_cores"); - if (!smp_config.empty()) { + auto cpu_cores = crimson::common::get_conf<std::string>("crimson_seastar_cpu_cores"); + if (!cpu_cores.empty()) { // Set --cpuset based on crimson_seastar_cpu_cores config option // --smp default is one per CPU ret.early_args.emplace_back("--cpuset"); - ret.early_args.emplace_back(smp_config); - logger().info("get_early_config: set --cpuset {}", smp_config); + ret.early_args.emplace_back(cpu_cores); + ret.early_args.emplace_back("--thread-affinity"); + ret.early_args.emplace_back("1"); + logger().info("get_early_config: set --thread-affinity 1 --cpuset {}", + cpu_cores); } else { - logger().warn("get_early_config: no cpuset specified, falling back" - " to seastar's default of: all"); + auto reactor_num = crimson::common::get_conf<uint64_t>("crimson_seastar_num_threads"); + if (!reactor_num) { + logger().error("get_early_config: crimson_seastar_cpu_cores" + " or crimson_seastar_num_threads" + " must be set"); + ceph_abort(); + } + std::string smp = fmt::format("{}", reactor_num); + ret.early_args.emplace_back("--smp"); + ret.early_args.emplace_back(smp); + ret.early_args.emplace_back("--thread-affinity"); + ret.early_args.emplace_back("0"); + logger().info("get_early_config: set --thread-affinity 0 --smp {}", + smp); + } } else { logger().error("get_early_config: --cpuset can be " diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index e1a3cc92987..4148e3b592c 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -61,6 +61,9 @@ class ObjectContext : public ceph::common::intrusive_lru_base< ceph::common::intrusive_lru_config< hobject_t, ObjectContext, obc_to_hoid<ObjectContext>>> { +private: + tri_mutex lock; + public: ObjectState obs; SnapSetContextRef ssc; @@ -70,7 +73,8 @@ public: using watch_key_t = std::pair<uint64_t, entity_name_t>; std::map<watch_key_t, seastar::shared_ptr<crimson::osd::Watch>> watchers; - ObjectContext(hobject_t hoid) : obs(std::move(hoid)) {} + ObjectContext(hobject_t hoid) : lock(hoid), + obs(std::move(hoid)) {} const hobject_t &get_oid() const { return obs.oi.soid; @@ -112,38 +116,33 @@ public: template<typename Exception> void interrupt(Exception ex) { lock.abort(std::move(ex)); - if (recovery_read_marker) { - drop_recovery_read(); - } } - bool is_loaded_and_valid() const { - return fully_loaded && !invalidated_by_interval_change; + bool is_loaded() const { + return fully_loaded; } -private: - tri_mutex lock; - bool recovery_read_marker = false; + bool is_valid() const { + return !invalidated_by_interval_change; + } +private: template <typename Lock, typename Func> auto _with_lock(Lock& lock, Func&& func) { - Ref obc = this; - return lock.lock().then([&lock, func = std::forward<Func>(func), obc]() mutable { - return 
seastar::futurize_invoke(func).finally([&lock, obc] { + return lock.lock( + ).then([&lock, func=std::forward<Func>(func), obc=Ref(this)]() mutable { + return seastar::futurize_invoke( + func + ).finally([&lock, obc=std::move(obc)] { + /* We chain the finally block here because it's possible for lock.lock() + * above to fail due to a call to ObjectContext::interrupt, which calls + * tri_mutex::abort. In the event of such an error, the lock isn't + * actually taken and calling unlock() would be incorrect. */ lock.unlock(); }); }); } - template <typename Lock, typename Func> - auto _with_promoted_lock(Lock& lock, Func&& func) { - Ref obc = this; - lock.lock(); - return seastar::futurize_invoke(func).finally([&lock, obc] { - lock.unlock(); - }); - } - boost::intrusive::list_member_hook<> obc_accessing_hook; uint64_t list_link_cnt = 0; bool fully_loaded = false; @@ -204,36 +203,84 @@ public: } } } - template<RWState::State Type, typename InterruptCond = void, typename Func> - auto with_promoted_lock(Func&& func) { - if constexpr (!std::is_void_v<InterruptCond>) { - auto wrapper = ::crimson::interruptible::interruptor<InterruptCond>::wrap_function(std::forward<Func>(func)); - switch (Type) { - case RWState::RWWRITE: - return _with_promoted_lock(lock.excl_from_write(), std::move(wrapper)); - case RWState::RWREAD: - return _with_promoted_lock(lock.excl_from_read(), std::move(wrapper)); - case RWState::RWEXCL: - return seastar::futurize_invoke(std::move(wrapper)); - case RWState::RWNONE: - return _with_lock(lock.for_excl(), std::move(wrapper)); - default: - assert(0 == "noop"); + + /** + * load_then_with_lock + * + * Takes two functions as arguments -- load_func to be invoked + * with an exclusive lock, and func to be invoked under the + * lock type specified by the Type template argument. + * + * Caller must ensure that *this is not already locked, presumably + * by invoking load_then_with_lock immediately after construction. + * + * @param [in] load_func Function to be invoked under excl lock + * @param [in] func Function to be invoked after load_func under + * lock of type Type. 
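 *
 * Editor's sketch of intended usage (hypothetical caller code, not part
 * of this patch):
 *
 *   obc->load_then_with_lock<RWState::RWREAD>(
 *     [&] { return loader.load_obc(obc); },   // runs under the excl lock
 *     [&] { return read_object(obc); });      // runs under the read lock
 *
 * lock_state_t below acquires the excl lock in its constructor, demote()
 * downgrades it to the requested mode once load_func completes, and the
 * destructor releases whichever mode is still held, so the lock is
 * dropped on every path, including errors raised by load_func.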
+ */ + template<RWState::State Type, typename Func, typename Func2> + auto load_then_with_lock(Func &&load_func, Func2 &&func) { + class lock_state_t { + tri_mutex *lock = nullptr; + bool excl = false; + + public: + lock_state_t(tri_mutex &lock) : lock(&lock), excl(true) { + ceph_assert(lock.try_lock_for_excl()); } - } else { - switch (Type) { - case RWState::RWWRITE: - return _with_promoted_lock(lock.excl_from_write(), std::forward<Func>(func)); - case RWState::RWREAD: - return _with_promoted_lock(lock.excl_from_read(), std::forward<Func>(func)); - case RWState::RWEXCL: - return seastar::futurize_invoke(std::forward<Func>(func)); - case RWState::RWNONE: - return _with_lock(lock.for_excl(), std::forward<Func>(func)); - default: - assert(0 == "noop"); + lock_state_t(lock_state_t &&o) : lock(o.lock), excl(o.excl) { + o.lock = nullptr; + o.excl = false; } - } + lock_state_t() = delete; + lock_state_t &operator=(lock_state_t &&o) = delete; + lock_state_t(const lock_state_t &o) = delete; + lock_state_t &operator=(const lock_state_t &o) = delete; + + void demote() { + ceph_assert(excl); + ceph_assert(lock); + if constexpr (Type == RWState::RWWRITE) { + lock->demote_to_write(); + } else if constexpr (Type == RWState::RWREAD) { + lock->demote_to_read(); + } else if constexpr (Type == RWState::RWNONE) { + lock->unlock_for_excl(); + } + excl = false; + } + + ~lock_state_t() { + if (!lock) + return; + + if constexpr (Type == RWState::RWEXCL) { + lock->unlock_for_excl(); + } else { + if (excl) { + lock->unlock_for_excl(); + return; + } + + if constexpr (Type == RWState::RWWRITE) { + lock->unlock_for_write(); + } else if constexpr (Type == RWState::RWREAD) { + lock->unlock_for_read(); + } + } + } + }; + + return seastar::do_with( + lock_state_t{lock}, + [load_func=std::move(load_func), func=std::move(func)](auto &ls) mutable { + return std::invoke( + std::move(load_func) + ).si_then([func=std::move(func), &ls]() mutable { + ls.demote(); + return std::invoke(std::move(func)); + }); + }); } bool empty() const { @@ -242,26 +289,6 @@ public: bool is_request_pending() const { return lock.is_acquired(); } - - bool get_recovery_read() { - if (lock.try_lock_for_read()) { - recovery_read_marker = true; - return true; - } else { - return false; - } - } - void wait_recovery_read() { - assert(lock.get_readers() > 0); - recovery_read_marker = true; - } - void drop_recovery_read() { - assert(recovery_read_marker); - recovery_read_marker = false; - } - bool maybe_get_excl() { - return lock.try_lock_for_excl(); - } }; using ObjectContextRef = ObjectContext::Ref; diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index b53cbabd04c..8ecb1d4b8ef 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -12,26 +12,15 @@ using crimson::common::local_conf; ObjectContextLoader::with_head_obc(const hobject_t& oid, with_obc_func_t&& func) { - LOG_PREFIX(ObjectContextLoader::with_head_obc); - auto [obc, existed] = obc_registry.get_cached_obc(oid); - DEBUGDPP("object {}", dpp, obc->get_oid()); - assert(obc->is_head()); - obc->append_to(obc_set_accessing); - return obc->with_lock<State, IOInterruptCondition>( - [existed=existed, obc=obc, func=std::move(func), this] { - return get_or_load_obc<State>(obc, existed) - .safe_then_interruptible( - [func = std::move(func)](auto obc) { + return with_locked_obc<State, true /* track */>( + oid, + [func=std::move(func)](auto obc) { // The template with_obc_func_t wrapper supports two obcs (head and 
clone). // In the 'with_head_obc' case, however, only the head is in use. // Pass the same head obc twice in order // to support the generic with_obc structure. - return std::move(func)(obc, obc); + return std::invoke(std::move(func), obc, obc); }); - }).finally([FNAME, this, obc=std::move(obc)] { - DEBUGDPP("released object {}", dpp, obc->get_oid()); - obc->remove_from(obc_set_accessing); - }); } template<RWState::State State> @@ -67,7 +56,7 @@ using crimson::common::local_conf; bool resolve_clone) { LOG_PREFIX(ObjectContextLoader::with_clone_obc_only); - DEBUGDPP("{}", clone_oid); + DEBUGDPP("{}", dpp, clone_oid); assert(!clone_oid.is_head()); if (resolve_clone) { auto resolved_oid = resolve_oid(head->get_head_ss(), clone_oid); @@ -83,18 +72,12 @@ using crimson::common::local_conf; } clone_oid = *resolved_oid; } - auto [clone, existed] = obc_registry.get_cached_obc(clone_oid); - return clone->template with_lock<State, IOInterruptCondition>( - [existed=existed, clone=std::move(clone), - func=std::move(func), head=std::move(head), this]() mutable - -> load_obc_iertr::future<> { - auto loaded = get_or_load_obc<State>(clone, existed); - return loaded.safe_then_interruptible( - [func = std::move(func), head=std::move(head)](auto clone) mutable { + return with_locked_obc<State, false /* don't track */>( + clone_oid, + [head=std::move(head), func=std::move(func)](auto clone) { clone->set_clone_ssc(head->ssc); return std::move(func)(std::move(head), std::move(clone)); }); - }); } template<RWState::State State> @@ -110,14 +93,55 @@ using crimson::common::local_conf; } } - ObjectContextLoader::load_obc_iertr::future<ObjectContextRef> + template<RWState::State State, bool track, typename Func> + ObjectContextLoader::load_obc_iertr::future<> + ObjectContextLoader::with_locked_obc(const hobject_t& oid, + Func&& func) + { + LOG_PREFIX(ObjectContextLoader::with_locked_obc); + auto [obc, existed] = obc_registry.get_cached_obc(oid); + DEBUGDPP("object {} existed {}", + dpp, obc->get_oid(), existed); + if constexpr (track) { + obc->append_to(obc_set_accessing); + } + if (existed) { + return obc->with_lock<State, IOInterruptCondition>( + [func=std::move(func), obc=ObjectContextRef(obc)] { + return std::invoke(std::move(func), obc); + } + ).finally([FNAME, this, obc=ObjectContextRef(obc)] { + DEBUGDPP("released object {}", dpp, obc->get_oid()); + if constexpr (track) { + obc->remove_from(obc_set_accessing); + } + }); + } else { + return obc->load_then_with_lock<State> ( + [this, obc=ObjectContextRef(obc)] { + return load_obc(obc); + }, + [func=std::move(func), obc=ObjectContextRef(obc)] { + return std::invoke(std::move(func), obc); + } + ).finally([FNAME, this, obc=ObjectContextRef(obc)] { + DEBUGDPP("released object {}", dpp, obc->get_oid()); + if constexpr (track) { + obc->remove_from(obc_set_accessing); + } + }); + } + } + + + ObjectContextLoader::load_obc_iertr::future<> ObjectContextLoader::load_obc(ObjectContextRef obc) { LOG_PREFIX(ObjectContextLoader::load_obc); return backend.load_metadata(obc->get_oid()) .safe_then_interruptible( [FNAME, this, obc=std::move(obc)](auto md) - -> load_obc_ertr::future<ObjectContextRef> { + -> load_obc_ertr::future<> { const hobject_t& oid = md->os.oi.soid; DEBUGDPP("loaded obs {} for {}", dpp, md->os.oi, oid); if (oid.is_head()) { @@ -133,41 +157,11 @@ using crimson::common::local_conf; // See set_clone_ssc obc->set_clone_state(std::move(md->os)); } - DEBUGDPP("returning obc {} for {}", dpp, obc->obs.oi, obc->obs.oi.soid); - return 
load_obc_ertr::make_ready_future<ObjectContextRef>(obc); + DEBUGDPP("loaded obc {} for {}", dpp, obc->obs.oi, obc->obs.oi.soid); + return seastar::now(); }); } - template<RWState::State State> - ObjectContextLoader::load_obc_iertr::future<ObjectContextRef> - ObjectContextLoader::get_or_load_obc(ObjectContextRef obc, - bool existed) - { - LOG_PREFIX(ObjectContextLoader::get_or_load_obc); - auto loaded = - load_obc_iertr::make_ready_future<ObjectContextRef>(obc); - if (existed) { - if (!obc->is_loaded_and_valid()) { - ERRORDPP( - "obc for {} invalid -- fully_loaded={}, " - "invalidated_by_interval_change={}", - dpp, obc->get_oid(), - obc->fully_loaded, obc->invalidated_by_interval_change - ); - } - ceph_assert(obc->is_loaded_and_valid()); - DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); - } else { - DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); - loaded = - obc->template with_promoted_lock<State, IOInterruptCondition>( - [obc, this] { - return load_obc(obc); - }); - } - return loaded; - } - ObjectContextLoader::load_obc_iertr::future<> ObjectContextLoader::reload_obc(ObjectContext& obc) const { diff --git a/src/crimson/osd/object_context_loader.h b/src/crimson/osd/object_context_loader.h index 77805e11bc1..277708eca4f 100644 --- a/src/crimson/osd/object_context_loader.h +++ b/src/crimson/osd/object_context_loader.h @@ -29,6 +29,9 @@ public: ::crimson::osd::IOInterruptCondition, load_obc_ertr>; + using interruptor = ::crimson::interruptible::interruptor< + ::crimson::osd::IOInterruptCondition>; + using with_obc_func_t = std::function<load_obc_iertr::future<> (ObjectContextRef, ObjectContextRef)>; @@ -72,12 +75,15 @@ private: load_obc_iertr::future<> with_head_obc(const hobject_t& oid, with_obc_func_t&& func); + template<RWState::State State, bool track, typename Func> + load_obc_iertr::future<> with_locked_obc(const hobject_t& oid, + Func&& func); + template<RWState::State State> load_obc_iertr::future<ObjectContextRef> get_or_load_obc(ObjectContextRef obc, bool existed); - load_obc_iertr::future<ObjectContextRef> - load_obc(ObjectContextRef obc); + load_obc_iertr::future<> load_obc(ObjectContextRef obc); }; } diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index 3c7e9c86b1c..656caa92ca4 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -489,6 +489,11 @@ OpsExecuter::list_snaps_iertr::future<> OpsExecuter::do_list_snaps( const ObjectState& os, const SnapSet& ss) { + if (msg->get_snapid() != CEPH_SNAPDIR) { + logger().debug("LIST_SNAPS with incorrect context"); + return crimson::ct_error::invarg::make(); + } + obj_list_snap_response_t resp; resp.clones.reserve(ss.clones.size() + 1); for (auto &clone: ss.clones) { @@ -980,7 +985,6 @@ void OpsExecuter::update_clone_overlap() { &cloning_ctx->new_snapset.clone_overlap.rbegin()->second; } else if (op_info.may_write() && obc->obs.exists - && !snapc.snaps.empty() && !obc->ssc->snapset.clones.empty()) { newest_overlap = &obc->ssc->snapset.clone_overlap.rbegin()->second; diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 92d7b89c4a4..834266ce68f 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -107,6 +107,7 @@ public: virtual uint64_t get_features() const = 0; virtual bool has_flag(uint32_t flag) const = 0; virtual entity_name_t get_source() const = 0; + virtual snapid_t get_snapid() const = 0; }; template <class ImplT> @@ -144,6 +145,9 @@ public: uint64_t get_features() const final { return 
pimpl->get_features(); } + snapid_t get_snapid() const final { + return pimpl->get_snapid(); + } }; // because OpsExecuter is pretty heavy-weight object we want to ensure diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 6b43abca512..e75e4b2e365 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -361,7 +361,12 @@ seastar::future<> OSD::start() { LOG_PREFIX(OSD::start); INFO("seastar::smp::count {}", seastar::smp::count); - + if (auto cpu_cores = + local_conf().get_val<std::string>("crimson_seastar_cpu_cores"); + cpu_cores.empty()) { + clog->warn() << "for optimal performance please set " + "crimson_seastar_cpu_cores"; + } startup_time = ceph::mono_clock::now(); ceph_assert(seastar::this_shard_id() == PRIMARY_CORE); return store.start().then([this] { @@ -385,26 +390,6 @@ seastar::future<> OSD::start() std::ref(osd_states)); }); }).then([this, FNAME] { - auto stats_seconds = local_conf().get_val<int64_t>("crimson_osd_stat_interval"); - if (stats_seconds > 0) { - shard_stats.resize(seastar::smp::count); - stats_timer.set_callback([this, FNAME] { - std::ignore = shard_services.invoke_on_all( - [this](auto &local_service) { - auto stats = local_service.report_stats(); - shard_stats[seastar::this_shard_id()] = stats; - }).then([this, FNAME] { - std::ostringstream oss; - for (const auto &stats : shard_stats) { - oss << int(stats.reactor_utilization); - oss << ","; - } - INFO("reactor_utilizations: {}", oss.str()); - }); - }); - stats_timer.arm_periodic(std::chrono::seconds(stats_seconds)); - } - heartbeat.reset(new Heartbeat{ whoami, get_shard_services(), *monc, *hb_front_msgr, *hb_back_msgr}); @@ -414,7 +399,37 @@ seastar::future<> OSD::start() local_conf().get_val<std::string>("osd_data"), ec.value(), ec.message()); })); - }).then([this] { + }).then([this, FNAME] { + auto stats_seconds = local_conf().get_val<int64_t>("crimson_osd_stat_interval"); + if (stats_seconds > 0) { + shard_stats.resize(seastar::smp::count); + stats_timer.set_callback([this, FNAME] { + gate.dispatch_in_background("stats_osd", *this, [this, FNAME] { + return shard_services.invoke_on_all( + [this](auto &local_service) { + auto stats = local_service.report_stats(); + shard_stats[seastar::this_shard_id()] = stats; + }).then([this, FNAME] { + std::ostringstream oss; + double agg_ru = 0; + int cnt = 0; + for (const auto &stats : shard_stats) { + agg_ru += stats.reactor_utilization; + ++cnt; + oss << int(stats.reactor_utilization); + oss << ","; + } + INFO("reactor_utilizations: {}({})", + int(agg_ru/cnt), oss.str()); + }); + }); + gate.dispatch_in_background("stats_store", *this, [this] { + return store.report_stats(); + }); + }); + stats_timer.arm_periodic(std::chrono::seconds(stats_seconds)); + } + return open_meta_coll(); }).then([this] { return pg_shard_manager.get_meta_coll().load_superblock( diff --git a/src/crimson/osd/osd_operation.cc b/src/crimson/osd/osd_operation.cc index 920fdc11480..8442b605d39 100644 --- a/src/crimson/osd/osd_operation.cc +++ b/src/crimson/osd/osd_operation.cc @@ -33,18 +33,11 @@ void OSDOperationRegistry::do_stop() /* add_ref= */ false }; }); - last_of_recents = std::end(historic_registry); // to_ref_down is going off } OSDOperationRegistry::OSDOperationRegistry() - : OperationRegistryT(seastar::this_shard_id()) -{ - constexpr auto historic_reg_index = - static_cast<size_t>(OperationTypeCode::historic_client_request); - auto& historic_registry = get_registry<historic_reg_index>(); - last_of_recents = std::begin(historic_registry); -} + : 
OperationRegistryT(seastar::this_shard_id()) {} static auto get_duration(const ClientRequest& client_request) { @@ -55,50 +48,49 @@ static auto get_duration(const ClientRequest& client_request) void OSDOperationRegistry::put_historic(const ClientRequest& op) { + using crimson::common::local_conf; // unlink the op from the client request registry. this is a part of - // the re-link procedure. finally it will be in historic registry. - constexpr auto client_reg_index = - static_cast<size_t>(OperationTypeCode::client_request); + // the re-link procedure. finally it will be in historic/historic_slow registry. constexpr auto historic_reg_index = static_cast<size_t>(OperationTypeCode::historic_client_request); - auto& client_registry = get_registry<client_reg_index>(); - auto& historic_registry = get_registry<historic_reg_index>(); - historic_registry.splice(std::end(historic_registry), - client_registry, - client_registry.iterator_to(op)); - ClientRequest::ICRef( - &op, /* add_ref= */true - ).detach(); // yes, "leak" it for now! - - // check whether the history size limit is not exceeded; if so, then - // purge the oldest op. - // NOTE: Operation uses the auto-unlink feature of boost::intrusive. - // NOTE: the cleaning happens in OSDOperationRegistry::do_stop() - using crimson::common::local_conf; - if (num_recent_ops >= local_conf()->osd_op_history_size) { - ++last_of_recents; - ++num_slow_ops; + constexpr auto slow_historic_reg_index = + static_cast<size_t>(OperationTypeCode::historic_slow_client_request); + + if (get_duration(op) > local_conf()->osd_op_complaint_time) { + auto& slow_historic_registry = get_registry<slow_historic_reg_index>(); + _put_historic(slow_historic_registry, + op, + local_conf()->osd_op_history_slow_op_size); } else { - ++num_recent_ops; + auto& historic_registry = get_registry<historic_reg_index>(); + _put_historic(historic_registry, + op, + local_conf()->osd_op_history_size); } - if (num_slow_ops > local_conf()->osd_op_history_slow_op_size) { - // we're interested in keeping slowest ops. if the slow op history - // is disabled, the list will have only one element, so the full-blown - // search will boil down into `.front()`. - const auto fastest_historic_iter = std::min_element( - std::cbegin(historic_registry), last_of_recents, - [] (const auto& lop, const auto& rop) { - const auto& lclient_request = static_cast<const ClientRequest&>(lop); - const auto& rclient_request = static_cast<const ClientRequest&>(rop); - return get_duration(lclient_request) < get_duration(rclient_request); - }); - assert(fastest_historic_iter != std::end(historic_registry)); - const auto& fastest_historic_op = - static_cast<const ClientRequest&>(*fastest_historic_iter); - historic_registry.erase(fastest_historic_iter); +} + +void OSDOperationRegistry::_put_historic( + op_list& list, + const class ClientRequest& op, + uint64_t max) +{ + constexpr auto client_reg_index = + static_cast<size_t>(OperationTypeCode::client_request); + auto& client_registry = get_registry<client_reg_index>(); + + // we only save the newest op + list.splice(std::end(list), client_registry, client_registry.iterator_to(op)); + ClientRequest::ICRef( + &op, /* add_ref= */true + ).detach(); // yes, "leak" it for now! 
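// [Editor's note] The "leak" above is a deliberate refcount pin: an ICRef
// built with add_ref=true bumps the op's refcount, and detach() then drops
// the handle without the matching decrement, keeping the op alive while it
// sits in the historic list. The decrement happens on eviction below (and
// in do_stop()) by building an ICRef with add_ref=false and letting it
// expire. A minimal sketch of the idiom, assuming an intrusively
// refcounted Op type:
//
//   boost::intrusive_ptr<Op> pin(&op, /* add_ref= */ true);
//   pin.detach();                      // keep the +1, forget the handle
//   // ... later, on eviction:
//   boost::intrusive_ptr<Op> unpin(&op, /* add_ref= */ false);
//   // unpin's destructor applies the deferred -1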
+ + if (list.size() >= max) { + auto old_op_ptr = &list.front(); + list.pop_front(); + const auto& old_op = + static_cast<const ClientRequest&>(*old_op_ptr); // clear a previously "leaked" op - ClientRequest::ICRef(&fastest_historic_op, /* add_ref= */false); - --num_slow_ops; + ClientRequest::ICRef(&old_op, /* add_ref= */false); } } @@ -125,33 +117,20 @@ size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) c size_t OSDOperationRegistry::dump_slowest_historic_client_requests(ceph::Formatter* f) const { - const auto& historic_client_registry = - get_registry<static_cast<size_t>(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>(); + const auto& slow_historic_client_registry = + get_registry<static_cast<size_t>(OperationTypeCode::historic_slow_client_request)>(); //ClientRequest::type)>(); f->open_object_section("op_history"); - f->dump_int("size", historic_client_registry.size()); + f->dump_int("size", slow_historic_client_registry.size()); // TODO: f->dump_int("duration", history_duration.load()); // the intrusive list is configured to not store the size - std::multimap<utime_t, - const ClientRequest*, - std::greater<utime_t>> sorted_slowest_ops; - // iterating over the entire registry as a slow op could be also - // in the "recently added" part. - std::transform(std::begin(historic_client_registry), - std::end(historic_client_registry), - std::inserter(sorted_slowest_ops, std::end(sorted_slowest_ops)), - [] (const Operation& op) { - const auto& cop = static_cast<const ClientRequest&>(op); - return std::make_pair(get_duration(cop), &cop); - }); - f->open_array_section("ops"); - using crimson::common::local_conf; size_t ops_count = 0; - for (auto it = std::begin(sorted_slowest_ops); - ops_count < local_conf()->osd_op_history_slow_op_size - && it != std::end(sorted_slowest_ops); - ++it, ++ops_count) { - it->second->dump(f); + f->open_array_section("ops"); + for (const auto& op : slow_historic_client_registry) { + op.dump(f); + ++ops_count; + } + f->close_section(); } f->close_section(); return ops_count; diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index 1064a5c8e03..fb0432edb8f 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -50,6 +50,7 @@ enum class OperationTypeCode { background_recovery_sub, internal_client_request, historic_client_request, + historic_slow_client_request, logmissing_request, logmissing_request_reply, snaptrim_event, @@ -72,6 +73,7 @@ static constexpr const char* const OP_NAMES[] = { "background_recovery_sub", "internal_client_request", "historic_client_request", + "historic_slow_client_request", "logmissing_request", "logmissing_request_reply", "snaptrim_event", @@ -225,12 +227,15 @@ struct OSDOperationRegistry : OperationRegistryT< void do_stop() override; void put_historic(const class ClientRequest& op); + void _put_historic( + op_list& list, + const class ClientRequest& op, + uint64_t max); size_t dump_historic_client_requests(ceph::Formatter* f) const; size_t dump_slowest_historic_client_requests(ceph::Formatter* f) const; private: - op_list::const_iterator last_of_recents; size_t num_recent_ops = 0; size_t num_slow_ops = 0; }; diff --git a/src/crimson/osd/osd_operations/background_recovery.cc b/src/crimson/osd/osd_operations/background_recovery.cc index f74933ec266..ac94ea7eb88 100644 --- a/src/crimson/osd/osd_operations/background_recovery.cc +++ b/src/crimson/osd/osd_operations/background_recovery.cc @@ -116,15 +116,19 @@ 
UrgentRecovery::do_recovery() { LOG_PREFIX(UrgentRecovery::do_recovery); DEBUGDPPI("{}: {}", *pg, __func__, *this); - if (!pg->has_reset_since(epoch_started)) { + if (pg->has_reset_since(epoch_started)) { + return seastar::make_ready_future<bool>(false); + } + + return pg->find_unfound(epoch_started + ).then_interruptible([this] { return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent, interruptor>([this] (auto&& trigger) { return pg->get_recovery_handler()->recover_missing(trigger, soid, need); }).then_interruptible([] { return seastar::make_ready_future<bool>(false); }); - } - return seastar::make_ready_future<bool>(false); + }); } void UrgentRecovery::print(std::ostream &lhs) const @@ -164,11 +168,14 @@ PglogBasedRecovery::do_recovery() if (pg->has_reset_since(epoch_started)) { return seastar::make_ready_future<bool>(false); } - return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent, - interruptor>([this] (auto&& trigger) { - return pg->get_recovery_handler()->start_recovery_ops( - trigger, - crimson::common::local_conf()->osd_recovery_max_single_start); + return pg->find_unfound(epoch_started + ).then_interruptible([this] { + return with_blocking_event<RecoveryBackend::RecoveryBlockingEvent, + interruptor>([this] (auto&& trigger) { + return pg->get_recovery_handler()->start_recovery_ops( + trigger, + crimson::common::local_conf()->osd_recovery_max_single_start); + }); }); } diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index b48e52ff31e..4a721d1277f 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -73,6 +73,9 @@ void ClientRequest::dump_detail(Formatter *f) const std::apply([f] (auto... event) { (..., event.dump(f)); }, tracking_events); + std::apply([f] (auto... 
event) { + (..., event.dump(f)); + }, get_instance_handle()->pg_tracking_events); } ConnectionPipeline &ClientRequest::get_connection_pipeline() @@ -167,6 +170,30 @@ ClientRequest::interruptible_future<> ClientRequest::with_pg_process_interruptib pg.wait_for_active_blocker, &decltype(pg.wait_for_active_blocker)::wait)); + if (int res = op_info.set_from_op(&*m, *pg.get_osdmap()); + res != 0) { + co_await reply_op_error(pgref, res); + co_return; + } + + if (!pg.is_primary()) { + // primary can handle both normal ops and balanced reads + if (is_misdirected(pg)) { + DEBUGDPP("{}.{}: dropping misdirected op", + pg, *this, this_instance_id); + co_return; + } else if (const hobject_t& hoid = m->get_hobj(); + !pg.get_peering_state().can_serve_replica_read(hoid)) { + DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary", + pg, *this, this_instance_id); + co_await reply_op_error(pgref, -EAGAIN); + co_return; + } else { + DEBUGDPP("{}.{}: serving replica read on oid {}", + pg, *this, this_instance_id, m->get_hobj()); + } + } + DEBUGDPP("{}.{}: pg active, entering process[_pg]_op", *pgref, *this, this_instance_id); @@ -332,11 +359,6 @@ ClientRequest::process_op( DEBUGDPP("{}.{}: entered get_obc stage, about to wait_scrub", *pg, *this, this_instance_id); - if (int res = op_info.set_from_op(&*m, *pg->get_osdmap()); - res != 0) { - co_await reply_op_error(pg, res); - co_return; - } co_await ihref.enter_blocker( *this, pg->scrubber, &decltype(pg->scrubber)::wait_scrub, m->get_hobj()); @@ -465,24 +487,6 @@ ClientRequest::do_process( co_return; } - if (!pg->is_primary()) { - // primary can handle both normal ops and balanced reads - if (is_misdirected(*pg)) { - DEBUGDPP("{}.{}: dropping misdirected op", - *pg, *this, this_instance_id); - co_return; - } else if (const hobject_t& hoid = m->get_hobj(); - !pg->get_peering_state().can_serve_replica_read(hoid)) { - DEBUGDPP("{}.{}: unstable write on replica, bouncing to primary", - *pg, *this, this_instance_id); - co_await reply_op_error(pg, -EAGAIN); - co_return; - } else { - DEBUGDPP("{}.{}: serving replica read on oid {}", - *pg, *this, this_instance_id, m->get_hobj()); - } - } - auto [submitted, all_completed] = co_await pg->do_osd_ops( m, r_conn, obc, op_info, snapc ); diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index 259d616ec24..e4284ba90d0 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -53,9 +53,6 @@ public: struct AwaitMap : OrderedExclusivePhaseT<AwaitMap> { static constexpr auto type_name = "ClientRequest::PGPipeline::await_map"; } await_map; - struct WaitRepop : OrderedConcurrentPhaseT<WaitRepop> { - static constexpr auto type_name = "ClientRequest::PGPipeline::wait_repop"; - } wait_repop; struct SendReply : OrderedExclusivePhaseT<SendReply> { static constexpr auto type_name = "ClientRequest::PGPipeline::send_reply"; } send_reply; @@ -180,6 +177,7 @@ public: instance_handle = new instance_handle_t; } auto get_instance_handle() { return instance_handle; } + auto get_instance_handle() const { return instance_handle; } std::set<snapid_t> snaps_need_to_recover() { std::set<snapid_t> ret; diff --git a/src/crimson/osd/osd_operations/client_request_common.cc b/src/crimson/osd/osd_operations/client_request_common.cc index 547b9f2db2f..c4439d5bb35 100644 --- a/src/crimson/osd/osd_operations/client_request_common.cc +++ b/src/crimson/osd/osd_operations/client_request_common.cc @@ -19,20 +19,34 @@ 
CommonClientRequest::do_recover_missing( const hobject_t& soid, const osd_reqid_t& reqid) { - eversion_t ver; - assert(pg->is_primary()); logger().debug("{} reqid {} check for recovery, {}", __func__, reqid, soid); + assert(pg->is_primary()); + eversion_t ver; auto &peering_state = pg->get_peering_state(); auto &missing_loc = peering_state.get_missing_loc(); - bool needs_recovery = missing_loc.needs_recovery(soid, &ver); - if (!pg->is_unreadable_object(soid) && - !pg->is_degraded_or_backfilling_object(soid)) { + bool needs_recovery_or_backfill = false; + + if (pg->is_unreadable_object(soid)) { + logger().debug("{} reqid {}, {} is unreadable", + __func__, reqid, soid); + ceph_assert(missing_loc.needs_recovery(soid, &ver)); + needs_recovery_or_backfill = true; + } + + if (pg->is_degraded_or_backfilling_object(soid)) { + logger().debug("{} reqid {}, {} is degraded or backfilling", + __func__, reqid, soid); + if (missing_loc.needs_recovery(soid, &ver)) { + needs_recovery_or_backfill = true; + } + } + + if (!needs_recovery_or_backfill) { logger().debug("{} reqid {} nothing to recover {}", __func__, reqid, soid); return seastar::now(); } - ceph_assert(needs_recovery); logger().debug("{} reqid {} need to wait for recovery, {} version {}", __func__, reqid, soid, ver); diff --git a/src/crimson/osd/osd_operations/common/pg_pipeline.h b/src/crimson/osd/osd_operations/common/pg_pipeline.h index d6a5f686654..d13dbe2e0d2 100644 --- a/src/crimson/osd/osd_operations/common/pg_pipeline.h +++ b/src/crimson/osd/osd_operations/common/pg_pipeline.h @@ -29,6 +29,9 @@ protected: struct Process : OrderedExclusivePhaseT<Process> { static constexpr auto type_name = "CommonPGPipeline::process"; } process; + struct WaitRepop : OrderedConcurrentPhaseT<WaitRepop> { + static constexpr auto type_name = "ClientRequest::PGPipeline::wait_repop"; + } wait_repop; }; } // namespace crimson::osd diff --git a/src/crimson/osd/osd_operations/logmissing_request.cc b/src/crimson/osd/osd_operations/logmissing_request.cc index 7e979131f06..cc3448fb1ce 100644 --- a/src/crimson/osd/osd_operations/logmissing_request.cc +++ b/src/crimson/osd/osd_operations/logmissing_request.cc @@ -49,7 +49,7 @@ void LogMissingRequest::dump_detail(Formatter *f) const ConnectionPipeline &LogMissingRequest::get_connection_pipeline() { return get_osd_priv(&get_local_connection() - ).client_request_conn_pipeline; + ).replicated_request_conn_pipeline; } PerShardPipeline &LogMissingRequest::get_pershard_pipeline( diff --git a/src/crimson/osd/osd_operations/peering_event.cc b/src/crimson/osd/osd_operations/peering_event.cc index 5c5c73e0086..6c1afd3d336 100644 --- a/src/crimson/osd/osd_operations/peering_event.cc +++ b/src/crimson/osd/osd_operations/peering_event.cc @@ -136,7 +136,7 @@ PeeringEvent<T>::complete_rctx(ShardServices &shard_services, Ref<PG> pg) ConnectionPipeline &RemotePeeringEvent::get_connection_pipeline() { return get_osd_priv(&get_local_connection() - ).client_request_conn_pipeline; + ).peering_request_conn_pipeline; } PerShardPipeline &RemotePeeringEvent::get_pershard_pipeline( diff --git a/src/crimson/osd/osd_operations/recovery_subrequest.cc b/src/crimson/osd/osd_operations/recovery_subrequest.cc index 06a3be1662c..e333e4b5c0a 100644 --- a/src/crimson/osd/osd_operations/recovery_subrequest.cc +++ b/src/crimson/osd/osd_operations/recovery_subrequest.cc @@ -53,7 +53,7 @@ seastar::future<> RecoverySubRequest::with_pg( ConnectionPipeline &RecoverySubRequest::get_connection_pipeline() { return get_osd_priv(&get_local_connection() - 
).client_request_conn_pipeline; + ).peering_request_conn_pipeline; } PerShardPipeline &RecoverySubRequest::get_pershard_pipeline( diff --git a/src/crimson/osd/osd_operations/replicated_request.cc b/src/crimson/osd/osd_operations/replicated_request.cc index dc2adc37efe..30adde0445a 100644 --- a/src/crimson/osd/osd_operations/replicated_request.cc +++ b/src/crimson/osd/osd_operations/replicated_request.cc @@ -49,7 +49,7 @@ void RepRequest::dump_detail(Formatter *f) const ConnectionPipeline &RepRequest::get_connection_pipeline() { return get_osd_priv(&get_local_connection() - ).client_request_conn_pipeline; + ).replicated_request_conn_pipeline; } PerShardPipeline &RepRequest::get_pershard_pipeline( diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index 49f51ecbf2d..c853de513c8 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -1,10 +1,12 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include "crimson/common/coroutine.h" #include "crimson/osd/osd_operations/snaptrim_event.h" #include "crimson/osd/ops_executer.h" #include "crimson/osd/pg.h" #include <seastar/core/sleep.hh> +#include <seastar/util/defer.hh> namespace { seastar::logger& logger() { @@ -63,30 +65,18 @@ CommonPGPipeline& SnapTrimEvent::client_pp() SnapTrimEvent::snap_trim_event_ret_t SnapTrimEvent::start() { + ceph_assert(pg->is_active_clean()); + + /* TODO: add a way to expose progress via the optracker without misusing + * pipeline stages. https://tracker.ceph.com/issues/66473 */ ShardServices &shard_services = pg->get_shard_services(); - return enter_stage<interruptor>( - client_pp().wait_for_active - ).then_interruptible([this] { - return with_blocking_event<PGActivationBlocker::BlockingEvent, - interruptor>([this] (auto&& trigger) { - return pg->wait_for_active_blocker.wait(std::move(trigger)); + { + co_await pg->background_process_lock.lock_with_op(*this); + auto unlocker = seastar::defer([this] { + pg->background_process_lock.unlock(); }); - }).then_interruptible([this] { - return enter_stage<interruptor>( - client_pp().recover_missing); - }).then_interruptible([] { - //return do_recover_missing(pg, get_target_oid()); - return seastar::now(); - }).then_interruptible([this] { - return enter_stage<interruptor>( - client_pp().get_obc); - }).then_interruptible([this] { - return pg->background_process_lock.lock_with_op(*this); - }).then_interruptible([this] { - return enter_stage<interruptor>( - client_pp().process); - }).then_interruptible([&shard_services, this] { - return interruptor::async([this] { + + auto to_trim_fut = interruptor::async([this] { using crimson::common::local_conf; const auto max = local_conf().get_val<uint64_t>("osd_pg_max_concurrent_snap_trims"); @@ -100,65 +90,42 @@ SnapTrimEvent::start() } logger().debug("{}: async almost done line {}", *this, __LINE__); return std::move(*to_trim); - }).then_interruptible([&shard_services, this] (const auto& to_trim) { - if (to_trim.empty()) { - // the legit ENOENT -> done - logger().debug("{}: to_trim is empty! 
Stopping iteration", *this); - pg->background_process_lock.unlock(); - return snap_trim_iertr::make_ready_future<seastar::stop_iteration>( - seastar::stop_iteration::yes); - } - return [&shard_services, this](const auto &to_trim) { - for (const auto& object : to_trim) { - logger().debug("{}: trimming {}", *this, object); - subop_blocker.emplace_back( - shard_services.start_operation_may_interrupt< - interruptor, SnapTrimObjSubEvent>( - pg, - object, - snapid)); - } - return interruptor::now(); - }(to_trim).then_interruptible([this] { - return enter_stage<interruptor>(wait_subop); - }).then_interruptible([this] { - logger().debug("{}: awaiting completion", *this); - return subop_blocker.interruptible_wait_completion(); - }).finally([this] { - pg->background_process_lock.unlock(); - }).si_then([this] { - if (!needs_pause) { - return interruptor::now(); - } - // let's know operators we're waiting - return enter_stage<interruptor>( - wait_trim_timer - ).then_interruptible([this] { - using crimson::common::local_conf; - const auto time_to_sleep = - local_conf().template get_val<double>("osd_snap_trim_sleep"); - logger().debug("{}: time_to_sleep {}", *this, time_to_sleep); - // TODO: this logic should be more sophisticated and distinguish - // between SSDs, HDDs and the hybrid case - return seastar::sleep( - std::chrono::milliseconds(std::lround(time_to_sleep * 1000))); - }); - }).si_then([this] { - logger().debug("{}: all completed", *this); - return snap_trim_iertr::make_ready_future<seastar::stop_iteration>( - seastar::stop_iteration::no); - }); - }).si_then([this](auto stop) { - return handle.complete().then([stop] { - return snap_trim_iertr::make_ready_future<seastar::stop_iteration>(stop); - }); }); - }).finally([this] { - // This SnapTrimEvent op lifetime is maintained within - // PerShardState::start_operation() implementation. - logger().debug("{}: exit", *this); - handle.exit(); - }); + auto to_trim = co_await std::move(to_trim_fut); + + if (to_trim.empty()) { + // the legit ENOENT -> done + logger().debug("{}: to_trim is empty! 
Stopping iteration", *this); + co_return seastar::stop_iteration::yes; + } + for (const auto& object : to_trim) { + logger().debug("{}: trimming {}", *this, object); + subop_blocker.emplace_back( + shard_services.start_operation_may_interrupt< + interruptor, SnapTrimObjSubEvent>( + pg, + object, + snapid)); + } + + logger().debug("{}: awaiting completion", *this); + co_await subop_blocker.interruptible_wait_completion(); + } + + if (needs_pause) { + using crimson::common::local_conf; + const auto time_to_sleep = + local_conf().template get_val<double>("osd_snap_trim_sleep"); + logger().debug("{}: time_to_sleep {}", *this, time_to_sleep); + // TODO: this logic should be more sophisticated and distinguish + // between SSDs, HDDs and the hybrid case + co_await interruptor::make_interruptible( + seastar::sleep( + std::chrono::milliseconds(std::lround(time_to_sleep * 1000)))); + } + + logger().debug("{}: all completed", *this); + co_return seastar::stop_iteration::no; } @@ -418,65 +385,55 @@ SnapTrimObjSubEvent::remove_or_update( SnapTrimObjSubEvent::snap_trim_obj_subevent_ret_t SnapTrimObjSubEvent::start() { - return enter_stage<interruptor>( - client_pp().wait_for_active - ).then_interruptible([this] { - return with_blocking_event<PGActivationBlocker::BlockingEvent, - interruptor>([this] (auto&& trigger) { - return pg->wait_for_active_blocker.wait(std::move(trigger)); - }); - }).then_interruptible([this] { - return enter_stage<interruptor>( - client_pp().recover_missing); - }).then_interruptible([] { - //return do_recover_missing(pg, get_target_oid()); - return seastar::now(); - }).then_interruptible([this] { - return enter_stage<interruptor>( - client_pp().get_obc); - }).then_interruptible([this] { - logger().debug("{}: getting obc for {}", *this, coid); - // end of commonality - // lock both clone's and head's obcs - return pg->obc_loader.with_obc<RWState::RWWRITE>( - coid, - [this](auto head_obc, auto clone_obc) { + ceph_assert(pg->is_active_clean()); + + auto exit_handle = seastar::defer([this] { + logger().debug("{}: exit", *this); + handle.exit(); + }); + + co_await enter_stage<interruptor>( + client_pp().get_obc); + + logger().debug("{}: getting obc for {}", *this, coid); + // end of commonality + // lock both clone's and head's obcs + co_await pg->obc_loader.with_obc<RWState::RWWRITE>( + coid, + [this](auto head_obc, auto clone_obc) { logger().debug("{}: got clone_obc={}", *this, clone_obc->get_oid()); return enter_stage<interruptor>( client_pp().process ).then_interruptible( [this,clone_obc=std::move(clone_obc), head_obc=std::move(head_obc)]() mutable { - logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid()); - return remove_or_update( - clone_obc, head_obc - ).safe_then_interruptible([clone_obc, this](auto&& txn) mutable { - auto [submitted, all_completed] = pg->submit_transaction( - std::move(clone_obc), - std::move(txn), - std::move(osd_op_p), - std::move(log_entries)); - return submitted.then_interruptible( - [all_completed=std::move(all_completed), this] () mutable { - return enter_stage<interruptor>( - wait_repop - ).then_interruptible([all_completed=std::move(all_completed)] () mutable { - return std::move(all_completed); - }); - }); - }); - }); + logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid()); + return remove_or_update( + clone_obc, head_obc + ).safe_then_interruptible([clone_obc, this](auto&& txn) mutable { + auto [submitted, all_completed] = pg->submit_transaction( + std::move(clone_obc), + std::move(txn), + 
std::move(osd_op_p), + std::move(log_entries)); + return submitted.then_interruptible( + [this, all_completed=std::move(all_completed)]() mutable { + return enter_stage<interruptor>( + client_pp().wait_repop + ).then_interruptible([all_completed=std::move(all_completed)]() mutable{ + return std::move(all_completed); + }); + }); + }); + }); }, - false).si_then([this] { - logger().debug("{}: completed", *this); - return handle.complete(); - }).handle_error_interruptible( - remove_or_update_iertr::pass_further{}, - crimson::ct_error::assert_all{"unexpected error in SnapTrimObjSubEvent"} - ); - }).finally([this] { - logger().debug("{}: exit", *this); - handle.exit(); - }); + false + ).handle_error_interruptible( + remove_or_update_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected error in SnapTrimObjSubEvent"} + ); + + logger().debug("{}: completed", *this); + co_await interruptor::make_interruptible(handle.complete()); } void SnapTrimObjSubEvent::print(std::ostream &lhs) const diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index 9d7cde724ef..0e6c1e9d8fc 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -61,19 +61,6 @@ private: SubOpBlocker<snap_trim_obj_subevent_ret_t> subop_blocker; - // we don't need to synchronize with other instances of SnapTrimEvent; - // it's here for the sake of op tracking. - struct WaitSubop : OrderedConcurrentPhaseT<WaitSubop> { - static constexpr auto type_name = "SnapTrimEvent::wait_subop"; - } wait_subop; - - // an instantiator can instruct us to go over this stage and then - // wait for the future to implement throttling. It is implemented - // that way to for the sake of tracking ops. - struct WaitTrimTimer : OrderedExclusivePhaseT<WaitTrimTimer> { - static constexpr auto type_name = "SnapTrimEvent::wait_trim_timer"; - } wait_trim_timer; - Ref<PG> pg; PipelineHandle handle; SnapMapper& snap_mapper; @@ -85,14 +72,7 @@ public: std::tuple< StartEvent, - CommonPGPipeline::WaitForActive::BlockingEvent, - PGActivationBlocker::BlockingEvent, - CommonPGPipeline::RecoverMissing::BlockingEvent, - CommonPGPipeline::GetOBC::BlockingEvent, - CommonPGPipeline::Process::BlockingEvent, - WaitSubop::BlockingEvent, PG::BackgroundProcessLock::Wait::BlockingEvent, - WaitTrimTimer::BlockingEvent, CompletionEvent > tracking_events; @@ -154,12 +134,6 @@ private: remove_or_update_iertr::future<ceph::os::Transaction> remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc); - // we don't need to synchronize with other instances started by - // SnapTrimEvent; it's here for the sake of op tracking. 
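// [Editor's note] The per-event WaitSubop and WaitTrimTimer phases removed
// above, and the WaitRepop phase removed just below, are superseded by the
// shared CommonPGPipeline::WaitRepop stage added in common/pg_pipeline.h;
// the tracking_events tuples shrink to match. Entering the shared stage,
// as snaptrim_event.cc now does:
//
//   return enter_stage<interruptor>(
//     client_pp().wait_repop
//   ).then_interruptible([all_completed=std::move(all_completed)]() mutable {
//     return std::move(all_completed);
//   });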
- struct WaitRepop : OrderedConcurrentPhaseT<WaitRepop> { - static constexpr auto type_name = "SnapTrimObjSubEvent::wait_repop"; - } wait_repop; - void add_log_entry( int _op, const hobject_t& _soid, @@ -192,12 +166,9 @@ public: std::tuple< StartEvent, - CommonPGPipeline::WaitForActive::BlockingEvent, - PGActivationBlocker::BlockingEvent, - CommonPGPipeline::RecoverMissing::BlockingEvent, CommonPGPipeline::GetOBC::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, - WaitRepop::BlockingEvent, + CommonPGPipeline::WaitRepop::BlockingEvent, CompletionEvent > tracking_events; }; diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 460441fe48a..6cb3b6f0536 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -244,6 +244,40 @@ void PG::queue_check_readable(epoch_t last_peering_reset, ceph::timespan delay) std::chrono::duration_cast<seastar::lowres_clock::duration>(delay)); } +PG::interruptible_future<> PG::find_unfound(epoch_t epoch_started) +{ + if (!have_unfound()) { + return interruptor::now(); + } + PeeringCtx rctx; + if (!peering_state.discover_all_missing(rctx)) { + if (peering_state.state_test(PG_STATE_BACKFILLING)) { + logger().debug( + "{} {} no luck, giving up on this pg for now (in backfill)", + *this, __func__); + std::ignore = get_shard_services().start_operation<LocalPeeringEvent>( + this, + get_pg_whoami(), + get_pgid(), + epoch_started, + epoch_started, + PeeringState::UnfoundBackfill()); + } else if (peering_state.state_test(PG_STATE_RECOVERING)) { + logger().debug( + "{} {} no luck, giving up on this pg for now (in recovery)", + *this, __func__); + std::ignore = get_shard_services().start_operation<LocalPeeringEvent>( + this, + get_pg_whoami(), + get_pgid(), + epoch_started, + epoch_started, + PeeringState::UnfoundRecovery()); + } + } + return get_shard_services().dispatch_context(get_collection_ref(), std::move(rctx)); +} + void PG::recheck_readable() { bool changed = false; @@ -534,20 +568,22 @@ void PG::on_active_actmap() const auto needs_pause = !snap_trimq.empty(); return trim_snap(to_trim, needs_pause); } - ).finally([this] { + ).then_interruptible([this] { logger().debug("{}: PG::on_active_actmap() finished trimming", *this); peering_state.state_clear(PG_STATE_SNAPTRIM); peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR); - publish_stats_to_osd(); + return seastar::now(); }); }, [this](std::exception_ptr eptr) { logger().debug("{}: snap trimming interrupted", *this); - peering_state.state_clear(PG_STATE_SNAPTRIM); - }, pg_ref); + ceph_assert(!peering_state.state_test(PG_STATE_SNAPTRIM)); + }, pg_ref).finally([pg_ref, this] { + publish_stats_to_osd(); + }); } else { logger().debug("{}: pg not clean, skipping snap trim"); - assert(!peering_state.state_test(PG_STATE_SNAPTRIM)); + ceph_assert(!peering_state.state_test(PG_STATE_SNAPTRIM)); } } @@ -1585,6 +1621,12 @@ void PG::on_change(ceph::os::Transaction &t) { } scrubber.on_interval_change(); obc_registry.invalidate_on_interval_change(); + // snap trim events are all going to be interrupted, + // clearing PG_STATE_SNAPTRIM/PG_STATE_SNAPTRIM_ERROR here + // is safe and timely. 
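// [Editor's note] on_change() runs on interval change, which interrupts
// any in-flight SnapTrimEvent/SnapTrimObjSubEvent, so clearing the two
// flags here cannot race with an active trimmer and keeps the PG from
// advertising SNAPTRIM state into the new interval.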
+ peering_state.state_clear(PG_STATE_SNAPTRIM); + peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR); + snap_mapper.reset_backend(); } void PG::context_registry_on_change() { diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index d705a71bb78..ec6bb78fa4e 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -323,7 +323,9 @@ public: } Context *on_clean() final; void on_activate_committed() final { - // Not needed yet (will be needed for IO unblocking) + if (!is_primary()) { + wait_for_active_blocker.unblock(); + } } void on_active_exit() final { // Not needed yet @@ -437,6 +439,9 @@ public: // Utility + bool is_active_clean() const { + return peering_state.is_active() && peering_state.is_clean(); + } bool is_primary() const final { return peering_state.is_primary(); } @@ -737,6 +742,10 @@ public: // TODO: see PrimaryLogPG::mark_all_unfound_lost() return seastar::now(); } + interruptible_future<> find_unfound(epoch_t epoch_started); + bool have_unfound() const { + return peering_state.have_unfound(); + } bool old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch) const; @@ -769,9 +778,6 @@ private: friend class SnapTrimEvent; friend class SnapTrimObjSubEvent; private: - seastar::future<bool> find_unfound() { - return seastar::make_ready_future<bool>(true); - } bool can_discard_replica_op(const Message& m, epoch_t m_map_epoch) const; bool can_discard_op(const MOSDOp& m) const; @@ -831,12 +837,17 @@ struct PG::do_osd_ops_params_t { return orig_source_inst.name; } + snapid_t get_snapid() const { + return snapid; + } + crimson::net::ConnectionXcoreRef &conn; osd_reqid_t reqid; utime_t mtime; epoch_t map_epoch; entity_inst_t orig_source_inst; uint64_t features; + snapid_t snapid; }; std::ostream& operator<<(std::ostream&, const PG& pg); diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index 387a7a61fd0..e065a004d24 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ -236,14 +236,20 @@ PGBackend::read(const ObjectState& os, OSDOp& osd_op, (op.extent.truncate_size < size)) { size = op.extent.truncate_size; } - if (offset >= size) { - // read size was trimmed to zero and it is expected to do nothing, - return read_errorator::now(); - } if (!length) { // read the whole object if length is 0 length = size; } + if (offset >= size) { + // read size was trimmed to zero and it is expected to do nothing, + return read_errorator::now(); + } else if (offset + length > size) { + length = size - op.extent.offset; + if (!length) { + // this is the second trimmed_read case + return read_errorator::now(); + } + } return _read(oi.soid, offset, length, op.flags).safe_then_interruptible_tuple( [&delta_stats, &oi, &osd_op](auto&& bl) -> read_errorator::future<> { if (!_read_verify_data(oi, bl)) { @@ -336,8 +342,6 @@ namespace { auto init_value_p = init_value_bl.cbegin(); try { decode(init_value, init_value_p); - // chop off the consumed part - init_value_bl.splice(0, init_value_p.get_off()); } catch (const ceph::buffer::end_of_buffer&) { logger().warn("{}: init value not provided", __func__); return crimson::ct_error::invarg::make(); @@ -988,8 +992,6 @@ PGBackend::create_iertr::future<> PGBackend::create( } } maybe_create_new_object(os, txn, delta_stats); - txn.create(coll->get_cid(), - ghobject_t{os.oi.soid, ghobject_t::NO_GEN, shard}); return seastar::now(); } diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index b2f813447b3..05f8c6e1f96 100644 --- a/src/crimson/osd/pg_recovery.cc +++ 
b/src/crimson/osd/pg_recovery.cc @@ -146,11 +146,23 @@ size_t PGRecovery::start_primary_recovery_ops( } else { soid = p->second; } - const pg_missing_item& item = missing.get_items().find(p->second)->second; - ++p; hobject_t head = soid.get_head(); + if (pg->get_peering_state().get_missing_loc().is_unfound(soid)) { + logger().debug("{}: object {} unfound", __func__, soid); + ++skipped; + continue; + } + if (pg->get_peering_state().get_missing_loc().is_unfound(head)) { + logger().debug("{}: head object {} unfound", __func__, soid); + ++skipped; + continue; + } + + const pg_missing_item& item = missing.get_items().find(p->second)->second; + ++p; + bool head_missing = missing.is_missing(head); logger().info( "{} {} item.need {} {} {} {} {}", @@ -417,8 +429,6 @@ void PGRecovery::on_global_recover ( pg->get_peering_state().object_recovered(soid, stat_diff); pg->publish_stats_to_osd(); auto& recovery_waiter = pg->get_recovery_backend()->get_recovering(soid); - if (!is_delete) - recovery_waiter.obc->drop_recovery_read(); recovery_waiter.set_recovered(); pg->get_recovery_backend()->remove_recovering(soid); } diff --git a/src/crimson/osd/pg_shard_manager.h b/src/crimson/osd/pg_shard_manager.h index 965d6ab0e16..b9879c8c9dd 100644 --- a/src/crimson/osd/pg_shard_manager.h +++ b/src/crimson/osd/pg_shard_manager.h @@ -232,7 +232,7 @@ public: return target_shard_services.get_or_create_pg( std::move(trigger), opref.get_pgid(), - std::move(opref.get_create_info()) + opref.get_create_info() ); }).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) { logger.debug("{}: have_pg", opref); diff --git a/src/crimson/osd/recovery_backend.cc b/src/crimson/osd/recovery_backend.cc index 7923ad24a42..e6b232c3561 100644 --- a/src/crimson/osd/recovery_backend.cc +++ b/src/crimson/osd/recovery_backend.cc @@ -66,16 +66,26 @@ void RecoveryBackend::clean_up(ceph::os::Transaction& t, } void RecoveryBackend::WaitForObjectRecovery::stop() { - readable.set_exception( + if (readable) { + readable->set_exception( crimson::common::system_shutdown_exception()); - recovered.set_exception( + readable.reset(); + } + if (recovered) { + recovered->set_exception( crimson::common::system_shutdown_exception()); - pulled.set_exception( + recovered.reset(); + } + if (pulled) { + pulled->set_exception( crimson::common::system_shutdown_exception()); + pulled.reset(); + } for (auto& [pg_shard, pr] : pushes) { pr.set_exception( - crimson::common::system_shutdown_exception()); + crimson::common::system_shutdown_exception()); } + pushes.clear(); } void RecoveryBackend::handle_backfill_finish( diff --git a/src/crimson/osd/recovery_backend.h b/src/crimson/osd/recovery_backend.h index 4c9c67770ab..f5a365c1558 100644 --- a/src/crimson/osd/recovery_backend.h +++ b/src/crimson/osd/recovery_backend.h @@ -125,7 +125,7 @@ public: public boost::intrusive_ref_counter< WaitForObjectRecovery, boost::thread_unsafe_counter>, public crimson::BlockerT<WaitForObjectRecovery> { - seastar::shared_promise<> readable, recovered, pulled; + std::optional<seastar::shared_promise<>> readable, recovered, pulled; std::map<pg_shard_t, seastar::shared_promise<>> pushes; public: static constexpr const char* type_name = "WaitForObjectRecovery"; @@ -135,13 +135,19 @@ public: std::map<pg_shard_t, push_info_t> pushing; seastar::future<> wait_for_readable() { - return readable.get_shared_future(); + if (!readable) { + readable = seastar::shared_promise<>(); + } + return readable->get_shared_future(); } seastar::future<> wait_for_pushes(pg_shard_t shard) { return 
pushes[shard].get_shared_future(); } seastar::future<> wait_for_recovered() { - return recovered.get_shared_future(); + if (!recovered) { + recovered = seastar::shared_promise<>(); + } + return recovered->get_shared_future(); } template <typename T, typename F> auto wait_track_blocking(T &trigger, F &&fut) { @@ -154,37 +160,72 @@ public: template <typename T> seastar::future<> wait_for_recovered(T &trigger) { WaitForObjectRecoveryRef ref = this; - return wait_track_blocking(trigger, recovered.get_shared_future()); + if (!recovered) { + recovered = seastar::shared_promise<>(); + } + return wait_track_blocking(trigger, recovered->get_shared_future()); } seastar::future<> wait_for_pull() { - return pulled.get_shared_future(); + if (!pulled) { + pulled = seastar::shared_promise<>(); + } + return pulled->get_shared_future(); } void set_readable() { - readable.set_value(); + if (readable) { + readable->set_value(); + readable.reset(); + } } void set_recovered() { - recovered.set_value(); + if (recovered) { + recovered->set_value(); + recovered.reset(); + } } void set_pushed(pg_shard_t shard) { - pushes[shard].set_value(); + auto it = pushes.find(shard); + if (it != pushes.end()) { + auto &push_promise = it->second; + push_promise.set_value(); + pushes.erase(it); + } } void set_pulled() { - pulled.set_value(); + if (pulled) { + pulled->set_value(); + pulled.reset(); + } } void set_push_failed(pg_shard_t shard, std::exception_ptr e) { - pushes.at(shard).set_exception(e); + auto it = pushes.find(shard); + if (it != pushes.end()) { + auto &push_promise = it->second; + push_promise.set_exception(e); + pushes.erase(it); + } } void interrupt(std::string_view why) { - readable.set_exception(std::system_error( - std::make_error_code(std::errc::interrupted), why.data())); - recovered.set_exception(std::system_error( - std::make_error_code(std::errc::interrupted), why.data())); - pulled.set_exception(std::system_error( - std::make_error_code(std::errc::interrupted), why.data())); + if (readable) { + readable->set_exception(std::system_error( + std::make_error_code(std::errc::interrupted), why.data())); + readable.reset(); + } + if (recovered) { + recovered->set_exception(std::system_error( + std::make_error_code(std::errc::interrupted), why.data())); + recovered.reset(); + } + if (pulled) { + pulled->set_exception(std::system_error( + std::make_error_code(std::errc::interrupted), why.data())); + pulled.reset(); + } for (auto& [pg_shard, pr] : pushes) { - pr.set_exception(std::system_error( - std::make_error_code(std::errc::interrupted), why.data())); + pr.set_exception(std::system_error( + std::make_error_code(std::errc::interrupted), why.data())); } + pushes.clear(); } void stop(); void dump_detail(Formatter* f) const { diff --git a/src/crimson/osd/replicated_backend.cc b/src/crimson/osd/replicated_backend.cc index 7eeece482d5..1fc59b7d9a0 100644 --- a/src/crimson/osd/replicated_backend.cc +++ b/src/crimson/osd/replicated_backend.cc @@ -70,7 +70,6 @@ ReplicatedBackend::_submit_transaction(std::set<pg_shard_t>&& pg_shards, encode(log_entries, m->logbl); m->pg_trim_to = osd_op_p.pg_trim_to; m->min_last_complete_ondisk = osd_op_p.min_last_complete_ondisk; - m->set_rollback_to(osd_op_p.at_version); // TODO: set more stuff. 
e.g., pg_states sends->emplace_back( shard_services.send_to_osd( diff --git a/src/crimson/osd/replicated_recovery_backend.cc b/src/crimson/osd/replicated_recovery_backend.cc index 4b4db79af49..03cf38e6954 100644 --- a/src/crimson/osd/replicated_recovery_backend.cc +++ b/src/crimson/osd/replicated_recovery_backend.cc @@ -38,7 +38,6 @@ ReplicatedRecoveryBackend::recover_object( logger().debug("recover_object: loaded obc: {}", obc->obs.oi.soid); auto& recovery_waiter = get_recovering(soid); recovery_waiter.obc = obc; - recovery_waiter.obc->wait_recovery_read(); return maybe_push_shards(head, soid, need); }, false).handle_error_interruptible( crimson::osd::PG::load_obc_ertr::all_same_way([soid](auto& code) { @@ -98,10 +97,6 @@ ReplicatedRecoveryBackend::maybe_push_shards( } return seastar::make_ready_future<>(); }).handle_exception_interruptible([this, soid](auto e) { - auto &recovery = get_recovering(soid); - if (recovery.obc) { - recovery.obc->drop_recovery_read(); - } recovering.erase(soid); return seastar::make_exception_future<>(e); }); diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc index 967a46ccd45..078e33bf8c4 100644 --- a/src/crimson/tools/store_nbd/tm_driver.cc +++ b/src/crimson/tools/store_nbd/tm_driver.cc @@ -139,11 +139,13 @@ seastar::future<bufferlist> TMDriver::read( void TMDriver::init() { + shard_stats = {}; + std::vector<Device*> sec_devices; #ifndef NDEBUG - tm = make_transaction_manager(device.get(), sec_devices, true); + tm = make_transaction_manager(device.get(), sec_devices, shard_stats, true); #else - tm = make_transaction_manager(device.get(), sec_devices, false); + tm = make_transaction_manager(device.get(), sec_devices, shard_stats, false); #endif } diff --git a/src/crimson/tools/store_nbd/tm_driver.h b/src/crimson/tools/store_nbd/tm_driver.h index 24aabdeb603..6433c050e44 100644 --- a/src/crimson/tools/store_nbd/tm_driver.h +++ b/src/crimson/tools/store_nbd/tm_driver.h @@ -41,6 +41,9 @@ private: using TransactionManagerRef = crimson::os::seastore::TransactionManagerRef; TransactionManagerRef tm; + using shard_stats_t = crimson::os::seastore::shard_stats_t; + shard_stats_t shard_stats; + seastar::future<> mkfs(); void init(); void clear(); diff --git a/src/crush/CMakeLists.txt b/src/crush/CMakeLists.txt index 1c875d59474..4668b4ad7a7 100644 --- a/src/crush/CMakeLists.txt +++ b/src/crush/CMakeLists.txt @@ -9,3 +9,4 @@ set(crush_srcs CrushLocation.cc) add_library(crush_objs OBJECT ${crush_srcs}) +target_link_libraries(crush_objs PUBLIC legacy-option-headers) diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index e434d1a17d8..da542403321 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -2351,6 +2351,7 @@ int CrushWrapper::add_simple_rule_at( int ret = crush_add_rule(crush, rule, rno); if(ret < 0) { *err << "failed to add rule " << rno << " because " << cpp_strerror(ret); + free(rule); return ret; } set_rule_name(rno, name); @@ -2455,6 +2456,7 @@ int CrushWrapper::add_multi_osd_per_failure_domain_rule_at( int ret = crush_add_rule(crush, rule, rno); if(ret < 0) { *err << "failed to add rule " << rno << " because " << cpp_strerror(ret); + free(rule); return ret; } set_rule_name(rno, name); diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 7018ca498c6..ef7d2b99765 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -1172,6 +1172,9 @@ public: crush_rule *n = crush_make_rule(len, type); ceph_assert(n); ruleno = crush_add_rule(crush, n, ruleno); 
+ if (ruleno < 0) { + free(n); + } return ruleno; } int set_rule_step(unsigned ruleno, unsigned step, int op, int arg1, int arg2) { @@ -1610,14 +1613,14 @@ public: void do_rule(int rule, int x, std::vector<int>& out, int maxout, const WeightVector& weight, uint64_t choose_args_index) const { - int rawout[maxout]; - char work[crush_work_size(crush, maxout)]; - crush_init_workspace(crush, work); + std::vector<int> rawout(maxout); + std::vector<char> work(crush_work_size(crush, maxout)); + crush_init_workspace(crush, std::data(work)); crush_choose_arg_map arg_map = choose_args_get_with_fallback( choose_args_index); - int numrep = crush_do_rule(crush, rule, x, rawout, maxout, + int numrep = crush_do_rule(crush, rule, x, std::data(rawout), maxout, std::data(weight), std::size(weight), - work, arg_map.args); + std::data(work), arg_map.args); if (numrep < 0) numrep = 0; out.resize(numrep); diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc index 6784fa355cf..b27273c172f 100644 --- a/src/erasure-code/ErasureCode.cc +++ b/src/erasure-code/ErasureCode.cc @@ -348,21 +348,35 @@ int ErasureCode::to_string(const std::string &name, return 0; } -int ErasureCode::decode_concat(const map<int, bufferlist> &chunks, +int ErasureCode::decode_concat(const set<int>& want_to_read, + const map<int, bufferlist> &chunks, bufferlist *decoded) { - set<int> want_to_read; - - for (unsigned int i = 0; i < get_data_chunk_count(); i++) { - want_to_read.insert(chunk_index(i)); - } map<int, bufferlist> decoded_map; int r = _decode(want_to_read, chunks, &decoded_map); if (r == 0) { for (unsigned int i = 0; i < get_data_chunk_count(); i++) { - decoded->claim_append(decoded_map[chunk_index(i)]); + // XXX: the ErasureCodeInterface allows `decode()` to return + // *at least* the `want_to_read` chunks; that is, it may return more. + // Some implementations are consistently exact but jerasure + // is quirky: it outputs more only when dealing with degraded reads. + // The check below makes the behavior uniform. + if (want_to_read.contains(chunk_index(i)) && + decoded_map.contains(chunk_index(i))) { + decoded->claim_append(decoded_map[chunk_index(i)]); + } } } return r; } + +int ErasureCode::decode_concat(const map<int, bufferlist> &chunks, + bufferlist *decoded) +{ + set<int> want_to_read; + for (unsigned int i = 0; i < get_data_chunk_count(); i++) { + want_to_read.insert(chunk_index(i)); + } + return decode_concat(want_to_read, chunks, decoded); +} } diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h index fd6d1a41f71..2ae40b63686 100644 --- a/src/erasure-code/ErasureCode.h +++ b/src/erasure-code/ErasureCode.h @@ -112,8 +112,11 @@ namespace ceph { const std::string &default_value, std::ostream *ss); + int decode_concat(const std::set<int>& want_to_read, + const std::map<int, bufferlist> &chunks, + bufferlist *decoded) override; int decode_concat(const std::map<int, bufferlist> &chunks, - bufferlist *decoded) override; + bufferlist *decoded) override; protected: int parse(const ErasureCodeProfile &profile, diff --git a/src/erasure-code/ErasureCodeInterface.h b/src/erasure-code/ErasureCodeInterface.h index 7107f978dd4..673136a701b 100644 --- a/src/erasure-code/ErasureCodeInterface.h +++ b/src/erasure-code/ErasureCodeInterface.h @@ -453,12 +453,20 @@ namespace ceph { * * Returns 0 on success. * - * @param [in] chunks map chunk indexes to chunk data - * @param [out] decoded concatenante of the data chunks + * @param [in] want_to_read std::set of chunk indexes the caller wants + *                          concatenated into `decoded`.
This works as + *                          a selector for `chunks` + * @param [in] chunks set of chunks with data available for decoding + * @param [out] decoded must be non-null; chunks specified in `want_to_read` + *                      will be concatenated into `decoded` in index order * @return **0** on success or a negative errno on error. */ + virtual int decode_concat(const std::set<int>& want_to_read, + const std::map<int, bufferlist> &chunks, + bufferlist *decoded) = 0; virtual int decode_concat(const std::map<int, bufferlist> &chunks, bufferlist *decoded) = 0; + }; typedef std::shared_ptr<ErasureCodeInterface> ErasureCodeInterfaceRef; diff --git a/src/erasure-code/clay/ErasureCodeClay.cc b/src/erasure-code/clay/ErasureCodeClay.cc index ba37b8c721c..c75ea279651 100644 --- a/src/erasure-code/clay/ErasureCodeClay.cc +++ b/src/erasure-code/clay/ErasureCodeClay.cc @@ -306,7 +306,14 @@ int ErasureCodeClay::is_repair(const set<int> &want_to_read, if (includes(available_chunks.begin(), available_chunks.end(), want_to_read.begin(), want_to_read.end())) return 0; + // Oops, before the attempt to support EC partial reads the following + // condition was always true, as `get_want_to_read_shards()` yields the + // entire stripe. Unfortunately, we built upon this assumption and + // even `ECUtil::decode()` asserts on chunks being a multiple of + // `chunk_size`. + // XXX: for now returning 0 and knocking the optimization out. if (want_to_read.size() > 1) return 0; + else return 0; int i = *want_to_read.begin(); int lost_node_id = (i < k) ? i: i+nu; diff --git a/src/erasure-code/jerasure/CMakeLists.txt b/src/erasure-code/jerasure/CMakeLists.txt index f9cd22e1176..b35c796f308 100644 --- a/src/erasure-code/jerasure/CMakeLists.txt +++ b/src/erasure-code/jerasure/CMakeLists.txt @@ -5,6 +5,7 @@ set(jerasure_utils_src ErasureCodeJerasure.cc) add_library(jerasure_utils OBJECT ${jerasure_utils_src}) +target_link_libraries(jerasure_utils legacy-option-headers) # Set the CFLAGS correctly for gf-complete based on SIMD compiler support set(GF_COMPILE_FLAGS) diff --git a/src/exporter/CMakeLists.txt b/src/exporter/CMakeLists.txt index 0c0c03bf91d..0127cc53913 100644 --- a/src/exporter/CMakeLists.txt +++ b/src/exporter/CMakeLists.txt @@ -1,10 +1,12 @@ set(exporter_srcs ceph_exporter.cc DaemonMetricCollector.cc - http_server.cc + web_server.cc util.cc ) add_executable(ceph-exporter ${exporter_srcs}) target_link_libraries(ceph-exporter - global-static ceph-common) + global-static + ceph-common + OpenSSL::SSL) install(TARGETS ceph-exporter DESTINATION bin) diff --git a/src/exporter/ceph_exporter.cc b/src/exporter/ceph_exporter.cc index 70650ff87c6..2e2c16bb085 100644 --- a/src/exporter/ceph_exporter.cc +++ b/src/exporter/ceph_exporter.cc @@ -1,7 +1,7 @@ #include "common/ceph_argparse.h" #include "common/config.h" #include "exporter/DaemonMetricCollector.h" -#include "exporter/http_server.h" +#include "exporter/web_server.h" #include "global/global_init.h" #include "global/global_context.h" @@ -18,6 +18,8 @@ static void usage() { " --sock-dir: The path to ceph daemons socket files dir\n" " --addrs: Host ip address where exporter is deployed\n" " --port: Port to deploy exporter on. Default is 9926\n" + " --cert-file: Path to the certificate file to use https\n" + " --key-file: Path to the certificate key file to use https\n" " --prio-limit: Only perf counters greater than or equal to prio-limit are fetched. Default: 5\n" " --stats-period: Time to wait before sending requests again to exporter server (seconds).
Default: 5s" << std::endl; @@ -48,6 +50,10 @@ int main(int argc, char **argv) { cct->_conf.set_val("exporter_addr", val); } else if (ceph_argparse_witharg(args, i, &val, "--port", (char *)NULL)) { cct->_conf.set_val("exporter_http_port", val); + } else if (ceph_argparse_witharg(args, i, &val, "--cert-file", (char *)NULL)) { + cct->_conf.set_val("exporter_cert_file", val); + } else if (ceph_argparse_witharg(args, i, &val, "--key-file", (char *)NULL)) { + cct->_conf.set_val("exporter_key_file", val); } else if (ceph_argparse_witharg(args, i, &val, "--prio-limit", (char *)NULL)) { cct->_conf.set_val("exporter_prio_limit", val); } else if (ceph_argparse_witharg(args, i, &val, "--stats-period", (char *)NULL)) { @@ -58,7 +64,7 @@ int main(int argc, char **argv) { } common_init_finish(g_ceph_context); - boost::thread server_thread(http_server_thread_entrypoint); + boost::thread server_thread(web_server_thread_entrypoint); DaemonMetricCollector &collector = collector_instance(); collector.main(); server_thread.join(); diff --git a/src/exporter/http_server.cc b/src/exporter/http_server.cc deleted file mode 100644 index 3eb48a2a1f0..00000000000 --- a/src/exporter/http_server.cc +++ /dev/null @@ -1,169 +0,0 @@ -#include "http_server.h" -#include "common/debug.h" -#include "common/hostname.h" -#include "global/global_init.h" -#include "global/global_context.h" -#include "exporter/DaemonMetricCollector.h" - -#include <boost/asio/ip/tcp.hpp> -#include <boost/beast/core.hpp> -#include <boost/beast/http.hpp> -#include <boost/beast/version.hpp> -#include <boost/thread/thread.hpp> -#include <chrono> -#include <cstdlib> -#include <ctime> -#include <iostream> -#include <map> -#include <memory> -#include <string> - -#define dout_context g_ceph_context -#define dout_subsys ceph_subsys_ceph_exporter - -namespace beast = boost::beast; // from <boost/beast.hpp> -namespace http = beast::http; // from <boost/beast/http.hpp> -namespace net = boost::asio; // from <boost/asio.hpp> -using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp> - -class http_connection : public std::enable_shared_from_this<http_connection> { -public: - http_connection(tcp::socket socket) : socket_(std::move(socket)) {} - - // Initiate the asynchronous operations associated with the connection. - void start() { - read_request(); - check_deadline(); - } - -private: - tcp::socket socket_; - beast::flat_buffer buffer_{8192}; - http::request<http::dynamic_body> request_; - http::response<http::string_body> response_; - - net::steady_timer deadline_{socket_.get_executor(), std::chrono::seconds(60)}; - - // Asynchronously receive a complete request message. - void read_request() { - auto self = shared_from_this(); - - http::async_read(socket_, buffer_, request_, - [self](beast::error_code ec, std::size_t bytes_transferred) { - boost::ignore_unused(bytes_transferred); - if (ec) { - dout(1) << "ERROR: " << ec.message() << dendl; - return; - } - else { - self->process_request(); - } - }); - } - - // Determine what needs to be done with the request message. - void process_request() { - response_.version(request_.version()); - response_.keep_alive(request_.keep_alive()); - - switch (request_.method()) { - case http::verb::get: - response_.result(http::status::ok); - create_response(); - break; - - default: - // We return responses indicating an error if - // we do not recognize the request method. 
- response_.result(http::status::method_not_allowed); - response_.set(http::field::content_type, "text/plain"); - std::string body("Invalid request-method '" + - std::string(request_.method_string()) + "'"); - response_.body() = body; - break; - } - - write_response(); - } - - // Construct a response message based on the program state. - void create_response() { - if (request_.target() == "/") { - response_.set(http::field::content_type, "text/html; charset=utf-8"); - std::string body("<html>\n" - "<head><title>Ceph Exporter</title></head>\n" - "<body>\n" - "<h1>Ceph Exporter</h1>\n" - "<p><a href='/metrics'>Metrics</a></p>" - "</body>\n" - "</html>\n"); - response_.body() = body; - } else if (request_.target() == "/metrics") { - response_.set(http::field::content_type, "text/plain; charset=utf-8"); - DaemonMetricCollector &collector = collector_instance(); - std::string metrics = collector.get_metrics(); - response_.body() = metrics; - } else { - response_.result(http::status::method_not_allowed); - response_.set(http::field::content_type, "text/plain"); - response_.body() = "File not found \n"; - } - } - - // Asynchronously transmit the response message. - void write_response() { - auto self = shared_from_this(); - - response_.prepare_payload(); - - http::async_write(socket_, response_, - [self](beast::error_code ec, std::size_t) { - self->socket_.shutdown(tcp::socket::shutdown_send, ec); - self->deadline_.cancel(); - if (ec) { - dout(1) << "ERROR: " << ec.message() << dendl; - return; - } - }); - } - - // Check whether we have spent enough time on this connection. - void check_deadline() { - auto self = shared_from_this(); - - deadline_.async_wait([self](beast::error_code ec) { - if (!ec) { - // Close socket to cancel any outstanding operation. - self->socket_.close(ec); - } - }); - } -}; - -// "Loop" forever accepting new connections. 
-void http_server(tcp::acceptor &acceptor, tcp::socket &socket) { - acceptor.async_accept(socket, [&](beast::error_code ec) { - if (!ec) - std::make_shared<http_connection>(std::move(socket))->start(); - http_server(acceptor, socket); - }); -} - -void http_server_thread_entrypoint() { - try { - std::string exporter_addr = g_conf().get_val<std::string>("exporter_addr"); - auto const address = net::ip::make_address(exporter_addr); - unsigned short port = g_conf().get_val<int64_t>("exporter_http_port"); - - net::io_context ioc{1}; - - tcp::acceptor acceptor{ioc, {address, port}}; - tcp::socket socket{ioc}; - http_server(acceptor, socket); - dout(1) << "Http server running on " << exporter_addr << ":" << port << dendl; - ioc.run(); - } catch (std::exception const &e) { - dout(1) << "Error: " << e.what() << dendl; - exit(EXIT_FAILURE); - } -} diff --git a/src/exporter/http_server.h b/src/exporter/http_server.h deleted file mode 100644 index 0d0502f57c8..00000000000 --- a/src/exporter/http_server.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#include <string> - -void http_server_thread_entrypoint(); diff --git a/src/exporter/web_server.cc b/src/exporter/web_server.cc new file mode 100644 index 00000000000..96cc02b389f --- /dev/null +++ b/src/exporter/web_server.cc @@ -0,0 +1,276 @@ +#include "web_server.h" +#include "common/debug.h" +#include "common/hostname.h" +#include "global/global_init.h" +#include "global/global_context.h" +#include "exporter/DaemonMetricCollector.h" + +#include <boost/asio/ip/tcp.hpp> +#include <boost/asio/ssl.hpp> // SSL/TLS +#include <boost/beast/core.hpp> +#include <boost/beast/http.hpp> +#include <boost/beast/version.hpp> +#include <boost/thread/thread.hpp> +#include <chrono> +#include <cstdlib> +#include <ctime> +#include <iostream> +#include <map> +#include <memory> +#include <string> + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_ceph_exporter + +namespace beast = boost::beast; // from <boost/beast.hpp> +namespace http = beast::http; // from <boost/beast/http.hpp> +namespace net = boost::asio; // from <boost/asio.hpp> +namespace ssl = boost::asio::ssl; // from <boost/asio/ssl.hpp> +using tcp = boost::asio::ip::tcp; // from <boost/asio/ip/tcp.hpp> + +// Base class for common functionality +class web_connection { +public: + virtual ~web_connection() = default; + virtual void start() = 0; // Pure virtual function to start the connection + +protected: + beast::flat_buffer buffer_{8192}; + http::request<http::dynamic_body> request_; + http::response<http::string_body> response_; + net::steady_timer deadline_; + + web_connection(net::any_io_executor executor, std::chrono::seconds timeout) + : deadline_(executor, timeout) {} + + // Common request processing logic + void process_request() { + response_.version(request_.version()); + response_.keep_alive(request_.keep_alive()); + + switch (request_.method()) { + case http::verb::get: + response_.result(http::status::ok); + create_response(); + break; + + default: + response_.result(http::status::method_not_allowed); + response_.set(http::field::content_type, "text/plain"); + std::string body("Invalid request-method '" + std::string(request_.method_string()) + "'\n"); + response_.body() = body; + break; + } + write_response(); + } + + // Construct a response message based on the request target + void create_response() { + if (request_.target() == "/") { + response_.result(http::status::moved_permanently); + response_.set(http::field::location, "/metrics"); + } else if (request_.target() ==
"/metrics") { + response_.set(http::field::content_type, "text/plain; charset=utf-8"); + DaemonMetricCollector &collector = collector_instance(); + std::string metrics = collector.get_metrics(); + response_.body() = metrics; + } else { + response_.result(http::status::not_found); + response_.set(http::field::content_type, "text/plain"); + response_.body() = "File not found\n"; + } + } + + // Asynchronously transmit the response message + virtual void write_response() = 0; + + // Check whether we have spent enough time on this connection + void check_deadline(std::shared_ptr<web_connection> self) { + deadline_.async_wait([self](beast::error_code ec) { + if (!ec) { + self->close_connection(ec); + } + }); + } + + // Handle bad requests (e.g. an HTTP request sent to the HTTPS server, or an HTTPS request sent to the HTTP server) + void handle_bad_request(beast::error_code ec) { + response_.version(request_.version()); + response_.keep_alive(request_.keep_alive()); + response_.result(http::status::bad_request); + response_.set(http::field::content_type, "text/plain"); + std::string body = "Ceph exporter.\nRequest Error: " + ec.message(); + response_.body() = body; + + write_response(); + } + + virtual void close_connection(beast::error_code& ec) = 0; +}; + +// Derived class for HTTP connections +class http_connection : public web_connection, public std::enable_shared_from_this<http_connection> { +public: + explicit http_connection(tcp::socket socket) + : web_connection(socket.get_executor(), std::chrono::seconds(60)), socket_(std::move(socket)) {} + + void start() override { + read_request(shared_from_this()); + check_deadline(shared_from_this()); + } + +private: + tcp::socket socket_; + + void read_request(std::shared_ptr<http_connection> self) { + http::async_read(socket_, buffer_, request_, + [self](beast::error_code ec, std::size_t bytes_transferred) { + boost::ignore_unused(bytes_transferred); + if (ec) { + dout(1) << "ERROR: " << ec.message() << dendl; + self->handle_bad_request(ec); + return; + } + self->process_request(); + }); + } + + void write_response() override { + auto self = shared_from_this(); + response_.prepare_payload(); + http::async_write(socket_, response_, + [self](beast::error_code ec, std::size_t) { + self->socket_.shutdown(tcp::socket::shutdown_send, ec); + self->deadline_.cancel(); + if (ec) { + dout(1) << "ERROR: " << ec.message() << dendl; + return; + } + }); + } + + void close_connection(beast::error_code& ec) override { + socket_.close(ec); + } +}; + +// Derived class for HTTPS connections +class https_connection : public web_connection, public std::enable_shared_from_this<https_connection> { +public: + explicit https_connection(ssl::stream<tcp::socket> socket) + : web_connection(socket.get_executor(), std::chrono::seconds(60)), socket_(std::move(socket)) {} + + void start() override { + auto self = shared_from_this(); + socket_.async_handshake(ssl::stream_base::server, + [self](beast::error_code ec) { + if (!ec) { + self->read_request(self); + } else { + dout(1) << "ERROR: SSL Handshake failed: " << ec.message() << dendl; + self->handle_bad_request(ec); + } + }); + check_deadline(self); + } + +private: + ssl::stream<tcp::socket> socket_; + + void read_request(std::shared_ptr<https_connection> self) { + http::async_read(socket_, buffer_, request_, + [self](beast::error_code ec, std::size_t bytes_transferred) { + boost::ignore_unused(bytes_transferred); + if (ec) { + dout(1) << "ERROR: " << ec.message() << dendl; + return; + } + self->process_request(); + }); + } + + void write_response()
override { + auto self = shared_from_this(); + response_.prepare_payload(); + http::async_write(socket_, response_, + [self](beast::error_code ec, std::size_t) { + self->socket_.async_shutdown([self](beast::error_code ec) { + self->deadline_.cancel(); + if (ec) { + dout(1) << "ERROR: " << ec.message() << dendl; + } + }); + }); + } + + void close_connection(beast::error_code& ec) override { + socket_.lowest_layer().close(ec); + } + +}; + +void http_server(tcp::acceptor &acceptor, tcp::socket &socket) { + acceptor.async_accept(socket, [&](beast::error_code ec) { + if (!ec) { + std::make_shared<http_connection>(std::move(socket))->start(); + } + http_server(acceptor, socket); + }); +} + +void https_server(tcp::acceptor &acceptor, ssl::context &ssl_ctx) { + acceptor.async_accept([&](beast::error_code ec, tcp::socket socket) { + if (!ec) { + std::make_shared<https_connection>(ssl::stream<tcp::socket>(std::move(socket), ssl_ctx))->start(); + } + https_server(acceptor, ssl_ctx); + }); +} + +void run_http_server(const std::string& exporter_addr, short unsigned int port) { + net::io_context ioc{1}; + tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}}; + tcp::socket socket{ioc}; + + http_server(acceptor, socket); + + dout(1) << "HTTP server running on " << exporter_addr << ":" << port << dendl; + ioc.run(); +} + +void run_https_server(const std::string& exporter_addr, short unsigned int port, const std::string& cert_file, const std::string& key_file) { + net::io_context ioc{1}; + ssl::context ssl_ctx(ssl::context::tlsv13); + + ssl_ctx.use_certificate_chain_file(cert_file); + ssl_ctx.use_private_key_file(key_file, ssl::context::pem); + + tcp::acceptor acceptor{ioc, {net::ip::make_address(exporter_addr), port}}; + https_server(acceptor, ssl_ctx); + + dout(1) << "HTTPS server running on " << exporter_addr << ":" << port << dendl; + ioc.run(); +} + +void web_server_thread_entrypoint() { + try { + std::string exporter_addr = g_conf().get_val<std::string>("exporter_addr"); + short unsigned int port = g_conf().get_val<int64_t>("exporter_http_port"); + std::string cert_file = g_conf().get_val<std::string>("exporter_cert_file"); + std::string key_file = g_conf().get_val<std::string>("exporter_key_file"); + + if (cert_file.empty() && key_file.empty()) { + run_http_server(exporter_addr, port); + } else { + try { + run_https_server(exporter_addr, port, cert_file, key_file); + } catch (const std::exception &e) { + dout(1) << "Failed to start HTTPS server: " << e.what() << dendl; + exit(EXIT_FAILURE); + } + } + } catch (std::exception const &e) { + dout(1) << "Error: " << e.what() << dendl; + exit(EXIT_FAILURE); + } +} diff --git a/src/exporter/web_server.h b/src/exporter/web_server.h new file mode 100644 index 00000000000..c3339a8d43a --- /dev/null +++ b/src/exporter/web_server.h @@ -0,0 +1,5 @@ +#pragma once + +#include <string> + +void web_server_thread_entrypoint(); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index a6ee2737710..57eb18b0d3e 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -488,6 +488,7 @@ int ceph_flags_sys2wire(int flags); */ #define CEPH_XATTR_CREATE (1 << 0) #define CEPH_XATTR_REPLACE (1 << 1) +#define CEPH_XATTR_REMOVE2 (1 << 30) #define CEPH_XATTR_REMOVE (1 << 31) /* diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h index 4a7e108e73f..267ed289bf7 100644 --- a/src/include/rbd/librbd.h +++ b/src/include/rbd/librbd.h @@ -260,6 +260,11 @@ typedef struct { char *group_snap_name; } rbd_snap_group_namespace_t; +typedef struct 
{ + rbd_snap_namespace_type_t original_namespace_type; + char *original_name; +} rbd_snap_trash_namespace_t; + typedef enum { RBD_SNAP_MIRROR_STATE_PRIMARY, RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED, @@ -479,6 +484,9 @@ CEPH_RBD_API int rbd_clone2(rados_ioctx_t p_ioctx, const char *p_name, CEPH_RBD_API int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, const char *p_snapname, rados_ioctx_t c_ioctx, const char *c_name, rbd_image_options_t c_opts); +CEPH_RBD_API int rbd_clone4(rados_ioctx_t p_ioctx, const char *p_name, + uint64_t p_snap_id, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts); CEPH_RBD_API int rbd_remove(rados_ioctx_t io, const char *name); CEPH_RBD_API int rbd_remove_with_progress(rados_ioctx_t io, const char *name, librbd_progress_fn_t cb, @@ -965,6 +973,11 @@ CEPH_RBD_API int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, char* original_name, size_t max_length); +CEPH_RBD_API int rbd_snap_get_trash_namespace2( + rbd_image_t image, uint64_t snap_id, + rbd_snap_trash_namespace_t *trash_snap, size_t trash_snap_size); +CEPH_RBD_API int rbd_snap_trash_namespace_cleanup( + rbd_snap_trash_namespace_t *trash_snap, size_t trash_snap_size); CEPH_RBD_API int rbd_snap_get_mirror_namespace( rbd_image_t image, uint64_t snap_id, rbd_snap_mirror_namespace_t *mirror_snap, size_t mirror_snap_size); @@ -1377,6 +1390,8 @@ CEPH_RBD_API int rbd_aio_mirror_image_create_snapshot(rbd_image_t image, CEPH_RBD_API int rbd_group_create(rados_ioctx_t p, const char *name); CEPH_RBD_API int rbd_group_remove(rados_ioctx_t p, const char *name); CEPH_RBD_API int rbd_group_list(rados_ioctx_t p, char *names, size_t *size); +CEPH_RBD_API int rbd_group_get_id(rados_ioctx_t p, const char *group_name, + char *group_id, size_t *size); CEPH_RBD_API int rbd_group_rename(rados_ioctx_t p, const char *src_name, const char *dest_name); CEPH_RBD_API int rbd_group_info_cleanup(rbd_group_info_t *group_info, diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp index 6d97d1087ad..c02d179450c 100644 --- a/src/include/rbd/librbd.hpp +++ b/src/include/rbd/librbd.hpp @@ -73,6 +73,11 @@ namespace librbd { std::string group_snap_name; } snap_group_namespace_t; + typedef struct { + snap_namespace_type_t original_namespace_type; + std::string original_name; + } snap_trash_namespace_t; + typedef rbd_snap_mirror_state_t snap_mirror_state_t; typedef struct { @@ -294,6 +299,8 @@ public: int *c_order, uint64_t stripe_unit, int stripe_count); int clone3(IoCtx& p_ioctx, const char *p_name, const char *p_snapname, IoCtx& c_ioctx, const char *c_name, ImageOptions& opts); + int clone4(IoCtx& p_ioctx, const char *p_name, uint64_t p_snap_id, + IoCtx& c_ioctx, const char *c_name, ImageOptions& opts); int remove(IoCtx& io_ctx, const char *name); int remove_with_progress(IoCtx& io_ctx, const char *name, ProgressContext& pctx); int rename(IoCtx& src_io_ctx, const char *srcname, const char *destname); @@ -410,6 +417,8 @@ public: int group_create(IoCtx& io_ctx, const char *group_name); int group_remove(IoCtx& io_ctx, const char *group_name); int group_list(IoCtx& io_ctx, std::vector<std::string> *names); + int group_get_id(IoCtx& io_ctx, const char *group_name, + std::string *group_id); int group_rename(IoCtx& io_ctx, const char *src_group_name, const char *dest_group_name); @@ -675,6 +684,9 @@ public: snap_group_namespace_t *group_namespace, size_t snap_group_namespace_size); int snap_get_trash_namespace(uint64_t snap_id, std::string* original_name); + int snap_get_trash_namespace2(uint64_t 
snap_id, + snap_trash_namespace_t *trash_namespace, + size_t snap_trash_namespace_size); int snap_get_mirror_namespace( uint64_t snap_id, snap_mirror_namespace_t *mirror_namespace, size_t snap_mirror_namespace_size); diff --git a/src/kv/RocksDBStore.cc b/src/kv/RocksDBStore.cc index 28217118609..a653fa6398c 100644 --- a/src/kv/RocksDBStore.cc +++ b/src/kv/RocksDBStore.cc @@ -1243,7 +1243,9 @@ int RocksDBStore::do_open(ostream &out, plb.add_time_avg(l_rocksdb_submit_latency, "submit_latency", "Submit Latency"); plb.add_time_avg(l_rocksdb_submit_sync_latency, "submit_sync_latency", "Submit Sync Latency"); plb.add_u64_counter(l_rocksdb_compact, "compact", "Compactions"); - plb.add_u64_counter(l_rocksdb_compact_range, "compact_range", "Compactions by range"); + plb.add_u64_counter(l_rocksdb_compact_running, "compact_running", "Running compactions"); + plb.add_u64_counter(l_rocksdb_compact_completed, "compact_completed", "Completed compactions"); + plb.add_time(l_rocksdb_compact_lasted, "compact_lasted", "Last completed compaction duration"); plb.add_u64_counter(l_rocksdb_compact_queue_merge, "compact_queue_merge", "Mergings of ranges in compaction queue"); plb.add_u64(l_rocksdb_compact_queue_len, "compact_queue_len", "Length of compaction queue"); plb.add_time_avg(l_rocksdb_write_wal_time, "rocksdb_write_wal_time", "Rocksdb write wal time"); @@ -1988,6 +1990,7 @@ int RocksDBStore::split_key(rocksdb::Slice in, string *prefix, string *key) void RocksDBStore::compact() { + dout(2) << __func__ << " starting" << dendl; logger->inc(l_rocksdb_compact); rocksdb::CompactRangeOptions options; db->CompactRange(options, default_cf, nullptr, nullptr); @@ -1999,6 +2002,7 @@ void RocksDBStore::compact() nullptr, nullptr); } } + dout(2) << __func__ << " completed" << dendl; } void RocksDBStore::compact_thread_entry() @@ -2011,12 +2015,17 @@ void RocksDBStore::compact_thread_entry() compact_queue.pop_front(); logger->set(l_rocksdb_compact_queue_len, compact_queue.size()); l.unlock(); - logger->inc(l_rocksdb_compact_range); + logger->inc(l_rocksdb_compact_running); + auto start = ceph_clock_now(); if (range.first.empty() && range.second.empty()) { compact(); } else { compact_range(range.first, range.second); } + auto lat = ceph_clock_now() - start; + logger->dec(l_rocksdb_compact_running); + logger->inc(l_rocksdb_compact_completed); + logger->tset(l_rocksdb_compact_lasted, lat); l.lock(); continue; } diff --git a/src/kv/RocksDBStore.h b/src/kv/RocksDBStore.h index 01c84f2d0b3..a8468a25d4d 100644 --- a/src/kv/RocksDBStore.h +++ b/src/kv/RocksDBStore.h @@ -35,7 +35,9 @@ enum { l_rocksdb_submit_latency, l_rocksdb_submit_sync_latency, l_rocksdb_compact, - l_rocksdb_compact_range, + l_rocksdb_compact_running, + l_rocksdb_compact_completed, + l_rocksdb_compact_lasted, l_rocksdb_compact_queue_merge, l_rocksdb_compact_queue_len, l_rocksdb_write_wal_time, diff --git a/src/librbd/api/DiffIterate.cc b/src/librbd/api/DiffIterate.cc index 717110bd38a..f7dd57504db 100644 --- a/src/librbd/api/DiffIterate.cc +++ b/src/librbd/api/DiffIterate.cc @@ -10,6 +10,7 @@ #include "librbd/internal.h" #include "librbd/io/AioCompletion.h" #include "librbd/io/ImageDispatchSpec.h" +#include "librbd/io/Utils.h" #include "librbd/object_map/DiffRequest.h" #include "include/rados/librados.hpp" #include "include/interval_set.h" @@ -266,6 +267,7 @@ int DiffIterate<I>::diff_iterate(I *ictx, template <typename I> std::pair<uint64_t, uint64_t> DiffIterate<I>::calc_object_diff_range() { + ceph_assert(m_length > 0); uint64_t period = 
m_image_ctx.get_stripe_period(); uint64_t first_period_off = round_down_to(m_offset, period); uint64_t last_period_off = round_down_to(m_offset + m_length - 1, period); @@ -274,15 +276,15 @@ std::pair<uint64_t, uint64_t> DiffIterate<I>::calc_object_diff_range() { if (first_period_off != last_period_off) { // map only the tail of the first period and the front of the last // period instead of the entire range for efficiency - Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout, - m_offset, first_period_off + period - m_offset, - 0, 0, &object_extents); - Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout, - last_period_off, m_offset + m_length - last_period_off, - 0, 0, &object_extents); + io::util::area_to_object_extents(&m_image_ctx, m_offset, + first_period_off + period - m_offset, + io::ImageArea::DATA, 0, &object_extents); + io::util::area_to_object_extents(&m_image_ctx, last_period_off, + m_offset + m_length - last_period_off, + io::ImageArea::DATA, 0, &object_extents); } else { - Striper::file_to_extents(m_image_ctx.cct, &m_image_ctx.layout, m_offset, - m_length, 0, 0, &object_extents); + io::util::area_to_object_extents(&m_image_ctx, m_offset, m_length, + io::ImageArea::DATA, 0, &object_extents); } return {object_extents.front().object_no, object_extents.back().object_no + 1}; } @@ -311,13 +313,13 @@ int DiffIterate<I>::execute() { if (from_snap_id == CEPH_NOSNAP) { return -ENOENT; } - if (from_snap_id == end_snap_id) { + if (from_snap_id > end_snap_id) { + return -EINVAL; + } + if (from_snap_id == end_snap_id || m_length == 0) { // no diff. return 0; } - if (from_snap_id >= end_snap_id) { - return -EINVAL; - } int r; bool fast_diff_enabled = false; @@ -379,47 +381,43 @@ int DiffIterate<I>::execute() { uint64_t read_len = std::min(period_off + period - off, left); if (fast_diff_enabled) { - // map to extents - std::map<object_t,std::vector<ObjectExtent> > object_extents; - Striper::file_to_extents(cct, m_image_ctx.format_string, - &m_image_ctx.layout, off, read_len, 0, - object_extents, 0); + // map to objects (there would be one extent per object) + striper::LightweightObjectExtents object_extents; + io::util::area_to_object_extents(&m_image_ctx, off, read_len, + io::ImageArea::DATA, 0, &object_extents); // get diff info for each object and merge adjacent stripe units // into an aggregate (this also sorts them) io::SparseExtents aggregate_sparse_extents; - for (auto& [object, extents] : object_extents) { - const uint64_t object_no = extents.front().objectno; - ceph_assert(object_no >= start_object_no && object_no < end_object_no); - uint8_t diff_state = object_diff_state[object_no - start_object_no]; - ldout(cct, 20) << "object " << object << ": diff_state=" - << (int)diff_state << dendl; + for (const auto& oe : object_extents) { + ceph_assert(oe.object_no >= start_object_no && + oe.object_no < end_object_no); + uint8_t diff_state = object_diff_state[oe.object_no - start_object_no]; + ldout(cct, 20) << "object " + << util::data_object_name(&m_image_ctx, oe.object_no) + << ": diff_state=" << (int)diff_state << dendl; if (diff_state == object_map::DIFF_STATE_HOLE && from_snap_id == 0 && !parent_diff.empty()) { // no data in child object -- report parent diff instead - for (auto& oe : extents) { - for (auto& be : oe.buffer_extents) { - interval_set<uint64_t> o; - o.insert(off + be.first, be.second); - o.intersection_of(parent_diff); - ldout(cct, 20) << " reporting parent overlap " << o << dendl; - for (auto e = o.begin(); e != o.end(); ++e) { - 
aggregate_sparse_extents.insert(e.get_start(), e.get_len(), - {io::SPARSE_EXTENT_STATE_DATA, - e.get_len()}); - } + for (const auto& be : oe.buffer_extents) { + interval_set<uint64_t> o; + o.insert(off + be.first, be.second); + o.intersection_of(parent_diff); + ldout(cct, 20) << " reporting parent overlap " << o << dendl; + for (auto e = o.begin(); e != o.end(); ++e) { + aggregate_sparse_extents.insert(e.get_start(), e.get_len(), + {io::SPARSE_EXTENT_STATE_DATA, + e.get_len()}); } } } else if (diff_state == object_map::DIFF_STATE_HOLE_UPDATED || diff_state == object_map::DIFF_STATE_DATA_UPDATED) { auto state = (diff_state == object_map::DIFF_STATE_HOLE_UPDATED ? io::SPARSE_EXTENT_STATE_ZEROED : io::SPARSE_EXTENT_STATE_DATA); - for (auto& oe : extents) { - for (auto& be : oe.buffer_extents) { - aggregate_sparse_extents.insert(off + be.first, be.second, - {state, be.second}); - } + for (const auto& be : oe.buffer_extents) { + aggregate_sparse_extents.insert(off + be.first, be.second, + {state, be.second}); } } } diff --git a/src/librbd/api/Group.cc b/src/librbd/api/Group.cc index 06d38fe8500..58c7499e5c9 100644 --- a/src/librbd/api/Group.cc +++ b/src/librbd/api/Group.cc @@ -309,7 +309,6 @@ finish: int group_snap_rollback_by_record(librados::IoCtx& group_ioctx, const cls::rbd::GroupSnapshot& group_snap, const std::string& group_id, - const std::string& group_header_oid, ProgressContext& pctx) { CephContext *cct = (CephContext *)group_ioctx.cct(); std::vector<C_SaferCond*> on_finishes; @@ -649,6 +648,24 @@ int Group<I>::list(IoCtx& io_ctx, vector<string> *names) } template <typename I> +int Group<I>::get_id(IoCtx& io_ctx, const char *group_name, + std::string *group_id) +{ + CephContext *cct = (CephContext *)io_ctx.cct(); + ldout(cct, 20) << "io_ctx=" << &io_ctx << dendl; + + int r = cls_client::dir_get_id(&io_ctx, RBD_GROUP_DIRECTORY, group_name, + group_id); + if (r < 0) { + lderr(cct) << "error reading group id object: " + << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + +template <typename I> int Group<I>::image_add(librados::IoCtx& group_ioctx, const char *group_name, librados::IoCtx& image_ioctx, const char *image_name) { @@ -1263,9 +1280,36 @@ int Group<I>::snap_rollback(librados::IoCtx& group_ioctx, return -ENOENT; } - string group_header_oid = util::group_header_name(group_id); - r = group_snap_rollback_by_record(group_ioctx, *group_snap, group_id, - group_header_oid, pctx); + if (group_snap->state != cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + lderr(cct) << "group snapshot is not complete" << dendl; + return -EINVAL; + } + + std::vector<cls::rbd::GroupImageSpec> rollback_images; + for (const auto& snap : group_snap->snaps) { + rollback_images.emplace_back(snap.image_id, snap.pool); + } + + std::vector<cls::rbd::GroupImageStatus> images; + r = group_image_list(group_ioctx, group_name, &images); + if (r < 0) { + return r; + } + + std::vector<cls::rbd::GroupImageSpec> current_images; + for (const auto& image : images) { + if (image.state == cls::rbd::GROUP_IMAGE_LINK_STATE_ATTACHED) { + current_images.push_back(image.spec); + } + } + + if (rollback_images != current_images) { + lderr(cct) << "group snapshot membership does not match group membership" + << dendl; + return -EINVAL; + } + + r = group_snap_rollback_by_record(group_ioctx, *group_snap, group_id, pctx); return r; } diff --git a/src/librbd/api/Group.h b/src/librbd/api/Group.h index 9d3abcc59e8..98833eb506f 100644 --- a/src/librbd/api/Group.h +++ b/src/librbd/api/Group.h @@ -21,6 +21,8 @@ struct Group { static 
int create(librados::IoCtx& io_ctx, const char *group_name); static int remove(librados::IoCtx& io_ctx, const char *group_name); static int list(librados::IoCtx& io_ctx, std::vector<std::string> *names); + static int get_id(librados::IoCtx& io_ctx, const char *group_name, + std::string *group_id); static int rename(librados::IoCtx& io_ctx, const char *src_group_name, const char *dest_group_name); diff --git a/src/librbd/api/Snapshot.cc b/src/librbd/api/Snapshot.cc index 306ddb593da..e32c79b97a3 100644 --- a/src/librbd/api/Snapshot.cc +++ b/src/librbd/api/Snapshot.cc @@ -82,10 +82,10 @@ public: class GetTrashVisitor { public: - std::string* original_name; + snap_trash_namespace_t *trash_snap; - explicit GetTrashVisitor(std::string* original_name) - : original_name(original_name) { + explicit GetTrashVisitor(snap_trash_namespace_t *trash_snap) + : trash_snap(trash_snap) { } template <typename T> @@ -95,7 +95,9 @@ public: inline int operator()( const cls::rbd::TrashSnapshotNamespace& snap_namespace) { - *original_name = snap_namespace.original_name; + trash_snap->original_namespace_type = static_cast<snap_namespace_type_t>( + snap_namespace.original_snapshot_namespace_type); + trash_snap->original_name = snap_namespace.original_name; return 0; } }; @@ -153,7 +155,7 @@ int Snapshot<I>::get_group_namespace(I *ictx, uint64_t snap_id, template <typename I> int Snapshot<I>::get_trash_namespace(I *ictx, uint64_t snap_id, - std::string* original_name) { + snap_trash_namespace_t *trash_snap) { int r = ictx->state->refresh_if_required(); if (r < 0) { return r; @@ -165,7 +167,7 @@ int Snapshot<I>::get_trash_namespace(I *ictx, uint64_t snap_id, return -ENOENT; } - auto visitor = GetTrashVisitor(original_name); + auto visitor = GetTrashVisitor(trash_snap); r = snap_info->snap_namespace.visit(visitor); if (r < 0) { return r; diff --git a/src/librbd/api/Snapshot.h b/src/librbd/api/Snapshot.h index 7e06a5a8d07..a2a7955e1c3 100644 --- a/src/librbd/api/Snapshot.h +++ b/src/librbd/api/Snapshot.h @@ -21,7 +21,7 @@ struct Snapshot { snap_group_namespace_t *group_snap); static int get_trash_namespace(ImageCtxT *ictx, uint64_t snap_id, - std::string *original_name); + snap_trash_namespace_t *trash_snap); static int get_mirror_namespace( ImageCtxT *ictx, uint64_t snap_id, diff --git a/src/librbd/image/CloneRequest.cc b/src/librbd/image/CloneRequest.cc index 7a955f06464..700bd245876 100644 --- a/src/librbd/image/CloneRequest.cc +++ b/src/librbd/image/CloneRequest.cc @@ -397,7 +397,7 @@ void CloneRequest<I>::handle_attach_child(int r) { ldout(m_cct, 15) << "r=" << r << dendl; if (r < 0) { - lderr(m_cct) << "failed to attach parent: " << cpp_strerror(r) << dendl; + lderr(m_cct) << "failed to attach child: " << cpp_strerror(r) << dendl; m_r_saved = r; close_child(); return; diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 3cd699b2c81..dd674f3a949 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -716,24 +716,40 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) { opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit); opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count); - int r = clone(p_ioctx, nullptr, p_name, p_snap_name, c_ioctx, nullptr, - c_name, opts, "", ""); + int r = clone(p_ioctx, nullptr, p_name, CEPH_NOSNAP, p_snap_name, + c_ioctx, nullptr, c_name, opts, "", ""); opts.get(RBD_IMAGE_OPTION_ORDER, &order); *c_order = order; return r; } int clone(IoCtx& p_ioctx, const char *p_id, const char *p_name, - const char *p_snap_name, IoCtx& c_ioctx, const char *c_id, - const char 
*c_name, ImageOptions& c_opts, + uint64_t p_snap_id, const char *p_snap_name, IoCtx& c_ioctx, + const char *c_id, const char *c_name, ImageOptions& c_opts, const std::string &non_primary_global_image_id, const std::string &primary_mirror_uuid) { - ceph_assert((p_id == nullptr) ^ (p_name == nullptr)); - CephContext *cct = (CephContext *)p_ioctx.cct(); - if (p_snap_name == nullptr) { - lderr(cct) << "image to be cloned must be a snapshot" << dendl; + ldout(cct, 10) << __func__ + << " p_id=" << (p_id ?: "") + << ", p_name=" << (p_name ?: "") + << ", p_snap_id=" << p_snap_id + << ", p_snap_name=" << (p_snap_name ?: "") + << ", c_id=" << (c_id ?: "") + << ", c_name=" << c_name + << ", c_opts=" << c_opts + << ", non_primary_global_image_id=" << non_primary_global_image_id + << ", primary_mirror_uuid=" << primary_mirror_uuid + << dendl; + + if (((p_id == nullptr) ^ (p_name == nullptr)) == 0) { + lderr(cct) << "must specify either parent image id or parent image name" + << dendl; + return -EINVAL; + } + if (((p_snap_id == CEPH_NOSNAP) ^ (p_snap_name == nullptr)) == 0) { + lderr(cct) << "must specify either parent snap id or parent snap name" + << dendl; return -EINVAL; } @@ -766,10 +782,8 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) { clone_id = c_id; } - ldout(cct, 10) << __func__ << " " - << "c_name=" << c_name << ", " - << "c_id= " << clone_id << ", " - << "c_opts=" << c_opts << dendl; + ldout(cct, 10) << __func__ << " parent_id=" << parent_id + << ", clone_id=" << clone_id << dendl; ConfigProxy config{reinterpret_cast<CephContext *>(c_ioctx.cct())->_conf}; api::Config<>::apply_pool_overrides(c_ioctx, &config); @@ -778,8 +792,8 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) { C_SaferCond cond; auto *req = image::CloneRequest<>::create( - config, p_ioctx, parent_id, p_snap_name, - {cls::rbd::UserSnapshotNamespace{}}, CEPH_NOSNAP, c_ioctx, c_name, + config, p_ioctx, parent_id, (p_snap_name ?: ""), + {cls::rbd::UserSnapshotNamespace{}}, p_snap_id, c_ioctx, c_name, clone_id, c_opts, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL, non_primary_global_image_id, primary_mirror_uuid, asio_engine.get_work_queue(), &cond); diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 65e9a9d18fe..77a64137ddb 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -77,8 +77,8 @@ namespace librbd { uint64_t features, int *c_order, uint64_t stripe_unit, int stripe_count); int clone(IoCtx& p_ioctx, const char *p_id, const char *p_name, - const char *p_snap_name, IoCtx& c_ioctx, const char *c_id, - const char *c_name, ImageOptions& c_opts, + uint64_t p_snap_id, const char *p_snap_name, IoCtx& c_ioctx, + const char *c_id, const char *c_name, ImageOptions& c_opts, const std::string &non_primary_global_image_id, const std::string &primary_mirror_uuid); int rename(librados::IoCtx& io_ctx, const char *srcname, const char *dstname); diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 8749a04d2d5..df45e370af4 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -778,12 +778,26 @@ namespace librbd { { TracepointProvider::initialize<tracepoint_traits>(get_cct(p_ioctx)); tracepoint(librbd, clone3_enter, p_ioctx.get_pool_name().c_str(), p_ioctx.get_id(), p_name, p_snap_name, c_ioctx.get_pool_name().c_str(), c_ioctx.get_id(), c_name, c_opts.opts); - int r = librbd::clone(p_ioctx, nullptr, p_name, p_snap_name, c_ioctx, - nullptr, c_name, c_opts, "", ""); + int r = librbd::clone(p_ioctx, nullptr, p_name, CEPH_NOSNAP, p_snap_name, + c_ioctx, nullptr, c_name, c_opts, "", ""); 
tracepoint(librbd, clone3_exit, r); return r; } + int RBD::clone4(IoCtx& p_ioctx, const char *p_name, uint64_t p_snap_id, + IoCtx& c_ioctx, const char *c_name, ImageOptions& c_opts) + { + TracepointProvider::initialize<tracepoint_traits>(get_cct(p_ioctx)); + tracepoint(librbd, clone4_enter, p_ioctx.get_pool_name().c_str(), + p_ioctx.get_id(), p_name, p_snap_id, + c_ioctx.get_pool_name().c_str(), c_ioctx.get_id(), c_name, + c_opts.opts); + int r = librbd::clone(p_ioctx, nullptr, p_name, p_snap_id, nullptr, + c_ioctx, nullptr, c_name, c_opts, "", ""); + tracepoint(librbd, clone4_exit, r); + return r; + } + int RBD::remove(IoCtx& io_ctx, const char *name) { TracepointProvider::initialize<tracepoint_traits>(get_cct(io_ctx)); @@ -1292,6 +1306,11 @@ namespace librbd { return r; } + int RBD::group_get_id(IoCtx& io_ctx, const char *group_name, std::string *group_id) + { + return librbd::api::Group<>::get_id(io_ctx, group_name, group_id); + } + int RBD::group_rename(IoCtx& io_ctx, const char *src_name, const char *dest_name) { @@ -2478,8 +2497,29 @@ namespace librbd { int Image::snap_get_trash_namespace(uint64_t snap_id, std::string* original_name) { ImageCtx *ictx = (ImageCtx *)ctx; + + snap_trash_namespace_t trash_snap; + int r = librbd::api::Snapshot<>::get_trash_namespace(ictx, snap_id, + &trash_snap); + if (r < 0) { + return r; + } + + *original_name = trash_snap.original_name; + return 0; + } + + int Image::snap_get_trash_namespace2( + uint64_t snap_id, snap_trash_namespace_t *trash_snap, + size_t trash_snap_size) { + ImageCtx *ictx = (ImageCtx *)ctx; + + if (trash_snap_size != sizeof(snap_trash_namespace_t)) { + return -ERANGE; + } + return librbd::api::Snapshot<>::get_trash_namespace(ictx, snap_id, - original_name); + trash_snap); } int Image::snap_get_mirror_namespace( @@ -3973,12 +4013,30 @@ extern "C" int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, TracepointProvider::initialize<tracepoint_traits>(get_cct(p_ioc)); tracepoint(librbd, clone3_enter, p_ioc.get_pool_name().c_str(), p_ioc.get_id(), p_name, p_snap_name, c_ioc.get_pool_name().c_str(), c_ioc.get_id(), c_name, c_opts); librbd::ImageOptions c_opts_(c_opts); - int r = librbd::clone(p_ioc, nullptr, p_name, p_snap_name, c_ioc, nullptr, - c_name, c_opts_, "", ""); + int r = librbd::clone(p_ioc, nullptr, p_name, CEPH_NOSNAP, p_snap_name, + c_ioc, nullptr, c_name, c_opts_, "", ""); tracepoint(librbd, clone3_exit, r); return r; } +extern "C" int rbd_clone4(rados_ioctx_t p_ioctx, const char *p_name, + uint64_t p_snap_id, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts) +{ + librados::IoCtx p_ioc, c_ioc; + librados::IoCtx::from_rados_ioctx_t(p_ioctx, p_ioc); + librados::IoCtx::from_rados_ioctx_t(c_ioctx, c_ioc); + TracepointProvider::initialize<tracepoint_traits>(get_cct(p_ioc)); + tracepoint(librbd, clone4_enter, p_ioc.get_pool_name().c_str(), + p_ioc.get_id(), p_name, p_snap_id, c_ioc.get_pool_name().c_str(), + c_ioc.get_id(), c_name, c_opts); + librbd::ImageOptions c_opts_(c_opts); + int r = librbd::clone(p_ioc, nullptr, p_name, p_snap_id, nullptr, + c_ioc, nullptr, c_name, c_opts_, "", ""); + tracepoint(librbd, clone4_exit, r); + return r; +} + extern "C" int rbd_remove(rados_ioctx_t p, const char *name) { librados::IoCtx io_ctx; @@ -6923,6 +6981,31 @@ extern "C" int rbd_group_rename(rados_ioctx_t p, const char *src_name, return r; } +extern "C" int rbd_group_get_id(rados_ioctx_t p, + const char *group_name, + char *group_id, + size_t *size) +{ + librados::IoCtx io_ctx; + 
librados::IoCtx::from_rados_ioctx_t(p, io_ctx); + + std::string cpp_id; + int r = librbd::api::Group<>::get_id(io_ctx, group_name, &cpp_id); + if (r < 0) { + return r; + } + + auto total_len = cpp_id.size() + 1; + if (*size < total_len) { + *size = total_len; + return -ERANGE; + } + *size = total_len; + + strcpy(group_id, cpp_id.c_str()); + return 0; +} + extern "C" int rbd_group_image_add(rados_ioctx_t group_p, const char *group_name, rados_ioctx_t image_p, @@ -7307,18 +7390,50 @@ extern "C" int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, size_t max_length) { librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; - std::string cpp_original_name; + librbd::snap_trash_namespace_t trash_namespace; + int r = librbd::api::Snapshot<>::get_trash_namespace(ictx, snap_id, + &trash_namespace); + if (r < 0) { + return r; + } + + if (trash_namespace.original_name.length() >= max_length) { + return -ERANGE; + } + + strcpy(original_name, trash_namespace.original_name.c_str()); + return 0; +} + +extern "C" int rbd_snap_get_trash_namespace2( + rbd_image_t image, uint64_t snap_id, + rbd_snap_trash_namespace_t *trash_snap, + size_t trash_snap_size) { + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + + if (trash_snap_size != sizeof(rbd_snap_trash_namespace_t)) { + return -ERANGE; + } + + librbd::snap_trash_namespace_t trash_namespace; int r = librbd::api::Snapshot<>::get_trash_namespace(ictx, snap_id, - &cpp_original_name); + &trash_namespace); if (r < 0) { return r; } - if (cpp_original_name.length() >= max_length) { + trash_snap->original_namespace_type = trash_namespace.original_namespace_type; + trash_snap->original_name = strdup(trash_namespace.original_name.c_str()); + return 0; +} + +extern "C" int rbd_snap_trash_namespace_cleanup( + rbd_snap_trash_namespace_t *trash_snap, size_t trash_snap_size) { + if (trash_snap_size != sizeof(rbd_snap_trash_namespace_t)) { return -ERANGE; } - strcpy(original_name, cpp_original_name.c_str()); + free(trash_snap->original_name); return 0; } diff --git a/src/mds/BoostUrlImpl.cc b/src/mds/BoostUrlImpl.cc deleted file mode 100644 index 479f4c6d75d..00000000000 --- a/src/mds/BoostUrlImpl.cc +++ /dev/null @@ -1,8 +0,0 @@ -/* - * https://www.boost.org/doc/libs/1_82_0/libs/url/doc/html/url/overview.html#url.overview.requirements - * - * To use the library as header-only; that is, to eliminate the requirement - * to link a program to a static or dynamic Boost.URL library, - * simply place the following line in exactly one source file in your project. 
- */ -#include <boost/url/src.hpp> diff --git a/src/mds/CDentry.cc b/src/mds/CDentry.cc index 8694dfc72a2..b9a232798d8 100644 --- a/src/mds/CDentry.cc +++ b/src/mds/CDentry.cc @@ -568,6 +568,7 @@ void CDentry::encode_remote(inodeno_t& ino, unsigned char d_type, // marker, name, ino ENCODE_START(2, 1, bl); + // WARNING: always put new fields at the end of bl encode(ino, bl); encode(d_type, bl); encode(alternate_name, bl); @@ -599,6 +600,15 @@ void CDentry::dump(Formatter *f) const make_path(path); f->dump_string("path", path.get_path()); + if (auto s = get_alternate_name(); !s.empty()) { + bufferlist bl, b64; + bl.append(s); + bl.encode_base64(b64); + auto encoded = std::string_view(b64.c_str(), b64.length()); + f->dump_string("alternate_name", encoded); + } else { + f->dump_string("alternate_name", ""); + } f->dump_unsigned("path_ino", path.get_ino().val); f->dump_unsigned("snap_first", first); f->dump_unsigned("snap_last", last); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 3828fe04d7b..acddeb4f1d1 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1018,6 +1018,12 @@ void CDir::init_fragment_pins() get(PIN_SUBTREE); } +bool CDir::should_split() const { + uint64_t split_size = mdcache->mds->balancer->get_bal_split_size(); + uint64_t items = get_frag_size() + get_num_snap_items(); + return split_size > 0 && items > split_size; +} + void CDir::split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay) { dout(10) << "split by " << bits << " bits on " << *this << dendl; @@ -2327,18 +2333,18 @@ public: class C_IO_Dir_Commit_Ops : public Context { public: - C_IO_Dir_Commit_Ops(CDir *d, int pr, - vector<CDir::dentry_commit_item> &&s, bufferlist &&bl, - vector<string> &&r, - mempool::mds_co::compact_set<mempool::mds_co::string> &&stales) : - dir(d), op_prio(pr) { + C_IO_Dir_Commit_Ops(CDir* d, int pr, auto&& s, auto&& bl, auto&& r, auto&& stales) + : + dir(d), + op_prio(pr), + to_set(std::forward<decltype(s)>(s)), + dfts(std::forward<decltype(bl)>(bl)), + to_remove(std::forward<decltype(r)>(r)), + stale_items(std::forward<decltype(stales)>(stales)) + { metapool = dir->mdcache->mds->get_metadata_pool(); version = dir->get_version(); is_new = dir->is_new(); - to_set.swap(s); - dfts.swap(bl); - to_remove.swap(r); - stale_items.swap(stales); } void finish(int r) override { @@ -2488,9 +2494,9 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers mdcache->mds->heartbeat_reset(); } - bufferlist bl; using ceph::encode; for (auto &item : to_set) { + bufferlist bl; encode(item.first, bl); if (item.is_remote) { // remote link @@ -2500,6 +2506,7 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers bl.append('i'); // inode ENCODE_START(2, 1, bl); + // WARNING: always put new fields at the end of bl encode(item.alternate_name, bl); _encode_primary_inode_base(item, dfts, bl); ENCODE_FINISH(bl); @@ -2510,7 +2517,7 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers commit_one(); write_size += size; - _set[std::move(item.key)].swap(bl); + _set[std::move(item.key)] = std::move(bl); if (!(++count % mdcache->mds->heartbeat_reset_grace())) mdcache->mds->heartbeat_reset(); @@ -2621,7 +2628,7 @@ void CDir::_omap_commit(int op_prio) auto c = new C_IO_Dir_Commit_Ops(this, op_prio, std::move(to_set), std::move(dfts), std::move(to_remove), std::move(stale_items)); - stale_items.clear(); + stale_items.clear(); /* in CDir */ mdcache->mds->finisher->queue(c); } @@ -3774,7 +3781,10 @@ std::string 
CDir::get_path() const bool CDir::should_split_fast() const { // Max size a fragment can be before trigger fast splitting - int fast_limit = g_conf()->mds_bal_split_size * g_conf()->mds_bal_fragment_fast_factor; + auto&& balancer = mdcache->mds->balancer; + auto split_size = balancer->get_bal_split_size(); + auto fragment_fast_factor = balancer->get_bal_fragment_fast_factor(); + int64_t fast_limit = split_size * fragment_fast_factor; // Fast path: the sum of accounted size and null dentries does not // exceed threshold: we definitely are not over it. @@ -3811,7 +3821,9 @@ bool CDir::should_merge() const return false; } - return ((int)get_frag_size() + (int)get_num_snap_items()) < g_conf()->mds_bal_merge_size; + uint64_t merge_size = mdcache->mds->balancer->get_bal_merge_size(); + uint64_t items = get_frag_size() + get_num_snap_items(); + return items < merge_size; } MEMPOOL_DEFINE_OBJECT_FACTORY(CDir, co_dir, mds_co); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 76ac7e21cc0..215375ca297 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -403,10 +403,7 @@ public: void split(int bits, std::vector<CDir*>* subs, MDSContext::vec& waiters, bool replay); void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay); - bool should_split() const { - return g_conf()->mds_bal_split_size > 0 && - ((int)get_frag_size() + (int)get_num_snap_items()) > g_conf()->mds_bal_split_size; - } + bool should_split() const; bool should_split_fast() const; bool should_merge() const; diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 40b0ba76a37..c2ea2facbd0 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -26,6 +26,7 @@ #include "MDLog.h" #include "Locker.h" #include "Mutation.h" +#include "MDBalancer.h" #include "events/EUpdate.h" @@ -5384,7 +5385,8 @@ void CInode::queue_export_pin(mds_rank_t export_pin) void CInode::maybe_export_pin(bool update) { - if (!g_conf()->mds_bal_export_pin) + auto&& balancer = mdcache->mds->balancer; + if (!balancer->get_bal_export_pin()) return; if (!is_dir() || !is_normal()) return; @@ -5499,7 +5501,9 @@ void CInode::set_export_pin(mds_rank_t rank) mds_rank_t CInode::get_export_pin(bool inherit) const { - if (!g_conf()->mds_bal_export_pin) + auto&& balancer = mdcache->mds->balancer; + auto export_pin = balancer->get_bal_export_pin(); + if (!export_pin) return MDS_RANK_NONE; /* An inode that is export pinned may not necessarily be a subtree root, we diff --git a/src/mds/CMakeLists.txt b/src/mds/CMakeLists.txt index ffa9dc28d8a..f3980c7e04b 100644 --- a/src/mds/CMakeLists.txt +++ b/src/mds/CMakeLists.txt @@ -45,13 +45,12 @@ set(mds_srcs QuiesceDbManager.cc QuiesceAgent.cc MDSRankQuiesce.cc - BoostUrlImpl.cc ${CMAKE_SOURCE_DIR}/src/common/TrackedOp.cc ${CMAKE_SOURCE_DIR}/src/common/MemoryModel.cc ${CMAKE_SOURCE_DIR}/src/osdc/Journaler.cc ${CMAKE_SOURCE_DIR}/src/mgr/MDSPerfMetricTypes.cc) add_library(mds STATIC ${mds_srcs}) target_link_libraries(mds PRIVATE - legacy-option-headers + legacy-option-headers Boost::url heap_profiler cpu_profiler osdc ${LUA_LIBRARIES}) target_include_directories(mds PRIVATE "${LUA_INCLUDE_DIR}") diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index 73e4151a9ae..88d7fda7c10 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -135,6 +135,7 @@ void Filesystem::dump(Formatter *f) const void FSMap::dump(Formatter *f) const { f->dump_int("epoch", epoch); + f->dump_string("btime", fmt::format("{}", btime)); // Use 'default' naming to match 'set-default' CLI f->dump_int("default_fscid", legacy_client_fscid); @@ -168,6 
+169,7 @@ void FSMap::dump(Formatter *f) const FSMap &FSMap::operator=(const FSMap &rhs) { epoch = rhs.epoch; + btime = rhs.btime; next_filesystem_id = rhs.next_filesystem_id; legacy_client_fscid = rhs.legacy_client_fscid; default_compat = rhs.default_compat; @@ -206,6 +208,7 @@ void FSMap::generate_test_instances(std::list<FSMap*>& ls) void FSMap::print(ostream& out) const { out << "e" << epoch << std::endl; + out << "btime " << fmt::format("{}", btime) << std::endl; out << "enable_multiple, ever_enabled_multiple: " << enable_multiple << "," << ever_enabled_multiple << std::endl; out << "default compat: " << default_compat << std::endl; @@ -296,6 +299,7 @@ void FSMap::print_summary(Formatter *f, ostream *out) const { if (f) { f->dump_unsigned("epoch", get_epoch()); + f->dump_string("btime", fmt::format("{}", btime)); for (const auto& [fscid, fs] : filesystems) { f->dump_unsigned("id", fscid); f->dump_unsigned("up", fs.mds_map.up.size()); @@ -643,6 +647,7 @@ void FSMap::encode(bufferlist& bl, uint64_t features) const encode(standby_daemons, bl, features); encode(standby_epochs, bl); encode(ever_enabled_multiple, bl); + encode(btime, bl); ENCODE_FINISH(bl); } @@ -674,6 +679,9 @@ void FSMap::decode(bufferlist::const_iterator& p) if (struct_v >= 7) { decode(ever_enabled_multiple, p); } + if (struct_v >= 8) { + decode(btime, p); + } DECODE_FINISH(p); } @@ -1011,6 +1019,12 @@ void FSMap::erase(mds_gid_t who, epoch_t blocklist_epoch) // the rank ever existed so that next time it's handed out // to a gid it'll go back into CREATING. fs.mds_map.in.erase(info.rank); + } else if (info.state == MDSMap::STATE_STARTING) { + // If this gid didn't make it past STARTING, then forget + // the rank ever existed so that next time it's handed out + // to a gid it'll go back into STARTING. + fs.mds_map.in.erase(info.rank); + fs.mds_map.stopped.insert(info.rank); } else { // Put this rank into the failed list so that the next available // STANDBY will pick it up. 
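[Editor's aside, not part of the patch.] The btime field added above follows Ceph's usual versioned-encoding idiom, visible in the FSMap.h hunk below: bump STRUCT_VERSION, append the new field at the tail of encode(), and guard its decode() on struct_v, so maps written by pre-v8 monitors still parse and btime simply stays at real_clock::zero(). A minimal sketch of the idiom with a hypothetical struct:

struct Example {
  static const version_t STRUCT_VERSION = 8;
  ceph::real_time btime = ceph::real_clock::zero();
  // ... older fields ...
  void encode(ceph::buffer::list& bl) const {
    using ceph::encode;
    ENCODE_START(STRUCT_VERSION, 7, bl);  // encode as v8, compat down to v7
    // ... older fields, in their original order ...
    encode(btime, bl);                    // new fields always go at the end
    ENCODE_FINISH(bl);
  }
  void decode(ceph::buffer::list::const_iterator& p) {
    using ceph::decode;
    DECODE_START(STRUCT_VERSION, p);
    // ... older fields ...
    if (struct_v >= 8)
      decode(btime, p);  // absent in pre-v8 encodings; default stays zero()
    DECODE_FINISH(p);
  }
};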
diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h index 9d452bb98d9..518d6273e44 100644 --- a/src/mds/FSMap.h +++ b/src/mds/FSMap.h @@ -26,6 +26,7 @@ #include <errno.h> #include "include/types.h" +#include "common/ceph_time.h" #include "common/Clock.h" #include "mds/MDSMap.h" @@ -268,12 +269,13 @@ WRITE_CLASS_ENCODER_FEATURES(Filesystem) class FSMap { public: + using real_clock = ceph::real_clock; using mds_info_t = MDSMap::mds_info_t; using fsmap = typename std::map<fs_cluster_id_t, Filesystem>; using const_iterator = typename fsmap::const_iterator; using iterator = typename fsmap::iterator; - static const version_t STRUCT_VERSION = 7; + static const version_t STRUCT_VERSION = 8; static const version_t STRUCT_VERSION_TRIM_TO = 7; FSMap() : default_compat(MDSMap::get_compat_set_default()) {} @@ -281,6 +283,7 @@ public: FSMap(const FSMap &rhs) : epoch(rhs.epoch), + btime(rhs.btime), next_filesystem_id(rhs.next_filesystem_id), legacy_client_fscid(rhs.legacy_client_fscid), default_compat(rhs.default_compat), @@ -584,6 +587,13 @@ public: epoch_t get_epoch() const { return epoch; } void inc_epoch() { epoch++; } + void set_btime() { + btime = real_clock::now(); + } + auto get_btime() const { + return btime; + } + version_t get_struct_version() const { return struct_version; } bool is_struct_old() const { return struct_version < STRUCT_VERSION_TRIM_TO; @@ -676,6 +686,8 @@ protected: } epoch_t epoch = 0; + ceph::real_time btime = real_clock::zero(); + uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1; fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE; CompatSet default_compat; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index b84f7b59ee5..f4fb1a114d9 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1906,7 +1906,8 @@ void Locker::wrlock_force(SimpleLock *lock, MutationRef& mut) dout(7) << "wrlock_force on " << *lock << " on " << *lock->get_parent() << dendl; lock->get_wrlock(true); - mut->emplace_lock(lock, MutationImpl::LockOp::WRLOCK); + auto it = mut->emplace_lock(lock, MutationImpl::LockOp::WRLOCK); + it->flags |= MutationImpl::LockOp::WRLOCK; // may already remote_wrlocked } bool Locker::wrlock_try(SimpleLock *lock, const MutationRef& mut, client_t client) @@ -2128,7 +2129,8 @@ bool Locker::xlock_start(SimpleLock *lock, const MDRequestRef& mut) in && in->issued_caps_need_gather(lock))) { // xlocker does not hold shared cap lock->set_state(LOCK_XLOCK); lock->get_xlock(mut, client); - mut->emplace_lock(lock, MutationImpl::LockOp::XLOCK); + auto it = mut->emplace_lock(lock, MutationImpl::LockOp::XLOCK); + ceph_assert(it->is_xlock()); mut->finish_locking(lock); return true; } @@ -4265,8 +4267,8 @@ void Locker::_do_cap_release(client_t client, inodeno_t ino, uint64_t cap_id, new C_Locker_RetryCapRelease(this, client, ino, cap_id, mseq, seq)); return; } - if (seq != cap->get_last_issue()) { - dout(7) << " issue_seq " << seq << " != " << cap->get_last_issue() << dendl; + if (seq < cap->get_last_issue()) { + dout(7) << " issue_seq " << seq << " < " << cap->get_last_issue() << dendl; // clean out any old revoke history cap->clean_revoke_from(seq); eval_cap_gather(in); @@ -5212,7 +5214,8 @@ void Locker::scatter_writebehind(ScatterLock *lock) // forcefully take a wrlock lock->get_wrlock(true); - mut->emplace_lock(lock, MutationImpl::LockOp::WRLOCK); + auto it = mut->emplace_lock(lock, MutationImpl::LockOp::WRLOCK); + ceph_assert(it->is_wrlock()); in->pre_cow_old_inode(); // avoid cow mayhem @@ -5603,7 +5606,8 @@ bool Locker::local_xlock_start(LocalLockC *lock, const 
MDRequestRef& mut) } lock->get_xlock(mut, mut->get_client()); - mut->emplace_lock(lock, MutationImpl::LockOp::XLOCK); + auto it = mut->emplace_lock(lock, MutationImpl::LockOp::XLOCK); + ceph_assert(it->is_xlock()); return true; } diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index 88cc55833b5..3fa4acca1b4 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -81,18 +81,58 @@ int MDBalancer::proc_message(const cref_t<Message> &m) MDBalancer::MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) : mds(m), messenger(msgr), mon_client(monc) { + bal_export_pin = g_conf().get_val<bool>("mds_bal_export_pin"); bal_fragment_dirs = g_conf().get_val<bool>("mds_bal_fragment_dirs"); + bal_fragment_fast_factor = g_conf().get_val<double>("mds_bal_fragment_fast_factor"); bal_fragment_interval = g_conf().get_val<int64_t>("mds_bal_fragment_interval"); + bal_interval = g_conf().get_val<int64_t>("mds_bal_interval"); + bal_max_until = g_conf().get_val<int64_t>("mds_bal_max_until"); + bal_merge_size = g_conf().get_val<int64_t>("mds_bal_merge_size"); + bal_mode = g_conf().get_val<int64_t>("mds_bal_mode"); + bal_replicate_threshold = g_conf().get_val<double>("mds_bal_replicate_threshold"); + bal_sample_interval = g_conf().get_val<double>("mds_bal_sample_interval"); + bal_split_rd = g_conf().get_val<double>("mds_bal_split_rd"); + bal_split_bits = g_conf().get_val<int64_t>("mds_bal_split_bits"); + bal_split_size = g_conf().get_val<int64_t>("mds_bal_split_size"); + bal_split_wr = g_conf().get_val<double>("mds_bal_split_wr"); + bal_unreplicate_threshold = g_conf().get_val<double>("mds_bal_unreplicate_threshold"); + num_bal_times = g_conf().get_val<int64_t>("mds_bal_max"); } void MDBalancer::handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map) { - if (changed.count("mds_bal_fragment_dirs")) { + if (changed.count("mds_bal_export_pin")) + bal_export_pin = g_conf().get_val<bool>("mds_bal_export_pin"); + if (changed.count("mds_bal_fragment_dirs")) bal_fragment_dirs = g_conf().get_val<bool>("mds_bal_fragment_dirs"); - } - if (changed.count("mds_bal_fragment_interval")) { + if (changed.count("mds_bal_fragment_fast_factor")) + bal_fragment_fast_factor = g_conf().get_val<double>("mds_bal_fragment_fast_factor"); + if (changed.count("mds_bal_fragment_interval")) bal_fragment_interval = g_conf().get_val<int64_t>("mds_bal_fragment_interval"); - } + if (changed.count("mds_bal_interval")) + bal_interval = g_conf().get_val<int64_t>("mds_bal_interval"); + if (changed.count("mds_bal_max_until")) + bal_max_until = g_conf().get_val<int64_t>("mds_bal_max_until"); + if (changed.count("mds_bal_merge_size")) + bal_merge_size = g_conf().get_val<int64_t>("mds_bal_merge_size"); + if (changed.count("mds_bal_mode")) + bal_mode = g_conf().get_val<int64_t>("mds_bal_mode"); + if (changed.count("mds_bal_replicate_threshold")) + bal_replicate_threshold = g_conf().get_val<double>("mds_bal_replicate_threshold"); + if (changed.count("mds_bal_sample_interval")) + bal_sample_interval = g_conf().get_val<double>("mds_bal_sample_interval"); + if (changed.count("mds_bal_split_rd")) + bal_split_rd = g_conf().get_val<double>("mds_bal_split_rd"); + if (changed.count("mds_bal_split_bits")) + bal_split_bits = g_conf().get_val<int64_t>("mds_bal_split_bits"); + if (changed.count("mds_bal_split_size")) + bal_split_size = g_conf().get_val<int64_t>("mds_bal_split_size"); + if (changed.count("mds_bal_split_wr")) + bal_split_wr = g_conf().get_val<double>("mds_bal_split_wr"); + if 
(changed.count("mds_bal_unreplicate_threshold")) + bal_unreplicate_threshold = g_conf().get_val<double>("mds_bal_unreplicate_threshold"); + if (changed.count("mds_bal_max")) + num_bal_times = g_conf().get_val<int64_t>("mds_bal_max"); } bool MDBalancer::test_rank_mask(mds_rank_t rank) @@ -229,19 +269,16 @@ void MDBalancer::handle_export_pins(void) void MDBalancer::tick() { - static int num_bal_times = g_conf()->mds_bal_max; bool balance_automate = mds->mdsmap->allows_balance_automate(); - auto bal_interval = g_conf().get_val<int64_t>("mds_bal_interval"); - auto bal_max_until = g_conf().get_val<int64_t>("mds_bal_max_until"); time now = clock::now(); - if (g_conf()->mds_bal_export_pin) { + if (bal_export_pin) { handle_export_pins(); } // sample? if (chrono::duration<double>(now-last_sample).count() > - g_conf()->mds_bal_sample_interval) { + bal_sample_interval) { dout(15) << "tick last_sample now " << now << dendl; last_sample = now; } @@ -275,9 +312,9 @@ public: }; -double mds_load_t::mds_load() const +double mds_load_t::mds_load(int64_t bal_mode) const { - switch(g_conf()->mds_bal_mode) { + switch(bal_mode) { case 0: return .8 * auth.meta_load() + @@ -397,7 +434,6 @@ int MDBalancer::localize_balancer() /* timeout: if we waste half our time waiting for RADOS, then abort! */ std::cv_status ret_t = [&] { - auto bal_interval = g_conf().get_val<int64_t>("mds_bal_interval"); std::unique_lock locker{lock}; return cond.wait_for(locker, std::chrono::seconds(bal_interval / 2)); }(); @@ -434,7 +470,7 @@ void MDBalancer::send_heartbeat() // my load mds_load_t load = get_load(); - mds->logger->set(l_mds_load_cent, 100 * load.mds_load()); + mds->logger->set(l_mds_load_cent, 100 * load.mds_load(bal_mode)); mds->logger->set(l_mds_dispatch_queue_len, load.queue_len); auto em = mds_load.emplace(std::piecewise_construct, std::forward_as_tuple(mds->get_nodeid()), std::forward_as_tuple(load)); @@ -607,7 +643,7 @@ void MDBalancer::queue_split(const CDir *dir, bool fast) // Pass on to MDCache: note that the split might still not // happen if the checks in MDCache::can_fragment fail. dout(10) << _func_ << " splitting " << *dir << dendl; - int bits = g_conf()->mds_bal_split_bits; + int bits = bal_split_bits; if (dir->inode->is_ephemeral_dist()) { unsigned min_frag_bits = mdcache->get_ephemeral_dist_frag_bits(); if (df.frag.bits() + bits < min_frag_bits) @@ -741,9 +777,9 @@ void MDBalancer::prep_rebalance(int beat) // rescale! turn my mds_load back into meta_load units double load_fac = 1.0; map<mds_rank_t, mds_load_t>::iterator m = mds_load.find(whoami); - if ((m != mds_load.end()) && (m->second.mds_load() > 0)) { + if ((m != mds_load.end()) && (m->second.mds_load(bal_mode) > 0)) { double metald = m->second.auth.meta_load(); - double mdsld = m->second.mds_load(); + double mdsld = m->second.mds_load(bal_mode); load_fac = metald / mdsld; dout(7) << " load_fac is " << load_fac << " <- " << m->second.auth << " " << metald @@ -758,13 +794,13 @@ void MDBalancer::prep_rebalance(int beat) for (mds_rank_t i=mds_rank_t(0); i < mds_rank_t(cluster_size); i++) { mds_load_t& load = mds_load.at(i); - double l = load.mds_load() * load_fac; + double l = load.mds_load(bal_mode) * load_fac; mds_meta_load[i] = l; if (whoami == 0) dout(7) << " mds." << i << " " << load - << " = " << load.mds_load() + << " = " << load.mds_load(bal_mode) << " ~ " << l << dendl; if (whoami == i) my_load = l; @@ -781,9 +817,9 @@ void MDBalancer::prep_rebalance(int beat) << dendl; // under or over? 
+ auto bal_min_rebalance = g_conf().get_val<double>("mds_bal_min_rebalance"); for (const auto& [load, rank] : load_map) { - if (test_rank_mask(rank) && - load < target_load * (1.0 + g_conf()->mds_bal_min_rebalance)) { + if (test_rank_mask(rank) && load < target_load * (1.0 + bal_min_rebalance)) { dout(7) << " mds." << rank << " is underloaded or barely overloaded." << dendl; mds_last_epoch_under_map[rank] = beat_epoch; } @@ -962,8 +998,9 @@ void MDBalancer::try_rebalance(balance_state_t& state) mds_rank_t from = diri->authority().first; double pop = dir->pop_auth_subtree.meta_load(); - if (g_conf()->mds_bal_idle_threshold > 0 && - pop < g_conf()->mds_bal_idle_threshold && + const auto bal_idle_threshold = g_conf().get_val<double>("mds_bal_idle_threshold"); + if (bal_idle_threshold > 0 && + pop < bal_idle_threshold && diri != mds->mdcache->get_root() && from != mds->get_nodeid()) { dout(5) << " exporting idle (" << pop << ") import " << *dir @@ -1125,13 +1162,14 @@ void MDBalancer::find_exports(CDir *dir, ceph_assert(dir->is_auth()); double need = amount - have; - if (need < amount * g_conf()->mds_bal_min_start) + const auto bal_min_start = g_conf().get_val<double>("mds_bal_min_start"); + if (need < amount * bal_min_start) return; // good enough! - double needmax = need * g_conf()->mds_bal_need_max; - double needmin = need * g_conf()->mds_bal_need_min; - double midchunk = need * g_conf()->mds_bal_midchunk; - double minchunk = need * g_conf()->mds_bal_minchunk; + double needmax = need * g_conf().get_val<double>("mds_bal_need_max"); + double needmin = need * g_conf().get_val<double>("mds_bal_need_min"); + double midchunk = need * g_conf().get_val<double>("mds_bal_midchunk"); + double minchunk = need * g_conf().get_val<double>("mds_bal_minchunk"); std::vector<CDir*> bigger_rep, bigger_unrep; multimap<double, CDir*> smaller; @@ -1285,8 +1323,8 @@ void MDBalancer::hit_dir(CDir *dir, int type, double amount) // hit me double v = dir->pop_me.get(type).hit(amount); - const bool hot = (v > g_conf()->mds_bal_split_rd && type == META_POP_IRD) || - (v > g_conf()->mds_bal_split_wr && type == META_POP_IWR); + const bool hot = (v > bal_split_rd && type == META_POP_IRD) || + (v > bal_split_wr && type == META_POP_IWR); dout(20) << type << " pop is " << v << ", frag " << dir->get_frag() << " size " << dir->get_frag_size() << " " << dir->pop_me << dendl; @@ -1303,7 +1341,7 @@ void MDBalancer::hit_dir(CDir *dir, int type, double amount) dout(20) << type << " pop " << dir_pop << " spread in " << *dir << dendl; if (dir->is_auth() && !dir->is_ambiguous_auth() && dir->can_rep()) { - if (dir_pop >= g_conf()->mds_bal_replicate_threshold) { + if (dir_pop >= bal_replicate_threshold) { // replicate double rdp = dir->pop_me.get(META_POP_IRD).get(); rd_adj = rdp / mds->get_mds_map()->get_num_in_mds() - rdp; @@ -1321,7 +1359,7 @@ void MDBalancer::hit_dir(CDir *dir, int type, double amount) if (dir->ino() != 1 && dir->is_rep() && - dir_pop < g_conf()->mds_bal_unreplicate_threshold) { + dir_pop < bal_unreplicate_threshold) { // unreplicate dout(5) << "unreplicating dir " << *dir << " pop " << dir_pop << dendl; @@ -1488,12 +1526,12 @@ int MDBalancer::dump_loads(Formatter *f, int64_t depth) const f->open_object_section("mds_load"); { - auto dump_mds_load = [f](const mds_load_t& load) { + auto dump_mds_load = [f](const mds_load_t& load, int64_t bal_mode) { f->dump_float("request_rate", load.req_rate); f->dump_float("cache_hit_rate", load.cache_hit_rate); f->dump_float("queue_length", load.queue_len); f->dump_float("cpu_load", 
load.cpu_load_avg); - f->dump_float("mds_load", load.mds_load()); + f->dump_float("mds_load", load.mds_load(bal_mode)); f->open_object_section("auth_dirfrags"); load.auth.dump(f); @@ -1507,7 +1545,7 @@ int MDBalancer::dump_loads(Formatter *f, int64_t depth) const CachedStackStringStream css; *css << "mds." << rank; f->open_object_section(css->strv()); - dump_mds_load(load); + dump_mds_load(load, bal_mode); f->close_section(); } } diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h index 69a6402b17e..e10d671d9f0 100644 --- a/src/mds/MDBalancer.h +++ b/src/mds/MDBalancer.h @@ -76,6 +76,19 @@ public: int dump_loads(Formatter *f, int64_t depth = -1) const; + bool get_bal_export_pin() const { + return bal_export_pin; + } + int64_t get_bal_merge_size() const { + return bal_merge_size; + } + int64_t get_bal_split_size() const { + return bal_split_size; + } + double get_bal_fragment_fast_factor() const { + return bal_fragment_fast_factor; + } + private: typedef struct { std::map<mds_rank_t, double> targets; @@ -83,6 +96,8 @@ private: std::map<mds_rank_t, double> exported; } balance_state_t; + static const unsigned int AUTH_TREES_THRESHOLD = 5; + //set up the rebalancing targets for export and do one if the //MDSMap is up to date void prep_rebalance(int beat); @@ -121,7 +136,20 @@ private: bool bal_fragment_dirs; int64_t bal_fragment_interval; - static const unsigned int AUTH_TREES_THRESHOLD = 5; + int64_t bal_interval; + int64_t bal_max_until; + int64_t bal_mode; + bool bal_export_pin; + double bal_sample_interval; + double bal_split_rd; + double bal_split_wr; + double bal_replicate_threshold; + double bal_unreplicate_threshold; + double bal_fragment_fast_factor; + int64_t bal_split_bits; + int64_t bal_split_size; + int64_t bal_merge_size; + int64_t num_bal_times; MDSRank *mds; Messenger *messenger; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 696634c0ee0..83ad5756360 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -156,6 +156,7 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) : export_ephemeral_random_max = g_conf().get_val<double>("mds_export_ephemeral_random_max"); symlink_recovery = g_conf().get_val<bool>("mds_symlink_recovery"); + kill_dirfrag_at = static_cast<enum dirfrag_killpoint>(g_conf().get_val<int64_t>("mds_kill_dirfrag_at")); kill_shutdown_at = g_conf().get_val<uint64_t>("mds_kill_shutdown_at"); @@ -208,6 +209,11 @@ void MDCache::handle_conf_change(const std::set<std::string>& changed, const MDS if (changed.count("mds_export_ephemeral_random_max")) { export_ephemeral_random_max = g_conf().get_val<double>("mds_export_ephemeral_random_max"); } + + if (changed.count("mds_kill_dirfrag_at")) { + kill_dirfrag_at = static_cast<enum dirfrag_killpoint>(g_conf().get_val<int64_t>("mds_kill_dirfrag_at")); + } + if (changed.count("mds_health_cache_threshold")) cache_health_threshold = g_conf().get_val<double>("mds_health_cache_threshold"); if (changed.count("mds_cache_mid")) @@ -634,7 +640,24 @@ void MDCache::open_root_inode(MDSContext *c) if (mds->get_nodeid() == mds->mdsmap->get_root()) { CInode *in; in = create_system_inode(CEPH_INO_ROOT, S_IFDIR|0755); // initially inaccurate! 
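// Editor's sketch (not part of the patch) of the callback idiom the
// replacement just below relies on: LambdaContext adapts a lambda into a
// Context*, and MDSInternalContextWrapper arranges for it to run back under
// the MDS lock, with fetch errors propagated to the original completion:
//
//   Context *fin = new MDSInternalContextWrapper(mds,
//     new LambdaContext([this, c](int r) {
//       if (r < 0) { c->complete(r); return; }  // propagate fetch errors
//       // ... open the root dirfrag, claim subtree auth, fetch it ...
//     }));
//   in->fetch(fin);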
- in->fetch(c); + if (mds->is_starting()) { + in->fetch( + new MDSInternalContextWrapper(mds, + new LambdaContext([this, c](int r) { + if (r < 0) { + c->complete(r); + return; + } + CDir *rootdir = root->get_or_open_dirfrag(this, frag_t()); + ceph_assert(rootdir); + adjust_subtree_auth(rootdir, mds->get_nodeid()); + rootdir->fetch(c); + }) + ) + ); + } else { + in->fetch(c); + } } else { discover_base_ino(CEPH_INO_ROOT, c, mds->mdsmap->get_root()); } @@ -11765,6 +11788,7 @@ void MDCache::merge_dir(CInode *diri, frag_t frag) void MDCache::fragment_freeze_dirs(const std::vector<CDir*>& dirs) { + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_FREEZE); bool any_subtree = false, any_non_subtree = false; for (const auto& dir : dirs) { dir->auth_pin(dir); // until we mark and complete them @@ -12183,6 +12207,8 @@ void MDCache::_fragment_logged(const MDRequestRef& mdr) dout(10) << "fragment_logged " << basedirfrag << " bits " << info.bits << " on " << *diri << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_LOGGED); + mdr->mark_event("prepare logged"); mdr->apply(); // mark scatterlock @@ -12219,6 +12245,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) // tell peers mds_rank_t diri_auth = (first->is_subtree_root() && !diri->is_auth()) ? diri->authority().first : CDIR_AUTH_UNKNOWN; + dout(20) << " first dirfrag " << *first << " diri_auth=" << diri_auth << dendl; for (const auto &p : first->get_replicas()) { if (mds->mdsmap->get_state(p.first) < MDSMap::STATE_REJOIN || (mds->mdsmap->get_state(p.first) == MDSMap::STATE_REJOIN && @@ -12245,6 +12272,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) * So we need to ensure replicas have received the notify, then unlock * the dirfragtreelock. */ + dout(20) << " ack wanted" << dendl; notify->mark_ack_wanted(); info.notify_ack_waiting.insert(p.first); } @@ -12255,6 +12283,7 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) } mds->send_message_mds(notify, p.first); + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_STORED_POST_NOTIFY); } // journal commit @@ -12277,6 +12306,8 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) dir->unfreeze_dir(); } + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_STORED_POST_JOURNAL); + if (info.notify_ack_waiting.empty()) { fragment_drop_locks(info); } else { @@ -12287,6 +12318,8 @@ void MDCache::_fragment_stored(const MDRequestRef& mdr) void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr) { dout(10) << "fragment_committed " << basedirfrag << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_COMMITTED); + if (mdr) mdr->mark_event("commit logged"); @@ -12325,6 +12358,8 @@ void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr void MDCache::_fragment_old_purged(dirfrag_t basedirfrag, int bits, const MDRequestRef& mdr) { dout(10) << "fragment_old_purged " << basedirfrag << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_OLD_PURGED); + if (mdr) mdr->mark_event("old frags purged"); @@ -12361,6 +12396,8 @@ void MDCache::fragment_drop_locks(fragment_info_t& info) void MDCache::fragment_maybe_finish(const fragment_info_iterator& it) { + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_MAYBE_FINISH); + if (!it->second.finishing) return; @@ -12383,6 +12420,7 @@ void MDCache::fragment_maybe_finish(const fragment_info_iterator& it) void MDCache::handle_fragment_notify_ack(const cref_t<MMDSFragmentNotifyAck> &ack) { dout(10) << 
"handle_fragment_notify_ack " << *ack << " from " << ack->get_source() << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY_ACK); mds_rank_t from = mds_rank_t(ack->get_source().num()); if (mds->get_state() < MDSMap::STATE_ACTIVE) { @@ -12406,6 +12444,7 @@ void MDCache::handle_fragment_notify_ack(const cref_t<MMDSFragmentNotifyAck> &ac void MDCache::handle_fragment_notify(const cref_t<MMDSFragmentNotify> ¬ify) { dout(10) << "handle_fragment_notify " << *notify << " from " << notify->get_source() << dendl; + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY); mds_rank_t from = mds_rank_t(notify->get_source().num()); if (mds->get_state() < MDSMap::STATE_REJOIN) { @@ -12453,6 +12492,7 @@ void MDCache::handle_fragment_notify(const cref_t<MMDSFragmentNotify> ¬ify) auto ack = make_message<MMDSFragmentNotifyAck>(notify->get_base_dirfrag(), notify->get_bits(), notify->get_tid()); mds->send_message_mds(ack, from); + ceph_assert(kill_dirfrag_at != dirfrag_killpoint::FRAGMENT_HANDLE_NOTIFY_POSTACK); } } diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index dc56e06d03c..18c848d941c 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1370,6 +1370,19 @@ private: StrayManager stray_manager; private: + enum dirfrag_killpoint : std::int8_t { + FRAGMENT_FREEZE = 1, + FRAGMENT_HANDLE_NOTIFY, + FRAGMENT_HANDLE_NOTIFY_POSTACK, + FRAGMENT_STORED_POST_NOTIFY, + FRAGMENT_STORED_POST_JOURNAL, + FRAGMENT_HANDLE_NOTIFY_ACK, + FRAGMENT_MAYBE_FINISH, + FRAGMENT_LOGGED, + FRAGMENT_COMMITTED, + FRAGMENT_OLD_PURGED, + }; + std::set<inodeno_t> replay_taken_inos; // the inos have been taken when replaying // -- fragmenting -- @@ -1499,6 +1512,7 @@ private: // Stores the symlink target on the file object's head bool symlink_recovery; + enum dirfrag_killpoint kill_dirfrag_at; // File size recovery RecoveryQueue recovery_queue; diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index 206daf9d5d8..b3a68e36b08 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -408,11 +408,11 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "List client sessions based on a filter"); ceph_assert(r == 0); - r = admin_socket->register_command("session evict name=filters,type=CephString,n=N,req=false", + r = admin_socket->register_command("session evict name=filters,type=CephString,n=N,req=true", asok_hook, "Evict client session(s) based on a filter"); ceph_assert(r == 0); - r = admin_socket->register_command("client evict name=filters,type=CephString,n=N,req=false", + r = admin_socket->register_command("client evict name=filters,type=CephString,n=N,req=true", asok_hook, "Evict client session(s) based on a filter"); ceph_assert(r == 0); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 2878887e335..91e7d4a7d55 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -673,9 +673,9 @@ void MDSRank::update_targets() void MDSRank::hit_export_target(mds_rank_t rank, double amount) { - double rate = g_conf()->mds_bal_target_decay; + double rate = g_conf().get_val<double>("mds_bal_target_decay"); if (amount < 0.0) { - amount = 100.0/g_conf()->mds_bal_target_decay; /* a good default for "i am trying to keep this export_target active" */ + amount = 100.0/rate; /* a good default for "i am trying to keep this export_target active" */ } auto em = export_targets.emplace(std::piecewise_construct, std::forward_as_tuple(rank), std::forward_as_tuple(DecayRate(rate))); auto &counter = em.first->second; @@ -1724,7 +1724,10 @@ void MDSRank::boot_start(BootStep step, int 
r) } else { dout(2) << "Booting: " << step << ": positioning at end of old mds log" << dendl; mdlog->append(); - starting_done(); + auto sle = mdcache->create_subtree_map(); + mdlog->submit_entry(sle); + mdlog->flush(); + mdlog->wait_for_safe(new C_MDS_VoidFn(this, &MDSRank::starting_done)); } break; case MDS_BOOT_REPLAY_DONE: @@ -1771,9 +1774,6 @@ void MDSRank::starting_done() ceph_assert(is_starting()); request_state(MDSMap::STATE_ACTIVE); - auto sle = mdcache->create_subtree_map(); - mdlog->submit_entry(sle); - // sync snaptable cache snapclient->sync(new C_MDSInternalNoop); } @@ -2130,6 +2130,8 @@ void MDSRank::active_start() { dout(1) << "active_start" << dendl; + m_is_active = true; + if (last_state == MDSMap::STATE_CREATING || last_state == MDSMap::STATE_STARTING) { mdcache->open_root(); @@ -2878,7 +2880,12 @@ void MDSRankDispatcher::handle_asok_command( r = config_client(client_id, !got_value, option, value, *css); } else if (command == "scrub start" || command == "scrub_start") { - if (whoami != 0) { + if (!is_active()) { + *css << "MDS is not active"; + r = -CEPHFS_EINVAL; + goto out; + } + else if (whoami != 0) { *css << "Not rank 0"; r = -CEPHFS_EXDEV; goto out; @@ -2904,7 +2911,12 @@ void MDSRankDispatcher::handle_asok_command( })); return; } else if (command == "scrub abort") { - if (whoami != 0) { + if (!is_active()) { + *css << "MDS is not active"; + r = -CEPHFS_EINVAL; + goto out; + } + else if (whoami != 0) { *css << "Not rank 0"; r = -CEPHFS_EXDEV; goto out; @@ -2919,7 +2931,12 @@ void MDSRankDispatcher::handle_asok_command( })); return; } else if (command == "scrub pause") { - if (whoami != 0) { + if (!is_active()) { + *css << "MDS is not active"; + r = -CEPHFS_EINVAL; + goto out; + } + else if (whoami != 0) { *css << "Not rank 0"; r = -CEPHFS_EXDEV; goto out; @@ -2934,7 +2951,12 @@ void MDSRankDispatcher::handle_asok_command( })); return; } else if (command == "scrub resume") { - if (whoami != 0) { + if (!is_active()) { + *css << "MDS is not active"; + r = -CEPHFS_EINVAL; + goto out; + } + else if (whoami != 0) { *css << "Not rank 0"; r = -CEPHFS_EXDEV; goto out; @@ -3115,7 +3137,7 @@ void MDSRankDispatcher::evict_clients( dout(20) << __func__ << " matched " << victims.size() << " sessions" << dendl; if (victims.empty()) { - on_finish(0, {}, outbl); + on_finish(-ESRCH, "no hosts match", outbl); return; } @@ -4042,9 +4064,23 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "fsid", "host", "mds_alternate_name_max", + "mds_bal_export_pin", "mds_bal_fragment_dirs", + "mds_bal_fragment_fast_factor", "mds_bal_fragment_interval", "mds_bal_fragment_size_max", + "mds_bal_interval", + "mds_bal_max", + "mds_bal_max_until", + "mds_bal_merge_size", + "mds_bal_mode", + "mds_bal_replicate_threshold", + "mds_bal_sample_interval", + "mds_bal_split_bits", + "mds_bal_split_rd", + "mds_bal_split_size", + "mds_bal_split_wr", + "mds_bal_unreplicate_threshold", "mds_cache_memory_limit", "mds_cache_mid", "mds_cache_reservation", @@ -4072,6 +4108,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "mds_inject_journal_corrupt_dentry_first", "mds_inject_migrator_session_race", "mds_inject_rename_corrupt_dentry_first", + "mds_kill_dirfrag_at", "mds_kill_shutdown_at", "mds_log_event_large_threshold", "mds_log_events_per_segment", @@ -4110,6 +4147,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const void MDSRankDispatcher::handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed) { // XXX with or without mds_lock! 
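// Editor's aside (not part of the patch): the m_is_active flag introduced
// in this file is a deliberately lock-free snapshot of rank state. The
// writer runs under mds_lock, readers do an atomic load:
//
//   std::atomic_bool m_is_active = false;              // MDSRank member
//   void active_start() { m_is_active = true; }        // writer, under mds_lock
//   bool is_active_lockless() const { return m_is_active.load(); }
//
// MetricsHandler::handle_client_metrics() (further down) uses it to drop
// client metrics during recovery without touching mds_lock.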
+ dout(2) << __func__ << ": " << changed << dendl; if (changed.count("mds_heartbeat_reset_grace")) { _heartbeat_reset_grace = conf.get_val<uint64_t>("mds_heartbeat_reset_grace"); diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 9ea6ddd96d1..c4a8809b6e1 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -15,6 +15,7 @@ #ifndef MDS_RANK_H_ #define MDS_RANK_H_ +#include <atomic> #include <string_view> #include <boost/asio/io_context.hpp> @@ -226,6 +227,8 @@ class MDSRank { bool is_cluster_degraded() const { return cluster_degraded; } bool allows_multimds_snaps() const { return mdsmap->allows_multimds_snaps(); } + bool is_active_lockless() const { return m_is_active.load(); } + bool is_cache_trimmable() const { return is_standby_replay() || is_clientreplay() || is_active() || is_stopping(); } @@ -669,6 +672,8 @@ private: mono_time starttime = mono_clock::zero(); boost::asio::io_context& ioc; + + std::atomic_bool m_is_active = false; /* accessed outside mds_lock */ }; class C_MDS_RetryMessage : public MDSInternalContext { diff --git a/src/mds/MDSRankQuiesce.cc b/src/mds/MDSRankQuiesce.cc index 0ad49b396d6..0071cd8fa59 100644 --- a/src/mds/MDSRankQuiesce.cc +++ b/src/mds/MDSRankQuiesce.cc @@ -197,9 +197,9 @@ void MDSRank::command_quiesce_db(const cmdmap_t& cmdmap, asok_finisher on_finish } else if (op_reset) { r.reset_roots(roots); } else if (op_release) { - r.release_roots(); + r.release(); } else if (op_cancel) { - r.cancel_roots(); + r.cancel(); } double timeout; @@ -319,7 +319,7 @@ void MDSRank::quiesce_cluster_update() { struct CancelAll: public QuiesceDbManager::RequestContext { mds_rank_t whoami; CancelAll(mds_rank_t whoami) : whoami(whoami) { - request.cancel_roots(); + request.cancel(); } void finish(int rc) override { dout(rc == 0 ? 
15 : 3) << "injected cancel all completed with rc: " << rc << dendl; diff --git a/src/mds/MetricsHandler.cc b/src/mds/MetricsHandler.cc index b28b06b7ad2..9ad10b9d6e6 100644 --- a/src/mds/MetricsHandler.cc +++ b/src/mds/MetricsHandler.cc @@ -331,6 +331,11 @@ void MetricsHandler::handle_payload(Session *session, const UnknownPayload &payl } void MetricsHandler::handle_client_metrics(const cref_t<MClientMetrics> &m) { + if (!mds->is_active_lockless()) { + dout(20) << ": dropping metrics message during recovery" << dendl; + return; + } + std::scoped_lock locker(lock); Session *session = mds->get_session(m); diff --git a/src/mds/QuiesceDb.h b/src/mds/QuiesceDb.h index 244f342cc73..b2cdf957379 100644 --- a/src/mds/QuiesceDb.h +++ b/src/mds/QuiesceDb.h @@ -124,8 +124,8 @@ namespace QuiesceInterface { } struct QuiesceDbVersion { - epoch_t epoch; - QuiesceSetVersion set_version; + epoch_t epoch = 0; + QuiesceSetVersion set_version = 0; auto operator<=>(QuiesceDbVersion const& other) const = default; QuiesceDbVersion& operator+(unsigned int delta) { set_version += delta; @@ -340,8 +340,8 @@ struct QuiesceDbRequest { /// for when `roots` is empty enum RootsOp: uint8_t { INCLUDE_OR_QUERY, - EXCLUDE_OR_RELEASE, - RESET_OR_CANCEL, + EXCLUDE_OR_CANCEL, + RESET_OR_RELEASE, __INVALID }; @@ -427,15 +427,15 @@ struct QuiesceDbRequest { bool is_mutating() const { return (control.roots_op != INCLUDE_OR_QUERY) || !roots.empty() || timeout || expiration; } bool is_cancel_all() const { return !set_id && is_cancel(); } - bool excludes_roots() const { return control.roots_op == RESET_OR_CANCEL || (control.roots_op == EXCLUDE_OR_RELEASE && !roots.empty()); } - bool includes_roots() const { return (control.roots_op == RESET_OR_CANCEL || control.roots_op == INCLUDE_OR_QUERY) && !roots.empty(); } + bool excludes_roots() const { return is_exclude() || is_reset(); } + bool includes_roots() const { return is_include() || is_reset(); } bool is_include() const { return control.roots_op == INCLUDE_OR_QUERY && !roots.empty(); } bool is_query() const { return control.roots_op == INCLUDE_OR_QUERY && roots.empty(); } - bool is_exclude() const { return control.roots_op == EXCLUDE_OR_RELEASE && !roots.empty(); } - bool is_release() const { return control.roots_op == EXCLUDE_OR_RELEASE && roots.empty(); } - bool is_reset() const { return control.roots_op == RESET_OR_CANCEL && !roots.empty(); } - bool is_cancel() const { return control.roots_op == RESET_OR_CANCEL && roots.empty(); } + bool is_exclude() const { return control.roots_op == EXCLUDE_OR_CANCEL && !roots.empty(); } + bool is_release() const { return control.roots_op == RESET_OR_RELEASE && roots.empty(); } + bool is_reset() const { return control.roots_op == RESET_OR_RELEASE && !roots.empty(); } + bool is_cancel() const { return control.roots_op == EXCLUDE_OR_CANCEL && roots.empty(); } bool is_verbose() const { return control.flags & Flags::VERBOSE; } bool is_exclusive() const { return control.flags & Flags::EXCLUSIVE; } @@ -444,11 +444,11 @@ struct QuiesceDbRequest { switch (control.roots_op) { case INCLUDE_OR_QUERY: return false; - case EXCLUDE_OR_RELEASE: - return roots.contains(root); - case RESET_OR_CANCEL: - return !roots.contains(root); - default: ceph_abort("unknown roots_op"); return false; + case EXCLUDE_OR_CANCEL: + return roots.empty() || roots.contains(root); + case RESET_OR_RELEASE: + return !roots.empty() && !roots.contains(root); + default: ceph_abort("unknown roots_op"); return false; } } @@ -493,22 +493,22 @@ struct QuiesceDbRequest { template <typename R 
= Roots> void exclude_roots(R&& roots) { - set_roots(EXCLUDE_OR_RELEASE, std::forward<R>(roots)); + set_roots(EXCLUDE_OR_CANCEL, std::forward<R>(roots)); } - void release_roots() { - set_roots(EXCLUDE_OR_RELEASE, {}); + void release() { + set_roots(RESET_OR_RELEASE, {}); } template <typename R = Roots> void reset_roots(R&& roots) { - set_roots(RESET_OR_CANCEL, std::forward<R>(roots)); + set_roots(RESET_OR_RELEASE, std::forward<R>(roots)); } - void cancel_roots() + void cancel() { - set_roots(RESET_OR_CANCEL, {}); + set_roots(EXCLUDE_OR_CANCEL, {}); } template <typename S = std::string> @@ -522,10 +522,10 @@ struct QuiesceDbRequest { switch (control.roots_op) { case INCLUDE_OR_QUERY: return roots.empty() ? "query" : "include"; - case EXCLUDE_OR_RELEASE: - return roots.empty() ? "release" : "exclude"; - case RESET_OR_CANCEL: - return roots.empty() ? "cancel" : "reset"; + case EXCLUDE_OR_CANCEL: + return roots.empty() ? "cancel" : "exclude"; + case RESET_OR_RELEASE: + return roots.empty() ? "release" : "reset"; default: return "<unknown>"; } @@ -547,11 +547,11 @@ operator<<(std::basic_ostream<CharT, Traits>& os, const QuiesceDbRequest& req) os << "q-req[" << req.op_string(); if (req.set_id) { - os << " \"" << req.set_id << "\""; + os << " \"" << *req.set_id << "\""; } if (req.if_version) { - os << " ?v:" << req.if_version; + os << " ?v:" << *req.if_version; } if (req.await) { diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc index 88844f09c81..3629e0190ee 100644 --- a/src/mds/QuiesceDbManager.cc +++ b/src/mds/QuiesceDbManager.cc @@ -87,11 +87,9 @@ void* QuiesceDbManager::quiesce_db_thread_main() if (next_event_at_age <= db_age) { break; } + dout(20) << "db idle, age: " << db_age << " next_event_at_age: " << next_event_at_age << dendl; auto timeout = std::min(max_wait, next_event_at_age - db_age); - auto wait_result = submit_condition.wait_for(ls, timeout); - if (std::cv_status::timeout == wait_result) { - dout(20) << "db idle, age: " << db_age << dendl; - } + submit_condition.wait_for(ls, timeout); } auto [is_member, should_exit] = membership_upkeep(); @@ -111,6 +109,8 @@ void* QuiesceDbManager::quiesce_db_thread_main() next_event_at_age = leader_upkeep(std::move(acks), std::move(requests)); } else { // not yet there. Put the acks and requests back onto the queue and wait for updates + // We should mark the next event age in case we get caught up in the sleep above + next_event_at_age = db.get_age() + bootstrap_delay; ls.lock(); while (!requests.empty()) { pending_requests.emplace_front(std::move(requests.back())); @@ -121,6 +121,12 @@ void* QuiesceDbManager::quiesce_db_thread_main() acks.pop_back(); } if (pending_db_updates.empty()) { + // we are waiting here because if requests/acks aren't empty + // the code above will skip the sleep due to the `db_thread_has_work` + // returning true, causing a busy-loop of the quiesce manager thread. + // This sleep may be interrupted by the submit_condition, in which case + // we will re-consider everything and may end up here again, but with a shorter + // bootstrap_delay. dout(5) << "bootstrap: waiting for new peers with pending acks: " << pending_acks.size() << " requests: " << pending_requests.size() << ". 
Wait timeout: " << bootstrap_delay << dendl; @@ -447,7 +453,7 @@ void QuiesceDbManager::complete_requests() { } // non-zero result codes are all errors - dout(10) << "completing request '" << req->request << " with rc: " << -res << dendl; + dout(10) << "completing " << req->request << " with rc: " << -res << dendl; req->complete(-res); } done_requests.clear(); @@ -589,6 +595,8 @@ int QuiesceDbManager::leader_process_request(RequestContext* req_ctx) return EINVAL; } + dout(20) << request << dendl; + const auto db_age = db.get_age(); if (request.is_cancel_all()) { @@ -1206,10 +1214,11 @@ void QuiesceDbManager::calculate_quiesce_map(QuiesceMap &map) auto ttl = get_root_ttl(set, member, db_age); auto root_it = map.roots.try_emplace(root, QuiesceMap::RootInfo { requested, ttl }).first; - // the min below resolves conditions when members representing the same root have different state/ttl - // e.g. if at least one member is QUIESCING then the root should be QUIESCING + // the logic below resolves conditions when members representing the same root have different state/ttl + // The state should be min, e.g. QUIESCING if at least one member is QUIESCING + // The ttl should be large enough to cover all aggregated states, i.e. max root_it->second.state = std::min(root_it->second.state, requested); - root_it->second.ttl = std::min(root_it->second.ttl, ttl); + root_it->second.ttl = std::max(root_it->second.ttl, ttl); } } } diff --git a/src/mds/QuiesceDbManager.h b/src/mds/QuiesceDbManager.h index 9654ce802eb..4d65f93d94e 100644 --- a/src/mds/QuiesceDbManager.h +++ b/src/mds/QuiesceDbManager.h @@ -227,7 +227,7 @@ class QuiesceDbManager { // the database. struct Db { QuiesceTimePoint time_zero; - epoch_t epoch; + epoch_t epoch = 0; QuiesceSetVersion set_version = 0; using Sets = std::unordered_map<QuiesceSetId, QuiesceSet>; Sets sets; @@ -281,7 +281,7 @@ class QuiesceDbManager { std::unordered_map<RequestContext*, int> done_requests; void* quiesce_db_thread_main(); - bool db_thread_has_work() const; + virtual bool db_thread_has_work() const; using IsMemberBool = bool; using ShouldExitBool = bool; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index e41342bf2b7..011718aa8c9 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -472,6 +472,9 @@ void Server::reclaim_session(Session *session, const cref_t<MClientReclaim> &m) ceph_assert(!session->reclaiming_from); session->reclaiming_from = target; reply->set_addrs(entity_addrvec_t(target->info.inst.addr)); + } else { + derr << ": could not find session by uuid:" << m->get_uuid() << dendl; + mds->sessionmap.dump(); } if (flags & CEPH_RECLAIM_RESET) { @@ -2489,6 +2492,31 @@ void Server::trim_completed_request_list(ceph_tid_t tid, Session *session) } } +void Server::set_reply_extra_bl(const cref_t<MClientRequest> &req, inodeno_t ino, bufferlist& extra_bl) +{ + Session *session = mds->get_session(req); + + if (session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) { + openc_response_t ocresp; + + dout(10) << "adding created_ino and delegated_inos" << dendl; + ocresp.created_ino = ino; + + if (delegate_inos_pct && !req->is_queued_for_replay()) { + // Try to delegate some prealloc_inos to the client, if it's down to half the max + unsigned frac = 100 / delegate_inos_pct; + if (session->delegated_inos.size() < (unsigned)g_conf()->mds_client_prealloc_inos / frac / 2) + session->delegate_inos(g_conf()->mds_client_prealloc_inos / frac, ocresp.delegated_inos); + } + + encode(ocresp, extra_bl); + } else if 
(req->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) { + dout(10) << "adding ino to reply to indicate inode was created" << dendl; + // add the file created flag onto the reply if create_flags features is supported + encode(ino, extra_bl); + } +} + void Server::handle_client_request(const cref_t<MClientRequest> &req) { dout(4) << "handle_client_request " << *req << dendl; @@ -2549,7 +2577,7 @@ void Server::handle_client_request(const cref_t<MClientRequest> &req) auto reply = make_message<MClientReply>(*req, 0); if (created != inodeno_t()) { bufferlist extra; - encode(created, extra); + set_reply_extra_bl(req, created, extra); reply->set_extra_bl(extra); } mds->send_message_client(reply, session); @@ -2765,10 +2793,18 @@ void Server::dispatch_client_request(const MDRequestRef& mdr) // funky. case CEPH_MDS_OP_CREATE: - if (mdr->has_completed) + if (mdr->has_completed) { + inodeno_t created; + + ceph_assert(mdr->session); + mdr->session->have_completed_request(req->get_reqid().tid, &created); + ceph_assert(created != inodeno_t()); + + set_reply_extra_bl(req, created, mdr->reply_extra_bl); handle_client_open(mdr); // already created.. just open - else + } else { handle_client_openc(mdr); + } break; case CEPH_MDS_OP_OPEN: @@ -4794,25 +4830,7 @@ void Server::handle_client_openc(const MDRequestRef& mdr) C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, newi); - if (mdr->session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) { - openc_response_t ocresp; - - dout(10) << "adding created_ino and delegated_inos" << dendl; - ocresp.created_ino = _inode->ino; - - if (delegate_inos_pct && !req->is_queued_for_replay()) { - // Try to delegate some prealloc_inos to the client, if it's down to half the max - unsigned frac = 100 / delegate_inos_pct; - if (mdr->session->delegated_inos.size() < (unsigned)g_conf()->mds_client_prealloc_inos / frac / 2) - mdr->session->delegate_inos(g_conf()->mds_client_prealloc_inos / frac, ocresp.delegated_inos); - } - - encode(ocresp, mdr->reply_extra_bl); - } else if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) { - dout(10) << "adding ino to reply to indicate inode was created" << dendl; - // add the file created flag onto the reply if create_flags features is supported - encode(newi->ino(), mdr->reply_extra_bl); - } + set_reply_extra_bl(req, _inode->ino, mdr->reply_extra_bl); journal_and_reply(mdr, newi, dn, le, fin); @@ -6006,6 +6024,7 @@ int Server::check_layout_vxattr(const MDRequestRef& mdr, file_layout_t *layout) { const cref_t<MClientRequest> &req = mdr->client_request; + bool is_rmxattr = (req->get_op() == CEPH_MDS_OP_RMXATTR); epoch_t epoch; int r; @@ -6015,7 +6034,12 @@ int Server::check_layout_vxattr(const MDRequestRef& mdr, }); if (r == -CEPHFS_ENOENT) { + if (is_rmxattr) { + r = -CEPHFS_EINVAL; + respond_to_request(mdr, r); + return r; + } // we don't have the specified pool, make sure our map // is newer than or as new as the client. 
epoch_t req_epoch = req->get_osdmap_epoch(); @@ -6055,14 +6079,15 @@ int Server::check_layout_vxattr(const MDRequestRef& mdr, return 0; } -void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) +void Server::handle_client_setvxattr(const MDRequestRef& mdr, CInode *cur) { const cref_t<MClientRequest> &req = mdr->client_request; + bool is_rmxattr = (req->get_op() == CEPH_MDS_OP_RMXATTR); MutationImpl::LockOpVec lov; string name(req->get_path2()); bufferlist bl = req->get_data(); string value (bl.c_str(), bl.length()); - dout(10) << "handle_set_vxattr " << name + dout(10) << "handle_client_setvxattr " << name << " val " << value.length() << " bytes on " << *cur << dendl; @@ -6104,19 +6129,44 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) else layout = mdcache->default_file_layout; - rest = name.substr(name.find("layout")); - if (check_layout_vxattr(mdr, rest, value, &layout) < 0) - return; + if (is_rmxattr && name == "ceph.dir.layout") { + lov.add_xlock(&cur->policylock); + if (!mds->locker->acquire_locks(mdr, lov)) { + return; + } + if (!cur->get_projected_inode()->has_layout()) { + respond_to_request(mdr, 0); + return; + } + auto pi = cur->project_inode(mdr); + + if (cur->is_root()) { + pi.inode->layout = mdcache->default_file_layout; + } else { + pi.inode->clear_layout(); + pi.inode->version = cur->pre_dirty(); + } + pip = pi.inode.get(); + } else { + rest = name.substr(name.find("layout")); + if (check_layout_vxattr(mdr, rest, value, &layout) < 0) + return; + + auto pi = cur->project_inode(mdr); + pi.inode->layout = layout; + pip = pi.inode.get(); + } - auto pi = cur->project_inode(mdr); - pi.inode->layout = layout; mdr->no_early_reply = true; - pip = pi.inode.get(); } else if (name.compare(0, 16, "ceph.file.layout") == 0) { if (!cur->is_file()) { respond_to_request(mdr, -CEPHFS_EINVAL); return; } + if (!cur->get_projected_inode()->has_layout()) { + respond_to_request(mdr, 0); + return; + } if (cur->get_projected_inode()->size || cur->get_projected_inode()->truncate_seq > 1) { respond_to_request(mdr, -CEPHFS_ENOTEMPTY); @@ -6149,6 +6199,13 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) } quota_info_t quota = cur->get_projected_inode()->quota; + if (is_rmxattr) { + if (!quota.is_enabled()) { + respond_to_request(mdr, 0); + return; + } + value = "0"; + } rest = name.substr(name.find("quota")); int r = parse_quota_vxattr(rest, value, "a); @@ -6234,6 +6291,14 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) bool val; try { + if (is_rmxattr) { + const auto srnode = cur->get_projected_srnode(); + if (!srnode->is_subvolume()) { + respond_to_request(mdr, 0); + return; + } + value = "0"; + } val = boost::lexical_cast<bool>(value); } catch (boost::bad_lexical_cast const&) { dout(10) << "bad vxattr value, unable to parse bool for " << name << dendl; @@ -6307,6 +6372,13 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) mds_rank_t rank; try { + if (is_rmxattr) { + if (cur->get_projected_inode()->export_pin == -1) { + respond_to_request(mdr, 0); + return; + } + value = "-1"; + } rank = boost::lexical_cast<mds_rank_t>(value); if (rank < 0) rank = MDS_RANK_NONE; else if (rank >= MAX_MDS) { @@ -6333,6 +6405,13 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) double val; try { + if (is_rmxattr) { + if (cur->get_projected_inode()->export_ephemeral_random_pin == 0.0) { + respond_to_request(mdr, 0); + return; + } + value = "0"; + } val = boost::lexical_cast<double>(value); } 
catch (boost::bad_lexical_cast const&) { dout(10) << "bad vxattr value, unable to parse float for " << name << dendl; @@ -6362,6 +6441,13 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) bool val; try { + if (is_rmxattr) { + if (cur->get_projected_inode()->get_ephemeral_distributed_pin() == 0) { + respond_to_request(mdr, 0); + return; + } + value = "0"; + } val = boost::lexical_cast<bool>(value); } catch (boost::bad_lexical_cast const&) { dout(10) << "bad vxattr value, unable to parse bool for " << name << dendl; @@ -6401,61 +6487,6 @@ void Server::handle_set_vxattr(const MDRequestRef& mdr, CInode *cur) return; } -void Server::handle_remove_vxattr(const MDRequestRef& mdr, CInode *cur) -{ - const cref_t<MClientRequest> &req = mdr->client_request; - string name(req->get_path2()); - - dout(10) << __func__ << " " << name << " on " << *cur << dendl; - - if (name == "ceph.dir.layout") { - if (!cur->is_dir()) { - respond_to_request(mdr, -CEPHFS_ENODATA); - return; - } - if (cur->is_root()) { - dout(10) << "can't remove layout policy on the root directory" << dendl; - respond_to_request(mdr, -CEPHFS_EINVAL); - return; - } - - if (!cur->get_projected_inode()->has_layout()) { - respond_to_request(mdr, -CEPHFS_ENODATA); - return; - } - - MutationImpl::LockOpVec lov; - lov.add_xlock(&cur->policylock); - if (!mds->locker->acquire_locks(mdr, lov)) - return; - - auto pi = cur->project_inode(mdr); - pi.inode->clear_layout(); - pi.inode->version = cur->pre_dirty(); - - // log + wait - mdr->ls = mdlog->get_current_segment(); - EUpdate *le = new EUpdate(mdlog, "remove dir layout vxattr"); - le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); - mdcache->predirty_journal_parents(mdr, &le->metablob, cur, 0, PREDIRTY_PRIMARY); - mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur); - - mdr->no_early_reply = true; - journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur)); - return; - } else if (name == "ceph.dir.layout.pool_namespace" - || name == "ceph.file.layout.pool_namespace") { - // Namespace is the only layout field that has a meaningful - // null/none value (empty string, means default layout). Is equivalent - // to a setxattr with empty string: pass through the empty payload of - // the rmxattr request to do this. 
- handle_set_vxattr(mdr, cur); - return; - } - - respond_to_request(mdr, -CEPHFS_ENODATA); -} - const Server::XattrHandler Server::xattr_handlers[] = { { xattr_name: Server::DEFAULT_HANDLER, @@ -6506,6 +6537,11 @@ int Server::xattr_validate(CInode *cur, const InodeStoreBase::xattr_map_const_pt return -CEPHFS_ENODATA; } + if ((flags & CEPH_XATTR_REMOVE2) && !(xattrs && xattrs->count(mempool::mds_co::string(xattr_name)))) { + dout(10) << "setxattr '" << xattr_name << "' XATTR_REMOVE2 and CEPHFS_ENODATA on " << *cur << dendl; + return -CEPHFS_ENODATA; + } + return 0; } @@ -6646,7 +6682,7 @@ void Server::handle_client_setxattr(const MDRequestRef& mdr) if (!cur) return; - handle_set_vxattr(mdr, cur); + handle_client_setvxattr(mdr, cur); return; } @@ -6715,7 +6751,7 @@ void Server::handle_client_setxattr(const MDRequestRef& mdr) pi.inode->change_attr++; pi.inode->xattr_version++; - if ((flags & CEPH_XATTR_REMOVE)) { + if ((flags & (CEPH_XATTR_REMOVE | CEPH_XATTR_REMOVE2))) { std::invoke(handler->removexattr, this, cur, pi.xattrs, xattr_op); } else { std::invoke(handler->setxattr, this, cur, pi.xattrs, xattr_op); @@ -6743,7 +6779,7 @@ void Server::handle_client_removexattr(const MDRequestRef& mdr) if (!cur) return; - handle_remove_vxattr(mdr, cur); + handle_client_setvxattr(mdr, cur); return; } diff --git a/src/mds/Server.h b/src/mds/Server.h index cbfe5c8cf5d..68842ea01cb 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -160,6 +160,7 @@ public: void force_clients_readonly(); // -- requests -- + void set_reply_extra_bl(const cref_t<MClientRequest> &req, inodeno_t ino, bufferlist& extra_bl); void trim_completed_request_list(ceph_tid_t tid, Session *session); void handle_client_request(const cref_t<MClientRequest> &m); void handle_client_reply(const cref_t<MClientReply> &m); @@ -232,8 +233,7 @@ public: std::string name, std::string value, file_layout_t *layout); - void handle_set_vxattr(const MDRequestRef& mdr, CInode *cur); - void handle_remove_vxattr(const MDRequestRef& mdr, CInode *cur); + void handle_client_setvxattr(const MDRequestRef& mdr, CInode *cur); void handle_client_getvxattr(const MDRequestRef& mdr); void handle_client_setxattr(const MDRequestRef& mdr); void handle_client_removexattr(const MDRequestRef& mdr); diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index 9bec67c245d..48242f513b3 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -1217,6 +1217,13 @@ int SessionFilter::parse( state = v; } else if (k == "id") { std::string err; + if (v == "*") { + // evict all clients; by default id is set to 0 + return 0; + } else if (v == "0") { + *ss << "Invalid value"; + return -CEPHFS_EINVAL; + } id = strict_strtoll(v.c_str(), 10, &err); if (!err.empty()) { *ss << err; diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 694808890bc..17a5bf7acae 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -973,7 +973,7 @@ struct mds_load_t { double cpu_load_avg = 0.0; - double mds_load() const; // defined in MDBalancer.cc + double mds_load(int64_t bal_mode) const; // defined in MDBalancer.cc void encode(ceph::buffer::list& bl) const; void decode(ceph::buffer::list::const_iterator& bl); void dump(ceph::Formatter *f) const; diff --git a/src/messages/MClientReclaimReply.h b/src/messages/MClientReclaimReply.h index 23d3a51b0ec..d1ab0c6da80 100644 --- a/src/messages/MClientReclaimReply.h +++ b/src/messages/MClientReclaimReply.h @@ -32,7 +32,8 @@ public: std::string_view get_type_name() const override { return "client_reclaim_reply"; } void print(std::ostream& o)
const override { - o << "client_reclaim_reply(" << result << " e " << epoch << ")"; + o << "client_reclaim_reply(" << result << " e " << epoch + << " addrs " << addrs << ")"; } void encode_payload(uint64_t features) override { diff --git a/src/mgr/CMakeLists.txt b/src/mgr/CMakeLists.txt index 4f831e152ff..1e473355af0 100644 --- a/src/mgr/CMakeLists.txt +++ b/src/mgr/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(mgr_cap_obj OBJECT MgrCap.cc) +target_link_libraries(mgr_cap_obj legacy-option-headers) if(WITH_MGR) set(mgr_srcs diff --git a/src/mgr/Mgr.cc b/src/mgr/Mgr.cc index b320ea484be..1a11046ee08 100644 --- a/src/mgr/Mgr.cc +++ b/src/mgr/Mgr.cc @@ -498,7 +498,7 @@ void Mgr::handle_osd_map() cluster_state.with_osdmap_and_pgmap([this, &names_exist](const OSDMap &osd_map, const PGMap &pg_map) { for (int osd_id = 0; osd_id < osd_map.get_max_osd(); ++osd_id) { - if (!osd_map.exists(osd_id) || (osd_map.is_out(osd_id) && osd_map.is_down(osd_id))) { + if (!osd_map.exists(osd_id)) { continue; } @@ -510,9 +510,16 @@ void Mgr::handle_osd_map() if (daemon_state.is_updating(k)) { continue; } + + DaemonStatePtr daemon = daemon_state.get(k); + + if (daemon && osd_map.is_out(osd_id) && osd_map.is_down(osd_id)) { + std::lock_guard l(daemon->lock); + daemon->daemon_health_metrics.clear(); + } bool update_meta = false; - if (daemon_state.exists(k)) { + if (daemon) { if (osd_map.get_up_from(osd_id) == osd_map.get_epoch()) { dout(4) << "Mgr::handle_osd_map: osd." << osd_id << " joined cluster at " << "e" << osd_map.get_epoch() diff --git a/src/mgr/MgrOpRequest.h b/src/mgr/MgrOpRequest.h index 9e67af080f8..5b29e5980a3 100644 --- a/src/mgr/MgrOpRequest.h +++ b/src/mgr/MgrOpRequest.h @@ -32,7 +32,7 @@ private: entity_inst_t req_src_inst; uint8_t hit_flag_points; uint8_t latest_flag_point; - const char* last_event_detail = nullptr; + std::string last_event_detail; static const uint8_t flag_started = 1 << 0; static const uint8_t flag_queued_for_module = 1 << 1; diff --git a/src/mgr/PyModule.cc b/src/mgr/PyModule.cc index 20234ec5768..cff63ef4a6b 100644 --- a/src/mgr/PyModule.cc +++ b/src/mgr/PyModule.cc @@ -47,7 +47,6 @@ std::string PyModule::mgr_store_prefix = "mgr/"; using std::string; -using std::wstring; // decode a Python exception into a string std::string handle_pyerror( @@ -231,72 +230,6 @@ std::pair<int, std::string> PyModuleConfig::set_config( } } -std::string PyModule::get_site_packages() -{ - std::stringstream site_packages; - - // CPython doesn't auto-add site-packages dirs to sys.path for us, - // but it does provide a module that we can ask for them. - auto site_module = PyImport_ImportModule("site"); - ceph_assert(site_module); - - auto site_packages_fn = PyObject_GetAttrString(site_module, "getsitepackages"); - if (site_packages_fn != nullptr) { - auto site_packages_list = PyObject_CallObject(site_packages_fn, nullptr); - ceph_assert(site_packages_list); - - auto n = PyList_Size(site_packages_list); - for (Py_ssize_t i = 0; i < n; ++i) { - if (i != 0) { - site_packages << ":"; - } - site_packages << PyUnicode_AsUTF8(PyList_GetItem(site_packages_list, i)); - } - - Py_DECREF(site_packages_list); - Py_DECREF(site_packages_fn); - } else { - // Fall back to generating our own site-packages paths by imitating - // what the standard site.py does. 
This is annoying but it lets us - // run inside virtualenvs :-/ - - auto site_packages_fn = PyObject_GetAttrString(site_module, "addsitepackages"); - ceph_assert(site_packages_fn); - - auto known_paths = PySet_New(nullptr); - auto pArgs = PyTuple_Pack(1, known_paths); - PyObject_CallObject(site_packages_fn, pArgs); - Py_DECREF(pArgs); - Py_DECREF(known_paths); - Py_DECREF(site_packages_fn); - - auto sys_module = PyImport_ImportModule("sys"); - ceph_assert(sys_module); - auto sys_path = PyObject_GetAttrString(sys_module, "path"); - ceph_assert(sys_path); - - dout(1) << "sys.path:" << dendl; - auto n = PyList_Size(sys_path); - bool first = true; - for (Py_ssize_t i = 0; i < n; ++i) { - dout(1) << " " << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)) << dendl; - if (first) { - first = false; - } else { - site_packages << ":"; - } - site_packages << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)); - } - - Py_DECREF(sys_path); - Py_DECREF(sys_module); - } - - Py_DECREF(site_module); - - return site_packages.str(); -} - PyObject* PyModule::init_ceph_logger() { auto py_logger = PyModule_Create(&ceph_logger_module); @@ -357,17 +290,6 @@ int PyModule::load(PyThreadState *pMainThreadState) return -EINVAL; } else { pMyThreadState.set(thread_state); - // Some python modules do not cope with an unpopulated argv, so lets - // fake one. This step also picks up site-packages into sys.path. - const wchar_t *argv[] = {L"ceph-mgr"}; - PySys_SetArgv(1, (wchar_t**)argv); - // Configure sys.path to include mgr_module_path - string paths = (g_conf().get_val<std::string>("mgr_module_path") + ':' + - get_site_packages() + ':'); - wstring sys_path(wstring(begin(paths), end(paths)) + Py_GetPath()); - PySys_SetPath(const_cast<wchar_t*>(sys_path.c_str())); - dout(10) << "Computed sys.path '" - << string(begin(sys_path), end(sys_path)) << "'" << dendl; } } // Environment is all good, import the external module @@ -515,8 +437,8 @@ int PyModule::load_notify_types() { PyObject *ls = PyObject_GetAttrString(pClass, "NOTIFY_TYPES"); if (ls == nullptr) { - derr << "Module " << get_name() << " has missing NOTIFY_TYPES member" << dendl; - return -EINVAL; + dout(10) << "Module " << get_name() << " has no NOTIFY_TYPES member" << dendl; + return 0; } if (!PyObject_TypeCheck(ls, &PyList_Type)) { // Relatively easy mistake for human to make, e.g. defining COMMANDS diff --git a/src/mgr/PyModule.h b/src/mgr/PyModule.h index 8d88ff94c62..177447c2cb3 100644 --- a/src/mgr/PyModule.h +++ b/src/mgr/PyModule.h @@ -51,7 +51,6 @@ class PyModule mutable ceph::mutex lock = ceph::make_mutex("PyModule::lock"); private: const std::string module_name; - std::string get_site_packages(); int load_subclass_of(const char* class_name, PyObject** py_class); // Did the MgrMap identify this module as one that should run? diff --git a/src/mgr/PyModuleRegistry.cc b/src/mgr/PyModuleRegistry.cc index eb2d2babe75..0eb304e7353 100644 --- a/src/mgr/PyModuleRegistry.cc +++ b/src/mgr/PyModuleRegistry.cc @@ -14,6 +14,7 @@ #include "PyModuleRegistry.h" #include <filesystem> +#include <boost/scope_exit.hpp> #include "include/stringify.h" #include "common/errno.h" @@ -46,21 +47,51 @@ void PyModuleRegistry::init() // Set up global python interpreter #define WCHAR(s) L ## #s - Py_SetProgramName(const_cast<wchar_t*>(WCHAR(MGR_PYTHON_EXECUTABLE))); -#undef WCHAR + PyConfig py_config; + // do not enable isolated mode, otherwise we would not be able to have access + // to the site packages. 
Since we cannot import any module before initializing + // the interpreter, we would not be able to use the "site" module for retrieving + // the path to site packages. We import the "site" module for retrieving + // site packages in Python < 3.8, but this does not apply to + // initialization with PyConfig. + PyConfig_InitPythonConfig(&py_config); + BOOST_SCOPE_EXIT_ALL(&py_config) { + PyConfig_Clear(&py_config); + }; +#if PY_VERSION_HEX >= 0x030b0000 + py_config.safe_path = 0; +#endif + py_config.parse_argv = 0; + py_config.configure_c_stdio = 0; + py_config.install_signal_handlers = 0; + py_config.pathconfig_warnings = 0; + + PyStatus status; + status = PyConfig_SetString(&py_config, &py_config.program_name, WCHAR(MGR_PYTHON_EXECUTABLE)); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetString: %s:%s", status.func, status.err_msg); + // Some python modules do not cope with an unpopulated argv, so let's + // fake one. This step also picks up site-packages into sys.path. + const wchar_t* argv[] = {L"ceph-mgr"}; + status = PyConfig_SetArgv(&py_config, 1, (wchar_t *const *)argv); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetArgv: %s:%s", status.func, status.err_msg); // Add more modules if (g_conf().get_val<bool>("daemonize")) { PyImport_AppendInittab("ceph_logger", PyModule::init_ceph_logger); } PyImport_AppendInittab("ceph_module", PyModule::init_ceph_module); - Py_InitializeEx(0); -#if PY_VERSION_HEX < 0x03090000 - // Let CPython know that we will be calling it back from other - // threads in future. - if (! PyEval_ThreadsInitialized()) { - PyEval_InitThreads(); + // Configure sys.path to include mgr_module_path + auto pythonpath_env = g_conf().get_val<std::string>("mgr_module_path"); + if (const char* pythonpath = getenv("PYTHONPATH")) { + pythonpath_env += ":"; + pythonpath_env += pythonpath; } -#endif + status = PyConfig_SetBytesString(&py_config, &py_config.pythonpath_env, pythonpath_env.data()); + ceph_assertf(!PyStatus_Exception(status), "PyConfig_SetBytesString: %s:%s", status.func, status.err_msg); + dout(10) << "set PYTHONPATH to " << std::quoted(pythonpath_env) << dendl; + status = Py_InitializeFromConfig(&py_config); + ceph_assertf(!PyStatus_Exception(status), "Py_InitializeFromConfig: %s:%s", status.func, status.err_msg); +#undef WCHAR + + // Drop the GIL and remember the main thread state (current // thread state becomes NULL) pMainThreadState = PyEval_SaveThread(); @@ -217,6 +248,72 @@ void PyModuleRegistry::active_start( } } +std::string PyModuleRegistry::get_site_packages() +{ + std::stringstream site_packages; + + // CPython doesn't auto-add site-packages dirs to sys.path for us, + // but it does provide a module that we can ask for them. + auto site_module = PyImport_ImportModule("site"); + ceph_assert(site_module); + + auto site_packages_fn = PyObject_GetAttrString(site_module, "getsitepackages"); + if (site_packages_fn != nullptr) { + auto site_packages_list = PyObject_CallObject(site_packages_fn, nullptr); + ceph_assert(site_packages_list); + + auto n = PyList_Size(site_packages_list); + for (Py_ssize_t i = 0; i < n; ++i) { + if (i != 0) { + site_packages << ":"; + } + site_packages << PyUnicode_AsUTF8(PyList_GetItem(site_packages_list, i)); + } + + Py_DECREF(site_packages_list); + Py_DECREF(site_packages_fn); + } else { + // Fall back to generating our own site-packages paths by imitating + // what the standard site.py does.
This is annoying but it lets us + // run inside virtualenvs :-/ + + auto site_packages_fn = PyObject_GetAttrString(site_module, "addsitepackages"); + ceph_assert(site_packages_fn); + + auto known_paths = PySet_New(nullptr); + auto pArgs = PyTuple_Pack(1, known_paths); + PyObject_CallObject(site_packages_fn, pArgs); + Py_DECREF(pArgs); + Py_DECREF(known_paths); + Py_DECREF(site_packages_fn); + + auto sys_module = PyImport_ImportModule("sys"); + ceph_assert(sys_module); + auto sys_path = PyObject_GetAttrString(sys_module, "path"); + ceph_assert(sys_path); + + dout(1) << "sys.path:" << dendl; + auto n = PyList_Size(sys_path); + bool first = true; + for (Py_ssize_t i = 0; i < n; ++i) { + dout(1) << " " << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)) << dendl; + if (first) { + first = false; + } else { + site_packages << ":"; + } + site_packages << PyUnicode_AsUTF8(PyList_GetItem(sys_path, i)); + } + + Py_DECREF(sys_path); + Py_DECREF(sys_module); + } + + Py_DECREF(site_module); + + return site_packages.str(); +} + std::vector<std::string> PyModuleRegistry::probe_modules(const std::string &path) const { const auto opt = g_conf().get_val<std::string>("mgr_disabled_modules"); diff --git a/src/mgr/PyModuleRegistry.h b/src/mgr/PyModuleRegistry.h index 9d6d9c2cdd0..da5bb596c93 100644 --- a/src/mgr/PyModuleRegistry.h +++ b/src/mgr/PyModuleRegistry.h @@ -55,6 +55,7 @@ private: // before ClusterState exists. MgrMap mgr_map; + static std::string get_site_packages(); /** * Discover python modules from local disk */ diff --git a/src/mon/AuthMonitor.cc b/src/mon/AuthMonitor.cc index b20eac8399e..0a60ab6d26d 100644 --- a/src/mon/AuthMonitor.cc +++ b/src/mon/AuthMonitor.cc @@ -858,6 +858,7 @@ bool AuthMonitor::preprocess_command(MonOpRequestRef op) string prefix; cmd_getval(cmdmap, "prefix", prefix); if (prefix == "auth add" || + prefix == "auth rotate" || prefix == "auth del" || prefix == "auth rm" || prefix == "auth get-or-create" || @@ -1825,6 +1826,32 @@ bool AuthMonitor::prepare_command(MonOpRequestRef op) wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs, get_last_committed() + 1)); return true; + } else if (prefix == "auth rotate") { + if (entity_name.empty()) { + ss << "bad entity name"; + err = -EINVAL; + goto done; + } + + EntityAuth entity_auth; + if (!mon.key_server.get_auth(entity, entity_auth)) { + ss << "entity does not exist"; + err = -ENOENT; + goto done; + } + + entity_auth.key.create(g_ceph_context, CEPH_CRYPTO_AES); + + KeyServerData::Incremental auth_inc; + auth_inc.op = KeyServerData::AUTH_INC_ADD; + auth_inc.name = entity; + auth_inc.auth = entity_auth; + push_cephx_inc(auth_inc); + + _encode_auth(entity, entity_auth, rdata, f.get()); + wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs, rdata, + get_last_committed() + 1)); + return true; } done: rdata.append(ds); diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 357f944df21..76a57ac443d 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -16,6 +16,7 @@ #include <sstream> #include <queue> #include <ranges> +#include <boost/range/adaptors.hpp> #include <boost/utility.hpp> #include "MDSMonitor.h" @@ -238,6 +239,7 @@ void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t) if (!g_conf()->mon_mds_skip_sanity) { pending.sanity(true); } + pending.set_btime(); // apply to paxos ceph_assert(get_last_committed() + 1 == pending.get_epoch()); @@ -1024,6 +1026,52 @@ bool MDSMonitor::preprocess_command(MonOpRequestRef op) ds << fsmap; } r = 0; + } else if (prefix == "mds last-seen") { + 
std::string id; + cmd_getval(cmdmap, "id", id); + + dout(10) << "last seen check for " << id << dendl; + + auto& history = get_fsmap_history(); + auto now = real_clock::now(); + bool found = false; + /* Special case: + * If the mons consider the MDS "in" the latest FSMap, then the mds + * is always "last seen" **now** (for the purposes of this API). We + * don't look at past beacons because that is only managed by the + * leader and the logic is fudged in places in the event of suspected + * network partitions. + */ + std::chrono::seconds since = std::chrono::seconds(0); + + for (auto& [epoch, fsmaph] : boost::adaptors::reverse(history)) { + dout(25) << "looking at epoch " << epoch << dendl; + auto* info = fsmaph.find_by_name(id); + if (info) { + dout(10) << "found: " << *info << dendl; + found = true; + if (f) { + f->open_object_section("mds last-seen"); + f->dump_object("info", *info); + f->dump_string("last-seen", fmt::format("{}", since)); + f->dump_int("epoch", epoch); + f->close_section(); + f->flush(ds); + } else { + ds << fmt::format("{}", since); + } + break; + } + /* If the MDS appears in the next epoch, then it went away as of this epoch's btime. + */ + since = std::chrono::duration_cast<std::chrono::seconds>(now - fsmaph.get_btime()); + } + if (found) { + r = 0; + } else { + ss << "mds " << id << " not found in recent FSMaps"; + r = -ENOENT; + } } else if (prefix == "mds ok-to-stop") { vector<string> ids; if (!cmd_getval(cmdmap, "ids", ids)) { @@ -2379,6 +2427,39 @@ bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, const Filesystem& fs) void MDSMonitor::tick() { + { + auto _history_prune_time = g_conf().get_val<std::chrono::seconds>("mon_fsmap_prune_threshold"); + set_fsmap_history_threshold(_history_prune_time); + dout(20) << _history_prune_time << dendl; + prune_fsmap_history(); + auto& history = get_fsmap_history(); + auto now = real_clock::now(); + if (auto it = history.begin(); it != history.end()) { + auto start = it->second.get_epoch(); + dout(20) << "oldest epoch in history is " << start << dendl; + for (;;) { + --start; + bufferlist bl; + FSMap fsmaph; + int err = get_version(start, bl); + if (err == -ENOENT) { + break; + } + ceph_assert(err == 0); + ceph_assert(bl.length()); + fsmaph.decode(bl); + auto btime = fsmaph.get_btime(); + auto since = std::chrono::duration_cast<std::chrono::milliseconds>(now - btime); + dout(20) << "loaded epoch " << fsmaph.get_epoch() << " which is " << since << " old" << dendl; + if (since <= _history_prune_time) { + put_fsmap_history(fsmaph); + } else { + break; + } + } + } + } + if (!is_active() || !is_leader()) return; auto &pending = get_pending_fsmap_writeable(); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index b2a678dff53..e9025b05ef7 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -163,6 +163,10 @@ COMMAND("auth add " "add auth info for <entity> from input file, or random key if no " "input is given, and/or any caps specified in the command", "auth", "rwx") +COMMAND("auth rotate " + "name=entity,type=CephString", + "rotate entity key", + "auth", "rwx") COMMAND("auth get-or-create-key " "name=entity,type=CephString " "name=caps,type=CephString,n=N,req=false", @@ -294,6 +298,9 @@ COMMAND("versions", #define FS_NAME_GOODCHARS "[A-Za-z0-9-_.]" COMMAND_WITH_FLAG("mds stat", "show MDS status", "mds", "r", FLAG(HIDDEN)) +COMMAND("mds last-seen name=id,type=CephString,req=true", + "fetch metadata for mds <id>", + "mds", "r") COMMAND("fs dump " "name=epoch,type=CephInt,req=false,range=0", "dump all CephFS 
status, optionally from epoch", "mds", "r") @@ -853,7 +860,8 @@ COMMAND("osd unpause", "unpause osd", "osd", "rw") COMMAND("osd erasure-code-profile set " "name=name,type=CephString,goodchars=[A-Za-z0-9-_.] " "name=profile,type=CephString,n=N,req=false " - "name=force,type=CephBool,req=false", + "name=force,type=CephBool,req=false " + "name=yes_i_really_mean_it,type=CephBool,req=false", "create erasure code profile <name> with [<key[=value]> ...] pairs. Add a --force at the end to override an existing profile (VERY DANGEROUS)", "osd", "rw") COMMAND("osd erasure-code-profile get " diff --git a/src/mon/PaxosFSMap.h b/src/mon/PaxosFSMap.h index 72999883161..4312d7e1f4d 100644 --- a/src/mon/PaxosFSMap.h +++ b/src/mon/PaxosFSMap.h @@ -15,6 +15,8 @@ #ifndef CEPH_PAXOS_FSMAP_H #define CEPH_PAXOS_FSMAP_H +#include <chrono> + #include "mds/FSMap.h" #include "mds/MDSMap.h" @@ -39,13 +41,58 @@ protected: return pending_fsmap; } + void prune_fsmap_history() { + auto now = real_clock::now(); + for (auto it = history.begin(); it != history.end(); ) { + auto since = now - it->second.get_btime(); + /* Be sure to not make the map empty */ + auto itnext = std::next(it); + if (itnext == history.end()) { + break; + } + /* Keep the map just before the prune time threshold: + * [ e-1 (lifetime > history_prune_time) | e (lifetime 1s) ] + * If an mds was removed in (e), then we want to be able to say it was + * last seen 1 second ago. + */ + auto since2 = now - itnext->second.get_btime(); + if (since > history_prune_time && since2 > history_prune_time) { + it = history.erase(it); + } else { + break; + } + } + } + + void put_fsmap_history(const FSMap& _fsmap) { + auto now = real_clock::now(); + auto since = now - _fsmap.get_btime(); + if (since < history_prune_time) { + history.emplace(std::piecewise_construct, std::forward_as_tuple(_fsmap.get_epoch()), std::forward_as_tuple(_fsmap)); + } + } + + void set_fsmap_history_threshold(std::chrono::seconds t) { + history_prune_time = t; + } + std::chrono::seconds get_fsmap_history_threshold() const { + return history_prune_time; + } + + const auto& get_fsmap_history() const { + return history; + } + void decode(ceph::buffer::list &bl) { fsmap.decode(bl); + put_fsmap_history(fsmap); pending_fsmap = FSMap(); /* nuke it to catch invalid access */ } private: /* Keep these PRIVATE to prevent unprotected manipulation. */ + std::map<epoch_t, FSMap> history; + std::chrono::seconds history_prune_time = std::chrono::seconds(0); FSMap fsmap; /* the current epoch */ FSMap pending_fsmap; /* the next epoch */ }; diff --git a/src/msg/async/Event.cc b/src/msg/async/Event.cc index 926fdcdb1cc..08e117ea54a 100644 --- a/src/msg/async/Event.cc +++ b/src/msg/async/Event.cc @@ -404,6 +404,8 @@ int EventCenter::process_events(unsigned timeout_microseconds, ceph::timespan * if (end_time > now) { timeout_microseconds = std::chrono::duration_cast<std::chrono::microseconds>(end_time - now).count(); + timeout_microseconds = std::max<unsigned>(timeout_microseconds, + cct->_conf->ms_time_events_min_wait_interval); } else { timeout_microseconds = 0; } diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc index 041942fd906..43a8e4f7597 100644 --- a/src/msg/async/ProtocolV1.cc +++ b/src/msg/async/ProtocolV1.cc @@ -682,7 +682,7 @@ CtPtr ProtocolV1::throttle_message() { // short time, so we can wait a ms. 
if (connection->register_time_events.empty()) { connection->register_time_events.insert( - connection->center->create_time_event(1000, + connection->center->create_time_event(cct->_conf->ms_client_throttle_retry_time_interval, connection->wakeup_handler)); } return nullptr; @@ -715,7 +715,8 @@ CtPtr ProtocolV1::throttle_bytes() { if (connection->register_time_events.empty()) { connection->register_time_events.insert( connection->center->create_time_event( - 1000, connection->wakeup_handler)); + cct->_conf->ms_client_throttle_retry_time_interval, + connection->wakeup_handler)); } return nullptr; } @@ -742,7 +743,7 @@ CtPtr ProtocolV1::throttle_dispatch_queue() { // short time, so we can wait a ms. if (connection->register_time_events.empty()) { connection->register_time_events.insert( - connection->center->create_time_event(1000, + connection->center->create_time_event(cct->_conf->ms_client_throttle_retry_time_interval, connection->wakeup_handler)); } return nullptr; diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index 7c4a4d0fe94..10ae54cb284 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -1555,7 +1555,7 @@ CtPtr ProtocolV2::throttle_message() { // short time, so we can wait a ms. if (connection->register_time_events.empty()) { connection->register_time_events.insert( - connection->center->create_time_event(1000, + connection->center->create_time_event(cct->_conf->ms_client_throttle_retry_time_interval, connection->wakeup_handler)); } return nullptr; @@ -1587,7 +1587,8 @@ CtPtr ProtocolV2::throttle_bytes() { if (connection->register_time_events.empty()) { connection->register_time_events.insert( connection->center->create_time_event( - 1000, connection->wakeup_handler)); + cct->_conf->ms_client_throttle_retry_time_interval, + connection->wakeup_handler)); } return nullptr; } @@ -1615,7 +1616,7 @@ CtPtr ProtocolV2::throttle_dispatch_queue() { // short time, so we can wait a ms. 
if (connection->register_time_events.empty()) { connection->register_time_events.insert( - connection->center->create_time_event(1000, + connection->center->create_time_event(cct->_conf->ms_client_throttle_retry_time_interval, connection->wakeup_handler)); } return nullptr; diff --git a/src/nasm-wrapper b/src/nasm-wrapper index 84da2cb23e2..1ac60e1a42e 100755 --- a/src/nasm-wrapper +++ b/src/nasm-wrapper @@ -10,7 +10,7 @@ while [ -n "$*" ]; do refine_nasm_options+=" -f $1" shift ;; - -c | --param* | -m* | -pipe | -thread ) + -c | --param* | --coverage | -m* | -pipe | -thread ) # unknown options under nasm & yasm shift ;; diff --git a/src/os/CMakeLists.txt b/src/os/CMakeLists.txt index a27b64688d8..67770933a3d 100644 --- a/src/os/CMakeLists.txt +++ b/src/os/CMakeLists.txt @@ -37,15 +37,6 @@ if(HAVE_LIBXFS) fs/XFS.cc) endif() -if(HAVE_LIBZFS) - add_library(os_zfs_objs OBJECT - filestore/ZFSFileStoreBackend.cc - fs/ZFS.cc) - target_include_directories(os_zfs_objs SYSTEM PRIVATE - ${ZFS_INCLUDE_DIRS}) - list(APPEND libos_srcs $<TARGET_OBJECTS:os_zfs_objs>) -endif() - add_library(os STATIC ${libos_srcs}) target_link_libraries(os legacy-option-headers @@ -66,10 +57,6 @@ if(WITH_FUSE) target_link_libraries(os FUSE::FUSE) endif() -if(HAVE_LIBZFS) - target_link_libraries(os ${ZFS_LIBRARIES}) -endif() - if(WITH_LTTNG) add_dependencies(os objectstore-tp) add_dependencies(os bluestore-tp) diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index 3f953533700..ddf48bfa89a 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -791,7 +791,7 @@ public: virtual void inject_data_error(const ghobject_t &oid) {} virtual void inject_mdata_error(const ghobject_t &oid) {} - virtual void compact() {} + virtual int compact() { return -ENOTSUP; } virtual bool has_builtin_csum() const { return false; } diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 9f419c19585..32086a17aa6 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -712,10 +712,27 @@ void BlueFS::_init_alloc() { dout(20) << __func__ << dendl; + // 'changed' should keep its previous value if no actual modification occurred + auto change_alloc_size = [this](uint64_t& max_alloc_size, + uint64_t new_alloc, bool& changed) { + if (max_alloc_size == 0 || + (max_alloc_size > new_alloc && ((new_alloc & (new_alloc -1)) == 0))) { + max_alloc_size = new_alloc; + changed = true; + dout(5) << " changed alloc_size to 0x" << std::hex << new_alloc << dendl; + } else if (max_alloc_size != new_alloc) { + derr << " can not change current alloc_size 0x" << std::hex + << max_alloc_size << " to new alloc_size 0x" << new_alloc << dendl; + } + }; + + bool alloc_size_changed = false; size_t wal_alloc_size = 0; if (bdev[BDEV_WAL]) { wal_alloc_size = cct->_conf->bluefs_alloc_size; alloc_size[BDEV_WAL] = wal_alloc_size; + change_alloc_size(super.bluefs_max_alloc_size[BDEV_WAL], + wal_alloc_size, alloc_size_changed); } logger->set(l_bluefs_wal_alloc_unit, wal_alloc_size); @@ -731,18 +748,38 @@ void BlueFS::_init_alloc() if (bdev[BDEV_SLOW]) { alloc_size[BDEV_DB] = cct->_conf->bluefs_alloc_size; alloc_size[BDEV_SLOW] = shared_alloc_size; + change_alloc_size(super.bluefs_max_alloc_size[BDEV_DB], + cct->_conf->bluefs_alloc_size, alloc_size_changed); + change_alloc_size(super.bluefs_max_alloc_size[BDEV_SLOW], + shared_alloc_size, alloc_size_changed); } else { alloc_size[BDEV_DB] = shared_alloc_size; alloc_size[BDEV_SLOW] = 0; + change_alloc_size(super.bluefs_max_alloc_size[BDEV_DB], + shared_alloc_size, alloc_size_changed); } 
logger->set(l_bluefs_db_alloc_unit, alloc_size[BDEV_DB]); logger->set(l_bluefs_slow_alloc_unit, alloc_size[BDEV_SLOW]); // new wal and db devices are never shared if (bdev[BDEV_NEWWAL]) { alloc_size[BDEV_NEWWAL] = cct->_conf->bluefs_alloc_size; + change_alloc_size(super.bluefs_max_alloc_size[BDEV_NEWWAL], + cct->_conf->bluefs_alloc_size, alloc_size_changed); } + if (alloc_size_changed) { + dout(1) << __func__ << " alloc_size changed, the new super is:" << super << dendl; + _write_super(BDEV_DB); + } + + alloc_size_changed = false; if (bdev[BDEV_NEWDB]) { alloc_size[BDEV_NEWDB] = cct->_conf->bluefs_alloc_size; + change_alloc_size(super.bluefs_max_alloc_size[BDEV_NEWDB], + cct->_conf->bluefs_alloc_size, alloc_size_changed); + } + if (alloc_size_changed) { + dout(1) << __func__ << " alloc_size changed, the new super is:" << super << dendl; + _write_super(BDEV_NEWDB); } for (unsigned id = 0; id < bdev.size(); ++id) { @@ -750,6 +787,7 @@ void BlueFS::_init_alloc() continue; } ceph_assert(bdev[id]->get_size()); + ceph_assert(super.bluefs_max_alloc_size[id]); if (is_shared_alloc(id)) { dout(1) << __func__ << " shared, id " << id << std::hex << ", capacity 0x" << bdev[id]->get_size() @@ -769,10 +807,11 @@ void BlueFS::_init_alloc() << ", capacity 0x" << bdev[id]->get_size() << ", reserved 0x" << block_reserved[id] << ", block size 0x" << alloc_size[id] + << ", max alloc size 0x" << super.bluefs_max_alloc_size[id] << std::dec << dendl; alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator, bdev[id]->get_size(), - alloc_size[id], + super.bluefs_max_alloc_size[id], name); alloc[id]->init_add_free( block_reserved[id], @@ -992,6 +1031,7 @@ int BlueFS::mount() _init_alloc(); + dout(5) << __func__ << " super: " << super << dendl; r = _replay(false, false); if (r < 0) { derr << __func__ << " failed to replay log: " << cpp_strerror(r) << dendl; @@ -4738,6 +4778,37 @@ size_t BlueFS::probe_alloc_avail(int dev, uint64_t alloc_size) } return total; } + +void BlueFS::trim_free_space(const string& type, std::ostream& outss) +{ + unsigned bdev_id; + if(type == "bdev-wal") { + bdev_id = BDEV_WAL; + } else if (type == "bdev-db") { + bdev_id = BDEV_DB; + } else { + derr << __func__ << " unknown bdev type " << type << dendl; + return; + } + auto iterated_allocation = [&](size_t off, size_t len) { + ceph_assert(len > 0); + interval_set<uint64_t> to_discard; + to_discard.union_insert(off, len); + bdev[bdev_id]->try_discard(to_discard, false); + }; + if (!bdev[bdev_id]) { + outss << "device " << type << " is not configured"; + return; + } + if (alloc[bdev_id] && !is_shared_alloc(bdev_id)) { + if (!bdev[bdev_id]->is_discard_supported()) { + outss << "device " << type << " does not support trim"; + return; + } + alloc[bdev_id]->foreach(iterated_allocation); + outss << "device " << type << " trim done"; + } +} // =============================================== // OriginalVolumeSelector diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 86fb3cc882e..f57deac5706 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -778,6 +778,7 @@ public: } uint64_t debug_get_dirty_seq(FileWriter *h); bool debug_get_is_dev_dirty(FileWriter *h, uint8_t dev); + void trim_free_space(const std::string& type, std::ostream& outss); private: // Wrappers for BlockDevice::read(...) and BlockDevice::read_random(...) 
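The change_alloc_size lambda introduced in _init_alloc above initializes super.bluefs_max_alloc_size on first use and afterwards only ever lowers it, and only to a power of two; a candidate that would grow it (or is not a power of two) is rejected with a derr. A minimal sketch of that acceptance rule, with the bit test taken from the hunk (the helper name is ours, not part of the patch):

    // Accept a new BlueFS allocation unit only if none is recorded yet, or
    // if it is a smaller power of two than the recorded maximum: the unit
    // may shrink across restarts but never grow, since Allocator::create()
    // is now fed the persisted super.bluefs_max_alloc_size.
    static bool alloc_size_acceptable(uint64_t recorded_max, uint64_t candidate)
    {
      const bool is_pow2 = (candidate & (candidate - 1)) == 0;
      return recorded_max == 0 || (candidate < recorded_max && is_pow2);
    }

Creating the allocator from the persisted value rather than the freshly configured alloc_size is what keeps a device that was once carved at a given unit from being re-carved at a coarser one after a config change.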
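The trim_free_space implementations added to BlueFS above and to BlueStore further below share one shape: check the new BlockDevice::is_discard_supported() accessor, then enumerate the allocator's free extents and hand each one to the device as a synchronous discard. A condensed sketch of that loop, using only the interfaces visible in these hunks (the standalone function itself is illustrative):

    // Discard every free extent known to the allocator, one single-extent
    // interval_set per callback, issued inline (async=false).
    void trim_all_free(Allocator* alloc, BlockDevice* bdev, std::ostream& outss)
    {
      if (!bdev->is_discard_supported()) {
        outss << "device does not support trim";
        return;
      }
      alloc->foreach([&](size_t off, size_t len) {
        ceph_assert(len > 0);
        interval_set<uint64_t> to_discard;
        to_discard.union_insert(off, len);
        bdev->try_discard(to_discard, false);
      });
      outss << "trim done";
    }

Building a fresh one-extent interval_set per callback mirrors the patch: each free extent is pushed to the device as soon as it is visited rather than batched.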
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2f6cf3efb89..4ecc7cc3a06 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -25,6 +25,7 @@ #include <boost/random/mersenne_twister.hpp> #include <boost/random/uniform_real.hpp> +#include "common/dout.h" #include "include/cpp-btree/btree_set.h" #include "BlueStore.h" @@ -582,12 +583,6 @@ void _dump_extent_map(CephContext *cct, const BlueStore::ExtentMap &em) dout(LogLevelV) << __func__ << " csum: " << std::hex << v << std::dec << dendl; } - std::lock_guard l(e.blob->get_cache()->lock); - for (auto& i : e.blob->get_bc().buffer_map) { - dout(LogLevelV) << __func__ << " 0x" << std::hex << i.first - << "~" << i.second.length << std::dec - << " " << i.second << dendl; - } } } @@ -617,6 +612,12 @@ void _dump_onode(CephContext *cct, const BlueStore::Onode& o) << " len " << p->second.length() << dendl; } _dump_extent_map<LogLevelV>(cct, o.extent_map); + + for (auto& b : o.bc.buffer_map) { + dout(LogLevelV) << __func__ << " 0x" << std::hex << b.offset << "~" + << b.length << std::dec << " " << b + << dendl; + } } template <int LogLevelV> @@ -1214,10 +1215,10 @@ struct LruBufferCacheShard : public BlueStore::BufferCacheShard { &BlueStore::Buffer::lru_item> > list_t; list_t lru; - explicit LruBufferCacheShard(CephContext *cct) : BlueStore::BufferCacheShard(cct) {} + explicit LruBufferCacheShard(BlueStore* store) : BlueStore::BufferCacheShard(store) {} void _add(BlueStore::Buffer *b, int level, BlueStore::Buffer *near) override { - if (near) { + if (near && !near->is_writing()) { auto q = lru.iterator_to(*near); lru.insert(q, *b); } else if (level > 0) { @@ -1335,15 +1336,18 @@ struct TwoQBufferCacheShard : public BlueStore::BufferCacheShard { uint64_t list_bytes[BUFFER_TYPE_MAX] = {0}; ///< bytes per type public: - explicit TwoQBufferCacheShard(CephContext *cct) : BufferCacheShard(cct) {} + explicit TwoQBufferCacheShard(BlueStore* store) : BufferCacheShard(store) {} void _add(BlueStore::Buffer *b, int level, BlueStore::Buffer *near) override { dout(20) << __func__ << " level " << level << " near " << near << " on " << *b << " which has cache_private " << b->cache_private << dendl; + ceph_assert(b->is_clean() || b->is_empty()); if (near) { b->cache_private = near->cache_private; + } + if (near && !near->is_writing()) { switch (b->cache_private) { case BUFFER_WARM_IN: warm_in.insert(warm_in.iterator_to(*near), *b); @@ -1358,17 +1362,18 @@ public: default: ceph_abort_msg("bad cache_private"); } - } else if (b->cache_private == BUFFER_NEW) { - b->cache_private = BUFFER_WARM_IN; - if (level > 0) { - warm_in.push_front(*b); - } else { - // take caller hint to start at the back of the warm queue - warm_in.push_back(*b); - } } else { // we got a hint from discard switch (b->cache_private) { + case BUFFER_NEW: + b->cache_private = BUFFER_WARM_IN; + if (level > 0) { + warm_in.push_front(*b); + } else { + // take caller hint to start at the back of the warm queue + warm_in.push_back(*b); + } + break; case BUFFER_WARM_IN: // stay in warm_in. move to front, even though 2Q doesn't actually // do this. 
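The guarded near hints above (near && !near->is_writing()) follow from the invariant this refactor introduces, spelled out in the _add_buffer and _finish_write rewrites further down: buffers now hang off the owning onode's buffer_map, indexed by offset for readers, but only clean buffers enter a shard's LRU/2Q queues; a buffer still being written is tracked by its TransContext and starts aging only once the write commits. A distilled sketch of that rule (helper names are ours; locking, FLAG_NOCACHE, and the finishing-txc race are omitted):

    // A writing buffer is visible to readers via the onode's buffer_map
    // but stays out of the cache shard's aging queues until it is clean.
    void index_buffer(BlueStore::BufferCacheShard* cache,
                      BlueStore::BufferSpace& bc,
                      BlueStore::Buffer* b)
    {
      bc.buffer_map.insert(*b);      // readable immediately, keyed by offset
      if (!b->is_writing()) {
        cache->_add(b, 0, nullptr);  // only clean buffers age in LRU/2Q
      }
    }

    // When the buffer's transaction commits, it turns clean and joins the
    // aging queues like any other cached buffer.
    void on_write_committed(BlueStore::BufferCacheShard* cache,
                            BlueStore::Buffer* b)
    {
      b->state = BlueStore::Buffer::STATE_CLEAN;
      b->txc = nullptr;
      cache->_add(b, 1, nullptr);
    }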
@@ -1599,7 +1604,7 @@ public: #ifdef DEBUG_CACHE void _audit(const char *when) override { - dout(10) << __func__ << " " << when << " start" << dendl; + dout(10) << __func__ << " " << when << " start" << dendl; uint64_t s = 0; for (auto i = hot.begin(); i != hot.end(); ++i) { ceph_assert(i->cache_private == BUFFER_HOT); @@ -1648,47 +1653,122 @@ public: // BuferCacheShard BlueStore::BufferCacheShard *BlueStore::BufferCacheShard::create( - CephContext* cct, + BlueStore* store, string type, PerfCounters *logger) { BufferCacheShard *c = nullptr; if (type == "lru") - c = new LruBufferCacheShard(cct); + c = new LruBufferCacheShard(store); else if (type == "2q") - c = new TwoQBufferCacheShard(cct); + c = new TwoQBufferCacheShard(store); else ceph_abort_msg("unrecognized cache type"); c->logger = logger; return c; } +// Buffer +std::atomic<uint64_t> BlueStore::Buffer::total = 0; + // BufferSpace #undef dout_prefix #define dout_prefix *_dout << "bluestore.BufferSpace(" << this << " in " << cache << ") " +void BlueStore::BufferSpace::_add_buffer(BufferCacheShard* cache, + Buffer* b, + uint16_t cache_private, int level, + Buffer *near) +{ + ldout(cache->cct, 20) << __func__ << "? " << b << dendl; + cache->_audit("_add_buffer start"); + ceph_assert(!b->set_item.is_linked()); + // illegal to provide both near and cache_private + ceph_assert(!(near && cache_private != 0)); + bool add_to_map = true; + if (b->is_writing()) { + ceph_assert(b->txc); + // we might get already cached data for which resetting mempool is inappropriate, + // hence calling try_assign_to_mempool + if (b->txc->add_writing(&onode, b->offset, b->length)) { + b->data.try_assign_to_mempool(mempool::mempool_bluestore_writing); + } else if (b->flags & Buffer::FLAG_NOCACHE) { + // txc is being finished and hence it hasn't added us to the writing list. + // And we don't need to cache this buffer. + // So we delete it. + ldout(cache->cct, 20) << __func__ << + " not added to writing, releasing " << b + << dendl; + delete b; + b = nullptr; + add_to_map = false; + } else { + // txc is being finished and hence it hasn't added us to the writing list.
+ // So we can cache it + b->state = Buffer::STATE_CLEAN; + b->txc = nullptr; + b->maybe_rebuild(); + } + } + if (add_to_map) { + ldout(cache->cct, 20) << __func__ << " added " << b << dendl; + b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); + b->cache_private = cache_private; + buffer_map.insert(*b); + if (!b->is_writing()) { + cache->_add(b, level, near); + } + } + cache->_audit("_add_buffer end"); +} + +void BlueStore::BufferSpace::__rm_buffer(BufferCacheShard* cache, + Buffer* b) +{ + ceph_assert(b); + cache->_audit("_rm_buffer start"); + if (!b->is_writing()) { + cache->_rm(b); + } + ldout(cache->cct, 20) << __func__ << " erasing " << b << dendl; + __erase_from_map(b); + cache->_audit("_rm_buffer end"); +} + +void BlueStore::BufferSpace::__erase_from_map(Buffer* b) +{ + ceph_assert(b); + buffer_map.erase(buffer_map.iterator_to(*b)); + delete b; +} + void BlueStore::BufferSpace::_clear(BufferCacheShard* cache) { // note: we already hold cache->lock ldout(cache->cct, 20) << __func__ << dendl; while (!buffer_map.empty()) { - _rm_buffer(cache, buffer_map.begin()); + __rm_buffer(cache, &*buffer_map.begin()); } } -int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, uint32_t length) +int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, + uint32_t offset, uint32_t length) { // note: we already hold cache->lock - ldout(cache->cct, 20) << __func__ << std::hex << " 0x" << offset << "~" << length - << std::dec << dendl; + ldout(cache->cct, 20) << __func__ << std::hex << " 0x" << offset << "~" << length + << std::dec << dendl; int cache_private = 0; cache->_audit("discard start"); auto i = _data_lower_bound(offset); uint32_t end = offset + length; while (i != buffer_map.end()) { - Buffer *b = &i->second; - if (b->offset >= end) { + Buffer* b = &*i; + // The first iteration either finds a buffer that contains the offset or the next buffer after it. + // Subsequent iterations see either buffers inside the range or after it. + // Once we find a buffer that doesn't overlap the range, we can break, as it must lie after the range.
+ bool overlaps = offset < b->end() && end > b->offset; + if (!overlaps) { break; } if (b->cache_private > cache_private) { @@ -1702,9 +1782,13 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u if (b->data.length()) { bufferlist bl; bl.substr_of(b->data, b->length - tail, tail); - _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b); + _add_buffer(cache, + new Buffer(this, b->state, b->txc, end, bl, b->flags), + 0, 0, b); } else { - _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, tail, b->flags), 0, 0, b); + _add_buffer(cache, + new Buffer(this, b->state, b->txc, end, tail, b->flags), + 0, 0, b); } if (!b->is_writing()) { cache->_adjust_size(b, front - (int64_t)b->length); @@ -1726,7 +1810,8 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u } if (b->end() <= end) { // drop entire buffer - _rm_buffer(cache, i++); + auto i0 = i++; + __rm_buffer(cache, &*i0); continue; } // drop front @@ -1734,13 +1819,13 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u if (b->data.length()) { bufferlist bl; bl.substr_of(b->data, b->length - keep, keep); - _add_buffer(cache, this, - Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b); + _add_buffer(cache, + new Buffer(this, b->state, b->txc, end, bl, b->flags), 0, 0, b); } else { - _add_buffer(cache, this, - Buffer(this, b->state, b->seq, end, keep, b->flags), 0, 0, b); + _add_buffer(cache, + new Buffer(this, b->state, b->txc, end, keep, b->flags), 0, 0, b); } - _rm_buffer(cache, i); + __rm_buffer(cache, &*i); cache->_audit("discard end 2"); break; } @@ -1763,8 +1848,8 @@ void BlueStore::BufferSpace::read( { std::lock_guard l(cache->lock); for (auto i = _data_lower_bound(offset); - i != buffer_map.end() && offset < end && i->first < end; ++i) { - Buffer *b = &i->second; + i != buffer_map.end() && offset < end && i->offset < end; ++i) { + Buffer* b = &*i; ceph_assert(b->end() > offset); bool val = false; @@ -1819,132 +1904,131 @@ void BlueStore::BufferSpace::read( cache->logger->inc(l_bluestore_buffer_miss_bytes, miss_bytes); } -void BlueStore::BufferSpace::_finish_write(BufferCacheShard* cache, uint64_t seq) +void BlueStore::BufferSpace::_finish_write(BufferCacheShard* cache, + TransContext* txc, + uint32_t offset, uint32_t len) { - auto i = writing.begin(); - while (i != writing.end()) { - if (i->seq > seq) { - break; - } - if (i->seq < seq) { - ++i; - continue; - } - - Buffer *b = &*i; - ceph_assert(b->is_writing()); + ldout(cache->cct, 10) << __func__ << " txc " << txc + << std::hex << " 0x" << offset << "~" << len << std::dec + << dendl; - if (b->flags & Buffer::FLAG_NOCACHE) { - writing.erase(i++); - ldout(cache->cct, 20) << __func__ << " discard " << *b << dendl; - buffer_map.erase(b->offset); - } else { - b->state = Buffer::STATE_CLEAN; - writing.erase(i++); - b->maybe_rebuild(); - b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); - cache->_add(b, 1, nullptr); - ldout(cache->cct, 20) << __func__ << " added " << *b << dendl; + uint32_t end = offset + len; + std::lock_guard l(cache->lock); + auto i = _data_lower_bound(offset); + while (i != buffer_map.end() && offset < end && i->offset < end) { + Buffer* b = &*i; + i++; + ceph_assert(b->end() > offset); + if (b->txc == txc && b->is_writing()) { + ldout(cache->cct, 20) << __func__ << " finish " << *b + << dendl; + if (b->flags & Buffer::FLAG_NOCACHE) { + __erase_from_map(b); + } else { + b->state = Buffer::STATE_CLEAN; + b->txc = 
nullptr; + b->maybe_rebuild(); + b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); + cache->_add(b, 1, nullptr); + } } } cache->_trim(); cache->_audit("finish_write end"); + ldout(cache->cct, 20) << __func__ << " done." << dendl; } /* copy Buffers that are in writing queue - returns: - true if something copied - false if nothing copied */ -bool BlueStore::BufferSpace::_dup_writing(BufferCacheShard* cache, BufferSpace* to) +void BlueStore::BufferSpace::_dup_writing(TransContext* txc, Collection* collection, OnodeRef onode, uint32_t offset, uint32_t length) { - bool copied = false; - if (!writing.empty()) { - copied = true; - for (auto it = writing.begin(); it != writing.end(); ++it) { - Buffer& b = *it; - ceph_assert(b.is_writing()); - to->_add_buffer(cache, to, - Buffer(to, b.state, b.seq, b.offset, b.data, b.flags), 0, - 0, nullptr); + uint64_t end = offset + length; + BufferSpace &to = onode->bc; + BufferCacheShard *cache = collection->cache; + ldout(cache->cct, 20) << __func__ << " offset=" << std::hex << offset << " length=" << std::hex << length << dendl; + for (auto i = _data_lower_bound(offset); + i != buffer_map.end() && offset < end && i->offset < end; ++i) { + Buffer *b = &*i; + if (!b->is_writing()) { + continue; } - } - return copied; -} - -void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStore::BufferSpace &r) -{ - std::lock_guard lk(cache->lock); - if (buffer_map.empty()) - return; - auto p = --buffer_map.end(); - while (true) { - if (p->second.end() <= pos) break; - - if (p->second.offset < pos) { - ldout(cache->cct, 30) << __func__ << " cut " << p->second << dendl; - size_t left = pos - p->second.offset; - size_t right = p->second.length - left; - if (p->second.data.length()) { - bufferlist bl; - bl.substr_of(p->second.data, left, right); - r._add_buffer( - cache, &r, - Buffer(&r, p->second.state, p->second.seq, 0, bl, p->second.flags), - 0, 0, &p->second); + bufferlist buffer_to_copy; + uint32_t offset_to_copy = 0; + if (b->offset >= offset) { + if (b->end() > end) { + // take head + uint64_t tail = b->end() - end; + auto new_length = b->data.length() - tail; + buffer_to_copy.substr_of(b->data, 0, new_length); + offset_to_copy = b->offset; } else { - r._add_buffer(cache, &r, Buffer(&r, p->second.state, p->second.seq, 0, right, - p->second.flags), 0, 0, &p->second); + // take whole buffer + buffer_to_copy = b->data; + offset_to_copy = b->offset; } - cache->_adjust_size(&p->second, -right); - p->second.truncate(left); - break; - } - - ceph_assert(p->second.end() > pos); - ldout(cache->cct, 30) << __func__ << " move " << p->second << dendl; - if (p->second.data.length()) { - r._add_buffer(cache, &r, - Buffer(&r, p->second.state, p->second.seq, - p->second.offset - pos, p->second.data, - p->second.flags), - 0, 0, &p->second); - } else { - r._add_buffer(cache, &r, - Buffer(&r, p->second.state, p->second.seq, - p->second.offset - pos, p->second.length, - p->second.flags), - 0, 0, &p->second); - } - if (p == buffer_map.begin()) { - _rm_buffer(cache, p); - break; } else { - _rm_buffer(cache, p--); + if (b->end() > end) { + uint64_t front = offset - b->offset; + uint64_t tail = b->end() - end; + // take middle + uint64_t new_length = b->data.length() - front - tail; + buffer_to_copy.substr_of(b->data, front, new_length); + offset_to_copy = b->offset + front; + } else { + // take tail + uint64_t front = offset - b->offset; + uint64_t new_length = b->data.length() - front; + buffer_to_copy.substr_of(b->data, front, new_length); + 
offset_to_copy = b->offset + front; + } } - } - ceph_assert(writing.empty()); - cache->_trim(); + Buffer* to_b = new Buffer(&onode->bc, b->state, b->txc, offset_to_copy, + std::move(buffer_to_copy), b->flags); + ldout(cache->cct, 20) << __func__ << " offset=" << std::hex << offset + << " length=" << std::hex << length << " buffer=" << *to_b << dendl; + ceph_assert(to_b->is_writing()); + to._discard(collection->cache, to_b->offset, to_b->length); + to._add_buffer(collection->cache, to_b, to_b->cache_private, 0, nullptr); + } // for } // lists content of BufferSpace // BufferSpace must be under exclusive access std::ostream& operator<<(std::ostream& out, const BlueStore::BufferSpace& bc) { - for (auto& [i, j] : bc.buffer_map) { - out << " [0x" << std::hex << i << "]=" << j << std::dec; - } - if (!bc.writing.empty()) { - out << " writing:"; - for (auto i = bc.writing.begin(); i != bc.writing.end(); ++i) { - out << " " << *i; - } + for (auto& b : bc.buffer_map) { + out << " [0x" << std::hex << b.offset << "]=" << b << std::dec; } return out; } +// TransContext +bool BlueStore::TransContext::add_writing(Onode* o, uint32_t off, uint32_t len) +{ + std::lock_guard l(writings_lock); + + // Need to indicate non-initial observers that we're done. + if (were_writings && writings.empty()) { + return false; + } + writings.emplace_back(o, off, len); + were_writings = true; + return true; +} + +void BlueStore::TransContext::finish_writing() +{ + write_list_t finished; + { + std::lock_guard l(writings_lock); + finished.swap(writings); + } + for (auto& e : finished) { + e.onode->finish_write(this, e.offset, e.length); + } +} // OnodeSpace @@ -2185,11 +2269,8 @@ BlueStore::Blob::~Blob() if (coll_cache != get_cache()) { goto again; } - bc._clear(coll_cache); coll_cache->rm_blob(); } - SharedBlob* sb = shared_blob.get(); - ceph_assert(sb || (!sb && bc.buffer_map.empty())); } void BlueStore::Blob::dump(Formatter* f) const @@ -2219,46 +2300,6 @@ ostream& operator<<(ostream& out, const BlueStore::Blob& b) return out; } -void BlueStore::Blob::discard_unallocated(Collection *coll) -{ - if (get_blob().is_shared()) { - return; - } - if (get_blob().is_compressed()) { - bool discard = false; - bool all_invalid = true; - for (auto e : get_blob().get_extents()) { - if (!e.is_valid()) { - discard = true; - } else { - all_invalid = false; - } - } - ceph_assert(discard == all_invalid); // in case of compressed blob all - // or none pextents are invalid. - if (discard) { - dirty_bc().discard(get_cache(), 0, - get_blob().get_logical_length()); - } - } else { - size_t pos = 0; - for (auto e : get_blob().get_extents()) { - if (!e.is_valid()) { - dout(20) << __func__ << " 0x" << std::hex << pos - << "~" << e.length - << std::dec << dendl; - dirty_bc().discard(get_cache(), pos, e.length); - } - pos += e.length; - } - if (get_blob().can_prune_tail()) { - dirty_blob().prune_tail(); - used_in_blob.prune_tail(get_blob().get_ondisk_length()); - dout(20) << __func__ << " pruned tail, now " << get_blob() << dendl; - } - } -} - void BlueStore::Blob::get_ref( Collection *coll, uint32_t offset, @@ -2390,32 +2431,6 @@ bool BlueStore::Blob::can_reuse_blob(uint32_t min_alloc_size, #undef dout_context #define dout_context cct -// Cut Buffers that are not covered by extents. -// It happens when we punch hole in Blob, but not refill with new data. -// Normally it is not a problem (other then wasted memory), -// but when 2 Blobs are merged Buffers might collide. 
-// Todo: in future cut Buffers when we delete extents from Blobs, -// and get rid of this function. -void BlueStore::Blob::discard_unused_buffers(CephContext* cct, BufferCacheShard* cache) -{ - dout(25) << __func__ << " input " << *this << " bc=" << bc << dendl; - const PExtentVector& extents = get_blob().get_extents(); - uint32_t epos = 0; - auto e = extents.begin(); - while(e != extents.end()) { - if (!e->is_valid()) { - bc._discard(cache, epos, e->length); - } - epos += e->length; - ++e; - } - ceph_assert(epos <= blob.get_logical_length()); - // Preferably, we would trim up to blob.get_logical_length(), - // but we copied writing buffers (see _dup_writing) before blob logical_length is fixed. - bc._discard(cache, epos, OBJECT_MAX_SIZE - epos); - dout(25) << __func__ << " output bc=" << bc << dendl; -} - void BlueStore::Blob::dup(const Blob& from, bool copy_used_in_blob) { set_shared_blob(from.shared_blob); @@ -2429,7 +2444,7 @@ void BlueStore::Blob::dup(const Blob& from, bool copy_used_in_blob) } for (auto p : blob.get_extents()) { if (p.is_valid()) { - shared_blob->get_ref(p.offset, p.length); + get_dirty_shared_blob()->get_ref(p.offset, p.length); } } } @@ -2600,7 +2615,7 @@ void BlueStore::Blob::copy_extents_over_empty( if (prev != exto.end()) { if (prev->is_valid()) { if (prev->offset + prev->length == disk_offset) { - shared_blob->get_ref(disk_offset, disk_len); + get_dirty_shared_blob()->get_ref(disk_offset, disk_len); prev->length += disk_len; return; } @@ -2609,7 +2624,7 @@ void BlueStore::Blob::copy_extents_over_empty( it = exto.insert(it, bluestore_pextent_t(disk_offset, disk_len)); prev = it; ++it; - shared_blob->get_ref(disk_offset, disk_len); + get_dirty_shared_blob()->get_ref(disk_offset, disk_len); }; while (ito != exto.end() && sto >= ito->length) { @@ -2872,24 +2887,6 @@ uint32_t BlueStore::Blob::merge_blob(CephContext* cct, Blob* blob_to_dissolve) // now apply freshly merged tmp_extents into dst blob dst_blob.dirty_extents().swap(tmp_extents); - // move BufferSpace buffers - while(!src->bc.buffer_map.empty()) { - auto buf = src->bc.buffer_map.extract(src->bc.buffer_map.cbegin()); - buf.mapped().space = &dst->bc; - if (dst->bc.buffer_map.count(buf.key()) == 0) { - dst->bc.buffer_map.emplace(buf.key(), std::move(buf.mapped())); - } - } - // move BufferSpace writing - auto wrt_dst_it = dst->bc.writing.begin(); - while(!src->bc.writing.empty()) { - Buffer& buf = src->bc.writing.front(); - src->bc.writing.pop_front(); - while (wrt_dst_it != dst->bc.writing.end() && wrt_dst_it->seq < buf.seq) { - ++wrt_dst_it; - } - dst->bc.writing.insert(wrt_dst_it, buf); - } dout(20) << __func__ << " result=" << *dst << dendl; return dst_blob.get_logical_length(); } @@ -2897,24 +2894,6 @@ uint32_t BlueStore::Blob::merge_blob(CephContext* cct, Blob* blob_to_dissolve) #undef dout_context #define dout_context collection->store->cct -void BlueStore::Blob::finish_write(uint64_t seq) -{ - while (true) { - auto coll = get_collection(); - BufferCacheShard *cache = coll->cache; - std::lock_guard l(cache->lock); - if (coll->cache != cache) { - dout(20) << __func__ - << " raced with sb cache update, was " << cache - << ", now " << coll->cache << ", retrying" - << dendl; - continue; - } - bc._finish_write(cache, seq); - break; - } -} - void BlueStore::Blob::split(Collection *coll, uint32_t blob_offset, Blob *r) { dout(10) << __func__ << " 0x" << std::hex << blob_offset << std::dec @@ -2929,7 +2908,6 @@ void BlueStore::Blob::split(Collection *coll, uint32_t blob_offset, Blob *r) &(r->used_in_blob)); 
lb.split(blob_offset, rb); - dirty_bc().split(get_cache(), blob_offset, r->dirty_bc()); dout(10) << __func__ << " 0x" << std::hex << blob_offset << std::dec << " finish " << *this << dendl; @@ -2937,6 +2915,15 @@ void BlueStore::Blob::split(Collection *coll, uint32_t blob_offset, Blob *r) << " and " << *r << dendl; } + +void BlueStore::Blob::maybe_prune_tail() { + if (get_blob().can_prune_tail()) { + dirty_blob().prune_tail(); + used_in_blob.prune_tail(get_blob().get_ondisk_length()); + dout(20) << __func__ << " pruned tail, now " << get_blob() << dendl; + } +} + #ifndef CACHE_BLOB_BL void BlueStore::Blob::decode( bufferptr::const_iterator& p, @@ -3137,44 +3124,39 @@ void BlueStore::ExtentMap::make_range_shared_maybe_merge( if (e.logical_offset >= end) { break; } - dout(25) << __func__ << " src " << e - << " bc=" << e.blob->bc << dendl; - const bluestore_blob_t& blob = e.blob->get_blob(); + dout(25) << __func__ << " src " << e << " bc=" << onoderef->bc << dendl; + const bluestore_blob_t &blob = e.blob->get_blob(); // make sure it is shared if (!blob.is_shared()) { dirty_range_begin = std::min<uint32_t>(dirty_range_begin, e.blob_start()); // first try to find a shared blob nearby // that can accomodate extra extents - uint32_t blob_width; //to signal when extents end - dout(20) << __func__ << std::hex - << " e.blob_start=" << e.blob_start() - << " e.logical_offset=" << e.logical_offset - << std::dec << dendl; - Blob* b = blob.is_compressed() ? nullptr : - find_mergable_companion(e.blob.get(), e.blob_start(), blob_width, candidates); + uint32_t blob_width; // to signal when extents end + dout(20) << __func__ << std::hex << " e.blob_start=" << e.blob_start() + << " e.logical_offset=" << e.logical_offset << std::dec << dendl; + Blob *b = blob.is_compressed() ? 
nullptr : + find_mergable_companion(e.blob.get(), e.blob_start(), blob_width, candidates); if (b) { - dout(20) << __func__ << " merging to: " << *b << " bc=" << b->bc << dendl; - e.blob->discard_unused_buffers(store->cct, c->cache); - b->discard_unused_buffers(store->cct, c->cache); - uint32_t b_logical_length = b->merge_blob(store->cct, e.blob.get()); - for (auto p : blob.get_extents()) { - if (p.is_valid()) { - b->get_shared_blob()->get_ref(p.offset, p.length); - } - } - // reblob extents might erase e - dirty_range_end = std::max<uint32_t>(dirty_range_end, e.blob_start() + b_logical_length); - uint32_t goto_logical_offset = e.logical_offset + e.length; - reblob_extents(e.blob_start(), e.blob_start() + blob_width, + dout(20) << __func__ << " merging to: " << *b << " bc=" << onode->bc << dendl; + uint32_t b_logical_length = b->merge_blob(store->cct, e.blob.get()); + for (auto p : blob.get_extents()) { + if (p.is_valid()) { + b->get_dirty_shared_blob()->get_ref(p.offset, p.length); + } + } + // reblob extents might erase e + dirty_range_end = std::max<uint32_t>(dirty_range_end, e.blob_start() + b_logical_length); + uint32_t goto_logical_offset = e.logical_offset + e.length; + reblob_extents(e.blob_start(), e.blob_start() + blob_width, e.blob, b); - ep = seek_lextent(goto_logical_offset); - dout(20) << __func__ << " merged: " << *b << dendl; + ep = seek_lextent(goto_logical_offset); + dout(20) << __func__ << " merged: " << *b << dendl; } else { - // no candidate, has to convert to shared - c->make_blob_shared(store->_assign_blobid(txc), e.blob); - ceph_assert(e.logical_end() > 0); - dirty_range_end = std::max<uint32_t>(dirty_range_end, e.logical_end()); - ++ep; + // no candidate, has to convert to shared + c->make_blob_shared(store->_assign_blobid(txc), e.blob); + ceph_assert(e.logical_end() > 0); + dirty_range_end = std::max<uint32_t>(dirty_range_end, e.logical_end()); + ++ep; } } else { c->load_shared_blob(e.blob->get_shared_blob()); @@ -3244,15 +3226,6 @@ void BlueStore::ExtentMap::dup(BlueStore* b, TransContext* txc, e.blob->last_encoded_id = n; id_to_blob[n] = cb; e.blob->dup(*cb); - // By default do not copy buffers to clones, and let them read data by themselves. - // The exception are 'writing' buffers, which are not yet stable on device. - bool some_copied = e.blob->bc._dup_writing(cb->get_cache(), &cb->bc); - if (some_copied) { - // Pretend we just wrote those buffers; - // we need to get _finish_write called, so we can clear then from writing list. - // Otherwise it will be stuck until someone does write-op on clone. - txc->blobs_written.insert(cb); - } // bump the extent refs on the copied blob's extents for (auto p : blob.get_extents()) { @@ -3287,12 +3260,17 @@ void BlueStore::ExtentMap::dup(BlueStore* b, TransContext* txc, txc->statfs_delta.compressed_original() += ne->length; if (blob_duped) { txc->statfs_delta.compressed() += - cb->get_blob().get_compressed_payload_length(); + cb->get_blob().get_compressed_payload_length(); } } dout(20) << __func__ << " dst " << *ne << dendl; ++n; } + // By default do not copy buffers to clones, and let them read data by + // themselves. The exceptions are 'writing' buffers, which are not yet + // stable on device.
+ oldo->bc._dup_writing(txc, newo->c, newo, dstoff, length); + if (src_dirty) { oldo->extent_map.dirty_range(dirty_range_begin, dirty_range_end - dirty_range_begin); @@ -3371,15 +3349,6 @@ void BlueStore::ExtentMap::dup_esb(BlueStore* b, TransContext* txc, cb->dirty_blob().set_flag(bluestore_blob_t::FLAG_SHARED); cb->set_shared_blob(e.blob->get_shared_blob()); } - // By default do not copy buffers to clones, and let them read data by themselves. - // The exception are 'writing' buffers, which are not yet stable on device. - bool some_copied = e.blob->bc._dup_writing(cb->get_cache(), &cb->bc); - if (some_copied) { - // Pretend we just wrote those buffers; - // we need to get _finish_write called, so we can clear then from writing list. - // Otherwise it will be stuck until someone does write-op on the clone. - txc->blobs_written.insert(cb); - } txc->write_shared_blob(e.blob->get_shared_blob()); dout(20) << __func__ << " new " << *cb << dendl; @@ -3426,6 +3395,11 @@ void BlueStore::ExtentMap::dup_esb(BlueStore* b, TransContext* txc, dout(20) << __func__ << " dst " << *ne << dendl; ++n; } + // By default do not copy buffers to clones, and let them read data by + // themselves. The exceptions are 'writing' buffers, which are not yet + // stable on device. + oldo->bc._dup_writing(txc, newo->c, newo, dstoff, length); + if (src_dirty) { dirty_range(dirty_range_begin, dirty_range_end - dirty_range_begin); txc->write_onode(oldo); @@ -3809,40 +3783,40 @@ void BlueStore::ExtentMap::reshard( } if (e->blob_escapes_range(shard_start, shard_end - shard_start)) { - if (!e->blob->is_spanning()) { - // We have two options: (1) split the blob into pieces at the - // shard boundaries (and adjust extents accordingly), or (2) - // mark it spanning. We prefer to cut the blob if we can. Note that - // we may have to split it multiple times--potentially at every - // shard boundary. - bool must_span = false; - BlobRef b = e->blob; - if (b->can_split()) { - uint32_t bstart = e->blob_start(); - uint32_t bend = e->blob_end(); - for (const auto& sh : shards) { - if (bstart < sh.shard_info->offset && - bend > sh.shard_info->offset) { - uint32_t blob_offset = sh.shard_info->offset - bstart; - if (b->can_split_at(blob_offset)) { - dout(20) << __func__ << " splitting blob, bstart 0x" - << std::hex << bstart << " blob_offset 0x" - << blob_offset << std::dec << " " << *b << dendl; - b = split_blob(b, blob_offset, sh.shard_info->offset); - // switch b to the new right-hand side, in case it - // *also* has to get split. - bstart += blob_offset; - onode->c->store->logger->inc(l_bluestore_blob_split); - } else { - must_span = true; - break; - } - } - } - } else { - must_span = true; - } - if (must_span) { + if (!e->blob->is_spanning()) { + // We have two options: (1) split the blob into pieces at the + // shard boundaries (and adjust extents accordingly), or (2) + // mark it spanning. We prefer to cut the blob if we can. Note that + // we may have to split it multiple times--potentially at every + // shard boundary.
+ bool must_span = false; + BlobRef b = e->blob; + if (b->can_split()) { + uint32_t bstart = e->blob_start(); + uint32_t bend = e->blob_end(); + for (const auto& sh : shards) { + if (bstart < sh.shard_info->offset && + bend > sh.shard_info->offset) { + uint32_t blob_offset = sh.shard_info->offset - bstart; + if (b->can_split_at(blob_offset)) { + dout(20) << __func__ << " splitting blob, bstart 0x" + << std::hex << bstart << " blob_offset 0x" + << blob_offset << std::dec << " " << *b << dendl; + b = split_blob(b, blob_offset, sh.shard_info->offset); + // switch b to the new right-hand side, in case it + // *also* has to get split. + bstart += blob_offset; + onode->c->store->logger->inc(l_bluestore_blob_split); + } else { + must_span = true; + break; + } + } + } + } else { + must_span = true; + } + if (must_span) { auto bid = allocate_spanning_blob_id(); b->id = bid; spanning_blob_map[b->id] = b; @@ -4822,6 +4796,27 @@ void BlueStore::Onode::decode_omap_key(const string& key, string *user_key) *user_key = key.substr(pos); } +void BlueStore::Onode::finish_write(TransContext* txc, uint32_t offset, uint32_t length) +{ + while (true) { + BufferCacheShard *cache = c->cache; + std::lock_guard l(cache->lock); + if (cache != c->cache) { + ldout(cache->cct, 20) << __func__ + << " raced with sb cache update, was " << cache + << ", now " << c->cache << ", retrying" + << dendl; + continue; + } + ldout(c->store->cct, 10) << __func__ << " txc " << txc << std::hex + << " 0x" << offset << "~" << length << std::dec + << dendl; + bc._finish_write(cache, txc, offset, length); + break; + } + ldout(c->store->cct, 10) << __func__ << " done " << txc << dendl; +} + // ======================================================= // WriteContext @@ -5168,16 +5163,6 @@ void BlueStore::Collection::split_cache( // may not be faulted in) auto rehome_blob = [&](Blob* b) { - for (auto& i : b->bc.buffer_map) { - if (!i.second.is_writing()) { - ldout(store->cct, 1) << __func__ << " moving " << i.second - << dendl; - dest->cache->_move(cache, &i.second); - } else { - ldout(store->cct, 1) << __func__ << " not moving " << i.second - << dendl; - } - } cache->rm_blob(); dest->cache->add_blob(); SharedBlob* sb = b->get_shared_blob().get(); @@ -5197,10 +5182,17 @@ void BlueStore::Collection::split_cache( }; for (auto& e : o->extent_map.extent_map) { - e.blob->last_encoded_id = -1; + e.blob->last_encoded_id = -1; } for (auto& b : o->extent_map.spanning_blob_map) { - b.second->last_encoded_id = -1; + b.second->last_encoded_id = -1; + } + + for (auto& b : o->bc.buffer_map) { + ceph_assert(!b.is_writing()); + ldout(store->cct, 1) + << __func__ << " moving " << b << dendl; + dest->cache->_move(cache, &b); } for (auto& e : o->extent_map.extent_map) { cache->rm_extent(); @@ -5226,7 +5218,6 @@ void BlueStore::Collection::split_cache( } dest->cache->_trim(); } - // ======================================================= // MempoolThread @@ -7788,9 +7779,26 @@ void BlueStore::_close_db() db = nullptr; if (do_destage && fm && fm->is_null_manager()) { + if (cct->_conf->osd_fast_shutdown) { + interval_set<uint64_t> discard_queued; + bdev->swap_discard_queued(discard_queued); + dout(10) << __func__ << "::discard_drain: size=" << discard_queued.size() + << " num_intervals=" << discard_queued.num_intervals() << dendl; + // copy discard_queued to the allocator before storing it + for (auto p = discard_queued.begin(); p != discard_queued.end(); ++p) { + dout(20) << __func__ << "::discarded-extent=[" << p.get_start() + << ", " << p.get_len() << "]" << 
dendl; + alloc->init_add_free(p.get_start(), p.get_len()); + } + } + + // When we reach here it is either a graceful shutdown (so we can drain the full discards-queue) + // or it was a fast shutdown, but we already moved the main discards-queue to the allocator + // and only need to wait for the threads' local discard_processing queues to drain + bdev->discard_drain(); int ret = store_allocator(alloc); - if (ret != 0) { - derr << __func__ << "::NCB::store_allocator() failed (continue with bitmapFreelistManager)" << dendl; + if (unlikely(ret != 0)) { + derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; } } @@ -8631,6 +8639,26 @@ int BlueStore::dump_bluefs_sizes(ostream& out) return r; } +void BlueStore::trim_free_space(const string& type, std::ostream& outss) +{ + auto iterated_allocation = [&](size_t off, size_t len) { + ceph_assert(len > 0); + interval_set<uint64_t> to_discard; + to_discard.union_insert(off, len); + bdev->try_discard(to_discard, false); + }; + if (type == "bdev-block") { + if (!bdev->is_discard_supported()) { + outss << "device " << type << " does not support trim"; + return; + } + shared_alloc.a->foreach(iterated_allocation); + outss << "device " << type << " trim done"; + } else { + bluefs->trim_free_space(type, outss); + } +} + void BlueStore::set_cache_shards(unsigned num) { dout(10) << __func__ << " " << num << dendl; @@ -8646,7 +8674,7 @@ void BlueStore::set_cache_shards(unsigned num) } for (unsigned i = bold; i < num; ++i) { buffer_cache_shards[i] = - BufferCacheShard::create(cct, cct->_conf->bluestore_cache_type, + BufferCacheShard::create(this, cct->_conf->bluestore_cache_type, logger); } } @@ -11288,6 +11316,22 @@ void BlueStore::inject_bluefs_file(std::string_view dir, std::string_view name, bluefs->close_writer(p_handle); } +int BlueStore::compact() +{ + int r = 0; + ceph_assert(db); + if (cct->_conf.get_val<bool>("bluestore_async_db_compaction")) { + dout(1) << __func__ << " starting async.." << dendl; + db->compact_async(); + r = -EINPROGRESS; + } else { + dout(1) << __func__ << " starting sync.." << dendl; + db->compact(); + dout(1) << __func__ << " finished."
<< dendl; + } + return r; +} + void BlueStore::collect_metadata(map<string,string> *pm) { dout(10) << __func__ << dendl; @@ -11833,11 +11877,11 @@ void BlueStore::_read_cache( ready_regions_t cache_res; interval_set<uint32_t> cache_interval; - bptr->dirty_bc().read( - bptr->get_cache(), b_off, b_len, cache_res, cache_interval, + o->bc.read( + o->c->cache, pos, b_len, cache_res, cache_interval, read_cache_policy); dout(20) << __func__ << " blob " << *bptr << std::hex - << " need 0x" << b_off << "~" << b_len + << " need 0x" << pos << "~" << b_len << " cache has 0x" << cache_interval << std::dec << dendl; @@ -11846,17 +11890,17 @@ void BlueStore::_read_cache( while (b_len > 0) { unsigned l; if (pc != cache_res.end() && - pc->first == b_off) { + pc->first == pos) { l = pc->second.length(); ready_regions[pos] = std::move(pc->second); dout(30) << __func__ << " use cache 0x" << std::hex << pos << ": 0x" - << b_off << "~" << l << std::dec << dendl; + << pos << "~" << l << std::dec << dendl; ++pc; } else { l = b_len; if (pc != cache_res.end()) { - ceph_assert(pc->first > b_off); - l = pc->first - b_off; + ceph_assert(pc->first > pos); + l = pc->first - pos; } dout(30) << __func__ << " will read 0x" << std::hex << pos << ": 0x" << b_off << "~" << l << std::dec << dendl; @@ -11991,8 +12035,10 @@ int BlueStore::_generate_read_result_bl( if (bptr->get_blob().is_compressed()) { ceph_assert(p != compressed_blob_bls.end()); bufferlist& compressed_bl = *p++; - if (_verify_csum(o, &bptr->get_blob(), 0, compressed_bl, - r2r.front().regs.front().logical_offset) < 0) { + uint32_t offset = r2r.front().regs.front().logical_offset; + uint32_t blob_offset = r2r.front().regs.front().blob_xoffset; + uint32_t length = r2r.front().regs.front().length; + if (_verify_csum(o, &bptr->get_blob(), 0, compressed_bl, offset) < 0) { *csum_error = true; return -EIO; } @@ -12001,8 +12047,9 @@ int BlueStore::_generate_read_result_bl( if (r < 0) return r; if (buffered) { - bptr->dirty_bc().did_read(bptr->get_cache(), 0, - raw_bl); + bufferlist region_buffer; + region_buffer.substr_of(raw_bl, blob_offset, length); + o->bc.did_read(o->c->cache, offset, std::move(region_buffer)); } for (auto& req : r2r) { for (auto& r : req.regs) { @@ -12012,18 +12059,20 @@ int BlueStore::_generate_read_result_bl( } } else { for (auto& req : r2r) { - if (_verify_csum(o, &bptr->get_blob(), req.r_off, req.bl, - req.regs.front().logical_offset) < 0) { + uint64_t offset = r2r.front().regs.front().logical_offset; + if (_verify_csum(o, &bptr->get_blob(), req.r_off, req.bl, offset) < 0) { *csum_error = true; return -EIO; } - if (buffered) { - bptr->dirty_bc().did_read(bptr->get_cache(), - req.r_off, req.bl); - } // prune and keep result for (const auto& r : req.regs) { + if (buffered) { + bufferlist region_buffer; + region_buffer.substr_of(req.bl, r.front, r.length); + // need offset before padding + o->bc.did_read(o->c->cache, r.logical_offset, std::move(region_buffer)); + } ready_regions[r.logical_offset].substr_of(req.bl, r.front, r.length); } } @@ -13441,13 +13490,15 @@ BlueStore::TransContext *BlueStore::_txc_create( txc->trace.init("TransContext", &trace_endpoint, &osd_op->pg_trace); txc->trace.event("txc create"); - txc->trace.keyval("txc seq", txc->seq); + //txc->trace.keyval("txc seq", txc->seq); + txc->trace.keyval("txc", txc); } #endif osr->queue_new(txc); dout(20) << __func__ << " osr " << osr << " = " << txc - << " seq " << txc->seq << dendl; + // << " seq " << txc->seq + << dendl; return txc; } @@ -13791,7 +13842,7 @@ void 
BlueStore::_txc_apply_kv(TransContext *txc, bool sync_submit_transaction) bluestore, transaction_kv_submit_latency, txc->osr->get_sequencer_id(), - txc->seq, + (uint64_t)txc, sync_submit_transaction, ceph::to_seconds<double>(mono_clock::now() - start)); } @@ -13841,10 +13892,8 @@ void BlueStore::_txc_finish(TransContext *txc) dout(20) << __func__ << " " << txc << " onodes " << txc->onodes << dendl; ceph_assert(txc->get_state() == TransContext::STATE_FINISHING); - for (auto& sb : txc->blobs_written) { - sb->finish_write(txc->seq); - } - txc->blobs_written.clear(); + txc->finish_writing(); + while (!txc->removed_collections.empty()) { _queue_reap_collection(txc->removed_collections.front()); txc->removed_collections.pop_front(); @@ -14333,7 +14382,7 @@ void BlueStore::_kv_sync_thread() bluestore, transaction_kv_sync_latency, txc->osr->get_sequencer_id(), - txc->seq, + (uint64_t)txc, kv_committing.size(), deferred_done.size(), deferred_stable.size(), @@ -15511,45 +15560,44 @@ void BlueStore::_do_write_small( uint64_t b_off = offset - head_pad - bstart; uint64_t b_len = length + head_pad + tail_pad; - // direct write into unused blocks of an existing mutable blob? - if ((b_off % chunk_size == 0 && b_len % chunk_size == 0) && - b->get_blob().get_ondisk_length() >= b_off + b_len && - b->get_blob().is_unused(b_off, b_len) && - b->get_blob().is_allocated(b_off, b_len)) { - _apply_padding(head_pad, tail_pad, bl); - - dout(20) << __func__ << " write to unused 0x" << std::hex - << b_off << "~" << b_len - << " pad 0x" << head_pad << " + 0x" << tail_pad - << std::dec << " of mutable " << *b << dendl; - _buffer_cache_write(txc, b, b_off, bl, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); - - if (!g_conf()->bluestore_debug_omit_block_device_write) { - if (b_len < prefer_deferred_size) { - dout(20) << __func__ << " deferring small 0x" << std::hex + // direct write into unused blocks of an existing mutable blob? + if ((b_off % chunk_size == 0 && b_len % chunk_size == 0) && + b->get_blob().get_ondisk_length() >= b_off + b_len && + b->get_blob().is_unused(b_off, b_len) && + b->get_blob().is_allocated(b_off, b_len)) { + _buffer_cache_write(txc, o, offset, bl, + wctx->buffered ? 
0 : Buffer::FLAG_NOCACHE); + _apply_padding(head_pad, tail_pad, bl); + + dout(20) << __func__ << " write to unused 0x" << std::hex << b_off + << "~" << b_len << " pad 0x" << head_pad << " + 0x" + << tail_pad << std::dec << " of mutable " << *b << dendl; + + if (!g_conf()->bluestore_debug_omit_block_device_write) { + if (b_len < prefer_deferred_size) { + dout(20) << __func__ << " deferring small 0x" << std::hex << b_len << std::dec << " unused write via deferred" << dendl; - bluestore_deferred_op_t *op = _get_deferred_op(txc, bl.length()); - op->op = bluestore_deferred_op_t::OP_WRITE; - b->get_blob().map( + bluestore_deferred_op_t *op = _get_deferred_op(txc, bl.length()); + op->op = bluestore_deferred_op_t::OP_WRITE; + b->get_blob().map( b_off, b_len, - [&](uint64_t offset, uint64_t length) { - op->extents.emplace_back(bluestore_pextent_t(offset, length)); - return 0; - }); - op->data = bl; - } else { - b->get_blob().map_bl( - b_off, bl, + [&](uint64_t offset, uint64_t length) { + op->extents.emplace_back(bluestore_pextent_t(offset, length)); + return 0; + }); + op->data = bl; + } else { + b->get_blob().map_bl( + b_off, bl, [&](uint64_t offset, bufferlist& t) { - bdev->aio_write(offset, t, + bdev->aio_write(offset, t, &txc->ioc, wctx->buffered); - }); - } - } - b->dirty_blob().calc_csum(b_off, bl); - dout(20) << __func__ << " lex old " << *ep << dendl; - Extent *le = o->extent_map.set_lextent(c, offset, b_off + head_pad, length, + }); + } + } + b->dirty_blob().calc_csum(b_off, bl); + dout(20) << __func__ << " lex old " << *ep << dendl; + Extent *le = o->extent_map.set_lextent(c, offset, b_off + head_pad, length, b, &wctx->old_extents); b->dirty_blob().mark_used(le->blob_offset, le->length); @@ -15611,36 +15659,36 @@ void BlueStore::_do_write_small( } logger->inc(l_bluestore_write_small_pre_read); - _buffer_cache_write(txc, b, b_off, bl, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, o, offset - head_read - head_pad, bl, + wctx->buffered ? 
0 : Buffer::FLAG_NOCACHE); - b->dirty_blob().calc_csum(b_off, bl); + b->dirty_blob().calc_csum(b_off, bl); - if (!g_conf()->bluestore_debug_omit_block_device_write) { - bluestore_deferred_op_t *op = _get_deferred_op(txc, bl.length()); - op->op = bluestore_deferred_op_t::OP_WRITE; - int r = b->get_blob().map( - b_off, b_len, + if (!g_conf()->bluestore_debug_omit_block_device_write) { + bluestore_deferred_op_t *op = _get_deferred_op(txc, bl.length()); + op->op = bluestore_deferred_op_t::OP_WRITE; + int r = b->get_blob().map( + b_off, b_len, [&](uint64_t offset, uint64_t length) { - op->extents.emplace_back(bluestore_pextent_t(offset, length)); - return 0; - }); - ceph_assert(r == 0); - op->data = std::move(bl); - dout(20) << __func__ << " deferred write 0x" << std::hex << b_off << "~" - << b_len << std::dec << " of mutable " << *b - << " at " << op->extents << dendl; - } + op->extents.emplace_back(bluestore_pextent_t(offset, length)); + return 0; + }); + ceph_assert(r == 0); + op->data = std::move(bl); + dout(20) << __func__ << " deferred write 0x" << std::hex << b_off + << "~" << b_len << std::dec << " of mutable " << *b << " at " + << op->extents << dendl; + } - Extent *le = o->extent_map.set_lextent(c, offset, offset - bstart, length, + Extent *le = o->extent_map.set_lextent(c, offset, offset - bstart, length, b, &wctx->old_extents); - b->dirty_blob().mark_used(le->blob_offset, le->length); - txc->statfs_delta.stored() += le->length; - dout(20) << __func__ << " lex " << *le << dendl; - return; - } - // try to reuse blob if we can - if (b->can_reuse_blob(min_alloc_size, + b->dirty_blob().mark_used(le->blob_offset, le->length); + txc->statfs_delta.stored() += le->length; + dout(20) << __func__ << " lex " << *le << dendl; + return; + } + // try to reuse blob if we can + if (b->can_reuse_blob(min_alloc_size, max_bsize, offset0 - bstart, &alloc_len)) { @@ -15899,7 +15947,7 @@ void BlueStore::_do_write_big_apply_deferred( logger->inc(l_bluestore_write_penalty_read_ops); } auto& b0 = dctx.blob_ref; - _buffer_cache_write(txc, b0, dctx.b_off, bl, + _buffer_cache_write(txc, o, dctx.off - dctx.head_read, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); b0->dirty_blob().calc_csum(dctx.b_off, bl); @@ -16436,7 +16484,9 @@ int BlueStore::_do_alloc_write( wi.b->dirty_blob().mark_used(le->blob_offset, le->length); txc->statfs_delta.stored() += le->length; dout(20) << __func__ << " lex " << *le << dendl; - _buffer_cache_write(txc, wi.b, b_off, wi.bl, + bufferlist without_pad; + without_pad.substr_of(wi.bl, wi.b_off0-wi.b_off, wi.length0); + _buffer_cache_write(txc, o, wi.logical_offset, std::move(without_pad), wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); // queue io @@ -16455,7 +16505,7 @@ int BlueStore::_do_alloc_write( return 0; }); ceph_assert(r == 0); - op->data = *l; + op->data = *l; } else { wi.b->get_blob().map_bl( b_off, *l, @@ -16487,7 +16537,7 @@ void BlueStore::_wctx_finish( const bluestore_blob_t& blob = b->get_blob(); if (blob.is_compressed()) { if (lo.blob_empty) { - txc->statfs_delta.compressed() -= blob.get_compressed_payload_length(); + txc->statfs_delta.compressed() -= blob.get_compressed_payload_length(); } txc->statfs_delta.compressed_original() -= lo.e.length; } @@ -16517,13 +16567,8 @@ void BlueStore::_wctx_finish( r.swap(final); } } - // we can't invalidate our logical extents as we drop them because - // other lextents (either in our onode or others) may still - // reference them. but we can throw out anything that is no - // longer allocated. 
Note that this will leave behind edge bits - // that are no longer referenced but not deallocated (until they - // age out of the cache naturally). - b->discard_unallocated(c.get()); + + b->maybe_prune_tail(); for (auto e : r) { dout(20) << __func__ << " release " << e << dendl; txc->released.insert(e.offset, e.length); @@ -16899,6 +16944,7 @@ int BlueStore::_do_zero(TransContext *txc, WriteContext wctx; o->extent_map.fault_range(db, offset, length); o->extent_map.punch_hole(c, offset, length, &wctx.old_extents); + o->bc.discard(o->c->cache, offset, length); o->extent_map.dirty_range(offset, length); _wctx_finish(txc, c, o, &wctx); @@ -16930,6 +16976,7 @@ void BlueStore::_do_truncate( WriteContext wctx; if (offset < o->onode.size) { uint64_t length = o->onode.size - offset; + o->bc.discard(o->c->cache, offset, length); o->extent_map.fault_range(db, offset, length); o->extent_map.punch_hole(c, offset, length, &wctx.old_extents); o->extent_map.dirty_range(offset, length); @@ -17529,6 +17576,8 @@ int BlueStore::_do_clone_range( << " 0x" << dstoff << "~" << length << std::dec << dendl; oldo->extent_map.fault_range(db, srcoff, length); newo->extent_map.fault_range(db, dstoff, length); + // it is possible the onode had previous buffers written + newo->bc.discard(c->cache, dstoff, length); _dump_onode<30>(cct, *oldo); _dump_onode<30>(cct, *newo); @@ -17960,7 +18009,7 @@ void BlueStore::BlueStoreThrottle::emit_initial_tracepoint( bluestore, transaction_initial_state, txc.osr->get_sequencer_id(), - txc.seq, + (uint64_t)&txc, throttle_bytes.get_current(), throttle_deferred_bytes.get_current(), pending_kv_ios, @@ -17973,7 +18022,7 @@ void BlueStore::BlueStoreThrottle::emit_initial_tracepoint( bluestore, transaction_initial_state_rocksdb, txc.osr->get_sequencer_id(), - txc.seq, + (uint64_t)&txc, rocksdb_base_level, rocksdb_estimate_pending_compaction_bytes, rocksdb_cur_size_all_mem_tables, @@ -18001,7 +18050,7 @@ mono_clock::duration BlueStore::BlueStoreThrottle::log_state_latency( bluestore, transaction_state_duration, txc.osr->get_sequencer_id(), - txc.seq, + (uint64_t)&txc, state, ceph::to_seconds<double>(lat)); } @@ -18045,7 +18094,7 @@ void BlueStore::BlueStoreThrottle::complete_kv(TransContext &txc) bluestore, transaction_commit_latency, txc.osr->get_sequencer_id(), - txc.seq, + (uint64_t)&txc, ceph::to_seconds<double>(mono_clock::now() - txc.start)); } } @@ -18064,7 +18113,7 @@ void BlueStore::BlueStoreThrottle::complete(TransContext &txc) bluestore, transaction_total_duration, txc.osr->get_sequencer_id(), - txc.seq, + (uint64_t)&txc, ceph::to_seconds<double>(lat)); } } @@ -18187,6 +18236,9 @@ void BlueStore::_shutdown_cache() ceph_assert(i->empty()); } for (auto& p : coll_map) { + // Clear deferred write buffers before clearing up Onodes + std::unique_lock l(p.second->lock); + p.second->onode_space.clear(); if (!p.second->shared_blob_set.empty()) { derr << __func__ << " stray shared blobs on " << p.first << dendl; @@ -18199,6 +18251,7 @@ void BlueStore::_shutdown_cache() for (auto i : onode_cache_shards) { ceph_assert(i->empty()); } + ceph_assert(Buffer::total == 0); } // For external caller. 
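The ceph_assert(Buffer::total == 0) added to _shutdown_cache() above relies on a simple leak-check idiom: a static atomic counter bumped in every Buffer constructor and dropped in the destructor, so shutdown can assert that no instance outlived the caches. A self-contained sketch of the same idiom (Counted is an illustrative stand-in):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    struct Counted {
      static std::atomic<uint64_t> total;
      Counted() { total++; }
      Counted(const Counted&) { total++; }
      ~Counted() { total--; }
    };
    std::atomic<uint64_t> Counted::total{0};

    int main() {
      {
        Counted a, b;
        assert(Counted::total == 2);
      }
      assert(Counted::total == 0);  // the shutdown-time invariant
    }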
@@ -18705,7 +18758,7 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) { << ", l_multi=" << byte_u_t(level_multiplier) << std::endl; constexpr std::array<const char*, 8> names{ { - "DEV/LEV", + "LEV/DEV", "WAL", "DB", "SLOW", @@ -19521,20 +19574,20 @@ void BlueStore::ExtentDecoderPartial::_consume_new_blob(bool spanning, } } else { auto it = sb_info.find(sbid); - if (it == sb_info.end()) { - derr << __func__ << " shared blob not found:" << sbid - << dendl; - } - auto &sbi = *it; - auto pool_id = oid.hobj.get_logical_pool(); - if (sbi.pool_id == sb_info_t::INVALID_POOL_ID) { - sbi.pool_id = pool_id; - size_t alloc_delta = sbi.allocated_chunks << min_alloc_size_order; - per_pool_statfs->allocated() += alloc_delta; - if (compressed) { - per_pool_statfs->compressed_allocated() += alloc_delta; - ++stats.compressed_blob_count; + if (it != sb_info.end()) { + auto &sbi = *it; + auto pool_id = oid.hobj.get_logical_pool(); + if (sbi.pool_id == sb_info_t::INVALID_POOL_ID) { + sbi.pool_id = pool_id; + size_t alloc_delta = sbi.allocated_chunks << min_alloc_size_order; + per_pool_statfs->allocated() += alloc_delta; + if (compressed) { + per_pool_statfs->compressed_allocated() += alloc_delta; + ++stats.compressed_blob_count; + } } + } else { + derr << __func__ << " shared blob not found:" << sbid << dendl; } if (compressed) { per_pool_statfs->compressed() += diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index a9b510e162e..f7eafe93a35 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -263,7 +263,9 @@ public: struct BufferSpace; struct Collection; + struct Onode; typedef boost::intrusive_ptr<Collection> CollectionRef; + typedef boost::intrusive_ptr<Onode> OnodeRef; struct AioContext { virtual void aio_finish(BlueStore *store) = 0; @@ -302,35 +304,29 @@ public: uint16_t state; ///< STATE_* uint16_t cache_private = 0; ///< opaque (to us) value used by Cache impl uint32_t flags; ///< FLAG_* - uint64_t seq; + TransContext* txc; uint32_t offset, length; ceph::buffer::list data; std::shared_ptr<int64_t> cache_age_bin; ///< cache age bin boost::intrusive::list_member_hook<> lru_item; - boost::intrusive::list_member_hook<> state_item; - - Buffer(BufferSpace *space, unsigned s, uint64_t q, uint32_t o, uint32_t l, - unsigned f = 0) - : space(space), state(s), flags(f), seq(q), offset(o), length(l) {} - Buffer(BufferSpace *space, unsigned s, uint64_t q, uint32_t o, ceph::buffer::list& b, - unsigned f = 0) - : space(space), state(s), flags(f), seq(q), offset(o), - length(b.length()), data(b) {} - - Buffer(Buffer &&other) { - std::swap(space, other.space); - std::swap(state, other.state); - std::swap(cache_private, other.cache_private); - std::swap(flags, other.flags); - std::swap(seq, other.seq); - std::swap(offset, other.offset); - std::swap(length, other.length); - std::swap(data, other.data); - std::swap(cache_age_bin, other.cache_age_bin); - lru_item.swap_nodes(other.lru_item); - state_item.swap_nodes(other.state_item); - } + boost::intrusive::set_member_hook<> set_item; + + static std::atomic<uint64_t> total; + + Buffer(BufferSpace *space, unsigned s, TransContext* _txc, + uint32_t o, uint32_t l, unsigned f = 0) + : space(space), state(s), flags(f), txc(_txc), offset(o), length(l) { total++; } + Buffer(BufferSpace *space, unsigned s, TransContext* _txc, + uint32_t o, ceph::buffer::list& b, unsigned f = 0) + : space(space), state(s), flags(f), txc(_txc), offset(o), + length(b.length()), data(b) { total++; } + Buffer(BufferSpace *space, unsigned s, 
TransContext* _txc, + uint32_t o, ceph::buffer::list&& b, unsigned f = 0) + : space(space), state(s), flags(f), txc(_txc), offset(o), + length(b.length()), data(std::move(b)) { total++; } + + ~Buffer() { total--; } bool is_empty() const { return state == STATE_EMPTY; @@ -365,13 +361,12 @@ public: void dump(ceph::Formatter *f) const { f->dump_string("state", get_state_name(state)); - f->dump_unsigned("seq", seq); + f->dump_unsigned("txc", (uint64_t)txc); f->dump_unsigned("offset", offset); f->dump_unsigned("length", length); f->dump_unsigned("data_length", data.length()); } }; - struct BufferCacheShard; /// map logical extent range (object) onto buffers @@ -380,83 +375,46 @@ public: BYPASS_CLEAN_CACHE = 0x1, // bypass clean cache }; - typedef boost::intrusive::list< + struct BufferKey { + using type = uint32_t; + const type &operator() (const Buffer& b) { + return b.offset; + } + }; + typedef boost::intrusive::set< Buffer, boost::intrusive::member_hook< Buffer, - boost::intrusive::list_member_hook<>, - &Buffer::state_item> > state_list_t; + boost::intrusive::set_member_hook<>, + &Buffer::set_item>, + boost::intrusive::key_of_value<BufferKey> > buffer_map_t; - mempool::bluestore_cache_meta::map<uint32_t, Buffer> - buffer_map; + buffer_map_t buffer_map; - // we use a bare intrusive list here instead of std::map because - // it uses less memory and we expect this to be very small (very - // few IOs in flight to the same Blob at the same time). - state_list_t writing; ///< writing buffers, sorted by seq, ascending + Onode& onode; + BufferSpace(Onode& _onode) : onode(_onode) {} ~BufferSpace() { ceph_assert(buffer_map.empty()); - ceph_assert(writing.empty()); - } - - void _add_buffer(BufferCacheShard *cache, BufferSpace *space, Buffer&& buffer, - uint16_t cache_private, int level, Buffer *near) { - auto it = buffer_map.emplace(buffer.offset, std::move(buffer)); - Buffer *cached_buffer = &it.first->second; - cached_buffer->cache_private = cache_private; - _add_buffer(cache, space, cached_buffer, level, near); - } - - void _add_buffer(BufferCacheShard *cache, BufferSpace *space, - Buffer *buffer, int level, Buffer *near) { - cache->_audit("_add_buffer start"); - if (buffer->is_writing()) { - // we might get already cached data for which resetting mempool is inppropriate - // hence calling try_assign_to_mempool - buffer->data.try_assign_to_mempool(mempool::mempool_bluestore_writing); - if (writing.empty() || writing.rbegin()->seq <= buffer->seq) { - writing.push_back(*buffer); - } else { - auto it = writing.begin(); - while (it->seq < buffer->seq) { - ++it; - } + } - ceph_assert(it->seq >= buffer->seq); - // note that this will insert b before it - // hence the order is maintained - writing.insert(it, *buffer); - } - } else { - buffer->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); - cache->_add(buffer, level, near); - } - cache->_audit("_add_buffer end"); - } - void _rm_buffer(BufferCacheShard* cache, Buffer *b) { - _rm_buffer(cache, buffer_map.find(b->offset)); - } - std::map<uint32_t, Buffer>::iterator - _rm_buffer(BufferCacheShard* cache, - std::map<uint32_t, Buffer>::iterator p) { - ceph_assert(p != buffer_map.end()); - cache->_audit("_rm_buffer start"); - if (p->second.is_writing()) { - writing.erase(writing.iterator_to(p->second)); - } else { - cache->_rm(&p->second); - } - p = buffer_map.erase(p); - cache->_audit("_rm_buffer end"); - return p; + void _add_buffer(BufferCacheShard* cache, + Buffer* b, + uint16_t cache_private, int level, Buffer *near); + + void 
_rm_buffer(BufferCacheShard* cache, + Buffer* b) { + ceph_assert(b->set_item.is_linked()); + __rm_buffer(cache, b); } + void __rm_buffer(BufferCacheShard* cache, Buffer* b); + void __erase_from_map(Buffer* b); - std::map<uint32_t, Buffer>::iterator _data_lower_bound(uint32_t offset) { + buffer_map_t::iterator _data_lower_bound(uint32_t offset) { auto i = buffer_map.lower_bound(offset); if (i != buffer_map.begin()) { --i; - if (i->first + i->second.length <= offset) + if (i->offset + i->length <= offset) ++i; } return i; @@ -466,55 +424,69 @@ public: void _clear(BufferCacheShard* cache); // return value is the highest cache_private of a trimmed buffer, or 0. - int discard(BufferCacheShard* cache, uint32_t offset, uint32_t length) { + int discard(BufferCacheShard* cache, + uint32_t offset, uint32_t length) { std::lock_guard l(cache->lock); int ret = _discard(cache, offset, length); cache->_trim(); return ret; } - int _discard(BufferCacheShard* cache, uint32_t offset, uint32_t length); + int _discard(BufferCacheShard* cache, + uint32_t offset, uint32_t length); - void write(BufferCacheShard* cache, uint64_t seq, uint32_t offset, ceph::buffer::list& bl, + void write(BufferCacheShard* cache, + TransContext* txc, uint32_t offset, ceph::buffer::list&& bl, unsigned flags) { std::lock_guard l(cache->lock); uint16_t cache_private = _discard(cache, offset, bl.length()); - _add_buffer(cache, this, - Buffer(this, Buffer::STATE_WRITING, seq, offset, bl, - flags), + _add_buffer(cache, + new Buffer(this, Buffer::STATE_WRITING, txc, offset, std::move(bl), flags), + cache_private, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr); + cache->_trim(); + } + void write(BufferCacheShard* cache, + TransContext* txc, uint32_t offset, ceph::buffer::list& bl, + unsigned flags) { + std::lock_guard l(cache->lock); + uint16_t cache_private = _discard(cache, offset, bl.length()); + _add_buffer(cache, + new Buffer(this, Buffer::STATE_WRITING, txc, offset, bl, flags), cache_private, (flags & Buffer::FLAG_NOCACHE) ? 
0 : 1, nullptr); cache->_trim(); } - void _finish_write(BufferCacheShard* cache, uint64_t seq); - void did_read(BufferCacheShard* cache, uint32_t offset, ceph::buffer::list& bl) { + void _finish_write(BufferCacheShard* cache, TransContext* txc, + uint32_t offset, uint32_t length); + void did_read(BufferCacheShard* cache, + uint32_t offset, ceph::buffer::list&& bl) { std::lock_guard l(cache->lock); uint16_t cache_private = _discard(cache, offset, bl.length()); _add_buffer( - cache, this, - Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl, 0), + cache, + new Buffer(this, Buffer::STATE_CLEAN, 0, offset, std::move(bl), 0), cache_private, 1, nullptr); cache->_trim(); } - void read(BufferCacheShard* cache, uint32_t offset, uint32_t length, + void read(BufferCacheShard* cache, + uint32_t offset, uint32_t length, BlueStore::ready_regions_t& res, interval_set<uint32_t>& res_intervals, int flags = 0); - void truncate(BufferCacheShard* cache, uint32_t offset) { + void truncate(BufferCacheShard* cache, + uint32_t offset) { discard(cache, offset, (uint32_t)-1 - offset); } - bool _dup_writing(BufferCacheShard* cache, BufferSpace* to); - void split(BufferCacheShard* cache, size_t pos, BufferSpace &r); + void _dup_writing(TransContext* txc, Collection* collection, OnodeRef onode, uint32_t offset, uint32_t length); void dump(BufferCacheShard* cache, ceph::Formatter *f) const { std::lock_guard l(cache->lock); f->open_array_section("buffers"); - for (auto& i : buffer_map) { + for (auto& b : buffer_map) { f->open_object_section("buffer"); - ceph_assert(i.first == i.second.offset); - i.second.dump(f); + b.dump(f); f->close_section(); } f->close_section(); @@ -647,7 +619,6 @@ public: ceph_assert(get_cache()); } Blob(CollectionRef collection) : collection(collection) {} - BufferSpace bc; private: SharedBlobRef shared_blob; ///< shared blob state (if any) mutable bluestore_blob_t blob; ///< decoded blob metadata @@ -692,10 +663,8 @@ public: } bool can_split() { - std::lock_guard l(get_cache()->lock); // splitting a BufferSpace writing list is too hard; don't try. 
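The _data_lower_bound helper above uses a common interval-lookup idiom: take lower_bound(offset), then step back one entry in case the preceding buffer starts before offset but still overlaps it. A standalone sketch, with a plain std::map standing in for the intrusive set:

    #include <cstdint>
    #include <map>

    using BufMap = std::map<uint32_t, uint32_t>;  // offset -> length

    // First buffer that may overlap [offset, ...): step back one entry after
    // lower_bound, then forward again if the predecessor ends too early.
    BufMap::iterator data_lower_bound(BufMap& m, uint32_t offset) {
      auto i = m.lower_bound(offset);
      if (i != m.begin()) {
        --i;
        if (i->first + i->second <= offset)
          ++i;
      }
      return i;
    }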
- return get_bc().writing.empty() && - used_in_blob.can_split() && + return used_in_blob.can_split() && get_blob().can_split(); } @@ -735,29 +704,17 @@ public: #endif return blob; } - /// clear buffers from unused sections - void discard_unused_buffers(CephContext* cct, BufferCacheShard* cache); - - inline const BufferSpace& get_bc() const { - return bc; - } - inline BufferSpace& dirty_bc() { - return bc; - } - - /// discard buffers for unallocated regions - void discard_unallocated(Collection *coll); /// get logical references void get_ref(Collection *coll, uint32_t offset, uint32_t length); /// put logical references, and get back any released extents bool put_ref(Collection *coll, uint32_t offset, uint32_t length, PExtentVector *r); - // update caches to reflect content up to seq - void finish_write(uint64_t seq); /// split the blob void split(Collection *coll, uint32_t blob_offset, Blob *o); + void maybe_prune_tail(); + void get() { ++nref; } @@ -948,14 +905,12 @@ public: boost::intrusive::list_member_hook<>, &OldExtent::old_extent_item> > old_extent_map_t; - struct Onode; /// a sharded extent map, mapping offsets to lextents to blobs struct ExtentMap { Onode *onode; extent_map_t extent_map; ///< map of Extents to Blobs blob_map_t spanning_blob_map; ///< blobs that span shards - typedef boost::intrusive_ptr<Onode> OnodeRef; struct Shard { bluestore_onode_t::shard_info *shard_info = nullptr; @@ -1328,6 +1283,7 @@ public: /// (it can be pinned and hence physically out /// of it at the moment though) ExtentMap extent_map; + BufferSpace bc; ///< buffer cache // track txc's that have not been committed to kv store (and whose // effects cannot be read via the kvdb read methods) @@ -1347,7 +1303,8 @@ public: cached(false), extent_map(this, c->store->cct->_conf-> - bluestore_extent_map_inline_shard_prealloc_size) { + bluestore_extent_map_inline_shard_prealloc_size), + bc(*this) { } Onode(Collection* c, const ghobject_t& o, const std::string& k) @@ -1358,7 +1315,8 @@ public: cached(false), extent_map(this, c->store->cct->_conf-> - bluestore_extent_map_inline_shard_prealloc_size) { + bluestore_extent_map_inline_shard_prealloc_size), + bc(*this) { } Onode(Collection* c, const ghobject_t& o, const char* k) @@ -1369,7 +1327,8 @@ public: cached(false), extent_map(this, c->store->cct->_conf-> - bluestore_extent_map_inline_shard_prealloc_size) { + bluestore_extent_map_inline_shard_prealloc_size), + bc(*this) { } Onode(CephContext* cct) : c(nullptr), @@ -1377,8 +1336,17 @@ public: cached(false), extent_map(this, cct->_conf-> - bluestore_extent_map_inline_shard_prealloc_size) { + bluestore_extent_map_inline_shard_prealloc_size), + bc(*this) { } + + ~Onode() { + if (c) { + std::lock_guard l(c->cache->lock); + bc._clear(c->cache); + } + } + static void decode_raw( BlueStore::Onode* on, const bufferlist& v, @@ -1433,10 +1401,11 @@ public: void rewrite_omap_key(const std::string& old, std::string *out); void decode_omap_key(const std::string& key, std::string *user_key); + void finish_write(TransContext* txc, uint32_t offset, uint32_t length); + private: void _decode(const ceph::buffer::list& v); }; - typedef boost::intrusive_ptr<Onode> OnodeRef; /// A generic Cache Shard struct CacheShard { @@ -1544,14 +1513,15 @@ private: std::atomic<uint64_t> num_extents = {0}; std::atomic<uint64_t> num_blobs = {0}; uint64_t buffer_bytes = 0; - public: - BufferCacheShard(CephContext* cct) : CacheShard(cct) {} + BufferCacheShard(BlueStore* store) + : CacheShard(store->cct) { + } virtual ~BufferCacheShard() { 
ceph_assert(num_blobs == 0); ceph_assert(num_extents == 0); } - static BufferCacheShard *create(CephContext* cct, std::string type, + static BufferCacheShard *create(BlueStore* store, std::string type, PerfCounters *logger); virtual void _add(Buffer *b, int level, Buffer *near) = 0; virtual void _rm(Buffer *b) = 0; @@ -1894,7 +1864,7 @@ private: std::set<OnodeRef> modified_objects; ///< objects we modified (and need a ref) std::set<SharedBlobRef> shared_blobs; ///< these need to be updated/written - std::set<BlobRef> blobs_written; ///< update these on io completion + KeyValueDB::Transaction t; ///< then we will commit this std::list<Context*> oncommits; ///< more commit completions std::list<CollectionRef> removed_collections; ///< colls we removed @@ -1909,7 +1879,7 @@ private: IOContext ioc; bool had_ios = false; ///< true if we submitted IOs before our kv txn - uint64_t seq = 0; + //uint64_t seq = 0; ceph::mono_clock::time_point start; ceph::mono_clock::time_point last_stamp; @@ -1924,6 +1894,21 @@ private: ZTracer::Trace trace; #endif + ceph::mutex writings_lock = ceph::make_mutex("BlueStore::TransContextWritings::lock"); + struct WriteObserverEntry { + Onode* onode; + uint32_t offset; + uint32_t length; + WriteObserverEntry(Onode* _o, uint32_t off, uint32_t len) + : onode(_o), offset(off), length(len) {} + }; + using write_list_t = mempool::bluestore_writing::list<WriteObserverEntry>; + write_list_t writings; + bool were_writings = false; + + bool add_writing(Onode* o, uint32_t off, uint32_t len); + void finish_writing(); + explicit TransContext(CephContext* cct, Collection *c, OpSequencer *o, std::list<Context*> *on_commits) : ch(c), @@ -2136,8 +2121,6 @@ private: BlueStore *store; coll_t cid; - uint64_t last_seq = 0; - std::atomic_int txc_with_unstable_io = {0}; ///< num txcs with unstable io std::atomic_int kv_committing_serially = {0}; @@ -2154,13 +2137,11 @@ private: void queue_new(TransContext *txc) { std::lock_guard l(qlock); - txc->seq = ++last_seq; q.push_back(*txc); } void undo_queue(TransContext* txc) { std::lock_guard l(qlock); ceph_assert(&q.back() == txc); - --last_seq; q.pop_back(); } @@ -2908,13 +2889,24 @@ private: void _buffer_cache_write( TransContext *txc, - BlobRef b, - uint64_t offset, + OnodeRef onode, + uint32_t offset, + ceph::buffer::list&& bl, + unsigned flags) { + onode->bc.write(onode->c->cache, + txc, offset, std::move(bl), + flags); + } + + void _buffer_cache_write( + TransContext *txc, + OnodeRef onode, + uint32_t offset, ceph::buffer::list& bl, unsigned flags) { - b->dirty_bc().write(b->get_cache(), txc->seq, offset, bl, - flags); - txc->blobs_written.insert(b); + onode->bc.write(onode->c->cache, + txc, offset, bl, + flags); } int _collection_list( @@ -3093,6 +3085,7 @@ public: std::string get_device_path(unsigned id); int dump_bluefs_sizes(std::ostream& out); + void trim_free_space(const std::string& type, std::ostream& outss); public: int statfs(struct store_statfs_t *buf, @@ -3403,10 +3396,7 @@ public: std::string_view name, size_t new_size); - void compact() override { - ceph_assert(db); - db->compact(); - } + int compact() override; bool has_builtin_csum() const override { return true; } @@ -4256,8 +4246,8 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector LEVEL_SLOW, LEVEL_MAX }; - // add +1 row for corresponding per-device totals - // add +1 column for per-level actual (taken from file size) total + // add +1 row for per-level actual (taken from file size) total + // add +1 column for corresponding per-device totals typedef 
matrix_2d<std::atomic<uint64_t>, BlueFS::MAX_BDEV + 1, LEVEL_MAX - LEVEL_FIRST + 1> per_level_per_dev_usage_t; per_level_per_dev_usage_t per_level_per_dev_usage; diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc index 5b2281a9ffd..e18dd490140 100644 --- a/src/os/bluestore/bluefs_types.cc +++ b/src/os/bluestore/bluefs_types.cc @@ -3,6 +3,7 @@ #include <algorithm> #include "bluefs_types.h" +#include "BlueFS.h" #include "common/Formatter.h" #include "include/denc.h" #include "include/uuid.h" @@ -74,22 +75,26 @@ void bluefs_layout_t::generate_test_instances(list<bluefs_layout_t*>& ls) } // bluefs_super_t +bluefs_super_t::bluefs_super_t() : version(0), block_size(4096) { + bluefs_max_alloc_size.resize(BlueFS::MAX_BDEV, 0); +} void bluefs_super_t::encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); + ENCODE_START(3, 1, bl); encode(uuid, bl); encode(osd_uuid, bl); encode(version, bl); encode(block_size, bl); encode(log_fnode, bl); encode(memorized_layout, bl); + encode(bluefs_max_alloc_size, bl); ENCODE_FINISH(bl); } void bluefs_super_t::decode(bufferlist::const_iterator& p) { - DECODE_START(2, p); + DECODE_START(3, p); decode(uuid, p); decode(osd_uuid, p); decode(version, p); @@ -98,6 +103,11 @@ void bluefs_super_t::decode(bufferlist::const_iterator& p) if (struct_v >= 2) { decode(memorized_layout, p); } + if (struct_v >= 3) { + decode(bluefs_max_alloc_size, p); + } else { + std::fill(bluefs_max_alloc_size.begin(), bluefs_max_alloc_size.end(), 0); + } DECODE_FINISH(p); } @@ -108,6 +118,8 @@ void bluefs_super_t::dump(Formatter *f) const f->dump_unsigned("version", version); f->dump_unsigned("block_size", block_size); f->dump_object("log_fnode", log_fnode); + for (auto& p : bluefs_max_alloc_size) + f->dump_unsigned("max_alloc_size", p); } void bluefs_super_t::generate_test_instances(list<bluefs_super_t*>& ls) @@ -125,6 +137,7 @@ ostream& operator<<(ostream& out, const bluefs_super_t& s) << " v " << s.version << " block_size 0x" << std::hex << s.block_size << " log_fnode 0x" << s.log_fnode + << " max_alloc_size " << s.bluefs_max_alloc_size << std::dec << ")"; } diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h index 6516f404e12..627118c12f8 100644 --- a/src/os/bluestore/bluefs_types.h +++ b/src/os/bluestore/bluefs_types.h @@ -219,9 +219,9 @@ struct bluefs_super_t { std::optional<bluefs_layout_t> memorized_layout; - bluefs_super_t() - : version(0), - block_size(4096) { } + std::vector<uint64_t> bluefs_max_alloc_size; + + bluefs_super_t(); uint64_t block_mask() const { return ~((uint64_t)block_size - 1); diff --git a/src/os/bluestore/bluestore_tool.cc b/src/os/bluestore/bluestore_tool.cc index 32cc5ecf4ed..173450d7961 100644 --- a/src/os/bluestore/bluestore_tool.cc +++ b/src/os/bluestore/bluestore_tool.cc @@ -285,9 +285,11 @@ int main(int argc, char **argv) string dest_file; string key, value; vector<string> allocs_name; + vector<string> bdev_type; string empty_sharding(1, '\0'); string new_sharding = empty_sharding; string resharding_ctrl; + string really; int log_level = 30; bool fsck_deep = false; po::options_description po_options("Options"); @@ -309,6 +311,8 @@ int main(int argc, char **argv) ("key,k", po::value<string>(&key), "label metadata key name") ("value,v", po::value<string>(&value), "label metadata value") ("allocator", po::value<vector<string>>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'") + ("bdev-type", po::value<vector<string>>(&bdev_type), "bdev type to inspect: 
'bdev-block'/'bdev-wal'/'bdev-db'") + ("really", po::value<string>(&really), "--yes-i-really-really-mean-it") ("sharding", po::value<string>(&new_sharding), "new sharding to apply") ("resharding-ctrl", po::value<string>(&resharding_ctrl), "gives control over resharding procedure details") ("op", po::value<string>(&action_aux), @@ -340,7 +344,8 @@ int main(int argc, char **argv) "free-fragmentation, " "bluefs-stats, " "reshard, " - "show-sharding") + "show-sharding, " + "trim") ; po::options_description po_all("All options"); po_all.add(po_options).add(po_positional); @@ -572,6 +577,29 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } } + if (action == "trim") { + if (path.empty()) { + cerr << "must specify bluestore path" << std::endl; + exit(EXIT_FAILURE); + } + if (really.empty() || strcmp(really.c_str(), "--yes-i-really-really-mean-it") != 0) { + cerr << "Trimming an unhealthy bluestore is a dangerous operation which could cause data loss, " + << "please run fsck and confirm with the --yes-i-really-really-mean-it option" + << std::endl; + exit(EXIT_FAILURE); + } + for (auto type : bdev_type) { + if (!type.empty() && + type != "bdev-block" && + type != "bdev-db" && + type != "bdev-wal") { + cerr << "unknown bdev type '" << type << "'" << std::endl; + exit(EXIT_FAILURE); + } + } + if (bdev_type.empty()) + bdev_type = vector<string>{"bdev-block", "bdev-db", "bdev-wal"}; + } if (action == "restore_cfb") { #ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION @@ -1175,6 +1203,20 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } cout << sharding << std::endl; + } else if (action == "trim") { + BlueStore bluestore(cct.get(), path); + int r = bluestore.cold_open(); + if (r < 0) { + cerr << "error from cold_open: " << cpp_strerror(r) << std::endl; + exit(EXIT_FAILURE); + } + for (auto type : bdev_type) { + cout << "trimming: " << type << std::endl; + ostringstream outss; + bluestore.trim_free_space(type, outss); + cout << "status: " << outss.str() << std::endl; + } + bluestore.cold_close(); } else { cerr << "unrecognized action " << action << std::endl; return 1; diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 7032ae904e9..c127bf14168 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -909,6 +909,8 @@ public: bool can_prune_tail() const { return + !is_shared() && + !is_compressed() && extents.size() > 1 && // if it's all invalid it's not pruning.
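      // (plausible rationale, not stated in the patch: a shared blob's tail
      // extents may still be referenced by other onodes' lextents, and a
      // compressed blob's extents hold the compressed payload rather than a
      // logical tail, so neither can be pruned safely)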
!extents.back().is_valid() && !has_unused(); diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h index 5b275dd3e01..9a9d413c66a 100644 --- a/src/os/kstore/KStore.h +++ b/src/os/kstore/KStore.h @@ -580,9 +580,10 @@ public: TrackedOpRef op = TrackedOpRef(), ThreadPool::TPHandle *handle = NULL) override; - void compact () override { + int compact () override { ceph_assert(db); db->compact(); + return 0; } private: diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 74f58520d46..beb9eacfd2a 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -195,8 +195,8 @@ struct RecoveryMessages { const map<pg_shard_t, vector<pair<int, int>>> &need, bool attrs) { - list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read; - to_read.push_back(boost::make_tuple(off, len, 0)); + list<ECCommon::ec_align_t> to_read; + to_read.emplace_back(ECCommon::ec_align_t{off, len, 0}); ceph_assert(!recovery_reads.count(hoid)); want_to_read.insert(make_pair(hoid, std::move(_want_to_read))); recovery_reads.insert( @@ -229,28 +229,20 @@ void ECBackend::handle_recovery_push( recovery_backend.handle_recovery_push(op, m, is_repair); - if (op.after_progress.data_complete) { - if ((get_parent()->pgb_is_primary())) { - if (get_parent()->pg_is_repair() || is_repair) - get_parent()->inc_osd_stat_repaired(); - } else { - // If primary told us this is a repair, bump osd_stat_t::num_objects_repaired - if (is_repair) - get_parent()->inc_osd_stat_repaired(); - if (get_parent()->pg_is_remote_backfilling()) { - struct stat st; - int r = store->stat(ch, ghobject_t(op.soid, ghobject_t::NO_GEN, - get_parent()->whoami_shard().shard), &st); - if (r == 0) { - get_parent()->pg_sub_local_num_bytes(st.st_size); - // XXX: This can be way overestimated for small objects - get_parent()->pg_sub_num_bytes(st.st_size * get_ec_data_chunk_count()); - dout(10) << __func__ << " " << op.soid - << " sub actual data by " << st.st_size - << " sub num_bytes by " << st.st_size * get_ec_data_chunk_count() - << dendl; - } - } + if (op.after_progress.data_complete && + !(get_parent()->pgb_is_primary()) && + get_parent()->pg_is_remote_backfilling()) { + struct stat st; + int r = store->stat(ch, ghobject_t(op.soid, ghobject_t::NO_GEN, + get_parent()->whoami_shard().shard), &st); + if (r == 0) { + get_parent()->pg_sub_local_num_bytes(st.st_size); + // XXX: This can be way overestimated for small objects + get_parent()->pg_sub_num_bytes(st.st_size * get_ec_data_chunk_count()); + dout(10) << __func__ << " " << op.soid + << " sub actual data by " << st.st_size + << " sub num_bytes by " << st.st_size * get_ec_data_chunk_count() + << dendl; } } } @@ -471,7 +463,8 @@ struct RecoveryReadCompleter : ECCommon::ReadCompleter { void finish_single_request( const hobject_t &hoid, ECCommon::read_result_t &res, - list<boost::tuple<uint64_t, uint64_t, uint32_t> >) override + list<ECCommon::ec_align_t>, + set<int> wanted_to_read) override { if (!(res.r == 0 && res.errors.empty())) { backend._failed_push(hoid, res); @@ -1036,7 +1029,7 @@ void ECBackend::handle_sub_read( if ((op.subchunks.find(i->first)->second.size() == 1) && (op.subchunks.find(i->first)->second.front().second == ec_impl->get_sub_chunk_count())) { - dout(25) << __func__ << " case1: reading the complete chunk/shard." << dendl; + dout(20) << __func__ << " case1: reading the complete chunk/shard." 
<< dendl; r = store->read( ch, ghobject_t(i->first, ghobject_t::NO_GEN, shard), @@ -1044,9 +1037,11 @@ void ECBackend::handle_sub_read( j->get<1>(), bl, j->get<2>()); // Allow EIO return } else { - dout(25) << __func__ << " case2: going to do fragmented read." << dendl; int subchunk_size = sinfo.get_chunk_size() / ec_impl->get_sub_chunk_count(); + dout(20) << __func__ << " case2: going to do fragmented read;" + << " subchunk_size=" << subchunk_size + << " chunk_size=" << sinfo.get_chunk_size() << dendl; bool error = false; for (int m = 0; m < (int)j->get<1>() && !error; m += sinfo.get_chunk_size()) { @@ -1222,7 +1217,7 @@ void ECBackend::handle_sub_read_reply( dout(20) << __func__ << " to_read skipping" << dendl; continue; } - list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator req_iter = + list<ec_align_t>::const_iterator req_iter = rop.to_read.find(i->first)->second.to_read.begin(); list< boost::tuple< @@ -1233,10 +1228,10 @@ void ECBackend::handle_sub_read_reply( ++j, ++req_iter, ++riter) { ceph_assert(req_iter != rop.to_read.find(i->first)->second.to_read.end()); ceph_assert(riter != rop.complete[i->first].returned.end()); - pair<uint64_t, uint64_t> adjusted = - sinfo.aligned_offset_len_to_chunk( - make_pair(req_iter->get<0>(), req_iter->get<1>())); - ceph_assert(adjusted.first == j->first); + pair<uint64_t, uint64_t> aligned = + sinfo.chunk_aligned_offset_len_to_chunk( + make_pair(req_iter->offset, req_iter->size)); + ceph_assert(aligned.first == j->first); riter->get<2>()[from] = std::move(j->second); } } @@ -1537,29 +1532,41 @@ int ECBackend::objects_read_sync( return -EOPNOTSUPP; } +static bool should_partial_read( + const ECUtil::stripe_info_t& sinfo, + uint64_t off, + uint32_t len, + bool fast_read) +{ + // Don't partial read if we are doing a fast_read + if (fast_read) { + return false; + } + // Same stripe only + return sinfo.offset_length_is_same_stripe(off, len); +} + void ECBackend::objects_read_async( const hobject_t &hoid, - const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>, - pair<bufferlist*, Context*> > > &to_read, + const list<pair<ECCommon::ec_align_t, + pair<bufferlist*, Context*>>> &to_read, Context *on_complete, bool fast_read) { - map<hobject_t,std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > > - reads; + map<hobject_t,std::list<ec_align_t>> reads; uint32_t flags = 0; extent_set es; - for (list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>, - pair<bufferlist*, Context*> > >::const_iterator i = - to_read.begin(); - i != to_read.end(); - ++i) { - pair<uint64_t, uint64_t> tmp = - sinfo.offset_len_to_stripe_bounds( - make_pair(i->first.get<0>(), i->first.get<1>())); - + for (const auto& [read, ctx] : to_read) { + pair<uint64_t, uint64_t> tmp; + if (!cct->_conf->osd_ec_partial_reads || + !should_partial_read(sinfo, read.offset, read.size, fast_read)) { + tmp = sinfo.offset_len_to_stripe_bounds(make_pair(read.offset, read.size)); + } else { + tmp = sinfo.offset_len_to_chunk_bounds(make_pair(read.offset, read.size)); + } es.union_insert(tmp.first, tmp.second); - flags |= i->first.get<2>(); + flags |= read.flags; } if (!es.empty()) { @@ -1567,32 +1574,28 @@ void ECBackend::objects_read_async( for (auto j = es.begin(); j != es.end(); ++j) { - offsets.push_back( - boost::make_tuple( - j.get_start(), - j.get_len(), - flags)); + offsets.emplace_back(ec_align_t{j.get_start(), j.get_len(), flags}); } } struct cb { ECBackend *ec; hobject_t hoid; - list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>, + list<pair<ECCommon::ec_align_t, 
pair<bufferlist*, Context*> > > to_read; unique_ptr<Context> on_complete; cb(const cb&) = delete; cb(cb &&) = default; cb(ECBackend *ec, const hobject_t &hoid, - const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>, + const list<pair<ECCommon::ec_align_t, pair<bufferlist*, Context*> > > &to_read, Context *on_complete) : ec(ec), hoid(hoid), to_read(to_read), on_complete(on_complete) {} - void operator()(map<hobject_t,pair<int, extent_map> > &&results) { + void operator()(ECCommon::ec_extents_t &&results) { auto dpp = ec->get_parent()->get_dpp(); ldpp_dout(dpp, 20) << "objects_read_async_cb: got: " << results << dendl; @@ -1603,24 +1606,24 @@ void ECBackend::objects_read_async( int r = 0; for (auto &&read: to_read) { - if (got.first < 0) { + if (got.err < 0) { // error handling if (read.second.second) { - read.second.second->complete(got.first); + read.second.second->complete(got.err); } if (r == 0) - r = got.first; + r = got.err; } else { ceph_assert(read.second.first); - uint64_t offset = read.first.get<0>(); - uint64_t length = read.first.get<1>(); - auto range = got.second.get_containing_range(offset, length); + uint64_t offset = read.first.offset; + uint64_t length = read.first.size; + auto range = got.emap.get_containing_range(offset, length); ceph_assert(range.first != range.second); ceph_assert(range.first.get_off() <= offset); - ldpp_dout(dpp, 30) << "offset: " << offset << dendl; - ldpp_dout(dpp, 30) << "range offset: " << range.first.get_off() << dendl; - ldpp_dout(dpp, 30) << "length: " << length << dendl; - ldpp_dout(dpp, 30) << "range length: " << range.first.get_len() << dendl; + ldpp_dout(dpp, 20) << "offset: " << offset << dendl; + ldpp_dout(dpp, 20) << "range offset: " << range.first.get_off() << dendl; + ldpp_dout(dpp, 20) << "length: " << length << dendl; + ldpp_dout(dpp, 20) << "range length: " << range.first.get_len() << dendl; ceph_assert( (offset + length) <= (range.first.get_off() + range.first.get_len())); @@ -1650,7 +1653,7 @@ void ECBackend::objects_read_async( reads, fast_read, make_gen_lambda_context< - map<hobject_t,pair<int, extent_map> > &&, cb>( + ECCommon::ec_extents_t &&, cb>( cb(this, hoid, to_read, @@ -1659,10 +1662,10 @@ void ECBackend::objects_read_async( void ECBackend::objects_read_and_reconstruct( const map<hobject_t, - std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > + std::list<ECBackend::ec_align_t> > &reads, bool fast_read, - GenContextURef<map<hobject_t,pair<int, extent_map> > &&> &&func) + GenContextURef<ECCommon::ec_extents_t &&> &&func) { return read_pipeline.objects_read_and_reconstruct( reads, fast_read, std::move(func)); diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index e61ec6a0cc5..910cdc064e4 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -141,15 +141,14 @@ public: * check_recovery_sources. 
*/ void objects_read_and_reconstruct( - const std::map<hobject_t, std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > - > &reads, + const std::map<hobject_t, std::list<ECCommon::ec_align_t>> &reads, bool fast_read, - GenContextURef<std::map<hobject_t,std::pair<int, extent_map> > &&> &&func) override; + GenContextURef<ECCommon::ec_extents_t &&> &&func) override; void objects_read_async( const hobject_t &hoid, - const std::list<std::pair<boost::tuple<uint64_t, uint64_t, uint32_t>, - std::pair<ceph::buffer::list*, Context*> > > &to_read, + const std::list<std::pair<ECCommon::ec_align_t, + std::pair<ceph::buffer::list*, Context*>>> &to_read, Context *on_complete, bool fast_read = false) override; diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc index 8752b54b462..02bb04c4a0a 100644 --- a/src/osd/ECCommon.cc +++ b/src/osd/ECCommon.cc @@ -63,6 +63,7 @@ static ostream& _prefix(std::ostream *_dout, // TODO: backref to ECListener? return *_dout; } +static ostream& _prefix(std::ostream *_dout, struct ClientReadCompleter *read_completer); ostream &operator<<(ostream &lhs, const ECCommon::RMWPipeline::pipeline_state_t &rhs) { switch (rhs.pipeline_state) { @@ -76,6 +77,19 @@ ostream &operator<<(ostream &lhs, const ECCommon::RMWPipeline::pipeline_state_t return lhs; // unreachable } +ostream &operator<<(ostream &lhs, const ECCommon::ec_align_t &rhs) +{ + return lhs << rhs.offset << "," + << rhs.size << "," + << rhs.flags; +} + +ostream &operator<<(ostream &lhs, const ECCommon::ec_extent_t &rhs) +{ + return lhs << rhs.err << "," + << rhs.emap; +} + ostream &operator<<(ostream &lhs, const ECCommon::read_request_t &rhs) { return lhs << "read_request_t(to_read=[" << rhs.to_read << "]" @@ -110,6 +124,7 @@ ostream &operator<<(ostream &lhs, const ECCommon::ReadOp &rhs) << ", priority=" << rhs.priority << ", obj_to_source=" << rhs.obj_to_source << ", source_to_obj=" << rhs.source_to_obj + << ", want_to_read=" << rhs.want_to_read << ", in_progress=" << rhs.in_progress << ")"; } @@ -126,6 +141,7 @@ void ECCommon::ReadOp::dump(Formatter *f) const f->dump_int("priority", priority); f->dump_stream("obj_to_source") << obj_to_source; f->dump_stream("source_to_obj") << source_to_obj; + f->dump_stream("want_to_read") << want_to_read; f->dump_stream("in_progress") << in_progress; } @@ -158,16 +174,19 @@ ostream &operator<<(ostream &lhs, const ECCommon::RMWPipeline::Op &rhs) void ECCommon::ReadPipeline::complete_read_op(ReadOp &rop) { - map<hobject_t, read_request_t>::iterator reqiter = + dout(20) << __func__ << " completing " << rop << dendl; + map<hobject_t, read_request_t>::iterator req_iter = rop.to_read.begin(); map<hobject_t, read_result_t>::iterator resiter = rop.complete.begin(); ceph_assert(rop.to_read.size() == rop.complete.size()); - for (; reqiter != rop.to_read.end(); ++reqiter, ++resiter) { + for (; req_iter != rop.to_read.end(); ++req_iter, ++resiter) { + ceph_assert(rop.want_to_read.contains(req_iter->first)); rop.on_complete->finish_single_request( - reqiter->first, + req_iter->first, resiter->second, - reqiter->second.to_read); + req_iter->second.to_read, + rop.want_to_read[req_iter->first]); } ceph_assert(rop.on_complete); std::move(*rop.on_complete).finish(rop.priority); @@ -298,6 +317,39 @@ int ECCommon::ReadPipeline::get_min_avail_to_read_shards( return 0; } +// a static for the sake of unit testing +void ECCommon::ReadPipeline::get_min_want_to_read_shards( + const uint64_t offset, + const uint64_t length, + const ECUtil::stripe_info_t& sinfo, + const vector<int>& chunk_mapping, + set<int>
*want_to_read) +{ + const auto [left_chunk_index, right_chunk_index] = + sinfo.offset_length_to_data_chunk_indices(offset, length); + for (uint64_t i = left_chunk_index; i < right_chunk_index; i++) { + auto raw_chunk = i % sinfo.get_data_chunk_count(); + auto chunk = chunk_mapping.size() > raw_chunk ? + chunk_mapping[raw_chunk] : static_cast<int>(raw_chunk); + if (auto [_, inserted] = want_to_read->insert(chunk); !inserted) { + // already processed all chunks + ceph_assert(want_to_read->size() == sinfo.get_data_chunk_count()); + break; + } + } +} + +void ECCommon::ReadPipeline::get_min_want_to_read_shards( + const uint64_t offset, + const uint64_t length, + set<int> *want_to_read) +{ + get_min_want_to_read_shards( + offset, length, sinfo, ec_impl->get_chunk_mapping(), want_to_read); + dout(20) << __func__ << ": offset " << offset << " length " << length + << " want_to_read " << *want_to_read << dendl; +} + int ECCommon::ReadPipeline::get_remaining_shards( const hobject_t &hoid, const set<int> &avail, @@ -400,12 +452,9 @@ void ECCommon::ReadPipeline::do_read_op(ReadOp &op) op.obj_to_source[i->first].insert(j->first); op.source_to_obj[j->first].insert(i->first); } - for (list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator j = - i->second.to_read.begin(); - j != i->second.to_read.end(); - ++j) { - pair<uint64_t, uint64_t> chunk_off_len = - sinfo.aligned_offset_len_to_chunk(make_pair(j->get<0>(), j->get<1>())); + for (const auto& read : i->second.to_read) { + auto p = make_pair(read.offset, read.size); + pair<uint64_t, uint64_t> chunk_off_len = sinfo.chunk_aligned_offset_len_to_chunk(p); for (auto k = i->second.need.begin(); k != i->second.need.end(); ++k) { @@ -413,7 +462,7 @@ void ECCommon::ReadPipeline::do_read_op(ReadOp &op) boost::make_tuple( chunk_off_len.first, chunk_off_len.second, - j->get<2>())); + read.flags)); } ceph_assert(!need_attrs); } @@ -470,19 +519,27 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { void finish_single_request( const hobject_t &hoid, ECCommon::read_result_t &res, - list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read) override + list<ECCommon::ec_align_t> to_read, + set<int> wanted_to_read) override { + auto* cct = read_pipeline.cct; + dout(20) << __func__ << " completing hoid=" << hoid + << " res=" << res << " to_read=" << to_read << dendl; extent_map result; if (res.r != 0) goto out; ceph_assert(res.returned.size() == to_read.size()); ceph_assert(res.errors.empty()); for (auto &&read: to_read) { - pair<uint64_t, uint64_t> adjusted = - read_pipeline.sinfo.offset_len_to_stripe_bounds( - make_pair(read.get<0>(), read.get<1>())); - ceph_assert(res.returned.front().get<0>() == adjusted.first); - ceph_assert(res.returned.front().get<1>() == adjusted.second); + const auto bounds = make_pair(read.offset, read.size); + // this configurable only preserves the old behavior and will be + // dropped; ReadPipeline is actually able to handle reads aligned + // to the chunk size. + const auto aligned = g_conf()->osd_ec_partial_reads + ?
read_pipeline.sinfo.offset_len_to_chunk_bounds(bounds) + : read_pipeline.sinfo.offset_len_to_stripe_bounds(bounds); + ceph_assert(res.returned.front().get<0>() == aligned.first); + ceph_assert(res.returned.front().get<1>() == aligned.second); map<int, bufferlist> to_decode; bufferlist bl; for (map<pg_shard_t, bufferlist>::iterator j = @@ -491,26 +548,36 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { ++j) { to_decode[j->first.shard] = std::move(j->second); } + dout(20) << __func__ << " going to decode: " + << " wanted_to_read=" << wanted_to_read + << " to_decode=" << to_decode + << dendl; int r = ECUtil::decode( read_pipeline.sinfo, read_pipeline.ec_impl, + wanted_to_read, to_decode, &bl); if (r < 0) { + dout(10) << __func__ << " error on ECUtil::decode r=" << r << dendl; res.r = r; goto out; } bufferlist trimmed; - trimmed.substr_of( - bl, - read.get<0>() - adjusted.first, - std::min(read.get<1>(), - bl.length() - (read.get<0>() - adjusted.first))); + auto off = read.offset - aligned.first; + auto len = std::min(read.size, bl.length() - off); + dout(20) << __func__ << " bl.length()=" << bl.length() + << " len=" << len << " read.size=" << read.size + << " off=" << off << " read.offset=" << read.offset + << dendl; + trimmed.substr_of(bl, off, len); result.insert( - read.get<0>(), trimmed.length(), std::move(trimmed)); + read.offset, trimmed.length(), std::move(trimmed)); res.returned.pop_front(); } out: + dout(20) << __func__ << " calling complete_object with result=" + << result << dendl; status->complete_object(hoid, res.r, std::move(result)); read_pipeline.kick_reads(); } @@ -523,13 +590,14 @@ out: ECCommon::ReadPipeline &read_pipeline; ECCommon::ClientAsyncReadStatus *status; }; +static ostream& _prefix(std::ostream *_dout, ClientReadCompleter *read_completer) { + return _prefix(_dout, &read_completer->read_pipeline); +} void ECCommon::ReadPipeline::objects_read_and_reconstruct( - const map<hobject_t, - std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > - > &reads, + const map<hobject_t, std::list<ECCommon::ec_align_t>> &reads, bool fast_read, - GenContextURef<map<hobject_t,pair<int, extent_map> > &&> &&func) + GenContextURef<ECCommon::ec_extents_t &&> &&func) { in_progress_client_reads.emplace_back( reads.size(), std::move(func)); @@ -539,11 +607,19 @@ void ECCommon::ReadPipeline::objects_read_and_reconstruct( } map<hobject_t, set<int>> obj_want_to_read; - set<int> want_to_read; - get_want_to_read_shards(&want_to_read); map<hobject_t, read_request_t> for_read_op; for (auto &&to_read: reads) { + set<int> want_to_read; + if (cct->_conf->osd_ec_partial_reads) { + for (const auto& single_region : to_read.second) { + get_min_want_to_read_shards(single_region.offset, + single_region.size, + &want_to_read); + } + } else { + get_want_to_read_shards(&want_to_read); + } map<pg_shard_t, vector<pair<int, int>>> shards; int r = get_min_avail_to_read_shards( to_read.first, @@ -553,6 +629,12 @@ void ECCommon::ReadPipeline::objects_read_and_reconstruct( &shards); ceph_assert(r == 0); + int subchunk_size = + sinfo.get_chunk_size() / ec_impl->get_sub_chunk_count(); + dout(20) << __func__ + << " subchunk_size=" << subchunk_size + << " chunk_size=" << sinfo.get_chunk_size() << dendl; + for_read_op.insert( make_pair( to_read.first, @@ -589,8 +671,7 @@ int ECCommon::ReadPipeline::send_all_remaining_reads( if (r) return r; - list<boost::tuple<uint64_t, uint64_t, uint32_t> > offsets = - rop.to_read.find(hoid)->second.to_read; + list<ec_align_t> to_read = rop.to_read.find(hoid)->second.to_read; 
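To make the shard-selection arithmetic in the hunks above easier to follow, here is a self-contained sketch of what the new get_min_want_to_read_shards() computes: the set of data shards that actually hold bytes of a logical [offset, offset+length) range. All names below are local to the sketch, and the layout in the closing note is an assumed example, not taken from the patch.

#include <cstdint>
#include <set>
#include <vector>

// Minimal model of EC partial-read shard selection. Walk the global
// data-chunk indices touched by the range; once a shard repeats, the
// range has wrapped the stripe and every data shard is already needed.
std::set<int> min_want_to_read(uint64_t offset, uint64_t length,
                               uint64_t chunk_size, uint64_t data_chunks,
                               const std::vector<int>& chunk_mapping)
{
  std::set<int> want;
  if (length == 0 || chunk_size == 0)
    return want;
  const uint64_t first = offset / chunk_size;                        // inclusive
  const uint64_t last = (offset + length + chunk_size - 1) / chunk_size;  // exclusive
  for (uint64_t i = first; i < last; ++i) {
    const uint64_t raw = i % data_chunks;  // position within the stripe
    const int shard = raw < chunk_mapping.size()
        ? chunk_mapping[raw]
        : static_cast<int>(raw);
    if (!want.insert(shard).second)
      break;  // wrapped around: the set already covers all data shards
  }
  return want;
}

For example, with four data chunks and chunk_size=4096, reading 8192 bytes at offset 0 needs only shards {0, 1} rather than the full stripe -- which is exactly the saving the osd_ec_partial_reads path is after.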
// (Note cuixf) If we need to read attrs and we read failed, try to read again. bool want_attrs = @@ -604,7 +685,7 @@ int ECCommon::ReadPipeline::send_all_remaining_reads( rop.to_read.insert(make_pair( hoid, read_request_t( - offsets, + to_read, shards, want_attrs))); return 0; @@ -694,9 +775,9 @@ bool ECCommon::RMWPipeline::try_state_to_reads() ceph_assert(get_parent()->get_pool().allows_ecoverwrites()); objects_read_async_no_cache( op->remote_read, - [op, this](map<hobject_t,pair<int, extent_map> > &&results) { + [op, this](ec_extents_t &&results) { for (auto &&i: results) { - op->remote_read_result.emplace(i.first, i.second.second); + op->remote_read_result.emplace(make_pair(i.first, i.second.emap)); } check_ops(); }); diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h index 3ceb3d295b2..88f2940111e 100644 --- a/src/osd/ECCommon.h +++ b/src/osd/ECCommon.h @@ -209,6 +209,20 @@ struct ECListener { }; struct ECCommon { + struct ec_align_t { + uint64_t offset; + uint64_t size; + uint32_t flags; + }; + friend std::ostream &operator<<(std::ostream &lhs, const ec_align_t &rhs); + + struct ec_extent_t { + int err; + extent_map emap; + }; + friend std::ostream &operator<<(std::ostream &lhs, const ec_extent_t &rhs); + using ec_extents_t = std::map<hobject_t, ec_extent_t>; + virtual ~ECCommon() = default; virtual void handle_sub_write( @@ -220,17 +234,16 @@ struct ECCommon { ) = 0; virtual void objects_read_and_reconstruct( - const std::map<hobject_t, std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > - > &reads, + const std::map<hobject_t, std::list<ec_align_t>> &reads, bool fast_read, - GenContextURef<std::map<hobject_t,std::pair<int, extent_map> > &&> &&func) = 0; + GenContextURef<ec_extents_t &&> &&func) = 0; struct read_request_t { - const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read; + const std::list<ec_align_t> to_read; std::map<pg_shard_t, std::vector<std::pair<int, int>>> need; bool want_attrs; read_request_t( - const std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > &to_read, + const std::list<ec_align_t> &to_read, const std::map<pg_shard_t, std::vector<std::pair<int, int>>> &need, bool want_attrs) : to_read(to_read), need(need), want_attrs(want_attrs) {} @@ -272,7 +285,8 @@ struct ECCommon { virtual void finish_single_request( const hobject_t &hoid, read_result_t &res, - std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read) = 0; + std::list<ECCommon::ec_align_t> to_read, + std::set<int> wanted_to_read) = 0; virtual void finish(int priority) && = 0; @@ -282,11 +296,11 @@ struct ECCommon { friend struct CallClientContexts; struct ClientAsyncReadStatus { unsigned objects_to_read; - GenContextURef<std::map<hobject_t,std::pair<int, extent_map> > &&> func; - std::map<hobject_t,std::pair<int, extent_map> > results; + GenContextURef<ec_extents_t &&> func; + ec_extents_t results; explicit ClientAsyncReadStatus( unsigned objects_to_read, - GenContextURef<std::map<hobject_t,std::pair<int, extent_map> > &&> &&func) + GenContextURef<ec_extents_t &&> &&func) : objects_to_read(objects_to_read), func(std::move(func)) {} void complete_object( const hobject_t &hoid, @@ -295,7 +309,7 @@ struct ECCommon { ceph_assert(objects_to_read); --objects_to_read; ceph_assert(!results.count(hoid)); - results.emplace(hoid, std::make_pair(err, std::move(buffers))); + results.emplace(hoid, ec_extent_t{err, std::move(buffers)}); } bool is_complete() const { return objects_to_read == 0; @@ -353,8 +367,8 @@ struct ECCommon { for (auto &&extent: hpair.second.to_read) { 
returned.push_back( boost::make_tuple( - extent.get<0>(), - extent.get<1>(), + extent.offset, + extent.size, std::map<pg_shard_t, ceph::buffer::list>())); } } @@ -365,10 +379,9 @@ struct ECCommon { }; struct ReadPipeline { void objects_read_and_reconstruct( - const std::map<hobject_t, std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > - > &reads, + const std::map<hobject_t, std::list<ec_align_t>> &reads, bool fast_read, - GenContextURef<std::map<hobject_t,std::pair<int, extent_map> > &&> &&func); + GenContextURef<ec_extents_t &&> &&func); template <class F, class G> void filter_read_op( @@ -429,6 +442,27 @@ struct ECCommon { parent(parent) { } + /** + * While get_want_to_read_shards creates a want_to_read spanning all of + * the EC plugin's get_data_chunk_count() chunks (the full stripe), this + * method inserts only the chunks actually needed to read the requested + * length of data. That is, it enables a so-called "partial read" -- + * fetching just a subset of the stripe. + * + * Like get_want_to_read_shards, it honors the plugin's chunk mapping. + */ + void get_min_want_to_read_shards( + uint64_t offset, ///< [in] + uint64_t length, ///< [in] + std::set<int> *want_to_read ///< [out] + ); + static void get_min_want_to_read_shards( + const uint64_t offset, + const uint64_t length, + const ECUtil::stripe_info_t& sinfo, + const std::vector<int>& chunk_mapping, + std::set<int> *want_to_read); + int get_remaining_shards( const hobject_t &hoid, const std::set<int> &avail, @@ -618,18 +652,18 @@ struct ECCommon { const std::map<hobject_t,extent_set> &to_read, Func &&on_complete ) { - std::map<hobject_t,std::list<boost::tuple<uint64_t, uint64_t, uint32_t> > > _to_read; + std::map<hobject_t, std::list<ec_align_t>> _to_read; for (auto &&hpair: to_read) { auto &l = _to_read[hpair.first]; for (auto extent: hpair.second) { - l.emplace_back(extent.first, extent.second, 0); + l.emplace_back(ec_align_t{extent.first, extent.second, 0}); } } ec_backend.objects_read_and_reconstruct( _to_read, false, make_gen_lambda_context< - std::map<hobject_t,std::pair<int, extent_map> > &&, Func>( + ECCommon::ec_extents_t &&, Func>( std::forward<Func>(on_complete))); } void handle_sub_write( diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc index 94b32845847..6d9477a99af 100644 --- a/src/osd/ECUtil.cc +++ b/src/osd/ECUtil.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- #include <errno.h> +#include "common/ceph_context.h" +#include "global/global_context.h" #include "include/encoding.h" #include "ECUtil.h" @@ -9,11 +11,20 @@ using ceph::bufferlist; using ceph::ErasureCodeInterfaceRef; using ceph::Formatter; +std::pair<uint64_t, uint64_t> ECUtil::stripe_info_t::chunk_aligned_offset_len_to_chunk( + std::pair<uint64_t, uint64_t> in) const { + return std::make_pair( + chunk_aligned_logical_offset_to_chunk_offset(in.first), + chunk_aligned_logical_size_to_chunk_size(in.second)); +} + int ECUtil::decode( const stripe_info_t &sinfo, ErasureCodeInterfaceRef &ec_impl, + const set<int> want_to_read, map<int, bufferlist> &to_decode, - bufferlist *out) { + bufferlist *out) +{ ceph_assert(to_decode.size()); uint64_t total_data_size = to_decode.begin()->second.length(); @@ -39,9 +50,9 @@ int ECUtil::decode( chunks[j->first].substr_of(j->second, i, sinfo.get_chunk_size()); } bufferlist bl; - int r = ec_impl->decode_concat(chunks, &bl); + int r = ec_impl->decode_concat(want_to_read, chunks, &bl); ceph_assert(r == 0); - ceph_assert(bl.length() == sinfo.get_stripe_width()); + ceph_assert(bl.length() % sinfo.get_chunk_size() == 0);
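The relaxed assertion above falls out of the decode-output size under partial reads: per stripe iteration, decode_concat() now returns only the chunks listed in want_to_read, concatenated, so the output length is a multiple of the chunk size rather than necessarily the full stripe width. A toy check of that invariant, assuming a four-data-chunk layout; names are local to this sketch:

#include <cassert>
#include <cstdint>
#include <set>

// Expected bytes produced per stripe iteration when decoding only the
// wanted subset of data chunks.
uint64_t expected_decoded_len(const std::set<int>& want_to_read,
                              uint64_t chunk_size)
{
  return want_to_read.size() * chunk_size;
}

int main()
{
  const uint64_t chunk_size = 4096;            // assumed profile
  const uint64_t stripe_width = 4 * chunk_size;
  const std::set<int> want{0, 1};              // partial read: two chunks
  const uint64_t len = expected_decoded_len(want, chunk_size);
  assert(len % chunk_size == 0);   // the relaxed assert always holds
  assert(len < stripe_width);      // the old '== stripe_width' check would not
  return 0;
}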
out->claim_append(bl); } return 0; diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h index dce78b8a868..c84a87ee380 100644 --- a/src/osd/ECUtil.h +++ b/src/osd/ECUtil.h @@ -42,6 +42,9 @@ public: uint64_t get_chunk_size() const { return chunk_size; } + uint64_t get_data_chunk_count() const { + return get_stripe_width() / get_chunk_size(); + } uint64_t logical_to_prev_chunk_offset(uint64_t offset) const { return (offset / stripe_width) * chunk_size; } @@ -60,16 +63,26 @@ public: ceph_assert(offset % stripe_width == 0); return (offset / stripe_width) * chunk_size; } + uint64_t chunk_aligned_logical_offset_to_chunk_offset(uint64_t offset) const { + [[maybe_unused]] const auto residue_in_stripe = offset % stripe_width; + ceph_assert(residue_in_stripe % chunk_size == 0); + ceph_assert(stripe_width % chunk_size == 0); + // this rounds down + return (offset / stripe_width) * chunk_size; + } + uint64_t chunk_aligned_logical_size_to_chunk_size(uint64_t len) const { + [[maybe_unused]] const auto residue_in_stripe = len % stripe_width; + ceph_assert(residue_in_stripe % chunk_size == 0); + ceph_assert(stripe_width % chunk_size == 0); + // this rounds up + return ((len + stripe_width - 1) / stripe_width) * chunk_size; + } uint64_t aligned_chunk_offset_to_logical_offset(uint64_t offset) const { ceph_assert(offset % chunk_size == 0); return (offset / chunk_size) * stripe_width; } - std::pair<uint64_t, uint64_t> aligned_offset_len_to_chunk( - std::pair<uint64_t, uint64_t> in) const { - return std::make_pair( - aligned_logical_offset_to_chunk_offset(in.first), - aligned_logical_offset_to_chunk_offset(in.second)); - } + std::pair<uint64_t, uint64_t> chunk_aligned_offset_len_to_chunk( + std::pair<uint64_t, uint64_t> in) const; std::pair<uint64_t, uint64_t> offset_len_to_stripe_bounds( std::pair<uint64_t, uint64_t> in) const { uint64_t off = logical_to_prev_stripe_offset(in.first); @@ -77,11 +90,38 @@ public: (in.first - off) + in.second); return std::make_pair(off, len); } + std::pair<uint64_t, uint64_t> offset_len_to_chunk_bounds( + std::pair<uint64_t, uint64_t> in) const { + uint64_t off = in.first - (in.first % chunk_size); + uint64_t tmp_len = (in.first - off) + in.second; + uint64_t len = ((tmp_len % chunk_size) ? 
+ (tmp_len - (tmp_len % chunk_size) + chunk_size) : + tmp_len); + return std::make_pair(off, len); + } + std::pair<uint64_t, uint64_t> offset_length_to_data_chunk_indices( + uint64_t off, uint64_t len) const { + assert(chunk_size > 0); + const auto first_chunk_idx = (off / chunk_size); + const auto last_chunk_idx = (chunk_size - 1 + off + len) / chunk_size; + return {first_chunk_idx, last_chunk_idx}; + } + bool offset_length_is_same_stripe( + uint64_t off, uint64_t len) const { + if (len == 0) { + return true; + } + assert(chunk_size > 0); + const auto first_stripe_idx = off / stripe_width; + const auto last_inc_stripe_idx = (off + len - 1) / stripe_width; + return first_stripe_idx == last_inc_stripe_idx; + } }; int decode( const stripe_info_t &sinfo, ceph::ErasureCodeInterfaceRef &ec_impl, + const std::set<int> want_to_read, std::map<int, ceph::buffer::list> &to_decode, ceph::buffer::list *out); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 7dcebe83b5a..2b59c7c87e4 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1693,6 +1693,11 @@ void OSDService::enqueue_front(OpSchedulerItem&& qi) osd->op_shardedwq.queue_front(std::move(qi)); } +double OSDService::get_cost_per_io() const +{ + return osd->op_shardedwq.get_cost_per_io(); +} + void OSDService::queue_recovery_context( PG *pg, GenContext<ThreadPool::TPHandle&> *c, @@ -1761,56 +1766,66 @@ template <class MSG_TYPE> void OSDService::queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority, unsigned int qu_priority, - Scrub::act_token_t act_token) + Scrub::act_token_t act_token, + uint64_t cost) { const auto epoch = pg->get_osdmap_epoch(); auto msg = new MSG_TYPE(pg->get_pgid(), epoch, act_token); dout(15) << "queue a scrub event (" << *msg << ") for " << *pg << ". Epoch: " << epoch << " token: " << act_token << dendl; enqueue_back(OpSchedulerItem( - unique_ptr<OpSchedulerItem::OpQueueable>(msg), get_scrub_cost(), + unique_ptr<OpSchedulerItem::OpQueueable>(msg), cost, pg->scrub_requeue_priority(with_priority, qu_priority), ceph_clock_now(), 0, epoch)); } template <class MSG_TYPE> void OSDService::queue_scrub_event_msg(PG* pg, - Scrub::scrub_prio_t with_priority) + Scrub::scrub_prio_t with_priority, + uint64_t cost) { const auto epoch = pg->get_osdmap_epoch(); auto msg = new MSG_TYPE(pg->get_pgid(), epoch); dout(15) << "queue a scrub event (" << *msg << ") for " << *pg << ". 
Epoch: " << epoch << dendl; enqueue_back(OpSchedulerItem( - unique_ptr<OpSchedulerItem::OpQueueable>(msg), get_scrub_cost(), + unique_ptr<OpSchedulerItem::OpQueueable>(msg), cost, pg->scrub_requeue_priority(with_priority), ceph_clock_now(), 0, epoch)); } -int64_t OSDService::get_scrub_cost() +template <class MSG_TYPE> +void OSDService::queue_scrub_event_msg_default_cost(PG* pg, + Scrub::scrub_prio_t with_priority, + unsigned int qu_priority, + Scrub::act_token_t act_token) { + uint64_t cost = cct->_conf->osd_scrub_event_cost; + queue_scrub_event_msg<MSG_TYPE>(pg, with_priority, qu_priority, act_token, cost); +} - int64_t cost_for_queue = cct->_conf->osd_scrub_cost; - if (op_queue_type_t::mClockScheduler == osd->osd_op_queue_type()) { - cost_for_queue = cct->_conf->osd_scrub_event_cost * - cct->_conf->osd_shallow_scrub_chunk_max; - } - return cost_for_queue; +template <class MSG_TYPE> +void OSDService::queue_scrub_event_msg_default_cost(PG* pg, + Scrub::scrub_prio_t with_priority) +{ + uint64_t cost = cct->_conf->osd_scrub_event_cost; + queue_scrub_event_msg<MSG_TYPE>(pg, with_priority, cost); } void OSDService::queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority) { - queue_scrub_event_msg<PGScrub>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrub>(pg, with_priority); } void OSDService::queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority) { - queue_scrub_event_msg<PGScrubAfterRepair>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubAfterRepair>(pg, with_priority); } void OSDService::queue_for_rep_scrub(PG* pg, Scrub::scrub_prio_t with_priority, unsigned int qu_priority, - Scrub::act_token_t act_token) + Scrub::act_token_t act_token, + uint64_t cost) { - queue_scrub_event_msg<PGRepScrub>(pg, with_priority, qu_priority, act_token); + queue_scrub_event_msg<PGRepScrub>(pg, with_priority, qu_priority, act_token, cost); } void OSDService::queue_for_rep_scrub_resched(PG* pg, @@ -1819,73 +1834,73 @@ void OSDService::queue_for_rep_scrub_resched(PG* pg, Scrub::act_token_t act_token) { // Resulting scrub event: 'SchedReplica' - queue_scrub_event_msg<PGRepScrubResched>(pg, with_priority, qu_priority, - act_token); + queue_scrub_event_msg_default_cost<PGRepScrubResched>(pg, with_priority, qu_priority, + act_token); } void OSDService::queue_for_scrub_resched(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'InternalSchedScrub' - queue_scrub_event_msg<PGScrubResched>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubResched>(pg, with_priority); } void OSDService::queue_scrub_pushes_update(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'ActivePushesUpd' - queue_scrub_event_msg<PGScrubPushesUpdate>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubPushesUpdate>(pg, with_priority); } -void OSDService::queue_scrub_chunk_free(PG* pg, Scrub::scrub_prio_t with_priority) +void OSDService::queue_scrub_chunk_free(PG* pg, Scrub::scrub_prio_t with_priority, uint64_t cost) { // Resulting scrub event: 'SelectedChunkFree' - queue_scrub_event_msg<PGScrubChunkIsFree>(pg, with_priority); + queue_scrub_event_msg<PGScrubChunkIsFree>(pg, with_priority, cost); } void OSDService::queue_scrub_chunk_busy(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'ChunkIsBusy' - queue_scrub_event_msg<PGScrubChunkIsBusy>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubChunkIsBusy>(pg, with_priority); } void OSDService::queue_scrub_applied_update(PG* pg, Scrub::scrub_prio_t 
with_priority) { - queue_scrub_event_msg<PGScrubAppliedUpdate>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubAppliedUpdate>(pg, with_priority); } void OSDService::queue_scrub_unblocking(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'Unblocked' - queue_scrub_event_msg<PGScrubUnblocked>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubUnblocked>(pg, with_priority); } void OSDService::queue_scrub_digest_update(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'DigestUpdate' - queue_scrub_event_msg<PGScrubDigestUpdate>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubDigestUpdate>(pg, with_priority); } void OSDService::queue_scrub_got_repl_maps(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'GotReplicas' - queue_scrub_event_msg<PGScrubGotReplMaps>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubGotReplMaps>(pg, with_priority); } void OSDService::queue_scrub_replica_pushes(PG *pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'ReplicaPushesUpd' - queue_scrub_event_msg<PGScrubReplicaPushes>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubReplicaPushes>(pg, with_priority); } void OSDService::queue_scrub_is_finished(PG *pg) { // Resulting scrub event: 'ScrubFinished' - queue_scrub_event_msg<PGScrubScrubFinished>(pg, Scrub::scrub_prio_t::high_priority); + queue_scrub_event_msg_default_cost<PGScrubScrubFinished>(pg, Scrub::scrub_prio_t::high_priority); } void OSDService::queue_scrub_next_chunk(PG *pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'NextChunk' - queue_scrub_event_msg<PGScrubGetNextChunk>(pg, with_priority); + queue_scrub_event_msg_default_cost<PGScrubGetNextChunk>(pg, with_priority); } void OSDService::queue_for_pg_delete(spg_t pgid, epoch_t e, int64_t num_objects) @@ -2985,15 +3000,22 @@ will start to track new ops received afterwards."; } else if (prefix == "compact") { dout(1) << "triggering manual compaction" << dendl; auto start = ceph::coarse_mono_clock::now(); - store->compact(); - auto end = ceph::coarse_mono_clock::now(); - double duration = std::chrono::duration<double>(end-start).count(); - dout(1) << "finished manual compaction in " - << duration - << " seconds" << dendl; - f->open_object_section("compact_result"); - f->dump_float("elapsed_time", duration); - f->close_section(); + int r = store->compact(); + if (r == 0) { + auto end = ceph::coarse_mono_clock::now(); + double duration = std::chrono::duration<double>(end-start).count(); + + dout(1) << "finished manual compaction in " + << duration + << " seconds" << dendl; + f->open_object_section("compact_result"); + f->dump_float("elapsed_time", duration); + f->close_section(); + } else if ( r == -EINPROGRESS) { + dout(1) << "manual compaction is being executed asynchronously" << dendl; + } else { + derr << "error starting manual compaction:" << cpp_strerror(r) << dendl; + } } else if (prefix == "get_mapped_pools") { f->open_array_section("mapped_pools"); set<int64_t> poollist = get_mapped_pools(); @@ -3909,7 +3931,7 @@ int OSD::init() dout(2) << "superblock: I am osd." 
<< superblock.whoami << dendl; if (cct->_conf.get_val<bool>("osd_compact_on_start")) { - dout(2) << "compacting object store's omap" << dendl; + dout(2) << "compacting object store's DB" << dendl; store->compact(); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index c762f14c462..0ccfdb05d8e 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -119,6 +119,8 @@ public: void enqueue_back(OpSchedulerItem&& qi); void enqueue_front(OpSchedulerItem&& qi); + /// scheduler cost per io, only valid for mclock, asserts for wpq + double get_cost_per_io() const; void maybe_inject_dispatch_delay() { if (g_conf()->osd_debug_inject_dispatch_delay_probability > 0) { @@ -525,7 +527,7 @@ public: void queue_scrub_applied_update(PG* pg, Scrub::scrub_prio_t with_priority); /// Signals that the selected chunk (objects range) is available for scrubbing - void queue_scrub_chunk_free(PG* pg, Scrub::scrub_prio_t with_priority); + void queue_scrub_chunk_free(PG* pg, Scrub::scrub_prio_t with_priority, uint64_t cost); /// The chunk selected is blocked by user operations, and cannot be scrubbed now void queue_scrub_chunk_busy(PG* pg, Scrub::scrub_prio_t with_priority); @@ -551,7 +553,8 @@ public: void queue_for_rep_scrub(PG* pg, Scrub::scrub_prio_t with_high_priority, unsigned int qu_priority, - Scrub::act_token_t act_token); + Scrub::act_token_t act_token, + uint64_t cost); /// Signals a change in the number of in-flight recovery writes void queue_scrub_replica_pushes(PG *pg, Scrub::scrub_prio_t with_priority); @@ -584,14 +587,20 @@ private: void queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority, unsigned int qu_priority, - Scrub::act_token_t act_token); + Scrub::act_token_t act_token, + uint64_t cost); /// An alternative version of queue_scrub_event_msg(), in which the queuing priority is /// provided by the executing scrub (i.e. taken from PgScrubber::m_flags) template <class MSG_TYPE> - void queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority); - int64_t get_scrub_cost(); - + void queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority, uint64_t cost); + template <class MSG_TYPE> + void queue_scrub_event_msg_default_cost(PG* pg, Scrub::scrub_prio_t with_priority); + template <class MSG_TYPE> + void queue_scrub_event_msg_default_cost(PG* pg, + Scrub::scrub_prio_t with_priority, + unsigned int qu_priority, + Scrub::act_token_t act_token); utime_t defer_recovery_until; uint64_t recovery_ops_active; uint64_t recovery_ops_reserved; @@ -1622,6 +1631,11 @@ protected: p->complete(0); } } + + double get_cost_per_io() const { + auto &sdata = osd->shards[0]; + return sdata->scheduler->get_cost_per_io(); + } } op_shardedwq; diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 116ccea047f..bf50b18a8da 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -883,7 +883,7 @@ void OSDMap::Incremental::decode(ceph::buffer::list::const_iterator& bl) return; } { - DECODE_START(8, bl); // client-usable data + DECODE_START(9, bl); // client-usable data decode(fsid, bl); decode(epoch, bl); decode(modified, bl); @@ -3548,7 +3548,7 @@ void OSDMap::decode(ceph::buffer::list::const_iterator& bl) * Since we made it past that hurdle, we can use our normal paths. 
*/ { - DECODE_START(9, bl); // client-usable data + DECODE_START(10, bl); // client-usable data // base decode(fsid, bl); decode(epoch, bl); diff --git a/src/osd/OSDMapMapping.cc b/src/osd/OSDMapMapping.cc index 9cd1fbf5823..bc53bddecde 100644 --- a/src/osd/OSDMapMapping.cc +++ b/src/osd/OSDMapMapping.cc @@ -158,7 +158,6 @@ void ParallelPGMapper::WQ::_process(Item *i, ThreadPool::TPHandle &h) else i->job->process(i->pool, i->begin, i->end); i->job->finish_one(); - delete i; } void ParallelPGMapper::queue( diff --git a/src/osd/OSDMapMapping.h b/src/osd/OSDMapMapping.h index 216c30446a9..40193b582f6 100644 --- a/src/osd/OSDMapMapping.h +++ b/src/osd/OSDMapMapping.h @@ -144,6 +144,7 @@ protected: } void _process(Item *i, ThreadPool::TPHandle &h) override; + void _process_finish(Item *i) override { delete i;} void _clear() override { ceph_assert(_empty()); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7a6b7b01180..172f19ae9ca 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1324,94 +1324,31 @@ unsigned int PG::scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsig // SCRUB -/* - * implementation note: - * PG::start_scrubbing() is called only once per a specific scrub session. - * That call commits us to the whatever choices are made (deep/shallow, etc'). - * Unless failing to start scrubbing, the 'planned scrub' flag-set is 'frozen' into - * PgScrubber's m_flags, then cleared. - */ Scrub::schedule_result_t PG::start_scrubbing( Scrub::OSDRestrictions osd_restrictions) { - using Scrub::schedule_result_t; dout(10) << fmt::format( "{}: {}+{} (env restrictions:{})", __func__, (is_active() ? "<active>" : "<not-active>"), (is_clean() ? "<clean>" : "<not-clean>"), osd_restrictions) << dendl; ceph_assert(ceph_mutex_is_locked(_lock)); - - // recheck PG status (as the PG was unlocked for a time after being selected - // for scrubbing) - if (!is_primary() || !is_active() || !is_clean()) { - dout(10) << __func__ << ": cannot scrub (not a clean and active primary)" - << dendl; - m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::pg_state); - return schedule_result_t::target_specific_failure; - } - ceph_assert(m_scrubber); - if (is_scrub_queued_or_active()) { - dout(10) << __func__ << ": scrub already in progress" << dendl; - return schedule_result_t::target_specific_failure; - } - // if only explicitly requested repairing is allowed - skip other types - // of scrubbing - if (osd_restrictions.allow_requested_repair_only && - !get_planned_scrub().must_repair) { - dout(10) << __func__ - << ": skipping this PG as repairing was not explicitly " - "requested for it" - << dendl; - m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::scrub_params); - return schedule_result_t::target_specific_failure; - } - if (state_test(PG_STATE_SNAPTRIM) || state_test(PG_STATE_SNAPTRIM_WAIT)) { - // note that the trimmer checks scrub status when setting 'snaptrim_wait' - // (on the transition from NotTrimming to Trimming/WaitReservation), - // i.e. some time before setting 'snaptrim'. 
- dout(10) << __func__ << ": cannot scrub while snap-trimming" << dendl; - m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::pg_state); - return schedule_result_t::target_specific_failure; - } + Scrub::ScrubPGPreconds pg_cond{}; + pg_cond.allow_shallow = + !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || + pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)); + pg_cond.allow_deep = + !(get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || + pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)); + pg_cond.has_deep_errors = (info.stats.stats.sum.num_deep_scrub_errors > 0); + pg_cond.can_autorepair = + (cct->_conf->osd_scrub_auto_repair && + get_pgbackend()->auto_repair_supported()); - // analyze the combination of the requested scrub flags, the osd/pool - // configuration and the PG status to determine whether we should scrub - // now, and what type of scrub should that be. - auto updated_flags = validate_scrub_mode(); - if (!updated_flags) { - // the stars do not align for starting a scrub for this PG at this time - // (due to configuration or priority issues) - // The reason was already reported by the callee. - dout(10) << __func__ << ": failed to initiate a scrub" << dendl; - m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::scrub_params); - return schedule_result_t::target_specific_failure; - } - - // try to reserve the local OSD resources. If failing: no harm. We will - // be retried by the OSD later on. - if (!m_scrubber->reserve_local()) { - dout(10) << __func__ << ": failed to reserve locally" << dendl; - m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::local_resources); - return schedule_result_t::osd_wide_failure; - } - - // can commit to the updated flags now, as nothing will stop the scrub - m_planned_scrub = *updated_flags; - - // An interrupted recovery repair could leave this set. - state_clear(PG_STATE_REPAIR); - - // Pass control to the scrubber. It is the scrubber that handles the - // replicas' resources reservations. - m_scrubber->set_op_parameters(m_planned_scrub); - - // using the OSD queue, as to not execute the scrub code as part of the tick. - dout(10) << __func__ << ": queueing" << dendl; - osd->queue_for_scrub(this, Scrub::scrub_prio_t::low_priority); - return schedule_result_t::scrub_initiated; + return m_scrubber->start_scrub_session( + osd_restrictions, pg_cond, m_planned_scrub); } @@ -1423,311 +1360,6 @@ double PG::next_deepscrub_interval() const deep_scrub_interval = cct->_conf->osd_deep_scrub_interval; return info.history.last_deep_scrub_stamp + deep_scrub_interval; } - -bool PG::is_time_for_deep(bool allow_deep_scrub, - bool allow_shallow_scrub, - bool has_deep_errors, - const requested_scrub_t& planned) const -{ - dout(10) << fmt::format( - "{}: need-auto? {} allowed? {}/{} deep-errors? {} " - "last_deep_scrub_stamp {}", - __func__, - planned.need_auto, - allow_shallow_scrub, - allow_deep_scrub, - has_deep_errors, - info.history.last_deep_scrub_stamp) - << dendl; - - if (!allow_deep_scrub) - return false; - - if (planned.need_auto) { - dout(10) << __func__ << ": need repair after scrub errors" << dendl; - return true; - } - - if (ceph_clock_now() >= next_deepscrub_interval()) { - dout(20) << __func__ << ": now (" << ceph_clock_now() - << ") >= time for deep (" << next_deepscrub_interval() << ")" - << dendl; - return true; - } - - if (has_deep_errors) { - // note: the text below is matched by 'standalone' tests - osd->clog->info() << "osd." 
<< osd->whoami << " pg " << info.pgid - << " Deep scrub errors, upgrading scrub to deep-scrub"; - return true; - } - - // we only flip coins if 'allow_shallow_scrub' is asserted. Otherwise - as - // this function is called often, we will probably be deep-scrubbing most of - // the time. - if (allow_shallow_scrub) { - const bool deep_coin_flip = - (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100; - - dout(15) << __func__ << ": time_for_deep=" << planned.time_for_deep - << " deep_coin_flip=" << deep_coin_flip << dendl; - - if (deep_coin_flip) - return true; - } - - return false; -} - -/* - clang-format off - - Request details | none | no-scrub | no-scrub+no-deep | no-deep - ------------------------------------------------------------------------ - ------------------------------------------------------------------------ - initiated | shallow | shallow | shallow | shallow - ------------------------------------------------------------------------ - init. + t.f.deep | deep | deep | shallow | shallow - ------------------------------------------------------------------------ - initiated deep | deep | deep | deep | deep - ------------------------------------------------------------------------ - - clang-format on -*/ -std::optional<requested_scrub_t> PG::validate_initiated_scrub( - bool allow_deep_scrub, - bool try_to_auto_repair, - bool time_for_deep, - bool has_deep_errors, - const requested_scrub_t& planned) const -{ - requested_scrub_t upd_flags{planned}; - - upd_flags.time_for_deep = time_for_deep; - upd_flags.deep_scrub_on_error = false; - upd_flags.auto_repair = false; - - if (upd_flags.must_deep_scrub) { - upd_flags.calculated_to_deep = true; - } else if (upd_flags.time_for_deep && allow_deep_scrub) { - upd_flags.calculated_to_deep = true; - } else { - upd_flags.calculated_to_deep = false; - if (has_deep_errors) { - osd->clog->error() << fmt::format( - "osd.{} pg {} Regular scrub request, deep-scrub details will be lost", - osd->whoami, - info.pgid); - } - } - - if (try_to_auto_repair) { - // for shallow scrubs: rescrub if errors found - // for deep: turn 'auto-repair' on - if (upd_flags.calculated_to_deep) { - dout(10) << fmt::format( - "{}: performing an auto-repair deep scrub", - __func__) - << dendl; - upd_flags.auto_repair = true; - } else { - dout(10) << fmt::format( - "{}: will perform an auto-repair deep scrub if errors " - "are found", - __func__) - << dendl; - upd_flags.deep_scrub_on_error = true; - } - } - - return upd_flags; -} - -/* - clang-format off - - for periodic scrubs: - - Periodic type | none | no-scrub | no-scrub+no-deep | no-deep - ------------------------------------------------------------------------ - ------------------------------------------------------------------------ - periodic | shallow | x | x | shallow - ------------------------------------------------------------------------ - periodic + t.f.deep| deep | deep | x | shallow - ------------------------------------------------------------------------ - - clang-format on -*/ -std::optional<requested_scrub_t> PG::validate_periodic_mode( - bool allow_deep_scrub, - bool try_to_auto_repair, - bool allow_shallow_scrub, - bool time_for_deep, - bool has_deep_errors, - const requested_scrub_t& planned) const - -{ - ceph_assert(!planned.must_deep_scrub && !planned.must_repair); - - if (!allow_deep_scrub && has_deep_errors) { - osd->clog->error() - << "osd." 
<< osd->whoami << " pg " << info.pgid - << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set"; - return std::nullopt; // no scrubbing - } - - requested_scrub_t upd_flags{planned}; - - upd_flags.time_for_deep = time_for_deep; - upd_flags.deep_scrub_on_error = false; - upd_flags.auto_repair = false; - upd_flags.calculated_to_deep = false; - - dout(20) << fmt::format("{}: allowed:{}/{} t.f.d:{} req:{}", - __func__, - allow_shallow_scrub, - allow_deep_scrub, - upd_flags.time_for_deep, - planned) - << dendl; - - // should we perform a shallow scrub? - if (allow_shallow_scrub) { - if (!upd_flags.time_for_deep || !allow_deep_scrub) { - if (try_to_auto_repair) { - dout(10) << __func__ - << ": auto repair with scrubbing, rescrub if errors found" - << dendl; - upd_flags.deep_scrub_on_error = true; - } - dout(20) << __func__ << " will do shallow scrub (time_for_deep = " - << upd_flags.time_for_deep << ")" << dendl; - return upd_flags; - } - // else - either deep-scrub or nothing - } - - if (upd_flags.time_for_deep) { - if (allow_deep_scrub) { - if (try_to_auto_repair) { - dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl; - upd_flags.auto_repair = true; - } - upd_flags.calculated_to_deep = true; - dout(20) << fmt::format("{}: final: {}", __func__, upd_flags) << dendl; - return upd_flags; - } - if (allow_shallow_scrub) { - dout(20) << fmt::format("{}: final:{}", __func__, upd_flags) << dendl; - return upd_flags; - } - return std::nullopt; - } - - return std::nullopt; // no scrubbing -} - - -/* - From docs.ceph.com (osd-internals/scrub): - - clang-format off - - Desired no-scrub flags & scrub type interactions: - - Periodic type | none | no-scrub | no-scrub+no-deep | no-deep - ------------------------------------------------------------------------ - ------------------------------------------------------------------------ - periodic | shallow | x | x | shallow - ------------------------------------------------------------------------ - periodic + t.f.deep| deep | deep | x | shallow - ------------------------------------------------------------------------ - initiated | shallow | shallow | shallow | shallow - ------------------------------------------------------------------------ - init. 
+ t.f.deep | deep | deep | shallow | shallow - ------------------------------------------------------------------------ - initiated deep | deep | deep | deep | deep - ------------------------------------------------------------------------ - - "periodic" - if !must_scrub && !must_deep_scrub; - "initiated deep" - if must_scrub && must_deep_scrub; - "initiated" - if must_scrub && !must_deep_scrub; - - clang-format on -*/ -/* - * The returned flags collection (requested_scrub_t) is based on - * m_planned_scrub with the following modifications: - * - * - calculated_to_deep will be set to shallow or deep, depending on the - * scrub type (according to the decision table above); - * - deep_scrub_on_error will be determined; - * - same for auto_repair; - * - time_for_deep will be set to true if the scrub is periodic and the - * time for a deep scrub has been reached (+ some other conditions); - * and - * - need_auto is cleared - */ -std::optional<requested_scrub_t> PG::validate_scrub_mode() const -{ - const bool allow_shallow_scrub = - !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || - pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)); - const bool allow_deep_scrub = - !(get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || - pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)); - const bool has_deep_errors = (info.stats.stats.sum.num_deep_scrub_errors > 0); - const bool try_to_auto_repair = (cct->_conf->osd_scrub_auto_repair && - get_pgbackend()->auto_repair_supported()); - - dout(10) << __func__ << " pg: " << info.pgid - << " allow: " << allow_shallow_scrub << "/" << allow_deep_scrub - << " deep errs: " << has_deep_errors - << " auto-repair: " << try_to_auto_repair << " (" - << cct->_conf->osd_scrub_auto_repair << ")" << dendl; - - // scrubbing while recovering? - const bool prevented_by_recovery = - osd->is_recovery_active() && !cct->_conf->osd_scrub_during_recovery && - (!cct->_conf->osd_repair_during_recovery || !m_planned_scrub.must_repair); - - if (prevented_by_recovery) { - dout(20) << __func__ << ": scrubbing prevented during recovery" << dendl; - return std::nullopt; - } - - const bool time_for_deep = is_time_for_deep(allow_deep_scrub, - allow_shallow_scrub, - has_deep_errors, - m_planned_scrub); - std::optional<requested_scrub_t> upd_flags; - - if (m_planned_scrub.must_scrub) { - upd_flags = validate_initiated_scrub(allow_deep_scrub, - try_to_auto_repair, - time_for_deep, - has_deep_errors, - m_planned_scrub); - } else { - ceph_assert(!m_planned_scrub.must_deep_scrub); - upd_flags = validate_periodic_mode(allow_deep_scrub, - try_to_auto_repair, - allow_shallow_scrub, - time_for_deep, - has_deep_errors, - m_planned_scrub); - if (!upd_flags) { - dout(20) << __func__ << ": no periodic scrubs allowed" << dendl; - return std::nullopt; - } - } - - dout(10) << fmt::format("{}: next scrub flags: {}", __func__, *upd_flags) - << dendl; - upd_flags->need_auto = false; - return upd_flags; -} - void PG::on_scrub_schedule_input_change() { if (is_active() && is_primary()) { diff --git a/src/osd/PG.h b/src/osd/PG.h index dc0276ffaa6..68aa160a949 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -713,35 +713,6 @@ private: // auxiliaries used by sched_scrub(): double next_deepscrub_interval() const; - /// should we perform deep scrub? - bool is_time_for_deep(bool allow_deep_scrub, - bool allow_shallow_scrub, - bool has_deep_errors, - const requested_scrub_t& planned) const; - - /** - * Validate the various 'next scrub' flags in m_planned_scrub against configuration - * and scrub-related timestamps. 
- * - * @returns an updated copy of the m_planned_flags (or nothing if no scrubbing) - */ - std::optional<requested_scrub_t> validate_scrub_mode() const; - - std::optional<requested_scrub_t> validate_periodic_mode( - bool allow_deep_scrub, - bool try_to_auto_repair, - bool allow_shallow_scrub, - bool time_for_deep, - bool has_deep_errors, - const requested_scrub_t& planned) const; - - std::optional<requested_scrub_t> validate_initiated_scrub( - bool allow_deep_scrub, - bool try_to_auto_repair, - bool time_for_deep, - bool has_deep_errors, - const requested_scrub_t& planned) const; - using ScrubAPI = void (ScrubPgIF::*)(epoch_t epoch_queued); void forward_scrub_event(ScrubAPI fn, epoch_t epoch_queued, std::string_view desc); // and for events that carry a meaningful 'activation token' diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index c44c90514a3..9cbb5e8e97c 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -18,6 +18,7 @@ #ifndef PGBACKEND_H #define PGBACKEND_H +#include "ECCommon.h" #include "osd_types.h" #include "common/WorkQueue.h" #include "include/Context.h" @@ -560,7 +561,7 @@ typedef std::shared_ptr<const OSDMap> OSDMapRef; virtual void objects_read_async( const hobject_t &hoid, - const std::list<std::pair<boost::tuple<uint64_t, uint64_t, uint32_t>, + const std::list<std::pair<ECCommon::ec_align_t, std::pair<ceph::buffer::list*, Context*> > > &to_read, Context *on_complete, bool fast_read = false) = 0; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index a7e6d90a8fe..56be3eb3a7e 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -294,9 +294,19 @@ void PrimaryLogPG::OpContext::start_async_reads(PrimaryLogPG *pg) list<pair<boost::tuple<uint64_t, uint64_t, unsigned>, pair<bufferlist*, Context*> > > in; in.swap(pending_async_reads); + // TODO: drop the converter + list<pair<ECCommon::ec_align_t, + pair<bufferlist*, Context*> > > in_native; + for (auto [align_tuple, ctx_pair] : in) { + in_native.emplace_back( + ECCommon::ec_align_t{ + align_tuple.get<0>(), align_tuple.get<1>(), align_tuple.get<2>() + }, + std::move(ctx_pair)); + } pg->pgbackend->objects_read_async( obc->obs.oi.soid, - in, + in_native, new OnReadComplete(pg, this), pg->get_pool().fast_read); } void PrimaryLogPG::OpContext::finish_read(PrimaryLogPG *pg) @@ -4904,7 +4914,7 @@ int PrimaryLogPG::trim_object( encode(snapset, bl); attrs[SS_ATTR] = std::move(bl); - bl.clear(); + bl.clear(); //NOLINT(bugprone-use-after-move) encode(head_obc->obs.oi, bl, get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr)); attrs[OI_ATTR] = std::move(bl); diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 8fd2d2022f8..3702490fb61 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -289,7 +289,7 @@ int ReplicatedBackend::objects_readv_sync( void ReplicatedBackend::objects_read_async( const hobject_t &hoid, - const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>, + const list<pair<ECCommon::ec_align_t, pair<bufferlist*, Context*> > > &to_read, Context *on_complete, bool fast_read) diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 41b634269ba..aab75d21c73 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -148,7 +148,7 @@ public: void objects_read_async( const hobject_t &hoid, - const std::list<std::pair<boost::tuple<uint64_t, uint64_t, uint32_t>, + const std::list<std::pair<ECCommon::ec_align_t, std::pair<ceph::buffer::list*, Context*> > > &to_read, Context *on_complete, 
bool fast_read = false) override; diff --git a/src/osd/SnapMapper.h b/src/osd/SnapMapper.h index a90faa8e84f..f9a371932f6 100644 --- a/src/osd/SnapMapper.h +++ b/src/osd/SnapMapper.h @@ -356,6 +356,11 @@ private: return prefix_itr; } + /// reset the MapCacher backend, this should be called on pg interval change + void reset_backend() { + backend.reset(); + } + /// Update snaps for oid, empty new_snaps removes the mapping int update_snaps( const hobject_t &oid, ///< [in] oid to update diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc index 30f0ba53166..db448e640da 100644 --- a/src/osd/osd_perf_counters.cc +++ b/src/osd/osd_perf_counters.cc @@ -406,7 +406,6 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label) scrub_perf.add_u64_counter(scrbcnt_resrv_success, "scrub_reservations_completed", "successfully completed reservation processes"); scrub_perf.add_time_avg(scrbcnt_resrv_successful_elapsed, "successful_reservations_elapsed", "time to scrub reservation completion"); scrub_perf.add_u64_counter(scrbcnt_resrv_aborted, "reservation_process_aborted", "scrub reservation was aborted"); - scrub_perf.add_u64_counter(scrbcnt_resrv_timed_out, "reservation_process_timed_out", "scrub reservation timed out"); scrub_perf.add_u64_counter(scrbcnt_resrv_rejected, "reservation_process_failure", "scrub reservation failed due to replica denial"); scrub_perf.add_u64_counter(scrbcnt_resrv_skipped, "reservation_process_skipped", "scrub reservation skipped for high priority scrub"); scrub_perf.add_time_avg(scrbcnt_resrv_failed_elapsed, "failed_reservations_elapsed", "time for scrub reservation to fail"); diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h index 00127dd7ff5..e008c67bdfa 100644 --- a/src/osd/osd_perf_counters.h +++ b/src/osd/osd_perf_counters.h @@ -218,8 +218,6 @@ enum { scrbcnt_resrv_successful_elapsed, /// # failed attempt to reserve replicas due to an abort scrbcnt_resrv_aborted, - /// # reservation process timed out - scrbcnt_resrv_timed_out, /// # reservation failed due to a 'rejected' response scrbcnt_resrv_rejected, /// # reservation skipped for high-priority scrubs diff --git a/src/osd/scheduler/OpScheduler.h b/src/osd/scheduler/OpScheduler.h index 570a2a16290..cb6ff69f2fe 100644 --- a/src/osd/scheduler/OpScheduler.h +++ b/src/osd/scheduler/OpScheduler.h @@ -22,6 +22,8 @@ #include "mon/MonClient.h" #include "osd/scheduler/OpSchedulerItem.h" +#include "include/ceph_assert.h" + namespace ceph::osd::scheduler { using client = uint64_t; @@ -58,6 +60,11 @@ public: // Get the scheduler type set for the queue virtual op_queue_type_t get_type() const = 0; + virtual double get_cost_per_io() const { + ceph_assert(0 == "impossible for wpq"); + return 0.0; + } + // Destructor virtual ~OpScheduler() {}; }; diff --git a/src/osd/scheduler/mClockScheduler.h b/src/osd/scheduler/mClockScheduler.h index 16e7f911ff9..7d3eb64afa4 100644 --- a/src/osd/scheduler/mClockScheduler.h +++ b/src/osd/scheduler/mClockScheduler.h @@ -261,6 +261,10 @@ public: const char** get_tracked_conf_keys() const final; void handle_conf_change(const ConfigProxy& conf, const std::set<std::string> &changed) final; + + double get_cost_per_io() const { + return osd_bandwidth_cost_per_io; + } private: // Enqueue the op to the high priority queue void enqueue_high(unsigned prio, OpSchedulerItem &&item, bool front = false); diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 48f978b18e3..b1b063c484e 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ 
b/src/osd/scrubber/osd_scrub.cc @@ -171,12 +171,6 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( << dendl; env_conditions.high_priority_only = true; - } else if (m_queue.is_reserving_now()) { - // if there is a PG that is just now trying to reserve scrub replica - // resources - we should wait and not initiate a new scrub - dout(10) << "scrub resources reservation in progress" << dendl; - env_conditions.high_priority_only = true; - } else if (is_recovery_active && !conf->osd_scrub_during_recovery) { if (conf->osd_repair_during_recovery) { dout(15) @@ -482,13 +476,3 @@ int OsdScrub::get_blocked_pgs_count() const { return m_queue.get_blocked_pgs_count(); } - -bool OsdScrub::set_reserving_now(spg_t reserving_id, utime_t now_is) -{ - return m_queue.set_reserving_now(reserving_id, now_is); -} - -void OsdScrub::clear_reserving_now(spg_t reserving_id) -{ - m_queue.clear_reserving_now(reserving_id); -} diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index cd1158d4723..41f5122681c 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -135,15 +135,6 @@ class OsdScrub { bool high_priority_scrub) const; /** - * No new scrub session will start while a scrub was initiated on a PG, - * and that PG is trying to acquire replica resources. - * \retval false if the flag was already set (due to a race) - */ - bool set_reserving_now(spg_t reserving_id, utime_t now_is); - - void clear_reserving_now(spg_t reserving_id); - - /** * push the 'not_before' time out by 'delay' seconds, so that this scrub target * would not be retried before 'delay' seconds have passed. */ diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index 1d0bf614c9b..079e2a7e7ae 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -361,34 +361,3 @@ int ScrubQueue::get_blocked_pgs_count() const { return blocked_scrubs_cnt; } - -// ////////////////////////////////////////////////////////////////////////// // -// ScrubQueue - maintaining the 'some PG is reserving' flag - -bool ScrubQueue::set_reserving_now(spg_t reserving_id, utime_t now_is) -{ - std::unique_lock l{reserving_lock}; - - if (!reserving_pg.has_value()) { - reserving_pg = reserving_id; - reserving_since = now_is; - return true; - } - ceph_assert(reserving_id != *reserving_pg); - return false; -} - -void ScrubQueue::clear_reserving_now(spg_t was_reserving_id) -{ - std::unique_lock l{reserving_lock}; - if (reserving_pg && (*reserving_pg == was_reserving_id)) { - reserving_pg.reset(); - } - // otherwise - ignore silently -} - -bool ScrubQueue::is_reserving_now() const -{ - // no lock needed, as set_reserving_now() will recheck - return reserving_pg.has_value(); -} diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index 140c1428889..75708af7bcf 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -84,7 +84,6 @@ ScrubQueue interfaces (main functions): - can_inc_scrubs() - {inc/dec}_scrubs_{local/remote}() - dump_scrub_reservations() - - {set/clear/is}_reserving_now() <2> - environment conditions: @@ -117,13 +116,6 @@ namespace Scrub { using namespace ::std::literals; -/// possible outcome when trying to select a PG and scrub it -enum class schedule_result_t { - scrub_initiated, // successfully started a scrub - target_specific_failure, // failed to scrub this specific target - osd_wide_failure // failed to scrub any target -}; - // the OSD services provided to the scrub 
scheduler class ScrubSchedListener { public: @@ -238,30 +230,6 @@ class ScrubQueue { public: void dump_scrubs(ceph::Formatter* f) const; - /** - * No new scrub session will start while a scrub was initiated on a PG, - * and that PG is trying to acquire replica resources. - * - * \todo replace the atomic bool with a regular bool protected by a - * common OSD-service lock. Or better still - once PR#53263 is merged, - * remove this flag altogether. - */ - - /** - * set_reserving_now() - * \returns 'false' if the flag was already set - * (which is a possible result of a race between the check in OsdScrub and - * the initiation of a scrub by some other PG) - */ - bool set_reserving_now(spg_t reserving_id, utime_t now_is); - - /** - * silently ignore attempts to clear the flag if it was not set by - * the named pg. - */ - void clear_reserving_now(spg_t reserving_id); - bool is_reserving_now() const; - /// counting the number of PGs stuck while scrubbing, waiting for objects void mark_pg_scrub_blocked(spg_t blocked_pg); void clear_pg_scrub_blocked(spg_t blocked_pg); @@ -332,17 +300,6 @@ class ScrubQueue { std::atomic_int_fast16_t blocked_scrubs_cnt{0}; /** - * One of the OSD's primary PGs is in the initial phase of a scrub, - * trying to secure its replicas' resources. We will refrain from initiating - * any other scrub sessions until this one is done. - * - * \todo replace the local lock with regular osd-service locking - */ - ceph::mutex reserving_lock = ceph::make_mutex("ScrubQueue::reserving_lock"); - std::optional<spg_t> reserving_pg; - utime_t reserving_since; - - /** * If the scrub job was not explicitly requested, we postpone it by some * random length of time. * And if delaying the scrub - we calculate, based on pool parameters, a diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index e1604222c2c..471677318b1 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -876,8 +876,11 @@ int PgScrubber::get_whoami() const * - m_max_end * - end * - start + * returns: + * - std::nullopt if the range is blocked + * - otherwise, the number of objects in the selected range */ -bool PgScrubber::select_range() +std::optional<uint64_t> PgScrubber::select_range() { m_be->new_chunk(); @@ -959,7 +962,7 @@ bool PgScrubber::select_range() // we'll be requeued by whatever made us unavailable for scrub dout(10) << __func__ << ": scrub blocked somewhere in range " << "[" << m_start << ", " << candidate_end << ")" << dendl; - return false; + return std::nullopt; } m_end = candidate_end; @@ -972,20 +975,20 @@ bool PgScrubber::select_range() // debug: be 'blocked' if told so by the 'pg scrub_debug block' asok command if (m_debug_blockrange > 0) { m_debug_blockrange--; - return false; + return std::nullopt; } - return true; + return objects.size(); } void PgScrubber::select_range_n_notify() { get_counters_set().inc(scrbcnt_chunks_selected); - - if (select_range()) { + auto num_chunk_objects = select_range(); + if (num_chunk_objects.has_value()) { // the next chunk to handle is not blocked dout(20) << __func__ << ": selection OK" << dendl; - m_osds->queue_scrub_chunk_free(m_pg, Scrub::scrub_prio_t::low_priority); - + auto cost = get_scrub_cost(num_chunk_objects.value()); + m_osds->queue_scrub_chunk_free(m_pg, Scrub::scrub_prio_t::low_priority, cost); } else { // we will wait for the objects range to become available for scrubbing dout(10) << __func__ << ": selected chunk is busy" << dendl; @@ -994,6 +997,28 @@ void PgScrubber::select_range_n_notify() } } 
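The reworked select_range() above reports the number of objects in the selected chunk through std::optional instead of a bare bool, so select_range_n_notify() can price the queued event before handing it to the OSD queue. A minimal sketch of that hand-off; the names are illustrative, not the scrubber's API:

#include <cstdint>
#include <iostream>
#include <optional>

// Stand-in for select_range(): nullopt means the range is blocked;
// a value carries the object count the caller prices the event with.
std::optional<uint64_t> select_range_stub(bool blocked, uint64_t n_objects)
{
  if (blocked)
    return std::nullopt;
  return n_objects;
}

int main()
{
  if (const auto n = select_range_stub(false, 25); n.has_value()) {
    std::cout << "queue chunk-free event; cost scales with " << *n
              << " objects\n";
  } else {
    std::cout << "range busy; wait to be requeued\n";
  }
  return 0;
}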
+uint64_t PgScrubber::get_scrub_cost(uint64_t num_chunk_objects) +{ + const auto& conf = m_pg->get_cct()->_conf; + if (op_queue_type_t::WeightedPriorityQueue == m_osds->osd->osd_op_queue_type()) { + // if the osd_op_queue is WPQ, we will use the default osd_scrub_cost value + return conf->osd_scrub_cost; + } + uint64_t cost = 0; + double scrub_metadata_cost = m_osds->get_cost_per_io(); + if (m_is_deep) { + auto pg_avg_object_size = m_pg->get_average_object_size(); + cost = conf->osd_scrub_event_cost + (num_chunk_objects + * (scrub_metadata_cost + pg_avg_object_size)); + dout(20) << fmt::format("{} : deep-scrub cost = {}", __func__, cost) << dendl; + return cost; + } else { + cost = conf->osd_scrub_event_cost + (num_chunk_objects * scrub_metadata_cost); + dout(20) << fmt::format("{} : shallow-scrub cost = {}", __func__, cost) << dendl; + return cost; + } +} + bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid) { if (soid < m_start || soid >= m_end) { @@ -1574,10 +1599,15 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) set_queued_or_active(); advance_token(); + const auto& conf = m_pg->get_cct()->_conf; + const int max_from_conf = size_from_conf( + m_is_deep, conf, "osd_scrub_chunk_max", "osd_shallow_scrub_chunk_max"); + auto cost = get_scrub_cost(max_from_conf); m_osds->queue_for_rep_scrub(m_pg, m_replica_request_priority, m_flags.priority, - m_current_token); + m_current_token, + cost); } void PgScrubber::set_op_parameters(const requested_scrub_t& request) @@ -1743,17 +1773,6 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op) } } - -bool PgScrubber::set_reserving_now() { - return m_osds->get_scrub_services().set_reserving_now(m_pg_id, - ceph_clock_now()); -} - -void PgScrubber::clear_reserving_now() -{ - m_osds->get_scrub_services().clear_reserving_now(m_pg_id); -} - void PgScrubber::set_queued_or_active() { m_queued_or_active = true; @@ -2043,6 +2062,89 @@ void PgScrubber::on_digest_updates() } } + +// a placeholder. requeue_penalized() is fully implemented in the +// following commits of this PR +void PgScrubber::requeue_penalized(Scrub::delay_cause_t cause) +{ + penalize_next_scrub(cause); +} + + +Scrub::schedule_result_t PgScrubber::start_scrub_session( + Scrub::OSDRestrictions osd_restrictions, + Scrub::ScrubPGPreconds pg_cond, + const requested_scrub_t& requested_flags) +{ + if (is_queued_or_active()) { + // not a real option when the queue entry is the whole ScrubJob, but + // will be possible when using level-specific targets + dout(10) << __func__ << ": scrub already in progress" << dendl; + return schedule_result_t::target_specific_failure; + } + + // for all other failures - we must reinstate our entry in the Scrub Queue + if (!is_primary() || !m_pg->is_active() || !m_pg->is_clean()) { + dout(10) << __func__ << ": cannot scrub (not a clean and active primary)" + << dendl; + requeue_penalized(Scrub::delay_cause_t::pg_state); + return schedule_result_t::target_specific_failure; + } + + if (state_test(PG_STATE_SNAPTRIM) || state_test(PG_STATE_SNAPTRIM_WAIT)) { + // note that the trimmer checks scrub status when setting 'snaptrim_wait' + // (on the transition from NotTrimming to Trimming/WaitReservation), + // i.e. some time before setting 'snaptrim'. 
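+  // Together with that trimmer-side check, the test here keeps snap-trimming
+  // and scrubbing from running on the same PG at the same time.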
+ dout(10) << __func__ << ": cannot scrub while snap-trimming" << dendl; + requeue_penalized(Scrub::delay_cause_t::pg_state); + return schedule_result_t::target_specific_failure; + } + + // analyze the combination of the requested scrub flags, the osd/pool + // configuration and the PG status to determine whether we should scrub + // now, and what type of scrub should that be. + auto updated_flags = validate_scrub_mode(osd_restrictions, pg_cond); + if (!updated_flags) { + dout(10) << __func__ << ": scrub not allowed" << dendl; + requeue_penalized(Scrub::delay_cause_t::scrub_params); + return schedule_result_t::target_specific_failure; + } + + // if only explicitly requested repairing is allowed - skip other types + // of scrubbing + if (osd_restrictions.allow_requested_repair_only && + !updated_flags->must_repair) { + dout(10) << __func__ + << ": skipping this PG as repairing was not explicitly " + "requested for it" + << dendl; + requeue_penalized(Scrub::delay_cause_t::scrub_params); + return schedule_result_t::target_specific_failure; + } + + // try to reserve the local OSD resources. If this fails - no harm: we will + // be retried by the OSD later on. + if (!reserve_local()) { + dout(10) << __func__ << ": failed to reserve locally" << dendl; + requeue_penalized(Scrub::delay_cause_t::local_resources); + return schedule_result_t::osd_wide_failure; + } + + // can commit to the updated flags now, as nothing will stop the scrub + m_planned_scrub = *updated_flags; + + // An interrupted recovery repair could leave this set. + state_clear(PG_STATE_REPAIR); + + set_op_parameters(m_planned_scrub); + + // using the OSD queue, so as not to execute the scrub code as part of the tick. + dout(10) << __func__ << ": queueing" << dendl; + m_osds->queue_for_scrub(m_pg, Scrub::scrub_prio_t::low_priority); + return schedule_result_t::scrub_initiated; +} + + /* * note that the flags-set fetched from the PG (m_pg->m_planned_scrub) * is cleared once scrubbing starts; some of the values dumped here are @@ -2475,6 +2577,284 @@ void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is) } +bool PgScrubber::is_time_for_deep( + Scrub::ScrubPGPreconds pg_cond, + const requested_scrub_t& planned) const +{ + const auto last_deep = m_pg->info.history.last_deep_scrub_stamp; // shorthand + dout(10) << fmt::format( + "{}: pg_cond:({}) need-auto?{} last_deep_scrub_stamp:{}", + __func__, pg_cond, planned.need_auto, last_deep) + << dendl; + + if (!pg_cond.allow_deep) + return false; + + if (planned.need_auto) { + dout(10) << __func__ << ": need repair after scrub errors" << dendl; + return true; + } + + const auto sched_conf = populate_config_params(); + const auto next_deep = last_deep + sched_conf.deep_interval; + const auto timenow = ceph_clock_now(); + if (timenow >= next_deep) { + dout(20) << fmt::format( + "{}: now ({}) >= time for deep ({})", __func__, timenow, + next_deep) + << dendl; + return true; + } + + if (pg_cond.has_deep_errors) { + // note: the text below is matched by 'standalone' tests + get_clog()->info() << fmt::format( + "osd.{} pg {} Deep scrub errors, upgrading scrub to deep-scrub", + get_whoami(), m_pg_id); + return true; + } + + // we only flip coins if 'allow_shallow_scrub' is asserted. Otherwise - as + // this function is called often, we will probably be deep-scrubbing most of + // the time. 
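The coin flip that follows implements this random shallow-to-deep upgrade. A rough Python paraphrase (the helper name below is invented; random_bool_with_probability() and the deep_randomize_ratio knob are the names used in the diff):

import random

def upgrade_shallow_to_deep(allow_shallow: bool, deep_randomize_ratio: float) -> bool:
    # Flip the coin only when a shallow scrub is actually permitted: this
    # decision is re-evaluated on every scheduling attempt, so flipping it
    # unconditionally would end up upgrading almost every scrub to deep.
    return allow_shallow and random.random() < deep_randomize_ratio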
+ if (pg_cond.allow_shallow) { + const bool deep_coin_flip = + random_bool_with_probability(sched_conf.deep_randomize_ratio); + if (deep_coin_flip) { + dout(10) << fmt::format( + "{}: scrub upgraded to deep (coin flip)", __func__) + << dendl; + return true; + } + } + + return false; +} + + +/* + clang-format off + + Request details | none | no-scrub | no-scrub+no-deep | no-deep + ------------------------------------------------------------------------ + ------------------------------------------------------------------------ + initiated | shallow | shallow | shallow | shallow + ------------------------------------------------------------------------ + init. + t.f.deep | deep | deep | shallow | shallow + ------------------------------------------------------------------------ + initiated deep | deep | deep | deep | deep + ------------------------------------------------------------------------ + + clang-format on +*/ +std::optional<requested_scrub_t> PgScrubber::validate_initiated_scrub( + Scrub::ScrubPGPreconds pg_cond, + bool time_for_deep, + const requested_scrub_t& planned) const +{ + requested_scrub_t upd_flags{planned}; + + upd_flags.time_for_deep = time_for_deep; + upd_flags.deep_scrub_on_error = false; + upd_flags.auto_repair = false; + + if (upd_flags.must_deep_scrub) { + upd_flags.calculated_to_deep = true; + } else if ( + upd_flags.time_for_deep && pg_cond.allow_deep) { + upd_flags.calculated_to_deep = true; + } else { + upd_flags.calculated_to_deep = false; + if (pg_cond.has_deep_errors) { + get_clog()->error() << fmt::format( + "osd.{} pg {} Regular scrub request, deep-scrub details will be lost", + get_whoami(), m_pg_id); + } + } + + if (pg_cond.can_autorepair) { + // for shallow scrubs: rescrub if errors found + // for deep: turn 'auto-repair' on + if (upd_flags.calculated_to_deep) { + dout(10) << fmt::format( + "{}: performing an auto-repair deep scrub", __func__) + << dendl; + upd_flags.auto_repair = true; + } else { + dout(10) << fmt::format( + "{}: will perform an auto-repair deep scrub if errors " + "are found", + __func__) + << dendl; + upd_flags.deep_scrub_on_error = true; + } + } + + return upd_flags; +} + +/* + clang-format off + + for periodic scrubs: + + Periodic type | none | no-scrub | no-scrub+no-deep | no-deep + ------------------------------------------------------------------------ + ------------------------------------------------------------------------ + periodic | shallow | x | x | shallow + ------------------------------------------------------------------------ + periodic + t.f.deep| deep | deep | x | shallow + ------------------------------------------------------------------------ + + clang-format on +*/ +std::optional<requested_scrub_t> PgScrubber::validate_periodic_mode( + Scrub::ScrubPGPreconds pg_cond, + bool time_for_deep, + const requested_scrub_t& planned) const + +{ + ceph_assert(!planned.must_deep_scrub && !planned.must_repair); + + if (!pg_cond.allow_deep && pg_cond.has_deep_errors) { + get_clog()->error() << fmt::format( + "osd.{} pg {} Regular scrub skipped due to deep-scrub errors and " + "nodeep-scrub set", + get_whoami(), m_pg_id); + return std::nullopt; // no scrubbing + } + + requested_scrub_t upd_flags{planned}; + + upd_flags.time_for_deep = time_for_deep; + upd_flags.deep_scrub_on_error = false; + upd_flags.auto_repair = false; + upd_flags.calculated_to_deep = false; + + dout(20) << fmt::format( + "{}: allowed:{}/{} t.f.d:{} req:{}", __func__, + pg_cond.allow_shallow, pg_cond.allow_deep, + upd_flags.time_for_deep, planned) + << 
dendl; + + // should we perform a shallow scrub? + if (pg_cond.allow_shallow) { + if (!upd_flags.time_for_deep || !pg_cond.allow_deep) { + if (pg_cond.can_autorepair) { + dout(10) << __func__ + << ": auto repair with scrubbing, rescrub if errors found" + << dendl; + upd_flags.deep_scrub_on_error = true; + } + dout(20) << __func__ << " will do shallow scrub (time_for_deep = " + << upd_flags.time_for_deep << ")" << dendl; + return upd_flags; + } + // else - either deep-scrub or nothing + } + + if (upd_flags.time_for_deep) { + if (pg_cond.allow_deep) { + if (pg_cond.can_autorepair) { + dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl; + upd_flags.auto_repair = true; + } + upd_flags.calculated_to_deep = true; + dout(20) << fmt::format("{}: final: {}", __func__, upd_flags) << dendl; + return upd_flags; + } + if (pg_cond.allow_shallow) { + dout(20) << fmt::format("{}: final:{}", __func__, upd_flags) << dendl; + return upd_flags; + } + // else - no scrubbing + } + + return std::nullopt; // no scrubbing +} + + +/* + From docs.ceph.com (osd-internals/scrub): + + clang-format off + + Desired no-scrub flags & scrub type interactions: + + Periodic type | none | no-scrub | no-scrub+no-deep | no-deep + ------------------------------------------------------------------------ + ------------------------------------------------------------------------ + periodic | shallow | x | x | shallow + ------------------------------------------------------------------------ + periodic + t.f.deep| deep | deep | x | shallow + ------------------------------------------------------------------------ + initiated | shallow | shallow | shallow | shallow + ------------------------------------------------------------------------ + init. + t.f.deep | deep | deep | shallow | shallow + ------------------------------------------------------------------------ + initiated deep | deep | deep | deep | deep + ------------------------------------------------------------------------ + + "periodic" - if !must_scrub && !must_deep_scrub; + "initiated deep" - if must_scrub && must_deep_scrub; + "initiated" - if must_scrub && !must_deep_scrub; + + clang-format on +*/ +/* + * The returned flags collection (requested_scrub_t) is based on + * m_planned_scrub with the following modifications: + * + * - calculated_to_deep will be set to shallow or deep, depending on the + * scrub type (according to the decision table above); + * - deep_scrub_on_error will be determined; + * - same for auto_repair; + * - time_for_deep will be set to true if the scrub is periodic and the + * time for a deep scrub has been reached (+ some other conditions); + * and + * - need_auto is cleared + */ +std::optional<requested_scrub_t> PgScrubber::validate_scrub_mode( + Scrub::OSDRestrictions osd_restrictions, + Scrub::ScrubPGPreconds pg_cond) const +{ + dout(10) << fmt::format( + "{}: osd_restrictions:{} pg_cond:{}", __func__, + osd_restrictions, pg_cond) + << dendl; + + const bool time_for_deep = is_time_for_deep(pg_cond, m_planned_scrub); + std::optional<requested_scrub_t> upd_flags; + + if (m_scrub_job->is_high_priority()) { + // 'initiated' scrubs + dout(10) << fmt::format( + "{}: initiated (\"must\") scrub (target:{} pg:{})", + __func__, *m_scrub_job, pg_cond) + << dendl; + upd_flags = + validate_initiated_scrub(pg_cond, time_for_deep, m_planned_scrub); + + } else { + // -------- a periodic scrub + dout(10) << fmt::format( + "{}: periodic target:{} pg:{}", __func__, *m_scrub_job, + pg_cond) + << dendl; + upd_flags = validate_periodic_mode(pg_cond, 
time_for_deep, m_planned_scrub); + if (!upd_flags) { + dout(20) << __func__ << ": no periodic scrubs allowed" << dendl; + return std::nullopt; + } + } + + dout(10) << fmt::format("{}: next scrub flags: {}", __func__, *upd_flags) + << dendl; + upd_flags->need_auto = false; + return upd_flags; +} + + // ///////////////////// preemption_data_t ////////////////////////////////// PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg} diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 78e8ba90d44..7a50093c2d1 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -187,6 +187,11 @@ class PgScrubber : public ScrubPgIF, /// are we waiting for resource reservation grants from our replicas? [[nodiscard]] bool is_reserving() const final; + Scrub::schedule_result_t start_scrub_session( + Scrub::OSDRestrictions osd_restrictions, + Scrub::ScrubPGPreconds, + const requested_scrub_t& requested_flags) final; + void initiate_regular_scrub(epoch_t epoch_queued) final; void initiate_scrub_after_repair(epoch_t epoch_queued) final; @@ -456,9 +461,6 @@ class PgScrubber : public ScrubPgIF, int build_replica_map_chunk() final; - bool set_reserving_now() final; - void clear_reserving_now() final; - [[nodiscard]] bool was_epoch_changed() const final; void set_queued_or_active() final; @@ -561,6 +563,11 @@ class PgScrubber : public ScrubPgIF, // 'query' command data for an active scrub void dump_active_scrubber(ceph::Formatter* f, bool is_deep) const; + /// calls penalize_next_scrub() to push the 'not before' to a later time + /// (for now; the fuller implementation will also push the scrub job back + /// into the queue). + void requeue_penalized(Scrub::delay_cause_t cause); + // ----- methods used to verify the relevance of incoming events: /** @@ -729,6 +736,8 @@ class PgScrubber : public ScrubPgIF, /// Returns epoch of current osdmap epoch_t get_osdmap_epoch() const { return get_osdmap()->get_epoch(); } + uint64_t get_scrub_cost(uint64_t num_chunk_objects); + // collected statistics int m_shallow_errors{0}; int m_deep_errors{0}; @@ -793,6 +802,31 @@ class PgScrubber : public ScrubPgIF, Scrub::sched_params_t determine_scrub_time( const pool_opts_t& pool_conf) const; + /// should we perform deep scrub? + bool is_time_for_deep( + Scrub::ScrubPGPreconds pg_cond, + const requested_scrub_t& planned) const; + + /** + * Validate the various 'next scrub' flags against configuration + * and scrub-related timestamps. + * + * @returns an updated copy of m_planned_scrub (or nothing if no scrubbing) + */ + std::optional<requested_scrub_t> validate_scrub_mode( + Scrub::OSDRestrictions osd_restrictions, + Scrub::ScrubPGPreconds pg_cond) const; + + std::optional<requested_scrub_t> validate_periodic_mode( + Scrub::ScrubPGPreconds pg_cond, + bool time_for_deep, + const requested_scrub_t& planned) const; + + std::optional<requested_scrub_t> validate_initiated_scrub( + Scrub::ScrubPGPreconds pg_cond, + bool time_for_deep, + const requested_scrub_t& planned) const; + /* * Select a range of objects to scrub. * @@ -802,8 +836,11 @@ class PgScrubber : public ScrubPgIF, * - handling some head/clones issues * * The selected range is set directly into 'm_start' and 'm_end' + * + * Returns std::nullopt if the range is busy; otherwise returns the + * number of objects in the range. 
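+ * (This count is what select_range_n_notify() feeds into get_scrub_cost()
+ * when pricing the queued chunk event.)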
*/ - bool select_range(); + std::optional<uint64_t> select_range(); std::list<Context*> m_callbacks; diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index acdddbd18eb..ca0ff522278 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -183,15 +183,6 @@ Session::Session(my_context ctx) dout(10) << "-- state -->> PrimaryActive/Session" << dendl; DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - // while we've checked the 'someone is reserving' flag before queueing - // the start-scrub event, it's possible that the flag was set in the meantime. - // Handling this case here requires adding a new sub-state, and the - // complication of reporting a failure to the caller in a new failure - // path. On the other hand - ignoring an ongoing reservation on rare - // occasions will cause no harm. - // We choose ignorance. - std::ignore = scrbr->set_reserving_now(); - m_perf_set = &scrbr->get_counters_set(); m_perf_set->inc(scrbcnt_started); } @@ -235,18 +226,7 @@ ReservingReplicas::ReservingReplicas(my_context ctx) *scrbr, context<PrimaryActive>().last_request_sent_nonce, *session.m_perf_set); - if (session.m_reservations->get_last_sent()) { - // the 1'st reservation request was sent - - auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>( - "osd_scrub_reservation_timeout"); - if (timeout.count() > 0) { - // Start a timer to handle case where the replicas take a long time to - // ack the reservation. See ReservationTimeout handler below. - m_timeout_token = - machine.schedule_timer_event_after<ReservationTimeout>(timeout); - } - } else { + if (!session.m_reservations->get_last_sent()) { // no replicas to reserve dout(10) << "no replicas to reserve" << dendl; // can't transit directly from here @@ -254,14 +234,6 @@ ReservingReplicas::ReservingReplicas(my_context ctx) } } -ReservingReplicas::~ReservingReplicas() -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - // it's OK to try and clear the flag even if we don't hold it - // (the flag remembers the actual holder) - scrbr->clear_reserving_now(); -} - sc::result ReservingReplicas::react(const ReplicaGrant& ev) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases @@ -305,26 +277,6 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev) return transit<PrimaryIdle>(); } -sc::result ReservingReplicas::react(const ReservationTimeout&) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - auto& session = context<Session>(); - dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl; - ceph_assert(session.m_reservations); - - session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out); - - const auto msg = fmt::format( - "osd.{} PgScrubber: {} timeout on reserving replicas (since {})", - scrbr->get_whoami(), scrbr->get_spgid(), entered_at); - dout(1) << msg << dendl; - scrbr->get_clog()->warn() << msg; - - // cause the scrubber to stop the scrub session, marking 'reservation - // failure' as the cause (affecting future scheduling) - scrbr->flag_reservations_failure(); - return transit<PrimaryIdle>(); -} // ----------------------- ActiveScrubbing ----------------------------------- diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index b9f60481674..cf8d28c765b 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -160,9 +160,6 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData); /// all replicas have granted our reserve request MEV(RemotesReserved) -/// reservations have timed out 
-MEV(ReservationTimeout) - /// initiate a new scrubbing session (relevant if we are a Primary) MEV(StartScrub) @@ -565,25 +562,21 @@ struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>, ScrubTimePoint m_session_started_at{ScrubClock::now()}; }; -struct ReservingReplicas : sc::state<ReservingReplicas, Session>, - NamedSimply { +struct ReservingReplicas : sc::state<ReservingReplicas, Session>, NamedSimply { explicit ReservingReplicas(my_context ctx); - ~ReservingReplicas(); - using reactions = mpl::list<sc::custom_reaction<ReplicaGrant>, - sc::custom_reaction<ReplicaReject>, - sc::transition<RemotesReserved, ActiveScrubbing>, - sc::custom_reaction<ReservationTimeout>>; + ~ReservingReplicas() = default; + using reactions = mpl::list< + sc::custom_reaction<ReplicaGrant>, + sc::custom_reaction<ReplicaReject>, + sc::transition<RemotesReserved, ActiveScrubbing>>; ScrubTimePoint entered_at = ScrubClock::now(); - ScrubMachine::timer_event_token_t m_timeout_token; /// a "raw" event carrying a peer's grant response sc::result react(const ReplicaGrant&); /// a "raw" event carrying a peer's denial response sc::result react(const ReplicaReject&); - - sc::result react(const ReservationTimeout&); }; diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index ea893ba81f0..85c518c402f 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -206,18 +206,6 @@ struct ScrubMachineListener { virtual void set_scrub_duration(std::chrono::milliseconds duration) = 0; /** - * No new scrub session will start while a scrub was initiate on a PG, - * and that PG is trying to acquire replica resources. - * set_reserving_now()/clear_reserving_now() let's the OSD scrub-queue know - * we are busy reserving. - * - * set_reserving_now() returns 'false' if there already is a PG in the - * reserving stage of the scrub session. 
- */ - virtual bool set_reserving_now() = 0; - virtual void clear_reserving_now() = 0; - - /** * Manipulate the 'I am being scrubbed now' Scrubber's flag */ virtual void set_queued_or_active() = 0; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 66e61d856cd..1c92321c731 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -90,19 +90,49 @@ struct OSDRestrictions { }; static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t)); +/// concise passing of PG state affecting scrub to the +/// scrubber at the initiation of a scrub +struct ScrubPGPreconds { + bool allow_shallow{true}; + bool allow_deep{true}; + bool has_deep_errors{false}; + bool can_autorepair{false}; +}; +static_assert(sizeof(Scrub::ScrubPGPreconds) <= sizeof(uint32_t)); + +/// possible outcome when trying to select a PG and scrub it +enum class schedule_result_t { + scrub_initiated, // successfully started a scrub + target_specific_failure, // failed to scrub this specific target + osd_wide_failure // failed to scrub any target +}; } // namespace Scrub namespace fmt { template <> +struct formatter<Scrub::ScrubPGPreconds> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const Scrub::ScrubPGPreconds& conds, FormatContext& ctx) const + { + return fmt::format_to( + ctx.out(), "allowed(shallow/deep):{:1}/{:1},deep-err:{:1},can-autorepair:{:1}", + conds.allow_shallow, conds.allow_deep, conds.has_deep_errors, + conds.can_autorepair); + } +}; + +template <> struct formatter<Scrub::OSDRestrictions> { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } template <typename FormatContext> - auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) + auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) const { return fmt::format_to( ctx.out(), - "priority-only:{} overdue-only:{} load:{} time:{} repair-only:{}", + "priority-only:{},overdue-only:{},load:{},time:{},repair-only:{}", conds.high_priority_only, conds.only_deadlined, conds.load_is_low ? "ok" : "high", @@ -389,6 +419,23 @@ struct ScrubPgIF { virtual void replica_scrub_op(OpRequestRef op) = 0; + /** + * attempt to initiate a scrub session. + * @param osd_restrictions limitations on the types of scrubs that can + * be initiated on this OSD at this time. + * @param preconds the PG's scrub-related state at the time of the request, + * affecting scrub parameters. + * @param requested_flags the set of flags that determine the scrub type + * and attributes (to be removed in the next iteration). + * @return the result of the scrub initiation attempt. A success, + * a failure due to the specific PG, or a failure due to + * external reasons. 
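+ * An osd_wide_failure (e.g. a failure to reserve the local scrub
+ * resources) suggests the caller need not try other PGs at this time.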
+ */ + virtual Scrub::schedule_result_t start_scrub_session( + Scrub::OSDRestrictions osd_restrictions, + Scrub::ScrubPGPreconds, + const requested_scrub_t& requested_flags) = 0; + virtual void set_op_parameters(const requested_scrub_t&) = 0; /// stop any active scrubbing (on interval end) and unregister from diff --git a/src/osdc/Striper.cc b/src/osdc/Striper.cc index 69bdda06f84..d45828d15c5 100644 --- a/src/osdc/Striper.cc +++ b/src/osdc/Striper.cc @@ -407,7 +407,7 @@ void Striper::StripedReadResult::add_partial_result( << " to " << buffer_extents << dendl; for (auto& be : buffer_extents) { auto& r = partial[be.first]; - size_t actual = std::min<uint64_t>(bl.length(), be.second); + size_t actual = std::min<uint64_t>(bl.length(), be.second); //NOLINT(bugprone-use-after-move) if (buffer_extents.size() == 1) { r.first = std::move(bl); } else { diff --git a/src/pybind/mgr/CMakeLists.txt b/src/pybind/mgr/CMakeLists.txt index 0b706c74edb..b2a8ac9a325 100644 --- a/src/pybind/mgr/CMakeLists.txt +++ b/src/pybind/mgr/CMakeLists.txt @@ -13,7 +13,7 @@ if(WITH_MGR_ROOK_CLIENT) endif() if(WITH_TESTS) include(AddCephTest) - add_tox_test(mgr ${CMAKE_CURRENT_SOURCE_DIR} TOX_ENVS py3 py37 mypy flake8 jinjalint nooptional) + add_tox_test(mgr ${CMAKE_CURRENT_SOURCE_DIR} TOX_ENVS __tox_defaults__) endif() # Location needs to match default setting for mgr_module_path, currently: diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index e38122ddc4b..d796e4d8115 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -44,9 +44,6 @@ cherrypy.log.access_log.propagate = False class AgentEndpoint: - KV_STORE_AGENT_ROOT_CERT = 'cephadm_agent/root/cert' - KV_STORE_AGENT_ROOT_KEY = 'cephadm_agent/root/key' - def __init__(self, mgr: "CephadmOrchestrator") -> None: self.mgr = mgr self.ssl_certs = SSLCerts() @@ -60,14 +57,15 @@ class AgentEndpoint: cherrypy.tree.mount(self.node_proxy_endpoint, '/node-proxy', config=conf) def configure_tls(self, server: Server) -> None: - old_cert = self.mgr.get_store(self.KV_STORE_AGENT_ROOT_CERT) - old_key = self.mgr.get_store(self.KV_STORE_AGENT_ROOT_KEY) + old_cert = self.mgr.cert_key_store.get_cert('agent_endpoint_root_cert') + old_key = self.mgr.cert_key_store.get_key('agent_endpoint_key') + if old_cert and old_key: self.ssl_certs.load_root_credentials(old_cert, old_key) else: self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip()) - self.mgr.set_store(self.KV_STORE_AGENT_ROOT_CERT, self.ssl_certs.get_root_cert()) - self.mgr.set_store(self.KV_STORE_AGENT_ROOT_KEY, self.ssl_certs.get_root_key()) + self.mgr.cert_key_store.save_cert('agent_endpoint_root_cert', self.ssl_certs.get_root_cert()) + self.mgr.cert_key_store.save_key('agent_endpoint_key', self.ssl_certs.get_root_key()) host = self.mgr.get_hostname() addr = self.mgr.get_mgr_ip() diff --git a/src/pybind/mgr/cephadm/configchecks.py b/src/pybind/mgr/cephadm/configchecks.py index 6e442c4a3ce..714f9494b51 100644 --- a/src/pybind/mgr/cephadm/configchecks.py +++ b/src/pybind/mgr/cephadm/configchecks.py @@ -91,7 +91,7 @@ class SubnetLookup: speed: [hostname] } - @ property + @property def host_list(self) -> List[str]: hosts = [] for mtu in self.mtu_map: diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 5ecb142cb6a..8b536057c57 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -8,11 +8,19 @@ import logging import math import socket from typing import TYPE_CHECKING, Dict, List, Iterator, 
Optional, Any, Tuple, Set, Mapping, cast, \ - NamedTuple, Type, ValuesView + NamedTuple, Type, ValuesView, Union import orchestrator from ceph.deployment import inventory -from ceph.deployment.service_spec import ServiceSpec, PlacementSpec, TunedProfileSpec, IngressSpec +from ceph.deployment.service_spec import ( + ServiceSpec, + PlacementSpec, + TunedProfileSpec, + IngressSpec, + RGWSpec, + IscsiServiceSpec, + NvmeofServiceSpec, +) from ceph.utils import str_to_datetime, datetime_to_str, datetime_now from orchestrator import OrchestratorError, HostSpec, OrchestratorEvent, service_to_daemon_types from cephadm.services.cephadmservice import CephadmDaemonDeploySpec @@ -30,6 +38,8 @@ HOST_CACHE_PREFIX = "host." SPEC_STORE_PREFIX = "spec." AGENT_CACHE_PREFIX = 'agent.' NODE_PROXY_CACHE_PREFIX = 'node_proxy' +CERT_STORE_CERT_PREFIX = 'cert_store.cert.' +CERT_STORE_KEY_PREFIX = 'cert_store.key.' class HostCacheStatus(enum.Enum): @@ -38,6 +48,26 @@ class HostCacheStatus(enum.Enum): devices = 'devices' +class OrchSecretNotFound(OrchestratorError): + def __init__( + self, + message: Optional[str] = '', + entity: Optional[str] = '', + service_name: Optional[str] = '', + hostname: Optional[str] = '' + ): + if not message: + message = f'No secret found for entity {entity}' + if service_name: + message += f' with service name {service_name}' + if hostname: + message += f' with hostname {hostname}' + super().__init__(message) + self.entity = entity + self.service_name = service_name + self.hostname = hostname + + class Inventory: """ The inventory stores a HostSpec for all hosts persistently. @@ -309,6 +339,7 @@ class SpecStore(): if update_create: self.spec_created[name] = datetime_now() self._save(name) + self._save_certs_and_keys(spec) def save_rank_map(self, name: str, @@ -337,6 +368,75 @@ class SpecStore(): OrchestratorEvent.INFO, 'service was created') + def _save_certs_and_keys(self, spec: ServiceSpec) -> None: + if spec.service_type == 'rgw': + rgw_spec = cast(RGWSpec, spec) + if rgw_spec.rgw_frontend_ssl_certificate: + rgw_cert: Union[str, List[str]] = rgw_spec.rgw_frontend_ssl_certificate + if isinstance(rgw_cert, list): + cert_str = '\n'.join(rgw_cert) + else: + cert_str = rgw_cert + assert isinstance(cert_str, str) + self.mgr.cert_key_store.save_cert( + 'rgw_frontend_ssl_cert', + cert_str, + service_name=rgw_spec.service_name(), + user_made=True) + elif spec.service_type == 'iscsi': + iscsi_spec = cast(IscsiServiceSpec, spec) + if iscsi_spec.ssl_cert: + self.mgr.cert_key_store.save_cert( + 'iscsi_ssl_cert', + iscsi_spec.ssl_cert, + service_name=iscsi_spec.service_name(), + user_made=True) + if iscsi_spec.ssl_key: + self.mgr.cert_key_store.save_key( + 'iscsi_ssl_key', + iscsi_spec.ssl_key, + service_name=iscsi_spec.service_name(), + user_made=True) + elif spec.service_type == 'ingress': + ingress_spec = cast(IngressSpec, spec) + if ingress_spec.ssl_cert: + self.mgr.cert_key_store.save_cert( + 'ingress_ssl_cert', + ingress_spec.ssl_cert, + service_name=ingress_spec.service_name(), + user_made=True) + if ingress_spec.ssl_key: + self.mgr.cert_key_store.save_key( + 'ingress_ssl_key', + ingress_spec.ssl_key, + service_name=ingress_spec.service_name(), + user_made=True) + elif spec.service_type == 'nvmeof': + nvmeof_spec = cast(NvmeofServiceSpec, spec) + for cert_attr in [ + 'server_cert', + 'client_cert', + 'root_ca_cert' + ]: + cert = getattr(nvmeof_spec, cert_attr, None) + if cert: + self.mgr.cert_key_store.save_cert( + f'nvmeof_{cert_attr}', + cert, + service_name=nvmeof_spec.service_name(), + 
user_made=True) + for key_attr in [ + 'server_key', + 'client_key', + ]: + key = getattr(nvmeof_spec, key_attr, None) + if key: + self.mgr.cert_key_store.save_key( + f'nvmeof_{key_attr}', + key, + service_name=nvmeof_spec.service_name(), + user_made=True) + def rm(self, service_name: str) -> bool: if service_name not in self._specs: return False @@ -353,6 +453,7 @@ class SpecStore(): # type: (str) -> bool found = service_name in self._specs if found: + self._rm_certs_and_keys(self._specs[service_name]) del self._specs[service_name] if service_name in self._rank_maps: del self._rank_maps[service_name] @@ -364,6 +465,22 @@ class SpecStore(): self.mgr.set_store(SPEC_STORE_PREFIX + service_name, None) return found + def _rm_certs_and_keys(self, spec: ServiceSpec) -> None: + if spec.service_type == 'rgw': + self.mgr.cert_key_store.rm_cert('rgw_frontend_ssl_cert', service_name=spec.service_name()) + if spec.service_type == 'iscsi': + self.mgr.cert_key_store.rm_cert('iscsi_ssl_cert', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_key('iscsi_ssl_key', service_name=spec.service_name()) + if spec.service_type == 'ingress': + self.mgr.cert_key_store.rm_cert('ingress_ssl_cert', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_key('ingress_ssl_key', service_name=spec.service_name()) + if spec.service_type == 'nvmeof': + self.mgr.cert_key_store.rm_cert('nvmeof_server_cert', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_cert('nvmeof_client_cert', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_cert('nvmeof_root_ca_cert', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_key('nvmeof_server_key', service_name=spec.service_name()) + self.mgr.cert_key_store.rm_key('nvmeof_client_key', service_name=spec.service_name()) + def get_created(self, spec: ServiceSpec) -> Optional[datetime.datetime]: return self.spec_created.get(spec.service_name()) @@ -1699,6 +1816,294 @@ class AgentCache(): self.save_agent(daemon_spec.host) +class Cert(): + def __init__(self, cert: str = '', user_made: bool = False) -> None: + self.cert = cert + self.user_made = user_made + + def __bool__(self) -> bool: + return bool(self.cert) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, Cert): + return self.cert == other.cert and self.user_made == other.user_made + return NotImplemented + + def to_json(self) -> Dict[str, Union[str, bool]]: + return { + 'cert': self.cert, + 'user_made': self.user_made + } + + @classmethod + def from_json(cls, data: Dict[str, Union[str, bool]]) -> 'Cert': + if 'cert' not in data: + return cls() + cert = data['cert'] + if not isinstance(cert, str): + raise OrchestratorError('Tried to make Cert object with non-string cert') + if any(k not in ['cert', 'user_made'] for k in data.keys()): + raise OrchestratorError(f'Got unknown field for Cert object. 
Fields: {data.keys()}') + user_made: Union[str, bool] = data.get('user_made', False) + if not isinstance(user_made, bool): + if isinstance(user_made, str): + if user_made.lower() == 'true': + user_made = True + elif user_made.lower() == 'false': + user_made = False + try: + user_made = bool(user_made) + except Exception: + raise OrchestratorError(f'Expected user_made field in Cert object to be bool but got {type(user_made)}') + return cls(cert=cert, user_made=user_made) + + +class PrivKey(): + def __init__(self, key: str = '', user_made: bool = False) -> None: + self.key = key + self.user_made = user_made + + def __bool__(self) -> bool: + return bool(self.key) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, PrivKey): + return self.key == other.key and self.user_made == other.user_made + return NotImplemented + + def to_json(self) -> Dict[str, Union[str, bool]]: + return { + 'key': self.key, + 'user_made': self.user_made + } + + @classmethod + def from_json(cls, data: Dict[str, str]) -> 'PrivKey': + if 'key' not in data: + return cls() + key = data['key'] + if not isinstance(key, str): + raise OrchestratorError('Tried to make PrivKey object with non-string key') + if any(k not in ['key', 'user_made'] for k in data.keys()): + raise OrchestratorError(f'Got unknown field for PrivKey object. Fields: {data.keys()}') + user_made: Union[str, bool] = data.get('user_made', False) + if not isinstance(user_made, bool): + if isinstance(user_made, str): + if user_made.lower() == 'true': + user_made = True + elif user_made.lower() == 'false': + user_made = False + try: + user_made = bool(user_made) + except Exception: + raise OrchestratorError(f'Expected user_made field in PrivKey object to be bool but got {type(user_made)}') + return cls(key=key, user_made=user_made) + + +class CertKeyStore(): + service_name_cert = [ + 'rgw_frontend_ssl_cert', + 'iscsi_ssl_cert', + 'ingress_ssl_cert', + 'nvmeof_server_cert', + 'nvmeof_client_cert', + 'nvmeof_root_ca_cert', + ] + + host_cert = [ + 'grafana_cert', + 'alertmanager_cert', + 'prometheus_cert', + 'node_exporter_cert', + ] + + host_key = [ + 'grafana_key', + 'alertmanager_key', + 'prometheus_key', + 'node_exporter_key', + ] + + service_name_key = [ + 'iscsi_ssl_key', + 'ingress_ssl_key', + 'nvmeof_server_key', + 'nvmeof_client_key', + ] + + known_certs: Dict[str, Any] = {} + known_keys: Dict[str, Any] = {} + + def __init__(self, mgr: 'CephadmOrchestrator') -> None: + self.mgr: CephadmOrchestrator = mgr + self._init_known_cert_key_dicts() + + def _init_known_cert_key_dicts(self) -> None: + # In an effort to try and track all the certs we manage in cephadm + # we're being explicit here and listing them out. + self.known_certs = { + 'rgw_frontend_ssl_cert': {}, # service-name -> cert + 'iscsi_ssl_cert': {}, # service-name -> cert + 'ingress_ssl_cert': {}, # service-name -> cert + 'nvmeof_server_cert': {}, # service-name -> cert + 'nvmeof_client_cert': {}, # service-name -> cert + 'nvmeof_root_ca_cert': {}, # service-name -> cert + 'agent_endpoint_root_cert': Cert(), # cert + 'mgmt_gw_root_cert': Cert(), # cert + 'service_discovery_root_cert': Cert(), # cert + 'grafana_cert': {}, # host -> cert + 'alertmanager_cert': {}, # host -> cert + 'prometheus_cert': {}, # host -> cert + 'node_exporter_cert': {}, # host -> cert + } + # Similar to certs but for priv keys. 
Entries in known_certs + # that don't have a key here are probably certs in PEM format + # so there is no need to store a separate key + self.known_keys = { + 'agent_endpoint_key': PrivKey(), # key + 'service_discovery_key': PrivKey(), # key + 'mgmt_gw_root_key': PrivKey(), # key + 'grafana_key': {}, # host -> key + 'alertmanager_key': {}, # host -> key + 'prometheus_key': {}, # host -> key + 'node_exporter_key': {}, # host -> key + 'iscsi_ssl_key': {}, # service-name -> key + 'ingress_ssl_key': {}, # service-name -> key + 'nvmeof_server_key': {}, # service-name -> key + 'nvmeof_client_key': {}, # service-name -> key + } + + def get_cert(self, entity: str, service_name: str = '', host: str = '') -> str: + self._validate_cert_entity(entity, service_name, host) + + cert = Cert() + if entity in self.service_name_cert or entity in self.host_cert: + var = service_name if entity in self.service_name_cert else host + if var not in self.known_certs[entity]: + return '' + cert = self.known_certs[entity][var] + else: + cert = self.known_certs[entity] + if not cert or not isinstance(cert, Cert): + return '' + return cert.cert + + def save_cert(self, entity: str, cert: str, service_name: str = '', host: str = '', user_made: bool = False) -> None: + self._validate_cert_entity(entity, service_name, host) + + cert_obj = Cert(cert, user_made) + + j: Union[str, Dict[Any, Any], None] = None + if entity in self.service_name_cert or entity in self.host_cert: + var = service_name if entity in self.service_name_cert else host + j = {} + self.known_certs[entity][var] = cert_obj + for name in self.known_certs[entity].keys(): + j[name] = Cert.to_json(self.known_certs[entity][name]) + else: + self.known_certs[entity] = cert_obj + j = Cert.to_json(cert_obj) + self.mgr.set_store(CERT_STORE_CERT_PREFIX + entity, json.dumps(j)) + + def rm_cert(self, entity: str, service_name: str = '', host: str = '') -> None: + self.save_cert(entity, cert='', service_name=service_name, host=host) + + def _validate_cert_entity(self, entity: str, service_name: str = '', host: str = '') -> None: + if entity not in self.known_certs.keys(): + raise OrchestratorError(f'Attempted to access cert for unknown entity {entity}') + + if entity in self.host_cert and not host: + raise OrchestratorError(f'Need host to access cert for entity {entity}') + + if entity in self.service_name_cert and not service_name: + raise OrchestratorError(f'Need service name to access cert for entity {entity}') + + def cert_ls(self) -> Dict[str, Union[bool, Dict[str, bool]]]: + ls: Dict[str, Any] = {} + for k, v in self.known_certs.items(): + if k in self.service_name_cert or k in self.host_cert: + tmp: Dict[str, Any] = {key: True for key in v if v[key]} + ls[k] = tmp if tmp else False + else: + ls[k] = bool(v) + return ls + + def get_key(self, entity: str, service_name: str = '', host: str = '') -> str: + self._validate_key_entity(entity, host) + + key = PrivKey() + if entity in self.host_key or entity in self.service_name_key: + var = service_name if entity in self.service_name_key else host + if var not in self.known_keys[entity]: + return '' + key = self.known_keys[entity][var] + else: + key = self.known_keys[entity] + if not key or not isinstance(key, PrivKey): + return '' + return key.key + + def save_key(self, entity: str, key: str, service_name: str = '', host: str = '', user_made: bool = False) -> None: + self._validate_key_entity(entity, host) + + pkey = PrivKey(key, user_made) + + j: Union[str, Dict[Any, Any], None] = None + if entity in 
self.host_key or entity in self.service_name_key: + var = service_name if entity in self.service_name_key else host + j = {} + self.known_keys[entity][var] = pkey + for k in self.known_keys[entity]: + j[k] = PrivKey.to_json(self.known_keys[entity][k]) + else: + self.known_keys[entity] = pkey + j = PrivKey.to_json(pkey) + self.mgr.set_store(CERT_STORE_KEY_PREFIX + entity, json.dumps(j)) + + def rm_key(self, entity: str, service_name: str = '', host: str = '') -> None: + self.save_key(entity, key='', service_name=service_name, host=host) + + def _validate_key_entity(self, entity: str, host: str = '') -> None: + if entity not in self.known_keys.keys(): + raise OrchestratorError(f'Attempted to access priv key for unknown entity {entity}') + + if entity in self.host_key and not host: + raise OrchestratorError(f'Need host to access priv key for entity {entity}') + + def key_ls(self) -> Dict[str, Union[bool, Dict[str, bool]]]: + ls: Dict[str, Any] = {} + for k, v in self.known_keys.items(): + if k in self.host_key or k in self.service_name_key: + tmp: Dict[str, Any] = {key: True for key in v if v[key]} + ls[k] = tmp if tmp else False + else: + ls[k] = bool(v) + return ls + + def load(self) -> None: + for k, v in self.mgr.get_store_prefix(CERT_STORE_CERT_PREFIX).items(): + entity = k[len(CERT_STORE_CERT_PREFIX):] + self.known_certs[entity] = json.loads(v) + if entity in self.service_name_cert or entity in self.host_cert: + for name in self.known_certs[entity]: + cert_obj = Cert.from_json(self.known_certs[entity][name]) + self.known_certs[entity][name] = cert_obj + else: + cert_obj = Cert.from_json(self.known_certs[entity]) + self.known_certs[entity] = cert_obj + + for k, v in self.mgr.get_store_prefix(CERT_STORE_KEY_PREFIX).items(): + entity = k[len(CERT_STORE_KEY_PREFIX):] + self.known_keys[entity] = json.loads(v) + if entity in self.host_key or entity in self.service_name_key: + for name in self.known_keys[entity]: + priv_key_obj = PrivKey.from_json(self.known_keys[entity][name]) + self.known_keys[entity][name] = priv_key_obj + else: + priv_key_obj = PrivKey.from_json(self.known_keys[entity]) + self.known_keys[entity] = priv_key_obj + + class EventStore(): def __init__(self, mgr): # type: (CephadmOrchestrator) -> None diff --git a/src/pybind/mgr/cephadm/migrations.py b/src/pybind/mgr/cephadm/migrations.py index 27f777af6b4..8de3504b06d 100644 --- a/src/pybind/mgr/cephadm/migrations.py +++ b/src/pybind/mgr/cephadm/migrations.py @@ -14,7 +14,7 @@ from orchestrator import OrchestratorError, DaemonDescription if TYPE_CHECKING: from .module import CephadmOrchestrator -LAST_MIGRATION = 6 +LAST_MIGRATION = 7 logger = logging.getLogger(__name__) @@ -105,6 +105,10 @@ class Migrations: if self.migrate_5_6(): self.set(6) + if self.mgr.migration_current == 6: + if self.migrate_6_7(): + self.set(7) + def migrate_0_1(self) -> bool: """ Migration 0 -> 1 @@ -410,6 +414,60 @@ class Migrations: self.rgw_migration_queue = [] return True + def migrate_6_7(self) -> bool: + # start by placing certs/keys from rgw, iscsi, and ingress specs into cert store + for spec in self.mgr.spec_store.all_specs.values(): + if spec.service_type in ['rgw', 'ingress', 'iscsi']: + logger.info(f'Migrating certs/keys for {spec.service_name()} spec to cert store') + self.mgr.spec_store._save_certs_and_keys(spec) + + # Migrate service discovery and agent endpoint certs + # These constants were taken from where these certs were + # originally generated and should be the location they + # were stored at prior to the cert store + KV_STORE_AGENT_ROOT_CERT = 
'cephadm_agent/root/cert' + KV_STORE_AGENT_ROOT_KEY = 'cephadm_agent/root/key' + KV_STORE_SD_ROOT_CERT = 'service_discovery/root/cert' + KV_STORE_SD_ROOT_KEY = 'service_discovery/root/key' + + agent_endpoint_cert = self.mgr.get_store(KV_STORE_AGENT_ROOT_CERT) + if agent_endpoint_cert: + logger.info('Migrating agent root cert to cert store') + self.mgr.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_cert) + agent_endpoint_key = self.mgr.get_store(KV_STORE_AGENT_ROOT_KEY) + if agent_endpoint_key: + logger.info('Migrating agent root key to cert store') + self.mgr.cert_key_store.save_key('agent_endpoint_key', agent_endpoint_key) + service_discovery_cert = self.mgr.get_store(KV_STORE_SD_ROOT_CERT) + if service_discovery_cert: + logger.info('Migrating service discovery cert to cert store') + self.mgr.cert_key_store.save_cert('service_discovery_root_cert', service_discovery_cert) + service_discovery_key = self.mgr.get_store(KV_STORE_SD_ROOT_KEY) + if service_discovery_key: + logger.info('Migrating service discovery key to cert store') + self.mgr.cert_key_store.save_key('service_discovery_key', service_discovery_key) + + # grafana certs are stored based on the host they are placed on + for grafana_daemon in self.mgr.cache.get_daemons_by_type('grafana'): + logger.info(f'Checking for cert/key for {grafana_daemon.name()}') + hostname = grafana_daemon.hostname + assert hostname is not None # for mypy + grafana_cert_path = f'{hostname}/grafana_crt' + grafana_key_path = f'{hostname}/grafana_key' + grafana_cert = self.mgr.get_store(grafana_cert_path) + if grafana_cert: + logger.info(f'Migrating {grafana_daemon.name()} cert to cert store') + self.mgr.cert_key_store.save_cert('grafana_cert', grafana_cert, host=hostname) + grafana_key = self.mgr.get_store(grafana_key_path) + if grafana_key: + logger.info(f'Migrating {grafana_daemon.name()} key to cert store') + self.mgr.cert_key_store.save_key('grafana_key', grafana_key, host=hostname) + + # NOTE: prometheus, alertmanager, and node-exporter certs were not stored + # and appeared to just be generated at daemon deploy time if secure_monitoring_stack + # was set to true. 
Therefore we have nothing to migrate for those daemons + return True + def queue_migrate_rgw_spec(mgr: "CephadmOrchestrator", spec_dict: Dict[Any, Any]) -> None: """ diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index e8696ae2089..c34f77a740c 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -14,8 +14,6 @@ from tempfile import TemporaryDirectory, NamedTemporaryFile from urllib.error import HTTPError from threading import Event -from cephadm.service_discovery import ServiceDiscovery - from ceph.deployment.service_spec import PrometheusSpec import string @@ -70,6 +68,7 @@ from .services.ingress import IngressService from .services.container import CustomContainerService from .services.iscsi import IscsiService from .services.nvmeof import NvmeofService +from .services.mgmt_gateway import MgmtGatewayService from .services.nfs import NFSService from .services.osd import OSDRemovalQueue, OSDService, OSD, NotFoundError from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService, \ @@ -78,8 +77,19 @@ from .services.jaeger import ElasticSearchService, JaegerAgentService, JaegerCol from .services.node_proxy import NodeProxy from .services.smb import SMBService from .schedule import HostAssignment -from .inventory import Inventory, SpecStore, HostCache, AgentCache, EventStore, \ - ClientKeyringStore, ClientKeyringSpec, TunedProfileStore, NodeProxyCache +from .inventory import ( + Inventory, + SpecStore, + HostCache, + AgentCache, + EventStore, + ClientKeyringStore, + ClientKeyringSpec, + TunedProfileStore, + NodeProxyCache, + CertKeyStore, + OrchSecretNotFound, +) from .upgrade import CephadmUpgrade from .template import TemplateMgr from .utils import CEPH_IMAGE_TYPES, RESCHEDULE_FROM_OFFLINE_HOSTS_TYPES, forall_hosts, \ @@ -131,6 +141,7 @@ DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' +DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1' DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29' DEFAULT_SAMBA_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64' # ------------------------------------------------------------------------------ @@ -270,6 +281,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, desc='SNMP Gateway container image', ), Option( + 'container_image_nginx', + default=DEFAULT_NGINX_IMAGE, + desc='Nginx container image', + ), + Option( 'container_image_elasticsearch', default=DEFAULT_ELASTICSEARCH_IMAGE, desc='elasticsearch container image', @@ -554,6 +570,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.container_image_haproxy = '' self.container_image_keepalived = '' self.container_image_snmp_gateway = '' + self.container_image_nginx = '' self.container_image_elasticsearch = '' self.container_image_jaeger_agent = '' self.container_image_jaeger_collector = '' @@ -654,6 +671,9 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.tuned_profile_utils = TunedProfileUtils(self) + self.cert_key_store = CertKeyStore(self) + self.cert_key_store.load() + # ensure the host lists are in sync for h in self.inventory.keys(): if h not in self.cache.daemons: @@ -697,6 +717,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, RgwService, SMBService, 
SNMPGatewayService, + MgmtGatewayService, ] # https://github.com/python/mypy/issues/8993 @@ -907,7 +928,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'mon', 'crash', 'ceph-exporter', 'node-proxy', 'prometheus', 'node-exporter', 'grafana', 'alertmanager', 'container', 'agent', 'snmp-gateway', 'loki', 'promtail', - 'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query' + 'elasticsearch', 'jaeger-collector', 'jaeger-agent', 'jaeger-query', 'mgmt-gateway' ] if forcename: if len([d for d in existing if d.daemon_id == forcename]): @@ -1639,6 +1660,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, 'prometheus': self.container_image_prometheus, 'promtail': self.container_image_promtail, 'snmp-gateway': self.container_image_snmp_gateway, + 'mgmt-gateway': self.container_image_nginx, # The image can't be resolved here, the necessary information # is only available when a container is deployed (given # via spec). @@ -2915,17 +2937,18 @@ Then run the following: deps.append('ingress') # add dependency on ceph-exporter daemons deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] + deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')] if self.secure_monitoring_stack: if prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'grafana': - deps += get_daemon_names(['prometheus', 'loki']) + deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway']) if self.secure_monitoring_stack and prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') elif daemon_type == 'alertmanager': - deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway']) + deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway']) if self.secure_monitoring_stack and alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': @@ -2936,11 +2959,15 @@ Then run the following: port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT deps.append(build_url(host=dd.hostname, port=port).lstrip('/')) deps = sorted(deps) + elif daemon_type == 'mgmt-gateway': + # url_prefix for monitoring daemons depends on the presence of mgmt-gateway + # while dashboard urls depend on the mgr daemons + deps += get_daemon_names(['mgr', 'grafana', 'prometheus', 'alertmanager']) else: - # TODO(redo): some error message! 
+ # this daemon type doesn't need deps mgmt pass - if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']: + if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']: deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}') return sorted(deps) @@ -3014,7 +3041,7 @@ Then run the following: ) daemons.append(sd) - @ forall_hosts + @forall_hosts def create_func_map(*args: Any) -> str: daemon_spec = self.cephadm_services[daemon_type].prepare_create(*args) with self.async_timeout_handler(daemon_spec.host, f'cephadm deploy ({daemon_spec.daemon_type} daemon)'): @@ -3121,6 +3148,38 @@ Then run the following: 'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()} @handle_orch_error + def cert_store_cert_ls(self) -> Dict[str, Any]: + return self.cert_key_store.cert_ls() + + @handle_orch_error + def cert_store_key_ls(self) -> Dict[str, Any]: + return self.cert_key_store.key_ls() + + @handle_orch_error + def cert_store_get_cert( + self, + entity: str, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> str: + cert = self.cert_key_store.get_cert(entity, service_name or '', hostname or '') + if not cert: + raise OrchSecretNotFound(entity=entity, service_name=service_name, hostname=hostname) + return cert + + @handle_orch_error + def cert_store_get_key( + self, + entity: str, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> str: + key = self.cert_key_store.get_key(entity, service_name or '', hostname or '') + if not key: + raise OrchSecretNotFound(entity=entity, service_name=service_name, hostname=hostname) + return key + + @handle_orch_error def apply_mon(self, spec: ServiceSpec) -> str: return self._apply(spec) @@ -3236,7 +3295,7 @@ Then run the following: @handle_orch_error def service_discovery_dump_cert(self) -> str: - root_cert = self.get_store(ServiceDiscovery.KV_STORE_SD_ROOT_CERT) + root_cert = self.cert_key_store.get_cert('service_discovery_root_cert') if not root_cert: raise OrchestratorError('No certificate found for service discovery') return root_cert @@ -3262,6 +3321,9 @@ Then run the following: 'data': self._preview_osdspecs(osdspecs=[cast(DriveGroupSpec, spec)])} svc = self.cephadm_services[spec.service_type] + rank_map = None + if svc.ranked(spec): + rank_map = self.spec_store[spec.service_name()].rank_map ha = HostAssignment( spec=spec, hosts=self.cache.get_schedulable_hosts(), @@ -3270,7 +3332,7 @@ Then run the following: networks=self.cache.networks, daemons=self.cache.get_daemons_by_service(spec.service_name()), allow_colo=svc.allow_colo(), - rank_map=self.spec_store[spec.service_name()].rank_map if svc.ranked() else None + rank_map=rank_map ) ha.validate() hosts, to_add, to_remove = ha.place() @@ -3318,6 +3380,7 @@ Then run the following: 'crash': PlacementSpec(host_pattern='*'), 'container': PlacementSpec(count=1), 'snmp-gateway': PlacementSpec(count=1), + 'mgmt-gateway': PlacementSpec(count=1), 'elasticsearch': PlacementSpec(count=1), 'jaeger-agent': PlacementSpec(host_pattern='*'), 'jaeger-collector': PlacementSpec(count=1), @@ -3341,7 +3404,7 @@ Then run the following: (f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {max(5, host_count)}.')) elif spec.service_type != 'osd': if spec.placement.count > (max_count * host_count): - raise OrchestratorError((f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {host_count*max_count} ({host_count}x{max_count}).' 
+ raise OrchestratorError((f'The maximum number of {spec.service_type} daemons allowed with {host_count} hosts is {host_count * max_count} ({host_count}x{max_count}).' + ' This limit can be adjusted by changing the mgr/cephadm/max_count_per_host config option')) if spec.placement.count_per_host is not None and spec.placement.count_per_host > max_count and spec.service_type != 'osd': @@ -3457,6 +3520,10 @@ Then run the following: return self._apply(spec) @handle_orch_error + def apply_mgmt_gateway(self, spec: ServiceSpec) -> str: + return self._apply(spec) + + @handle_orch_error def set_unmanaged(self, service_name: str, value: bool) -> str: return self.spec_store.set_unmanaged(service_name, value) diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 4eb1bfe3d7b..eaaf4386f62 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -41,7 +41,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof'] +REQUIRES_POST_ACTIONS = ['grafana', 'iscsi', 'prometheus', 'alertmanager', 'rgw', 'nvmeof', 'mgmt-gateway'] WHICH = ssh.RemoteExecutable('which') CEPHADM_EXE = ssh.RemoteExecutable('/usr/bin/cephadm') @@ -136,8 +136,10 @@ class CephadmServe: def _check_certificates(self) -> None: for d in self.mgr.cache.get_daemons_by_type('grafana'): - cert = self.mgr.get_store(f'{d.hostname}/grafana_crt') - key = self.mgr.get_store(f'{d.hostname}/grafana_key') + host = d.hostname + assert host is not None + cert = self.mgr.cert_key_store.get_cert('grafana_cert', host=host) + key = self.mgr.cert_key_store.get_key('grafana_key', host=host) if (not cert or not cert.strip()) and (not key or not key.strip()): # certificate/key are empty... 
nothing to check return @@ -778,7 +780,7 @@ class CephadmServe: } rank_map = None - if svc.ranked(): + if svc.ranked(spec): rank_map = self.mgr.spec_store[spec.service_name()].rank_map or {} ha = HostAssignment( spec=spec, @@ -1091,10 +1093,12 @@ class CephadmServe: self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}') self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...') action = 'reconfig' - # we need only redeploy if secure_monitoring_stack value has changed: + # we need only redeploy if secure_monitoring_stack or mgmt-gateway value has changed: + # TODO(redo): check if we should just go always with redeploy (it's fast enough) if dd.daemon_type in ['prometheus', 'node-exporter', 'alertmanager']: - diff = list(set(last_deps) - set(deps)) - if any('secure_monitoring_stack' in e for e in diff): + diff = list(set(last_deps).symmetric_difference(set(deps))) + REDEPLOY_TRIGGERS = ['secure_monitoring_stack', 'mgmt-gateway'] + if any(svc in e for e in diff for svc in REDEPLOY_TRIGGERS): action = 'redeploy' elif dd.daemon_type == 'jaeger-agent': # changes to jaeger-agent deps affect the way the unit.run for diff --git a/src/pybind/mgr/cephadm/service_discovery.py b/src/pybind/mgr/cephadm/service_discovery.py index 2b82f87493f..b72570382f4 100644 --- a/src/pybind/mgr/cephadm/service_discovery.py +++ b/src/pybind/mgr/cephadm/service_discovery.py @@ -45,9 +45,6 @@ class Route(NamedTuple): class ServiceDiscovery: - KV_STORE_SD_ROOT_CERT = 'service_discovery/root/cert' - KV_STORE_SD_ROOT_KEY = 'service_discovery/root/key' - def __init__(self, mgr: "CephadmOrchestrator") -> None: self.mgr = mgr self.ssl_certs = SSLCerts() @@ -89,14 +86,14 @@ class ServiceDiscovery: self.mgr.set_store('service_discovery/root/username', self.username) def configure_tls(self, server: Server) -> None: - old_cert = self.mgr.get_store(self.KV_STORE_SD_ROOT_CERT) - old_key = self.mgr.get_store(self.KV_STORE_SD_ROOT_KEY) + old_cert = self.mgr.cert_key_store.get_cert('service_discovery_root_cert') + old_key = self.mgr.cert_key_store.get_key('service_discovery_key') if old_key and old_cert: self.ssl_certs.load_root_credentials(old_cert, old_key) else: self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip()) - self.mgr.set_store(self.KV_STORE_SD_ROOT_CERT, self.ssl_certs.get_root_cert()) - self.mgr.set_store(self.KV_STORE_SD_ROOT_KEY, self.ssl_certs.get_root_key()) + self.mgr.cert_key_store.save_cert('service_discovery_root_cert', self.ssl_certs.get_root_cert()) + self.mgr.cert_key_store.save_key('service_discovery_key', self.ssl_certs.get_root_key()) addr = self.mgr.get_mgr_ip() host_fqdn = socket.getfqdn(addr) server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files( diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 4b22400b49e..72e6177bc1d 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -5,6 +5,8 @@ import re import socket import time from abc import ABCMeta, abstractmethod +import ipaddress +from urllib.parse import urlparse from typing import TYPE_CHECKING, List, Callable, TypeVar, \ Optional, Dict, Any, Tuple, NewType, cast @@ -73,6 +75,61 @@ def simplified_keyring(entity: str, contents: str) -> str: return keyring +def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[str]]: + dashboard_endpoints: List[str] = [] + port = None + protocol = None + mgr_map = svc.mgr.get('mgr_map') + url = 
mgr_map.get('services', {}).get('dashboard', None)
+    if url:
+        p_result = urlparse(url.rstrip('/'))
+        protocol = p_result.scheme
+        port = p_result.port
+    # assume that they are all dashboards on the same port as the active mgr.
+    for dd in svc.mgr.cache.get_daemons_by_service('mgr'):
+        if not port:
+            continue
+        assert dd.hostname is not None
+        addr = svc._inventory_get_fqdn(dd.hostname)
+        dashboard_endpoints.append(f'{addr}:{port}')
+
+    return dashboard_endpoints, protocol
+
+
+def get_dashboard_urls(svc: 'CephadmService') -> List[str]:
+    # dashboard(s)
+    dashboard_urls: List[str] = []
+    mgr_map = svc.mgr.get('mgr_map')
+    port = None
+    proto = None  # http: or https:
+    url = mgr_map.get('services', {}).get('dashboard', None)
+    if url:
+        p_result = urlparse(url.rstrip('/'))
+        hostname = socket.getfqdn(p_result.hostname)
+        try:
+            ip = ipaddress.ip_address(hostname)
+        except ValueError:
+            pass
+        else:
+            if ip.version == 6:
+                hostname = f'[{hostname}]'
+        dashboard_urls.append(f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
+        proto = p_result.scheme
+        port = p_result.port
+
+    # assume that they are all dashboards on the same port as the active mgr.
+    for dd in svc.mgr.cache.get_daemons_by_service('mgr'):
+        if not port:
+            continue
+        if dd.daemon_id == svc.mgr.get_mgr_id():
+            continue
+        assert dd.hostname is not None
+        addr = svc._inventory_get_fqdn(dd.hostname)
+        dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
+
+    return dashboard_urls
+
+
 class CephadmDaemonDeploySpec:
     # typing.NamedTuple + Generic is broken in py36
     def __init__(self, host: str, daemon_id: str,
@@ -231,7 +288,7 @@ class CephadmService(metaclass=ABCMeta):
         """
         return None

-    def ranked(self) -> bool:
+    def ranked(self, spec: ServiceSpec) -> bool:
         """
         If True, we will assign a stable rank (0, 1, ...) and monotonically
         increasing generation (0, 1, ...) to each daemon we create/deploy.
@@ -336,22 +393,21 @@ class CephadmService(metaclass=ABCMeta):
         addr = self.mgr.inventory.get_addr(hostname)
         return socket.getfqdn(addr)

-    def _set_service_url_on_dashboard(self,
-                                      service_name: str,
-                                      get_mon_cmd: str,
-                                      set_mon_cmd: str,
-                                      service_url: str) -> None:
-        """A helper to get and set service_url via Dashboard's MON command.
-
-        If result of get_mon_cmd differs from service_url, set_mon_cmd will
+    def _set_value_on_dashboard(self,
+                                service_name: str,
+                                get_mon_cmd: str,
+                                set_mon_cmd: str,
+                                new_value: str) -> None:
+        """A helper to get and set values via Dashboard's MON command.
+        If the result of get_mon_cmd differs from new_value, set_mon_cmd will
        be sent to set the new value.
""" def get_set_cmd_dicts(out: str) -> List[dict]: cmd_dict = { 'prefix': set_mon_cmd, - 'value': service_url + 'value': new_value } - return [cmd_dict] if service_url != out else [] + return [cmd_dict] if new_value != out else [] self._check_and_set_dashboard( service_name=service_name, diff --git a/src/pybind/mgr/cephadm/services/ingress.py b/src/pybind/mgr/cephadm/services/ingress.py index e46fab732ed..a17000cd632 100644 --- a/src/pybind/mgr/cephadm/services/ingress.py +++ b/src/pybind/mgr/cephadm/services/ingress.py @@ -169,9 +169,9 @@ class IngressService(CephService): if spec.enable_haproxy_protocol: server_opts.append("send-proxy-v2") logger.debug("enabled default server opts: %r", server_opts) - ip = '[..]' if spec.virtual_ips_list else str(spec.virtual_ip).split('/')[0] or daemon_spec.ip or '[..]' + ip = '[::]' if spec.virtual_ips_list else str(spec.virtual_ip).split('/')[0] or daemon_spec.ip or '[::]' frontend_port = daemon_spec.ports[0] if daemon_spec.ports else spec.frontend_port - if ip != '[..]' and frontend_port: + if ip != '[::]' and frontend_port: daemon_spec.port_ips = {str(frontend_port): ip} haproxy_conf = self.mgr.template.render( 'services/ingress/haproxy.cfg.j2', @@ -260,7 +260,10 @@ class IngressService(CephService): for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): if ifaces and ipaddress.ip_address(bare_ip) in ipaddress.ip_network(subnet): interface = list(ifaces.keys())[0] - host_ip = ifaces[interface][0] + for ip_addr in ifaces[interface]: + if ip_addr != str(bare_ip): + host_ip = ip_addr + break logger.info( f'{bare_ip} is in {subnet} on {host} interface {interface}' ) @@ -270,7 +273,10 @@ class IngressService(CephService): for subnet, ifaces in self.mgr.cache.networks.get(host, {}).items(): if subnet in spec.virtual_interface_networks: interface = list(ifaces.keys())[0] - host_ip = ifaces[interface][0] + for ip_addr in ifaces[interface]: + if ip_addr != str(bare_ip): + host_ip = ip_addr + break logger.info( f'{spec.virtual_ip} will be configured on {host} interface ' f'{interface} (which is in subnet {subnet})' diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py new file mode 100644 index 00000000000..7ba59faca28 --- /dev/null +++ b/src/pybind/mgr/cephadm/services/mgmt_gateway.py @@ -0,0 +1,146 @@ +import logging +from typing import List, Any, Tuple, Dict, cast + +from orchestrator import DaemonDescription +from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints +from cephadm.ssl_cert_utils import SSLCerts + +logger = logging.getLogger(__name__) + + +class MgmtGatewayService(CephadmService): + TYPE = 'mgmt-gateway' + SVC_TEMPLATE_PATH = 'services/mgmt-gateway/nginx.conf.j2' + EXTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/external_server.conf.j2' + INTERNAL_SVC_TEMPLATE_PATH = 'services/mgmt-gateway/internal_server.conf.j2' + INTERNAL_SERVICE_PORT = 29443 + + def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + + def get_service_endpoints(self, service_name: str) -> List[str]: + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_service(service_name): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = 
dd.ports[0] if dd.ports else None
+            srv_entries.append(f'{addr}:{port}')
+        return srv_entries
+
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        if daemon_descrs:
+            return daemon_descrs[0]
+        # if an empty list is provided, return an empty DaemonDescription
+        return DaemonDescription()
+
+    def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
+        # adjust the standby behaviour so the reverse proxy can correctly pick the active instance
+        self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '503')
+        self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'error')
+
+    def get_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str, str, str]:
+        self.ssl_certs = SSLCerts()
+        old_cert = self.mgr.cert_key_store.get_cert('mgmt_gw_root_cert')
+        old_key = self.mgr.cert_key_store.get_key('mgmt_gw_root_key')
+        if old_cert and old_key:
+            self.ssl_certs.load_root_credentials(old_cert, old_key)
+        else:
+            self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip())
+            self.mgr.cert_key_store.save_cert('mgmt_gw_root_cert', self.ssl_certs.get_root_cert())
+            self.mgr.cert_key_store.save_key('mgmt_gw_root_key', self.ssl_certs.get_root_key())
+
+        node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+        internal_cert, internal_pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip)
+        cert = svc_spec.ssl_certificate
+        pkey = svc_spec.ssl_certificate_key
+        if not (cert and pkey):
+            # if the user has not provided certificates, we generate self-signed ones
+            cert, pkey = self.ssl_certs.generate_cert(host_fqdn, node_ip)
+
+        return internal_cert, internal_pkey, cert, pkey
+
+    def get_mgmt_gateway_deps(self) -> List[str]:
+        # url_prefix for the following services depends on the presence of mgmt-gateway
+        deps: List[str] = []
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('prometheus')]
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')]
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')]
+        # secure_monitoring_stack affects the protocol used by monitoring services
+        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
+        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            # we consider mgr a dep even if the dashboard is disabled
+            # in order to be consistent with _calc_daemon_deps().
+            deps.append(dd.name())
+
+        return deps
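The get_certificates() method above uses a load-or-generate pattern for the gateway root CA: mint it once, persist it, and reuse it on every later (re)deploy so already-issued daemon certificates stay valid. A condensed, illustrative sketch, with store and ssl_certs standing in for the cert_key_store and SSLCerts objects used above:

    def load_or_generate_root_ca(store, ssl_certs, mgr_ip: str) -> None:
        old_cert = store.get_cert('mgmt_gw_root_cert')
        old_key = store.get_key('mgmt_gw_root_key')
        if old_cert and old_key:
            # reuse the persisted CA so redeploys don't invalidate old certs
            ssl_certs.load_root_credentials(old_cert, old_key)
        else:
            ssl_certs.generate_root_cert(mgr_ip)
            store.save_cert('mgmt_gw_root_cert', ssl_certs.get_root_cert())
            store.save_key('mgmt_gw_root_key', ssl_certs.get_root_key())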
+
+    def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
+        assert self.TYPE == daemon_spec.daemon_type
+        svc_spec = cast(MgmtGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        dashboard_endpoints, dashboard_scheme = get_dashboard_endpoints(self)
+        scheme = 'https' if self.mgr.secure_monitoring_stack else 'http'
+
+        prometheus_endpoints = self.get_service_endpoints('prometheus')
+        alertmanager_endpoints = self.get_service_endpoints('alertmanager')
+        grafana_endpoints = self.get_service_endpoints('grafana')
+        try:
+            grafana_spec = cast(GrafanaSpec, self.mgr.spec_store['grafana'].spec)
+            grafana_protocol = grafana_spec.protocol
+        except Exception:
+            grafana_protocol = 'https'  # default to https just for unit tests
+
+        main_context = {
+            'dashboard_endpoints': dashboard_endpoints,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+        external_server_context = {
+            'spec': svc_spec,
+            'dashboard_scheme': dashboard_scheme,
+            'grafana_scheme': grafana_protocol,
+            'prometheus_scheme': scheme,
+            'alertmanager_scheme': scheme,
+            'dashboard_endpoints': dashboard_endpoints,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+        internal_server_context = {
+            'spec': svc_spec,
+            'internal_port': self.INTERNAL_SERVICE_PORT,
+            'grafana_scheme': grafana_protocol,
+            'prometheus_scheme': scheme,
+            'alertmanager_scheme': scheme,
+            'prometheus_endpoints': prometheus_endpoints,
+            'alertmanager_endpoints': alertmanager_endpoints,
+            'grafana_endpoints': grafana_endpoints
+        }
+
+        internal_cert, internal_pkey, cert, pkey = self.get_certificates(svc_spec, daemon_spec)
+        daemon_config = {
+            "files": {
+                "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context),
+                "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, external_server_context),
+                "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, internal_server_context),
+                "nginx_internal.crt": internal_cert,
+                "nginx_internal.key": internal_pkey
+            }
+        }
+        if not svc_spec.disable_https:
+            daemon_config["files"]["nginx.crt"] = cert
+            daemon_config["files"]["nginx.key"] = pkey
+
+        return daemon_config, sorted(self.get_mgmt_gateway_deps())
+
+    def pre_remove(self, daemon: DaemonDescription) -> None:
+        """
+        Called before mgmt-gateway daemon is removed.
+ """ + # reset the standby dashboard redirection behaviour + self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '500') + self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'redirect') diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 184fb0251b8..71a9b60d31f 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -1,17 +1,16 @@ import errno -import ipaddress import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast -from urllib.parse import urlparse from mgr_module import HandleCommandResult from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ - SNMPGatewaySpec, PrometheusSpec -from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec + SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec +from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls +from cephadm.services.mgmt_gateway import MgmtGatewayService from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash from ceph.deployment.utils import wrap_ipv6 @@ -35,6 +34,9 @@ class GrafanaService(CephadmService): deps.append(f'{hash(prometheus_user + prometheus_password)}') deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + # add a dependency since url_prefix depends on the existence of mgmt-gateway + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] + prom_services = [] # type: List[str] for dd in self.mgr.cache.get_daemons_by_service('prometheus'): assert dd.hostname is not None @@ -78,13 +80,15 @@ class GrafanaService(CephadmService): daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 grafana_ini = self.mgr.template.render( 'services/grafana/grafana.ini.j2', { 'anonymous_access': spec.anonymous_access, 'initial_admin_password': spec.initial_admin_password, 'http_port': grafana_port, 'protocol': spec.protocol, - 'http_addr': grafana_ip + 'http_addr': grafana_ip, + 'use_url_prefix': mgmt_gw_enabled }) if 'dashboard' in self.mgr.get('mgr_map')['modules'] and spec.initial_admin_password: @@ -121,10 +125,8 @@ class GrafanaService(CephadmService): return config_file, sorted(deps) def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: - cert_path = f'{daemon_spec.host}/grafana_crt' - key_path = f'{daemon_spec.host}/grafana_key' - cert = self.mgr.get_store(cert_path) - pkey = self.mgr.get_store(key_path) + cert = self.mgr.cert_key_store.get_cert('grafana_cert', host=daemon_spec.host) + pkey = self.mgr.cert_key_store.get_key('grafana_key', host=daemon_spec.host) certs_present = (cert and pkey) is_valid_certificate = False (org, cn) = (None, None) @@ -148,8 +150,8 @@ class GrafanaService(CephadmService): logger.info('Regenerating cephadm self-signed grafana TLS certificates') host_fqdn = socket.getfqdn(daemon_spec.host) cert, pkey = create_self_signed_cert('Ceph', host_fqdn) - self.mgr.set_store(cert_path, cert) - self.mgr.set_store(key_path, pkey) + self.mgr.cert_key_store.save_cert('grafana_cert', cert, host=daemon_spec.host) + self.mgr.cert_key_store.save_key('grafana_key', pkey, host=daemon_spec.host) if 'dashboard' in 
self.mgr.get('mgr_map')['modules']:
             self.mgr.check_mon_command({
                 'prefix': 'dashboard set-grafana-api-ssl-verify',
@@ -189,13 +191,36 @@ class GrafanaService(CephadmService):
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec)
-        service_url = build_url(scheme=spec.protocol, host=addr, port=port)
-        self._set_service_url_on_dashboard(
-            'Grafana',
-            'dashboard get-grafana-api-url',
-            'dashboard set-grafana-api-url',
-            service_url
-        )
+
+        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
+        if mgmt_gw_daemons:
+            dd = mgmt_gw_daemons[0]
+            assert dd.hostname is not None
+            mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec)
+            mgmt_gw_port = dd.ports[0] if dd.ports else None
+            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
+            protocol = 'http' if mgmt_gw_spec.disable_https else 'https'
+            mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port)
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-url',
+                'dashboard set-grafana-api-url',
+                f'{mgmt_gw_external_endpoint}/grafana'
+            )
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-ssl-verify',
+                'dashboard set-grafana-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=spec.protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'Grafana',
+                'dashboard get-grafana-api-url',
+                'dashboard set-grafana-api-url',
+                service_url
+            )

     def pre_remove(self, daemon: DaemonDescription) -> None:
         """
@@ -203,10 +228,8 @@
         """
         if daemon.hostname is not None:
             # delete cert/key entries for this grafana daemon
-            cert_path = f'{daemon.hostname}/grafana_crt'
-            key_path = f'{daemon.hostname}/grafana_key'
-            self.mgr.set_store(cert_path, None)
-            self.mgr.set_store(key_path, None)
+            self.mgr.cert_key_store.rm_cert('grafana_cert', host=daemon.hostname)
+            self.mgr.cert_key_store.rm_key('grafana_key', host=daemon.hostname)

     def ok_to_stop(self,
                    daemon_ids: List[str],
@@ -244,44 +267,15 @@ class AlertmanagerService(CephadmService):
                     user_data['default_webhook_urls'], list):
                 default_webhook_urls.extend(user_data['default_webhook_urls'])

-        # dashboard(s)
-        dashboard_urls: List[str] = []
-        snmp_gateway_urls: List[str] = []
-        mgr_map = self.mgr.get('mgr_map')
-        port = None
-        proto = None  # http: or https:
-        url = mgr_map.get('services', {}).get('dashboard', None)
-        if url:
-            p_result = urlparse(url.rstrip('/'))
-            hostname = socket.getfqdn(p_result.hostname)
-
-            try:
-                ip = ipaddress.ip_address(hostname)
-            except ValueError:
-                pass
-            else:
-                if ip.version == 6:
-                    hostname = f'[{hostname}]'
-
-            dashboard_urls.append(
-                f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
-            proto = p_result.scheme
-            port = p_result.port
-
+        # add a dependency since url_prefix depends on the existence of mgmt-gateway
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
         # scan all mgrs to generate deps and to get standbys too.
-        # assume that they are all on the same port as the active mgr.
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
             # we consider mgr a dep even if the dashboard is disabled
             # in order to be consistent with _calc_daemon_deps().
deps.append(dd.name()) - if not port: - continue - if dd.daemon_id == self.mgr.get_mgr_id(): - continue - assert dd.hostname is not None - addr = self._inventory_get_fqdn(dd.hostname) - dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) + snmp_gateway_urls: List[str] = [] for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): assert dd.hostname is not None assert dd.ports @@ -293,7 +287,7 @@ class AlertmanagerService(CephadmService): context = { 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, - 'dashboard_urls': dashboard_urls, + 'dashboard_urls': get_dashboard_urls(self), 'default_webhook_urls': default_webhook_urls, 'snmp_gateway_urls': snmp_gateway_urls, 'secure': secure, @@ -308,16 +302,21 @@ class AlertmanagerService(CephadmService): addr = self._inventory_get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') - if self.mgr.secure_monitoring_stack: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') node_ip = self.mgr.inventory.get_addr(daemon_spec.host) host_fqdn = self._inventory_get_fqdn(daemon_spec.host) - cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert( - host_fqdn, node_ip) + cert = self.mgr.cert_key_store.get_cert('alertmanager_cert', host=daemon_spec.host) + key = self.mgr.cert_key_store.get_key('alertmanager_key', host=daemon_spec.host) + if not (cert and key): + cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert( + host_fqdn, node_ip) + self.mgr.cert_key_store.save_cert('alertmanager_cert', cert, host=daemon_spec.host) + self.mgr.cert_key_store.save_key('alertmanager_key', key, host=daemon_spec.host) context = { 'alertmanager_web_user': alertmanager_user, 'alertmanager_web_password': password_hash(alertmanager_password), @@ -331,14 +330,16 @@ class AlertmanagerService(CephadmService): 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() }, 'peers': peers, - 'web_config': '/etc/alertmanager/web.yml' + 'web_config': '/etc/alertmanager/web.yml', + 'use_url_prefix': mgmt_gw_enabled }, sorted(deps) else: return { "files": { "alertmanager.yml": yml }, - "peers": peers + "peers": peers, + 'use_url_prefix': mgmt_gw_enabled }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: @@ -354,13 +355,42 @@ class AlertmanagerService(CephadmService): addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' - service_url = build_url(scheme=protocol, host=addr, port=port) - self._set_service_url_on_dashboard( - 'AlertManager', - 'dashboard get-alertmanager-api-host', - 'dashboard set-alertmanager-api-host', - service_url - ) + + mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') + if mgmt_gw_daemons: + dd = mgmt_gw_daemons[0] + assert dd.hostname is not None + mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) + mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + self._set_value_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard 
set-alertmanager-api-host',
+                f'{mgmt_gw_internal_endpoint}/internal/alertmanager'
+            )
+            self._set_value_on_dashboard(
+                'AlertManager',
+                'dashboard get-alertmanager-api-ssl-verify',
+                'dashboard set-alertmanager-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'AlertManager',
+                'dashboard get-alertmanager-api-host',
+                'dashboard set-alertmanager-api-host',
+                service_url
+            )
+
+    def pre_remove(self, daemon: DaemonDescription) -> None:
+        """
+        Called before alertmanager daemon is removed.
+        """
+        if daemon.hostname is not None:
+            # delete cert/key entries for this alertmanager daemon
+            self.mgr.cert_key_store.rm_cert('alertmanager_cert', host=daemon.hostname)
+            self.mgr.cert_key_store.rm_key('alertmanager_key', host=daemon.hostname)

     def ok_to_stop(self,
                    daemon_ids: List[str],
@@ -471,7 +501,10 @@ class PrometheusService(CephadmService):
                 'prometheus_web_password': password_hash(prometheus_password),
             }

+        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         if self.mgr.secure_monitoring_stack:
+            # NOTE: this prometheus root cert is managed by the prometheus module
+            # we are using it in a read-only fashion in the cephadm module
             cfg_key = 'mgr/prometheus/root/cert'
             cmd = {'prefix': 'config-key get', 'key': cfg_key}
             ret, mgr_prometheus_rootca, err = self.mgr.mon_command(cmd)
@@ -480,7 +513,12 @@
             else:
                 node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
                 host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
-                cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(host_fqdn, node_ip)
+                cert = self.mgr.cert_key_store.get_cert('prometheus_cert', host=daemon_spec.host)
+                key = self.mgr.cert_key_store.get_key('prometheus_key', host=daemon_spec.host)
+                if not (cert and key):
+                    cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(host_fqdn, node_ip)
+                    self.mgr.cert_key_store.save_cert('prometheus_cert', cert, host=daemon_spec.host)
+                    self.mgr.cert_key_store.save_key('prometheus_key', key, host=daemon_spec.host)
                 r: Dict[str, Any] = {
                     'files': {
                         'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
@@ -493,7 +531,8 @@
                     'retention_time': retention_time,
                     'retention_size': retention_size,
                     'ip_to_bind_to': ip_to_bind_to,
-                    'web_config': '/etc/prometheus/web.yml'
+                    'web_config': '/etc/prometheus/web.yml',
+                    'use_url_prefix': mgmt_gw_enabled
                 }
         else:
             r = {
@@ -502,7 +541,8 @@
                 },
                 'retention_time': retention_time,
                 'retention_size': retention_size,
-                'ip_to_bind_to': ip_to_bind_to
+                'ip_to_bind_to': ip_to_bind_to,
+                'use_url_prefix': mgmt_gw_enabled
             }

         # include alerts, if present in the container
@@ -546,6 +586,10 @@
             if alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+
+        # add a dependency since url_prefix depends on the existence of mgmt-gateway
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+
         # add dependency on ceph-exporter daemons
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')]
         deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)]
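The prometheus hunk above (and the matching alertmanager and node-exporter hunks nearby) all repeat the same get-or-create dance for per-host TLS material. A generic sketch of the pattern (get_or_create_host_cert and make_cert are illustrative placeholders for the cert_key_store and SSLCerts calls):

    def get_or_create_host_cert(store, cert_entity, key_entity, host, make_cert):
        cert = store.get_cert(cert_entity, host=host)
        key = store.get_key(key_entity, host=host)
        if not (cert and key):
            # first deployment on this host: mint the pair and persist it so
            # later redeploys reuse it instead of regenerating every time
            cert, key = make_cert()
            store.save_cert(cert_entity, cert, host=host)
            store.save_key(key_entity, key, host=host)
        return cert, key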
@@ -566,13 +610,42 @@
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
-        service_url = build_url(scheme=protocol, host=addr, port=port)
-        self._set_service_url_on_dashboard(
-            'Prometheus',
-            'dashboard get-prometheus-api-host',
-            'dashboard set-prometheus-api-host',
-            service_url
-        )
+
+        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
+        if mgmt_gw_daemons:
+            dd = mgmt_gw_daemons[0]
+            assert dd.hostname is not None
+            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
+            mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-host',
+                'dashboard set-prometheus-api-host',
+                f'{mgmt_gw_internal_endpoint}/internal/prometheus'
+            )
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-ssl-verify',
+                'dashboard set-prometheus-api-ssl-verify',
+                'false'
+            )
+        else:
+            service_url = build_url(scheme=protocol, host=addr, port=port)
+            self._set_value_on_dashboard(
+                'Prometheus',
+                'dashboard get-prometheus-api-host',
+                'dashboard set-prometheus-api-host',
+                service_url
+            )
+
+    def pre_remove(self, daemon: DaemonDescription) -> None:
+        """
+        Called before prometheus daemon is removed.
+        """
+        if daemon.hostname is not None:
+            # delete cert/key entries for this prometheus daemon
+            self.mgr.cert_key_store.rm_cert('prometheus_cert', host=daemon.hostname)
+            self.mgr.cert_key_store.rm_key('prometheus_key', host=daemon.hostname)

     def ok_to_stop(self,
                    daemon_ids: List[str],
@@ -599,8 +672,13 @@ class NodeExporterService(CephadmService):
         if self.mgr.secure_monitoring_stack:
             node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
             host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
-            cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
-                host_fqdn, node_ip)
+            cert = self.mgr.cert_key_store.get_cert('node_exporter_cert', host=daemon_spec.host)
+            key = self.mgr.cert_key_store.get_key('node_exporter_key', host=daemon_spec.host)
+            if not (cert and key):
+                cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
+                    host_fqdn, node_ip)
+                self.mgr.cert_key_store.save_cert('node_exporter_cert', cert, host=daemon_spec.host)
+                self.mgr.cert_key_store.save_key('node_exporter_key', key, host=daemon_spec.host)
         r = {
             'files': {
                 'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}),
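The config_dashboard() hunks above all follow a single rule: when a mgmt-gateway daemon exists, the dashboard is pointed at the gateway's internal TLS port (INTERNAL_SERVICE_PORT, 29443) under an /internal/&lt;service&gt; path; otherwise it talks to the service directly. An illustrative reduction of that rule (host names below are made up):

    def monitoring_api_host(mgmt_gw_fqdn, svc_fqdn, svc_port, svc, secure):
        # sketch of the endpoint selection; not the actual cephadm helper
        if mgmt_gw_fqdn:
            return f'https://{mgmt_gw_fqdn}:29443/internal/{svc}'
        scheme = 'https' if secure else 'http'
        return f'{scheme}://{svc_fqdn}:{svc_port}'

    assert monitoring_api_host('gw.ceph.local', 'prom.ceph.local', 9095,
                               'prometheus', False) == \
        'https://gw.ceph.local:29443/internal/prometheus'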
+ """ + if daemon.hostname is not None: + # delete cert/key entires for this node-exporter daemon + self.mgr.cert_key_store.rm_cert('node_exporter_cert', host=daemon.hostname) + self.mgr.cert_key_store.rm_key('node_exporter_key', host=daemon.hostname) + def ok_to_stop(self, daemon_ids: List[str], force: bool = False, diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index f46f65b084b..a0d7da9bb7e 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) class NFSService(CephService): TYPE = 'nfs' - def ranked(self) -> bool: + def ranked(self, spec: ServiceSpec) -> bool: return True def fence(self, daemon_id: str) -> None: diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 99e63c0b7da..ac258887f6a 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -53,6 +53,28 @@ class NvmeofService(CephService): daemon_spec.keyring = keyring daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf} + + if spec.enable_auth: + if ( + not spec.client_cert + or not spec.client_key + or not spec.server_cert + or not spec.server_key + or not spec.root_ca_cert + ): + err_msg = 'enable_auth is true but ' + for cert_key_attr in ['server_key', 'server_cert', 'client_key', 'client_cert', 'root_ca_cert']: + if not hasattr(spec, cert_key_attr): + err_msg += f'{cert_key_attr}, ' + err_msg += 'attribute(s) missing from nvmeof spec' + self.mgr.log.error(err_msg) + else: + daemon_spec.extra_files['server_cert'] = spec.server_cert + daemon_spec.extra_files['client_cert'] = spec.client_cert + daemon_spec.extra_files['server_key'] = spec.server_key + daemon_spec.extra_files['client_key'] = spec.client_key + daemon_spec.extra_files['root_ca_cert'] = spec.root_ca_cert + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) daemon_spec.deps = [] return daemon_spec @@ -67,9 +89,10 @@ class NvmeofService(CephService): for dd in daemon_descrs: assert dd.hostname is not None + service_name = dd.service_name() if not spec: - logger.warning(f'No ServiceSpec found for {dd.service_name()}') + logger.warning(f'No ServiceSpec found for {service_name}') continue ip = utils.resolve_ip(self.mgr.inventory.get_addr(dd.hostname)) @@ -82,7 +105,7 @@ class NvmeofService(CephService): cmd_dicts.append({ 'prefix': 'dashboard nvmeof-gateway-add', 'inbuf': service_url, - 'name': dd.hostname + 'name': service_name }) return cmd_dicts @@ -118,11 +141,12 @@ class NvmeofService(CephService): """ # to clean the keyring up super().post_remove(daemon, is_failed_deploy=is_failed_deploy) + service_name = daemon.service_name() # remove config for dashboard nvmeof gateways if any ret, out, err = self.mgr.mon_command({ 'prefix': 'dashboard nvmeof-gateway-rm', - 'name': daemon.hostname, + 'name': service_name, }) if not ret: logger.info(f'{daemon.hostname} removed from nvmeof gateways dashboard config') diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 index e6c7bce1524..4d3d11e2083 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -14,6 +14,10 @@ cert_key = /etc/grafana/certs/cert_key http_port = {{ http_port }} http_addr = {{ http_addr }} +{% if use_url_prefix %} + root_url = 
diff --git a/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2 b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2
index e19f556c6f4..4a8237a4f2b 100644
--- a/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2
+++ b/src/pybind/mgr/cephadm/templates/services/ingress/keepalived.conf.j2
@@ -1,4 +1,9 @@
 # {{ cephadm_managed }}
+global_defs {
+    enable_script_security
+    script_user root
+}
+
 vrrp_script check_backend {
     script "{{ script }}"
     weight -20
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2
new file mode 100644
index 00000000000..2220e8e4759
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2
@@ -0,0 +1,75 @@
+
+server {
+{% if spec.disable_https %}
+    listen {{ spec.port or 80 }};
+{% else %}
+    listen {{ spec.port or 443 }} ssl;
+    listen [::]:{{ spec.port or 443 }} ssl;
+    ssl_certificate /etc/nginx/ssl/nginx.crt;
+    ssl_certificate_key /etc/nginx/ssl/nginx.key;
+    {% if spec.ssl_protocols %}
+    ssl_protocols {{ spec.ssl_protocols | join(' ') }};
+    {% else %}
+    ssl_protocols TLSv1.3;
+    {% endif %}
+    {% if spec.ssl_ciphers %}
+    ssl_ciphers {{ spec.ssl_ciphers | join(':') }};
+    {% else %}
+    # from: https://ssl-config.mozilla.org/#server=nginx
+    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305;
+    {% endif %}
+
+    # Only return Nginx in server header, no extra info will be provided
+    server_tokens {{ spec.server_tokens or 'off'}};
+
+    # Perfect Forward Secrecy (PFS) is frequently compromised without this
+    ssl_prefer_server_ciphers {{ spec.ssl_prefer_server_ciphers or 'on'}};
+
+    # Enable SSL session caching for improved performance
+    ssl_session_tickets {{ spec.ssl_session_tickets or 'off'}};
+    ssl_session_timeout {{ spec.ssl_session_timeout or '1d'}};
+    ssl_session_cache {{ spec.ssl_session_cache or 'shared:SSL:10m'}};
+
+    # OCSP stapling
+    ssl_stapling {{ spec.ssl_stapling or 'on'}};
+    ssl_stapling_verify {{ spec.ssl_stapling_verify or 'on'}};
+    resolver_timeout 5s;
+
+    # Security headers
+    ## X-Content-Type-Options: avoid MIME type sniffing
+    add_header X-Content-Type-Options nosniff;
+    ## Strict Transport Security (HSTS): Yes
+    add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload";
+    ## Enables the Cross-site scripting (XSS) filter in browsers.
+    add_header X-XSS-Protection "1; mode=block";
+    ## Content-Security-Policy (CSP): FIXME
+    # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';";
+
+{% endif %}
+
+{% if dashboard_endpoints %}
+    location / {
+        proxy_pass {{ dashboard_scheme }}://dashboard_servers;
+        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+    }
+{% endif %}
+
+{% if grafana_endpoints %}
+    location /grafana {
+        rewrite ^/grafana/(.*) /$1 break;
+        proxy_pass {{ grafana_scheme }}://grafana_servers;
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    location /prometheus {
+        proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    location /alertmanager {
+        proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+    }
+{% endif %}
+}
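The external server above strips the /grafana prefix before proxying (rewrite ^/grafana/(.*) /$1 break;), and the internal server that follows maps /internal/&lt;svc&gt; onto each service's own prefix. The same substitution can be sanity-checked in Python (illustrative only, mirroring the nginx rewrite semantics):

    import re

    def strip_prefix(path: str, prefix: str) -> str:
        # mirrors nginx: rewrite ^/<prefix>/(.*) /$1 break;
        return re.sub(rf'^/{prefix}/(.*)', r'/\1', path)

    assert strip_prefix('/grafana/api/health', 'grafana') == '/api/health'
    assert strip_prefix('/internal/grafana/api/health', 'internal/grafana') == '/api/health'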
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2
new file mode 100644
index 00000000000..6848c04ebe8
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2
@@ -0,0 +1,31 @@
+
+server {
+    listen {{ internal_port }} ssl;
+    listen [::]:{{ internal_port }} ssl;
+    ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+    ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
+    ssl_prefer_server_ciphers on;
+
+{% if grafana_endpoints %}
+    location /internal/grafana {
+        rewrite ^/internal/grafana/(.*) /$1 break;
+        proxy_pass {{ grafana_scheme }}://grafana_servers;
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    location /internal/prometheus {
+        rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
+        proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    location /internal/alertmanager {
+        rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
+        proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+    }
+{% endif %}
+}
diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
new file mode 100644
index 00000000000..9ce6eb9867d
--- /dev/null
+++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/nginx.conf.j2
@@ -0,0 +1,44 @@
+
+# {{ cephadm_managed }}
+worker_rlimit_nofile 8192;
+
+events {
+    worker_connections 4096;
+}
+
+http {
+{% if dashboard_endpoints %}
+    upstream dashboard_servers {
+    {% for ep in dashboard_endpoints %}
+        server {{ ep }};
+    {% endfor %}
+    }
+{% endif %}
+
+{% if grafana_endpoints %}
+    upstream grafana_servers {
+    {% for ep in grafana_endpoints %}
+        server {{ ep }};
+    {% endfor %}
+    }
+{% endif %}
+
+{% if prometheus_endpoints %}
+    upstream prometheus_servers {
+    {% for ep in prometheus_endpoints %}
+        server {{ ep }};
+    {% endfor %}
+    }
+{% endif %}
+
+{% if alertmanager_endpoints %}
+    upstream alertmanager_servers {
+    {% for ep in alertmanager_endpoints %}
+        server {{ ep }};
+    {% endfor %}
+    }
+{% endif %}
+
+    include /etc/nginx_external_server.conf;
+    include /etc/nginx_internal_server.conf;
+}
diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
index f2f994c5521..18786f95bbe 100644
--- 
a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -41,10 +41,11 @@ config_file = /etc/ceph/ceph.conf id = {{ rados_id }} [mtls] -server_key = {{ spec.server_key }} -client_key = {{ spec.client_key }} -server_cert = {{ spec.server_cert }} -client_cert = {{ spec.client_cert }} +server_key = /server.key +client_key = /client.key +server_cert = /server.cert +client_cert = /client.cert +root_ca_cert = /root.ca.cert [spdk] tgt_path = {{ spec.tgt_path }} diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 77661830b7e..3b0cb341646 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -8,7 +8,14 @@ import pytest from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection from cephadm.serve import CephadmServe -from cephadm.inventory import HostCacheStatus, ClientKeyringSpec +from cephadm.inventory import ( + HostCacheStatus, + ClientKeyringSpec, + Cert, + PrivKey, + CERT_STORE_CERT_PREFIX, + CERT_STORE_KEY_PREFIX, +) from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims from cephadm.utils import SpecialHostLabels @@ -979,7 +986,7 @@ class TestCephadm(object): assert osd_claims.filtered_by_host('host1') == ['0'] assert osd_claims.filtered_by_host('host1.domain.com') == ['0'] - @ pytest.mark.parametrize( + @pytest.mark.parametrize( "ceph_services, cephadm_daemons, strays_expected, metadata", # [ ([(daemon_type, daemon_id), ... ], [...], [...]), ... ] [ @@ -1690,6 +1697,242 @@ class TestCephadm(object): assert cephadm_module.cache._get_host_cache_entry_status( 'host.nothing.com') == HostCacheStatus.stray + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_cert_store_save_cert(self, _set_store, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + agent_endpoint_root_cert = 'fake-agent-cert' + alertmanager_host1_cert = 'fake-alertm-host1-cert' + rgw_frontend_rgw_foo_host2_cert = 'fake-rgw-cert' + nvmeof_client_cert = 'fake-nvmeof-client-cert' + nvmeof_server_cert = 'fake-nvmeof-server-cert' + nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert' + cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_root_cert) + cephadm_module.cert_key_store.save_cert('alertmanager_cert', alertmanager_host1_cert, host='host1') + cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_root_ca_cert', nvmeof_root_ca_cert, service_name='nvmeof.foo', user_made=True) + + expected_calls = [ + mock.call(f'{CERT_STORE_CERT_PREFIX}agent_endpoint_root_cert', json.dumps(Cert(agent_endpoint_root_cert).to_json())), + mock.call(f'{CERT_STORE_CERT_PREFIX}alertmanager_cert', json.dumps({'host1': Cert(alertmanager_host1_cert).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}rgw_frontend_ssl_cert', json.dumps({'rgw.foo': Cert(rgw_frontend_rgw_foo_host2_cert, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_server_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_server_cert, True).to_json()})), + 
mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_client_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_client_cert, True).to_json()})), + mock.call(f'{CERT_STORE_CERT_PREFIX}nvmeof_root_ca_cert', json.dumps({'nvmeof.foo': Cert(nvmeof_root_ca_cert, True).to_json()})), + ] + _set_store.assert_has_calls(expected_calls) + + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_cert_store_cert_ls(self, _set_store, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + expected_ls = { + 'rgw_frontend_ssl_cert': False, + 'iscsi_ssl_cert': False, + 'ingress_ssl_cert': False, + 'agent_endpoint_root_cert': False, + 'service_discovery_root_cert': False, + 'mgmt_gw_root_cert': False, + 'grafana_cert': False, + 'alertmanager_cert': False, + 'prometheus_cert': False, + 'node_exporter_cert': False, + 'nvmeof_client_cert': False, + 'nvmeof_server_cert': False, + 'nvmeof_root_ca_cert': False, + } + assert cephadm_module.cert_key_store.cert_ls() == expected_ls + + cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', 'xxx') + expected_ls['agent_endpoint_root_cert'] = True + assert cephadm_module.cert_key_store.cert_ls() == expected_ls + + cephadm_module.cert_key_store.save_cert('alertmanager_cert', 'xxx', host='host1') + cephadm_module.cert_key_store.save_cert('alertmanager_cert', 'xxx', host='host2') + expected_ls['alertmanager_cert'] = {} + expected_ls['alertmanager_cert']['host1'] = True + expected_ls['alertmanager_cert']['host2'] = True + assert cephadm_module.cert_key_store.cert_ls() == expected_ls + + cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', 'xxx', service_name='rgw.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', 'xxx', service_name='rgw.bar', user_made=True) + expected_ls['rgw_frontend_ssl_cert'] = {} + expected_ls['rgw_frontend_ssl_cert']['rgw.foo'] = True + expected_ls['rgw_frontend_ssl_cert']['rgw.bar'] = True + assert cephadm_module.cert_key_store.cert_ls() == expected_ls + + cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', 'xxx', service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', 'xxx', service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_root_ca_cert', 'xxx', service_name='nvmeof.foo', user_made=True) + expected_ls['nvmeof_client_cert'] = {} + expected_ls['nvmeof_client_cert']['nvmeof.foo'] = True + expected_ls['nvmeof_server_cert'] = {} + expected_ls['nvmeof_server_cert']['nvmeof.foo'] = True + expected_ls['nvmeof_root_ca_cert'] = {} + expected_ls['nvmeof_root_ca_cert']['nvmeof.foo'] = True + assert cephadm_module.cert_key_store.cert_ls() == expected_ls + + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_cert_store_save_key(self, _set_store, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + agent_endpoint_key = 'fake-agent-key' + grafana_host1_key = 'fake-grafana-host1-key' + nvmeof_client_key = 'nvmeof-client-key' + nvmeof_server_key = 'nvmeof-server-key' + cephadm_module.cert_key_store.save_key('agent_endpoint_key', agent_endpoint_key) + cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1') + cephadm_module.cert_key_store.save_key('nvmeof_client_key', nvmeof_client_key, service_name='nvmeof.foo') + cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo') + + expected_calls = [ + 
mock.call(f'{CERT_STORE_KEY_PREFIX}agent_endpoint_key', json.dumps(PrivKey(agent_endpoint_key).to_json())), + mock.call(f'{CERT_STORE_KEY_PREFIX}grafana_key', json.dumps({'host1': PrivKey(grafana_host1_key).to_json()})), + mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()})), + mock.call(f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key', json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()})), + ] + _set_store.assert_has_calls(expected_calls) + + @mock.patch("cephadm.module.CephadmOrchestrator.set_store") + def test_cert_store_key_ls(self, _set_store, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + expected_ls = { + 'agent_endpoint_key': False, + 'service_discovery_key': False, + 'grafana_key': False, + 'alertmanager_key': False, + 'mgmt_gw_root_key': False, + 'prometheus_key': False, + 'node_exporter_key': False, + 'iscsi_ssl_key': False, + 'ingress_ssl_key': False, + 'nvmeof_client_key': False, + 'nvmeof_server_key': False, + } + assert cephadm_module.cert_key_store.key_ls() == expected_ls + + cephadm_module.cert_key_store.save_key('agent_endpoint_key', 'xxx') + expected_ls['agent_endpoint_key'] = True + assert cephadm_module.cert_key_store.key_ls() == expected_ls + + cephadm_module.cert_key_store.save_key('alertmanager_key', 'xxx', host='host1') + cephadm_module.cert_key_store.save_key('alertmanager_key', 'xxx', host='host2') + expected_ls['alertmanager_key'] = {} + expected_ls['alertmanager_key']['host1'] = True + expected_ls['alertmanager_key']['host2'] = True + assert cephadm_module.cert_key_store.key_ls() == expected_ls + + cephadm_module.cert_key_store.save_key('nvmeof_client_key', 'xxx', service_name='nvmeof.foo') + cephadm_module.cert_key_store.save_key('nvmeof_server_key', 'xxx', service_name='nvmeof.foo') + expected_ls['nvmeof_server_key'] = {} + expected_ls['nvmeof_server_key']['nvmeof.foo'] = True + expected_ls['nvmeof_client_key'] = {} + expected_ls['nvmeof_client_key']['nvmeof.foo'] = True + assert cephadm_module.cert_key_store.key_ls() == expected_ls + + @mock.patch("cephadm.module.CephadmOrchestrator.get_store_prefix") + def test_cert_store_load(self, _get_store_prefix, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + agent_endpoint_root_cert = 'fake-agent-cert' + alertmanager_host1_cert = 'fake-alertm-host1-cert' + rgw_frontend_rgw_foo_host2_cert = 'fake-rgw-cert' + agent_endpoint_key = 'fake-agent-key' + grafana_host1_key = 'fake-grafana-host1-cert' + nvmeof_server_cert = 'nvmeof-server-cert' + nvmeof_client_cert = 'nvmeof-client-cert' + nvmeof_root_ca_cert = 'nvmeof-root-ca-cert' + nvmeof_server_key = 'nvmeof-server-key' + nvmeof_client_key = 'nvmeof-client-key' + + def _fake_prefix_store(key): + if key == 'cert_store.cert.': + return { + f'{CERT_STORE_CERT_PREFIX}agent_endpoint_root_cert': json.dumps(Cert(agent_endpoint_root_cert).to_json()), + f'{CERT_STORE_CERT_PREFIX}alertmanager_cert': json.dumps({'host1': Cert(alertmanager_host1_cert).to_json()}), + f'{CERT_STORE_CERT_PREFIX}rgw_frontend_ssl_cert': json.dumps({'rgw.foo': Cert(rgw_frontend_rgw_foo_host2_cert, True).to_json()}), + f'{CERT_STORE_CERT_PREFIX}nvmeof_server_cert': json.dumps({'nvmeof.foo': Cert(nvmeof_server_cert, True).to_json()}), + f'{CERT_STORE_CERT_PREFIX}nvmeof_client_cert': json.dumps({'nvmeof.foo': Cert(nvmeof_client_cert, True).to_json()}), + f'{CERT_STORE_CERT_PREFIX}nvmeof_root_ca_cert': json.dumps({'nvmeof.foo': 
Cert(nvmeof_root_ca_cert, True).to_json()}), + } + elif key == 'cert_store.key.': + return { + f'{CERT_STORE_KEY_PREFIX}agent_endpoint_key': json.dumps(PrivKey(agent_endpoint_key).to_json()), + f'{CERT_STORE_KEY_PREFIX}grafana_key': json.dumps({'host1': PrivKey(grafana_host1_key).to_json()}), + f'{CERT_STORE_KEY_PREFIX}nvmeof_server_key': json.dumps({'nvmeof.foo': PrivKey(nvmeof_server_key).to_json()}), + f'{CERT_STORE_KEY_PREFIX}nvmeof_client_key': json.dumps({'nvmeof.foo': PrivKey(nvmeof_client_key).to_json()}), + } + else: + raise Exception(f'Get store with unexpected value {key}') + + _get_store_prefix.side_effect = _fake_prefix_store + cephadm_module.cert_key_store.load() + assert cephadm_module.cert_key_store.known_certs['agent_endpoint_root_cert'] == Cert(agent_endpoint_root_cert) + assert cephadm_module.cert_key_store.known_certs['alertmanager_cert']['host1'] == Cert(alertmanager_host1_cert) + assert cephadm_module.cert_key_store.known_certs['rgw_frontend_ssl_cert']['rgw.foo'] == Cert(rgw_frontend_rgw_foo_host2_cert, True) + assert cephadm_module.cert_key_store.known_certs['nvmeof_server_cert']['nvmeof.foo'] == Cert(nvmeof_server_cert, True) + assert cephadm_module.cert_key_store.known_certs['nvmeof_client_cert']['nvmeof.foo'] == Cert(nvmeof_client_cert, True) + assert cephadm_module.cert_key_store.known_certs['nvmeof_root_ca_cert']['nvmeof.foo'] == Cert(nvmeof_root_ca_cert, True) + assert cephadm_module.cert_key_store.known_keys['agent_endpoint_key'] == PrivKey(agent_endpoint_key) + assert cephadm_module.cert_key_store.known_keys['grafana_key']['host1'] == PrivKey(grafana_host1_key) + assert cephadm_module.cert_key_store.known_keys['nvmeof_server_key']['nvmeof.foo'] == PrivKey(nvmeof_server_key) + assert cephadm_module.cert_key_store.known_keys['nvmeof_client_key']['nvmeof.foo'] == PrivKey(nvmeof_client_key) + + def test_cert_store_get_cert_key(self, cephadm_module: CephadmOrchestrator): + cephadm_module.cert_key_store._init_known_cert_key_dicts() + + agent_endpoint_root_cert = 'fake-agent-cert' + alertmanager_host1_cert = 'fake-alertm-host1-cert' + rgw_frontend_rgw_foo_host2_cert = 'fake-rgw-cert' + nvmeof_client_cert = 'fake-nvmeof-client-cert' + nvmeof_server_cert = 'fake-nvmeof-server-cert' + cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_root_cert) + cephadm_module.cert_key_store.save_cert('alertmanager_cert', alertmanager_host1_cert, host='host1') + cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True) + cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True) + + assert cephadm_module.cert_key_store.get_cert('agent_endpoint_root_cert') == agent_endpoint_root_cert + assert cephadm_module.cert_key_store.get_cert('alertmanager_cert', host='host1') == alertmanager_host1_cert + assert cephadm_module.cert_key_store.get_cert('rgw_frontend_ssl_cert', service_name='rgw.foo') == rgw_frontend_rgw_foo_host2_cert + assert cephadm_module.cert_key_store.get_cert('nvmeof_server_cert', service_name='nvmeof.foo') == nvmeof_server_cert + assert cephadm_module.cert_key_store.get_cert('nvmeof_client_cert', service_name='nvmeof.foo') == nvmeof_client_cert + assert cephadm_module.cert_key_store.get_cert('service_discovery_root_cert') == '' + assert 
+    def test_cert_store_get_cert_key(self, cephadm_module: CephadmOrchestrator):
+        cephadm_module.cert_key_store._init_known_cert_key_dicts()
+
+        agent_endpoint_root_cert = 'fake-agent-cert'
+        alertmanager_host1_cert = 'fake-alertm-host1-cert'
+        rgw_frontend_rgw_foo_host2_cert = 'fake-rgw-cert'
+        nvmeof_client_cert = 'fake-nvmeof-client-cert'
+        nvmeof_server_cert = 'fake-nvmeof-server-cert'
+        cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_root_cert)
+        cephadm_module.cert_key_store.save_cert('alertmanager_cert', alertmanager_host1_cert, host='host1')
+        cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True)
+        cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True)
+        cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True)
+
+        assert cephadm_module.cert_key_store.get_cert('agent_endpoint_root_cert') == agent_endpoint_root_cert
+        assert cephadm_module.cert_key_store.get_cert('alertmanager_cert', host='host1') == alertmanager_host1_cert
+        assert cephadm_module.cert_key_store.get_cert('rgw_frontend_ssl_cert', service_name='rgw.foo') == rgw_frontend_rgw_foo_host2_cert
+        assert cephadm_module.cert_key_store.get_cert('nvmeof_server_cert', service_name='nvmeof.foo') == nvmeof_server_cert
+        assert cephadm_module.cert_key_store.get_cert('nvmeof_client_cert', service_name='nvmeof.foo') == nvmeof_client_cert
+        assert cephadm_module.cert_key_store.get_cert('service_discovery_root_cert') == ''
+        assert cephadm_module.cert_key_store.get_cert('grafana_cert', host='host1') == ''
+        assert cephadm_module.cert_key_store.get_cert('iscsi_ssl_cert', service_name='iscsi.foo') == ''
+        assert cephadm_module.cert_key_store.get_cert('nvmeof_root_ca_cert', service_name='nvmeof.foo') == ''
+
+        with pytest.raises(OrchestratorError, match='Attempted to access cert for unknown entity'):
+            cephadm_module.cert_key_store.get_cert('unknown_entity')
+        with pytest.raises(OrchestratorError, match='Need host to access cert for entity'):
+            cephadm_module.cert_key_store.get_cert('grafana_cert')
+        with pytest.raises(OrchestratorError, match='Need service name to access cert for entity'):
+            cephadm_module.cert_key_store.get_cert('rgw_frontend_ssl_cert', host='foo')
+
+        agent_endpoint_key = 'fake-agent-key'
+        grafana_host1_key = 'fake-grafana-host1-key'
+        nvmeof_server_key = 'nvmeof-server-key'
+        cephadm_module.cert_key_store.save_key('agent_endpoint_key', agent_endpoint_key)
+        cephadm_module.cert_key_store.save_key('grafana_key', grafana_host1_key, host='host1')
+        cephadm_module.cert_key_store.save_key('nvmeof_server_key', nvmeof_server_key, service_name='nvmeof.foo')
+
+        assert cephadm_module.cert_key_store.get_key('agent_endpoint_key') == agent_endpoint_key
+        assert cephadm_module.cert_key_store.get_key('grafana_key', host='host1') == grafana_host1_key
+        assert cephadm_module.cert_key_store.get_key('nvmeof_server_key', service_name='nvmeof.foo') == nvmeof_server_key
+        assert cephadm_module.cert_key_store.get_key('nvmeof_client_key', service_name='nvmeof.foo') == ''
+        assert cephadm_module.cert_key_store.get_key('service_discovery_key') == ''
+        assert cephadm_module.cert_key_store.get_key('alertmanager_key', host='host1') == ''
+
+        with pytest.raises(OrchestratorError, match='Attempted to access priv key for unknown entity'):
+            cephadm_module.cert_key_store.get_key('unknown_entity')
+        with pytest.raises(OrchestratorError, match='Need host to access priv key for entity'):
+            cephadm_module.cert_key_store.get_key('grafana_key')
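A compact sketch of the scoping rule those assertions exercise: an entity is global, per-host, or per-service, and a lookup must pass the matching qualifier or it gets the empty-string default. These shapes are illustrative only, not the cephadm implementation:

from typing import Dict, Union

# the three storage shapes seen in the tests above
known_keys: Dict[str, Union[str, Dict[str, str]]] = {
    'agent_endpoint_key': 'pem-data',                  # global entity
    'grafana_key': {'host1': 'pem-data'},              # scoped by host
    'nvmeof_server_key': {'nvmeof.foo': 'pem-data'},   # scoped by service
}

def get_key(entity: str, host: str = '', service_name: str = '') -> str:
    if entity not in known_keys:
        raise KeyError(f'Attempted to access priv key for unknown entity {entity}')
    value = known_keys[entity]
    if isinstance(value, dict):
        # scoped entities resolve through their host/service qualifier
        return value.get(host or service_name, '')
    return value

assert get_key('grafana_key', host='host1') == 'pem-data'
assert get_key('nvmeof_server_key', service_name='nvmeof.foo') == 'pem-data'
assert get_key('agent_endpoint_key') == 'pem-data'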
    @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
    @mock.patch("cephadm.services.nfs.NFSService.run_grace_tool", mock.MagicMock())
    @mock.patch("cephadm.services.nfs.NFSService.purge", mock.MagicMock())
diff --git a/src/pybind/mgr/cephadm/tests/test_migration.py b/src/pybind/mgr/cephadm/tests/test_migration.py
index 1f1d32e8b40..26e034a1633 100644
--- a/src/pybind/mgr/cephadm/tests/test_migration.py
+++ b/src/pybind/mgr/cephadm/tests/test_migration.py
@@ -1,13 +1,21 @@
 import json
 import pytest
-from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
+from ceph.deployment.service_spec import (
+    PlacementSpec,
+    ServiceSpec,
+    HostPlacementSpec,
+    RGWSpec,
+    IngressSpec,
+    IscsiServiceSpec
+)
 from ceph.utils import datetime_to_str, datetime_now
 from cephadm import CephadmOrchestrator
 from cephadm.inventory import SPEC_STORE_PREFIX
 from cephadm.migrations import LAST_MIGRATION
 from cephadm.tests.fixtures import _run_cephadm, wait, with_host, receive_agent_metadata_all_hosts
 from cephadm.serve import CephadmServe
+from orchestrator import DaemonDescription
 from tests import mock
@@ -338,3 +346,44 @@ def test_migrate_rgw_spec(cephadm_module: CephadmOrchestrator, rgw_spec_store_en
     # if it was migrated, so we can use this to test the spec
     # was untouched
     assert 'rgw.foo' not in cephadm_module.spec_store.all_specs
+
+
+def test_migrate_cert_store(cephadm_module: CephadmOrchestrator):
+    rgw_spec = RGWSpec(service_id='foo', rgw_frontend_ssl_certificate='rgw_cert', ssl=True)
+    iscsi_spec = IscsiServiceSpec(service_id='foo', pool='foo', ssl_cert='iscsi_cert', ssl_key='iscsi_key')
+    ingress_spec = IngressSpec(service_id='rgw.foo', ssl_cert='ingress_cert', ssl_key='ingress_key', ssl=True)
+    cephadm_module.spec_store._specs = {
+        'rgw.foo': rgw_spec,
+        'iscsi.foo': iscsi_spec,
+        'ingress.rgw.foo': ingress_spec
+    }
+
+    cephadm_module.set_store('cephadm_agent/root/cert', 'agent_cert')
+    cephadm_module.set_store('cephadm_agent/root/key', 'agent_key')
+    cephadm_module.set_store('service_discovery/root/cert', 'service_discovery_cert')
+    cephadm_module.set_store('service_discovery/root/key', 'service_discovery_key')
+
+    cephadm_module.set_store('host1/grafana_crt', 'grafana_cert1')
+    cephadm_module.set_store('host1/grafana_key', 'grafana_key1')
+    cephadm_module.set_store('host2/grafana_crt', 'grafana_cert2')
+    cephadm_module.set_store('host2/grafana_key', 'grafana_key2')
+    cephadm_module.cache.daemons = {'host1': {'grafana.host1': DaemonDescription('grafana', 'host1', 'host1')},
+                                    'host2': {'grafana.host2': DaemonDescription('grafana', 'host2', 'host2')}}
+
+    cephadm_module.migration.migrate_6_7()
+
+    assert cephadm_module.cert_key_store.get_cert('rgw_frontend_ssl_cert', service_name='rgw.foo')
+    assert cephadm_module.cert_key_store.get_cert('iscsi_ssl_cert', service_name='iscsi.foo')
+    assert cephadm_module.cert_key_store.get_key('iscsi_ssl_key', service_name='iscsi.foo')
+    assert cephadm_module.cert_key_store.get_cert('ingress_ssl_cert', service_name='ingress.rgw.foo')
+    assert cephadm_module.cert_key_store.get_key('ingress_ssl_key', service_name='ingress.rgw.foo')
+
+    assert cephadm_module.cert_key_store.get_cert('agent_endpoint_root_cert')
+    assert cephadm_module.cert_key_store.get_key('agent_endpoint_key')
+    assert cephadm_module.cert_key_store.get_cert('service_discovery_root_cert')
+    assert cephadm_module.cert_key_store.get_key('service_discovery_key')
+
+    assert cephadm_module.cert_key_store.get_cert('grafana_cert', host='host1')
+    assert cephadm_module.cert_key_store.get_cert('grafana_cert', host='host2')
+    assert cephadm_module.cert_key_store.get_key('grafana_key', host='host1')
+    assert cephadm_module.cert_key_store.get_key('grafana_key', host='host2')
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
index 0ef4ee1601b..f733db7ab77 100644
--- a/src/pybind/mgr/cephadm/tests/test_services.py
+++ b/src/pybind/mgr/cephadm/tests/test_services.py
@@ -35,6 +35,7 @@ from ceph.deployment.service_spec import (
     SNMPGatewaySpec,
     ServiceSpec,
     TracingSpec,
+    MgmtGatewaySpec,
 )
 from cephadm.tests.fixtures import with_host, with_service, _run_cephadm, async_side_effect
@@ -45,9 +46,9 @@ from orchestrator._interface import DaemonDescription
 from typing import Dict, List
-grafana_cert = """-----BEGIN
CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" +ceph_generated_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" -grafana_key = """-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" +ceph_generated_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" class FakeInventory: @@ -91,17 +92,17 @@ class FakeMgr: class 
TestCephadmService: - def test_set_service_url_on_dashboard(self): + def test_set_value_on_dashboard(self): # pylint: disable=protected-access mgr = FakeMgr() service_url = 'http://svc:1000' service = GrafanaService(mgr) - service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) assert mgr.config == service_url # set-cmd should not be called if value doesn't change mgr.check_mon_command.reset_mock() - service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + service._set_value_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'}) def _get_services(self, mgr): @@ -426,10 +427,11 @@ config_file = /etc/ceph/ceph.conf id = nvmeof.{nvmeof_daemon_id} [mtls] -server_key = ./server.key -client_key = ./client.key -server_cert = ./server.crt -client_cert = ./client.crt +server_key = /server.key +client_key = /client.key +server_cert = /server.cert +client_cert = /client.cert +root_ca_cert = /root.ca.cert [spdk] tgt_path = /usr/local/bin/nvmf_tgt @@ -591,6 +593,7 @@ class TestMonitoring: "alertmanager.yml": y, }, "peers": [], + "use_url_prefix": False, } }), use_current_daemon_image=False, @@ -687,11 +690,15 @@ class TestMonitoring: }, 'peers': [], 'web_config': '/etc/alertmanager/web.yml', + "use_url_prefix": False, } }), use_current_daemon_image=False, ) + assert cephadm_module.cert_key_store.get_cert('alertmanager_cert', host='test') == 'mycert' + assert cephadm_module.cert_key_store.get_key('alertmanager_key', host='test') == 'mykey' + @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): @@ -825,6 +832,7 @@ class TestMonitoring: 'retention_time': '15d', 'retention_size': '0', 'ip_to_bind_to': '1.2.3.1', + "use_url_prefix": False }, }), use_current_daemon_image=False, @@ -1008,6 +1016,7 @@ class TestMonitoring: 'retention_size': '0', 'ip_to_bind_to': '', 'web_config': '/etc/prometheus/web.yml', + "use_url_prefix": False }, }), use_current_daemon_image=False, @@ -1155,8 +1164,8 @@ class TestMonitoring: _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) with with_host(cephadm_module, "test"): - cephadm_module.set_store("test/grafana_crt", grafana_cert) - cephadm_module.set_store("test/grafana_key", grafana_key) + cephadm_module.cert_key_store.save_cert('grafana_cert', ceph_generated_cert, host='test') + cephadm_module.cert_key_store.save_key('grafana_key', ceph_generated_key, host='test') with with_service( cephadm_module, PrometheusSpec("prometheus") ) as _, with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( @@ -1211,9 +1220,9 @@ class TestMonitoring: isDefault: false editable: false""").lstrip(), 'certs/cert_file': dedent(f""" - # generated by cephadm\n{grafana_cert}""").lstrip(), + # generated by cephadm\n{ceph_generated_cert}""").lstrip(), 'certs/cert_key': dedent(f""" - # generated by cephadm\n{grafana_key}""").lstrip(), + # generated by cephadm\n{ceph_generated_key}""").lstrip(), 'provisioning/dashboards/default.yml': dedent(""" # This file is generated by cephadm. 
apiVersion: 1 @@ -1418,7 +1427,6 @@ spec: "deploy_arguments": [], "params": { 'tcp_ports': [4200, 9094], - 'reconfig': True, }, "meta": { 'service_name': 'alertmanager', @@ -1432,7 +1440,7 @@ spec: }, "config_blobs": {}, }), - use_current_daemon_image=True, + use_current_daemon_image=False, ) @@ -1846,7 +1854,10 @@ class TestIngressService: with with_host(cephadm_module, 'test', addr='1.2.3.7'): cephadm_module.cache.update_host_networks('test', { '1.2.3.0/24': { - 'if0': ['1.2.3.4'] + 'if0': [ + '1.2.3.4', # simulate already assigned VIP + '1.2.3.1', # simulate interface IP + ] } }) @@ -1874,6 +1885,10 @@ class TestIngressService: { 'keepalived.conf': '# This file is generated by cephadm.\n' + 'global_defs {\n ' + 'enable_script_security\n ' + 'script_user root\n' + '}\n\n' 'vrrp_script check_backend {\n ' 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' 'weight -20\n ' @@ -1890,7 +1905,7 @@ class TestIngressService: 'auth_type PASS\n ' 'auth_pass 12345\n ' '}\n ' - 'unicast_src_ip 1.2.3.4\n ' + 'unicast_src_ip 1.2.3.1\n ' 'unicast_peer {\n ' '}\n ' 'virtual_ipaddress {\n ' @@ -1965,7 +1980,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_ingress_config_ssl_rgw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test'): cephadm_module.cache.update_host_networks('test', { '1.2.3.0/24': { @@ -1997,6 +2011,10 @@ class TestIngressService: { 'keepalived.conf': '# This file is generated by cephadm.\n' + 'global_defs {\n ' + 'enable_script_security\n ' + 'script_user root\n' + '}\n\n' 'vrrp_script check_backend {\n ' 'script "/usr/bin/curl http://[1::4]:8999/health"\n ' 'weight -20\n ' @@ -2090,7 +2108,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_ingress_config_multi_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test', addr='1.2.3.7'): cephadm_module.cache.update_host_networks('test', { '1.2.3.0/24': { @@ -2123,6 +2140,10 @@ class TestIngressService: { 'keepalived.conf': '# This file is generated by cephadm.\n' + 'global_defs {\n ' + 'enable_script_security\n ' + 'script_user root\n' + '}\n\n' 'vrrp_script check_backend {\n ' 'script "/usr/bin/curl http://1.2.3.7:8999/health"\n ' 'weight -20\n ' @@ -2189,7 +2210,7 @@ class TestIngressService: 'maxconn 8000\n' '\nfrontend stats\n ' 'mode http\n ' - 'bind [..]:8999\n ' + 'bind [::]:8999\n ' 'bind 1.2.3.7:8999\n ' 'stats enable\n ' 'stats uri /stats\n ' @@ -2198,7 +2219,7 @@ class TestIngressService: 'http-request use-service prometheus-exporter if { path /metrics }\n ' 'monitor-uri /health\n' '\nfrontend frontend\n ' - 'bind [..]:8089\n ' + 'bind [::]:8089\n ' 'default_backend backend\n\n' 'backend backend\n ' 'option forwardfor\n ' @@ -2214,7 +2235,6 @@ class TestIngressService: @patch("cephadm.serve.CephadmServe._run_cephadm") def test_keepalive_config_multi_interface_vips(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) - with with_host(cephadm_module, 'test', addr='1.2.3.1'): with with_host(cephadm_module, 'test2', addr='1.2.3.2'): cephadm_module.cache.update_host_networks('test', { @@ -2257,6 +2277,10 @@ class TestIngressService: { 'keepalived.conf': '# This file is generated by cephadm.\n' + 'global_defs {\n ' + 'enable_script_security\n ' + 'script_user root\n' + '}\n\n' 
'vrrp_script check_backend {\n ' 'script "/usr/bin/curl http://1.2.3.1:8999/health"\n ' 'weight -20\n ' @@ -2448,6 +2472,10 @@ class TestIngressService: { 'keepalived.conf': '# This file is generated by cephadm.\n' + 'global_defs {\n ' + 'enable_script_security\n ' + 'script_user root\n' + '}\n\n' 'vrrp_script check_backend {\n ' 'script "/usr/bin/false"\n ' 'weight -20\n ' @@ -3168,3 +3196,178 @@ class TestSMB: stdin=json.dumps(expected), use_current_daemon_image=False, ) + + +class TestMgmtGateway: + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_service_endpoints") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch('cephadm.ssl_cert_utils.SSLCerts.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key)) + @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https")) + def test_mgmt_gateway_config(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): + + def get_services_endpoints(name): + if name == 'prometheus': + return ["192.168.100.100:9095", "192.168.100.101:9095"] + elif name == 'grafana': + return ["ceph-node-2:3000", "ceph-node-2:3000"] + elif name == 'alertmanager': + return ["192.168.100.100:9093", "192.168.100.102:9093"] + return [] + + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + get_service_endpoints_mock.side_effect = get_services_endpoints + + server_port = 5555 + spec = MgmtGatewaySpec(port=server_port, + ssl_certificate=ceph_generated_cert, + ssl_certificate_key=ceph_generated_key) + + expected = { + "fsid": "fsid", + "name": "mgmt-gateway.ceph-node", + "image": "", + "deploy_arguments": [], + "params": {"tcp_ports": [server_port]}, + "meta": { + "service_name": "mgmt-gateway", + "ports": [server_port], + "ip": None, + "deployed_by": [], + "rank": None, + "rank_generation": None, + "extra_container_args": None, + "extra_entrypoint_args": None + }, + "config_blobs": { + "files": { + "nginx.conf": dedent(""" + # This file is generated by cephadm. 
+ worker_rlimit_nofile 8192; + + events { + worker_connections 4096; + } + + http { + upstream dashboard_servers { + server ceph-node-2:8443; + server ceph-node-2:8443; + } + + upstream grafana_servers { + server ceph-node-2:3000; + server ceph-node-2:3000; + } + + upstream prometheus_servers { + server 192.168.100.100:9095; + server 192.168.100.101:9095; + } + + upstream alertmanager_servers { + server 192.168.100.100:9093; + server 192.168.100.102:9093; + } + + include /etc/nginx_external_server.conf; + include /etc/nginx_internal_server.conf; + }"""), + "nginx_external_server.conf": dedent(""" + server { + listen 5555 ssl; + listen [::]:5555 ssl; + ssl_certificate /etc/nginx/ssl/nginx.crt; + ssl_certificate_key /etc/nginx/ssl/nginx.key; + ssl_protocols TLSv1.3; + # from: https://ssl-config.mozilla.org/#server=nginx + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305; + + # Only return Nginx in server header, no extra info will be provided + server_tokens off; + + # Perfect Forward Secrecy(PFS) is frequently compromised without this + ssl_prefer_server_ciphers on; + + # Enable SSL session caching for improved performance + ssl_session_tickets off; + ssl_session_timeout 1d; + ssl_session_cache shared:SSL:10m; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + resolver_timeout 5s; + + # Security headers + ## X-Content-Type-Options: avoid MIME type sniffing + add_header X-Content-Type-Options nosniff; + ## Strict Transport Security (HSTS): Yes + add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload"; + ## Enables the Cross-site scripting (XSS) filter in browsers. 
+                add_header X-XSS-Protection "1; mode=block";
+                ## Content-Security-Policy (CSP): FIXME
+                # add_header Content-Security-Policy "default-src 'self'; script-src 'self'; object-src 'none'; base-uri 'none'; require-trusted-types-for 'script'; frame-ancestors 'self';";
+
+                location / {
+                    proxy_pass https://dashboard_servers;
+                    proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                }
+
+                location /grafana {
+                    rewrite ^/grafana/(.*) /$1 break;
+                    proxy_pass https://grafana_servers;
+                }
+
+                location /prometheus {
+                    proxy_pass http://prometheus_servers;
+                }
+
+                location /alertmanager {
+                    proxy_pass http://alertmanager_servers;
+                }
+            }"""),
+                    "nginx_internal_server.conf": dedent("""
+            server {
+                listen 29443 ssl;
+                listen [::]:29443 ssl;
+                ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                ssl_protocols TLSv1.2 TLSv1.3;
+                ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
+                ssl_prefer_server_ciphers on;
+
+                location /internal/grafana {
+                    rewrite ^/internal/grafana/(.*) /$1 break;
+                    proxy_pass https://grafana_servers;
+                }
+
+                location /internal/prometheus {
+                    rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
+                    proxy_pass http://prometheus_servers;
+                }
+
+                location /internal/alertmanager {
+                    rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
+                    proxy_pass http://alertmanager_servers;
+                }
+            }"""),
+                    "nginx_internal.crt": f"{ceph_generated_cert}",
+                    "nginx_internal.key": f"{ceph_generated_key}",
+                    "nginx.crt": f"{ceph_generated_cert}",
+                    "nginx.key": f"{ceph_generated_key}",
+                }
+            }
+        }
+
+        with with_host(cephadm_module, 'ceph-node'):
+            with with_service(cephadm_module, spec):
+                _run_cephadm.assert_called_with(
+                    'ceph-node',
+                    'mgmt-gateway.ceph-node',
+                    ['_orch', 'deploy'],
+                    [],
+                    stdin=json.dumps(expected),
+                    use_current_daemon_image=False,
+                )
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py
index 7a98a74b03d..d8ffab2da51 100644
--- a/src/pybind/mgr/cephadm/upgrade.py
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -9,7 +9,7 @@ from cephadm.registry import Registry
 from cephadm.serve import CephadmServe
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
 from cephadm.utils import ceph_release_to_major, name_to_config_section, CEPH_UPGRADE_ORDER, \
-    CEPH_TYPES, NON_CEPH_IMAGE_TYPES, GATEWAY_TYPES
+    CEPH_TYPES, CEPH_IMAGE_TYPES, NON_CEPH_IMAGE_TYPES, MONITORING_STACK_TYPES, GATEWAY_TYPES
 from cephadm.ssh import HostConnectionError
 from orchestrator import OrchestratorError, DaemonDescription, DaemonDescriptionStatus, daemon_type_to_service
@@ -1199,8 +1199,10 @@ class CephadmUpgrade:
             upgraded_daemon_count += done
             self._update_upgrade_progress(upgraded_daemon_count / len(daemons))
 
-        # make sure mgr and non-ceph-image daemons are properly redeployed in staggered upgrade scenarios
-        if daemon_type == 'mgr' or daemon_type in NON_CEPH_IMAGE_TYPES:
+        # make sure mgr and monitoring stack daemons are properly redeployed in staggered upgrade scenarios
+        # The idea here is to upgrade the monitoring daemons after the mgr is done upgrading as
+        # that means cephadm and the dashboard modules themselves have been upgraded
+        if daemon_type == 'mgr' or daemon_type in MONITORING_STACK_TYPES:
             if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
                 need_upgrade_names = [d[0].name() for d in need_upgrade] + \
                     [d[0].name() for d in need_upgrade_deployer]
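This hunk narrows which daemon types get the "redeploy after the active mgr" treatment during a staggered upgrade. A condensed sketch of the ordering rule, under the assumption that MONITORING_STACK_TYPES covers the prometheus/grafana-style daemons (simplified; not the full CephadmUpgrade flow):

# illustrative subset; the real constant lives in cephadm.utils
MONITORING_STACK_TYPES = {'prometheus', 'alertmanager', 'grafana', 'node-exporter'}

def may_redeploy_now(daemon_type: str, active_mgr_upgraded: bool) -> bool:
    # monitoring daemons wait for the active mgr, since only then are
    # cephadm and the dashboard modules guaranteed to be on the new version
    if daemon_type == 'mgr' or daemon_type in MONITORING_STACK_TYPES:
        return active_mgr_upgraded
    return True

assert not may_redeploy_now('grafana', active_mgr_upgraded=False)
assert may_redeploy_now('grafana', active_mgr_upgraded=True)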
@@ -1214,6 +1216,20 @@
         else:
             # no point in trying to redeploy with new version if active mgr is not on the new version
             need_upgrade_deployer = []
+        elif daemon_type in NON_CEPH_IMAGE_TYPES:
+            # Also handle daemons that are not on the ceph image but aren't monitoring daemons.
+            # This needs to be handled differently than the monitoring daemons as the nvmeof daemon,
+            # which falls in this category, relies on the mons being upgraded as well. This block
+            # sets these daemon types to be upgraded only when all ceph image daemons have been upgraded
+            if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
+                ceph_daemons = [d for d in self.mgr.cache.get_daemons() if d.daemon_type in CEPH_IMAGE_TYPES]
+                _, n1, n2, __ = self._detect_need_upgrade(ceph_daemons, target_digests, target_image)
+                if not n1 and not n2:
+                    # no ceph daemons need upgrade
+                    dds = [d for d in self.mgr.cache.get_daemons_by_type(
+                        daemon_type) if d.name() not in need_upgrade_names]
+                    _, ___, n2, ____ = self._detect_need_upgrade(dds, target_digests, target_image)
+                    need_upgrade_deployer += n2
 
         if any(d in target_digests for d in self.mgr.get_active_mgr_digests()):
             # only after the mgr itself is upgraded can we expect daemons to have
diff --git a/src/pybind/mgr/dashboard/constraints.txt b/src/pybind/mgr/dashboard/constraints.txt
index fd614104880..0eb72fd4ba2 100644
--- a/src/pybind/mgr/dashboard/constraints.txt
+++ b/src/pybind/mgr/dashboard/constraints.txt
@@ -4,3 +4,4 @@ bcrypt~=3.1
 python3-saml~=1.4
 requests~=2.26
 Routes~=2.4
+cheroot~=10.0
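The new elif adds a later gate for non-ceph-image daemons such as nvmeof: they are deferred until no ceph-image daemon still needs the upgrade. A sketch of that readiness check, assuming simple counters in place of _detect_need_upgrade's tuples:

def non_ceph_image_daemons_ready(active_mgr_upgraded: bool,
                                 ceph_daemons_needing_upgrade: int) -> bool:
    # nvmeof relies on the mons, so it redeploys only once every daemon
    # running on the ceph image is already on the target digest
    return active_mgr_upgraded and ceph_daemons_needing_upgrade == 0

assert not non_ceph_image_daemons_ready(True, ceph_daemons_needing_upgrade=3)
assert non_ceph_image_daemons_ready(True, ceph_daemons_needing_upgrade=0)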
+ """ + cfs = self._cephfs_instance(fs_id) + cfs.rename_path(src_path, dst_path) + class CephFSClients(object): def __init__(self, module_inst, fscid): @@ -842,6 +853,15 @@ class CephFSSubvolumeGroups(RESTController): f'Failed to get info for subvolume group {group["name"]}: {err}' ) group['info'] = json.loads(out) + + error_code, out, err = mgr.remote('volumes', '_cmd_fs_subvolumegroup_getpath', + None, {'vol_name': vol_name, + 'group_name': group['name']}) + if error_code != 0: + raise DashboardException( + f'Failed to get path for subvolume group {group["name"]}: {err}' + ) + group['info']['path'] = out return subvolume_groups @RESTController.Resource('GET') diff --git a/src/pybind/mgr/dashboard/controllers/crush_rule.py b/src/pybind/mgr/dashboard/controllers/crush_rule.py index 250f657b2ba..dd0ab91ba4d 100644 --- a/src/pybind/mgr/dashboard/controllers/crush_rule.py +++ b/src/pybind/mgr/dashboard/controllers/crush_rule.py @@ -38,14 +38,24 @@ class CrushRule(RESTController): return r raise NotFound('No such crush rule') - def create(self, name, root, failure_domain, device_class=None): - rule = { - 'name': name, - 'root': root, - 'type': failure_domain, - 'class': device_class - } - CephService.send_command('mon', 'osd crush rule create-replicated', **rule) + def create(self, name, failure_domain, device_class=None, root=None, profile=None, + pool_type='replication'): + if pool_type == 'erasure': + rule = { + 'name': name, + 'profile': profile, + 'type': failure_domain, + 'class': device_class + } + CephService.send_command('mon', 'osd crush rule create-erasure', **rule) + else: + rule = { + 'name': name, + 'root': root, + 'type': failure_domain, + 'class': device_class + } + CephService.send_command('mon', 'osd crush rule create-replicated', **rule) def delete(self, name): CephService.send_command('mon', 'osd crush rule rm', name=name) diff --git a/src/pybind/mgr/dashboard/controllers/health.py b/src/pybind/mgr/dashboard/controllers/health.py index 3edc386b012..de45bebbb46 100644 --- a/src/pybind/mgr/dashboard/controllers/health.py +++ b/src/pybind/mgr/dashboard/controllers/health.py @@ -44,6 +44,7 @@ HEALTH_MINIMAL_SCHEMA = ({ 'failed': ([int], ''), 'metadata_pool': (int, ''), 'epoch': (int, ''), + 'btime': (str, ''), 'stopped': ([int], ''), 'max_mds': (int, ''), 'compat': ({ diff --git a/src/pybind/mgr/dashboard/controllers/multi_cluster.py b/src/pybind/mgr/dashboard/controllers/multi_cluster.py index 8fdecf99d44..f7e6d516b49 100644 --- a/src/pybind/mgr/dashboard/controllers/multi_cluster.py +++ b/src/pybind/mgr/dashboard/controllers/multi_cluster.py @@ -5,6 +5,7 @@ import json import re import tempfile import time +from typing import Any, Dict from urllib.parse import urlparse import requests @@ -230,7 +231,7 @@ class MultiCluster(RESTController): @Endpoint('PUT') @UpdatePermission - def set_config(self, config: object): + def set_config(self, config: Dict[str, Any]): multicluster_config = self.load_multi_cluster_config() multicluster_config.update({'current_url': config['url']}) multicluster_config.update({'current_user': config['user']}) @@ -267,12 +268,13 @@ class MultiCluster(RESTController): @Endpoint('PUT') @UpdatePermission # pylint: disable=unused-variable - def edit_cluster(self, url, cluster_alias, username, verify=False, ssl_certificate=None): + def edit_cluster(self, name, url, cluster_alias, username, verify=False, ssl_certificate=None): multicluster_config = self.load_multi_cluster_config() if "config" in multicluster_config: for key, cluster_details in 
multicluster_config["config"].items(): for cluster in cluster_details: - if cluster["url"] == url and cluster["user"] == username: + if cluster["name"] == name and cluster["user"] == username: + cluster['url'] = url cluster['cluster_alias'] = cluster_alias cluster['ssl_verify'] = verify cluster['ssl_certificate'] = ssl_certificate if verify else '' diff --git a/src/pybind/mgr/dashboard/controllers/nvmeof.py b/src/pybind/mgr/dashboard/controllers/nvmeof.py index 3d9e3378702..84d7a37952e 100644 --- a/src/pybind/mgr/dashboard/controllers/nvmeof.py +++ b/src/pybind/mgr/dashboard/controllers/nvmeof.py @@ -1,16 +1,27 @@ # -*- coding: utf-8 -*- -from typing import Optional +import logging +from typing import Any, Dict, Optional +from .. import mgr from ..model import nvmeof as model from ..security import Scope +from ..services.orchestrator import OrchClient from ..tools import str_to_bool -from . import APIDoc, APIRouter, Endpoint, EndpointDoc, Param, ReadPermission, RESTController +from . import APIDoc, APIRouter, BaseController, Endpoint, EndpointDoc, Param, \ + ReadPermission, RESTController, UIRouter + +logger = logging.getLogger(__name__) + +NVME_SCHEMA = { + "available": (bool, "Is NVMe/TCP available?"), + "message": (str, "Descriptions") +} try: from ..services.nvmeof_client import NVMeoFClient, empty_response, \ handle_nvmeof_error, map_collection, map_model -except ImportError: - pass +except ImportError as e: + logger.error("Failed to import NVMeoFClient and related components: %s", e) else: @APIRouter("/nvmeof/gateway", Scope.NVME_OF) @APIDoc("NVMe-oF Gateway Management API", "NVMe-oF Gateway") @@ -380,3 +391,24 @@ else: return NVMeoFClient().stub.list_connections( NVMeoFClient.pb2.list_connections_req(subsystem=nqn) ) + + +@UIRouter('/nvmeof', Scope.NVME_OF) +@APIDoc("NVMe/TCP Management API", "NVMe/TCP") +class NVMeoFStatus(BaseController): + @Endpoint() + @ReadPermission + @EndpointDoc("Display NVMe/TCP service Status", + responses={200: NVME_SCHEMA}) + def status(self) -> dict: + status: Dict[str, Any] = {'available': True, 'message': None} + orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator') + if orch_backend == 'cephadm': + orch = OrchClient.instance() + orch_status = orch.status() + if not orch_status['available']: + return status + if not orch.services.list_daemons(daemon_type='nvmeof'): + status["available"] = False + status["message"] = 'Create an NVMe/TCP service to get started.' + return status diff --git a/src/pybind/mgr/dashboard/controllers/pool.py b/src/pybind/mgr/dashboard/controllers/pool.py index 9310d2f97aa..5c25c8b2a5d 100644 --- a/src/pybind/mgr/dashboard/controllers/pool.py +++ b/src/pybind/mgr/dashboard/controllers/pool.py @@ -14,6 +14,7 @@ from ..services.rbd import RbdConfiguration from ..tools import TaskManager, str_to_bool from . 
import APIDoc, APIRouter, Endpoint, EndpointDoc, ReadPermission, \ RESTController, Task, UIRouter +from .rbd_mirroring import RbdMirroringPoolMode POOL_SCHEMA = ([{ "pool": (int, "pool id"), @@ -169,25 +170,38 @@ class Pool(RESTController): yes_i_really_really_mean_it=True) @pool_task('edit', ['{pool_name}']) - def set(self, pool_name, flags=None, application_metadata=None, configuration=None, **kwargs): + def set(self, pool_name, flags=None, application_metadata=None, configuration=None, + rbd_mirroring=None, **kwargs): self._set_pool_values(pool_name, application_metadata, flags, True, kwargs) if kwargs.get('pool'): pool_name = kwargs['pool'] RbdConfiguration(pool_name).set_configuration(configuration) + if rbd_mirroring is not None: + self._set_mirroring_mode(rbd_mirroring, pool_name) self._wait_for_pgs(pool_name) @pool_task('create', {'pool_name': '{pool}'}) @handle_send_command_error('pool') def create(self, pool, pg_num, pool_type, erasure_code_profile=None, flags=None, - application_metadata=None, rule_name=None, configuration=None, **kwargs): + application_metadata=None, rule_name=None, configuration=None, + rbd_mirroring=None, **kwargs): ecp = erasure_code_profile if erasure_code_profile else None CephService.send_command('mon', 'osd pool create', pool=pool, pg_num=int(pg_num), pgp_num=int(pg_num), pool_type=pool_type, erasure_code_profile=ecp, rule=rule_name) self._set_pool_values(pool, application_metadata, flags, False, kwargs) RbdConfiguration(pool).set_configuration(configuration) + if rbd_mirroring is not None: + self._set_mirroring_mode(rbd_mirroring, pool) self._wait_for_pgs(pool) + def _set_mirroring_mode(self, mirroring_enabled, pool): + rbd_mirroring = RbdMirroringPoolMode() + if str_to_bool(mirroring_enabled): + rbd_mirroring.set_pool_mirror_mode(pool, 'pool') + else: + rbd_mirroring.set_pool_mirror_mode(pool, 'disabled') + def _set_pool_values(self, pool, application_metadata, flags, update_existing, kwargs): current_pool = self._get(pool) if update_existing and kwargs.get('compression_mode') == 'unset': diff --git a/src/pybind/mgr/dashboard/controllers/rbd.py b/src/pybind/mgr/dashboard/controllers/rbd.py index f803ab1a18a..767d23577b6 100644 --- a/src/pybind/mgr/dashboard/controllers/rbd.py +++ b/src/pybind/mgr/dashboard/controllers/rbd.py @@ -137,12 +137,12 @@ class Rbd(RESTController): @RbdTask('edit', ['{image_spec}', '{name}'], 4.0) def set(self, image_spec, name=None, size=None, features=None, configuration=None, metadata=None, enable_mirror=None, primary=None, - force=False, resync=False, mirror_mode=None, schedule_interval='', - remove_scheduling=False): + force=False, resync=False, mirror_mode=None, image_mirror_mode=None, + schedule_interval='', remove_scheduling=False): return RbdService.set(image_spec, name, size, features, configuration, metadata, enable_mirror, primary, - force, resync, mirror_mode, schedule_interval, - remove_scheduling) + force, resync, mirror_mode, image_mirror_mode, + schedule_interval, remove_scheduling) @RbdTask('copy', {'src_image_spec': '{image_spec}', diff --git a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py index af30e8415eb..1e80de74b3b 100644 --- a/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py +++ b/src/pybind/mgr/dashboard/controllers/rbd_mirroring.py @@ -11,7 +11,6 @@ import cherrypy import rbd from .. 
import mgr -from ..controllers.pool import RBDPool from ..controllers.service import Service from ..security import Scope from ..services.ceph_service import CephService @@ -507,6 +506,9 @@ class RbdMirroringPoolMode(RESTController): @RbdMirroringTask('pool/edit', {'pool_name': '{pool_name}'}, 5.0) def set(self, pool_name, mirror_mode=None): + return self.set_pool_mirror_mode(pool_name, mirror_mode) + + def set_pool_mirror_mode(self, pool_name, mirror_mode): def _edit(ioctx, mirror_mode=None): if mirror_mode: mode_enum = {x[1]: x[0] for x in @@ -674,6 +676,8 @@ class RbdMirroringStatus(BaseController): @EndpointDoc('Configure RBD Mirroring') @CreatePermission def configure(self): + from ..controllers.pool import RBDPool # to avoid circular import + rbd_pool = RBDPool() service = Service() diff --git a/src/pybind/mgr/dashboard/controllers/rgw.py b/src/pybind/mgr/dashboard/controllers/rgw.py index f48dc592292..c479214dce3 100644 --- a/src/pybind/mgr/dashboard/controllers/rgw.py +++ b/src/pybind/mgr/dashboard/controllers/rgw.py @@ -14,7 +14,8 @@ from ..rest_client import RequestException from ..security import Permission, Scope from ..services.auth import AuthManager, JwtManager from ..services.ceph_service import CephService -from ..services.rgw_client import NoRgwDaemonsException, RgwClient, RgwMultisite +from ..services.rgw_client import _SYNC_GROUP_ID, NoRgwDaemonsException, RgwClient, RgwMultisite +from ..services.service import RgwServiceManager from ..tools import json_str_to_object, str_to_bool from . import APIDoc, APIRouter, BaseController, CreatePermission, \ CRUDCollectionMethod, CRUDEndpoint, DeletePermission, Endpoint, \ @@ -112,6 +113,27 @@ class RgwMultisiteStatus(RESTController): secret_key) return result + @RESTController.Collection(method='POST', path='/multisite-replications') + @allow_empty_body + # pylint: disable=W0102,W0613 + def setup_multisite_replication(self, daemon_name=None, realm_name=None, zonegroup_name=None, + zonegroup_endpoints=None, zone_name=None, zone_endpoints=None, + username=None, cluster_fsid=None): + multisite_instance = RgwMultisite() + result = multisite_instance.setup_multisite_replication(realm_name, zonegroup_name, + zonegroup_endpoints, zone_name, + zone_endpoints, username, + cluster_fsid) + return result + + @RESTController.Collection(method='PUT', path='/setup-rgw-credentials') + @allow_empty_body + # pylint: disable=W0102,W0613 + def restart_rgw_daemons_and_set_credentials(self): + rgw_service_manager_instance = RgwServiceManager() + result = rgw_service_manager_instance.restart_rgw_daemons_and_set_credentials() + return result + @APIRouter('rgw/multisite', Scope.RGW) @APIDoc("RGW Multisite Management API", "RgwMultisite") @@ -129,8 +151,21 @@ class RgwMultisiteController(RESTController): @Endpoint(path='/sync-policy') @EndpointDoc("Get the sync policy") @ReadPermission - def get_sync_policy(self, bucket_name='', zonegroup_name=''): + def get_sync_policy(self, bucket_name='', zonegroup_name='', all_policy=None): multisite_instance = RgwMultisite() + all_policy = str_to_bool(all_policy) + if all_policy: + sync_policy_list = [] + buckets = json.loads(RgwBucket().list(stats=False)) + for bucket in buckets: + sync_policy = multisite_instance.get_sync_policy(bucket, zonegroup_name) + for policy in sync_policy['groups']: + policy['bucketName'] = bucket + sync_policy_list.append(policy) + other_sync_policy = multisite_instance.get_sync_policy(bucket_name, zonegroup_name) + for policy in other_sync_policy['groups']: + 
sync_policy_list.append(policy)
+            return sync_policy_list
         return multisite_instance.get_sync_policy(bucket_name, zonegroup_name)
 
     @Endpoint(path='/sync-policy-group')
@@ -242,6 +277,7 @@ class RgwDaemon(RESTController):
             'server_hostname': hostname,
             'realm_name': metadata['realm_name'],
             'zonegroup_name': metadata['zonegroup_name'],
+            'zonegroup_id': metadata['zonegroup_id'],
             'zone_name': metadata['zone_name'],
             'default': instance.daemon.name == metadata['id'],
             'port': int(port) if port else None
@@ -307,6 +343,8 @@ class RgwSite(RgwRESTController):
             return RgwClient.admin_instance(daemon_name=daemon_name).get_realms()
         if query == 'default-realm':
             return RgwClient.admin_instance(daemon_name=daemon_name).get_default_realm()
+        if query == 'default-zonegroup':
+            return RgwMultisite().get_all_zonegroups_info()['default_zonegroup']
 
         # @TODO: for multisite: by default, retrieve cluster topology/map.
         raise DashboardException(http_status_code=501, component='rgw', msg='Not Implemented')
@@ -376,8 +414,8 @@ class RgwBucket(RgwRESTController):
                                  retention_period_days,
                                  retention_period_years)
 
-    def _get_policy(self, bucket: str):
-        rgw_client = RgwClient.admin_instance()
+    def _get_policy(self, bucket: str, daemon_name, owner):
+        rgw_client = RgwClient.instance(owner, daemon_name)
         return rgw_client.get_bucket_policy(bucket)
 
     def _set_policy(self, bucket_name: str, policy: str, daemon_name, owner):
@@ -388,6 +426,18 @@ class RgwBucket(RgwRESTController):
         rgw_client = RgwClient.instance(owner, daemon_name)
         return rgw_client.set_tags(bucket_name, tags)
 
+    def _get_lifecycle(self, bucket_name: str, daemon_name, owner):
+        rgw_client = RgwClient.instance(owner, daemon_name)
+        return rgw_client.get_lifecycle(bucket_name)
+
+    def _set_lifecycle(self, bucket_name: str, lifecycle: str, daemon_name, owner):
+        rgw_client = RgwClient.instance(owner, daemon_name)
+        return rgw_client.set_lifecycle(bucket_name, lifecycle)
+
+    def _delete_lifecycle(self, bucket_name: str, daemon_name, owner):
+        rgw_client = RgwClient.instance(owner, daemon_name)
+        return rgw_client.delete_lifecycle(bucket_name)
+
     def _get_acl(self, bucket_name, daemon_name, owner):
         rgw_client = RgwClient.instance(owner, daemon_name)
         return str(rgw_client.get_acl(bucket_name))
@@ -396,6 +446,25 @@ class RgwBucket(RgwRESTController):
         rgw_client = RgwClient.instance(owner, daemon_name)
         return rgw_client.set_acl(bucket_name, acl)
 
+    def _set_replication(self, bucket_name: str, replication: bool, owner, daemon_name):
+        multisite = RgwMultisite()
+        # return immediately if the multisite is not configured
+        if not multisite.get_multisite_status():
+            return None
+
+        rgw_client = RgwClient.instance(owner, daemon_name)
+        zonegroup_name = RgwClient.admin_instance(daemon_name=daemon_name).get_default_zonegroup()
+
+        policy_exists = multisite.policy_group_exists(_SYNC_GROUP_ID, zonegroup_name)
+        if replication and not policy_exists:
+            multisite.create_dashboard_admin_sync_group(zonegroup_name=zonegroup_name)
+
+        return rgw_client.set_bucket_replication(bucket_name, replication)
+
+    def _get_replication(self, bucket_name: str, owner, daemon_name):
+        rgw_client = RgwClient.instance(owner, daemon_name)
+        return rgw_client.get_bucket_replication(bucket_name)
+
     @staticmethod
     def strip_tenant_from_bucket_name(bucket_name):
         # type (str) -> str
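_set_replication above is the interesting path: it lazily creates the dashboard-managed sync group the first time replication is enabled in a zonegroup. A sketch of that order of operations with hypothetical collaborators (the real _SYNC_GROUP_ID value lives in rgw_client and is not shown here):

SYNC_GROUP_ID = 'dashboard-managed-sync-group'  # placeholder for _SYNC_GROUP_ID

def enable_bucket_replication(multisite, rgw_client, admin_client, bucket_name: str):
    # single-zone deployments have nothing to replicate to
    if not multisite.get_multisite_status():
        return None
    zonegroup = admin_client.get_default_zonegroup()
    # the sync group is created once per zonegroup, then reused
    if not multisite.policy_group_exists(SYNC_GROUP_ID, zonegroup):
        multisite.create_dashboard_admin_sync_group(zonegroup_name=zonegroup)
    return rgw_client.set_bucket_replication(bucket_name, True)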
@@ -448,8 +517,10 @@
         result['encryption'] = encryption['Status']
         result['versioning'] = versioning['Status']
         result['mfa_delete'] = versioning['MfaDelete']
-        result['bucket_policy'] = self._get_policy(bucket_name)
+        result['bucket_policy'] = self._get_policy(bucket_name, daemon_name, result['owner'])
         result['acl'] = self._get_acl(bucket_name, daemon_name, result['owner'])
+        result['replication'] = self._get_replication(bucket_name, result['owner'], daemon_name)
+        result['lifecycle'] = self._get_lifecycle(bucket_name, daemon_name, result['owner'])
 
         # Append the locking configuration.
         locking = self._get_locking(result['owner'], daemon_name, bucket_name)
@@ -463,9 +534,11 @@
                lock_retention_period_days=None, lock_retention_period_years=None,
                encryption_state='false', encryption_type=None, key_id=None, tags=None,
-               bucket_policy=None, canned_acl=None, daemon_name=None):
+               bucket_policy=None, canned_acl=None, replication='false',
+               daemon_name=None):
         lock_enabled = str_to_bool(lock_enabled)
         encryption_state = str_to_bool(encryption_state)
+        replication = str_to_bool(replication)
         try:
             rgw_client = RgwClient.instance(uid, daemon_name)
             result = rgw_client.create_bucket(bucket, zonegroup,
@@ -488,6 +561,8 @@
             if canned_acl:
                 self._set_acl(bucket, canned_acl, uid, daemon_name)
+            if replication:
+                self._set_replication(bucket, replication, uid, daemon_name)
             return result
         except RequestException as e:  # pragma: no cover - handling is too obvious
             raise DashboardException(e, http_status_code=500, component='rgw')
@@ -498,8 +573,11 @@
             mfa_delete=None, mfa_token_serial=None, mfa_token_pin=None,
             lock_mode=None, lock_retention_period_days=None,
             lock_retention_period_years=None, tags=None, bucket_policy=None,
-            canned_acl=None, daemon_name=None):
+            canned_acl=None, replication=None, lifecycle=None, daemon_name=None):
+        # pylint: disable=R0912
         encryption_state = str_to_bool(encryption_state)
+        if replication is not None:
+            replication = str_to_bool(replication)
         # When linking a non-tenant-user owned bucket to a tenanted user, we
         # need to prefix bucket name with '/'. e.g.
photos -> /photos if '$' in uid and '/' not in bucket: @@ -544,6 +622,12 @@ class RgwBucket(RgwRESTController): self._set_policy(bucket_name, bucket_policy, daemon_name, uid) if canned_acl: self._set_acl(bucket_name, canned_acl, uid, daemon_name) + if replication: + self._set_replication(bucket_name, replication, uid, daemon_name) + if lifecycle and not lifecycle == '{}': + self._set_lifecycle(bucket_name, lifecycle, daemon_name, uid) + else: + self._delete_lifecycle(bucket_name, daemon_name, uid) return self._append_bid(result) def delete(self, bucket, purge_objects='true', daemon_name=None): @@ -1013,11 +1097,9 @@ class RgwRealm(RESTController): @UpdatePermission @allow_empty_body # pylint: disable=W0613 - def import_realm_token(self, realm_token, zone_name, port, placement_spec): + def import_realm_token(self, realm_token, zone_name, port, placement_spec=None): try: - multisite_instance = RgwMultisite() result = CephService.import_realm_token(realm_token, zone_name, port, placement_spec) - multisite_instance.update_period() return result except NoRgwDaemonsException as e: raise DashboardException(e, http_status_code=404, component='rgw') diff --git a/src/pybind/mgr/dashboard/frontend/.nvmrc b/src/pybind/mgr/dashboard/frontend/.nvmrc new file mode 100644 index 00000000000..9bcccb9439d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/.nvmrc @@ -0,0 +1 @@ +v20.13.1 diff --git a/src/pybind/mgr/dashboard/frontend/CMakeLists.txt b/src/pybind/mgr/dashboard/frontend/CMakeLists.txt index 2527ef23e85..c0d16511e96 100644 --- a/src/pybind/mgr/dashboard/frontend/CMakeLists.txt +++ b/src/pybind/mgr/dashboard/frontend/CMakeLists.txt @@ -64,7 +64,7 @@ else(WITH_SYSTEM_NPM) OUTPUT "${mgr-dashboard-nodeenv-dir}/bin/npm" COMMAND ${CMAKE_SOURCE_DIR}/src/tools/setup-virtualenv.sh --python=${MGR_PYTHON_EXECUTABLE} ${mgr-dashboard-nodeenv-dir} COMMAND ${mgr-dashboard-nodeenv-dir}/bin/pip install nodeenv - COMMAND ${mgr-dashboard-nodeenv-dir}/bin/nodeenv --verbose ${node_mirror_opt} -p --node=18.17.0 + COMMAND ${mgr-dashboard-nodeenv-dir}/bin/nodeenv --verbose ${node_mirror_opt} -p --node=20.13.1 COMMAND mkdir ${mgr-dashboard-nodeenv-dir}/.npm WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMENT "dashboard nodeenv is being installed") diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/a11y/navigation.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/a11y/navigation.e2e-spec.ts index 3a0a1a7dc90..d1dd0083901 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/a11y/navigation.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/a11y/navigation.e2e-spec.ts @@ -10,7 +10,11 @@ describe('Navigation accessibility', { retries: 0 }, () => { it('top-nav should have no accessibility violations', () => { cy.injectAxe(); - cy.checkAccessibility('.cd-navbar-top'); + cy.checkAccessibility('cds-header', { + rules: { + 'nested-interactive': { enabled: false } + } + }); }); it('sidebar should have no accessibility violations', () => { diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/images.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/images.e2e-spec.ts index 962c135d56f..bd8932509f8 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/images.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/images.e2e-spec.ts @@ -11,7 +11,7 @@ describe('Images page', () => { cy.login(); // Need pool for image testing pools.navigateTo('create'); - pools.create(poolName, 8, 'rbd'); + pools.create(poolName, 8, ['rbd']); 
pools.existTableCell(poolName); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/mirroring.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/mirroring.e2e-spec.ts index fb7db27122d..73f668a17bc 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/mirroring.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/block/mirroring.e2e-spec.ts @@ -32,7 +32,7 @@ describe('Mirroring page', () => { cy.ceph2Login(); cy.login(); pools.navigateTo('create'); - pools.create(poolName, 8, 'rbd'); + pools.create(poolName, 8, ['rbd']); pools.navigateTo(); pools.existTableCell(poolName, true); mirroring.navigateTo(); @@ -49,16 +49,17 @@ describe('Mirroring page', () => { // so writing the code to copy the token inside the origin manually // rather than using a function call // @ts-ignore + cy.ceph2Login(); cy.origin(url, { args }, ({ name, bootstrapToken }) => { // Create an rbd pool in the second cluster + cy.visit('#/pool/create').wait(100); // Login to the second cluster // Somehow its not working with the cypress login function - cy.visit('#/pool/create').wait(100); - cy.get('[name=username]').type('admin'); cy.get('#password').type('admin'); cy.get('[type=submit]').click(); + cy.get('input[name=name]').clear().type(name); cy.get(`select[name=poolType]`).select('replicated'); cy.get(`select[name=poolType] option:checked`).contains('replicated'); @@ -93,7 +94,7 @@ describe('Mirroring page', () => { beforeEach(() => { pools.navigateTo('create'); // Need pool for mirroring testing - pools.create(poolName, 8, 'rbd'); + pools.create(poolName, 8, ['rbd']); pools.navigateTo(); pools.existTableCell(poolName, true); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/create-cluster.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/create-cluster.po.ts index 300eddbcc3d..2ec31869daf 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/create-cluster.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/create-cluster.po.ts @@ -4,7 +4,7 @@ import { HostsPageHelper } from './hosts.po'; import { ServicesPageHelper } from './services.po'; const pages = { - index: { url: '#/expand-cluster', id: 'cd-create-cluster' } + index: { url: '#/expand-cluster?welcome=true', id: 'cd-create-cluster' } }; export class CreateClusterWizardHelper extends PageHelper { pages = pages; @@ -28,7 +28,7 @@ export class CreateClusterWizardHelper extends PageHelper { export class CreateClusterHostPageHelper extends HostsPageHelper { pages = { - index: { url: '#/expand-cluster', id: 'cd-wizard' }, + index: { url: '#/expand-cluster?welcome=true', id: 'cd-wizard' }, add: { url: '', id: 'cd-host-form' } }; @@ -42,7 +42,7 @@ export class CreateClusterHostPageHelper extends HostsPageHelper { export class CreateClusterServicePageHelper extends ServicesPageHelper { pages = { - index: { url: '#/expand-cluster', id: 'cd-wizard' }, + index: { url: '#/expand-cluster?welcome=true', id: 'cd-wizard' }, create: { url: '', id: 'cd-service-form' } }; diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/logs.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/logs.e2e-spec.ts index 606f6a3cd9d..f4c869c10e3 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/logs.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/logs.e2e-spec.ts @@ -46,7 +46,7 @@ describe('Logs page', () => { describe('audit logs respond to pool creation and deletion test', () => { it('should create pool and check 
audit logs reacted', () => { pools.navigateTo('create'); - pools.create(poolname, 8); + pools.create(poolname, 8, ['rbd']); pools.navigateTo(); pools.existTableCell(poolname, true); logs.checkAuditForPoolFunction(poolname, 'create', hour, minute); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/osds.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/osds.po.ts index cd812f474fb..e96518bceb7 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/osds.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/osds.po.ts @@ -15,6 +15,11 @@ export class OSDsPageHelper extends PageHelper { create(deviceType: 'hdd' | 'ssd', hostname?: string, expandCluster = false) { cy.get('[aria-label="toggle advanced mode"]').click(); + cy.get('[aria-label="toggle advanced mode"]').then(($button) => { + if ($button.hasClass('collapsed')) { + cy.wrap($button).click(); + } + }); // Click Primary devices Add button cy.get('cd-osd-devices-selection-groups[name="Primary"]').as('primaryGroups'); cy.get('@primaryGroups').find('button').click(); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/services.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/services.po.ts index 3db483a16a2..e1a3a002548 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/services.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/cluster/services.po.ts @@ -49,7 +49,9 @@ export class ServicesPageHelper extends PageHelper { switch (serviceType) { case 'rgw': cy.get('#service_id').type('foo'); - unmanaged ? cy.get('label[for=unmanaged]').click() : cy.get('#count').type(String(count)); + unmanaged + ? cy.get('label[for=unmanaged]').click() + : cy.get('#count').clear().type(String(count)); break; case 'ingress': @@ -65,12 +67,16 @@ export class ServicesPageHelper extends PageHelper { case 'nfs': cy.get('#service_id').type('testnfs'); - unmanaged ? cy.get('label[for=unmanaged]').click() : cy.get('#count').type(String(count)); + unmanaged + ? cy.get('label[for=unmanaged]').click() + : cy.get('#count').clear().type(String(count)); break; case 'smb': cy.get('#service_id').type('testsmb'); - unmanaged ? cy.get('label[for=unmanaged]').click() : cy.get('#count').type(String(count)); + unmanaged + ? cy.get('label[for=unmanaged]').click() + : cy.get('#count').clear().type(String(count)); cy.get('#cluster_id').type('cluster_foo'); cy.get('#config_uri').type('rados://.smb/foo/scc.toml'); break; @@ -96,7 +102,9 @@ export class ServicesPageHelper extends PageHelper { default: cy.get('#service_id').type('test'); - unmanaged ? cy.get('label[for=unmanaged]').click() : cy.get('#count').type(String(count)); + unmanaged + ? 
cy.get('label[for=unmanaged]').click() + : cy.get('#count').clear().type(String(count)); break; } if (serviceType === 'snmp-gateway') { diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/global.feature.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/global.feature.po.ts index c6132ae3dd0..cffed0b9b60 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/global.feature.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/global.feature.po.ts @@ -38,3 +38,7 @@ And('I go to the {string} tab', (names: string) => { cy.contains('.nav.nav-tabs a', name).click(); } }); + +And('I wait for {string} seconds', (seconds: number) => { + cy.wait(seconds * 1000); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/urls.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/urls.po.ts index 6f7316f98f5..5b664f96748 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/urls.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/common/urls.po.ts @@ -3,7 +3,7 @@ import { PageHelper } from '../page-helper.po'; export class UrlsCollection extends PageHelper { pages = { // Cluster expansion - welcome: { url: '#/expand-cluster', id: 'cd-create-cluster' }, + welcome: { url: '#/expand-cluster?welcome=true', id: 'cd-create-cluster' }, // Landing page dashboard: { url: '#/dashboard', id: 'cd-dashboard' }, @@ -42,7 +42,7 @@ export class UrlsCollection extends PageHelper { 'rgw daemons': { url: '#/rgw/daemon', id: 'cd-rgw-daemon-list' }, // CephFS - cephfs: { url: '#/cephfs', id: 'cd-cephfs-list' }, - 'create cephfs': { url: '#/cephfs/create', id: 'cd-cephfs-form' } + cephfs: { url: '#/cephfs/fs', id: 'cd-cephfs-list' }, + 'create cephfs': { url: '#/cephfs/fs/create', id: 'cd-cephfs-form' } }; } diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature index 002282172bb..94d6397b66d 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/filesystems/snapshots.e2e-spec.feature @@ -1,6 +1,6 @@ Feature: CephFS Snapshot Management - Goal: To test out the CephFS snapshot management features + Goal: To test out the CephFS snapshot and clone management features Background: Login Given I am logged in @@ -33,6 +33,48 @@ Feature: CephFS Snapshot Management And I go to the "Snapshots" tab Then I should see a table in the expanded row + Scenario: Create a CephFS Subvolume Snapshot + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Snapshots" tab + And I click on "Create" button from the expanded row + And enter "snapshotName" "test_snapshot" in the modal + And I click on "Create Snapshot" button + Then I should see a row with "test_snapshot" in the expanded row + + Scenario: Create a CephFS Subvolume Snapshot Clone + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Snapshots" tab + And I select a row "test_snapshot" in the expanded row + And I click on "Clone" button from the table actions in the expanded row + And enter "cloneName" "test_clone" in the modal + And I click on "Create Clone" button + Then I wait for "5" seconds + And I go to the "Subvolumes" tab + Then I should see a row with "test_clone" in the expanded row + + Scenario: Remove a CephFS Subvolume Snapshot Clone + Given I am on the "cephfs" page + When I expand the row 
"test_cephfs" + And I go to the "Subvolumes" tab + And I select a row "test_clone" in the expanded row + And I click on "Remove" button from the table actions in the expanded row + And I check the tick box in modal + And I click on "Remove Subvolume" button + Then I wait for "5" seconds + And I should not see a row with "test_clone" in the expanded row + + Scenario: Remove a CephFS Subvolume Snapshot + Given I am on the "cephfs" page + When I expand the row "test_cephfs" + And I go to the "Snapshots" tab + And I select a row "test_snapshot" in the expanded row + And I click on "Remove" button from the table actions in the expanded row + And I check the tick box in modal + And I click on "Remove Snapshot" button + Then I should not see a row with "test_snapshot" in the expanded row + Scenario: Remove a CephFS Subvolume Given I am on the "cephfs" page When I expand the row "test_cephfs" diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.e2e-spec.ts new file mode 100644 index 00000000000..74f26df8e56 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.e2e-spec.ts @@ -0,0 +1,54 @@ +import { DashboardPageHelper } from '../ui/dashboard.po'; +import { MultiClusterPageHelper } from './multi-cluster.po'; + +describe('Muti-cluster management page', () => { + const multiCluster = new MultiClusterPageHelper(); + const dashboard = new DashboardPageHelper(); + + const hubName = 'local-cluster'; + const url = Cypress.env('CEPH2_URL'); + const alias = 'ceph2'; + const username = 'admin'; + const password = 'admin'; + + const editedAlias = 'ceph2-edited'; + + beforeEach(() => { + cy.login(); + multiCluster.navigateTo('manage-clusters'); + }); + + it('should authenticate the second cluster', () => { + multiCluster.auth(url, alias, username, password); + multiCluster.existTableCell(alias); + }); + + it('should switch to the second cluster and back to hub', () => { + dashboard.navigateTo(); + cy.get('[data-testid="selected-cluster"]').click(); + cy.get('[data-testid="select-a-cluster"]').contains(alias).click(); + cy.get('[data-testid="selected-cluster"]').contains(alias); + cy.get('cd-dashboard-v3').should('exist'); + + // now switch back to the hub cluster + cy.get('[data-testid="selected-cluster"]').click(); + cy.get('[data-testid="select-a-cluster"]').contains(hubName).click(); + cy.get('[data-testid="selected-cluster"]').contains(hubName); + cy.get('cd-dashboard-v3').should('exist'); + }); + + it('should reconnect the second cluster', () => { + multiCluster.reconnect(alias, password); + multiCluster.existTableCell(alias); + }); + + it('should edit the second cluster', () => { + multiCluster.edit(alias, editedAlias); + multiCluster.existTableCell(editedAlias); + }); + + it('should disconnect the second cluster', () => { + multiCluster.disconnect(editedAlias); + multiCluster.existTableCell(editedAlias, false); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.po.ts new file mode 100644 index 00000000000..08fbe7b843d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/multi-cluster/multi-cluster.po.ts @@ -0,0 +1,55 @@ +import { PageHelper } from '../page-helper.po'; + +const pages = { + index: { url: '#/multi-cluster/overview', id: 'cd-multi-cluster' }, + 'manage-clusters': { url: 
'#/multi-cluster/manage-clusters', id: 'cd-multi-cluster-list' } +}; + +const WAIT_TIMER = 1000; + +export class MultiClusterPageHelper extends PageHelper { + pages = pages; + + auth(url: string, alias: string, username: string, password: string) { + this.clickActionButton('connect'); + cy.get('cd-multi-cluster-form').should('exist'); + cy.get('cd-modal').within(() => { + cy.get('input[name=remoteClusterUrl]').type(url); + cy.get('input[name=clusterAlias]').type(alias); + cy.get('input[name=username]').type(username); + cy.get('input[name=password]').type(password); + cy.get('cd-submit-button').click(); + }); + cy.wait(WAIT_TIMER); + } + + disconnect(alias: string) { + this.getFirstTableCell(alias).click(); + this.clickActionButton('disconnect'); + cy.get('cd-modal').within(() => { + cy.get('#confirmation').click(); + cy.get('cd-submit-button').click(); + }); + cy.wait(WAIT_TIMER); + } + + reconnect(alias: string, password: string) { + this.getFirstTableCell(alias).click(); + this.clickActionButton('reconnect'); + cy.get('cd-modal').within(() => { + cy.get('input[name=password]').type(password); + cy.get('cd-submit-button').click(); + }); + cy.wait(WAIT_TIMER); + } + + edit(alias: string, newAlias: string) { + this.getFirstTableCell(alias).click(); + this.clickActionButton('edit'); + cy.get('cd-modal').within(() => { + cy.get('input[name=clusterAlias]').clear().type(newAlias); + cy.get('cd-submit-button').click(); + }); + cy.wait(WAIT_TIMER); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/10-nfs-exports.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/10-nfs-exports.e2e-spec.ts index fdd96d7e975..ff2e7581bb6 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/10-nfs-exports.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/10-nfs-exports.e2e-spec.ts @@ -19,11 +19,11 @@ describe('nfsExport page', () => { beforeEach(() => { cy.login(); - nfsExport.navigateTo(); }); describe('breadcrumb test', () => { it('should open and show breadcrumb', () => { + nfsExport.navigateTo('rgw_index'); nfsExport.expectBreadcrumbText('NFS'); }); }); @@ -43,23 +43,24 @@ describe('nfsExport page', () => { buckets.navigateTo('create'); buckets.create(bucketName, 'dashboard'); - nfsExport.navigateTo(); + nfsExport.navigateTo('rgw_index'); nfsExport.existTableCell(rgwPseudo, false); - nfsExport.navigateTo('create'); + nfsExport.navigateTo('rgw_create'); nfsExport.create(backends[1], squash, client, rgwPseudo, bucketName); nfsExport.existTableCell(rgwPseudo); }); // @TODO: uncomment this when a CephFS volume can be created through Dashboard. 
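// Sketch: the NFS spec above now picks pages through explicit keys such as
// 'rgw_index' and 'rgw_create'. A keyed pages map like the one below is one
// way a PageHelper can resolve those keys; the class and method bodies here
// are illustrative assumptions, and only the URLs and component ids mirror
// the nfs-export.po.ts hunk further below.
interface PageEntry {
  url: string; // hash route the test visits
  id: string; // root component tag asserted after navigation
}

class KeyedPageHelper {
  pages: Record<string, PageEntry> = {
    rgw_index: { url: '#rgw/nfs', id: 'cd-nfs-list' },
    rgw_create: { url: '#rgw/nfs/create', id: 'cd-nfs-form' }
  };

  navigateTo(name: string = 'rgw_index') {
    const page = this.pages[name];
    cy.visit(page.url); // cy is the global Cypress chainer
    cy.get(page.id).should('exist'); // retried until the page component renders
  }
}
// Usage: new KeyedPageHelper().navigateTo('rgw_create');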
// it('should create a nfs-export with CephFS backend', () => { - // nfsExport.navigateTo(); + // nfsExport.navigateTo('cephfs_index'); // nfsExport.existTableCell(fsPseudo, false); - // nfsExport.navigateTo('create'); + // nfsExport.navigateTo('cephfs_create'); // nfsExport.create(backends[0], squash, client, fsPseudo); // nfsExport.existTableCell(fsPseudo); // }); it('should show Clients', () => { + nfsExport.navigateTo('rgw_index'); nfsExport.clickTab('cd-nfs-details', rgwPseudo, 'Clients (1)'); cy.get('cd-nfs-details').within(() => { nfsExport.getTableCount('total').should('be.gte', 0); @@ -67,12 +68,13 @@ describe('nfsExport page', () => { }); it('should edit an export', () => { - nfsExport.editExport(rgwPseudo, editPseudo); + nfsExport.editExport(rgwPseudo, editPseudo, 'rgw_index'); nfsExport.existTableCell(editPseudo); }); it('should delete exports and bucket', () => { + nfsExport.navigateTo('rgw_index'); nfsExport.delete(editPseudo); buckets.navigateTo(); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/nfs/nfs-export.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/nfs/nfs-export.po.ts index c700ef0581d..3639eb9a8ab 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/nfs/nfs-export.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/orchestrator/workflow/nfs/nfs-export.po.ts @@ -3,21 +3,18 @@ import { PageHelper } from '../../../page-helper.po'; /* tslint:enable*/ const pages = { - index: { url: '#/nfs', id: 'cd-nfs-list' }, - create: { url: '#/nfs/create', id: 'cd-nfs-form' } + cephfs_index: { url: '#cephfs/nfs', id: 'cd-nfs-list' }, + cephfs_create: { url: '#cephfs/nfs/create', id: 'cd-nfs-form' }, + rgw_index: { url: '#rgw/nfs', id: 'cd-nfs-list' }, + rgw_create: { url: '#rgw/nfs/create', id: 'cd-nfs-form' } }; export class NFSPageHelper extends PageHelper { pages = pages; - - @PageHelper.restrictTo(pages.create.url) create(backend: string, squash: string, client: object, pseudo: string, rgwPath?: string) { this.selectOption('cluster_id', 'testnfs'); - // select a storage backend - this.selectOption('name', backend); if (backend === 'CephFS') { this.selectOption('fs_name', 'myfs'); - cy.get('#security_label').click({ force: true }); } else { cy.get('input[data-testid=rgw_path]').type(rgwPath); @@ -38,8 +35,8 @@ export class NFSPageHelper extends PageHelper { cy.get('cd-submit-button').click(); } - editExport(pseudo: string, editPseudo: string) { - this.navigateEdit(pseudo); + editExport(pseudo: string, editPseudo: string, url: string) { + this.navigateEdit(pseudo, true, true, url); cy.get('input[name=pseudo]').clear().type(editPseudo); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/page-helper.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/page-helper.po.ts index 2a16ff7e141..49144b25fbf 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/page-helper.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/page-helper.po.ts @@ -52,9 +52,9 @@ export abstract class PageHelper { /** * Navigates to the edit page */ - navigateEdit(name: string, select = true, breadcrumb = true) { + navigateEdit(name: string, select = true, breadcrumb = true, navigateTo: string = null) { if (select) { - this.navigateTo(); + this.navigateTo(navigateTo); this.getFirstTableCell(name).click(); } cy.contains('Creating...').should('not.exist'); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.e2e-spec.ts 
b/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.e2e-spec.ts index dd4ab6f3b75..47260be41aa 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.e2e-spec.ts @@ -31,7 +31,7 @@ describe('Pools page', () => { it('should create a pool', () => { pools.existTableCell(poolName, false); pools.navigateTo('create'); - pools.create(poolName, 8, 'rbd'); + pools.create(poolName, 8, ['rbd']); pools.existTableCell(poolName); }); @@ -50,4 +50,22 @@ describe('Pools page', () => { pools.delete(poolName); }); }); + + describe('Pool with mirroring', () => { + it('should create a pool with mirroring enabled', () => { + pools.existTableCell(poolName, false); + pools.navigateTo('create'); + pools.create(poolName, 8, ['rbd'], true); + pools.existTableCell(poolName); + }); + + it('should edit a pools placement group with mirroring enabled', () => { + pools.existTableCell(poolName); + pools.edit_pool_pg(poolName, 32, true, true); + }); + + it('should delete the pool', () => { + pools.delete(poolName); + }); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.po.ts index af46355ff1c..0701a84a2d9 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/pools/pools.po.ts @@ -14,7 +14,7 @@ export class PoolPageHelper extends PageHelper { } @PageHelper.restrictTo(pages.create.url) - create(name: string, placement_groups: number, ...apps: string[]) { + create(name: string, placement_groups: number, apps: string[], mirroring = false) { cy.get('input[name=name]').clear().type(name); this.isPowerOf2(placement_groups); @@ -25,13 +25,20 @@ export class PoolPageHelper extends PageHelper { this.selectOption('pgAutoscaleMode', 'off'); // To show pgNum field cy.get('input[name=pgNum]').clear().type(`${placement_groups}`); this.setApplications(apps); + if (mirroring) { + cy.get('#rbdMirroring').check({ force: true }); + } cy.get('cd-submit-button').click(); } - edit_pool_pg(name: string, new_pg: number, wait = true) { + edit_pool_pg(name: string, new_pg: number, wait = true, mirroring = false) { this.isPowerOf2(new_pg); this.navigateEdit(name); + if (mirroring) { + cy.get('#rbdMirroring').should('be.checked'); + } + cy.get('input[name=pgNum]').clear().type(`${new_pg}`); cy.get('cd-submit-button').click(); const str = `${new_pg} active+clean`; diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.e2e-spec.ts index 8b05c309f69..4bfc672ccf2 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.e2e-spec.ts @@ -31,11 +31,6 @@ describe('RGW buckets page', () => { buckets.delete(bucket_name); }); - it('should check default encryption is SSE-S3', () => { - buckets.navigateTo('create'); - buckets.checkForDefaultEncryption(); - }); - it('should create bucket with object locking enabled', () => { buckets.navigateTo('create'); buckets.create(bucket_name, BucketsPageHelper.USERS[0], true); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.po.ts index 91f852024ff..069b48f888d 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.po.ts +++ 
b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/buckets.po.ts @@ -1,5 +1,6 @@ import { PageHelper } from '../page-helper.po'; +const WAIT_TIMER = 500; const pages = { index: { url: '#/rgw/bucket', id: 'cd-rgw-bucket-list' }, create: { url: '#/rgw/bucket/create', id: 'cd-rgw-bucket-form' } @@ -44,19 +45,11 @@ export class BucketsPageHelper extends PageHelper { } // Click the create button and wait for bucket to be made - cy.contains('button', 'Create Bucket').click(); + cy.contains('button', 'Create Bucket').wait(WAIT_TIMER).click(); this.getFirstTableCell(name).should('exist'); } - @PageHelper.restrictTo(pages.create.url) - checkForDefaultEncryption() { - cy.get("a[aria-label='click here']").click(); - cy.get('cd-modal').within(() => { - cy.get('input[id=s3Enabled]').should('be.checked'); - }); - } - @PageHelper.restrictTo(pages.index.url) edit(name: string, new_owner: string, isLocking = false) { this.navigateEdit(name); @@ -119,7 +112,7 @@ export class BucketsPageHelper extends PageHelper { cy.get('label[for=versioning]').click(); cy.get('input[id=versioning]').should('not.be.checked'); - cy.contains('button', 'Edit Bucket').click(); + cy.contains('button', 'Edit Bucket').wait(WAIT_TIMER).click(); // Check versioning suspended: this.getExpandCollapseElement(name).click(); @@ -134,7 +127,7 @@ export class BucketsPageHelper extends PageHelper { // Gives an invalid name (too short), then waits for dashboard to determine validity cy.get('@nameInputField').type('rq'); - cy.contains('button', 'Create Bucket').click(); // To trigger a validation + cy.contains('button', 'Create Bucket').wait(WAIT_TIMER).click(); // To trigger a validation // Waiting for website to decide if name is valid or not // Check that name input field was marked invalid in the css @@ -166,7 +159,7 @@ export class BucketsPageHelper extends PageHelper { // Clicks the Create Bucket button but the page doesn't move. 
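// Sketch of an assertion-based alternative to the fixed WAIT_TIMER sleep used
// in this file. Cypress retries .should()/.and() until they pass or time out,
// so the click fires as soon as the button is actionable. The helper name is
// an assumption; only the 'Create Bucket' label comes from the surrounding code.
function clickWhenReady(label: string): void {
  cy.contains('button', label)
    .should('be.visible') // retried automatically by Cypress
    .and('not.be.disabled')
    .click();
}
// e.g. clickWhenReady('Create Bucket');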
// Done by testing for the breadcrumb - cy.contains('button', 'Create Bucket').click(); // Clicks Create Bucket button + cy.contains('button', 'Create Bucket').wait(WAIT_TIMER).click(); // Clicks Create Bucket button this.expectBreadcrumbText('Create'); // content in fields seems to subsist through tests if not cleared, so it is cleared cy.get('@nameInputField').clear(); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.e2e-spec.ts new file mode 100644 index 00000000000..d1e4836aeb1 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.e2e-spec.ts @@ -0,0 +1,36 @@ +import { ConfigurationPageHelper } from './configuration.po'; + +describe('RGW configuration page', () => { + const configurations = new ConfigurationPageHelper(); + + beforeEach(() => { + cy.login(); + configurations.navigateTo(); + }); + + describe('breadcrumb and tab tests', () => { + it('should open and show breadcrumb', () => { + configurations.expectBreadcrumbText('Configuration'); + }); + + it('should show one tab', () => { + configurations.getTabsCount().should('eq', 1); + }); + + it('should show Server-side Encryption Config list tab at first', () => { + configurations.getTabText(0).should('eq', 'Server-side Encryption'); + }); + }); + + describe('create and edit encryption configuration', () => { + it('should create configuration', () => { + configurations.create('vault', 'agent', 'transit', 'https://localhost:8080'); + configurations.getFirstTableCell('SSE_KMS').should('exist'); + }); + + it('should edit configuration', () => { + configurations.edit('https://localhost:9090'); + configurations.getDataTables().should('contain.text', 'https://localhost:9090'); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.po.ts new file mode 100644 index 00000000000..3caa248b5ba --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/configuration.po.ts @@ -0,0 +1,55 @@ +import { PageHelper } from '../page-helper.po'; + +export class ConfigurationPageHelper extends PageHelper { + pages = { + index: { url: '#/rgw/configuration', id: 'cd-rgw-configuration-page' } + }; + + columnIndex = { + address: 4 + }; + + create(provider: string, auth_method: string, secret_engine: string, address: string) { + cy.contains('button', 'Create').click(); + this.selectKmsProvider(provider); + cy.get('#kms_provider').should('have.class', 'ng-valid'); + this.selectAuthMethod(auth_method); + cy.get('#auth_method').should('have.class', 'ng-valid'); + this.selectSecretEngine(secret_engine); + cy.get('#secret_engine').should('have.class', 'ng-valid'); + cy.get('#address').type(address); + cy.get('#address').should('have.value', address); + cy.get('#address').should('have.class', 'ng-valid'); + cy.contains('button', 'Submit').click(); + cy.wait(500); + cy.get('cd-table').should('exist'); + this.getFirstTableCell('SSE_KMS').should('exist'); + } + + edit(new_address: string) { + this.navigateEdit('SSE_KMS', true, false); + cy.get('#address').clear().type(new_address); + cy.get('#address').should('have.class', 'ng-valid'); + cy.get('#kms_provider').should('be.disabled'); + cy.contains('button', 'Submit').click(); + this.getTableCell(this.columnIndex.address, new_address) + .parent() + .find(`datatable-body-cell:nth-child(${this.columnIndex.address})`) + .should(($elements) => { + const address = 
$elements.text(); + expect(address).to.eq(new_address); + }); + } + + private selectKmsProvider(provider: string) { + return this.selectOption('kms_provider', provider); + } + + private selectAuthMethod(auth_method: string) { + return this.selectOption('auth_method', auth_method); + } + + private selectSecretEngine(secret_engine: string) { + return this.selectOption('secret_engine', secret_engine); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.e2e.spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.e2e.spec.ts new file mode 100644 index 00000000000..5633bb2f5b4 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.e2e.spec.ts @@ -0,0 +1,46 @@ +import { MultisitePageHelper } from './multisite.po'; + +describe('Multisite page', () => { + const multisite = new MultisitePageHelper(); + + beforeEach(() => { + cy.login(); + multisite.navigateTo(); + }); + + describe('tabs and table tests', () => { + it('should show two tabs', () => { + multisite.getTabsCount().should('eq', 2); + }); + + it('should show Configuration tab as a first tab', () => { + multisite.getTabText(0).should('eq', 'Configuration'); + }); + + it('should show sync policy tab as a second tab', () => { + multisite.getTabText(1).should('eq', 'Sync Policy'); + }); + + it('should show empty table in Sync Policy page', () => { + multisite.getTab('Sync Policy').click(); + multisite.getDataTables().should('exist'); + }); + }); + + describe('create, edit & delete sync group policy', () => { + it('should create policy', () => { + multisite.navigateTo('create'); + multisite.create('test', 'Enabled'); + multisite.getFirstTableCell('test').should('exist'); + }); + + it('should edit policy status', () => { + multisite.edit('test', 'Forbidden'); + }); + + it('should delete policy', () => { + multisite.getTab('Sync Policy').click(); + multisite.delete('test'); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.po.ts new file mode 100644 index 00000000000..bbeda74e9cf --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/rgw/multisite.po.ts @@ -0,0 +1,42 @@ +import { PageHelper } from '../page-helper.po'; + +const WAIT_TIMER = 1000; +const pages = { + index: { url: '#/rgw/multisite', id: 'cd-rgw-multisite-details' }, + create: { url: '#/rgw/multisite/sync-policy/create', id: 'cd-rgw-multisite-sync-policy-form' }, + edit: { url: '#/rgw/multisite/sync-policy/edit', id: 'cd-rgw-multisite-sync-policy-form' } +}; +export class MultisitePageHelper extends PageHelper { + pages = pages; + + columnIndex = { + status: 3 + }; + + @PageHelper.restrictTo(pages.create.url) + create(group_id: string, status: string) { + // Enter in group_id + cy.get('#group_id').type(group_id); + // Show Status + this.selectOption('status', status); + cy.get('#status').should('have.class', 'ng-valid'); + + // Click the create button and wait for policy to be made + cy.contains('button', 'Create Sync Policy Group').wait(WAIT_TIMER).click(); + this.getFirstTableCell(group_id).should('exist'); + } + + @PageHelper.restrictTo(pages.index.url) + edit(group_id: string, status: string) { + cy.visit(`${pages.edit.url}/${group_id}`); + + // Change the status field + this.selectOption('status', status); + cy.contains('button', 'Edit Sync Policy Group').click(); + + this.searchTable(group_id); + cy.get(`datatable-body-cell:nth-child(${this.columnIndex.status})`) + .find('.badge-warning') + 
.should('contain', status); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/language.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/language.po.ts index 80e21ba1e3d..fc443f28ffd 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/language.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/language.po.ts @@ -6,10 +6,10 @@ export class LanguagePageHelper extends PageHelper { }; getLanguageBtn() { - return cy.get('cd-language-selector a').first(); + return cy.get('cd-language-selector cds-header-menu a').first(); } getAllLanguages() { - return cy.get('cd-language-selector button'); + return cy.get('cd-language-selector cds-header-menu cds-header-item'); } } diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.e2e-spec.ts index 2b337e63416..bec37e46f62 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.e2e-spec.ts @@ -15,9 +15,9 @@ describe('Login page', () => { login.doLogout(); }); - it('should have no accessibility violations', () => { - login.navigateTo(); - cy.injectAxe(); - cy.checkA11y(); - }); + // it('should have no accessibility violations', () => { + // login.navigateTo(); + // cy.injectAxe(); + // cy.checkA11y(); + // }); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.po.ts index d4d2c692116..b275133f015 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/login.po.ts @@ -14,8 +14,8 @@ export class LoginPageHelper extends PageHelper { } doLogout() { - cy.get('cd-identity a').click(); - cy.contains('cd-identity span', 'Sign out').click(); + cy.get('cd-identity').click(); + cy.get('[data-testid="logout"]').click(); cy.get('cd-login').should('exist'); cy.location('hash').should('eq', '#/login'); } diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/navigation.po.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/navigation.po.ts index f2eefd826d8..89c4c7394d9 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/navigation.po.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/navigation.po.ts @@ -75,7 +75,7 @@ export class NavigationPageHelper extends PageHelper { } getMenuToggler() { - return cy.get('[aria-label="toggle sidebar visibility"]'); + return cy.get('[data-testid="main-menu-toggler"]'); } checkNavigations(navs: any) { @@ -85,7 +85,7 @@ export class NavigationPageHelper extends PageHelper { cy.intercept('/ui-api/block/rbd/status', { fixture: 'block-rbd-status.json' }); navs.forEach((nav: any) => { - cy.get('.simplebar-content li.nav-item a').each(($link) => { + cy.get('cds-sidenav-item').each(($link) => { if ($link.text().trim() === nav.menu.trim()) { cy.wrap($link).click(); } @@ -100,9 +100,9 @@ export class NavigationPageHelper extends PageHelper { checkNavSubMenu(menu: any, submenu: any) { submenu.forEach((nav: any) => { - cy.get('.simplebar-content li.nav-item a').each(($link) => { + cy.get('cds-sidenav-item').each(($link) => { if ($link.text().trim() === menu.trim()) { - cy.contains(`ul.list-unstyled li a`, nav.menu).click(); + cy.contains(`cds-sidenav-menu`, nav.menu).click(); } }); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/notification.e2e-spec.ts b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/notification.e2e-spec.ts index 
1f7e57325ce..9eb6cbc7cff 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/notification.e2e-spec.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/e2e/ui/notification.e2e-spec.ts @@ -9,7 +9,7 @@ describe('Notification page', () => { before(() => { cy.login(); pools.navigateTo('create'); - pools.create(poolName, 8); + pools.create(poolName, 8, ['rbd']); pools.edit_pool_pg(poolName, 4, false); }); diff --git a/src/pybind/mgr/dashboard/frontend/cypress/support/commands.ts b/src/pybind/mgr/dashboard/frontend/cypress/support/commands.ts index 09a2788eb00..f437ebbdc55 100644 --- a/src/pybind/mgr/dashboard/frontend/cypress/support/commands.ts +++ b/src/pybind/mgr/dashboard/frontend/cypress/support/commands.ts @@ -27,6 +27,7 @@ const fillAuth = () => { window.localStorage.setItem('user_pwd_expiration_date', auth.pwdExpirationDate); window.localStorage.setItem('user_pwd_update_required', auth.pwdUpdateRequired); window.localStorage.setItem('sso', auth.sso); + window.localStorage.setItem('telemetry_notification_hidden', 'true'); // disable telemetry notification in e2e }; Cypress.Commands.add('login', (username, password) => { @@ -68,6 +69,7 @@ Cypress.Commands.add('ceph2Login', (username, password) => { window.localStorage.setItem('user_pwd_expiration_date', pwdExpirationDate); window.localStorage.setItem('user_pwd_update_required', pwdUpdateRequired); window.localStorage.setItem('sso', sso); + window.localStorage.setItem('telemetry_notification_hidden', 'true'); // disable telemetry notification in e2e } ); }); diff --git a/src/pybind/mgr/dashboard/frontend/jest.config.cjs b/src/pybind/mgr/dashboard/frontend/jest.config.cjs index 9cdf6be4b46..6777546d963 100644 --- a/src/pybind/mgr/dashboard/frontend/jest.config.cjs +++ b/src/pybind/mgr/dashboard/frontend/jest.config.cjs @@ -20,7 +20,8 @@ const jestConfig = { globalSetup: 'jest-preset-angular/global-setup', moduleNameMapper: { '\\.scss$': 'identity-obj-proxy', - '~/(.*)$': '<rootDir>/src/$1' + '~/(.*)$': '<rootDir>/src/$1', + '^@carbon/icons/es/(.*)$': '@carbon/icons/lib/$1.js', }, moduleFileExtensions: ['ts', 'html', 'js', 'json', 'mjs', 'cjs'], preset: 'jest-preset-angular', @@ -32,7 +33,7 @@ const jestConfig = { }, setupFiles: ['jest-canvas-mock'], coverageReporters: ['cobertura', 'html'], - modulePathIgnorePatterns: ['<rootDir>/coverage/', '<rootDir>/node_modules/simplebar-angular'], + modulePathIgnorePatterns: ['<rootDir>/coverage/', '<rootDir>/node_modules/simplebar-angular', '<rootDir>/cypress'], testMatch: ['**/*.spec.ts'], testRunner: 'jest-jasmine2' }; diff --git a/src/pybind/mgr/dashboard/frontend/package-lock.json b/src/pybind/mgr/dashboard/frontend/package-lock.json index f3e393d8523..08726ef00f2 100644 --- a/src/pybind/mgr/dashboard/frontend/package-lock.json +++ b/src/pybind/mgr/dashboard/frontend/package-lock.json @@ -19,7 +19,10 @@ "@angular/platform-browser": "15.2.9", "@angular/platform-browser-dynamic": "15.2.9", "@angular/router": "15.2.9", + "@carbon/icons": "11.41.0", + "@carbon/styles": "1.57.0", "@circlon/angular-tree-component": "10.0.0", + "@ibm/plex": "6.4.0", "@ng-bootstrap/ng-bootstrap": "14.2.0", "@ngx-formly/bootstrap": "6.1.1", "@ngx-formly/core": "6.1.1", @@ -28,6 +31,7 @@ "@types/file-saver": "2.0.1", "async-mutex": "0.2.4", "bootstrap": "5.2.3", + "carbon-components-angular": "5.25.1", "chart.js": "4.4.0", "chartjs-adapter-moment": "1.0.1", "detect-browser": "5.2.0", @@ -3943,6 +3947,128 @@ "resolved": "https://registry.npmjs.org/@braintree/sanitize-url/-/sanitize-url-6.0.0.tgz", "integrity": 
"sha512-mgmE7XBYY/21erpzhexk4Cj1cyTQ9LzvnTxtzM17BJ7ERMNE6W72mQRo0I1Ud8eFJ+RVVIcBNhLFZ3GX4XFz5w==" }, + "node_modules/@carbon/colors": { + "version": "11.22.0", + "resolved": "https://registry.npmjs.org/@carbon/colors/-/colors-11.22.0.tgz", + "integrity": "sha512-IRbzstMpIhD1ULhfYhZ5ne7kIKdhQhiMeltWRPw+7wlFB5ezFoX+kX3ILqdz20CkcrpLu+TVKLD79Zv/+4RD6w==", + "hasInstallScript": true, + "dependencies": { + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/feature-flags": { + "version": "0.20.0", + "resolved": "https://registry.npmjs.org/@carbon/feature-flags/-/feature-flags-0.20.0.tgz", + "integrity": "sha512-OEYrazJa0nEEHbBDyarXIz6kjWgqsJggjbNAcVOxx0Nvma1nZBd+SwXKwdbMkBZagSSC816dV12oZJtr+GIZZg==", + "hasInstallScript": true, + "dependencies": { + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/grid": { + "version": "11.23.0", + "resolved": "https://registry.npmjs.org/@carbon/grid/-/grid-11.23.0.tgz", + "integrity": "sha512-/8SiXzefUdUeIRzMxKB2+xq65knjkDas2TcZj0NS7dnDIEr5HarWTABh/H5b5BTFEJXos3PfEH6X5OUDuK4qpg==", + "hasInstallScript": true, + "dependencies": { + "@carbon/layout": "^11.22.0", + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/icon-helpers": { + "version": "10.37.0", + "resolved": "https://registry.npmjs.org/@carbon/icon-helpers/-/icon-helpers-10.37.0.tgz", + "integrity": "sha512-YXed2JUSCGddp3UnY5OffR3W8Pl+dy9a+vfUtYhSLH9TbIEBR6EvYIfvruFMhA8JIVMCUClUqgyMQXM5oMFQ0g==" + }, + "node_modules/@carbon/icons": { + "version": "11.41.0", + "resolved": "https://registry.npmjs.org/@carbon/icons/-/icons-11.41.0.tgz", + "integrity": "sha512-9RGaOnihPQx74yBQ0UnEr9JJ+e2aa/J+tmTG/sZ203q2hfoeMF2PqipwOhNS1fqCnyW1zvsYQNydUsNIDzCqaA==", + "hasInstallScript": true, + "dependencies": { + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/layout": { + "version": "11.22.0", + "resolved": "https://registry.npmjs.org/@carbon/layout/-/layout-11.22.0.tgz", + "integrity": "sha512-G9HUJhGW+hNfUKyCLUZior5PDz808prB2Xr3vWF/rqNwLIDKhva/wCXBW2Xl0LavzonuibaCavcSYJGDkpDKhw==", + "hasInstallScript": true, + "dependencies": { + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/motion": { + "version": "11.18.0", + "resolved": "https://registry.npmjs.org/@carbon/motion/-/motion-11.18.0.tgz", + "integrity": "sha512-hVTmRxhXCA+xznXZSTd6m0kmuIRrR8mxnDHvrVKFvN3ksTYDni5Mtx4XNylI4u/fmzyUcvrvVeTHqJ8LbPsDvA==", + "hasInstallScript": true, + "dependencies": { + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/styles": { + "version": "1.57.0", + "resolved": "https://registry.npmjs.org/@carbon/styles/-/styles-1.57.0.tgz", + "integrity": "sha512-1GOJi0AAAOJXz411e9hoA3DTrK6SXsseSl7BDjQ5cO4ljlqCIPW5JS213yaF4MoYiLw5coDeGP7n6mgfWjbymA==", + "hasInstallScript": true, + "dependencies": { + "@carbon/colors": "^11.22.0", + "@carbon/feature-flags": "^0.20.0", + "@carbon/grid": "^11.23.0", + "@carbon/layout": "^11.22.0", + "@carbon/motion": "^11.18.0", + "@carbon/themes": "^11.35.0", + "@carbon/type": "^11.27.0", + "@ibm/plex": "6.0.0-next.6", + "@ibm/telemetry-js": "^1.5.0" + }, + "peerDependencies": { + "sass": "^1.33.0" + }, + "peerDependenciesMeta": { + "sass": { + "optional": true + } + } + }, + "node_modules/@carbon/styles/node_modules/@ibm/plex": { + "version": "6.0.0-next.6", + "resolved": "https://registry.npmjs.org/@ibm/plex/-/plex-6.0.0-next.6.tgz", + "integrity": "sha512-B3uGruTn2rS5gweynLmfSe7yCawSRsJguJJQHVQiqf4rh2RNgJFu8YLE2Zd/JHV0ZXoVMOslcXP2k3hMkxKEyA==", + "engines": { + "node": ">=14" + } + }, + "node_modules/@carbon/themes": { + "version": 
"11.35.0", + "resolved": "https://registry.npmjs.org/@carbon/themes/-/themes-11.35.0.tgz", + "integrity": "sha512-Sgh8u2JhpOhpfjaj8U2jStmGtLNDGWSLojZdxKl9FnVg1yNe02+IlhnK5bFeCNOGx4dFhrLFIhLtdh9T0Hy8rg==", + "hasInstallScript": true, + "dependencies": { + "@carbon/colors": "^11.22.0", + "@carbon/layout": "^11.22.0", + "@carbon/type": "^11.27.0", + "@ibm/telemetry-js": "^1.5.0", + "color": "^4.0.0" + } + }, + "node_modules/@carbon/type": { + "version": "11.27.0", + "resolved": "https://registry.npmjs.org/@carbon/type/-/type-11.27.0.tgz", + "integrity": "sha512-+YsFTKsch8xcdZ7y40K69B+47j86H7u8HEZ9OfymmXfMYAT+73MTfAtwyO3leS9rWGljKIh0h3I+Ga7wxE0Q6w==", + "hasInstallScript": true, + "dependencies": { + "@carbon/grid": "^11.23.0", + "@carbon/layout": "^11.22.0", + "@ibm/telemetry-js": "^1.5.0" + } + }, + "node_modules/@carbon/utils-position": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@carbon/utils-position/-/utils-position-1.1.4.tgz", + "integrity": "sha512-/01kFPKr+wD2pPd5Uck2gElm3K/+eNxX7lEn2j1NKzzE4+eSZXDfQtLR/UHcvOSgkP+Av42LET6B9h9jXGV+HA==" + }, "node_modules/@circlon/angular-tree-component": { "version": "10.0.0", "resolved": "https://registry.npmjs.org/@circlon/angular-tree-component/-/angular-tree-component-10.0.0.tgz", @@ -5011,6 +5137,28 @@ "node": ">=14" } }, + "node_modules/@floating-ui/core": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.1.tgz", + "integrity": "sha512-42UH54oPZHPdRHdw6BgoBD6cg/eVTmVrFcgeRDM3jbO7uxSoipVcmcIGFcA5jmOHO5apcyvBhkSKES3fQJnu7A==", + "dependencies": { + "@floating-ui/utils": "^0.2.0" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.6.3", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.3.tgz", + "integrity": "sha512-RnDthu3mzPlQ31Ss/BTwQ1zjzIhr3lk1gZB1OC56h/1vEtaXkESrOqL5fQVMfXpwGtRwX+YsZBdyHtJMQnkArw==", + "dependencies": { + "@floating-ui/core": "^1.0.0", + "@floating-ui/utils": "^0.2.0" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz", + "integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw==" + }, "node_modules/@foliojs-fork/fontkit": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/@foliojs-fork/fontkit/-/fontkit-1.9.1.tgz", @@ -5127,6 +5275,19 @@ "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==", "dev": true }, + "node_modules/@ibm/plex": { + "version": "6.4.0", + "resolved": "https://registry.npmjs.org/@ibm/plex/-/plex-6.4.0.tgz", + "integrity": "sha512-P70hmNoSJhpV6fGG4++JEivoccUVuvkyZoXprsDmPTtv3s6QvL+Q8bK3HFSGmK/VgyLMDptoKPV7b/h/1xaWAw==" + }, + "node_modules/@ibm/telemetry-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/@ibm/telemetry-js/-/telemetry-js-1.5.1.tgz", + "integrity": "sha512-Hu8iJAy9UGvjWjpMmHTNgekr2+b44nvp37RxSdWogpkSO7bPajR3CbDvb0QWAvJ7KnW+VmB3aDi1rlNsIyrZVw==", + "bin": { + "ibmtelemetry": "dist/collect.js" + } + }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -11076,6 +11237,34 @@ } ] }, + "node_modules/carbon-components-angular": { + "version": "5.25.1", + "resolved": "https://registry.npmjs.org/carbon-components-angular/-/carbon-components-angular-5.25.1.tgz", + "integrity": "sha512-v49djZmcHs47G7wzaS+SQUTqp+vErlHDc4ohbsx29Q+Jq1m6IJSaTUCN9GuQG/lLa7W1se0vS23TOToKwjIbcw==", + "hasInstallScript": 
true, + "dependencies": { + "@carbon/icon-helpers": "10.37.0", + "@carbon/icons": "11.14.0", + "@carbon/utils-position": "1.1.4", + "@floating-ui/dom": "1.6.3", + "@ibm/telemetry-js": "^1.5.0", + "flatpickr": "4.6.13", + "tslib": "2.3.0" + }, + "peerDependencies": { + "@carbon/styles": "^1.54.0" + } + }, + "node_modules/carbon-components-angular/node_modules/@carbon/icons": { + "version": "11.14.0", + "resolved": "https://registry.npmjs.org/@carbon/icons/-/icons-11.14.0.tgz", + "integrity": "sha512-6XaySbscz1ubJ/3GtyXB8qJpcAL8kcIzBA6JZpFCcha43tuB1Kps87ADj/v3yx0sLPxyIzRWgkw2n1bnkAcsNA==" + }, + "node_modules/carbon-components-angular/node_modules/tslib": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.3.0.tgz", + "integrity": "sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==" + }, "node_modules/caseless": { "version": "0.12.0", "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", @@ -11735,6 +11924,18 @@ "node": ">=0.10.0" } }, + "node_modules/color": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", + "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", + "dependencies": { + "color-convert": "^2.0.1", + "color-string": "^1.9.0" + }, + "engines": { + "node": ">=12.5.0" + } + }, "node_modules/color-convert": { "version": "1.9.3", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", @@ -11748,6 +11949,15 @@ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" }, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, "node_modules/color-support": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/color-support/-/color-support-1.1.3.tgz", @@ -11757,6 +11967,22 @@ "color-support": "bin.js" } }, + "node_modules/color/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, "node_modules/colorette": { "version": "2.0.20", "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", @@ -15815,6 +16041,11 @@ "node": "^10.12.0 || >=12.0.0" } }, + "node_modules/flatpickr": { + "version": "4.6.13", + "resolved": "https://registry.npmjs.org/flatpickr/-/flatpickr-4.6.13.tgz", + "integrity": "sha512-97PMG/aywoYpB4IvbvUJi0RQi8vearvU0oov1WW3k0WZPBMrTQVqekSX5CjSG/M4Q3i6A/0FKXC7RyAoAUUSPw==" + }, "node_modules/flatted": { "version": "3.2.9", "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.9.tgz", @@ -30060,6 +30291,19 @@ "simple-concat": "^1.0.0" } }, + "node_modules/simple-swizzle": { + 
"version": "0.2.2", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", + "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, + "node_modules/simple-swizzle/node_modules/is-arrayish": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz", + "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==" + }, "node_modules/simplebar": { "version": "5.3.9", "resolved": "https://registry.npmjs.org/simplebar/-/simplebar-5.3.9.tgz", diff --git a/src/pybind/mgr/dashboard/frontend/package.json b/src/pybind/mgr/dashboard/frontend/package.json index f72dbdbe412..2856af154fb 100644 --- a/src/pybind/mgr/dashboard/frontend/package.json +++ b/src/pybind/mgr/dashboard/frontend/package.json @@ -53,7 +53,10 @@ "@angular/platform-browser": "15.2.9", "@angular/platform-browser-dynamic": "15.2.9", "@angular/router": "15.2.9", + "@carbon/icons": "11.41.0", + "@carbon/styles": "1.57.0", "@circlon/angular-tree-component": "10.0.0", + "@ibm/plex": "6.4.0", "@ng-bootstrap/ng-bootstrap": "14.2.0", "@ngx-formly/bootstrap": "6.1.1", "@ngx-formly/core": "6.1.1", @@ -62,6 +65,7 @@ "@types/file-saver": "2.0.1", "async-mutex": "0.2.4", "bootstrap": "5.2.3", + "carbon-components-angular": "5.25.1", "chart.js": "4.4.0", "chartjs-adapter-moment": "1.0.1", "detect-browser": "5.2.0", diff --git a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts index 6744e9cf23b..2316896863e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/app-routing.module.ts @@ -180,6 +180,11 @@ const routes: Routes = [ outlet: 'modal' }, { + path: `${URLVerbs.CREATE}/:type`, + component: ServiceFormComponent, + outlet: 'modal' + }, + { path: `${URLVerbs.EDIT}/:type/:name`, component: ServiceFormComponent, outlet: 'modal' @@ -370,18 +375,48 @@ const routes: Routes = [ { path: 'cephfs', canActivate: [FeatureTogglesGuardService], - data: { breadcrumbs: 'File/File Systems' }, children: [ - { path: '', component: CephfsListComponent }, { - path: URLVerbs.CREATE, + path: 'fs', + component: CephfsListComponent, + data: { breadcrumbs: 'File/File Systems' } + }, + { + path: `fs/${URLVerbs.CREATE}`, component: CephfsVolumeFormComponent, data: { breadcrumbs: ActionLabels.CREATE } }, { - path: `${URLVerbs.EDIT}/:id`, + path: `fs/${URLVerbs.EDIT}/:id`, component: CephfsVolumeFormComponent, data: { breadcrumbs: ActionLabels.EDIT } + }, + { + path: 'nfs', + canActivateChild: [FeatureTogglesGuardService, ModuleStatusGuardService], + data: { + moduleStatusGuardConfig: { + uiApiPath: 'nfs-ganesha', + redirectTo: 'error', + section: 'nfs-ganesha', + section_info: 'NFS GANESHA', + header: 'NFS-Ganesha is not configured' + }, + breadcrumbs: 'File/NFS' + }, + children: [ + { path: '', component: NfsListComponent }, + { + path: URLVerbs.CREATE, + component: NfsFormComponent, + data: { breadcrumbs: ActionLabels.CREATE } + }, + { + path: `${URLVerbs.EDIT}/:cluster_id/:export_id`, + component: NfsFormComponent, + data: { breadcrumbs: ActionLabels.EDIT } + } + ] } ] }, @@ -421,34 +456,6 @@ const routes: Routes = [ data: { breadcrumbs: ActionLabels.EDIT } } ] - }, - // NFS - { - path: 'nfs', - canActivateChild: [FeatureTogglesGuardService, ModuleStatusGuardService], - data: { - 
moduleStatusGuardConfig: { - uiApiPath: 'nfs-ganesha', - redirectTo: 'error', - section: 'nfs-ganesha', - section_info: 'NFS GANESHA', - header: 'NFS-Ganesha is not configured' - }, - breadcrumbs: 'NFS' - }, - children: [ - { path: '', component: NfsListComponent }, - { - path: URLVerbs.CREATE, - component: NfsFormComponent, - data: { breadcrumbs: ActionLabels.CREATE } - }, - { - path: `${URLVerbs.EDIT}/:cluster_id/:export_id`, - component: NfsFormComponent, - data: { breadcrumbs: ActionLabels.EDIT } - } - ] } ] }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts index b9995ac029d..8e926a40d99 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/block.module.ts @@ -38,6 +38,11 @@ import { RbdTrashListComponent } from './rbd-trash-list/rbd-trash-list.component import { RbdTrashMoveModalComponent } from './rbd-trash-move-modal/rbd-trash-move-modal.component'; import { RbdTrashPurgeModalComponent } from './rbd-trash-purge-modal/rbd-trash-purge-modal.component'; import { RbdTrashRestoreModalComponent } from './rbd-trash-restore-modal/rbd-trash-restore-modal.component'; +import { NvmeofGatewayComponent } from './nvmeof-gateway/nvmeof-gateway.component'; +import { NvmeofSubsystemsComponent } from './nvmeof-subsystems/nvmeof-subsystems.component'; +import { NvmeofSubsystemsDetailsComponent } from './nvmeof-subsystems-details/nvmeof-subsystems-details.component'; +import { NvmeofTabsComponent } from './nvmeof-tabs/nvmeof-tabs.component'; +import { NvmeofSubsystemsFormComponent } from './nvmeof-subsystems-form/nvmeof-subsystems-form.component'; @NgModule({ imports: [ @@ -77,7 +82,12 @@ import { RbdTrashRestoreModalComponent } from './rbd-trash-restore-modal/rbd-tra RbdConfigurationListComponent, RbdConfigurationFormComponent, RbdTabsComponent, - RbdPerformanceComponent + RbdPerformanceComponent, + NvmeofGatewayComponent, + NvmeofSubsystemsComponent, + NvmeofSubsystemsDetailsComponent, + NvmeofTabsComponent, + NvmeofSubsystemsFormComponent ], exports: [RbdConfigurationListComponent, RbdConfigurationFormComponent] }) @@ -198,6 +208,47 @@ const routes: Routes = [ ] } ] + }, + // NVMe/TCP + { + path: 'nvmeof', + canActivate: [ModuleStatusGuardService], + data: { + breadcrumbs: true, + text: 'NVMe/TCP', + path: 'nvmeof', + disableSplit: true, + moduleStatusGuardConfig: { + uiApiPath: 'nvmeof', + redirectTo: 'error', + header: $localize`NVMe/TCP Gateway not configured`, + button_name: $localize`Configure NVMe/TCP`, + button_route: ['/services', { outlets: { modal: ['create', 'nvmeof'] } }], + uiConfig: false + } + }, + children: [ + { path: '', redirectTo: 'subsystems', pathMatch: 'full' }, + { + path: 'subsystems', + component: NvmeofSubsystemsComponent, + data: { breadcrumbs: 'Subsystems' }, + children: [ + { path: '', component: NvmeofSubsystemsComponent }, + { + path: URLVerbs.CREATE, + component: NvmeofSubsystemsFormComponent, + outlet: 'modal' + }, + { + path: `${URLVerbs.EDIT}/:subsystem_nqn`, + component: NvmeofSubsystemsFormComponent, + outlet: 'modal' + } + ] + }, + { path: 'gateways', component: NvmeofGatewayComponent, data: { breadcrumbs: 'Gateways' } } + ] } ]; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.html new file mode 100644 index 
00000000000..18a4000e14d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.html @@ -0,0 +1,14 @@ +<cd-nvmeof-tabs></cd-nvmeof-tabs> + +<legend i18n> + Gateways + <cd-help-text> + The NVMe-oF gateway integrates Ceph with the NVMe over TCP (NVMe/TCP) protocol to provide an NVMe/TCP target that exports RADOS Block Device (RBD) images. + </cd-help-text> +</legend> +<div> + <cd-table [data]="gateways" + (fetchData)="getGateways()" + [columns]="gatewayColumns"> + </cd-table> +</div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.spec.ts new file mode 100644 index 00000000000..53187cd0f8d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.spec.ts @@ -0,0 +1,53 @@ +import { ComponentFixture, TestBed, fakeAsync, tick } from '@angular/core/testing'; +import { of } from 'rxjs'; +import { NvmeofGatewayComponent } from './nvmeof-gateway.component'; +import { NvmeofService } from '../../../shared/api/nvmeof.service'; +import { HttpClientModule } from '@angular/common/http'; +import { SharedModule } from '~/app/shared/shared.module'; + +const mockGateways = [ + { + cli_version: '', + version: '1.2.5', + name: 'client.nvmeof.rbd.ceph-node-01.jnmnwa', + group: '', + addr: '192.168.100.101', + port: '5500', + load_balancing_group: 1, + spdk_version: '24.01' + } +]; + +class MockNvmeOfService { + listGateways() { + return of(mockGateways); + } +} + +describe('NvmeofGatewayComponent', () => { + let component: NvmeofGatewayComponent; + let fixture: ComponentFixture<NvmeofGatewayComponent>; + + beforeEach(fakeAsync(() => { + TestBed.configureTestingModule({ + declarations: [NvmeofGatewayComponent], + imports: [HttpClientModule, SharedModule], + providers: [{ provide: NvmeofService, useClass: MockNvmeOfService }] + }).compileComponents(); + })); + + beforeEach(() => { + fixture = TestBed.createComponent(NvmeofGatewayComponent); + component = fixture.componentInstance; + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should retrieve gateways', fakeAsync(() => { + component.getGateways(); + tick(); + expect(component.gateways).toEqual(mockGateways); + })); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.ts new file mode 100644 index 00000000000..46600388bd9 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-gateway/nvmeof-gateway.component.ts @@ -0,0 +1,44 @@ +import { Component } from '@angular/core'; + +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; +import { NvmeofGateway } from '~/app/shared/models/nvmeof'; + +import { NvmeofService } from '../../../shared/api/nvmeof.service'; + +@Component({ + selector: 'cd-nvmeof-gateway', + 
templateUrl: './nvmeof-gateway.component.html', + styleUrls: ['./nvmeof-gateway.component.scss'] +}) +export class NvmeofGatewayComponent { + gateways: NvmeofGateway[] = []; + gatewayColumns: any; + selection = new CdTableSelection(); + + constructor(private nvmeofService: NvmeofService, public actionLabels: ActionLabelsI18n) {} + + ngOnInit() { + this.gatewayColumns = [ + { + name: $localize`Name`, + prop: 'name' + }, + { + name: $localize`Address`, + prop: 'addr' + }, + { + name: $localize`Port`, + prop: 'port' + } + ]; + } + + getGateways() { + this.nvmeofService.listGateways().subscribe((gateways: NvmeofGateway[] | NvmeofGateway) => { + if (Array.isArray(gateways)) this.gateways = gateways; + else this.gateways = [gateways]; + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html new file mode 100644 index 00000000000..56a05dfecda --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html @@ -0,0 +1,17 @@ +<ng-container *ngIf="selection"> + <nav ngbNav + #nav="ngbNav" + class="nav-tabs" + cdStatefulTab="subsystem-details"> + <ng-container ngbNavItem="details"> + <a ngbNavLink + i18n>Details</a> + <ng-template ngbNavContent> + <cd-table-key-value [data]="data"> + </cd-table-key-value> + </ng-template> + </ng-container> + </nav> + + <div [ngbNavOutlet]="nav"></div> +</ng-container> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.spec.ts new file mode 100644 index 00000000000..80cdf927b9a --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.spec.ts @@ -0,0 +1,49 @@ +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { BrowserAnimationsModule } from '@angular/platform-browser/animations'; +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { NgbNavModule } from '@ng-bootstrap/ng-bootstrap'; + +import { SharedModule } from '~/app/shared/shared.module'; +import { NvmeofSubsystemsDetailsComponent } from './nvmeof-subsystems-details.component'; + +describe('NvmeofSubsystemsDetailsComponent', () => { + let component: NvmeofSubsystemsDetailsComponent; + let fixture: ComponentFixture<NvmeofSubsystemsDetailsComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [NvmeofSubsystemsDetailsComponent], + imports: [BrowserAnimationsModule, SharedModule, HttpClientTestingModule, NgbNavModule] + }).compileComponents(); + + fixture = TestBed.createComponent(NvmeofSubsystemsDetailsComponent); + component = fixture.componentInstance; + component.selection = { + serial_number: 'Ceph30487186726692', + 
model_number: 'Ceph bdev Controller', + min_cntlid: 1, + max_cntlid: 2040, + subtype: 'NVMe', + nqn: 'nqn.2001-07.com.ceph:1720603703820', + namespace_count: 1, + max_namespaces: 256 + }; + component.ngOnChanges(); + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should prepare data', () => { + expect(component.data).toEqual({ + 'Serial Number': 'Ceph30487186726692', + 'Model Number': 'Ceph bdev Controller', + 'Minimum Controller Identifier': 1, + 'Maximum Controller Identifier': 2040, + 'Subsystem Type': 'NVMe' + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.ts new file mode 100644 index 00000000000..a79b01d6704 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.ts @@ -0,0 +1,27 @@ +import { Component, Input, OnChanges } from '@angular/core'; +import { NvmeofSubsystem } from '~/app/shared/models/nvmeof'; + +@Component({ + selector: 'cd-nvmeof-subsystems-details', + templateUrl: './nvmeof-subsystems-details.component.html', + styleUrls: ['./nvmeof-subsystems-details.component.scss'] +}) +export class NvmeofSubsystemsDetailsComponent implements OnChanges { + @Input() + selection: NvmeofSubsystem; + + selectedItem: any; + data: any; + + ngOnChanges() { + if (this.selection) { + this.selectedItem = this.selection; + this.data = {}; + this.data[$localize`Serial Number`] = this.selectedItem.serial_number; + this.data[$localize`Model Number`] = this.selectedItem.model_number; + this.data[$localize`Minimum Controller Identifier`] = this.selectedItem.min_cntlid; + this.data[$localize`Maximum Controller Identifier`] = this.selectedItem.max_cntlid; + this.data[$localize`Subsystem Type`] = this.selectedItem.subtype; + } + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.html new file mode 100644 index 00000000000..93e27094e17 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.html @@ -0,0 +1,74 @@ +<cd-modal [pageURL]="pageURL" + [modalRef]="activeModal"> + <span class="modal-title" + i18n>{{ action | titlecase }} {{ resource | upperFirst }}</span> + <ng-container class="modal-content"> + <form name="subsystemForm" + #formDir="ngForm" + [formGroup]="subsystemForm" + novalidate> + <div class="modal-body"> + <!-- NQN --> + <div class="form-group row"> + <label class="cd-col-form-label" + for="nqn"> + <span class="required" + i18n>NQN</span> + </label> + <div class="cd-col-form-input"> + <input name="nqn" + class="form-control" + type="text" + formControlName="nqn"> + <cd-help-text> + The NVMe Qualified Name (NQN) is a unique and permanent name for the lifetime of the subsystem. 
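For context, the NQN rules this template describes can be restated as a small standalone check. A minimal sketch, assuming nothing beyond this diff: the regex is NQN_REGEX and the 223-byte cap is the custom maxLength validator, both from nvmeof-subsystems-form.component.ts further down; the isValidNqn helper name is ours.

```ts
// Sketch of the form's NQN validation; the regex and the byte cap are taken
// from nvmeof-subsystems-form.component.ts in this diff, the helper is ours.
const NQN_REGEX = /^nqn\.(19|20)\d\d-(0[1-9]|1[0-2])\.\D{2,3}(\.[A-Za-z0-9-]+)+(:[A-Za-z0-9-\.]+)$/;

function isValidNqn(nqn: string): boolean {
  // NQNs are limited to 223 bytes, measured after UTF-8 encoding
  const byteLength = new TextEncoder().encode(nqn).length;
  return byteLength <= 223 && NQN_REGEX.test(nqn);
}

isValidNqn('nqn.2001-07.com.ceph:' + Date.now()); // true, the form's default style
```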
+ </cd-help-text> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('nqn', formDir, 'required')" + i18n>This field is required.</span> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('nqn', formDir, 'unique')" + i18n>This NQN is already in use.</span> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('nqn', formDir, 'pattern')" + i18n>An NQN should follow the format<br/><code>nqn.$year-$month.$reverseDomainName:$definedName</code>.</span> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('nqn', formDir, 'maxLength')" + i18n>An NQN should not be more than 223 bytes in length.</span> + </div> + </div> + <!-- Maximum Namespaces --> + <div class="form-group row"> + <label class="cd-col-form-label" + for="max_namespaces"> + <span i18n>Maximum Namespaces</span> + </label> + <div class="cd-col-form-input"> + <input id="max_namespaces" + class="form-control" + type="text" + name="max_namespaces" + formControlName="max_namespaces"> + <cd-help-text i18n>The maximum namespaces per subsystem. Default is 256.</cd-help-text> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('max_namespaces', formDir, 'min')" + i18n>The value must be at least 1.</span> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('max_namespaces', formDir, 'max')" + i18n>The value cannot be greater than 256.</span> + <span class="invalid-feedback" + *ngIf="subsystemForm.showError('max_namespaces', formDir, 'pattern')" + i18n>The value must be a positive integer.</span> + </div> + </div> + </div> + <div class="modal-footer"> + <div class="text-right"> + <cd-form-button-panel (submitActionEvent)="onSubmit()" + [form]="subsystemForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)"></cd-form-button-panel> + </div> + </div> + </form> + </ng-container> +</cd-modal> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.spec.ts new file mode 100644 index 00000000000..bfa642de1cb --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.spec.ts @@ -0,0 +1,91 @@ +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { ReactiveFormsModule } from '@angular/forms'; +import { RouterTestingModule } from '@angular/router/testing'; +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { ToastrModule } from 'ngx-toastr'; + +import { NgbActiveModal, NgbTypeaheadModule } from '@ng-bootstrap/ng-bootstrap'; + +import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; +import { SharedModule } from '~/app/shared/shared.module'; +import { NvmeofSubsystemsFormComponent } from './nvmeof-subsystems-form.component'; +import { FormHelper } from '~/testing/unit-test-helper'; +import { NvmeofService } from '~/app/shared/api/nvmeof.service'; + +describe('NvmeofSubsystemsFormComponent', () => { + let component:
NvmeofSubsystemsFormComponent; + let fixture: ComponentFixture<NvmeofSubsystemsFormComponent>; + let nvmeofService: NvmeofService; + let form: CdFormGroup; + let formHelper: FormHelper; + const mockTimestamp = 1720693470789; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [NvmeofSubsystemsFormComponent], + providers: [NgbActiveModal], + imports: [ + HttpClientTestingModule, + NgbTypeaheadModule, + ReactiveFormsModule, + RouterTestingModule, + SharedModule, + ToastrModule.forRoot() + ] + }).compileComponents(); + + fixture = TestBed.createComponent(NvmeofSubsystemsFormComponent); + component = fixture.componentInstance; + component.ngOnInit(); + form = component.subsystemForm; + formHelper = new FormHelper(form); + spyOn(Date, 'now').and.returnValue(mockTimestamp); + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + describe('should test form', () => { + beforeEach(() => { + nvmeofService = TestBed.inject(NvmeofService); + spyOn(nvmeofService, 'createSubsystem').and.stub(); + }); + + it('should create the request correctly', () => { + const expectedNqn = 'nqn.2001-07.com.ceph:' + mockTimestamp; + component.onSubmit(); + expect(nvmeofService.createSubsystem).toHaveBeenCalledWith({ + nqn: expectedNqn, + max_namespaces: 256, + enable_ha: true + }); + }); + + it('should give error on invalid nqn', () => { + formHelper.setValue('nqn', 'nqn:2001-07.com.ceph:'); + component.onSubmit(); + formHelper.expectError('nqn', 'pattern'); + }); + + it('should give error on invalid max_namespaces', () => { + formHelper.setValue('max_namespaces', -56); + component.onSubmit(); + formHelper.expectError('max_namespaces', 'pattern'); + }); + + it('should give error on max_namespaces greater than 256', () => { + formHelper.setValue('max_namespaces', 300); + component.onSubmit(); + formHelper.expectError('max_namespaces', 'max'); + }); + + it('should give error on max_namespaces less than 1', () => { + formHelper.setValue('max_namespaces', 0); + component.onSubmit(); + formHelper.expectError('max_namespaces', 'min'); + }); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts new file mode 100644 index 00000000000..2af218af54a --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts @@ -0,0 +1,102 @@ +import { Component, OnInit } from '@angular/core'; +import { UntypedFormControl, Validators } from '@angular/forms'; +import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'; + +import { ActionLabelsI18n, URLVerbs } from '~/app/shared/constants/app.constants'; +import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; +import { CdValidators } from '~/app/shared/forms/cd-validators'; +import { Permission } from '~/app/shared/models/permissions'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { FinishedTask } from '~/app/shared/models/finished-task'; +import { Router } from '@angular/router'; +import { NvmeofService } from '~/app/shared/api/nvmeof.service'; + +@Component({ + selector: 'cd-nvmeof-subsystems-form', + templateUrl: './nvmeof-subsystems-form.component.html', + styleUrls: ['./nvmeof-subsystems-form.component.scss']
+}) +export class NvmeofSubsystemsFormComponent implements OnInit { + permission: Permission; + subsystemForm: CdFormGroup; + + action: string; + resource: string; + pageURL: string; + + NQN_REGEX = /^nqn\.(19|20)\d\d-(0[1-9]|1[0-2])\.\D{2,3}(\.[A-Za-z0-9-]+)+(:[A-Za-z0-9-\.]+)$/; + + constructor( + private authStorageService: AuthStorageService, + public actionLabels: ActionLabelsI18n, + public activeModal: NgbActiveModal, + private nvmeofService: NvmeofService, + private taskWrapperService: TaskWrapperService, + private router: Router + ) { + this.permission = this.authStorageService.getPermissions().nvmeof; + this.resource = $localize`Subsystem`; + this.pageURL = 'block/nvmeof/subsystems'; + } + + ngOnInit() { + this.createForm(); + this.action = this.actionLabels.CREATE; + } + + createForm() { + this.subsystemForm = new CdFormGroup({ + nqn: new UntypedFormControl('nqn.2001-07.com.ceph:' + Date.now(), { + validators: [ + Validators.required, + Validators.pattern(this.NQN_REGEX), + CdValidators.custom( + 'maxLength', + (nqnInput: string) => new TextEncoder().encode(nqnInput).length > 223 + ) + ], + asyncValidators: [ + CdValidators.unique(this.nvmeofService.isSubsystemPresent, this.nvmeofService) + ] + }), + max_namespaces: new UntypedFormControl(256, { + validators: [CdValidators.number(false), Validators.max(256), Validators.min(1)] + }) + }); + } + + onSubmit() { + const component = this; + const nqn: string = this.subsystemForm.getValue('nqn'); + let max_namespaces: number = Number(this.subsystemForm.getValue('max_namespaces')); + + const request = { + nqn, + max_namespaces, + enable_ha: true + }; + + if (!max_namespaces) { + delete request.max_namespaces; + } + + let taskUrl = `nvmeof/subsystem/${URLVerbs.CREATE}`; + + this.taskWrapperService + .wrapTaskAroundCall({ + task: new FinishedTask(taskUrl, { + nqn: nqn + }), + call: this.nvmeofService.createSubsystem(request) + }) + .subscribe({ + error() { + component.subsystemForm.setErrors({ cdSubmitButton: true }); + }, + complete: () => { + this.router.navigate([this.pageURL, { outlets: { modal: null } }]); + } + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.html new file mode 100644 index 00000000000..4dc04437330 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.html @@ -0,0 +1,29 @@ +<cd-nvmeof-tabs></cd-nvmeof-tabs> +<legend i18n> + Subsystems + <cd-help-text> + A subsystem presents a collection of controllers which are used to access namespaces. 
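For reference, the payload that onSubmit() above assembles for nvmeofService.createSubsystem can be sketched as follows; the field names and defaults come from the component and its spec, while the SubsystemCreateRequest interface name is our own shorthand.

```ts
// Shape of the create-subsystem request built in onSubmit() above; the
// interface name is ours, fields and defaults come straight from the component.
interface SubsystemCreateRequest {
  nqn: string;
  max_namespaces?: number; // deleted from the request when falsy
  enable_ha: boolean; // this form always sends true
}

const request: SubsystemCreateRequest = {
  nqn: 'nqn.2001-07.com.ceph:' + Date.now(), // the generated default NQN
  max_namespaces: 256, // validated to the 1..256 range
  enable_ha: true
};
```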
+ </cd-help-text> +</legend> +<cd-table [data]="subsystems" + columnMode="flex" + (fetchData)="getSubsystems()" + [columns]="subsystemsColumns" + selectionType="single" + [hasDetails]="true" + (setExpandedRow)="setExpandedRow($event)" + (updateSelection)="updateSelection($event)"> + + <div class="table-actions btn-toolbar"> + <cd-table-actions [permission]="permission" + [selection]="selection" + class="btn-group" + [tableActions]="tableActions"> + </cd-table-actions> + </div> + + <cd-nvmeof-subsystems-details cdTableDetail + [selection]="expandedRow"> + </cd-nvmeof-subsystems-details> +</cd-table> +<router-outlet name="modal"></router-outlet> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.spec.ts new file mode 100644 index 00000000000..1efd28dd114 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.spec.ts @@ -0,0 +1,80 @@ +import { ComponentFixture, TestBed, fakeAsync, tick } from '@angular/core/testing'; +import { HttpClientModule } from '@angular/common/http'; +import { of } from 'rxjs'; +import { RouterTestingModule } from '@angular/router/testing'; +import { SharedModule } from '~/app/shared/shared.module'; + +import { NvmeofService } from '../../../shared/api/nvmeof.service'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { NvmeofSubsystemsComponent } from './nvmeof-subsystems.component'; +import { NvmeofTabsComponent } from '../nvmeof-tabs/nvmeof-tabs.component'; +import { NvmeofSubsystemsDetailsComponent } from '../nvmeof-subsystems-details/nvmeof-subsystems-details.component'; + +const mockSubsystems = [ + { + nqn: 'nqn.2001-07.com.ceph:1720603703820', + enable_ha: true, + serial_number: 'Ceph30487186726692', + model_number: 'Ceph bdev Controller', + min_cntlid: 1, + max_cntlid: 2040, + namespace_count: 0, + subtype: 'NVMe', + max_namespaces: 256 + } +]; + +class MockNvmeOfService { + listSubsystems() { + return of(mockSubsystems); + } +} + +class MockAuthStorageService { + getPermissions() { + return { nvmeof: {} }; + } +} + +class MockModalService {} + +class MockTaskWrapperService {} + +describe('NvmeofSubsystemsComponent', () => { + let component: NvmeofSubsystemsComponent; + let fixture: ComponentFixture<NvmeofSubsystemsComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ + NvmeofSubsystemsComponent, + NvmeofTabsComponent, + NvmeofSubsystemsDetailsComponent + ], + imports: [HttpClientModule, RouterTestingModule, SharedModule], + providers: [ + { provide: NvmeofService, useClass: MockNvmeOfService }, + { provide: AuthStorageService, useClass: MockAuthStorageService }, + { provide: ModalService, useClass: MockModalService }, + { provide: TaskWrapperService, useClass: MockTaskWrapperService } + ] + 
}).compileComponents(); + + fixture = TestBed.createComponent(NvmeofSubsystemsComponent); + component = fixture.componentInstance; + component.ngOnInit(); + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should retrieve subsystems', fakeAsync(() => { + component.getSubsystems(); + tick(); + expect(component.subsystems).toEqual(mockSubsystems); + })); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.ts new file mode 100644 index 00000000000..d281901627b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems/nvmeof-subsystems.component.ts @@ -0,0 +1,103 @@ +import { Component, OnInit } from '@angular/core'; +import { Router } from '@angular/router'; + +import { ActionLabelsI18n, URLVerbs } from '~/app/shared/constants/app.constants'; +import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; +import { NvmeofSubsystem } from '~/app/shared/models/nvmeof'; +import { Permission } from '~/app/shared/models/permissions'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ListWithDetails } from '~/app/shared/classes/list-with-details.class'; +import { CdTableAction } from '~/app/shared/models/cd-table-action'; +import { Icons } from '~/app/shared/enum/icons.enum'; +import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; +import { FinishedTask } from '~/app/shared/models/finished-task'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { NvmeofService } from '~/app/shared/api/nvmeof.service'; + +const BASE_URL = 'block/nvmeof/subsystems'; + +@Component({ + selector: 'cd-nvmeof-subsystems', + templateUrl: './nvmeof-subsystems.component.html', + styleUrls: ['./nvmeof-subsystems.component.scss'] +}) +export class NvmeofSubsystemsComponent extends ListWithDetails implements OnInit { + subsystems: NvmeofSubsystem[] = []; + subsystemsColumns: any; + permission: Permission; + selection = new CdTableSelection(); + tableActions: CdTableAction[]; + subsystemDetails: any[]; + + constructor( + private nvmeofService: NvmeofService, + private authStorageService: AuthStorageService, + public actionLabels: ActionLabelsI18n, + private router: Router, + private modalService: ModalService, + private taskWrapper: TaskWrapperService + ) { + super(); + this.permission = this.authStorageService.getPermissions().nvmeof; + } + + ngOnInit() { + this.subsystemsColumns = [ + { + name: $localize`NQN`, + prop: 'nqn' + }, + { + name: $localize`# Namespaces`, + prop: 'namespace_count' + }, + { + name: $localize`# Maximum Namespaces`, + prop: 'max_namespaces' + } + ]; + this.tableActions = [ + { + name: this.actionLabels.CREATE, + permission: 'create', + icon: Icons.add, + click: () => this.router.navigate([BASE_URL, { outlets: { modal: [URLVerbs.CREATE] } }]), + canBePrimary: (selection: CdTableSelection) => !selection.hasSelection + }, + { + name: this.actionLabels.DELETE, + permission: 'delete', + icon: Icons.destroy, + click: () => this.deleteSubsystemModal() + } + ]; + } + + updateSelection(selection: CdTableSelection) { + this.selection = selection; + } + + getSubsystems() { + this.nvmeofService + 
.listSubsystems() + .subscribe((subsystems: NvmeofSubsystem[] | NvmeofSubsystem) => { + if (Array.isArray(subsystems)) this.subsystems = subsystems; + else this.subsystems = [subsystems]; + }); + } + + deleteSubsystemModal() { + const subsystem = this.selection.first(); + this.modalService.show(CriticalConfirmationModalComponent, { + itemDescription: 'Subsystem', + itemNames: [subsystem.nqn], + actionDescription: 'delete', + submitActionObservable: () => + this.taskWrapper.wrapTaskAroundCall({ + task: new FinishedTask('nvmeof/subsystem/delete', { nqn: subsystem.nqn }), + call: this.nvmeofService.deleteSubsystem(subsystem.nqn) + }) + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.html new file mode 100644 index 00000000000..29f1e2ad664 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.html @@ -0,0 +1,16 @@ +<ul class="nav nav-tabs"> + <li class="nav-item"> + <a class="nav-link" + routerLink="/block/nvmeof/subsystems" + routerLinkActive="active" + ariaCurrentWhenActive="page" + i18n>Subsystems</a> + </li> + <li class="nav-item"> + <a class="nav-link" + routerLink="/block/nvmeof/gateways" + routerLinkActive="active" + ariaCurrentWhenActive="page" + i18n>Gateways</a> + </li> +</ul> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.spec.ts new file mode 100644 index 00000000000..23e334a6e14 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.spec.ts @@ -0,0 +1,22 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { NvmeofTabsComponent } from './nvmeof-tabs.component'; + +describe('NvmeofTabsComponent', () => { + let component: NvmeofTabsComponent; + let fixture: ComponentFixture<NvmeofTabsComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [NvmeofTabsComponent] + }).compileComponents(); + + fixture = TestBed.createComponent(NvmeofTabsComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.ts new file mode 100644 index 00000000000..507116c466f --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-tabs/nvmeof-tabs.component.ts @@ -0,0 +1,8 @@ +import { Component } from '@angular/core'; + +@Component({ + selector: 'cd-nvmeof-tabs', + templateUrl: './nvmeof-tabs.component.html', + styleUrls: ['./nvmeof-tabs.component.scss'] +}) +export class NvmeofTabsComponent {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-feature.interface.ts 
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-feature.interface.ts index 825b1d2bb39..898bc452319 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-feature.interface.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-feature.interface.ts @@ -7,4 +7,5 @@ export interface RbdImageFeature { key?: string; initDisabled?: boolean; helperHtml?: string; + helperText?: string; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form-edit-request.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form-edit-request.model.ts index 2eede58521f..670203dd5f0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form-edit-request.model.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form-edit-request.model.ts @@ -12,4 +12,5 @@ export class RbdFormEditRequestModel { force?: boolean; schedule_interval: string; remove_scheduling? = false; + image_mirror_mode?: string; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html index af6cd396365..4c86ef15e27 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.html @@ -75,7 +75,7 @@ i18n>Loading...</option> <option *ngIf="pools !== null && pools.length === 0" [ngValue]="null" - i18n>-- No rbd pools available --</option> + i18n>-- No block pools available --</option> <option *ngIf="pools !== null && pools.length > 0" [ngValue]="null" i18n>-- Select a pool --</option> @@ -88,6 +88,135 @@ </div> </div> + <div class="form-group row"> + <div class="cd-col-form-offset"> + <!-- Mirroring --> + <div class="custom-control custom-checkbox"> + <input type="checkbox" + class="custom-control-input" + id="mirroring" + name="mirroring" + (change)="setMirrorMode()" + [(ngModel)]="mirroring && this.currentPoolName" + formControlName="mirroring"> + <label class="custom-control-label" + for="mirroring">Mirroring</label> + <cd-help-text>Allow data to be asynchronously mirrored between two Ceph clusters</cd-help-text> + <cd-alert-panel *ngIf="showMirrorDisableMessage" + [showTitle]="false" + type="info">Mirroring cannot be disabled in <b>Pool</b> mirror mode. + You need to change the mirror mode to enable this option. + </cd-alert-panel> + <cd-alert-panel *ngIf="currentPoolMirrorMode === 'disabled'" + type="info" + [showTitle]="false" + i18n>You need to set <b>mirror mode</b> in the selected pool to enable mirroring.
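The mirroring gating that the rbd-form template below encodes boils down to two predicates over the pool's mirror mode. A hedged sketch, assuming the three modes this diff handles ('disabled', 'pool', 'image') and mirroring the component's shouldDisable() logic; the helper names are ours.

```ts
// Sketch of the template's mirroring gating; the rules follow shouldDisable()
// and the pool-mode handling in rbd-form.component.ts later in this diff.
type PoolMirrorMode = 'disabled' | 'pool' | 'image';

// The Mirroring checkbox is only meaningful once the pool has a mirror mode.
function mirroringSelectable(poolMode: PoolMirrorMode): boolean {
  return poolMode !== 'disabled';
}

// In 'pool' mode only journal mirroring is offered; the snapshot radio stays
// disabled until the pool is switched to 'image' mode.
function snapshotOptionDisabled(poolMode: PoolMirrorMode): boolean {
  return poolMode === 'pool';
}
```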
+ <button class="btn btn-light" + type="button" + [routerLink]="['/block/mirroring', {outlets: {modal: ['edit', rbdForm.getValue('pool')]}}]">Set Mode</button> + </cd-alert-panel> + </div> + <div *ngIf="mirroring && currentPoolMirrorMode !== 'disabled'"> + <div class="custom-control custom-radio ms-2" + *ngFor="let option of mirroringOptions"> + <input type="radio" + class="form-check-input" + [id]="option.value" + [value]="option.value" + name="mirroringMode" + (change)="setExclusiveLock()" + formControlName="mirroringMode" + [attr.disabled]="shouldDisable(option.value)"> + <label class="form-check-label" + [for]="option.value">{{ option.value | titlecase }}</label> + <cd-help-text> {{ option.text }} </cd-help-text> + <cd-alert-panel *ngIf="shouldDisable(option.value) && mode !== 'editing'" + type="info" + [showTitle]="false" + i18n>You need to set the mode to <b>Image</b> in the selected pool to enable snapshot mirroring. + <button class="btn btn-light mx-2" + type="button" + [routerLink]="['/block/mirroring', {outlets: {modal: ['edit', rbdForm.getValue('pool')]}}]">Set Mode</button> + </cd-alert-panel> + </div> + </div><br> + <div class="form-group row" + *ngIf="rbdForm.getValue('mirroringMode') === 'snapshot' && mirroring"> + <label class="cd-col-form-label required" + [ngClass]="{'required': mode !== 'editing'}" + i18n>Schedule Interval</label> + <div class="cd-col-form-input"> + <input id="schedule" + name="schedule" + class="form-control" + type="text" + formControlName="schedule" + i18n-placeholder + placeholder="12h or 1d or 10m" + [attr.disabled]="(peerConfigured === false) ? true : null"> + <cd-help-text> + <span i18n>Specify the interval to create mirror snapshots automatically. The interval can be specified in days, hours, or minutes using the d, h, or m suffix respectively.</span> + </cd-help-text> + <span *ngIf="rbdForm.showError('schedule', formDir, 'required')" + class="invalid-feedback" + i18n>This field is required.</span> + </div> + </div> + <!-- Use a dedicated pool --> + <div class="custom-control custom-checkbox" + *ngIf="allDataPools.length > 1 || mode === 'editing'"> + <input type="checkbox" + class="custom-control-input" + id="useDataPool" + name="useDataPool" + formControlName="useDataPool" + (change)="onUseDataPoolChange()"> + <label class="custom-control-label" + for="useDataPool" + i18n>Dedicated data pool</label> + <cd-help-text>Use a dedicated pool to store the mirror data. If not selected, the mirror data will be stored in the same pool as the image data.</cd-help-text> + <cd-helper *ngIf="allDataPools.length <= 1 && mode !== 'editing'"> + <span i18n>You need more than one pool with the rbd application label to use a dedicated data pool.</span> + </cd-helper> + </div> + <!-- Data Pool --> + <div class="form-group row" + *ngIf="rbdForm.getValue('useDataPool')"> + <div class="cd-col-form-input pt-2 ms-4"> + <input class="form-control" + type="text" + placeholder="Data pool name..."
+ id="dataPool" + name="dataPool" + formControlName="dataPool" + *ngIf="mode === 'editing' || !poolPermission.read"> + <select id="dataPool" + name="dataPool" + class="form-select" + formControlName="dataPool" + (change)="onDataPoolChange($event.target.value)" + *ngIf="mode !== 'editing' && poolPermission.read"> + <option *ngIf="dataPools === null" + [ngValue]="null" + i18n>Loading...</option> + <option *ngIf="dataPools !== null && dataPools.length === 0" + [ngValue]="null" + i18n>-- No data pools available --</option> + <option *ngIf="dataPools !== null && dataPools.length > 0" + [ngValue]="null">-- Select a data pool -- + </option> + <option *ngFor="let dataPool of dataPools" + [value]="dataPool.pool_name">{{ dataPool.pool_name }}</option> + </select> + <cd-help-text>Dedicated pool that stores the object-data of the RBD.</cd-help-text> + <span class="invalid-feedback" + *ngIf="rbdForm.showError('dataPool', formDir, 'required')" + i18n>This field is required.</span> + </div> + </div> + </div> + </div> + <!-- Namespace --> <div class="form-group row" *ngIf="mode !== 'editing' && rbdForm.getValue('pool') && namespaces === null"> @@ -126,69 +255,8 @@ <option *ngFor="let namespace of namespaces" [value]="namespace">{{ namespace }}</option> </select> - </div> - </div> - - <!-- Use a dedicated pool --> - <div class="form-group row"> - <div class="cd-col-form-offset"> - <div class="custom-control custom-checkbox"> - <input type="checkbox" - class="custom-control-input" - id="useDataPool" - name="useDataPool" - formControlName="useDataPool" - (change)="onUseDataPoolChange()"> - <label class="custom-control-label" - for="useDataPool" - i18n>Use a dedicated data pool</label> - <cd-helper *ngIf="allDataPools.length <= 1"> - <span i18n>You need more than one pool with the rbd application label use to use a dedicated data pool.</span> - </cd-helper> - </div> - </div> - </div> - - <!-- Data Pool --> - <div class="form-group row" - *ngIf="rbdForm.getValue('useDataPool')"> - <label class="cd-col-form-label" - for="dataPool"> - <span [ngClass]="{'required': mode !== 'editing'}" - i18n>Data pool</span> - <cd-helper i18n-html - html="Dedicated pool that stores the object-data of the RBD."> - </cd-helper> - </label> - <div class="cd-col-form-input"> - <input class="form-control" - type="text" - placeholder="Data pool name..." - id="dataPool" - name="dataPool" - formControlName="dataPool" - *ngIf="mode === 'editing' || !poolPermission.read"> - <select id="dataPool" - name="dataPool" - class="form-select" - formControlName="dataPool" - (change)="onDataPoolChange($event.target.value)" - *ngIf="mode !== 'editing' && poolPermission.read"> - <option *ngIf="dataPools === null" - [ngValue]="null" - i18n>Loading...</option> - <option *ngIf="dataPools !== null && dataPools.length === 0" - [ngValue]="null" - i18n>-- No data pools available --</option> - <option *ngIf="dataPools !== null && dataPools.length > 0" - [ngValue]="null">-- Select a data pool -- - </option> - <option *ngFor="let dataPool of dataPools" - [value]="dataPool.pool_name">{{ dataPool.pool_name }}</option> - </select> - <span class="invalid-feedback" - *ngIf="rbdForm.showError('dataPool', formDir, 'required')" - i18n>This field is required.</span> + <cd-help-text>Namespace allows you to logically group RBD images within your Ceph Cluster. 
+ Choosing a namespace makes it easier to locate and manage related RBD images efficiently.</cd-help-text> </div> </div> @@ -204,7 +272,7 @@ type="text" formControlName="size" i18n-placeholder - placeholder="e.g., 10GiB" + placeholder="10 GiB" defaultUnit="GiB" cdDimlessBinary> <span class="invalid-feedback" @@ -216,62 +284,7 @@ <span *ngIf="rbdForm.showError('size', formDir, 'pattern')" class="invalid-feedback" i18n>Size must be a number or in a valid format. eg: 5 GiB</span> - </div> - </div> - - <!-- Mirroring --> - <div class="form-group row"> - <div class="cd-col-form-offset"> - <div class="custom-control custom-checkbox"> - <input type="checkbox" - class="custom-control-input" - id="mirroring" - name="mirroring" - (change)="setMirrorMode()" - formControlName="mirroring"> - <label class="custom-control-label" - for="mirroring">Mirroring</label> - <cd-helper *ngIf="mirroring === false && this.currentPoolName"> - <span i18n>You need to enable a <b>mirror mode</b> in the selected pool. Please <a [routerLink]="['/block/mirroring', {outlets: {modal: ['edit', currentPoolName]}}]">click here to select a mode and enable it in this pool.</a></span> - </cd-helper> - </div> - <div *ngIf="mirroring"> - <div class="custom-control custom-radio ms-2" - *ngFor="let option of mirroringOptions"> - <input type="radio" - class="form-check-input" - [id]="option" - [value]="option" - name="mirroringMode" - (change)="setExclusiveLock()" - formControlName="mirroringMode" - [attr.disabled]="(poolMirrorMode === 'pool' && option === 'snapshot') ? true : null"> - <label class="form-check-label" - [for]="option">{{ option | titlecase }}</label> - <cd-helper *ngIf="poolMirrorMode === 'pool' && option === 'snapshot'"> - <span i18n>You need to enable <b>image mirror mode</b> in the selected pool. Please <a [routerLink]="['/block/mirroring', {outlets: {modal: ['edit', currentPoolName]}}]">click here to select a mode and enable it in this pool.</a></span> - </cd-helper> - </div> - </div> - </div> - </div> - - <div class="form-group row" - *ngIf="rbdForm.getValue('mirroringMode') === 'snapshot' && mirroring"> - <label class="cd-col-form-label" - i18n>Schedule Interval - <cd-helper i18n-html - html="Create Mirror-Snapshots automatically on a periodic basis. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. To create mirror snapshots, you must import or create and have available peers to mirror"> - </cd-helper></label> - <div class="cd-col-form-input"> - <input id="schedule" - name="schedule" - class="form-control" - type="text" - formControlName="schedule" - i18n-placeholder - placeholder="e.g., 12h or 1d or 10m" - [attr.disabled]="(peerConfigured === false) ?
true : null"> + <cd-help-text>Supported Units: KiB, MiB, GiB, TiB, PiB etc</cd-help-text> </div> </div> @@ -292,10 +305,14 @@ name="{{ feature.key }}" formControlName="{{ feature.key }}"> <label class="custom-control-label" - for="{{ feature.key }}">{{ feature.desc }}</label> - <cd-helper *ngIf="feature.helperHtml" - html="{{ feature.helperHtml }}"> - </cd-helper> + for="{{ feature.key }}">{{ feature.desc }}</label><br> + <cd-help-text *ngIf="feature.helperText"> + {{ feature.helperText }} + </cd-help-text> + <cd-alert-panel type="warning" + *ngIf="feature.helperHtml && rbdForm.getValue(feature.key) === false"> + {{ feature.helperHtml }} + </cd-alert-panel> </div> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.spec.ts index 7605348d406..fbdebde67a7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.spec.ts @@ -453,7 +453,7 @@ describe('RbdFormComponent', () => { }); it('should verify only snapshot is disabled for pools that are in pool mirror mode', () => { - component.poolMirrorMode = 'pool'; + component.currentPoolMirrorMode = 'pool'; fixture.detectChanges(); const journal = fixture.debugElement.query(By.css('#journal')).nativeElement; const snapshot = fixture.debugElement.query(By.css('#snapshot')).nativeElement; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.ts index 33e67b09bbf..1a8c7627b85 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/rbd-form/rbd-form.component.ts @@ -33,6 +33,7 @@ import { RbdFormCreateRequestModel } from './rbd-form-create-request.model'; import { RbdFormEditRequestModel } from './rbd-form-edit-request.model'; import { RbdFormMode } from './rbd-form-mode.enum'; import { RbdFormResponseModel } from './rbd-form-response.model'; +import { CdValidators } from '~/app/shared/forms/cd-validators'; class ExternalData { rbd: RbdFormResponseModel; @@ -79,10 +80,22 @@ export class RbdFormComponent extends CdForm implements OnInit { defaultObjectSize = '4 MiB'; - mirroringOptions = ['journal', 'snapshot']; + mirroringOptions = [ + { + value: 'journal', + text: + 'Ensures reliable replication by logging changes before updating the image, but doubles write time, impacting performance. Not recommended for high-speed data processing tasks.' + }, + { + value: 'snapshot', + text: + 'This mode replicates RBD images between clusters using snapshots, efficiently copying data changes but requiring complete delta syncing during failover. Ideal for less demanding tasks due to its less granular approach compared to journaling.' 
+ } + ]; poolMirrorMode: string; mirroring = false; currentPoolName = ''; + currentPoolMirrorMode = ''; objectSizes: Array<string> = [ '4 KiB', @@ -111,6 +124,8 @@ export class RbdFormComponent extends CdForm implements OnInit { private routerUrl: string; icons = Icons; + currentImageMirrorMode = ''; + showMirrorDisableMessage = false; constructor( private authStorageService: AuthStorageService, @@ -134,27 +149,31 @@ export class RbdFormComponent extends CdForm implements OnInit { requires: null, allowEnable: false, allowDisable: true, - helperHtml: $localize`Feature can be disabled but can't be re-enabled later` + helperHtml: $localize`Feature can be disabled but can't be re-enabled later`, + helperText: $localize`Speeds up the process of deleting a clone by removing the dependency on the parent image.` }, layering: { desc: $localize`Layering`, requires: null, allowEnable: false, allowDisable: false, - helperHtml: $localize`Feature flag can't be manipulated after the image is created. Disabling this option will also disable the Protect and Clone actions on Snapshot` + helperHtml: $localize`Feature flag can't be manipulated after the image is created. Disabling this option will also disable the Protect and Clone actions on Snapshot`, + helperText: $localize`Allows the creation of snapshots and clones of an image.` }, 'exclusive-lock': { desc: $localize`Exclusive lock`, requires: null, allowEnable: true, - allowDisable: true + allowDisable: true, + helperText: $localize`Ensures that only one client can write to the image at a time.` }, 'object-map': { desc: $localize`Object map (requires exclusive-lock)`, requires: 'exclusive-lock', allowEnable: true, allowDisable: true, - initDisabled: true + initDisabled: true, + helperText: $localize`Tracks which objects actually exist (have data stored on a device). 
Enabling object map support speeds up I/O operations for cloning, importing and exporting a sparsely populated image, and deleting.` }, 'fast-diff': { desc: $localize`Fast diff (interlocked with object-map)`, @@ -162,7 +181,8 @@ export class RbdFormComponent extends CdForm implements OnInit { allowEnable: true, allowDisable: true, interlockedWith: 'object-map', - initDisabled: true + initDisabled: true, + helperText: $localize`Speeds up the process of comparing two images.` } }; this.featuresList = this.objToArray(this.features); @@ -196,9 +216,15 @@ export class RbdFormComponent extends CdForm implements OnInit { return acc; }, {}) ), - mirroring: new UntypedFormControl(''), + mirroring: new UntypedFormControl(false), schedule: new UntypedFormControl('', { - validators: [Validators.pattern(/^([0-9]+)d|([0-9]+)h|([0-9]+)m$/)] // check schedule interval to be in format - 1d or 1h or 1m + validators: [ + Validators.pattern(/^([0-9]+)d|([0-9]+)h|([0-9]+)m$/), + CdValidators.requiredIf({ + mirroringMode: 'snapshot', + mirroring: true + }) + ] // check schedule interval to be in format - 1d or 1h or 1m }), mirroringMode: new UntypedFormControl(''), stripingUnit: new UntypedFormControl(this.defaultStripingUnit), @@ -256,14 +282,14 @@ export class RbdFormComponent extends CdForm implements OnInit { this.rbdForm.get('exclusive-lock').disable(); } else { this.rbdForm.get('exclusive-lock').enable(); - if (this.poolMirrorMode === 'pool') { - this.rbdForm.get('mirroringMode').setValue(this.mirroringOptions[0]); - } } } setMirrorMode() { this.mirroring = !this.mirroring; + if (this.mirroring) { + this.rbdForm.get('mirroringMode').setValue(this.mirroringOptions[0].value); + } this.setExclusiveLock(); this.checkPeersConfigured(); } @@ -286,14 +312,34 @@ export class RbdFormComponent extends CdForm implements OnInit { this.rbdMirroringService.refresh(); this.rbdMirroringService.subscribeSummary((data) => { const pool = data.content_data.pools.find((o: any) => o.name === this.currentPoolName); - this.poolMirrorMode = pool.mirror_mode; - - if (pool.mirror_mode === 'disabled') { - this.mirroring = false; - this.rbdForm.get('mirroring').setValue(this.mirroring); - this.rbdForm.get('mirroring').disable(); + this.currentPoolMirrorMode = pool.mirror_mode; + if (this.mode === this.rbdFormMode.editing) { + if (this.currentPoolMirrorMode === 'pool') { + this.showMirrorDisableMessage = true; + } else { + this.showMirrorDisableMessage = false; + } + if (this.currentPoolMirrorMode !== 'image') { + this.rbdForm.get('mirroring').disable(); + this.rbdForm.get('mirroringMode').disable(); + } + } else { + if (pool.mirror_mode === 'disabled') { + this.mirroring = false; + this.rbdForm.get('mirroring').setValue(this.mirroring); + this.rbdForm.get('mirroring').disable(); + } else { + this.mirroring = true; + this.rbdForm.get('mirroring').enable(); + this.rbdForm.get('mirroring').setValue(this.mirroring); + this.rbdForm.get('mirroringMode').setValue(this.mirroringOptions[0].value); + } } }); + } else { + if (this.mode !== this.rbdFormMode.editing) { + this.rbdForm.get('mirroring').disable(); + } } this.setExclusiveLock(); } @@ -390,8 +436,9 @@ export class RbdFormComponent extends CdForm implements OnInit { this.allPools = pools; this.dataPools = dataPools; this.allDataPools = dataPools; - if (this.pools.length === 1) { - const poolName = this.pools[0].pool_name; + if (this.pools.length >= 1) { + const allPoolNames = this.pools.map((pool) => pool.pool_name); + const poolName = allPoolNames.includes('rbd') ? 
'rbd' : this.pools[0].pool_name; this.rbdForm.get('pool').setValue(poolName); this.onPoolChange(poolName); } @@ -464,7 +511,7 @@ export class RbdFormComponent extends CdForm implements OnInit { sizeControlErrors = { required: true }; } else { const sizeInBytes = formatter.toBytes(sizeControl.value); - if (stripingCount * objectSizeInBytes > sizeInBytes) { + if (stripingCount * objectSizeInBytes >= sizeInBytes) { sizeControlErrors = { invalidSizeObject: true }; } } @@ -616,6 +663,7 @@ export class RbdFormComponent extends CdForm implements OnInit { this.mirroring = true; this.rbdForm.get('mirroring').setValue(this.mirroring); this.rbdForm.get('mirroringMode').setValue(response?.mirror_mode); + this.currentImageMirrorMode = response?.mirror_mode; this.rbdForm.get('schedule').setValue(response?.schedule_interval); } else { this.mirroring = false; @@ -651,12 +699,11 @@ export class RbdFormComponent extends CdForm implements OnInit { request.name = this.rbdForm.getValue('name'); request.schedule_interval = this.rbdForm.getValue('schedule'); request.size = this.formatter.toBytes(this.rbdForm.getValue('size')); - - if (this.poolMirrorMode === 'image') { - request.mirror_mode = this.rbdForm.getValue('mirroringMode'); - } this.addObjectSizeAndStripingToRequest(request); request.configuration = this.getDirtyConfigurationValues(); + if (this.mirroring && this.currentPoolMirrorMode === 'image') { + request.mirror_mode = this.rbdForm.getValue('mirroringMode'); + } return request; } @@ -688,7 +735,8 @@ export class RbdFormComponent extends CdForm implements OnInit { namespace: request.namespace, image_name: request.name, schedule_interval: request.schedule_interval, - start_time: request.start_time + start_time: request.start_time, + mirror_mode: request.mirror_mode }), call: this.rbdService.create(request) }); @@ -698,19 +746,20 @@ export class RbdFormComponent extends CdForm implements OnInit { const request = new RbdFormEditRequestModel(); request.name = this.rbdForm.getValue('name'); request.schedule_interval = this.rbdForm.getValue('schedule'); - request.name = this.rbdForm.getValue('name'); + request.enable_mirror = this.mirroring; request.size = this.formatter.toBytes(this.rbdForm.getValue('size')); _.forIn(this.features, (feature) => { if (this.rbdForm.getValue(feature.key)) { request.features.push(feature.key); } }); - request.enable_mirror = this.rbdForm.getValue('mirroring'); if (request.enable_mirror) { + request.image_mirror_mode = this.currentImageMirrorMode; if (this.rbdForm.getValue('mirroringMode') === 'journal') { + request.mirror_mode = 'journal'; request.features.push('journaling'); } - if (this.poolMirrorMode === 'image') { + if (this.currentPoolMirrorMode === 'image') { request.mirror_mode = this.rbdForm.getValue('mirroringMode'); } } else { @@ -803,6 +852,10 @@ export class RbdFormComponent extends CdForm implements OnInit { }); } + shouldDisable(option: string): boolean { + return this.currentPoolMirrorMode === 'pool' && option === 'snapshot' ? 
true : null; + } + submit() { if (!this.mode) { this.rbdImage.next('create'); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts index dbbe522fa0a..0506c4c7734 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-form/cephfs-form.component.ts @@ -68,7 +68,7 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit { private route: ActivatedRoute ) { super(); - this.editing = this.router.url.startsWith(`/cephfs/${URLVerbs.EDIT}`); + this.editing = this.router.url.startsWith(`/cephfs/fs/${URLVerbs.EDIT}`); this.action = this.editing ? this.actionLabels.EDIT : this.actionLabels.CREATE; this.resource = $localize`File System`; this.hosts = { @@ -176,7 +176,7 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit { this.form.setErrors({ cdSubmitButton: true }); }, complete: () => { - this.router.navigate([BASE_URL]); + this.router.navigate([`${BASE_URL}/fs`]); } }); } else { @@ -210,7 +210,7 @@ export class CephfsVolumeFormComponent extends CdForm implements OnInit { self.form.setErrors({ cdSubmitButton: true }); }, complete: () => { - this.router.navigate([BASE_URL]); + this.router.navigate([`${BASE_URL}/fs`]); } }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts index 2957401d86a..748eeee0ee4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-list/cephfs-list.component.ts @@ -27,7 +27,7 @@ import { map, switchMap } from 'rxjs/operators'; import { HealthService } from '~/app/shared/api/health.service'; import { CephfsAuthModalComponent } from '~/app/ceph/cephfs/cephfs-auth-modal/cephfs-auth-modal.component'; -const BASE_URL = 'cephfs'; +const BASE_URL = 'cephfs/fs'; @Component({ selector: 'cd-cephfs-list', diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts index c14903edc8c..05c93faf161 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cephfs/cephfs-subvolume-snapshots-list/cephfs-subvolume-snapshots-list.component.ts @@ -113,7 +113,7 @@ export class CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges click: () => this.cloneModal() }, { - name: this.actionLabels.DELETE, + name: this.actionLabels.REMOVE, permission: 'delete', icon: Icons.destroy, disable: () => !this.selection.hasSingleSelection, @@ -224,7 +224,7 @@ export class CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges const subVolumeGroupName = this.activeGroupName; const fsName = this.fsName; this.modalRef = this.modalService.show(CriticalConfirmationModalComponent, { - actionDescription: 'Delete', + actionDescription: this.actionLabels.REMOVE, itemNames: [snapshotName], itemDescription: 'Snapshot', submitAction: () => @@ -270,7 +270,8 @@ export class 
CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges this.cephfsSubvolumeService, null, null, - this.fsName + this.fsName, + this.activeGroupName ) ], required: true, @@ -284,12 +285,23 @@ export class CephfsSubvolumeSnapshotsListComponent implements OnInit, OnChanges name: 'groupName', value: this.activeGroupName, label: $localize`Group name`, + valueChangeListener: true, + dependsOn: 'cloneName', typeConfig: { options: allGroups } } ], submitButtonText: $localize`Create Clone`, + updateAsyncValidators: (value: any) => + CdValidators.unique( + this.cephfsSubvolumeService.exists, + this.cephfsSubvolumeService, + null, + null, + this.fsName, + value + ), onSubmit: (value: any) => { this.cephfsSubvolumeService .createSnapshotClone( diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html index a2ae23b2c2b..df61fd40a95 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.html @@ -9,20 +9,20 @@ class="bold">Hosts</td> <td>{{ hostsCount }}</td> </tr> - <tr> + <tr *ngIf="!isSimpleDeployment; else simpleDeploymentTextTpl"> <td> - <dl> - <dt> - <p i18n>Storage Capacity</p> - </dt> - <dd> - <p i18n>Number of devices</p> - </dd> - <dd> - <p i18n>Raw capacity</p> - </dd> - </dl> - </td> + <dl> + <dt> + <p i18n>Storage Capacity</p> + </dt> + <dd> + <p i18n>Number of devices</p> + </dd> + <dd> + <p i18n>Raw capacity</p> + </dd> + </dl> + </td> <td class="pt-5"><p>{{ totalDevices }}</p><p> {{ totalCapacity | dimlessBinary }}</p></td> </tr> @@ -40,13 +40,28 @@ </fieldset> </div> -<div class="col-lg-9"> + <div class="col-lg-9"> <legend i18n class="cd-header">Host Details</legend> <cd-hosts [hiddenColumns]="['services', 'status']" [hideToolHeader]="true" [hasTableDetails]="false" - [showGeneralActionsOnly]="true"> - </cd-hosts> -</div> + [showGeneralActionsOnly]="true" + [showExpandClusterBtn]="false"> + </cd-hosts> + </div> </div> +<ng-template #simpleDeploymentTextTpl> + <tr> + <td> + <dl> + <dt> + <p i18n>Storage Capacity</p> + </dt> + <dd> + <p i18n>{{deploymentDescText}}</p> + </dd> + </dl> + </td> + </tr> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts index 964fd7594e7..ed60ddf805a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster-review.component.ts @@ -23,6 +23,8 @@ export class CreateClusterReviewComponent implements OnInit { services: Array<CephServiceSpec> = []; totalCPUs = 0; totalMemory = 0; + deploymentDescText: string; + isSimpleDeployment = true; constructor( public wizardStepsService: WizardStepsService, @@ -40,6 +42,7 @@ export class CreateClusterReviewComponent implements OnInit { let dbDevices = 0; let dbDeviceCapacity = 0; + this.isSimpleDeployment = this.osdService.isDeployementModeSimple; const hostContext = new CdTableFetchDataContext(() => undefined); this.hostService.list(hostContext.toParams(), 'true').subscribe((resp: object[]) => { this.hosts = resp; @@ -67,6 +70,21 @@ export 
class CreateClusterReviewComponent implements OnInit { dbDeviceCapacity = this.osdService.osdDevices['db']['capacity']; } + if (this.isSimpleDeployment) { + this.osdService.getDeploymentOptions().subscribe((optionsObj) => { + if (!_.isEmpty(optionsObj)) { + Object.keys(optionsObj.options).forEach((option) => { + if ( + this.osdService.selectedFormValues && + this.osdService.selectedFormValues.get('deploymentOption').value === option + ) { + this.deploymentDescText = optionsObj.options[option].desc; + } + }); + } + }); + } + this.totalDevices = dataDevices + walDevices + dbDevices; this.osdService.osdDevices['totalDevices'] = this.totalDevices; this.totalCapacity = dataDeviceCapacity + walDeviceCapacity + dbDeviceCapacity; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html index 272b5b0b916..930c6b42ac9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.html @@ -1,5 +1,5 @@ <div class="container h-75" - *ngIf="!startClusterCreation"> + *ngIf="startClusterCreation"> <div class="row h-100 justify-content-center align-items-center"> <div class="blank-page"> <!-- htmllint img-req-src="false" --> @@ -30,7 +30,7 @@ </div> <div class="card" - *ngIf="startClusterCreation"> + *ngIf="!startClusterCreation"> <div class="card-header" i18n>Expand Cluster</div> <div class="container-fluid"> @@ -45,7 +45,8 @@ <cd-hosts [hiddenColumns]="['services']" [hideMaintenance]="true" [hasTableDetails]="false" - [showGeneralActionsOnly]="true"></cd-hosts> + [showGeneralActionsOnly]="true" + [showExpandClusterBtn]="false"></cd-hosts> </div> <div *ngSwitchCase="'2'" class="ms-5"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts index ca343553606..943d5c8ff16 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.spec.ts @@ -59,6 +59,8 @@ describe('CreateClusterComponent', () => { }); it('should have project name as heading in welcome screen', () => { + component.startClusterCreation = true; + fixture.detectChanges(); const heading = fixture.debugElement.query(By.css('h3')).nativeElement; expect(heading.innerHTML).toBe(`Welcome to ${projectConstants.projectName}`); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts index 670a3e00dfe..25d87175130 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/create-cluster/create-cluster.component.ts @@ -7,7 +7,7 @@ import { TemplateRef, ViewChild } from '@angular/core'; -import { Router } from '@angular/router'; +import { ActivatedRoute, Router } from '@angular/router'; import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; @@ -68,7 +68,8 @@ export class CreateClusterComponent implements OnInit, OnDestroy { private 
clusterService: ClusterService, private modalService: ModalService, private taskWrapper: TaskWrapperService, - private osdService: OsdService + private osdService: OsdService, + private route: ActivatedRoute ) { this.permissions = this.authStorageService.getPermissions(); this.currentStepSub = this.wizardStepsService @@ -80,6 +81,14 @@ } ngOnInit(): void { + this.route.queryParams.subscribe((params) => { + // read the 'welcome' query param (true/false) to toggle between the expand-cluster wizard view and the welcome view + const showWelcomeScreen = params['welcome']; + if (showWelcomeScreen) { + this.startClusterCreation = showWelcomeScreen; + } + }); + this.osdService.getDeploymentOptions().subscribe((options) => { this.deploymentOption = options; this.selectedOption = { option: options.recommended_option, encrypted: false }; @@ -91,7 +100,7 @@ } createCluster() { - this.startClusterCreation = true; + this.startClusterCreation = false; } skipClusterCreation() { @@ -244,5 +253,6 @@ ngOnDestroy(): void { this.currentStepSub.unsubscribe(); + this.osdService.selectedFormValues = null; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html index c02b29d101f..43d41c8ce7f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.html @@ -26,6 +26,12 @@ id="host-actions" [tableActions]="tableActions"> </cd-table-actions> + <cd-table-actions [permission]="permissions.hosts" + [selection]="selection" + btnColor="light" + class="btn-group" + [tableActions]="expandClusterActions"> + </cd-table-actions> </div> <cd-host-details cdTableDetail [permissions]="permissions" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts index e04269aa67c..fef729de58a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/hosts/hosts.component.ts @@ -78,12 +78,16 @@ export class HostsComponent extends ListWithDetails implements OnDestroy, OnInit @Input() showGeneralActionsOnly = false; + @Input() + showExpandClusterBtn = true; + permissions: Permissions; columns: Array<CdTableColumn> = []; hosts: Array<object> = []; isLoadingHosts = false; cdParams = { fromLink: '/hosts' }; tableActions: CdTableAction[]; + expandClusterActions: CdTableAction[]; selection = new CdTableSelection(); modalRef: NgbModalRef; isExecuting = false; @@ -125,6 +129,16 @@ export class HostsComponent extends ListWithDetails implements OnDestroy, OnInit ) { super(); this.permissions = this.authStorageService.getPermissions(); + this.expandClusterActions = [ + { + name: this.actionLabels.EXPAND_CLUSTER, + permission: 'create', + icon: Icons.expand, + routerLink: '/expand-cluster', + disable: (selection: CdTableSelection) => this.getDisable('add', selection), + visible: () => this.showExpandClusterBtn + } + ]; this.tableActions = [ { name: this.actionLabels.ADD, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.html
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.html index 6cc09c772b1..4bdb2ad45c9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.html @@ -117,7 +117,7 @@ </cd-copy-2-clipboard-button> </span> <span class="invalid-feedback" - *ngIf="remoteClusterForm.showError('password', frm, 'required')" + *ngIf="remoteClusterForm.showError('password', frm, 'requiredNotEdit')" i18n>This field is required. </span> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.ts index 32548f63c8a..596ddff11ec 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/multi-cluster/multi-cluster-form/multi-cluster-form.component.ts @@ -52,7 +52,6 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { ngOnInit(): void { if (this.action === 'edit') { this.remoteClusterForm.get('remoteClusterUrl').setValue(this.cluster.url); - this.remoteClusterForm.get('remoteClusterUrl').disable(); this.remoteClusterForm.get('clusterAlias').setValue(this.cluster.cluster_alias); this.remoteClusterForm.get('ssl').setValue(this.cluster.ssl_verify); this.remoteClusterForm.get('ssl_cert').setValue(this.cluster.ssl_certificate); @@ -76,7 +75,6 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { createForm() { this.remoteClusterForm = new CdFormGroup({ - // showToken: new FormControl(false), username: new FormControl('', [ CdValidators.custom('uniqueUrlandUser', (username: string) => { let remoteClusterUrl = ''; @@ -96,7 +94,12 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { ); }) ]), - password: new FormControl('', []), + password: new FormControl( + null, + CdValidators.custom('requiredNotEdit', (value: string) => { + return this.action !== 'edit' && !value; + }) + ), remoteClusterUrl: new FormControl(null, { validators: [ CdValidators.custom('endpoint', (value: string) => { @@ -116,11 +119,6 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { Validators.required ] }), - // apiToken: new FormControl('', [ - // CdValidators.requiredIf({ - // showToken: true - // }) - // ]), clusterAlias: new FormControl(null, { validators: [ Validators.required, @@ -128,7 +126,9 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { return ( (this.action === 'connect' || this.action === 'edit') && this.clusterAliasNames && - this.clusterAliasNames.indexOf(clusterAlias) !== -1 + this.clusterAliasNames.indexOf(clusterAlias) !== -1 && + this.cluster?.cluster_alias && + this.cluster.cluster_alias !== clusterAlias ); }) ] @@ -197,7 +197,14 @@ export class MultiClusterFormComponent implements OnInit, OnDestroy { case 'edit': this.subs.add( this.multiClusterService - .editCluster(this.cluster.url, clusterAlias, this.cluster.user, ssl, ssl_certificate) + .editCluster( + this.cluster.name, + url, + clusterAlias, + this.cluster.user, + ssl, + ssl_certificate + ) .subscribe({ ...commonSubscribtion, complete: () => this.handleSuccess($localize`Cluster 
updated successfully`) diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts index 725fc953fbb..162a429f690 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.spec.ts @@ -237,6 +237,7 @@ describe('OsdFormComponent', () => { describe('without data devices selected', () => { it('should disable preview button', () => { + component.simpleDeployment = false; expectPreviewButton(false); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts index 00a162dac1e..16b223b9cbc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-form/osd-form.component.ts @@ -1,4 +1,12 @@ -import { Component, EventEmitter, Input, OnInit, Output, ViewChild } from '@angular/core'; +import { + Component, + EventEmitter, + Input, + OnDestroy, + OnInit, + Output, + ViewChild +} from '@angular/core'; import { UntypedFormControl } from '@angular/forms'; import { Router } from '@angular/router'; @@ -34,7 +42,7 @@ import { OsdFeature } from './osd-feature.interface'; templateUrl: './osd-form.component.html', styleUrls: ['./osd-form.component.scss'] }) -export class OsdFormComponent extends CdForm implements OnInit { +export class OsdFormComponent extends CdForm implements OnInit, OnDestroy { @ViewChild('dataDeviceSelectionGroups') dataDeviceSelectionGroups: OsdDevicesSelectionGroupsComponent; @@ -121,12 +129,23 @@ export class OsdFormComponent extends CdForm implements OnInit { this.osdService.getDeploymentOptions().subscribe((options) => { this.deploymentOptions = options; - this.form.get('deploymentOption').setValue(this.deploymentOptions?.recommended_option); + if (!this.osdService.selectedFormValues) { + this.form.get('deploymentOption').setValue(this.deploymentOptions?.recommended_option); + } if (this.deploymentOptions?.recommended_option) { this.enableFeatures(); } }); + + // restoring form value on back/next + if (this.osdService.selectedFormValues) { + this.form = _.cloneDeep(this.osdService.selectedFormValues); + this.form + .get('deploymentOption') + .setValue(this.osdService.selectedFormValues.value?.deploymentOption); + } + this.simpleDeployment = this.osdService.isDeployementModeSimple; this.form.get('walSlots').valueChanges.subscribe((value) => this.setSlots('wal', value)); this.form.get('dbSlots').valueChanges.subscribe((value) => this.setSlots('db', value)); _.each(this.features, (feature) => { @@ -283,4 +302,9 @@ export class OsdFormComponent extends CdForm implements OnInit { this.previewButtonPanel.submitButton.loading = false; } } + + ngOnDestroy() { + this.osdService.selectedFormValues = _.cloneDeep(this.form); + this.osdService.isDeployementModeSimple = this.dataDeviceSelectionGroups?.devices?.length === 0; + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html index e7278a09868..c659d76b97e 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.html @@ -15,6 +15,7 @@ (click)="createMultisiteSetup()"> Click here</a> to create a new Realm/Zone Group/Zone </cd-alert-panel> + <!-- Service type --> <div class="form-group row"> <label class="cd-col-form-label required" @@ -25,7 +26,7 @@ name="service_type" class="form-select" formControlName="service_type" - (change)="getServiceIds($event.target.value)"> + (change)="onServiceTypeChange($event.target.value)"> <option i18n [ngValue]="null">-- Select a service type --</option> <option *ngFor="let serviceType of serviceTypes" @@ -40,50 +41,90 @@ </div> <!-- backend_service --> - <div *ngIf="serviceForm.controls.service_type.value === 'ingress'" - class="form-group row"> - <label i18n - class="cd-col-form-label" - [ngClass]="{'required': ['ingress'].includes(serviceForm.controls.service_type.value)}" - for="backend_service">Backend Service</label> - <div class="cd-col-form-input"> - <select id="backend_service" - name="backend_service" - class="form-select" - formControlName="backend_service" - (change)="prePopulateId()"> - <option *ngIf="services === null" - [ngValue]="null" - i18n>Loading...</option> - <option *ngIf="services !== null && services.length === 0" - [ngValue]="null" - i18n>-- No service available --</option> - <option *ngIf="services !== null && services.length > 0" - [ngValue]="null" - i18n>-- Select an existing service --</option> - <option *ngFor="let service of services" - [value]="service.service_name">{{ service.service_name }}</option> - </select> - <span class="invalid-feedback" - *ngIf="serviceForm.showError('backend_service', frm, 'required')" - i18n>This field is required.</span> - </div> + <div *ngIf="serviceForm.controls.service_type.value === 'ingress'" + class="form-group row"> + <label i18n + class="cd-col-form-label" + [ngClass]="{'required': ['ingress'].includes(serviceForm.controls.service_type.value)}" + for="backend_service">Backend Service</label> + <div class="cd-col-form-input"> + <select id="backend_service" + name="backend_service" + class="form-select" + formControlName="backend_service" + (change)="prePopulateId()"> + <option *ngIf="services === null" + [ngValue]="null" + i18n>Loading...</option> + <option *ngIf="services !== null && services.length === 0" + [ngValue]="null" + i18n>-- No service available --</option> + <option *ngIf="services !== null && services.length > 0" + [ngValue]="null" + i18n>-- Select an existing service --</option> + <option *ngFor="let service of services" + [value]="service.service_name">{{ service.service_name }}</option> + </select> + <span class="invalid-feedback" + *ngIf="serviceForm.showError('backend_service', frm, 'required')" + i18n>This field is required.</span> </div> + </div> + + <!-- NVMe/TCP --> + <!-- Block Pool --> + <div class="form-group row" + *ngIf="serviceForm.controls.service_type.value === 'nvmeof'"> + <label i18n + class="cd-col-form-label required" + for="pool">Block Pool</label> + <div class="cd-col-form-input"> + <select id="pool" + name="pool" + class="form-select" + formControlName="pool" + (change)="onBlockPoolChange()"> + <option *ngIf="rbdPools === null" + [ngValue]="null" + i18n>Loading...</option> + <option *ngIf="rbdPools && rbdPools.length === 0" + [ngValue]="null" + i18n>-- No block pools available --</option> + <option *ngIf="rbdPools && rbdPools.length > 0" + 
[ngValue]="null" + i18n>-- Select a pool --</option> + <option *ngFor="let pool of rbdPools" + [value]="pool.pool_name">{{ pool.pool_name }}</option> + </select> + <cd-help-text i18n> + A pool in which the gateway configuration can be managed. + </cd-help-text> + <span class="invalid-feedback" + *ngIf="serviceForm.showError('pool', frm, 'required')" + i18n>This field is required.</span> + </div> + </div> <!-- Service id --> <div class="form-group row" *ngIf="serviceForm.controls.service_type.value !== 'snmp-gateway'"> <label class="cd-col-form-label" - [ngClass]="{'required': ['mds', 'rgw', 'nfs', 'iscsi', 'smb', 'ingress'].includes(serviceForm.controls.service_type.value)}" + [ngClass]="{'required': ['mds', 'rgw', 'nfs', 'iscsi', 'nvmeof', 'smb', 'ingress'].includes(serviceForm.controls.service_type.value)}" for="service_id"> - <span i18n>Id</span> - <cd-helper i18n>Used in the service name which is <service_type.service_id></cd-helper> + <span i18n>Service Name</span> </label> <div class="cd-col-form-input"> - <input id="service_id" - class="form-control" - type="text" - formControlName="service_id"> + <div class="input-group"> + <span class="input-group-text" + *ngIf="serviceForm.controls.service_type.value && ['mds', 'rgw', 'nfs', 'iscsi', 'nvmeof', 'smb', 'ingress'].includes(serviceForm.controls.service_type.value)" + for="userId" + i18n>{{serviceForm.controls.service_type.value}}. + </span> + <input id="service_id" + class="form-control" + type="text" + formControlName="service_id"> + </div> <span class="invalid-feedback" *ngIf="serviceForm.showError('service_id', frm, 'required')" i18n>This field is required.</span> @@ -164,11 +205,10 @@ id="unmanaged" type="checkbox" formControlName="unmanaged"> - <label class="custom-control-label" + <label class="custom-control-label m-0" for="unmanaged" i18n>Unmanaged</label> - <cd-helper i18n>If set to true, the orchestrator will not start nor stop any daemon associated with this service. - Placement and all other properties will be ignored.</cd-helper> + <cd-help-text i18n>If Unmanaged is selected, the orchestrator will not start or stop any daemons associated with this service.
Placement and all other properties will be ignored.</cd-help-text> </div> </div> </div> @@ -182,7 +222,8 @@ <div class="cd-col-form-input"> <select id="placement" class="form-select" - formControlName="placement"> + formControlName="placement" + (change)="onPlacementChange($event.target.value)"> <option i18n value="hosts">Hosts</option> <option i18n @@ -226,13 +267,12 @@ </div> </div> - <!-- count --> + <!-- Count --> <div *ngIf="!serviceForm.controls.unmanaged.value" class="form-group row"> <label class="cd-col-form-label" for="count"> <span i18n>Count</span> - <cd-helper i18n>Only that number of daemons will be created.</cd-helper> </label> <div class="cd-col-form-input"> <input id="count" @@ -240,6 +280,7 @@ type="number" formControlName="count" min="1"> + <cd-help-text i18n>Number of daemons that will be deployed.</cd-help-text> <span class="invalid-feedback" *ngIf="serviceForm.showError('count', frm, 'min')" i18n>The value must be at least 1.</span> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts index 4f71abcec7a..db1e7851c2e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.spec.ts @@ -387,6 +387,33 @@ x4Ea7kGVgx9kWh5XjWz9wjZvY49UKIT5ppIAWPMbLl3UpfckiuNhTA== }); }); + describe('should test service nvmeof', () => { + beforeEach(() => { + formHelper.setValue('service_type', 'nvmeof'); + formHelper.setValue('service_id', 'svc'); + formHelper.setValue('pool', 'xyz'); + }); + + it('should submit nvmeof', () => { + component.onSubmit(); + expect(cephServiceService.create).toHaveBeenCalledWith({ + service_type: 'nvmeof', + service_id: 'svc', + placement: {}, + unmanaged: false, + pool: 'xyz' + }); + }); + + it('should throw error when there is no service id', () => { + formHelper.expectErrorChange('service_id', '', 'required'); + }); + + it('should throw error when there is no pool', () => { + formHelper.expectErrorChange('pool', '', 'required'); + }); + }); + describe('should test service smb', () => { beforeEach(() => { formHelper.setValue('service_type', 'smb'); @@ -608,6 +635,15 @@ x4Ea7kGVgx9kWh5XjWz9wjZvY49UKIT5ppIAWPMbLl3UpfckiuNhTA== expect(serviceType.disabled).toBeTruthy(); expect(serviceId.disabled).toBeTruthy(); }); + + it('should not edit pools for nvmeof service', () => { + component.serviceType = 'nvmeof'; + formHelper.setValue('service_type', 'nvmeof'); + component.ngOnInit(); + fixture.detectChanges(); + const poolId = fixture.debugElement.query(By.css('#pool')).nativeElement; + expect(poolId.disabled).toBeTruthy(); + }); }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts index c0f66ed3362..da7fca61bc2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-form/service-form.component.ts @@ -7,12 +7,14 @@ import { NgbActiveModal, NgbModalRef, NgbTypeahead } from '@ng-bootstrap/ng-boot import _ from 'lodash'; import { forkJoin, merge, Observable, Subject, Subscription } from 'rxjs'; import { debounceTime,
distinctUntilChanged, filter, map } from 'rxjs/operators'; +import { Pool } from '~/app/ceph/pool/pool'; import { CreateRgwServiceEntitiesComponent } from '~/app/ceph/rgw/create-rgw-service-entities/create-rgw-service-entities.component'; import { RgwRealm, RgwZonegroup, RgwZone } from '~/app/ceph/rgw/models/rgw-multisite'; import { CephServiceService } from '~/app/shared/api/ceph-service.service'; import { HostService } from '~/app/shared/api/host.service'; import { PoolService } from '~/app/shared/api/pool.service'; +import { RbdService } from '~/app/shared/api/rbd.service'; import { RgwMultisiteService } from '~/app/shared/api/rgw-multisite.service'; import { RgwRealmService } from '~/app/shared/api/rgw-realm.service'; import { RgwZoneService } from '~/app/shared/api/rgw-zone.service'; @@ -68,7 +70,8 @@ export class ServiceFormComponent extends CdForm implements OnInit { labels: string[]; labelClick = new Subject<string>(); labelFocus = new Subject<string>(); - pools: Array<object>; + pools: Array<Pool>; + rbdPools: Array<Pool>; services: Array<CephServiceSpec> = []; pageURL: string; serviceList: CephServiceSpec[]; @@ -94,6 +97,7 @@ export class ServiceFormComponent extends CdForm implements OnInit { private formBuilder: CdFormBuilder, private hostService: HostService, private poolService: PoolService, + private rbdService: RbdService, private router: Router, private taskWrapperService: TaskWrapperService, public timerService: TimerService, @@ -146,6 +150,9 @@ export class ServiceFormComponent extends CdForm implements OnInit { service_type: 'iscsi' }), CdValidators.requiredIf({ + service_type: 'nvmeof' + }), + CdValidators.requiredIf({ service_type: 'ingress' }), CdValidators.requiredIf({ @@ -176,11 +183,15 @@ export class ServiceFormComponent extends CdForm implements OnInit { count: [null, [CdValidators.number(false)]], unmanaged: [false], // iSCSI + // NVMe/TCP pool: [ null, [ CdValidators.requiredIf({ service_type: 'iscsi' + }), + CdValidators.requiredIf({ + service_type: 'nvmeof' }) ] ], @@ -413,10 +424,15 @@ export class ServiceFormComponent extends CdForm implements OnInit { }); } - ngOnInit(): void { - this.action = this.actionLabels.CREATE; + resolveRoute() { if (this.router.url.includes('services/(modal:create')) { this.pageURL = 'services'; + this.route.params.subscribe((params: { type: string }) => { + if (params?.type) { + this.serviceType = params.type; + this.serviceForm.get('service_type').setValue(this.serviceType); + } + }); } else if (this.router.url.includes('services/(modal:edit')) { this.editing = true; this.pageURL = 'services'; @@ -425,6 +441,11 @@ export class ServiceFormComponent extends CdForm implements OnInit { this.serviceType = params.type; }); } + } + + ngOnInit(): void { + this.action = this.actionLabels.CREATE; + this.resolveRoute(); this.cephServiceService .list(new HttpParams({ fromObject: { limit: -1, offset: 0 } })) @@ -457,8 +478,12 @@ export class ServiceFormComponent extends CdForm implements OnInit { this.hostService.getLabels().subscribe((resp: string[]) => { this.labels = resp; }); - this.poolService.getList().subscribe((resp: Array<object>) => { + this.poolService.getList().subscribe((resp: Pool[]) => { this.pools = resp; + this.rbdPools = this.pools.filter(this.rbdService.isRBDPool); + if (!this.editing && this.serviceType) { + this.onServiceTypeChange(this.serviceType); + } }); if (this.editing) { @@ -495,12 +520,14 @@ export class ServiceFormComponent extends CdForm implements OnInit { 
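Aside on the pool handling just above: the fetched pool list is filtered through RbdService.isRBDPool to build the rbdPools shown in the NVMe/TCP dropdown. That predicate is defined in RbdService, which is not part of this diff, so the following is only a sketch of the shape it plausibly has, assuming pools expose an application_metadata map keyed by the applications enabled on them; all names below are illustrative, not the patch's implementation.

    // Hypothetical sketch, not the RbdService code from this patch.
    interface PoolLike {
      pool_name: string;
      // assumed field: applications enabled on the pool, e.g. { rbd: {} }
      application_metadata?: Record<string, object>;
    }

    // A pool qualifies for the NVMe/TCP dropdown when 'rbd' is enabled on it.
    const isRBDPool = (pool: PoolLike): boolean =>
      'rbd' in (pool.application_metadata ?? {});

    const examplePools: PoolLike[] = [
      { pool_name: 'rbd', application_metadata: { rbd: {} } },
      { pool_name: 'cephfs_data', application_metadata: { cephfs: {} } }
    ];
    console.log(examplePools.filter(isRBDPool).map((p) => p.pool_name)); // ['rbd']
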
this.serviceForm.get('ssl_key').setValue(response[0].spec?.ssl_key); } break; + case 'nvmeof': + this.serviceForm.get('pool').setValue(response[0].spec.pool); + break; case 'rgw': this.serviceForm .get('rgw_frontend_port') .setValue(response[0].spec?.rgw_frontend_port); - this.getServiceIds( - 'rgw', + this.setRgwFields( response[0].spec?.rgw_realm, response[0].spec?.rgw_zonegroup, response[0].spec?.rgw_zone @@ -595,7 +622,7 @@ } } - getDefaultsEntities( + getDefaultsEntitiesForRgw( defaultRealmId: string, defaultZonegroupId: string, defaultZoneId: string @@ -625,100 +652,177 @@ }; } - getServiceIds( - selectedServiceType: string, - realm_name?: string, - zonegroup_name?: string, - zone_name?: string - ) { + getDefaultPlacementCount(serviceType: string) { + /** + * `defaults` from src/pybind/mgr/cephadm/module.py + */ + switch (serviceType) { + case 'mon': + this.serviceForm.get('count').setValue(5); + break; + case 'mgr': + case 'mds': + case 'rgw': + case 'ingress': + case 'rbd-mirror': + this.serviceForm.get('count').setValue(2); + break; + case 'iscsi': + case 'nvmeof': + case 'cephfs-mirror': + case 'nfs': + case 'grafana': + case 'alertmanager': + case 'prometheus': + case 'loki': + case 'container': + case 'snmp-gateway': + case 'elasticsearch': + case 'jaeger-collector': + case 'jaeger-query': + case 'smb': + this.serviceForm.get('count').setValue(1); + break; + default: + this.serviceForm.get('count').setValue(null); + } + } + + setRgwFields(realm_name?: string, zonegroup_name?: string, zone_name?: string) { + const observables = [ + this.rgwRealmService.getAllRealmsInfo(), + this.rgwZonegroupService.getAllZonegroupsInfo(), + this.rgwZoneService.getAllZonesInfo() + ]; + this.sub = forkJoin(observables).subscribe( + (multisiteInfo: [object, object, object]) => { + this.multisiteInfo = multisiteInfo; + this.realmList = + this.multisiteInfo[0] !== undefined && this.multisiteInfo[0].hasOwnProperty('realms') + ? this.multisiteInfo[0]['realms'] + : []; + this.zonegroupList = + this.multisiteInfo[1] !== undefined && this.multisiteInfo[1].hasOwnProperty('zonegroups') + ? this.multisiteInfo[1]['zonegroups'] + : []; + this.zoneList = + this.multisiteInfo[2] !== undefined && this.multisiteInfo[2].hasOwnProperty('zones') + ?
this.multisiteInfo[2]['zones'] + : []; + this.realmNames = this.realmList.map((realm) => { + return realm['name']; + }); + this.zonegroupNames = this.zonegroupList.map((zonegroup) => { + return zonegroup['name']; + }); + this.zoneNames = this.zoneList.map((zone) => { + return zone['name']; + }); + this.defaultRealmId = multisiteInfo[0]['default_realm']; + this.defaultZonegroupId = multisiteInfo[1]['default_zonegroup']; + this.defaultZoneId = multisiteInfo[2]['default_zone']; + this.defaultsInfo = this.getDefaultsEntitiesForRgw( + this.defaultRealmId, + this.defaultZonegroupId, + this.defaultZoneId + ); + if (!this.editing) { + this.serviceForm.get('realm_name').setValue(this.defaultsInfo['defaultRealmName']); + this.serviceForm + .get('zonegroup_name') + .setValue(this.defaultsInfo['defaultZonegroupName']); + this.serviceForm.get('zone_name').setValue(this.defaultsInfo['defaultZoneName']); + } else { + if (realm_name && !this.realmNames.includes(realm_name)) { + const realm = new RgwRealm(); + realm.name = realm_name; + this.realmList.push(realm); + } + if (zonegroup_name && !this.zonegroupNames.includes(zonegroup_name)) { + const zonegroup = new RgwZonegroup(); + zonegroup.name = zonegroup_name; + this.zonegroupList.push(zonegroup); + } + if (zone_name && !this.zoneNames.includes(zone_name)) { + const zone = new RgwZone(); + zone.name = zone_name; + this.zoneList.push(zone); + } + if (zonegroup_name === undefined && zone_name === undefined) { + zonegroup_name = 'default'; + zone_name = 'default'; + } + this.serviceForm.get('realm_name').setValue(realm_name); + this.serviceForm.get('zonegroup_name').setValue(zonegroup_name); + this.serviceForm.get('zone_name').setValue(zone_name); + } + if (this.realmList.length === 0) { + this.showRealmCreationForm = true; + } else { + this.showRealmCreationForm = false; + } + }, + (_error) => { + const defaultZone = new RgwZone(); + defaultZone.name = 'default'; + const defaultZonegroup = new RgwZonegroup(); + defaultZonegroup.name = 'default'; + this.zoneList.push(defaultZone); + this.zonegroupList.push(defaultZonegroup); + } + ); + } + + setNvmeofServiceId(): void { + const defaultRbdPool: string = this.rbdPools?.find((p: Pool) => p.pool_name === 'rbd') + ?.pool_name; + if (defaultRbdPool) { + this.serviceForm.get('pool').setValue(defaultRbdPool); + this.serviceForm.get('service_id').setValue(defaultRbdPool); + } + } + + requiresServiceId(serviceType: string) { + return ['mds', 'rgw', 'nfs', 'iscsi', 'nvmeof', 'smb', 'ingress'].includes(serviceType); + } + + setServiceId(serviceId: string): void { + const requiresServiceId: boolean = this.requiresServiceId(serviceId); + if (requiresServiceId && serviceId === 'nvmeof') { + this.setNvmeofServiceId(); + } else if (requiresServiceId) { + this.serviceForm.get('service_id').setValue(null); + } else { + this.serviceForm.get('service_id').setValue(serviceId); + } + } + + onServiceTypeChange(selectedServiceType: string) { + this.setServiceId(selectedServiceType); + this.serviceIds = this.serviceList ?.filter((service) => service['service_type'] === selectedServiceType) .map((service) => service['service_id']); + this.getDefaultPlacementCount(selectedServiceType); + if (selectedServiceType === 'rgw') { - const observables = [ - this.rgwRealmService.getAllRealmsInfo(), - this.rgwZonegroupService.getAllZonegroupsInfo(), - this.rgwZoneService.getAllZonesInfo() - ]; - this.sub = forkJoin(observables).subscribe( - (multisiteInfo: [object, object, object]) => { - this.multisiteInfo = multisiteInfo; - this.realmList = 
- this.multisiteInfo[0] !== undefined && this.multisiteInfo[0].hasOwnProperty('realms') - ? this.multisiteInfo[0]['realms'] - : []; - this.zonegroupList = - this.multisiteInfo[1] !== undefined && - this.multisiteInfo[1].hasOwnProperty('zonegroups') - ? this.multisiteInfo[1]['zonegroups'] - : []; - this.zoneList = - this.multisiteInfo[2] !== undefined && this.multisiteInfo[2].hasOwnProperty('zones') - ? this.multisiteInfo[2]['zones'] - : []; - this.realmNames = this.realmList.map((realm) => { - return realm['name']; - }); - this.zonegroupNames = this.zonegroupList.map((zonegroup) => { - return zonegroup['name']; - }); - this.zoneNames = this.zoneList.map((zone) => { - return zone['name']; - }); - this.defaultRealmId = multisiteInfo[0]['default_realm']; - this.defaultZonegroupId = multisiteInfo[1]['default_zonegroup']; - this.defaultZoneId = multisiteInfo[2]['default_zone']; - this.defaultsInfo = this.getDefaultsEntities( - this.defaultRealmId, - this.defaultZonegroupId, - this.defaultZoneId - ); - if (!this.editing) { - this.serviceForm.get('realm_name').setValue(this.defaultsInfo['defaultRealmName']); - this.serviceForm - .get('zonegroup_name') - .setValue(this.defaultsInfo['defaultZonegroupName']); - this.serviceForm.get('zone_name').setValue(this.defaultsInfo['defaultZoneName']); - } else { - if (realm_name && !this.realmNames.includes(realm_name)) { - const realm = new RgwRealm(); - realm.name = realm_name; - this.realmList.push(realm); - } - if (zonegroup_name && !this.zonegroupNames.includes(zonegroup_name)) { - const zonegroup = new RgwZonegroup(); - zonegroup.name = zonegroup_name; - this.zonegroupList.push(zonegroup); - } - if (zone_name && !this.zoneNames.includes(zone_name)) { - const zone = new RgwZone(); - zone.name = zone_name; - this.zoneList.push(zone); - } - if (zonegroup_name === undefined && zone_name === undefined) { - zonegroup_name = 'default'; - zone_name = 'default'; - } - this.serviceForm.get('realm_name').setValue(realm_name); - this.serviceForm.get('zonegroup_name').setValue(zonegroup_name); - this.serviceForm.get('zone_name').setValue(zone_name); - } - if (this.realmList.length === 0) { - this.showRealmCreationForm = true; - } else { - this.showRealmCreationForm = false; - } - }, - (_error) => { - const defaultZone = new RgwZone(); - defaultZone.name = 'default'; - const defaultZonegroup = new RgwZonegroup(); - defaultZonegroup.name = 'default'; - this.zoneList.push(defaultZone); - this.zonegroupList.push(defaultZonegroup); - } - ); + this.setRgwFields(); + } + } + + onPlacementChange(selected: string) { + if (selected === 'label') { + this.serviceForm.get('count').setValue(null); + } + } + + onBlockPoolChange() { + const selectedBlockPool = this.serviceForm.get('pool').value; + if (selectedBlockPool) { + this.serviceForm.get('service_id').setValue(selectedBlockPool); + } else { + this.serviceForm.get('service_id').setValue(null); } } @@ -730,6 +834,10 @@ export class ServiceFormComponent extends CdForm implements OnInit { switch (serviceType) { case 'ingress': this.serviceForm.get('backend_service').disable(); + break; + case 'nvmeof': + this.serviceForm.get('pool').disable(); + break; } } @@ -780,19 +888,16 @@ export class ServiceFormComponent extends CdForm implements OnInit { placement: {}, unmanaged: values['unmanaged'] }; - let svcId: string; if (serviceType === 'rgw') { serviceSpec['rgw_realm'] = values['realm_name'] ? values['realm_name'] : null; serviceSpec['rgw_zonegroup'] = values['zonegroup_name'] !== 'default' ? 
values['zonegroup_name'] : null; serviceSpec['rgw_zone'] = values['zone_name'] !== 'default' ? values['zone_name'] : null; - svcId = values['service_id']; - } else { - svcId = values['service_id']; } - const serviceId: string = svcId; + + const serviceId: string = values['service_id']; let serviceName: string = serviceType; - if (_.isString(serviceId) && !_.isEmpty(serviceId)) { + if (_.isString(serviceId) && !_.isEmpty(serviceId) && serviceId !== serviceType) { serviceName = `${serviceType}.${serviceId}`; serviceSpec['service_id'] = serviceId; } @@ -814,6 +919,7 @@ export class ServiceFormComponent extends CdForm implements OnInit { } break; + case 'nvmeof': case 'iscsi': serviceSpec['pool'] = values['pool']; break; @@ -947,7 +1053,7 @@ export class ServiceFormComponent extends CdForm implements OnInit { size: 'lg' }); this.bsModalRef.componentInstance.submitAction.subscribe(() => { - this.getServiceIds('rgw'); + this.setRgwFields(); }); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html index d84449e237e..567f6ae099b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.html @@ -37,3 +37,24 @@ [ngClass]="[icons.warning]"> </i> </ng-template> + +<ng-template #urlTpl + let-row="row"> + <ng-container *ngIf="serviceUrls[row.service_type] else noUrl"> + <a *ngIf="!isMgmtGateway else mgmtGateway" + target="_blank" + [href]="serviceUrls[row.service_type]"> + {{ row.service_name }} + <i class="fa fa-external-link"></i> + </a> + + <ng-template #mgmtGateway> + <a target="_blank" + [href]="row.service_type"> + {{ row.service_name }} + <i class="fa fa-external-link"></i> + </a> + </ng-template> + </ng-container> + <ng-template #noUrl>{{row.service_name}}</ng-template> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts index 82a975c9df4..72a07de9718 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/services.component.ts @@ -27,6 +27,7 @@ import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { URLBuilderService } from '~/app/shared/services/url-builder.service'; import { PlacementPipe } from './placement.pipe'; import { ServiceFormComponent } from './service-form/service-form.component'; +import { SettingsService } from '~/app/shared/api/settings.service'; const BASE_URL = 'services'; @@ -41,6 +42,8 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI table: TableComponent; @ViewChild('runningTpl', { static: true }) public runningTpl: TemplateRef<any>; + @ViewChild('urlTpl', { static: true }) + public urlTpl: TemplateRef<any>; @Input() hostname: string; @@ -71,6 +74,8 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI isLoadingServices = false; selection: CdTableSelection = new CdTableSelection(); icons = Icons; + serviceUrls = { grafana: '', prometheus: '', alertmanager: '' }; + isMgmtGateway: boolean = false; constructor( private actionLabels: ActionLabelsI18n, @@ -80,7 +85,8 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI 
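A detail worth calling out from the onSubmit hunk above: the service name only gains the `<service_type>.<service_id>` suffix when a non-empty id is present and differs from the type, which keeps the nvmeof flow (where the pool name doubles as the id) from producing names like `nvmeof.nvmeof`. A standalone sketch of that rule; the helper name is illustrative, not part of the patch.

    // Hypothetical helper mirroring the naming rule applied in onSubmit.
    function deriveServiceName(serviceType: string, serviceId?: string): string {
      if (serviceId && serviceId !== serviceType) {
        return `${serviceType}.${serviceId}`; // e.g. 'nvmeof.rbd'
      }
      return serviceType; // bare type when the id is empty or just repeats the type
    }

    console.log(deriveServiceName('nvmeof', 'rbd')); // 'nvmeof.rbd'
    console.log(deriveServiceName('nvmeof', 'nvmeof')); // 'nvmeof'
    console.log(deriveServiceName('mgr')); // 'mgr'
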
private cephServiceService: CephServiceService, private relativeDatePipe: RelativeDatePipe, private taskWrapperService: TaskWrapperService, - private router: Router + private router: Router, + private settingsService: SettingsService ) { super(); this.permissions = this.authStorageService.getPermissions(); @@ -148,7 +154,8 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI { name: $localize`Service`, prop: 'service_name', - flexGrow: 1 + flexGrow: 1, + cellTemplate: this.urlTpl }, { name: $localize`Placement`, @@ -178,6 +185,12 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI this.orchStatus = status; this.showDocPanel = !status.available; }); + + if (!this.isMgmtGateway) { + this.configureServiceUrl('api/grafana/url', 'grafana'); + this.configureServiceUrl('ui-api/prometheus/prometheus-api-host', 'prometheus'); + this.configureServiceUrl('ui-api/prometheus/alertmanager-api-host', 'alertmanager'); + } } ngOnChanges() { @@ -219,6 +232,9 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI this.services = services; this.count = pagination_obs.count; this.services = this.services.filter((col: any) => { + if (col.service_type === 'mgmt-gateway' && col.status.running) { + this.isMgmtGateway = true; + } return !this.hiddenServices.includes(col.service_name); }); this.isLoadingServices = false; @@ -229,6 +245,15 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI context.error(); } ); + if ( + this.isMgmtGateway && + !this.services.find( + (service: CephServiceSpec) => + service.service_type !== 'mgmt-gateway' && service.status.running > 0 + ) + ) { + this.isMgmtGateway = false; + } } updateSelection(selection: CdTableSelection) { @@ -258,4 +283,10 @@ export class ServicesComponent extends ListWithDetails implements OnChanges, OnI ) }); } + + private configureServiceUrl(url: string, serviceType: string) { + this.settingsService.ifSettingConfigured(url, (url) => { + this.serviceUrls[serviceType] = url; + }); + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/upgrade/upgrade.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/upgrade/upgrade.component.html index 5db15a42b51..3e7f7a2c090 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/upgrade/upgrade.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/upgrade/upgrade.component.html @@ -47,13 +47,7 @@ id="clusterStatus"> <div class="d-flex flex-column justify-content-center align-items-center"> <ng-template #healthChecks> - <ul> - <li *ngFor="let check of healthData.health.checks"> - <span [ngStyle]="check.severity | healthColor" - [class.health-warn-description]="check.severity === 'HEALTH_WARN'"> - {{ check.type }}</span>: {{ check.summary.message }} - </li> - </ul> + <cd-health-checks [healthData]="healthData.health.checks"></cd-health-checks> </ng-template> <ng-template #healthWarningAndError> <div class="info-card-content-clickable mt-1" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html index 46aa67721dd..fda0b407587 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard-v3/dashboard/dashboard-v3.component.html @@ -148,14 +148,8 @@ </div> <div class="d-flex 
flex-column ms-4 me-4 mt-4 mb-4"> <ng-template #healthChecks> - <ng-container *ngTemplateOutlet="logsLink"></ng-container> - <ul> - <li *ngFor="let check of healthData.health.checks"> - <span [ngStyle]="check.severity | healthColor" - [class.health-warn-description]="check.severity === 'HEALTH_WARN'"> - {{ check.type }}</span>: {{ check.summary.message }} - </li> - </ul> + <cd-health-checks *ngIf="healthData?.health?.checks" + [healthData]="healthData.health.checks"></cd-health-checks> </ng-template> <div class="d-flex flex-row col-md-3 ms-4"> @@ -168,7 +162,7 @@ popoverClass="info-card-popover-cluster-status" [openDelay]="300" [closeDelay]="500" - triggers="mouseenter:mouseleave" + triggers="mouseenter" *ngIf="healthData.health?.checks?.length" i18n>Cluster</a> <span class="ms-2 mt-n1 lead" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard/health/health.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard/health/health.component.html index c440a5f2df0..9e68fc22ca0 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard/health/health.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/dashboard/health/health.component.html @@ -18,14 +18,7 @@ *ngIf="healthData.health?.status"> <ng-container *ngIf="healthData.health?.checks?.length > 0"> <ng-template #healthChecks> - <ng-container *ngTemplateOutlet="logsLink"></ng-container> - <ul> - <li *ngFor="let check of healthData.health.checks"> - <span [ngStyle]="check.severity | healthColor" - [class.health-warn-description]="check.severity === 'HEALTH_WARN'"> - {{ check.type }}</span>: {{ check.summary.message }} - </li> - </ul> + <cd-health-checks [healthData]="healthData"></cd-health-checks> </ng-template> <div class="info-card-content-clickable" [ngStyle]="healthData.health.status | healthColor" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/models/nfs.fsal.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/models/nfs.fsal.ts index f204ac6d8b6..cbdc44f3ca8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/models/nfs.fsal.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/models/nfs.fsal.ts @@ -1,5 +1,9 @@ +export enum SUPPORTED_FSAL { + CEPH = 'CEPH', + RGW = 'RGW' +} export interface NfsFSAbstractionLayer { - value: string; + value: SUPPORTED_FSAL; descr: string; disabled: boolean; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html index f54361a5f7d..01350b4def4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html @@ -15,9 +15,6 @@ for="cluster_id"> <span class="required" i18n>Cluster</span> - <cd-helper> - <p i18n>This is the ID of an NFS Service.</p> - </cd-helper> </label> <div class="cd-col-form-input"> <select class="form-select" @@ -36,6 +33,9 @@ <option *ngFor="let cluster of allClusters" [value]="cluster.cluster_id">{{ cluster.cluster_id }}</option> </select> + <cd-help-text> + <p i18n>This is the ID of an NFS Service.</p> + </cd-help-text> <span class="invalid-feedback" *ngIf="nfsForm.showError('cluster_id', formDir, 'required') || allClusters?.length === 0" i18n>This field is required. 
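The SUPPORTED_FSAL enum introduced above anchors the larger shift in the NFS form visible in the hunks that follow: the storage backend is no longer picked from a dropdown but derived from the current route, via getFsalFromRoute and getPathfromFsal imported from '../utils' later in this diff. Those utilities themselves are not shown here, so the following is one plausible reading, assuming routes of the form /cephfs/nfs/... and /rgw/nfs/... as suggested by the spec and navigation changes below.

    // Hypothetical sketch of the '../utils' helpers referenced later in the diff.
    enum SUPPORTED_FSAL {
      CEPH = 'CEPH',
      RGW = 'RGW'
    }

    // Infer the backend from the current router URL.
    function getFsalFromRoute(url: string): SUPPORTED_FSAL {
      return url.startsWith('/rgw') ? SUPPORTED_FSAL.RGW : SUPPORTED_FSAL.CEPH;
    }

    // Map a backend to its route prefix, e.g. for `/${getPathfromFsal(fsal)}/nfs` redirects.
    function getPathfromFsal(fsal: SUPPORTED_FSAL): string {
      return fsal === SUPPORTED_FSAL.RGW ? 'rgw' : 'cephfs';
    }

    console.log(getFsalFromRoute('/cephfs/nfs/create')); // 'CEPH'
    console.log(getPathfromFsal(SUPPORTED_FSAL.RGW)); // 'rgw'
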
@@ -46,42 +46,9 @@ <!-- FSAL --> <div formGroupName="fsal"> - <!-- Name --> - <div class="form-group row"> - <label class="cd-col-form-label required" - for="name" - i18n>Storage Backend</label> - <div class="cd-col-form-input"> - <select class="form-select" - formControlName="name" - name="name" - id="name" - (change)="fsalChangeHandler()"> - <option *ngIf="allFsals === null" - value="" - i18n>Loading...</option> - <option *ngIf="allFsals !== null && allFsals.length === 0" - value="" - i18n>-- No data pools available --</option> - <option *ngIf="allFsals !== null && allFsals.length > 0" - value="" - i18n>-- Select the storage backend --</option> - <option *ngFor="let fsal of allFsals" - [value]="fsal.value" - [disabled]="fsal.disabled">{{ fsal.descr }}</option> - </select> - <span class="invalid-feedback" - *ngIf="nfsForm.showError('name', formDir, 'required')" - i18n>This field is required.</span> - <span class="invalid-feedback" - *ngIf="fsalAvailabilityError" - i18n>{{ fsalAvailabilityError }}</span> - </div> - </div> - <!-- CephFS Volume --> <div class="form-group row" - *ngIf="nfsForm.getValue('name') === 'CEPH'"> + *ngIf="storageBackend === 'CEPH'"> <label class="cd-col-form-label required" for="fs_name" i18n>Volume</label> @@ -90,7 +57,7 @@ formControlName="fs_name" name="fs_name" id="fs_name" - (change)="pathChangeHandler()"> + (change)="volumeChangeHandler()"> <option *ngIf="allFsNames === null" value="" i18n>Loading...</option> @@ -112,7 +79,7 @@ <!-- Security Label --> <div class="form-group row" - *ngIf="nfsForm.getValue('name') === 'CEPH'"> + *ngIf="storageBackend === 'CEPH'"> <label class="cd-col-form-label" [ngClass]="{'required': nfsForm.getValue('security_label')}" for="security_label" @@ -145,16 +112,65 @@ </div> </div> + <div class="form-group row" + *ngIf="storageBackend === 'CEPH'"> + <label class="cd-col-form-label" + for="subvolume_group" + i18n>Subvolume Group</label> + <div class="cd-col-form-input"> + <select class="form-select" + formControlName="subvolume_group" + name="subvolume_group" + id="subvolume_group" + (change)="getSubVol()"> + <option *ngIf="allsubvolgrps === null" + value="" + i18n>Loading...</option> + <option *ngIf="allsubvolgrps !== null && allsubvolgrps.length === 0" + value="" + i18n>-- No CephFS subvolume group available --</option> + <option *ngIf="allsubvolgrps !== null && allsubvolgrps.length > 0" + value="" + i18n>-- Select the CephFS subvolume group --</option> + <option *ngFor="let subvol_grp of allsubvolgrps" + [value]="subvol_grp.name">{{ subvol_grp.name }}</option> + </select> + </div> + </div> + + <div class="form-group row" + *ngIf="storageBackend === 'CEPH'"> + <label class="cd-col-form-label" + for="subvolume" + i18n>Subvolume</label> + <div class="cd-col-form-input"> + <select class="form-select" + formControlName="subvolume" + name="subvolume" + id="subvolume" + (change)="getPath()"> + <option *ngIf="allsubvols === null" + value="" + i18n>Loading...</option> + <option *ngIf="allsubvols !== null && allsubvols.length === 0" + value="" + i18n>-- No CephFS subvolume available --</option> + <option *ngIf="allsubvols !== null && allsubvols.length > 0" + value="" + i18n>-- Select the CephFS subvolume --</option> + <option *ngFor="let subvolume of allsubvols" + [value]="subvolume.name">{{ subvolume.name }}</option> + </select> + </div> + </div> + <!-- Path --> <div class="form-group row" - *ngIf="nfsForm.getValue('name') === 'CEPH'"> + *ngIf="storageBackend === 'CEPH'"> <label class="cd-col-form-label" for="path"> <span class="required" 
i18n>CephFS Path</span> - <cd-helper> - <p i18n>A path in a CephFS file system.</p> - </cd-helper> </label> <div class="cd-col-form-input"> <input type="text" @@ -166,6 +182,9 @@ [ngbTypeahead]="pathDataSource" (selectItem)="pathChangeHandler()" (blur)="pathChangeHandler()"> + <cd-help-text> + <p i18n>A path in a CephFS file system.</p> + </cd-help-text> <span class="invalid-feedback" *ngIf="nfsForm.showError('path', formDir, 'required')" i18n>This field is required.</span> @@ -181,7 +200,7 @@ <!-- Bucket --> <div class="form-group row" - *ngIf="nfsForm.getValue('name') === 'RGW'"> + *ngIf="storageBackend === 'RGW'"> <label class="cd-col-form-label" for="path"> <span class="required" @@ -248,11 +267,6 @@ for="pseudo"> <span class="required" i18n>Pseudo</span> - <cd-helper> - <p i18n>The position that this <strong>NFS v4</strong> export occupies - in the <strong>Pseudo FS</strong> (it must be unique).</p> - <p i18n>By using different Pseudo options, the same Path may be exported multiple times.</p> - </cd-helper> </label> <div class="cd-col-form-input"> <input type="text" @@ -261,6 +275,10 @@ id="pseudo" formControlName="pseudo" minlength="2"> + <cd-help-text> + <span i18n>The position this export occupies in the Pseudo FS. It must be unique.</span><br/> + <span i18n>By using different Pseudo options, the same Path may be exported multiple times.</span> + </cd-help-text> <span class="invalid-feedback" *ngIf="nfsForm.showError('pseudo', formDir, 'required')" i18n>This field is required.</span> @@ -301,7 +319,7 @@ {{ getAccessTypeHelp(nfsForm.getValue('access_type')) }} </span> <span class="form-text text-warning" - *ngIf="nfsForm.getValue('access_type') === 'RW' && nfsForm.getValue('name') === 'RGW'" + *ngIf="nfsForm.getValue('access_type') === 'RW' && storageBackend === 'RGW'" i18n>The Object Gateway NFS backend has a number of limitations which will seriously affect applications writing to the share. Please consult the <cd-doc section="rgw-nfs"></cd-doc> @@ -317,7 +335,6 @@ <label class="cd-col-form-label" for="squash"> <span i18n>Squash</span> - <ng-container *ngTemplateOutlet="squashHelper"></ng-container> </label> <div class="cd-col-form-input"> <select class="form-select" @@ -334,6 +351,20 @@ [value]="squash">{{ squash }}</option> </select> + <cd-help-text> + <span *ngIf="nfsForm.getValue('squash') === 'root_squash'" + i18n>Maps the root user on the NFS client to an anonymous user/group with limited privileges. 
This prevents a root client user from having total control over the NFS export.</span> + + <span *ngIf="nfsForm.getValue('squash') === 'root_id_squash'" + i18n>Maps the root user on the NFS client to an anonymous user/group with limited privileges, preventing root access but retaining non-root group privileges.</span> + + <span *ngIf="nfsForm.getValue('squash') === 'all_squash'" + i18n>Maps all users on the NFS client to an anonymous user/group with limited privileges, ensuring that no user has special privileges on the NFS export.</span> + + <span *ngIf="nfsForm.getValue('squash') === 'no_root_squash'" + i18n>Allows the root user on the NFS client to retain full root privileges on the NFS server, which may pose security risks.</span> + + </cd-help-text> <span class="invalid-feedback" *ngIf="nfsForm.showError('squash', formDir,'required')" i18n>This field is required.</span> @@ -378,35 +409,18 @@ <cd-nfs-form-client [form]="nfsForm" [clients]="clients" #nfsClients> - <ng-template #squashHelper> - <cd-helper> - <ul class="squash-helper"> - <li> - <span class="squash-helper-item-value">no_root_squash: </span> - <span i18n>No user id squashing is performed.</span> - </li> - <li> - <span class="squash-helper-item-value">root_id_squash: </span> - <span i18n>uid 0 and gid 0 are squashed to the Anonymous_Uid and Anonymous_Gid gid 0 in alt_groups lists is also squashed.</span> - </li> - <li> - <span class="squash-helper-item-value">root_squash: </span> - <span i18n>uid 0 and gid of any value are squashed to the Anonymous_Uid and Anonymous_Gid alt_groups lists is discarded.</span> - </li> - <li> - <span class="squash-helper-item-value">all_squash: </span> - <span i18n>All users are squashed.</span> - </li> - </ul> - </cd-helper> - </ng-template> </cd-nfs-form-client> + <!-- Errors --> + <cd-alert-panel type="error" + *ngIf="!!storageBackendError"> + {{storageBackendError}} + </cd-alert-panel> </div> - <div class="card-footer"> <cd-form-button-panel (submitActionEvent)="submitAction()" [form]="nfsForm" + [disabled]="!!storageBackendError" [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" wrappingClass="text-right"></cd-form-button-panel> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.scss index 4d892a120fc..cebcc8877a2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.scss @@ -1,11 +1,3 @@ .cd-mb { margin-bottom: 10px; } - -.squash-helper { - padding-left: 1rem; -} - -.squash-helper-item-value { - font-weight: bold; -} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts index 65267a15791..7f88c648684 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.spec.ts @@ -1,7 +1,7 @@ import { HttpClientTestingModule, HttpTestingController } from '@angular/common/http/testing'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import { ReactiveFormsModule } from '@angular/forms'; -import { ActivatedRoute } from '@angular/router'; +import { ActivatedRoute, Router } from '@angular/router'; import { RouterTestingModule } from 
'@angular/router/testing'; import { NgbTypeaheadModule } from '@ng-bootstrap/ng-bootstrap'; @@ -21,6 +21,7 @@ describe('NfsFormComponent', () => { let fixture: ComponentFixture<NfsFormComponent>; let httpTesting: HttpTestingController; let activatedRoute: ActivatedRouteStub; + let router: Router; configureTestBed( { @@ -45,9 +46,8 @@ describe('NfsFormComponent', () => { const matchSquash = (backendSquashValue: string, uiSquashValue: string) => { component.ngOnInit(); - httpTesting.expectOne('ui-api/nfs-ganesha/fsals').flush(['CEPH', 'RGW']); - httpTesting.expectOne('ui-api/nfs-ganesha/cephfs/filesystems').flush([{ id: 1, name: 'a' }]); httpTesting.expectOne('api/nfs-ganesha/cluster').flush(['mynfs']); + httpTesting.expectOne('ui-api/nfs-ganesha/cephfs/filesystems').flush([{ id: 1, name: 'a' }]); httpTesting.expectOne('api/nfs-ganesha/export/mynfs/1').flush({ fsal: { name: 'RGW' @@ -69,12 +69,16 @@ describe('NfsFormComponent', () => { component = fixture.componentInstance; httpTesting = TestBed.inject(HttpTestingController); activatedRoute = <ActivatedRouteStub>TestBed.inject(ActivatedRoute); + router = TestBed.inject(Router); + + Object.defineProperty(router, 'url', { + get: jasmine.createSpy('url').and.returnValue('/cephfs/nfs') + }); RgwHelper.selectDaemon(); fixture.detectChanges(); - httpTesting.expectOne('ui-api/nfs-ganesha/fsals').flush(['CEPH', 'RGW']); - httpTesting.expectOne('ui-api/nfs-ganesha/cephfs/filesystems').flush([{ id: 1, name: 'a' }]); httpTesting.expectOne('api/nfs-ganesha/cluster').flush(['mynfs']); + httpTesting.expectOne('ui-api/nfs-ganesha/cephfs/filesystems').flush([{ id: 1, name: 'a' }]); httpTesting.verify(); }); @@ -82,21 +86,12 @@ describe('NfsFormComponent', () => { expect(component).toBeTruthy(); }); - it('should process all data', () => { - expect(component.allFsals).toEqual([ - { descr: 'CephFS', value: 'CEPH', disabled: false }, - { descr: 'Object Gateway', value: 'RGW', disabled: false } - ]); - expect(component.allFsNames).toEqual([{ id: 1, name: 'a' }]); - expect(component.allClusters).toEqual([{ cluster_id: 'mynfs' }]); - }); - it('should create the form', () => { expect(component.nfsForm.value).toEqual({ access_type: 'RW', clients: [], cluster_id: 'mynfs', - fsal: { fs_name: 'a', name: 'CEPH' }, + fsal: { fs_name: '', name: 'CEPH' }, path: '/', protocolNfsv4: true, protocolNfsv3: true, @@ -104,6 +99,8 @@ describe('NfsFormComponent', () => { sec_label_xattr: 'security.selinux', security_label: false, squash: 'no_root_squash', + subvolume: '', + subvolume_group: '', transportTCP: true, transportUDP: true }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts index 0543a9eb7ab..f43067f231d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts @@ -1,4 +1,4 @@ -import { ChangeDetectorRef, Component, OnInit, ViewChild } from '@angular/core'; +import { Component, OnInit, ViewChild } from '@angular/core'; import { AbstractControl, AsyncValidatorFn, @@ -12,7 +12,7 @@ import _ from 'lodash'; import { forkJoin, Observable, of } from 'rxjs'; import { catchError, debounceTime, distinctUntilChanged, map, mergeMap } from 'rxjs/operators'; -import { NfsFSAbstractionLayer } from '~/app/ceph/nfs/models/nfs.fsal'; +import { SUPPORTED_FSAL } from '~/app/ceph/nfs/models/nfs.fsal'; import { Directory, NfsService } from 
'~/app/shared/api/nfs.service'; import { RgwBucketService } from '~/app/shared/api/rgw-bucket.service'; import { RgwSiteService } from '~/app/shared/api/rgw-site.service'; @@ -28,6 +28,9 @@ import { CdHttpErrorResponse } from '~/app/shared/services/api-interceptor.servi import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { NfsFormClientComponent } from '../nfs-form-client/nfs-form-client.component'; +import { getFsalFromRoute, getPathfromFsal } from '../utils'; +import { CephfsSubvolumeService } from '~/app/shared/api/cephfs-subvolume.service'; +import { CephfsSubvolumeGroupService } from '~/app/shared/api/cephfs-subvolume-group.service'; @Component({ selector: 'cd-nfs-form', @@ -50,9 +53,10 @@ export class NfsFormComponent extends CdForm implements OnInit { allClusters: { cluster_id: string }[] = null; icons = Icons; - allFsals: any[] = []; allFsNames: any[] = null; - fsalAvailabilityError: string = null; + + storageBackend: SUPPORTED_FSAL; + storageBackendError: string = null; defaultAccessType = { RGW: 'RO' }; nfsAccessType: any[] = []; @@ -61,6 +65,10 @@ export class NfsFormComponent extends CdForm implements OnInit { action: string; resource: string; + allsubvolgrps: any[] = []; + allsubvols: any[] = []; + fsPath: string = null; + pathDataSource = (text$: Observable<string>) => { return text$.pipe( debounceTime(200), @@ -81,31 +89,35 @@ export class NfsFormComponent extends CdForm implements OnInit { constructor( private authStorageService: AuthStorageService, private nfsService: NfsService, + private subvolService: CephfsSubvolumeService, + private subvolgrpService: CephfsSubvolumeGroupService, private route: ActivatedRoute, private router: Router, private rgwBucketService: RgwBucketService, private rgwSiteService: RgwSiteService, private formBuilder: CdFormBuilder, private taskWrapper: TaskWrapperService, - private cdRef: ChangeDetectorRef, public actionLabels: ActionLabelsI18n ) { super(); this.permission = this.authStorageService.getPermissions().pool; this.resource = $localize`NFS export`; + this.storageBackend = getFsalFromRoute(this.router.url); } ngOnInit() { this.nfsAccessType = this.nfsService.nfsAccessType; this.nfsSquash = Object.keys(this.nfsService.nfsSquash); this.createForm(); - const promises: Observable<any>[] = [ - this.nfsService.listClusters(), - this.nfsService.fsals(), - this.nfsService.filesystems() - ]; + const promises: Observable<any>[] = [this.nfsService.listClusters()]; - if (this.router.url.startsWith('/nfs/edit')) { + if (this.storageBackend === 'RGW') { + promises.push(this.rgwSiteService.get('realms')); + } else { + promises.push(this.nfsService.filesystems()); + } + + if (this.router.url.startsWith(`/${getPathfromFsal(this.storageBackend)}/nfs/edit`)) { this.isEdit = true; } @@ -115,7 +127,6 @@ export class NfsFormComponent extends CdForm implements OnInit { this.cluster_id = decodeURIComponent(params.cluster_id); this.export_id = decodeURIComponent(params.export_id); promises.push(this.nfsService.get(this.cluster_id, this.export_id)); - this.getData(promises); }); this.nfsForm.get('cluster_id').disable(); @@ -129,22 +140,63 @@ export class NfsFormComponent extends CdForm implements OnInit { forkJoin(promises).subscribe((data: any[]) => { this.resolveClusters(data[0]); this.resolveFsals(data[1]); - this.resolveFilesystems(data[2]); - if (data[3]) { - this.resolveModel(data[3]); + if (data[2]) { + this.resolveModel(data[2]); } - 
this.loadingReady(); }); } + volumeChangeHandler() { + this.pathChangeHandler(); + const fs_name = this.nfsForm.getValue('fsal').fs_name; + this.getSubVolGrp(fs_name); + } + + getSubVol() { + this.getPath(); + const fs_name = this.nfsForm.getValue('fsal').fs_name; + const subvolgrp = this.nfsForm.getValue('subvolume_group'); + return this.subvolService.get(fs_name, subvolgrp).subscribe((data: any) => { + this.allsubvols = data; + }); + } + + getSubVolGrp(fs_name: string) { + return this.subvolgrpService.get(fs_name).subscribe((data: any) => { + this.allsubvolgrps = data; + }); + } + + getFsPath(volList: any[], value: string) { + const match = volList.find((vol) => vol.name === value); + if (match) { + return match.info.path; + } + } + + getPath() { + const subvol = this.nfsForm.getValue('subvolume'); + if (subvol === '') { + const subvolGroup = this.nfsForm.getValue('subvolume_group'); + this.fsPath = this.getFsPath(this.allsubvolgrps, subvolGroup); + } else { + this.fsPath = this.getFsPath(this.allsubvols, subvol); + } + this.nfsForm.patchValue({ + path: this.fsPath + }); + + this.pathChangeHandler(); + } + createForm() { this.nfsForm = new CdFormGroup({ cluster_id: new UntypedFormControl('', { validators: [Validators.required] }), fsal: new CdFormGroup({ - name: new UntypedFormControl('', { + name: new UntypedFormControl(this.storageBackend, { validators: [Validators.required] }), fs_name: new UntypedFormControl('', { @@ -155,7 +207,11 @@ export class NfsFormComponent extends CdForm implements OnInit { ] }) }), - path: new UntypedFormControl('/'), + subvolume_group: new UntypedFormControl(''), + subvolume: new UntypedFormControl(''), + path: new UntypedFormControl('/', { + validators: [Validators.required] + }), protocolNfsv3: new UntypedFormControl(true, { validators: [ CdValidators.requiredIf({ protocolNfsv4: false }, (value: boolean) => { @@ -247,87 +303,44 @@ export class NfsFormComponent extends CdForm implements OnInit { } resolveFsals(res: string[]) { - res.forEach((fsal) => { - const fsalItem = this.nfsService.nfsFsal.find((currentFsalItem) => { - return fsal === currentFsalItem.value; - }); - - if (_.isObjectLike(fsalItem)) { - this.allFsals.push(fsalItem); - } - }); - if (!this.isEdit && this.allFsals.length > 0) { + if (this.storageBackend === 'RGW') { + this.setPathValidation(); + this.resolveRealms(res); + } else { + this.resolveFilesystems(res); + } + if (!this.isEdit && this.storageBackend === SUPPORTED_FSAL.RGW) { this.nfsForm.patchValue({ - fsal: { - name: this.allFsals[0].value - } + path: '', + access_type: this.defaultAccessType[SUPPORTED_FSAL.RGW] }); } } resolveFilesystems(filesystems: any[]) { this.allFsNames = filesystems; - if (!this.isEdit && filesystems.length > 0) { - this.nfsForm.patchValue({ - fsal: { - fs_name: filesystems[0].name - } - }); - } - } - - fsalChangeHandler() { - this.setPathValidation(); - const fsalValue = this.nfsForm.getValue('name'); - const checkAvailability = - fsalValue === 'RGW' - ? this.rgwSiteService.get('realms').pipe( - mergeMap((realms: string[]) => - realms.length === 0 - ? of(true) - : this.rgwSiteService.isDefaultRealm().pipe( - mergeMap((isDefaultRealm) => { - if (!isDefaultRealm) { - throw new Error('Selected realm is not the default.'); - } - return of(true); - }) - ) - ) - ) - : this.nfsService.filesystems(); - - checkAvailability.subscribe({ - next: () => { - this.setFsalAvailability(fsalValue, true); - if (!this.isEdit) { - this.nfsForm.patchValue({ - path: fsalValue === 'RGW' ? 
'' : '/', - pseudo: this.generatePseudo(), - access_type: this.updateAccessType() - }); - } - - this.cdRef.detectChanges(); - }, - error: (error) => { - this.setFsalAvailability(fsalValue, false, error); - this.nfsForm.get('name').setValue(''); - } - }); } - private setFsalAvailability(fsalValue: string, available: boolean, errorMessage: string = '') { - this.allFsals = this.allFsals.map((fsalItem: NfsFSAbstractionLayer) => { - if (fsalItem.value === fsalValue) { - fsalItem.disabled = !available; - - this.fsalAvailabilityError = fsalItem.disabled - ? $localize`${fsalItem.descr} backend is not available. ${errorMessage}` - : null; - } - return fsalItem; - }); + resolveRealms(realms: string[]) { + if (realms.length !== 0) { + this.rgwSiteService + .isDefaultRealm() + .pipe( + mergeMap((isDefaultRealm) => { + if (!isDefaultRealm) { + throw new Error('Selected realm is not the default.'); + } + return of(true); + }) + ) + .subscribe({ + error: (error) => { + const fsalDescr = this.nfsService.nfsFsal.find((f) => f.value === this.storageBackend) + .descr; + this.storageBackendError = $localize`${fsalDescr} backend is not available. ${error}`; + } + }); + } } accessTypeChangeHandler() { @@ -338,8 +351,7 @@ export class NfsFormComponent extends CdForm implements OnInit { setPathValidation() { const path = this.nfsForm.get('path'); - path.setValidators([Validators.required]); - if (this.nfsForm.getValue('name') === 'RGW') { + if (this.storageBackend === SUPPORTED_FSAL.RGW) { path.setAsyncValidators([CdValidators.bucketExistence(true, this.rgwBucketService)]); } else { path.setAsyncValidators([this.pathExistence(true)]); @@ -410,7 +422,7 @@ export class NfsFormComponent extends CdForm implements OnInit { let newPseudo = this.nfsForm.getValue('pseudo'); if (this.nfsForm.get('pseudo') && !this.nfsForm.get('pseudo').dirty) { newPseudo = undefined; - if (this.nfsForm.getValue('fsal') === 'CEPH') { + if (this.storageBackend === 'CEPH') { newPseudo = '/cephfs'; if (_.isString(this.nfsForm.getValue('path'))) { newPseudo += this.nfsForm.getValue('path'); @@ -420,17 +432,6 @@ export class NfsFormComponent extends CdForm implements OnInit { return newPseudo; } - private updateAccessType() { - const name = this.nfsForm.getValue('name'); - let accessType = this.defaultAccessType[name]; - - if (!accessType) { - accessType = 'RW'; - } - - return accessType; - } - submitAction() { let action: Observable<any>; const requestModel = this.buildRequest(); @@ -457,7 +458,7 @@ export class NfsFormComponent extends CdForm implements OnInit { action.subscribe({ error: (errorResponse: CdHttpErrorResponse) => this.setFormErrors(errorResponse), - complete: () => this.router.navigate(['/nfs']) + complete: () => this.router.navigate([`/${getPathfromFsal(this.storageBackend)}/nfs`]) }); } @@ -484,6 +485,9 @@ export class NfsFormComponent extends CdForm implements OnInit { delete requestModel.fsal.fs_name; } + delete requestModel.subvolume; + delete requestModel.subvolume_group; + requestModel.protocols = []; if (requestModel.protocolNfsv3) { requestModel.protocols.push(3); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.spec.ts index 5e43cdd658c..1e82919f402 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.spec.ts @@ -17,6 +17,7 @@ import { SharedModule } from 
'~/app/shared/shared.module'; import { configureTestBed, expectItemTasks, PermissionHelper } from '~/testing/unit-test-helper'; import { NfsDetailsComponent } from '../nfs-details/nfs-details.component'; import { NfsListComponent } from './nfs-list.component'; +import { SUPPORTED_FSAL } from '../models/nfs.fsal'; describe('NfsListComponent', () => { let component: NfsListComponent; @@ -45,6 +46,7 @@ describe('NfsListComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(NfsListComponent); component = fixture.componentInstance; + component.fsal = SUPPORTED_FSAL.CEPH; summaryService = TestBed.inject(SummaryService); nfsService = TestBed.inject(NfsService); httpTesting = TestBed.inject(HttpTestingController); @@ -89,7 +91,9 @@ describe('NfsListComponent', () => { const model = { export_id: export_id, path: 'path_' + export_id, - fsal: 'fsal_' + export_id, + fsal: { + name: 'CEPH' + }, cluster_id: 'cluster_' + export_id }; exports.push(model); @@ -102,7 +106,9 @@ describe('NfsListComponent', () => { case 'nfs/create': task.metadata = { path: 'path_' + export_id, - fsal: 'fsal_' + export_id, + fsal: { + name: 'CEPH' + }, cluster_id: 'cluster_' + export_id }; break; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.ts index d5d0c263930..8be95c6febe 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-list/nfs-list.component.ts @@ -1,4 +1,5 @@ import { Component, OnDestroy, OnInit, TemplateRef, ViewChild } from '@angular/core'; +import { Router } from '@angular/router'; import { NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; import _ from 'lodash'; @@ -22,6 +23,8 @@ import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { ModalService } from '~/app/shared/services/modal.service'; import { TaskListService } from '~/app/shared/services/task-list.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { getFsalFromRoute, getPathfromFsal } from '../utils'; +import { SUPPORTED_FSAL } from '../models/nfs.fsal'; @Component({ selector: 'cd-nfs-list', @@ -46,6 +49,7 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr exports: any[]; tableActions: CdTableAction[]; isDefaultCluster = false; + fsal: SUPPORTED_FSAL; modalRef: NgbModalRef; @@ -65,10 +69,13 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr private nfsService: NfsService, private taskListService: TaskListService, private taskWrapper: TaskWrapperService, + private router: Router, public actionLabels: ActionLabelsI18n ) { super(); this.permission = this.authStorageService.getPermissions().nfs; + this.fsal = getFsalFromRoute(this.router.url); + const prefix = getPathfromFsal(this.fsal); const getNfsUri = () => this.selection.first() && `${encodeURI(this.selection.first().cluster_id)}/${encodeURI( @@ -78,7 +85,7 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr const createAction: CdTableAction = { permission: 'create', icon: Icons.add, - routerLink: () => '/nfs/create', + routerLink: () => `/${prefix}/nfs/create`, canBePrimary: (selection: CdTableSelection) => !selection.hasSingleSelection, name: this.actionLabels.CREATE }; @@ -86,7 +93,7 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr const 
editAction: CdTableAction = { permission: 'update', icon: Icons.edit, - routerLink: () => `/nfs/edit/${getNfsUri()}`, + routerLink: () => `/${prefix}/nfs/edit/${getNfsUri()}`, name: this.actionLabels.EDIT }; @@ -103,7 +110,7 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr ngOnInit() { this.columns = [ { - name: $localize`Path`, + name: this.fsal === SUPPORTED_FSAL.CEPH ? $localize`Path` : $localize`Bucket`, prop: 'path', flexGrow: 2, cellTransformation: CellTemplate.executing @@ -150,12 +157,12 @@ export class NfsListComponent extends ListWithDetails implements OnInit, OnDestr prepareResponse(resp: any): any[] { let result: any[] = []; - resp.forEach((nfs: any) => { + const filteredresp = resp.filter((nfs: any) => nfs.fsal?.name === this.fsal); + filteredresp.forEach((nfs: any) => { nfs.id = `${nfs.cluster_id}:${nfs.export_id}`; nfs.state = 'LOADING'; result = result.concat(nfs); }); - return result; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs.module.ts index 4205eb63b26..afd52472c54 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs.module.ts @@ -21,6 +21,7 @@ import { NfsListComponent } from './nfs-list/nfs-list.component'; NgbTypeaheadModule, NgbTooltipModule ], + exports: [NfsListComponent, NfsFormComponent, NfsDetailsComponent], declarations: [NfsListComponent, NfsDetailsComponent, NfsFormComponent, NfsFormClientComponent] }) export class NfsModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/utils.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/utils.ts new file mode 100644 index 00000000000..2cdd7bbc7e6 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/utils.ts @@ -0,0 +1,7 @@ +import { SUPPORTED_FSAL } from './models/nfs.fsal'; + +export const getFsalFromRoute = (url: string): SUPPORTED_FSAL => + url.startsWith('/rgw/nfs') ? SUPPORTED_FSAL.RGW : SUPPORTED_FSAL.CEPH; + +export const getPathfromFsal = (fsal: SUPPORTED_FSAL): string => + fsal === SUPPORTED_FSAL.CEPH ? 
'cephfs' : 'rgw'; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html index b186677c5c5..7888fa853e3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.html @@ -252,6 +252,46 @@ </div> </div> + <div class="form-group row"> + <label for="crushNumFailureDomains" + class="cd-col-form-label"> + <ng-container i18n>Crush num failure domain</ng-container> + <cd-helper [html]="tooltips.crushNumFailureDomains"> + </cd-helper> + </label> + <div class="cd-col-form-input"> + <input type="number" + id="crushNumFailureDomains" + name="crushNumFailureDomains" + class="form-control" + formControlName="crushNumFailureDomains" + min="0"> + <span class="invalid-feedback" + *ngIf="form.showError('crushNumFailureDomains', frm, 'required')" + i18n>This field is required when crush osds per failure domain is set!</span> + </div> + </div> + + <div class="form-group row"> + <label for="crushOsdsPerFailureDomain" + class="cd-col-form-label"> + <ng-container i18n>Crush osds per failure domain</ng-container> + <cd-helper [html]="tooltips.crushOsdsPerFailureDomain"> + </cd-helper> + </label> + <div class="cd-col-form-input"> + <input type="number" + id="crushOsdsPerFailureDomain" + name="crushOsdsPerFailureDomain" + class="form-control" + formControlName="crushOsdsPerFailureDomain" + min="0"> + <span class="invalid-feedback" + *ngIf="form.showError('crushOsdsPerFailureDomain', frm, 'required')" + i18n>This field is required when crush num failure domain is set!</span> + </div> + </div> + <div class="form-group row" *ngIf="plugin === PLUGIN.LRC"> <label for="crushLocality" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.ts index 01f7dcb1ee9..5982dfe24fb 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/erasure-code-profile-form/erasure-code-profile-form-modal.component.ts @@ -89,6 +89,14 @@ export class ErasureCodeProfileFormModalComponent [Validators.required, CdValidators.custom('max', () => this.baseValueValidation())] ], crushFailureDomain: '', // Will be preselected + crushNumFailureDomains: [ + 0, + CdValidators.requiredIf({ crushOsdsPerFailureDomain: { op: 'minValue', arg1: 1 } }) + ], + crushOsdsPerFailureDomain: [ + 0, + CdValidators.requiredIf({ crushNumFailureDomains: { op: 'minValue', arg1: 1 } }) + ], crushRoot: null, // Will be preselected crushDeviceClass: '', // Will be preselected directory: '', @@ -448,6 +456,8 @@ export class ErasureCodeProfileFormModalComponent private extendJson(name: string, ecp: ErasureCodeProfile) { const differentApiAttributes = { crushFailureDomain: 'crush-failure-domain', + crushNumFailureDomains: 'crush-num-failure-domains', + crushOsdsPerFailureDomain: 'crush-osds-per-failure-domain', crushRoot: 'crush-root', crushDeviceClass: 'crush-device-class', packetSize: 'packetsize', diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html index 6a37c6a0e58..b76e45225f6 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.html @@ -171,11 +171,16 @@ </div> <!-- Applications --> <div class="form-group row"> - <label i18n - class="cd-col-form-label" - for="applications">Applications</label> + <label class="cd-col-form-label required" + for="applications"> + <ng-container i18n>Applications</ng-container> + <cd-helper> + <span i18n>Pools need to be associated with an application before use</span> + </cd-helper> + </label> <div class="cd-col-form-input"> <cd-select-badges id="applications" + name="applications" [customBadges]="true" [customBadgeValidators]="data.applications.validators" [messages]="data.applications.messages" @@ -189,6 +194,28 @@ title="Pools should be associated with an application tag" class="{{icons.warning}} icon-warning-color"> </i> + <span class="invalid-feedback" + *ngIf="!isApplicationsSelected && data.applications.selected <= 0" + i18n>Application selection is required!</span> + </div> + </div> + <!-- Mirroring --> + <div class="form-group row" + *ngIf="data.applications.selected.includes('rbd')"> + <div class="cd-col-form-offset"> + <div class="custom-control custom-checkbox"> + <input class="custom-control-input" + id="rbdMirroring" + name="rbdMirroring" + type="checkbox" + formControlName="rbdMirroring"> + <label class="custom-control-label" + for="rbdMirroring" + i18n>Mirroring</label> + <cd-help-text> + <span i18n>Check this option to enable pool-based mirroring on a Block (RBD) pool.</span> + </cd-help-text> + </div> </div> </div> <!-- CRUSH --> @@ -207,7 +234,8 @@ <select class="form-select" id="erasureProfile" name="erasureProfile" - formControlName="erasureProfile"> + formControlName="erasureProfile" + (change)="erasureProfileChange()"> <option *ngIf="!ecProfiles" ngValue="" i18n>Loading...</option> @@ -294,8 +322,14 @@ for="crushRule" i18n>Crush ruleset</label> <div class="cd-col-form-input"> - <span class="form-text text-muted" + <span *ngIf="!msrCrush; else msrCrushText" + class="form-text text-muted" i18n>A new crush ruleset will be implicitly created.</span> + <ng-template #msrCrushText> + <span class="form-text text-muted" + i18n>A new crush MSR ruleset will be implicitly created
+ when crush-osds-per-failure-domain or crush-num-failure-domains is specified.</span> + </ng-template> </div> </div> <div class="form-group row" diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.spec.ts index 7e2bccb32dd..caf8c0b6a71 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.spec.ts @@ -1099,6 +1099,7 @@ describe('PoolFormComponent', () => { // Mock that no ec profiles exist infoReturn.erasure_code_profiles = []; setUpPoolComponent(); + component.data.applications.selected = ['cephfs', 'rgw']; setMultipleValues({ name: 'minECPool', poolType: 'erasure', @@ -1108,37 +1109,46 @@ pool: 'minECPool', pool_type: 'erasure', pg_autoscale_mode: 'off', - pg_num: 4 + pg_num: 4, + application_metadata: ['cephfs', 'rgw'] }); }); it('creates ec pool with erasure coded profile', () => { + component.data.applications.selected = ['cephfs', 'rgw']; const ecp = { name: 'ecpMinimalMock' }; setMultipleValues({ erasureProfile: ecp }); expectEcSubmit({ - erasure_code_profile: ecp.name + erasure_code_profile: ecp.name, + application_metadata: ['cephfs', 'rgw'] }); }); it('creates ec pool with ec_overwrite flag', () => { + component.data.applications.selected = ['cephfs', 'rgw']; setMultipleValues({ ecOverwrites: true }); expectEcSubmit({ - flags: ['ec_overwrites'] + flags: ['ec_overwrites'], + application_metadata: ['cephfs', 'rgw'] }); }); it('should ignore replicated set settings for ec pools', () => { + component.data.applications.selected = ['cephfs', 'rgw']; setMultipleValues({ size: 2 // will be ignored }); - expectEcSubmit({}); + expectEcSubmit({ + application_metadata: ['cephfs', 'rgw'] + }); }); it('creates a pool with compression', () => { + component.data.applications.selected = ['cephfs', 'rgw']; setMultipleValues({ mode: 'passive', algorithm: 'lz4', @@ -1151,7 +1161,8 @@ compression_algorithm: 'lz4', compression_min_blob_size: 4096, compression_max_blob_size: 4194304, - compression_required_ratio: 0.7 + compression_required_ratio: 0.7, + application_metadata: ['cephfs', 'rgw'] }); }); @@ -1199,12 +1210,14 @@ size: 2, pgNum: 32 }); + component.data.applications.selected = ['cephfs', 'rgw']; expectValidSubmit({ pool: 'minRepPool', pool_type: 'replicated', pg_num: 32, pg_autoscale_mode: 'off', - size: 2 + size: 2, + application_metadata: ['cephfs', 'rgw'] }); }); @@ -1218,8 +1231,10 @@ * if type `replicated` is set, pgNum will be set to 256 with the current rule for * a replicated pool.
*/ + component.data.applications.selected = ['cephfs', 'rgw']; expectReplicatedSubmit({ - pg_num: 256 + pg_num: 256, + application_metadata: ['cephfs', 'rgw'] }); }); @@ -1228,9 +1243,11 @@ describe('PoolFormComponent', () => { max_bytes: 1024 * 1024, max_objects: 3000 }); + component.data.applications.selected = ['cephfs', 'rgw']; expectReplicatedSubmit({ quota_max_bytes: 1024 * 1024, - quota_max_objects: 3000 + quota_max_objects: 3000, + application_metadata: ['cephfs', 'rgw'] }); }); @@ -1238,10 +1255,12 @@ describe('PoolFormComponent', () => { component.currentConfigurationValues = { rbd_qos_bps_limit: 55 }; + component.data.applications.selected = ['cephfs', 'rgw']; expectReplicatedSubmit({ configuration: { rbd_qos_bps_limit: 55 - } + }, + application_metadata: ['cephfs', 'rgw'] }); }); }); @@ -1384,7 +1403,8 @@ describe('PoolFormComponent', () => { compression_max_blob_size: 0, compression_min_blob_size: 0, compression_required_ratio: 0, - pool: 'somePoolName' + pool: 'somePoolName', + rbd_mirroring: false }, 'pool/edit', 'update' @@ -1397,7 +1417,8 @@ describe('PoolFormComponent', () => { { application_metadata: ['ownApp', 'rbd'], compression_mode: 'unset', - pool: 'somePoolName' + pool: 'somePoolName', + rbd_mirroring: false }, 'pool/edit', 'update' diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts index a3f6b6fd16e..c1a54223dcc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/pool/pool-form/pool-form.component.ts @@ -37,6 +37,8 @@ import { CrushRuleFormModalComponent } from '../crush-rule-form-modal/crush-rule import { ErasureCodeProfileFormModalComponent } from '../erasure-code-profile-form/erasure-code-profile-form-modal.component'; import { Pool } from '../pool'; import { PoolFormData } from './pool-form-data'; +import { PoolEditModeResponseModel } from '../../block/mirroring/pool-edit-mode-modal/pool-edit-mode-response.model'; +import { RbdMirroringService } from '~/app/shared/api/rbd-mirroring.service'; interface FormFieldDescription { externalFieldName: string; @@ -84,6 +86,8 @@ export class PoolFormComponent extends CdForm implements OnInit { ecpUsage: string[] = undefined; // Will only be set if a rule is used by some pool crushRuleMaxSize = 10; DEFAULT_RATIO = 0.875; + isApplicationsSelected = true; + msrCrush: boolean = false; private modalSubscription: Subscription; @@ -98,7 +102,8 @@ export class PoolFormComponent extends CdForm implements OnInit { private taskWrapper: TaskWrapperService, private ecpService: ErasureCodeProfileService, private crushRuleService: CrushRuleService, - public actionLabels: ActionLabelsI18n + public actionLabels: ActionLabelsI18n, + private rbdMirroringService: RbdMirroringService ) { super(); this.editing = this.router.url.startsWith(`/pool/${URLVerbs.EDIT}`); @@ -177,7 +182,8 @@ export class PoolFormComponent extends CdForm implements OnInit { ecOverwrites: new UntypedFormControl(false), compression: compressionForm, max_bytes: new UntypedFormControl(''), - max_objects: new UntypedFormControl(0) + max_objects: new UntypedFormControl(0), + rbdMirroring: new UntypedFormControl(false) }, [CdValidators.custom('form', (): null => null)] ); @@ -195,6 +201,7 @@ export class PoolFormComponent extends CdForm implements OnInit { this.listenToChanges(); this.setComplexValidators(); }); + this.erasureProfileChange(); } 
private initInfo(info: PoolFormInfo) { @@ -285,6 +292,11 @@ export class PoolFormComponent extends CdForm implements OnInit { this.data.pgs = this.form.getValue('pgNum'); this.setAvailableApps(this.data.applications.default.concat(pool.application_metadata)); this.data.applications.selected = pool.application_metadata; + this.rbdMirroringService + .getPool(pool.pool_name) + .subscribe((resp: PoolEditModeResponseModel) => { + this.form.get('rbdMirroring').setValue(resp.mirror_mode === 'pool'); + }); } private setAvailableApps(apps: string[] = this.data.applications.default) { @@ -776,7 +788,14 @@ export class PoolFormComponent extends CdForm implements OnInit { formControlName: 'max_objects', editable: true, resetValue: this.editing ? 0 : undefined - } + }, + this.data.applications.selected.includes('rbd') + ? { externalFieldName: 'rbd_mirroring', formControlName: 'rbdMirroring' } + : { + externalFieldName: 'rbd_mirroring', + formControlName: 'rbdMirroring', + resetValue: undefined + } ]); if (this.info.is_all_bluestore) { @@ -841,6 +860,12 @@ export class PoolFormComponent extends CdForm implements OnInit { const apps = this.data.applications.selected; if (apps.length > 0 || this.editing) { pool['application_metadata'] = apps; + if (apps.includes('rbd')) { + pool['rbd_mirroring'] = this.form.getValue('rbdMirroring'); + } + this.isApplicationsSelected = true; + } else { + this.isApplicationsSelected = false; } // Only collect configuration data for replicated pools, as QoS cannot be configured on EC @@ -849,6 +874,11 @@ export class PoolFormComponent extends CdForm implements OnInit { pool['configuration'] = this.currentConfigurationValues; } + if (!this.isApplicationsSelected) { + this.form.setErrors({ cdSubmitButton: true }); + return; + } + this.triggerApiTask(pool); } @@ -893,10 +923,11 @@ export class PoolFormComponent extends CdForm implements OnInit { } private triggerApiTask(pool: Record<string, any>) { + const poolName = pool.hasOwnProperty('srcpool') ? pool.srcpool : pool.pool; this.taskWrapper .wrapTaskAroundCall({ task: new FinishedTask('pool/' + (this.editing ? URLVerbs.EDIT : URLVerbs.CREATE), { - pool_name: pool.hasOwnProperty('srcpool') ? pool.srcpool : pool.pool + pool_name: poolName }), call: this.poolService[this.editing ? 
URLVerbs.UPDATE : URLVerbs.CREATE](pool) }) @@ -914,4 +945,12 @@ export class PoolFormComponent extends CdForm implements OnInit { appSelection() { this.form.get('name').updateValueAndValidity({ emitEvent: false, onlySelf: true }); } + + erasureProfileChange() { + const profile = this.form.get('erasureProfile').value; + if (profile) { + this.msrCrush = + profile['crush-num-failure-domains'] > 0 || profile['crush-osds-per-failure-domain'] > 0; + } + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-bucket-encryption.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-bucket-encryption.ts index e4f81f643c4..5dd7c51de6b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-bucket-encryption.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-bucket-encryption.ts @@ -1,7 +1,37 @@ -export class RgwBucketEncryptionModel { - kmsProviders = ['vault']; - authMethods = ['token', 'agent']; - secretEngines = ['kv', 'transit']; - sse_s3 = 'AES256'; - sse_kms = 'aws:kms'; +enum KmsProviders { + Vault = 'vault' } + +enum AuthMethods { + Token = 'token', + Agent = 'agent' +} + +enum SecretEngines { + KV = 'kv', + Transit = 'transit' +} + +enum sseS3 { + SSE_S3 = 'AES256' +} + +enum sseKms { + SSE_KMS = 'aws:kms' +} + +interface RgwBucketEncryptionModel { + kmsProviders: KmsProviders[]; + authMethods: AuthMethods[]; + secretEngines: SecretEngines[]; + SSE_S3: sseS3; + SSE_KMS: sseKms; +} + +export const rgwBucketEncryptionModel: RgwBucketEncryptionModel = { + kmsProviders: [KmsProviders.Vault], + authMethods: [AuthMethods.Token, AuthMethods.Agent], + secretEngines: [SecretEngines.KV, SecretEngines.Transit], + SSE_S3: sseS3.SSE_S3, + SSE_KMS: sseKms.SSE_KMS +}; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-daemon.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-daemon.ts index c685ba02700..179d7b5ab9a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-daemon.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-daemon.ts @@ -5,6 +5,7 @@ export class RgwDaemon { server_hostname: string; realm_name: string; zonegroup_name: string; + zonegroup_id: string; zone_name: string; default: boolean; port: number; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-multisite.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-multisite.ts index 1729f6418b2..f2fc381e806 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-multisite.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/models/rgw-multisite.ts @@ -50,3 +50,9 @@ export class SystemKey { access_key: string; secret_key: string; } + +export enum RgwMultisiteSyncPolicyStatus { + ENABLED = 'enabled', + FORBIDDEN = 'forbidden', + ALLOWED = 'allowed' +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html index a68ea5661e2..74b3e953b52 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html @@ -22,6 +22,11 @@ </tr> <tr> <td i18n + class="bold">Replication</td> + <td>{{ replicationStatus }}</td> + </tr> + <tr> + <td i18n class="bold">MFA Delete</td> <td>{{ selection.mfa_delete }}</td> </tr> @@ 
-122,44 +127,74 @@ <a ngbNavLink i18n>Policies</a> <ng-template ngbNavContent> - - <table class="table table-striped table-bordered"> - <tbody> - <tr> - <td i18n - class="bold w-25">Bucket policy</td> - <td><pre>{{ selection.bucket_policy | json}}</pre></td> - </tr> - <tr> - <td i18n - class="bold w-25">ACL</td> - <td> - <table class="table"> - <thead> - <tr i18n> - <th>Grantee</th> - <th>Permissions</th> - </tr> - </thead> - <tbody> - <tr i18n> - <td>Bucket Owner</td> - <td>{{ aclPermissions.Owner || '-'}}</td> - </tr> - <tr i18n> - <td>Everyone</td> - <td>{{ aclPermissions.AllUsers || '-'}}</td> - </tr> - <tr i18n> - <td>Authenticated users group</td> - <td>{{ aclPermissions.AuthenticatedUsers || '-'}}</td> - </tr> - </tbody> - </table> - </td> - </tr> - </tbody> - </table> + <div class="table-scroller"> + <table class="table table-striped table-bordered"> + <tbody> + <tr> + <td i18n + class="bold w-25">Bucket policy</td> + <td><pre>{{ selection.bucket_policy | json}}</pre></td> + </tr> + <tr> + <td i18n + class="bold w-25">Lifecycle + <div *ngIf="(selection.lifecycle | json) !== '{}'" + class="input-group"> + <button type="button" + class="btn btn-light" + [ngClass]="{'active': lifecycleFormat === 'json'}" + (click)="lifecycleFormat = 'json'"> + JSON + </button> + <button type="button" + class="btn btn-light" + [ngClass]="{'active': lifecycleFormat === 'xml'}" + (click)="lifecycleFormat = 'xml'"> + XML + </button> + </div> + </td> + <td> + <pre *ngIf="lifecycleFormat === 'json'">{{selection.lifecycle | json}}</pre> + <pre *ngIf="lifecycleFormat === 'xml'">{{ (selection.lifecycle | xml) || '-'}}</pre> + </td> + </tr> + <tr> + <td i18n + class="bold w-25">Replication policy</td> + <td><pre>{{ selection.replication | json}}</pre></td> + </tr> + <tr> + <td i18n + class="bold w-25">ACL</td> + <td> + <table class="table"> + <thead> + <tr i18n> + <th>Grantee</th> + <th>Permissions</th> + </tr> + </thead> + <tbody> + <tr i18n> + <td>Bucket Owner</td> + <td>{{ aclPermissions.Owner || '-'}}</td> + </tr> + <tr i18n> + <td>Everyone</td> + <td>{{ aclPermissions.AllUsers || '-'}}</td> + </tr> + <tr i18n> + <td>Authenticated users group</td> + <td>{{ aclPermissions.AuthenticatedUsers || '-'}}</td> + </tr> + </tbody> + </table> + </td> + </tr> + </tbody> + </table> + </div> </ng-template> </ng-container> </nav> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.scss index d293c9d9819..4d05a9f5df7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.scss @@ -5,3 +5,9 @@ table { table td { word-wrap: break-word; } + +.table-scroller { + height: 100%; + max-height: 50vh; + overflow: auto; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.ts index 451a7dd3811..15382c9fc31 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.ts @@ -13,7 +13,9 @@ export class RgwBucketDetailsComponent implements OnChanges { @Input() selection: any;
+ lifecycleFormat: 'json' | 'xml' = 'json'; aclPermissions: Record<string, string[]> = {}; + replicationStatus = $localize`Disabled`; constructor(private rgwBucketService: RgwBucketService) {} @@ -22,7 +24,13 @@ export class RgwBucketDetailsComponent implements OnChanges { this.rgwBucketService.get(this.selection.bid).subscribe((bucket: object) => { bucket['lock_retention_period_days'] = this.rgwBucketService.getLockDays(bucket); this.selection = bucket; + if (this.lifecycleFormat === 'json' && !this.selection.lifecycle) { + this.selection.lifecycle = {}; + } this.aclPermissions = this.parseXmlAcl(this.selection.acl, this.selection.owner); + if (this.selection.replication?.['Rule']?.['Status']) { + this.replicationStatus = this.selection.replication?.['Rule']?.['Status']; + } }); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html index 551aad7ac55..f77526be779 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.html @@ -296,12 +296,11 @@ name="encryption_enabled" formControlName="encryption_enabled" type="checkbox" - [attr.disabled]="!kmsVaultConfig && !s3VaultConfig ? true : null"/> + [attr.disabled]="!kmsConfigured && !s3Configured ? true : null"/> <cd-help-text aria-label="encryption helper"> <span i18n>Enables encryption for the objects in the bucket. To enable encryption on a bucket you need to set the configuration values for SSE-S3 or SSE-KMS. - To set the configuration values <a href="#/rgw/bucket/create" - (click)="openConfigModal()" + To set the configuration values <a href="#/rgw/configuration" aria-label="click here">Click here</a></span> </cd-help-text> </div> @@ -317,10 +316,11 @@ type="radio" name="encryption_type" value="AES256" - [attr.disabled]="!s3VaultConfig ? true : null"> + [attr.disabled]="!s3Configured ? true : null"> <label class="form-control-label" + [ngClass]="{'text-muted': !s3Configured}" for="sse_S3_enabled" - i18n>SSE-S3 Encryption</label> + i18n>SSE-S3</label> </div> </div> </div> @@ -333,9 +333,10 @@ id="kms_enabled" name="encryption_type" value="aws:kms" - [attr.disabled]="!kmsVaultConfig ? true : null" + [attr.disabled]="!kmsConfigured ? true : null" type="radio"> <label class="form-control-label" + [ngClass]="{'text-muted': !kmsConfigured}" for="kms_enabled" i18n>Connect to an external key management service</label> </div> @@ -387,6 +388,41 @@ </div> </fieldset> + <!-- Replication --> + <fieldset> + <legend class="cd-header" + i18n>Replication</legend> + <div class="form-group row"> + <label class="cd-col-form-label pt-0" + for="replication" + i18n> + Enable + </label> + <div class="cd-col-form-input" + *ngIf="{status: multisiteStatus$, isDefaultZg: isDefaultZoneGroup$ | async} as multisiteStatus; else loadingTpl"> + <input type="checkbox" + class="form-check-input" + id="replication" + name="replication" + formControlName="replication" + [attr.disabled]="!multisiteStatus.isDefaultZg && !multisiteStatus.status.available ? 
true : null"> + <cd-help-text> + <span i18n>Enables replication for the objects in the bucket.</span> + </cd-help-text> + <div class="mt-1" + *ngIf="!editing"> + <cd-alert-panel type="info" + class="me-1" + id="replication-info" + i18n> + A bi-directional sync policy group will be created by the dashboard along with flows and pipes. + The pipe id will then be used for applying the replication policy to the bucket. + </cd-alert-panel> + </div> + </div> + </div> + </fieldset> + <!-- Tags --> <fieldset> <legend class="cd-header" @@ -432,15 +468,15 @@ class="form-control resize-vertical" id="bucket_policy" formControlName="bucket_policy" - (change)="bucketPolicyOnChange()"> + (change)="textAreaOnChange('bucketPolicyTextArea')"> </textarea> <span class="invalid-feedback" *ngIf="bucketForm.showError('bucket_policy', frm, 'invalidJson')" - i18n>Invalid json text</span> + i18n>Invalid json text.</span> <button type="button" id="clear-bucket-policy" class="btn btn-light my-3" - (click)="clearBucketPolicy()" + (click)="clearTextArea('bucket_policy', '{}')" i18n> <i [ngClass]="[icons.destroy]"></i> Clear @@ -468,6 +504,50 @@ </div> </div> + <!-- Lifecycle --> + <div *ngIf="editing" + class="form-group row"> + <label i18n + class="cd-col-form-label" + for="id">Lifecycle + <cd-helper>JSON or XML formatted document</cd-helper> + </label> + <div class="cd-col-form-input"> + <textarea #lifecycleTextArea + class="form-control resize-vertical" + id="lifecycle" + formControlName="lifecycle" + (change)="textAreaOnChange('lifecycleTextArea')"> + </textarea> + <span class="invalid-feedback" + *ngIf="bucketForm.showError('lifecycle', frm, 'invalidJson')" + i18n>Invalid json text.</span> + <span class="invalid-feedback" + *ngIf="bucketForm.showError('lifecycle', frm, 'invalidXml')" + i18n>Invalid xml text.</span> + <button type="button" + id="clear-lifecycle" + class="btn btn-light my-3" + (click)="clearTextArea('lifecycle', '{}')" + i18n> + <i [ngClass]="[icons.destroy]"></i> + Clear + </button> + <div class="btn-group float-end" + role="group" + aria-label="bucket-policy-helpers"> + <button type="button" + id="lifecycle-examples-button" + class="btn btn-light my-3" + (click)="openUrl('https://docs.aws.amazon.com/cli/latest/reference/s3api/put-bucket-lifecycle.html#examples')" + i18n> + <i [ngClass]="[icons.externalUrl]"></i> + Policy examples + </button> + </div> + </div> + </div> + <div class="form-group row"> <!-- ACL --> @@ -594,3 +674,9 @@ </button> </div> </ng-template> + +<ng-template #loadingTpl> + <div class="cd-col-form-input"> + <cd-loading-panel i18n>Checking multi-site status...</cd-loading-panel> + </div> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.spec.ts index 44318eda88e..34619824f20 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.spec.ts @@ -307,4 +307,12 @@ describe('RgwBucketFormComponent', () => { expectValidLockInputs(false, 'Compliance', '2'); }); }); + + describe('bucket replication', () => { + it('should validate replication input', () => { + formHelper.setValue('replication', true); + fixture.detectChanges(); + formHelper.expectValid('replication'); + }); + }); }); diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts index 836ab3d301b..d82c71e3cf7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-form/rgw-bucket-form.component.ts @@ -10,7 +10,7 @@ import { AbstractControl, Validators } from '@angular/forms'; import { ActivatedRoute, Router } from '@angular/router'; import _ from 'lodash'; -import { forkJoin } from 'rxjs'; +import { Observable, forkJoin } from 'rxjs'; import * as xml2js from 'xml2js'; import { RgwBucketService } from '~/app/shared/api/rgw-bucket.service'; @@ -25,7 +25,7 @@ import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { CdValidators } from '~/app/shared/forms/cd-validators'; import { ModalService } from '~/app/shared/services/modal.service'; import { NotificationService } from '~/app/shared/services/notification.service'; -import { RgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; +import { rgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; import { RgwBucketMfaDelete } from '../models/rgw-bucket-mfa-delete'; import { AclPermissionsType, @@ -33,19 +33,23 @@ import { RgwBucketAclGrantee as Grantee } from './rgw-bucket-acl-permissions.enum'; import { RgwBucketVersioning } from '../models/rgw-bucket-versioning'; -import { RgwConfigModalComponent } from '../rgw-config-modal/rgw-config-modal.component'; import { BucketTagModalComponent } from '../bucket-tag-modal/bucket-tag-modal.component'; import { TextAreaJsonFormatterService } from '~/app/shared/services/text-area-json-formatter.service'; +import { RgwMultisiteService } from '~/app/shared/api/rgw-multisite.service'; +import { RgwDaemonService } from '~/app/shared/api/rgw-daemon.service'; +import { map, switchMap } from 'rxjs/operators'; +import { TextAreaXmlFormatterService } from '~/app/shared/services/text-area-xml-formatter.service'; @Component({ selector: 'cd-rgw-bucket-form', templateUrl: './rgw-bucket-form.component.html', - styleUrls: ['./rgw-bucket-form.component.scss'], - providers: [RgwBucketEncryptionModel] + styleUrls: ['./rgw-bucket-form.component.scss'] }) export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewChecked { @ViewChild('bucketPolicyTextArea') public bucketPolicyTextArea: ElementRef<any>; + @ViewChild('lifecycleTextArea') + public lifecycleTextArea: ElementRef<any>; bucketForm: CdFormGroup; editing = false; @@ -58,8 +62,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC isVersioningAlreadyEnabled = false; isMfaDeleteAlreadyEnabled = false; icons = Icons; - kmsVaultConfig = false; - s3VaultConfig = false; + kmsConfigured = false; + s3Configured = false; tags: Record<string, string>[] = []; dirtyTags = false; tagConfig = [ @@ -72,6 +76,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC ]; grantees: string[] = [Grantee.Owner, Grantee.Everyone, Grantee.AuthenticatedUsers]; aclPermissions: AclPermissionsType[] = [aclPermission.FullControl]; + multisiteStatus$: Observable<any>; + isDefaultZoneGroup$: Observable<boolean>; get isVersioningEnabled(): boolean { return this.bucketForm.getValue('versioning'); @@ -89,10 +95,12 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC private modalService: ModalService, private 
rgwUserService: RgwUserService, private notificationService: NotificationService, - private rgwEncryptionModal: RgwBucketEncryptionModel, private textAreaJsonFormatterService: TextAreaJsonFormatterService, + private textAreaXmlFormatterService: TextAreaXmlFormatterService, public actionLabels: ActionLabelsI18n, - private readonly changeDetectorRef: ChangeDetectorRef + private readonly changeDetectorRef: ChangeDetectorRef, + private rgwMultisiteService: RgwMultisiteService, + private rgwDaemonService: RgwDaemonService ) { super(); this.editing = this.router.url.startsWith(`/rgw/bucket/${URLVerbs.EDIT}`); @@ -103,7 +111,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC ngAfterViewChecked(): void { this.changeDetectorRef.detectChanges(); - this.bucketPolicyOnChange(); + this.textAreaOnChange(this.bucketPolicyTextArea); + this.textAreaOnChange(this.lifecycleTextArea); } createForm() { @@ -153,8 +162,10 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC lock_mode: ['COMPLIANCE'], lock_retention_period_days: [10, [CdValidators.number(false), lockDaysValidator]], bucket_policy: ['{}', CdValidators.json()], + lifecycle: ['{}', CdValidators.jsonOrXml()], grantee: [Grantee.Owner, [Validators.required]], - aclPermission: [[aclPermission.FullControl], [Validators.required]] + aclPermission: [[aclPermission.FullControl], [Validators.required]], + replication: [false] }); } @@ -162,16 +173,31 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC const promises = { owners: this.rgwUserService.enumerate() }; + this.multisiteStatus$ = this.rgwMultisiteService.status(); + this.isDefaultZoneGroup$ = this.rgwDaemonService.selectedDaemon$.pipe( + switchMap((daemon) => + this.rgwSiteService.get('default-zonegroup').pipe( + map((defaultZoneGroup) => { + return daemon.zonegroup_id === defaultZoneGroup; + }) + ) + ) + ); - this.kmsProviders = this.rgwEncryptionModal.kmsProviders; + this.kmsProviders = rgwBucketEncryptionModel.kmsProviders; this.rgwBucketService.getEncryptionConfig().subscribe((data) => { - this.kmsVaultConfig = data[0]; - this.s3VaultConfig = data[1]; - if (this.kmsVaultConfig && this.s3VaultConfig) { + if (data['SSE_KMS']?.length > 0) { + this.kmsConfigured = true; + } + if (data['SSE_S3']?.length > 0) { + this.s3Configured = true; + } + // Set the encryption type based on the configurations + if (this.kmsConfigured && this.s3Configured) { this.bucketForm.get('encryption_type').setValue(''); - } else if (this.kmsVaultConfig) { + } else if (this.kmsConfigured) { this.bucketForm.get('encryption_type').setValue('aws:kms'); - } else if (this.s3VaultConfig) { + } else if (this.s3Configured) { this.bucketForm.get('encryption_type').setValue('AES256'); } else { this.bucketForm.get('encryption_type').setValue(''); @@ -239,6 +265,7 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC bidResp['acl'], bidResp['owner'] ); + value['lifecycle'] = JSON.stringify(bidResp['lifecycle'] || {}); } this.bucketForm.setValue(value); if (this.editing) { @@ -253,6 +280,14 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC .get('bucket_policy') .setValue(JSON.stringify(value['bucket_policy'], null, 2)); } + if (value['replication']) { + const replicationConfig = value['replication']; + if (replicationConfig?.['Rule']?.['Status'] === 'Enabled') { + this.bucketForm.get('replication').setValue(true); + } else { + this.bucketForm.get('replication').setValue(false); 
+ } + } this.filterAclPermissions(); } } @@ -267,14 +302,22 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC submit() { // Exit immediately if the form isn't dirty. - if (this.bucketForm.getValue('encryption_enabled') == null) { - this.bucketForm.get('encryption_enabled').setValue(false); - this.bucketForm.get('encryption_type').setValue(null); - } if (this.bucketForm.pristine) { this.goToListView(); return; } + + // Ensure that no validation is pending + if (this.bucketForm.pending) { + this.bucketForm.setErrors({ cdSubmitButton: true }); + return; + } + + if (this.bucketForm.getValue('encryption_enabled') == null) { + this.bucketForm.get('encryption_enabled').setValue(false); + this.bucketForm.get('encryption_type').setValue(null); + } + const values = this.bucketForm.value; const xmlStrTags = this.tagsToXML(this.tags); const bucketPolicy = this.getBucketPolicy(); @@ -300,7 +343,9 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC values['lock_retention_period_days'], xmlStrTags, bucketPolicy, - cannedAcl + cannedAcl, + values['replication'], + values['lifecycle'] ) .subscribe( () => { @@ -331,7 +376,8 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC values['keyId'], xmlStrTags, bucketPolicy, - cannedAcl + cannedAcl, + values['replication'] ) .subscribe( () => { @@ -397,9 +443,11 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC }); } - bucketPolicyOnChange() { - if (this.bucketPolicyTextArea) { - this.textAreaJsonFormatterService.format(this.bucketPolicyTextArea); + textAreaOnChange(textArea: ElementRef<any>) { + if (textArea?.nativeElement?.value?.startsWith?.('<')) { + this.textAreaXmlFormatterService.format(textArea); + } else { + this.textAreaJsonFormatterService.format(textArea); } } @@ -407,19 +455,12 @@ export class RgwBucketFormComponent extends CdForm implements OnInit, AfterViewC window.open(url, '_blank'); } - clearBucketPolicy() { - this.bucketForm.get('bucket_policy').setValue('{}'); + clearTextArea(field: string, defaultValue: string = '') { + this.bucketForm.get(field).setValue(defaultValue); this.bucketForm.markAsDirty(); this.bucketForm.updateValueAndValidity(); } - openConfigModal() { - const modalRef = this.modalService.show(RgwConfigModalComponent, null, { size: 'lg' }); - modalRef.componentInstance.configForm - .get('encryptionType') - .setValue(this.bucketForm.getValue('encryption_type') || 'AES256'); - } - showTagModal(index?: number) { const modalRef = this.modalService.show(BucketTagModalComponent); const modalComponent = modalRef.componentInstance as BucketTagModalComponent; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.html new file mode 100644 index 00000000000..ed79ed27b60 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.html @@ -0,0 +1,17 @@ +<ng-container *ngIf="selection"> + <nav ngbNav + #nav="ngbNav" + id="tabset-config-details" + class="nav-tabs" + cdStatefulTab="config-details"> + <ng-container ngbNavItem="details"> + <a ngbNavLink + i18n>Details</a> + <ng-template ngbNavContent> + <cd-table-key-value [data]="transformedData"> + </cd-table-key-value> + </ng-template> + </ng-container> + </nav> + <div [ngbNavOutlet]="nav"></div> +</ng-container> diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.spec.ts new file mode 100644 index 00000000000..8f522560f34 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.spec.ts @@ -0,0 +1,22 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { RgwConfigDetailsComponent } from './rgw-config-details.component'; + +describe('RgwConfigDetailsComponent', () => { + let component: RgwConfigDetailsComponent; + let fixture: ComponentFixture<RgwConfigDetailsComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [RgwConfigDetailsComponent] + }).compileComponents(); + + fixture = TestBed.createComponent(RgwConfigDetailsComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.ts new file mode 100644 index 00000000000..689330f3cc4 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-details/rgw-config-details.component.ts @@ -0,0 +1,37 @@ +import { Component, Input, OnChanges } from '@angular/core'; +import { rgwEncryptionConfigKeys } from '~/app/shared/models/rgw-encryption-config-keys'; + +@Component({ + selector: 'cd-rgw-config-details', + templateUrl: './rgw-config-details.component.html', + styleUrls: ['./rgw-config-details.component.scss'] +}) +export class RgwConfigDetailsComponent implements OnChanges { + transformedData: {}; + @Input() + selection: any; + + @Input() + excludeProps: any[] = []; + filteredEncryptionConfigValues: {}; + + ngOnChanges(): void { + if (this.selection) { + this.filteredEncryptionConfigValues = Object.keys(this.selection) + .filter((key) => !this.excludeProps.includes(key)) + .reduce((obj, key) => { + obj[key] = this.selection[key]; + return obj; + }, {}); + const transformedData = {}; + for (const key in this.filteredEncryptionConfigValues) { + if (rgwEncryptionConfigKeys[key]) { + transformedData[rgwEncryptionConfigKeys[key]] = this.filteredEncryptionConfigValues[key]; + } else { + transformedData[key] = this.filteredEncryptionConfigValues[key]; + } + } + this.transformedData = transformedData; + } + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.html index a8ed1783834..7205665a7a7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.html @@ -1,6 +1,6 @@ <cd-modal [modalRef]="activeModal"> <ng-container i18n="form title" - 
class="modal-title">Update RGW Encryption Configurations</ng-container> + class="modal-title">{{ action | titlecase }} RGW Encryption Configurations</ng-container> <ng-container class="modal-content"> <form name="configForm" @@ -17,10 +17,13 @@ id="s3Enabled" type="radio" name="encryptionType" + (change)="checkKmsProviders()" + [attr.disabled]="editing && configForm.getValue('encryptionType') !== 'AES256' ? true : null" value="AES256"> <label class="custom-check-label" + [ngClass]="{'text-muted': editing && configForm.getValue('encryptionType') !== 'AES256'}" for="s3Enabled" - i18n>SSE-S3 Encryption</label> + i18n>SSE-S3</label> </div> <div class="col-md-auto custom-checkbox form-check-inline"> @@ -28,11 +31,14 @@ formControlName="encryptionType" id="kmsEnabled" name="encryptionType" + (change)="checkKmsProviders()" value="aws:kms" + [attr.disabled]="editing && configForm.getValue('encryptionType') !== 'aws:kms' ? true : null" type="radio"> <label class="custom-check-label" + [ngClass]="{'text-muted': editing && configForm.getValue('encryptionType') !== 'aws:kms'}" for="kmsEnabled" - i18n>SSE-KMS Encryption</label> + i18n>SSE-KMS</label> </div> </div> @@ -46,9 +52,12 @@ name="kms_provider" class="form-select" formControlName="kms_provider"> - <option i18n - *ngIf="kmsProviders !== null" - [ngValue]="null">-- Select a provider --</option> + <option *ngIf="kmsProviders !== null && kmsProviders.length === 0" + ngValue="null" + i18n>-- No kms providers available --</option> + <option *ngIf="kmsProviders !== null && kmsProviders.length > 0" + ngValue="" + i18n>-- Select a provider --</option> <option *ngFor="let provider of kmsProviders" [value]="provider">{{ provider }}</option> </select> @@ -59,168 +68,170 @@ </div> </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label required" - for="auth_method" - i18n>Authentication Method</label> - <div class="cd-col-form-input"> - <select id="auth_method" - name="auth_method" - class="form-select" - formControlName="auth_method"> - <option *ngFor="let auth_method of authMethods" - [value]="auth_method">{{ auth_method }}</option> - </select> - <span class="invalid-feedback" - *ngIf="configForm.showError('auth_method', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="kmsProviders.length !== 0 && configForm.getValue('kms_provider') !== ''"> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label required" + for="auth_method" + i18n>Authentication Method</label> + <div class="cd-col-form-input"> + <select id="auth_method" + name="auth_method" + class="form-select" + formControlName="auth_method"> + <option *ngFor="let auth_method of authMethods" + [value]="auth_method">{{ auth_method }}</option> + </select> + <span class="invalid-feedback" + *ngIf="configForm.showError('auth_method', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label required" - for="secret_engine" - i18n>Secret Engine</label> - <div class="cd-col-form-input"> - <select id="secret_engine" - name="secret_engine" - class="form-select" - formControlName="secret_engine"> - <option 
*ngFor="let secret_engine of secretEngines" - [value]="secret_engine">{{ secret_engine }}</option> - </select> - <span class="invalid-feedback" - *ngIf="configForm.showError('secret_engine', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label required" + for="secret_engine" + i18n>Secret Engine</label> + <div class="cd-col-form-input"> + <select id="secret_engine" + name="secret_engine" + class="form-select" + formControlName="secret_engine"> + <option *ngFor="let secret_engine of secretEngines" + [value]="secret_engine">{{ secret_engine }}</option> + </select> + <span class="invalid-feedback" + *ngIf="configForm.showError('secret_engine', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label" - for="secret_path" - i18n>Secret Path - </label> - <div class="cd-col-form-input"> - <input id="secret_path" - name="secret_path" - class="form-control" - type="text" - formControlName="secret_path"> - <span class="invalid-feedback" - *ngIf="configForm.showError('secret_path', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label" + for="secret_path" + i18n>Secret Path + </label> + <div class="cd-col-form-input"> + <input id="secret_path" + name="secret_path" + class="form-control" + type="text" + formControlName="secret_path"> + <span class="invalid-feedback" + *ngIf="configForm.showError('secret_path', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label" - for="namespace" - i18n>Namespace - </label> - <div class="cd-col-form-input"> - <input id="namespace" - name="namespace" - class="form-control" - type="text" - formControlName="namespace"> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label" + for="namespace" + i18n>Namespace + </label> + <div class="cd-col-form-input"> + <input id="namespace" + name="namespace" + class="form-control" + type="text" + formControlName="namespace"> + </div> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label required" - for="address" - i18n>Vault Address - </label> - <div class="cd-col-form-input"> - <input id="address" - name="address" - class="form-control" - formControlName="address" - placeholder="http://127.0.0.1:8000"> - <span class="invalid-feedback" - *ngIf="configForm.showError('address', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label required" + for="address" + 
i18n>Vault Address + </label> + <div class="cd-col-form-input"> + <input id="address" + name="address" + class="form-control" + formControlName="address" + placeholder="http://127.0.0.1:8000"> + <span class="invalid-feedback" + *ngIf="configForm.showError('address', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> - </div> - - <div *ngIf="configForm.getValue('auth_method') === 'token'" - class="form-group row"> - <label class="cd-col-form-label required" - for="token"> - <span i18n>Token</span> - <cd-helper i18n> - The token authentication method expects a Vault token to be present in a plaintext file. - </cd-helper> - </label> - <div class="cd-col-form-input"> - <input type="file" - formControlName="token" - (change)="fileUpload($event.target.files, 'token')"> - <span class="invalid-feedback" - *ngIf="configForm.showError('token', frm, 'required')" - i18n>This field is required.</span> - </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label" - for="ssl_cert"> - <span i18n>CA Certificate</span> - <cd-helper i18n>The SSL certificate in PEM format.</cd-helper> + <div *ngIf="configForm.getValue('auth_method') === 'token'" + class="form-group row"> + <label class="cd-col-form-label required" + for="token"> + <span i18n>Token</span> + <cd-helper i18n> + The token authentication method expects a Vault token to be present in a plaintext file. + </cd-helper> </label> <div class="cd-col-form-input"> <input type="file" - formControlName="ssl_cert" - (change)="fileUpload($event.target.files, 'ssl_cert')"> + formControlName="token" + (change)="fileUpload($event.target.files, 'token')"> <span class="invalid-feedback" - *ngIf="configForm.showError('ssl_cert', frm, 'required')" + *ngIf="configForm.showError('token', frm, 'required')" i18n>This field is required.</span> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label" - for="client_cert"> - <span i18n>Client Certificate</span> - <cd-helper i18n>The Client certificate in PEM format.</cd-helper> - </label> - <div class="cd-col-form-input"> - <input type="file" - formControlName="client_cert" - (change)="fileUpload($event.target.files, 'client_cert')"> - <span class="invalid-feedback" - *ngIf="configForm.showError('client_cert', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label" + for="ssl_cert"> + <span i18n>CA Certificate</span> + <cd-helper i18n>The SSL certificate in PEM format.</cd-helper> + </label> + <div class="cd-col-form-input"> + <input type="file" + formControlName="ssl_cert" + (change)="fileUpload($event.target.files, 'ssl_cert')"> + <span class="invalid-feedback" + *ngIf="configForm.showError('ssl_cert', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> - </div> - <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> - <div class="form-group row"> - <label class="cd-col-form-label" - for="client_key"> - <span i18n>Client Private Key</span> - <cd-helper i18n>The Client Private Key in PEM format.</cd-helper> - </label> - <div 
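Editor's note: the radio inputs in this template bind [attr.disabled]="cond ? true : null" rather than the disabled property. On a control driven by formControlName, Angular warns against binding the disabled property directly and expects the control to be disabled programmatically; binding the attribute instead sidesteps that, because an attribute binding that evaluates to null removes the attribute entirely. A small illustrative component showing the pattern (names and template are mine, not from the diff):

import { Component } from '@angular/core';

// Illustrative only: [attr.disabled] bound to null removes the attribute,
// any other value renders the input disabled.
@Component({
  selector: 'demo-disabled-attr',
  template: `
    <input type="radio"
           name="mode"
           value="AES256"
           [attr.disabled]="editing && current !== 'AES256' ? true : null">
  `
})
export class DemoDisabledAttrComponent {
  editing = true;
  current = 'aws:kms'; // in this state the radio above renders disabled
}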
class="cd-col-form-input"> - <input type="file" - (change)="fileUpload($event.target.files, 'client_key')"> - <span class="invalid-feedback" - *ngIf="configForm.showError('client_key', frm, 'required')" - i18n>This field is required.</span> + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label" + for="client_cert"> + <span i18n>Client Certificate</span> + <cd-helper i18n>The Client certificate in PEM format.</cd-helper> + </label> + <div class="cd-col-form-input"> + <input type="file" + formControlName="client_cert" + (change)="fileUpload($event.target.files, 'client_cert')"> + <span class="invalid-feedback" + *ngIf="configForm.showError('client_cert', frm, 'required')" + i18n>This field is required.</span> + </div> + </div> + </div> + + <div *ngIf="configForm.getValue('encryptionType') === 'aws:kms' || configForm.getValue('encryptionType') === 'AES256'"> + <div class="form-group row"> + <label class="cd-col-form-label" + for="client_key"> + <span i18n>Client Private Key</span> + <cd-helper i18n>The Client Private Key in PEM format.</cd-helper> + </label> + <div class="cd-col-form-input"> + <input type="file" + (change)="fileUpload($event.target.files, 'client_key')"> + <span class="invalid-feedback" + *ngIf="configForm.showError('client_key', frm, 'required')" + i18n>This field is required.</span> + </div> </div> </div> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.ts index f2a0959109f..892916e86b5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-config-modal/rgw-config-modal.component.ts @@ -12,13 +12,12 @@ import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; import { CdValidators } from '~/app/shared/forms/cd-validators'; import { NotificationService } from '~/app/shared/services/notification.service'; -import { RgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; +import { rgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; @Component({ selector: 'cd-rgw-config-modal', templateUrl: './rgw-config-modal.component.html', - styleUrls: ['./rgw-config-modal.component.scss'], - providers: [RgwBucketEncryptionModel] + styleUrls: ['./rgw-config-modal.component.scss'] }) export class RgwConfigModalComponent implements OnInit { readonly vaultAddress = /^((https?:\/\/)|(www.))(?:([a-zA-Z]+)|(\d+\.\d+.\d+.\d+)):\d{4}$/; @@ -32,21 +31,75 @@ export class RgwConfigModalComponent implements OnInit { authMethods: string[]; secretEngines: string[]; + selectedEncryptionConfigValues: any = {}; + allEncryptionConfigValues: any = []; + editing = false; + action: string; + constructor( private formBuilder: CdFormBuilder, public activeModal: NgbActiveModal, private router: Router, public actionLabels: ActionLabelsI18n, private rgwBucketService: RgwBucketService, - private rgwEncryptionModal: RgwBucketEncryptionModel, private notificationService: NotificationService ) { this.createForm(); } ngOnInit(): void { - this.kmsProviders = this.rgwEncryptionModal.kmsProviders; - this.authMethods = this.rgwEncryptionModal.authMethods; - this.secretEngines = this.rgwEncryptionModal.secretEngines; + 
this.kmsProviders = rgwBucketEncryptionModel.kmsProviders; + this.authMethods = rgwBucketEncryptionModel.authMethods; + this.secretEngines = rgwBucketEncryptionModel.secretEngines; + if (this.editing && this.selectedEncryptionConfigValues) { + const patchValues = { + address: this.selectedEncryptionConfigValues['addr'], + encryptionType: + rgwBucketEncryptionModel[this.selectedEncryptionConfigValues['encryption_type']], + kms_provider: this.selectedEncryptionConfigValues['backend'], + auth_method: this.selectedEncryptionConfigValues['auth'], + secret_engine: this.selectedEncryptionConfigValues['secret_engine'], + secret_path: this.selectedEncryptionConfigValues['prefix'], + namespace: this.selectedEncryptionConfigValues['namespace'] + }; + this.configForm.patchValue(patchValues); + this.configForm.get('kms_provider').disable(); + } + this.checkKmsProviders(); + } + + checkKmsProviders() { + this.kmsProviders = rgwBucketEncryptionModel.kmsProviders; + if ( + this.allEncryptionConfigValues && + this.allEncryptionConfigValues.hasOwnProperty('SSE_KMS') && + !this.editing + ) { + const sseKmsBackends = this.allEncryptionConfigValues['SSE_KMS'].map( + (config: any) => config.backend + ); + if (this.configForm.get('encryptionType').value === rgwBucketEncryptionModel.SSE_KMS) { + this.kmsProviders = this.kmsProviders.filter( + (provider) => !sseKmsBackends.includes(provider) + ); + } + } + if ( + this.allEncryptionConfigValues && + this.allEncryptionConfigValues.hasOwnProperty('SSE_S3') && + !this.editing + ) { + const sseS3Backends = this.allEncryptionConfigValues['SSE_S3'].map( + (config: any) => config.backend + ); + if (this.configForm.get('encryptionType').value === rgwBucketEncryptionModel.SSE_S3) { + this.kmsProviders = this.kmsProviders.filter( + (provider) => !sseS3Backends.includes(provider) + ); + } + } + if (this.kmsProviders.length > 0 && !this.kmsProviders.includes('vault')) { + this.configForm.get('kms_provider').setValue(''); + } } createForm() { @@ -98,7 +151,7 @@ export class RgwConfigModalComponent implements OnInit { } onSubmit() { - const values = this.configForm.value; + const values = this.configForm.getRawValue(); this.rgwBucketService .setEncryptionConfig( values['encryptionType'], diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.html new file mode 100644 index 00000000000..c33c8dbe4aa --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.html @@ -0,0 +1,32 @@ +<nav ngbNav + #nav="ngbNav" + class="nav-tabs"> + <ng-container ngbNavItem> + <a ngbNavLink + i18n>Server-side Encryption</a> + <ng-template ngbNavContent> + <cd-table #table + [data]="encryptionConfigValues" + [columns]="columns" + identifier="unique_id" + [forceIdentifier]="true" + [hasDetails]="true" + (updateSelection)="updateSelection($event)" + (setExpandedRow)="setExpandedRow($event)" + columnMode="flex" + selectionType="single"> + <cd-table-actions class="table-actions" + [permission]="permissions.configOpt" + [selection]="selection" + [tableActions]="tableActions"> + </cd-table-actions> + <cd-rgw-config-details cdTableDetail + [selection]="expandedRow" + [excludeProps]="excludeProps"> + </cd-rgw-config-details> + </cd-table> + </ng-template> + </ng-container> +</nav> + +<div [ngbNavOutlet]="nav"></div> diff --git 
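Editor's note: two details of the modal rewrite are worth calling out. First, checkKmsProviders() hides providers that already have a configuration for the selected encryption type, so the create flow can only target unconfigured backends. Second, onSubmit() switches from configForm.value to configForm.getRawValue(): in edit mode the kms_provider control is disabled, and Angular excludes disabled controls from .value but keeps them in .getRawValue(). A condensed sketch of the filtering step, with the data shapes inferred from the code above:

// Sketch: drop providers that already have a config entry for the chosen type.
// `allConfigs` mirrors the assumed shape of allEncryptionConfigValues.
interface BackendConfig { backend: string; }

function availableProviders(
  allProviders: string[],
  allConfigs: Record<string, BackendConfig[]>,
  encryptionType: 'SSE_KMS' | 'SSE_S3'
): string[] {
  const configured = new Set(
    (allConfigs[encryptionType] ?? []).map((c) => c.backend)
  );
  return allProviders.filter((p) => !configured.has(p));
}

// Example: 'vault' is already configured for SSE_KMS, so only the other
// (hypothetical) provider remains selectable.
console.log(
  availableProviders(['vault', 'kmip'], { SSE_KMS: [{ backend: 'vault' }] }, 'SSE_KMS')
); // -> ['kmip']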
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.spec.ts new file mode 100644 index 00000000000..a487050e91c --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.spec.ts @@ -0,0 +1,28 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { RgwConfigurationPageComponent } from './rgw-configuration-page.component'; +import { NgbActiveModal, NgbNavModule } from '@ng-bootstrap/ng-bootstrap'; +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { SharedModule } from '~/app/shared/shared.module'; +import { RgwModule } from '../rgw.module'; + +describe('RgwConfigurationPageComponent', () => { + let component: RgwConfigurationPageComponent; + let fixture: ComponentFixture<RgwConfigurationPageComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [RgwConfigurationPageComponent], + providers: [NgbActiveModal], + imports: [HttpClientTestingModule, SharedModule, NgbNavModule, RgwModule] + }).compileComponents(); + + fixture = TestBed.createComponent(RgwConfigurationPageComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.ts new file mode 100644 index 00000000000..12e1a365200 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-configuration-page/rgw-configuration-page.component.ts @@ -0,0 +1,148 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; + +import { NgbActiveModal, NgbModalRef } from '@ng-bootstrap/ng-bootstrap'; +import _ from 'lodash'; + +import { Permissions } from '~/app/shared/models/permissions'; + +import { RgwBucketService } from '~/app/shared/api/rgw-bucket.service'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; +import { CdTableColumn } from '~/app/shared/models/cd-table-column'; +import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ListWithDetails } from '~/app/shared/classes/list-with-details.class'; +import { CdTableAction } from '~/app/shared/models/cd-table-action'; +import { Icons } from '~/app/shared/enum/icons.enum'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { RgwConfigModalComponent } from '../rgw-config-modal/rgw-config-modal.component'; +import { rgwBucketEncryptionModel } from '../models/rgw-bucket-encryption'; + +@Component({ + selector: 'cd-rgw-configuration-page', + templateUrl: 
'./rgw-configuration-page.component.html', + styleUrls: ['./rgw-configuration-page.component.scss'] +}) +export class RgwConfigurationPageComponent extends ListWithDetails implements OnInit { + readonly vaultAddress = /^((https?:\/\/)|(www.))(?:([a-zA-Z]+)|(\d+\.\d+.\d+.\d+)):\d{4}$/; + + kmsProviders: string[]; + + columns: Array<CdTableColumn> = []; + + configForm: CdFormGroup; + permissions: Permissions; + encryptionConfigValues: any = []; + selection: CdTableSelection = new CdTableSelection(); + + @Output() + submitAction = new EventEmitter(); + authMethods: string[]; + secretEngines: string[]; + tableActions: CdTableAction[]; + bsModalRef: NgbModalRef; + filteredEncryptionConfigValues: {}; + excludeProps: any[] = []; + disableCreate = false; + allEncryptionValues: any; + + constructor( + public activeModal: NgbActiveModal, + public actionLabels: ActionLabelsI18n, + private rgwBucketService: RgwBucketService, + public authStorageService: AuthStorageService, + private modalService: ModalService + ) { + super(); + this.permissions = this.authStorageService.getPermissions(); + } + + ngOnInit() { + this.columns = [ + { + name: $localize`Encryption Type`, + prop: 'encryption_type', + flexGrow: 1 + }, + { + name: $localize`Key Management Service Provider`, + prop: 'backend', + flexGrow: 1 + }, + { + name: $localize`Address`, + prop: 'addr', + flexGrow: 1 + } + ]; + this.tableActions = [ + { + permission: 'create', + icon: Icons.add, + name: this.actionLabels.CREATE, + click: () => this.openRgwConfigModal(false), + disable: () => this.disableCreate + }, + { + permission: 'update', + icon: Icons.edit, + name: this.actionLabels.EDIT, + click: () => this.openRgwConfigModal(true) + } + ]; + + this.rgwBucketService.getEncryptionConfig().subscribe((data: any) => { + this.allEncryptionValues = data; + const allowedBackends = rgwBucketEncryptionModel.kmsProviders; + + const kmsBackends = this.getBackend(data, 'SSE_KMS'); + const s3Backends = this.getBackend(data, 'SSE_S3'); + + const allKmsBackendsPresent = this.areAllAllowedBackendsPresent(allowedBackends, kmsBackends); + const allS3BackendsPresent = this.areAllAllowedBackendsPresent(allowedBackends, s3Backends); + + this.disableCreate = allKmsBackendsPresent && allS3BackendsPresent; + this.encryptionConfigValues = Object.values(data).flat(); + }); + + this.excludeProps = this.columns.map((column) => column.prop); + this.excludeProps.push('unique_id'); + } + + getBackend(encryptionData: { [x: string]: any[] }, encryptionType: string) { + return new Set(encryptionData[encryptionType].map((item) => item.backend)); + } + + areAllAllowedBackendsPresent(allowedBackends: any[], backendsSet: Set<any>) { + return allowedBackends.every((backend) => backendsSet.has(backend)); + } + + openRgwConfigModal(edit: boolean) { + if (edit) { + const initialState = { + action: 'edit', + editing: true, + selectedEncryptionConfigValues: this.selection.first() + }; + this.bsModalRef = this.modalService.show(RgwConfigModalComponent, initialState, { + size: 'lg' + }); + } else { + const initialState = { + action: 'create', + allEncryptionConfigValues: this.allEncryptionValues + }; + this.bsModalRef = this.modalService.show(RgwConfigModalComponent, initialState, { + size: 'lg' + }); + } + } + + updateSelection(selection: CdTableSelection) { + this.selection = selection; + } + + setExpandedRow(expandedRow: any) { + super.setExpandedRow(expandedRow); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-daemon-list/rgw-daemon-list.component.spec.ts 
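Editor's note: the page component disables the Create action only once every allowed backend already has both an SSE-KMS and an SSE-S3 configuration. getBackend() collects the configured backends per encryption type into a Set, and areAllAllowedBackendsPresent() is an every/has check over that set. A standalone sketch of the same computation:

// Sketch of the disableCreate computation from RgwConfigurationPageComponent.
function backendsOf(data: Record<string, { backend: string }[]>, type: string): Set<string> {
  return new Set((data[type] ?? []).map((item) => item.backend));
}

function allPresent(allowed: string[], configured: Set<string>): boolean {
  return allowed.every((backend) => configured.has(backend));
}

const data = {
  SSE_KMS: [{ backend: 'vault' }],
  SSE_S3: [{ backend: 'vault' }]
};
const allowed = ['vault']; // stand-in for rgwBucketEncryptionModel.kmsProviders

// Create stays enabled until both encryption types are fully covered.
const disableCreate =
  allPresent(allowed, backendsOf(data, 'SSE_KMS')) &&
  allPresent(allowed, backendsOf(data, 'SSE_S3'));
console.log(disableCreate); // true -> nothing left to create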
b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-daemon-list/rgw-daemon-list.component.spec.ts index bdb4decd9da..4936ee54a48 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-daemon-list/rgw-daemon-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-daemon-list/rgw-daemon-list.component.spec.ts @@ -32,6 +32,7 @@ describe('RgwDaemonListComponent', () => { server_hostname: 'ceph', realm_name: 'realm1', zonegroup_name: 'zg1-realm1', + zonegroup_id: 'zg1-id', zone_name: 'zone1-zg1-realm1', default: true, port: 80 diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.html index 291013a5ce2..921a2dfe3eb 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.html @@ -1,123 +1,156 @@ -<div class="row"> - <div class="col-sm-12 col-lg-12"> - <div> - <cd-alert-panel *ngIf="!rgwModuleStatus" - type="info" - spacingClass="mb-3" - class="d-flex align-items-center" - i18n>In order to access the import/export feature, the rgw module must be enabled +<nav ngbNav + #nav="ngbNav" + class="nav-tabs" + [(activeId)]="activeId" + (navChange)="onNavChange($event)"> + <ng-container ngbNavItem="configuration"> + <a ngbNavLink + i18n>Configuration</a> + <ng-template ngbNavContent> + <div> + <cd-alert-panel + *ngIf="!rgwModuleStatus" + type="info" + spacingClass="mb-3" + class="d-flex align-items-center" + i18n + >In order to access the import/export feature, the rgw module must be enabled - <button class="btn btn-light mx-2" - type="button" - (click)="enableRgwModule()">Enable</button> - </cd-alert-panel> - <cd-alert-panel *ngIf="restartGatewayMessage" - type="warning" - spacingClass="mb-3" - i18n>Please restart all Ceph Object Gateway instances in all zones to ensure consistent multisite configuration updates. - <a class="text-decoration-underline" - routerLink="/services"> - Cluster->Services</a> - </cd-alert-panel> - <cd-table-actions class="btn-group mb-4 me-2" - [permission]="permission" - [selection]="selection" - [tableActions]="createTableActions"> - </cd-table-actions> - <span *ngIf="showMigrateAction"> - <cd-table-actions class="btn-group mb-4 me-2 secondary" - [permission]="permission" - [btnColor]="'light'" - [selection]="selection" - [tableActions]="migrateTableAction"> + <button class="btn btn-light mx-2" + type="button" + (click)="enableRgwModule()"> + Enable + </button> + </cd-alert-panel> + <cd-alert-panel + *ngIf="restartGatewayMessage" + type="warning" + spacingClass="mb-3" + i18n>Please restart all Ceph Object Gateway instances in all zones to ensure consistent + multisite configuration updates. 
+ <a class="text-decoration-underline" + routerLink="/services"> Cluster->Services</a> + </cd-alert-panel> + <cd-table-actions + class="btn-group mb-4 me-2" + [permission]="permission" + [selection]="selection" + [tableActions]="createTableActions" + > </cd-table-actions> - </span> - <cd-table-actions class="btn-group mb-4 me-2" - [permission]="permission" - [btnColor]="'light'" - [selection]="selection" - [tableActions]="importAction"> - </cd-table-actions> - <cd-table-actions class="btn-group mb-4 me-2" - [permission]="permission" - [btnColor]="'light'" - [selection]="selection" - [tableActions]="exportAction"> - </cd-table-actions> - </div> - <div class="card"> - <div class="card-header" - i18n>Topology Viewer</div> - <div class="card-body"> - <div class="row"> - <div class="col-sm-6 col-lg-6 tree-container"> - <i *ngIf="loadingIndicator" - [ngClass]="[icons.large, icons.spinner, icons.spin]"></i> - <tree-root #tree - [nodes]="nodes" - [options]="treeOptions" - (updateData)="onUpdateData()"> - <ng-template #treeNodeTemplate - let-node> - <span *ngIf="node.data.name" - class="me-3"> - <span *ngIf="(node.data.show_warning)"> - <i class="text-danger" + <span *ngIf="showMigrateAction"> + <cd-table-actions + class="btn-group mb-4 me-2 secondary" + [permission]="permission" + [btnColor]="'light'" + [selection]="selection" + [tableActions]="migrateTableAction" + > + </cd-table-actions> + </span> + <cd-table-actions + class="btn-group mb-4 me-2" + [permission]="permission" + [btnColor]="'light'" + [selection]="selection" + [tableActions]="importAction" + > + </cd-table-actions> + <cd-table-actions + class="btn-group mb-4 me-2" + [permission]="permission" + [btnColor]="'light'" + [selection]="selection" + [tableActions]="exportAction"> + </cd-table-actions> + </div> + <div class="card"> + <div class="card-header" + i18n>Topology Viewer</div> + <div class="card-body"> + <div class="row"> + <div class="col-sm-6 col-lg-6 tree-container"> + <i *ngIf="loadingIndicator" + [ngClass]="[icons.large, icons.spinner, icons.spin]"></i> + <tree-root + #tree + [nodes]="nodes" + [options]="treeOptions" + (updateData)="onUpdateData()"> + <ng-template + #treeNodeTemplate + let-node> + <span *ngIf="node.data.name" + class="me-3"> + <span *ngIf="node.data.show_warning"> + <i + class="text-danger" i18n-title [title]="node.data.warning_message" - [ngClass]="icons.danger"></i> - </span> - <i [ngClass]="node.data.icon"></i> + [ngClass]="icons.danger" + ></i> + </span> + <i [ngClass]="node.data.icon"></i> {{ node.data.name }} - </span> - <span class="badge badge-success me-2" - *ngIf="node.data.is_default"> - default - </span> - <span class="badge badge-warning me-2" - *ngIf="node.data.is_master"> - master - </span> - <span class="badge badge-warning me-2" - *ngIf="node.data.secondary_zone"> - secondary-zone - </span> - <div class="btn-group align-inline-btns" - *ngIf="node.isFocused" - role="group"> - <div [title]="editTitle" - i18n-title> - <button type="button" - class="btn btn-light dropdown-toggle-split ms-1" - (click)="openModal(node, true)" - [disabled]="getDisable() || node.data.secondary_zone"> - <i [ngClass]="[icons.edit]"></i> - </button> - </div> - <div [title]="deleteTitle" - i18n-title> - <button type="button" - class="btn btn-light ms-1" - [disabled]="isDeleteDisabled(node) || node.data.secondary_zone" - (click)="delete(node)"> - <i [ngClass]="[icons.destroy]"></i> - </button> + </span> + <span class="badge badge-success me-2" + *ngIf="node.data.is_default"> + default + </span> + <span class="badge 
badge-warning me-2" + *ngIf="node.data.is_master"> master </span> + <span class="badge badge-warning me-2" + *ngIf="node.data.secondary_zone"> + secondary-zone + </span> + <div class="btn-group align-inline-btns" + *ngIf="node.isFocused" + role="group"> + <div [title]="editTitle" + i18n-title> + <button + type="button" + class="btn btn-light dropdown-toggle-split ms-1" + (click)="openModal(node, true)" + [disabled]="getDisable() || node.data.secondary_zone"> + <i [ngClass]="[icons.edit]"></i> + </button> + </div> + <div [title]="deleteTitle" + i18n-title> + <button + type="button" + class="btn btn-light ms-1" + [disabled]="isDeleteDisabled(node) || node.data.secondary_zone" + (click)="delete(node)"> + <i [ngClass]="[icons.destroy]"></i> + </button> + </div> </div> - </div> - </ng-template> - </tree-root> - </div> - <div class="col-sm-6 col-lg-6 metadata" - *ngIf="metadata"> - <legend>{{ metadataTitle }}</legend> - <div> - <cd-table-key-value cdTableDetail - [data]="metadata"> - </cd-table-key-value> + </ng-template> + </tree-root> + </div> + <div class="col-sm-6 col-lg-6 metadata" + *ngIf="metadata"> + <legend>{{ metadataTitle }}</legend> + <div> + <cd-table-key-value + cdTableDetail + [data]="metadata"></cd-table-key-value> + </div> </div> </div> </div> </div> - </div> - </div> -</div> + </ng-template> + </ng-container> + <ng-container ngbNavItem="syncPolicy"> + <a ngbNavLink + i18n>Sync Policy</a> + <ng-template ngbNavContent> + <cd-rgw-multisite-sync-policy></cd-rgw-multisite-sync-policy> + </ng-template> + </ng-container> +</nav> + +<div [ngbNavOutlet]="nav"></div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.spec.ts index be65424cf7a..ef833a0324c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.spec.ts @@ -8,6 +8,7 @@ import { SharedModule } from '~/app/shared/shared.module'; import { RgwMultisiteDetailsComponent } from './rgw-multisite-details.component'; import { RouterTestingModule } from '@angular/router/testing'; import { configureTestBed } from '~/testing/unit-test-helper'; +import { NgbNavModule } from '@ng-bootstrap/ng-bootstrap'; describe('RgwMultisiteDetailsComponent', () => { let component: RgwMultisiteDetailsComponent; @@ -21,7 +22,8 @@ describe('RgwMultisiteDetailsComponent', () => { TreeModule, SharedModule, ToastrModule.forRoot(), - RouterTestingModule + RouterTestingModule, + NgbNavModule ] }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.ts index 6e898e78945..4b65b7e37bd 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-details/rgw-multisite-details.component.ts @@ -99,6 +99,7 @@ export class RgwMultisiteDetailsComponent implements OnDestroy, OnInit { rgwModuleStatus: boolean; restartGatewayMessage = false; rgwModuleData: string | any[] = []; + activeId: string; constructor( private modalService: ModalService, @@ -115,6 +116,10 @@ export class RgwMultisiteDetailsComponent 
implements OnDestroy, OnInit { private notificationService: NotificationService ) { this.permission = this.authStorageService.getPermissions().rgw; + const activeId = this.router.getCurrentNavigation()?.extras?.state?.activeId; + if (activeId) { + this.activeId = activeId; + } } openModal(entity: any, edit = false) { @@ -267,7 +272,6 @@ export class RgwMultisiteDetailsComponent implements OnDestroy, OnInit { } }); } - /* setConfigValues() { this.rgwDaemonService .setMultisiteConfig( @@ -589,4 +593,19 @@ export class RgwMultisiteDetailsComponent implements OnDestroy, OnInit { } ); } + + onNavChange(event: any) { + if (event.nextId == 'configuration') { + this.metadata = null; + /* + It is a known issue with angular2-tree package when tree is hidden (for example inside tab or modal), + it is not rendered when it becomes visible. Solution is to call this.tree.sizeChanged() which recalculates + the rendered nodes according to the actual viewport size. (https://angular2-tree.readme.io/docs/common-issues) + */ + setTimeout(() => { + this.tree.sizeChanged(); + this.onUpdateData(); + }, 200); + } + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.html new file mode 100644 index 00000000000..f1c2af06fe5 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.html @@ -0,0 +1,102 @@ +<div class="cd-col-form"> + <form + name="bucketForm" + #frm="ngForm" + [formGroup]="syncPolicyForm" + *cdFormLoading="loading" + novalidate> + <div class="card"> + <div + i18n="form title" + class="card-header"> + {{ action | titlecase }} {{ resource | upperFirst }} + </div> + + <div class="card-body"> + <!-- Group Id --> + <div class="form-group row"> + <label + class="cd-col-form-label required" + for="group_id" + i18n>Group Id</label> + <div class="cd-col-form-input"> + <input + id="group_id" + name="group_id" + class="form-control" + type="text" + i18n-placeholder + placeholder="Group Id..." + formControlName="group_id" + [readonly]="editing"/> + <span + class="invalid-feedback" + *ngIf="syncPolicyForm.showError('group_id', frm, 'required')" + i18n>This field is required.</span> + </div> + </div> + + <!-- Status --> + <div class="form-group row"> + <label + class="cd-col-form-label required" + for="status" + i18n>Status</label> + <div class="cd-col-form-input"> + <select + id="status" + name="status" + class="form-select" + formControlName="status"> + <option + i18n + value="{{syncPolicyStatus.ENABLED}}">{{syncPolicyStatus.ENABLED | upperFirst }}</option> + <option + i18n + value="{{syncPolicyStatus.ALLOWED}}">{{syncPolicyStatus.ALLOWED | upperFirst }}</option> + <option + i18n + value="{{syncPolicyStatus.FORBIDDEN}}">{{syncPolicyStatus.FORBIDDEN | upperFirst }}</option> + </select> + <span + class="invalid-feedback" + *ngIf="syncPolicyForm.showError('status', frm, 'required')" + i18n>This field is required.</span> + </div> + </div> + + <!-- Bucket Name --> + <div class="form-group row"> + <label + class="cd-col-form-label" + for="bucket_name" + i18n>Bucket Name</label> + <div class="cd-col-form-input"> + <input + id="bucket_name" + name="bucket_name" + class="form-control" + type="text" + i18n-placeholder + placeholder="Bucket Name..." 
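Editor's note: the new onNavChange() handler works around the angular2-tree limitation its comment cites: a tree initialised while hidden (inside an inactive tab) renders empty, so the component defers tree.sizeChanged() until the tab is visible. Its companion is activeId, read from router navigation state so that the sync-policy form (whose goToListView() appears later in this diff) can return the user to the tab they came from. A minimal sketch of the deferred-resize pattern; the 200 ms delay matches the code above, and the tree reference is assumed to expose sizeChanged() as the workaround comment describes:

import { Component, ViewChild } from '@angular/core';

// Sketch: re-render a tree that was initialised inside a hidden nav tab.
@Component({ selector: 'demo-tab-tree', template: '' })
export class DemoTabTreeComponent {
  @ViewChild('tree') tree!: { sizeChanged(): void };

  onNavChange(event: { nextId: string }): void {
    if (event.nextId === 'configuration') {
      // Wait for the tab switch to land in the DOM before recalculating
      // the rendered nodes against the now-visible viewport.
      setTimeout(() => this.tree.sizeChanged(), 200);
    }
  }
}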
+ formControlName="bucket_name" + [readonly]="editing" + [ngbTypeahead]="bucketDataSource"/> + <span + class="invalid-feedback" + *ngIf="syncPolicyForm.showError('bucket_name', frm, 'bucketNameNotAllowed')" + i18n>The bucket with chosen name does not exist.</span> + </div> + </div> + </div> + + <div class="card-footer"> + <cd-form-button-panel + (submitActionEvent)="submit()" + [form]="syncPolicyForm" + [submitText]="(action | titlecase) + ' ' + (resource | upperFirst)" + wrappingClass="text-right"></cd-form-button-panel> + </div> + </div> + </form> +</div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.spec.ts new file mode 100644 index 00000000000..b886ad1d5e5 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.spec.ts @@ -0,0 +1,36 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; +import { RgwMultisiteSyncPolicyFormComponent } from './rgw-multisite-sync-policy-form.component'; +import { HttpClientTestingModule } from '@angular/common/http/testing'; +import { ToastrModule } from 'ngx-toastr'; +import { ReactiveFormsModule } from '@angular/forms'; +import { PipesModule } from '~/app/shared/pipes/pipes.module'; +import { ComponentsModule } from '~/app/shared/components/components.module'; +import { RouterTestingModule } from '@angular/router/testing'; + +describe('RgwMultisiteSyncPolicyFormComponent', () => { + let component: RgwMultisiteSyncPolicyFormComponent; + let fixture: ComponentFixture<RgwMultisiteSyncPolicyFormComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [RgwMultisiteSyncPolicyFormComponent], + imports: [ + HttpClientTestingModule, + ReactiveFormsModule, + ToastrModule.forRoot(), + PipesModule, + ComponentsModule, + RouterTestingModule + ], + providers: [] + }).compileComponents(); + + fixture = TestBed.createComponent(RgwMultisiteSyncPolicyFormComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.ts new file mode 100644 index 00000000000..300f6171235 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component.ts @@ -0,0 +1,172 @@ +import { Component, OnInit } from '@angular/core'; +import { AbstractControl, AsyncValidatorFn, ValidationErrors, Validators } from '@angular/forms'; +import { ActivatedRoute, Router } from '@angular/router'; +import { Observable, timer as observableTimer, of } 
from 'rxjs'; +import { + catchError, + debounceTime, + distinctUntilChanged, + map, + mergeMap, + switchMapTo +} from 'rxjs/operators'; +import { RgwBucketService } from '~/app/shared/api/rgw-bucket.service'; +import { RgwMultisiteService } from '~/app/shared/api/rgw-multisite.service'; +import { ActionLabelsI18n, URLVerbs } from '~/app/shared/constants/app.constants'; +import { NotificationType } from '~/app/shared/enum/notification-type.enum'; +import { CdFormBuilder } from '~/app/shared/forms/cd-form-builder'; +import { CdFormGroup } from '~/app/shared/forms/cd-form-group'; +import { NotificationService } from '~/app/shared/services/notification.service'; +import { RgwMultisiteSyncPolicyStatus } from '../models/rgw-multisite'; +import { CdForm } from '~/app/shared/forms/cd-form'; +import _ from 'lodash'; + +@Component({ + selector: 'cd-rgw-multisite-sync-policy-form', + templateUrl: './rgw-multisite-sync-policy-form.component.html', + styleUrls: ['./rgw-multisite-sync-policy-form.component.scss'] +}) +export class RgwMultisiteSyncPolicyFormComponent extends CdForm implements OnInit { + syncPolicyForm: CdFormGroup; + editing = false; + action: string; + resource: string; + syncPolicyStatus = RgwMultisiteSyncPolicyStatus; + + bucketDataSource = (text$: Observable<string>) => { + return text$.pipe( + debounceTime(200), + distinctUntilChanged(), + mergeMap((token: string) => this.getBucketTypeahead(token)) + ); + }; + + constructor( + private router: Router, + private route: ActivatedRoute, + public actionLabels: ActionLabelsI18n, + private fb: CdFormBuilder, + private rgwMultisiteService: RgwMultisiteService, + private notificationService: NotificationService, + private rgwBucketService: RgwBucketService + ) { + super(); + this.editing = this.router.url.startsWith(`/rgw/multisite/sync-policy/${URLVerbs.EDIT}`); + this.action = this.editing ? 
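Editor's note: bucketDataSource is the search function handed to ngbTypeahead. ng-bootstrap subscribes it to the raw input stream, and the pipeline throttles keystrokes (debounceTime), drops repeated terms (distinctUntilChanged), and maps each term to a backend lookup (mergeMap). A self-contained sketch with a stubbed lookup standing in for rgwBucketService:

import { Observable, of } from 'rxjs';
import { debounceTime, distinctUntilChanged, mergeMap } from 'rxjs/operators';

// Stub standing in for rgwBucketService.list() plus filtering.
function lookupBuckets(term: string): Observable<string[]> {
  const all = ['bucket-a', 'bucket-b', 'archive'];
  return of(all.filter((name) => name.includes(term.toLowerCase())).slice(0, 15));
}

// The search function ngbTypeahead expects: Observable<string> in,
// Observable<string[]> of suggestions out.
const bucketDataSource = (text$: Observable<string>): Observable<string[]> =>
  text$.pipe(
    debounceTime(200),        // wait for typing to pause
    distinctUntilChanged(),   // skip unchanged terms
    mergeMap((term) => lookupBuckets(term))
  );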
this.actionLabels.EDIT : this.actionLabels.CREATE; + this.resource = $localize`Sync Policy Group`; + this.createForm(); + this.loadingReady(); + } + + ngOnInit(): void { + if (this.editing) { + this.route.paramMap.subscribe((params: any) => { + const groupName = params.get('groupName'); + if (groupName) { + const bucketName = params.get('bucketName'); + this.loadingStart(); + this.rgwMultisiteService + .getSyncPolicyGroup(groupName, bucketName) + .subscribe((syncPolicy: any) => { + this.loadingReady(); + if (syncPolicy) { + this.syncPolicyForm.patchValue({ + group_id: syncPolicy.id, + status: syncPolicy.status, + bucket_name: bucketName + }); + } else { + this.goToListView(); + } + }); + } + }); + } + } + + createForm() { + this.syncPolicyForm = this.fb.group({ + group_id: ['', Validators.required], + status: [`${this.syncPolicyStatus.ENABLED}`, Validators.required], + bucket_name: ['', , this.bucketExistence(true)] + }); + } + + goToListView() { + // passing state in order to return to same tab on details page + this.router.navigate(['/rgw/multisite'], { state: { activeId: 'syncPolicy' } }); + } + + submit() { + if (this.syncPolicyForm.pristine) { + this.goToListView(); + return; + } + + // Ensure that no validation is pending + if (this.syncPolicyForm.pending) { + this.syncPolicyForm.setErrors({ cdSubmitButton: true }); + return; + } + + if (!this.editing) { + // Add + this.rgwMultisiteService.createSyncPolicyGroup(this.syncPolicyForm.value).subscribe( + () => { + this.notificationService.show( + NotificationType.success, + $localize`Created Sync Policy Group '${this.syncPolicyForm.getValue('group_id')}'` + ); + this.goToListView(); + }, + () => { + // Reset the 'Submit' button. + this.syncPolicyForm.setErrors({ cdSubmitButton: true }); + } + ); + } else { + this.rgwMultisiteService.modifySyncPolicyGroup(this.syncPolicyForm.value).subscribe( + () => { + this.notificationService.show( + NotificationType.success, + $localize`Modified Sync Policy Group '${this.syncPolicyForm.getValue('group_id')}'` + ); + this.goToListView(); + }, + () => { + // Reset the 'Submit' button. + this.syncPolicyForm.setErrors({ cdSubmitButton: true }); + } + ); + } + } + + bucketExistence(requiredExistenceResult: boolean): AsyncValidatorFn { + return (control: AbstractControl): Observable<ValidationErrors | null> => { + if (control.dirty) { + return observableTimer(500).pipe( + switchMapTo(this.rgwBucketService.exists(control.value)), + map((existenceResult: boolean) => + existenceResult === requiredExistenceResult ? 
null : { bucketNameNotAllowed: true } + ) + ); + } + return of(null); + }; + } + + private getBucketTypeahead(path: string): Observable<any> { + if (_.isString(path) && path !== '/' && path !== '') { + return this.rgwBucketService.list().pipe( + map((bucketList: any) => + bucketList + .filter((bucketName: string) => bucketName.toLowerCase().includes(path)) + .slice(0, 15) + ), + catchError(() => of([$localize`Error while retrieving bucket names.`])) + ); + } else { + return of([]); + } + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.html new file mode 100644 index 00000000000..8909f44c32b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.html @@ -0,0 +1,40 @@ +<legend i18n> + Multisite Sync Policy + <cd-help-text> + Multisite bucket-granularity sync policy provides fine grained control of data movement between + buckets in different zones. + </cd-help-text> +</legend> +<cd-table + #table + [autoReload]="false" + [data]="syncPolicyData" + [columns]="columns" + identifier="uniqueId" + [forceIdentifier]="true" + columnMode="flex" + selectionType="multiClick" + [searchableObjects]="true" + [hasDetails]="false" + [serverSide]="false" + [count]="0" + [maxLimit]="25" + [toolHeader]="true" + (fetchData)="getPolicyList($event)" + (updateSelection)="updateSelection($event)"> + <div class="table-actions btn-toolbar"> + <cd-table-actions + [permission]="permission" + [selection]="selection" + class="btn-group" + [tableActions]="tableActions"> + </cd-table-actions> + </div> +</cd-table> + +<ng-template #deleteTpl> + <cd-alert-panel type="danger" + i18n> + Are you sure you want to delete these policy groups? 
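Editor's note: bucketExistence() builds a debounced AsyncValidatorFn. It waits 500 ms (observableTimer), then switches to the existence check, and finally compares the result against the required outcome, so the one factory serves both "must exist" and "must not exist" cases. A standalone sketch with the backend call stubbed out; it uses switchMap(() => ...) where the diff uses the older switchMapTo alias:

import { AbstractControl, AsyncValidatorFn, ValidationErrors } from '@angular/forms';
import { Observable, of, timer } from 'rxjs';
import { map, switchMap } from 'rxjs/operators';

// Stub for rgwBucketService.exists().
const bucketExists = (name: string): Observable<boolean> => of(name === 'known-bucket');

// Factory: pass true to require an existing bucket, false to forbid one.
function bucketExistence(requiredExistenceResult: boolean): AsyncValidatorFn {
  return (control: AbstractControl): Observable<ValidationErrors | null> => {
    if (!control.dirty) {
      return of(null); // don't hit the backend for untouched controls
    }
    return timer(500).pipe(  // debounce keystrokes
      switchMap(() => bucketExists(control.value)),
      map((exists) =>
        exists === requiredExistenceResult ? null : { bucketNameNotAllowed: true }
      )
    );
  };
}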
+ </cd-alert-panel> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.spec.ts new file mode 100644 index 00000000000..f555af7e765 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.spec.ts @@ -0,0 +1,28 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { RgwMultisiteSyncPolicyComponent } from './rgw-multisite-sync-policy.component'; +import { HttpClientModule } from '@angular/common/http'; +import { TitleCasePipe } from '@angular/common'; +import { ToastrModule } from 'ngx-toastr'; +import { PipesModule } from '~/app/shared/pipes/pipes.module'; + +describe('RgwMultisiteSyncPolicyComponent', () => { + let component: RgwMultisiteSyncPolicyComponent; + let fixture: ComponentFixture<RgwMultisiteSyncPolicyComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [RgwMultisiteSyncPolicyComponent], + imports: [HttpClientModule, ToastrModule.forRoot(), PipesModule], + providers: [TitleCasePipe] + }).compileComponents(); + + fixture = TestBed.createComponent(RgwMultisiteSyncPolicyComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts new file mode 100644 index 00000000000..2ed1ec0f7e2 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts @@ -0,0 +1,201 @@ +import { TitleCasePipe } from '@angular/common'; +import { Component, OnInit, TemplateRef, ViewChild } from '@angular/core'; +import { forkJoin as observableForkJoin, Observable, Subscriber } from 'rxjs'; +import { RgwMultisiteService } from '~/app/shared/api/rgw-multisite.service'; +import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; +import { ActionLabelsI18n } from '~/app/shared/constants/app.constants'; +import { TableComponent } from '~/app/shared/datatable/table/table.component'; +import { CellTemplate } from '~/app/shared/enum/cell-template.enum'; +import { Icons } from '~/app/shared/enum/icons.enum'; +import { CdTableAction } from '~/app/shared/models/cd-table-action'; +import { CdTableColumn } from '~/app/shared/models/cd-table-column'; +import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context'; +import { CdTableSelection } from '~/app/shared/models/cd-table-selection'; +import { FinishedTask } from '~/app/shared/models/finished-task'; +import { Permission } from 
'~/app/shared/models/permissions'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { ModalService } from '~/app/shared/services/modal.service'; +import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; +import { URLBuilderService } from '~/app/shared/services/url-builder.service'; + +const BASE_URL = 'rgw/multisite/sync-policy'; + +@Component({ + selector: 'cd-rgw-multisite-sync-policy', + templateUrl: './rgw-multisite-sync-policy.component.html', + styleUrls: ['./rgw-multisite-sync-policy.component.scss'], + providers: [{ provide: URLBuilderService, useValue: new URLBuilderService(BASE_URL) }] +}) +export class RgwMultisiteSyncPolicyComponent implements OnInit { + @ViewChild(TableComponent, { static: true }) + table: TableComponent; + @ViewChild('deleteTpl', { static: true }) + deleteTpl: TemplateRef<any>; + + columns: Array<CdTableColumn> = []; + syncPolicyData: any = []; + tableActions: CdTableAction[]; + selection = new CdTableSelection(); + permission: Permission; + + constructor( + private rgwMultisiteService: RgwMultisiteService, + private titleCasePipe: TitleCasePipe, + private actionLabels: ActionLabelsI18n, + private urlBuilder: URLBuilderService, + private authStorageService: AuthStorageService, + private modalService: ModalService, + private taskWrapper: TaskWrapperService + ) {} + + ngOnInit(): void { + this.permission = this.authStorageService.getPermissions().rgw; + this.columns = [ + { + prop: 'uniqueId', + isHidden: true + }, + { + name: $localize`Group Name`, + prop: 'groupName', + flexGrow: 1 + }, + { + name: $localize`Status`, + prop: 'status', + flexGrow: 1, + cellTransformation: CellTemplate.tooltip, + customTemplateConfig: { + map: { + Enabled: { class: 'badge-success', tooltip: 'sync is allowed and enabled' }, + Allowed: { class: 'badge-info', tooltip: 'sync is allowed' }, + Forbidden: { + class: 'badge-warning', + tooltip: + 'sync (as defined by this group) is not allowed and can override other groups' + } + } + }, + pipe: this.titleCasePipe + }, + { + name: $localize`Zonegroup`, + prop: 'zonegroup', + flexGrow: 1 + }, + { + name: $localize`Bucket`, + prop: 'bucket', + flexGrow: 1 + } + ]; + const getSyncGroupName = () => { + if (this.selection.first() && this.selection.first().groupName) { + if (this.selection.first().bucket) { + return `${encodeURIComponent(this.selection.first().groupName)}/${encodeURIComponent( + this.selection.first().bucket + )}`; + } + return `${encodeURIComponent(this.selection.first().groupName)}`; + } + return ''; + }; + const addAction: CdTableAction = { + permission: 'create', + icon: Icons.add, + routerLink: () => this.urlBuilder.getCreate(), + name: this.actionLabels.CREATE, + canBePrimary: (selection: CdTableSelection) => !selection.hasSelection + }; + const editAction: CdTableAction = { + permission: 'update', + icon: Icons.edit, + routerLink: () => this.urlBuilder.getEdit(getSyncGroupName()), + name: this.actionLabels.EDIT + }; + const deleteAction: CdTableAction = { + permission: 'delete', + icon: Icons.destroy, + click: () => this.deleteAction(), + disable: () => !this.selection.hasSelection, + name: this.actionLabels.DELETE, + canBePrimary: (selection: CdTableSelection) => selection.hasMultiSelection + }; + this.tableActions = [addAction, editAction, deleteAction]; + } + + transformSyncPolicyData(allSyncPolicyData: any) { + if (allSyncPolicyData && allSyncPolicyData.length > 0) { + allSyncPolicyData.forEach((policy: any) => { + this.syncPolicyData.push({ + 
uniqueId: policy['id'] + (policy['bucketName'] ? policy['bucketName'] : ''), + groupName: policy['id'], + status: policy['status'], + bucket: policy['bucketName'], + zonegroup: '' + }); + }); + this.syncPolicyData = [...this.syncPolicyData]; + } + } + + updateSelection(selection: CdTableSelection) { + this.selection = selection; + } + + getPolicyList(context: CdTableFetchDataContext) { + this.rgwMultisiteService.getSyncPolicy('', '', true).subscribe( + (resp: object[]) => { + this.syncPolicyData = []; + this.transformSyncPolicyData(resp); + }, + () => { + context.error(); + } + ); + } + + deleteAction() { + const groupNames = this.selection.selected.map((policy: any) => policy.groupName); + this.modalService.show(CriticalConfirmationModalComponent, { + itemDescription: this.selection.hasSingleSelection + ? $localize`Policy Group` + : $localize`Policy Groups`, + itemNames: groupNames, + bodyTemplate: this.deleteTpl, + submitActionObservable: () => { + return new Observable((observer: Subscriber<any>) => { + this.taskWrapper + .wrapTaskAroundCall({ + task: new FinishedTask('rgw/multisite/sync-policy/delete', { + group_names: groupNames + }), + call: observableForkJoin( + this.selection.selected.map((policy: any) => { + return this.rgwMultisiteService.removeSyncPolicyGroup( + policy.groupName, + policy.bucket + ); + }) + ) + }) + .subscribe({ + error: (error: any) => { + // Forward the error to the observer. + observer.error(error); + // Reload the data table content because some deletions might + // have been executed successfully in the meanwhile. + this.table.refreshBtn(); + }, + complete: () => { + // Notify the observer that we are done. + observer.complete(); + // Reload the data table content. + this.table.refreshBtn(); + } + }); + }); + } + }); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.spec.ts index 4f024f25f41..36cafa855a3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.spec.ts @@ -26,6 +26,7 @@ describe('RgwOverviewDashboardComponent', () => { server_hostname: 'ceph', realm_name: 'realm1', zonegroup_name: 'zg1-realm1', + zonegroup_id: 'zg1-id', zone_name: 'zone1-zg1-realm1', default: true, port: 80 diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts index 64afa205e69..a00d08ad75e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-user-form/rgw-user-form.component.spec.ts @@ -18,6 +18,7 @@ import { RgwUserCapabilities } from '../models/rgw-user-capabilities'; import { RgwUserCapability } from '../models/rgw-user-capability'; import { RgwUserS3Key } from '../models/rgw-user-s3-key'; import { RgwUserFormComponent } from './rgw-user-form.component'; +import { DUE_TIMER } from '~/app/shared/forms/cd-validators'; describe('RgwUserFormComponent', () => { let component: RgwUserFormComponent; @@ -162,14 +163,14 @@ describe('RgwUserFormComponent', () => { it('should validate that username is valid', fakeAsync(() => { 
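Editor's note: deleteAction() above wires the confirmation modal to a bulk operation: one removeSyncPolicyGroup() call per selected row, combined with forkJoin so the task wrapper sees a single observable, with the table refreshed on error as well, since some deletions may have succeeded before one failed. A trimmed sketch of that wrapping, with the service and modal plumbing stubbed:

import { forkJoin, Observable, of, Subscriber, throwError } from 'rxjs';

// Stub: pretend one of the deletions fails.
const removeGroup = (name: string): Observable<void> =>
  name === 'bad' ? throwError(() => new Error(`cannot delete ${name}`)) : of(void 0);

// Shape expected by the confirmation modal: an Observable that completes
// when all deletions finish, or errors if any of them fail.
function bulkDelete(groupNames: string[], refresh: () => void): Observable<void> {
  return new Observable((observer: Subscriber<void>) => {
    forkJoin(groupNames.map((name) => removeGroup(name))).subscribe({
      error: (err) => {
        observer.error(err); // surface the failure to the modal
        refresh();           // some rows may already be gone
      },
      complete: () => {
        observer.complete();
        refresh();
      }
    });
  });
}

bulkDelete(['good', 'bad'], () => console.log('table refreshed'))
  .subscribe({ error: (err) => console.log(String(err)) });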
spyOn(rgwUserService, 'get').and.returnValue(throwError('foo')); formHelper.setValue('user_id', 'ab', true); - tick(); + tick(DUE_TIMER); formHelper.expectValid('user_id'); })); it('should validate that username is invalid', fakeAsync(() => { spyOn(rgwUserService, 'get').and.returnValue(observableOf({})); formHelper.setValue('user_id', 'abc', true); - tick(); + tick(DUE_TIMER); formHelper.expectError('user_id', 'notUnique'); })); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts index 04755928b0a..dde6cff4866 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts @@ -1,13 +1,20 @@ -import { CommonModule } from '@angular/common'; +import { CommonModule, TitleCasePipe } from '@angular/common'; import { NgModule } from '@angular/core'; import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { RouterModule, Routes } from '@angular/router'; -import { NgbNavModule, NgbPopoverModule, NgbTooltipModule } from '@ng-bootstrap/ng-bootstrap'; +import { + NgbNavModule, + NgbPopoverModule, + NgbTooltipModule, + NgbTypeaheadModule +} from '@ng-bootstrap/ng-bootstrap'; import { NgxPipeFunctionModule } from 'ngx-pipe-function'; import { ActionLabels, URLVerbs } from '~/app/shared/constants/app.constants'; import { CRUDTableComponent } from '~/app/shared/datatable/crud-table/crud-table.component'; +import { FeatureTogglesGuardService } from '~/app/shared/services/feature-toggles-guard.service'; +import { ModuleStatusGuardService } from '~/app/shared/services/module-status-guard.service'; import { SharedModule } from '~/app/shared/shared.module'; import { PerformanceCounterModule } from '../performance-counter/performance-counter.module'; @@ -45,13 +52,19 @@ import { RgwSyncPrimaryZoneComponent } from './rgw-sync-primary-zone/rgw-sync-pr import { RgwSyncMetadataInfoComponent } from './rgw-sync-metadata-info/rgw-sync-metadata-info.component'; import { RgwSyncDataInfoComponent } from './rgw-sync-data-info/rgw-sync-data-info.component'; import { BucketTagModalComponent } from './bucket-tag-modal/bucket-tag-modal.component'; +import { NfsListComponent } from '../nfs/nfs-list/nfs-list.component'; +import { NfsFormComponent } from '../nfs/nfs-form/nfs-form.component'; +import { RgwMultisiteSyncPolicyComponent } from './rgw-multisite-sync-policy/rgw-multisite-sync-policy.component'; +import { RgwMultisiteSyncPolicyFormComponent } from './rgw-multisite-sync-policy-form/rgw-multisite-sync-policy-form.component'; +import { RgwConfigurationPageComponent } from './rgw-configuration-page/rgw-configuration-page.component'; +import { RgwConfigDetailsComponent } from './rgw-config-details/rgw-config-details.component'; @NgModule({ imports: [ CommonModule, SharedModule, FormsModule, - ReactiveFormsModule, + ReactiveFormsModule.withConfig({ callSetDisabledState: 'whenDisabledForLegacyCode' }), PerformanceCounterModule, NgbNavModule, RouterModule, @@ -60,7 +73,8 @@ import { BucketTagModalComponent } from './bucket-tag-modal/bucket-tag-modal.com NgxPipeFunctionModule, TreeModule, DataTableModule, - DashboardV3Module + DashboardV3Module, + NgbTypeaheadModule ], exports: [ RgwDaemonListComponent, @@ -102,8 +116,13 @@ import { BucketTagModalComponent } from './bucket-tag-modal/bucket-tag-modal.com RgwSyncPrimaryZoneComponent, RgwSyncMetadataInfoComponent, RgwSyncDataInfoComponent, - BucketTagModalComponent - ] + 
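Editor's note: the rgw-user-form spec change is a consequence of validator debouncing. With the uniqueness check now waiting a shared DUE_TIMER before firing, a bare tick() no longer flushes it, so the tests advance virtual time by exactly that constant. A minimal illustration of the pattern inside a Jasmine spec; DEBOUNCE_MS is a stand-in for the exported DUE_TIMER:

import { fakeAsync, tick } from '@angular/core/testing';

const DEBOUNCE_MS = 500; // stand-in for DUE_TIMER from cd-validators

it('flushes a debounced async check', fakeAsync(() => {
  let validated = false;
  setTimeout(() => (validated = true), DEBOUNCE_MS); // models the timer() in the validator

  tick();            // 0 ms: the debounced work has not run yet
  expect(validated).toBe(false);

  tick(DEBOUNCE_MS); // advance virtual time past the debounce window
  expect(validated).toBe(true);
}));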
BucketTagModalComponent, + RgwMultisiteSyncPolicyComponent, + RgwMultisiteSyncPolicyFormComponent, + RgwConfigDetailsComponent, + RgwConfigurationPageComponent + ], + providers: [TitleCasePipe] }) export class RgwModule {} @@ -193,7 +212,56 @@ const routes: Routes = [ { path: 'multisite', data: { breadcrumbs: 'Multi-site' }, - children: [{ path: '', component: RgwMultisiteDetailsComponent }] + children: [ + { path: '', component: RgwMultisiteDetailsComponent }, + { + path: `sync-policy/${URLVerbs.CREATE}`, + component: RgwMultisiteSyncPolicyFormComponent, + data: { breadcrumbs: `${ActionLabels.CREATE} Sync Policy` } + }, + { + path: `sync-policy/${URLVerbs.EDIT}/:groupName`, + component: RgwMultisiteSyncPolicyFormComponent, + data: { breadcrumbs: `${ActionLabels.EDIT} Sync Policy` } + }, + { + path: `sync-policy/${URLVerbs.EDIT}/:groupName/:bucketName`, + component: RgwMultisiteSyncPolicyFormComponent, + data: { breadcrumbs: `${ActionLabels.EDIT} Sync Policy` } + } + ] + }, + { + path: 'nfs', + canActivateChild: [FeatureTogglesGuardService, ModuleStatusGuardService], + data: { + moduleStatusGuardConfig: { + uiApiPath: 'nfs-ganesha', + redirectTo: 'error', + section: 'nfs-ganesha', + section_info: 'NFS GANESHA', + header: 'NFS-Ganesha is not configured' + }, + breadcrumbs: 'NFS' + }, + children: [ + { path: '', component: NfsListComponent }, + { + path: URLVerbs.CREATE, + component: NfsFormComponent, + data: { breadcrumbs: ActionLabels.CREATE } + }, + { + path: `${URLVerbs.EDIT}/:cluster_id/:export_id`, + component: NfsFormComponent, + data: { breadcrumbs: ActionLabels.EDIT } + } + ] + }, + { + path: 'configuration', + data: { breadcrumbs: 'Configuration' }, + children: [{ path: '', component: RgwConfigurationPageComponent }] } ]; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/ceph-shared.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/ceph-shared.module.ts index 9e9f2917a47..9e276d5903b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/ceph-shared.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/ceph-shared.module.ts @@ -8,10 +8,11 @@ import { DataTableModule } from '~/app/shared/datatable/datatable.module'; import { SharedModule } from '~/app/shared/shared.module'; import { DeviceListComponent } from './device-list/device-list.component'; import { SmartListComponent } from './smart-list/smart-list.component'; +import { HealthChecksComponent } from './health-checks/health-checks.component'; @NgModule({ imports: [CommonModule, DataTableModule, SharedModule, NgbNavModule, NgxPipeFunctionModule], - exports: [DeviceListComponent, SmartListComponent], - declarations: [DeviceListComponent, SmartListComponent] + exports: [DeviceListComponent, SmartListComponent, HealthChecksComponent], + declarations: [DeviceListComponent, SmartListComponent, HealthChecksComponent] }) export class CephSharedModule {} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.html new file mode 100644 index 00000000000..9e9ff96e5f8 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.html @@ -0,0 +1,28 @@ +<ng-container *ngTemplateOutlet="logsLink"></ng-container> +<ul> + <li *ngFor="let check of healthData"> + <span [ngStyle]="check.severity | healthColor" + [class.health-warn-description]="check.severity === 'HEALTH_WARN'"> + {{ 
check.type }}</span>: {{ check.summary.message }} <br> + <div *ngIf="check.type === 'CEPHADM_FAILED_DAEMON'" + class="failed-daemons"> + <cd-help-text> + <b>Failed Daemons:</b> + <div *ngFor="let failedDaemons of getFailedDaemons(check.detail); let last = last"> + {{ failedDaemons }} + {{ !last ? ', ' : '' }} + </div> + </cd-help-text> + </div> + <div *ngFor="let details of check?.detail"> + <cd-help-text>{{ details?.message }}</cd-help-text> + </div> + </li> +</ul> + +<ng-template #logsLink> + <ng-container *ngIf="permissions.log.read"> + <p class="logs-link" + i18n><i [ngClass]="[icons.infoCircle]"></i> See <a routerLink="/logs">Logs</a> for more details.</p> + </ng-container> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.scss new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.scss diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.spec.ts new file mode 100644 index 00000000000..e9a4da80fd6 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.spec.ts @@ -0,0 +1,50 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { HealthChecksComponent } from './health-checks.component'; +import { HealthColorPipe } from '~/app/shared/pipes/health-color.pipe'; +import { By } from '@angular/platform-browser'; +import { CssHelper } from '~/app/shared/classes/css-helper'; + +describe('HealthChecksComponent', () => { + let component: HealthChecksComponent; + let fixture: ComponentFixture<HealthChecksComponent>; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [HealthChecksComponent, HealthColorPipe], + providers: [CssHelper] + }).compileComponents(); + + fixture = TestBed.createComponent(HealthChecksComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); + + it('should show the correct health warning for failed daemons', () => { + component.healthData = [ + { + severity: 'HEALTH_WARN', + summary: { + message: '1 failed cephadm daemon(s)', + count: 1 + }, + detail: [ + { + message: 'daemon ceph-exporter.ceph-node-00 on ceph-node-00 is in error state' + } + ], + muted: false, + type: 'CEPHADM_FAILED_DAEMON' + } + ]; + fixture.detectChanges(); + const failedDaemons = fixture.debugElement.query(By.css('.failed-daemons')); + expect(failedDaemons.nativeElement.textContent).toContain( + 'Failed Daemons: ceph-exporter.ceph-node-00 ' + ); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.ts new file mode 100644 index 00000000000..6c047bc4b78 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/shared/health-checks/health-checks.component.ts @@ -0,0 +1,28 @@ +import { Component, Input } from '@angular/core'; +import { Icons } from '~/app/shared/enum/icons.enum'; +import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; +import { Permissions } from 
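// Editor's note: getFailedDaemons() below depends on the fixed phrasing of
// CEPHADM_FAILED_DAEMON detail messages. A worked example, assuming every message
// matches 'daemon <name> on <host> is in error state':
//
//   'daemon ceph-exporter.ceph-node-00 on ceph-node-00 is in error state'
//     .split('daemon ')?.[1]  // 'ceph-exporter.ceph-node-00 on ceph-node-00 is in error state'
//     .split(' on ')[0]       // 'ceph-exporter.ceph-node-00'
//
// Note that the optional chaining only guards the [1] access; a message without a
// 'daemon ' prefix would still throw on the second split.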
'~/app/shared/models/permissions'; + +@Component({ + selector: 'cd-health-checks', + templateUrl: './health-checks.component.html', + styleUrls: ['./health-checks.component.scss'] +}) +export class HealthChecksComponent { + @Input() + healthData: any; + + icons = Icons; + + permissions: Permissions; + + constructor(private authStorageService: AuthStorageService) { + this.permissions = this.authStorageService.getPermissions(); + } + + getFailedDaemons(detail: any[]): string[] { + return detail.map( + (failedDaemons) => failedDaemons.message.split('daemon ')?.[1].split(' on ')[0] + ); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts index fc02e9bdeee..3b9e62c4829 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.spec.ts @@ -53,6 +53,8 @@ describe('LoginComponent', () => { component.login(); expect(routerNavigateSpy).toHaveBeenCalledTimes(1); - expect(routerNavigateSpy).toHaveBeenCalledWith(['/expand-cluster']); + expect(routerNavigateSpy).toHaveBeenCalledWith(['/expand-cluster'], { + queryParams: { welcome: true } + }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts index 57039c0f6d0..8bfda90c9e7 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/auth/login/login.component.ts @@ -71,7 +71,11 @@ export class LoginComponent implements OnInit { if (!this.postInstalled && this.route.snapshot.queryParams['returnUrl'] === '/dashboard') { url = '/expand-cluster'; } - this.router.navigate([url]); + if (url == '/expand-cluster') { + this.router.navigate([url], { queryParams: { welcome: true } }); + } else { + this.router.navigate([url]); + } }); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.html index 2b3c82bfe20..958ba64129a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.html @@ -2,7 +2,16 @@ <cd-navigation> <div class="container-fluid h-100" [ngClass]="{'dashboard': (router.url == '/dashboard' || router.url == '/dashboard_3' || router.url == '/multi-cluster/overview'), 'rgw-dashboard': (router.url == '/rgw/overview')}"> - <cd-context></cd-context> + <!-- ************************ --> + <!-- ALERTS BANNER --> + <!-- ************************ --> + <div class="cd-alert-container" + [ngClass]="{'ms-4 me-4': (router.url == '/dashboard' || router.url == '/dashboard_3' || router.url == '/multi-cluster/overview'), 'm-3': (router.url == '/rgw/overview')}"> + <cd-pwd-expiration-notification></cd-pwd-expiration-notification> + <cd-telemetry-notification></cd-telemetry-notification> + <cd-motd></cd-motd> + </div> + <cd-context></cd-context> <cd-breadcrumbs></cd-breadcrumbs> <router-outlet></router-outlet> </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.scss 
b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.scss index 32c0b2ae8c0..321f684da29 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.scss @@ -8,9 +8,16 @@ .container-fluid { overflow: auto; + padding-bottom: 48px; position: absolute; + top: 48px; } .rgw-dashboard { background-color: vv.$body-bg-alt; } + +.cd-alert-container { + display: flex; + flex-direction: column; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.spec.ts index faf8c9cdf94..22451d8206a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.spec.ts @@ -32,4 +32,56 @@ describe('WorkbenchLayoutComponent', () => { it('should create', () => { expect(component).toBeTruthy(); }); + + describe('showTopNotification', () => { + const notification1 = 'notificationName1'; + const notification2 = 'notificationName2'; + + beforeEach(() => { + component.notifications = []; + }); + + it('should show notification', () => { + component.showTopNotification(notification1, true); + expect(component.notifications.includes(notification1)).toBeTruthy(); + expect(component.notifications.length).toBe(1); + }); + + it('should not add a second notification if it is already shown', () => { + component.showTopNotification(notification1, true); + component.showTopNotification(notification1, true); + expect(component.notifications.includes(notification1)).toBeTruthy(); + expect(component.notifications.length).toBe(1); + }); + + it('should add a second notification if the first one is different', () => { + component.showTopNotification(notification1, true); + component.showTopNotification(notification2, true); + expect(component.notifications.includes(notification1)).toBeTruthy(); + expect(component.notifications.includes(notification2)).toBeTruthy(); + expect(component.notifications.length).toBe(2); + }); + + it('should hide an active notification', () => { + component.showTopNotification(notification1, true); + expect(component.notifications.includes(notification1)).toBeTruthy(); + expect(component.notifications.length).toBe(1); + component.showTopNotification(notification1, false); + expect(component.notifications.length).toBe(0); + }); + + it('should not fail if it tries to hide an inactive notification', () => { + expect(() => component.showTopNotification(notification1, false)).not.toThrow(); + expect(component.notifications.length).toBe(0); + }); + + it('should keep other notifications if it hides one', () => { + component.showTopNotification(notification1, true); + component.showTopNotification(notification2, true); + expect(component.notifications.length).toBe(2); + component.showTopNotification(notification2, false); + expect(component.notifications.length).toBe(1); + expect(component.notifications.includes(notification1)).toBeTruthy(); + }); + }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.ts index 
054ebf8bba1..230e6e7ae44 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/layouts/workbench-layout/workbench-layout.component.ts @@ -1,4 +1,4 @@ -import { Component, OnDestroy, OnInit } from '@angular/core'; +import { Component, HostBinding, OnDestroy, OnInit } from '@angular/core'; import { Router } from '@angular/router'; import { Subscription } from 'rxjs'; @@ -9,6 +9,9 @@ import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { FaviconService } from '~/app/shared/services/favicon.service'; import { SummaryService } from '~/app/shared/services/summary.service'; import { TaskManagerService } from '~/app/shared/services/task-manager.service'; +import { TelemetryNotificationService } from '../../../shared/services/telemetry-notification.service'; +import { MotdNotificationService } from '~/app/shared/services/motd-notification.service'; +import _ from 'lodash'; @Component({ selector: 'cd-workbench-layout', @@ -17,8 +20,12 @@ import { TaskManagerService } from '~/app/shared/services/task-manager.service'; providers: [FaviconService] }) export class WorkbenchLayoutComponent implements OnInit, OnDestroy { + notifications: string[] = []; private subs = new Subscription(); permissions: Permissions; + @HostBinding('class') get class(): string { + return 'top-notification-' + this.notifications.length; + } constructor( public router: Router, @@ -26,7 +33,9 @@ export class WorkbenchLayoutComponent implements OnInit, OnDestroy { private taskManagerService: TaskManagerService, private multiClusterService: MultiClusterService, private faviconService: FaviconService, - private authStorageService: AuthStorageService + private authStorageService: AuthStorageService, + private telemetryNotificationService: TelemetryNotificationService, + private motdNotificationService: MotdNotificationService ) { this.permissions = this.authStorageService.getPermissions(); } @@ -38,8 +47,36 @@ export class WorkbenchLayoutComponent implements OnInit, OnDestroy { } this.subs.add(this.summaryService.startPolling()); this.subs.add(this.taskManagerService.init(this.summaryService)); + + this.subs.add( + this.authStorageService.isPwdDisplayed$.subscribe((isDisplayed) => { + this.showTopNotification('isPwdDisplayed', isDisplayed); + }) + ); + this.subs.add( + this.telemetryNotificationService.update.subscribe((visible: boolean) => { + this.showTopNotification('telemetryNotificationEnabled', visible); + }) + ); + this.subs.add( + this.motdNotificationService.motd$.subscribe((motd: any) => { + this.showTopNotification('motdNotificationEnabled', _.isPlainObject(motd)); + }) + ); this.faviconService.init(); } + showTopNotification(name: string, isDisplayed: boolean) { + if (isDisplayed) { + if (!this.notifications.includes(name)) { + this.notifications.push(name); + } + } else { + const index = this.notifications.indexOf(name); + if (index >= 0) { + this.notifications.splice(index, 1); + } + } + } ngOnDestroy() { this.subs.unsubscribe(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.html index eda1e83be54..ddadef6c20f 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.html +++ 
b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.html @@ -1,23 +1,24 @@ -<div ngbDropdown
- placement="bottom-right"
- *ngIf="userPermission.read">
- <a ngbDropdownToggle
- class="dropdown-toggle"
- i18n-title
- title="Dashboard Settings"
- role="button">
- <i [ngClass]="[icons.deepCheck]"></i>
- <span i18n
- class="d-md-none">Dashboard Settings</span>
- </a>
- <div ngbDropdownMenu>
- <button ngbDropdownItem
- *ngIf="userPermission.read"
+<cds-overflow-menu [customTrigger]="customTrigger"
+ [offset]="{y:0, x:-80}">
+ <li class="cds--overflow-menu-options__option mb-2">
+ <button *ngIf="userPermission.read"
 routerLink="/user-management"
+ class="cds--overflow-menu-options__btn"
 i18n>User management</button>
- <button ngbDropdownItem
- *ngIf="configOptPermission.read"
+ </li>
+ <li class="cds--overflow-menu-options__option mb-2">
+ <button *ngIf="configOptPermission.read"
 routerLink="/telemetry"
+ class="cds--overflow-menu-options__btn"
 i18n>Telemetry configuration</button>
- </div>
-</div>
+ </li>
+</cds-overflow-menu>
+
+<ng-template #customTrigger>
+ <svg cdsIcon="settings"
+ size="20"
+ title="Dashboard Settings"
+ *ngIf="userPermission.read"></svg>
+ <span i18n
+ class="d-md-none">Dashboard Settings</span>
+</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.ts index 60cd17ec68a..265d01b5b7b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/administration/administration.component.ts @@ -1,6 +1,5 @@ import { Component } from '@angular/core'; -import { Icons } from '~/app/shared/enum/icons.enum'; import { Permission } from '~/app/shared/models/permissions'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; @@ -12,7 +11,6 @@ import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; export class AdministrationComponent { userPermission: Permission; configOptPermission: Permission; - icons = Icons; constructor(private authStorageService: AuthStorageService) { const permissions = this.authStorageService.getPermissions(); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.spec.ts index f10c6a56d85..b92b2ae497e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.spec.ts @@ -79,7 +79,7 @@ describe('BreadcrumbsComponent', () => { tick(); expect(component.crumbs).toEqual([ { path: null, text: 'Cluster' }, - { path: '/hosts', text: 'Hosts' } + { path: '/hosts', text: 'Hosts', disableSplit: false } ]); })); @@ -125,9 +125,9 @@ describe('BreadcrumbsComponent', () => { }); tick(); expect(component.crumbs).toEqual([ - { path: null, text: 'Block' }, - { path: '/block/rbd', text: 'Images' }, - { path: '/block/rbd/add', text: 'Add' } + { path: null, text: 'Block', disableSplit: false }, + { path: '/block/rbd', text: 'Images', disableSplit: false }, + { path: '/block/rbd/add', text: 'Add', disableSplit: false } ]); })); diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.ts index 860b89ec90b..82d69fbf5d1 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/breadcrumbs/breadcrumbs.component.ts @@ -115,7 +115,7 @@ export class BreadcrumbsComponent implements OnDestroy { const result: IBreadcrumb[] = []; breadcrumbs.forEach((element) => { const split = element.text.split('/'); - if (split.length > 1) { + if (!element.disableSplit && split.length > 1) { element.text = split[split.length - 1]; for (let i = 0; i < split.length - 1; i++) { result.push({ text: split[i], path: null }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/dashboard-help/dashboard-help.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/dashboard-help/dashboard-help.component.html index 34ff79a115e..5a6ca4691a2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/dashboard-help/dashboard-help.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/dashboard-help/dashboard-help.component.html @@ -1,32 +1,37 @@ -<div ngbDropdown - placement="bottom-right"> - <a ngbDropdownToggle - i18n-title - title="Help" - role="button"> - <i [ngClass]="[icons.questionCircle]"></i> - <span i18n - class="d-md-none">Help</span> - </a> - <div ngbDropdownMenu> - <a ngbDropdownItem - [ngClass]="{'disabled': !docsUrl}" +<cds-overflow-menu [customTrigger]="customTrigger" + [offset]="{y:0, x:-80}"> + <li> + <a [ngClass]="{'cds--overflow-menu-options__btn': true, 'disabled': !docsUrl}" href="{{ docsUrl }}" target="_blank" i18n>Documentation - <i class="fa fa-external-link"></i> + <svg cdsIcon="launch" + class="ms-2" + size="16"></svg> </a> - <a ngbDropdownItem - routerLink="/api-docs" + </li> + <li> + <a routerLink="/api-docs" target="_blank" + class="cds--overflow-menu-options__btn" i18n>API - <i class="fa fa-external-link"></i> + <svg cdsIcon="launch" + class="ms-2" + size="16"></svg> </a> - <button ngbDropdownItem - (click)="openAboutModal()" - i18n>About</button> - <button ngbDropdownItem - (click)="openFeedbackModal()" - i18n>Report an issue...</button> - </div> -</div> + </li> + <cds-overflow-menu-option (click)="openAboutModal()" + i18n>About</cds-overflow-menu-option> + <cds-overflow-menu-option (click)="openFeedbackModal()" + i18n>Report an issue...</cds-overflow-menu-option> +</cds-overflow-menu> + +<ng-template #customTrigger> + <svg cdsIcon="help" + size="20" + i18n-title + title="Help" + role="button"></svg> + <span i18n + class="d-md-none">Help</span> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/identity/identity.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/identity/identity.component.html index 61e0e0527fe..871cb27a388 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/identity/identity.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/identity/identity.component.html @@ -1,28 +1,36 @@ -<div ngbDropdown - placement="bottom-right"> - <a ngbDropdownToggle - i18n-title - title="Logged in user" - role="button"> - <i [ngClass]="[icons.user]"></i> - <span i18n - class="d-md-none">Logged in user</span> - </a> - <div ngbDropdownMenu> - <button ngbDropdownItem - disabled - i18n>Signed in as 
<strong>{{ username }}</strong></button> - <hr class="dropdown-divider" /> - <button ngbDropdownItem - *ngIf="!sso" - routerLink="/user-profile/edit"> - <i [ngClass]="[icons.lock]"></i> - <span i18n>Change password</span> - </button> - <button ngbDropdownItem - (click)="logout()"> - <i [ngClass]="[icons.signOut]"></i> - <span i18n>Sign out</span> - </button> - </div> -</div> +<cds-overflow-menu [customTrigger]="customTrigger" + [offset]="{y:0, x:-80}"> + <li disabled="true" + class="show cds--overflow-menu-options__option cds--overflow-menu-options__option--disabled my-2" + i18n> + <div class="cds--overflow-menu-options__btn">Signed in as {{ username }} + </div> + </li> + <li class="cds--overflow-menu-options__option cds--overflow-menu--divider mb-2"> + <button *ngIf="!sso" + routerLink="/user-profile/edit" + class="cds--overflow-menu-options__btn" + i18n> + <svg cdsIcon="locked" + class="me-2" + size="16"></svg>Change password + </button> + </li> + <li class="cds--overflow-menu-options__option mb-2"> + <button (click)="logout()" + data-testid="logout" + class="cds--overflow-menu-options__btn" + i18n><svg cdsIcon="logout" + class="me-2" + size="16"></svg>Sign out + </button> + </li> +</cds-overflow-menu> + +<ng-template #customTrigger> + <svg cdsIcon="user--filled" + size="20" + title="user"></svg> + <span i18n + class="d-md-none">Logged in user</span> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation.module.ts index c8d2a9d9cab..958dfb4c00a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation.module.ts @@ -4,6 +4,14 @@ import { RouterModule } from '@angular/router'; import { NgbCollapseModule, NgbDropdownModule } from '@ng-bootstrap/ng-bootstrap'; import { SimplebarAngularModule } from 'simplebar-angular'; +import { + UIShellModule, + IconService, + IconModule, + ThemeModule, + DialogModule, + GridModule +} from 'carbon-components-angular'; import { AppRoutingModule } from '~/app/app-routing.module'; import { SharedModule } from '~/app/shared/shared.module'; @@ -17,6 +25,23 @@ import { IdentityComponent } from './identity/identity.component'; import { NavigationComponent } from './navigation/navigation.component'; import { NotificationsComponent } from './notifications/notifications.component'; +// Icons +import UserFilledIcon from '@carbon/icons/es/user--filled/20'; +import SettingsIcon from '@carbon/icons/es/settings/20'; +import HelpIcon from '@carbon/icons/es/help/20'; +import NotificationIcon from '@carbon/icons/es/notification/20'; +import LaunchIcon from '@carbon/icons/es/launch/16'; +import DashboardIcon from '@carbon/icons/es/template/20'; +import ClusterIcon from '@carbon/icons/es/web-services--cluster/20'; +import MultiClusterIcon from '@carbon/icons/es/edge-cluster/20'; +import BlockIcon from '@carbon/icons/es/datastore/20'; +import ObjectIcon from '@carbon/icons/es/object-storage/20'; +import FileIcon from '@carbon/icons/es/file-storage/20'; +import ObservabilityIcon from '@carbon/icons/es/observed--hail/20'; +import AdminIcon from '@carbon/icons/es/network--admin-control/20'; +import LockedIcon from '@carbon/icons/es/locked/16'; +import LogoutIcon from '@carbon/icons/es/logout/16'; + @NgModule({ imports: [ CommonModule, @@ -26,7 +51,12 @@ import { NotificationsComponent } from './notifications/notifications.component' AppRoutingModule, 
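// Editor's note: each '@carbon/icons/es/<name>/<size>' import above resolves to a
// pre-sized icon descriptor; registering them once via IconService.registerAll()
// in the NavigationModule constructor below is what lets templates reference them
// declaratively, e.g. <svg cdsIcon="settings" size="20">. Only the registration
// pattern is taken from this diff; the descriptor internals are
// carbon-components-angular's concern.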
SharedModule, SimplebarAngularModule, - RouterModule + RouterModule, + UIShellModule, + IconModule, + ThemeModule, + DialogModule, + GridModule ], declarations: [ AboutComponent, @@ -40,4 +70,24 @@ import { NotificationsComponent } from './notifications/notifications.component' ], exports: [NavigationComponent, BreadcrumbsComponent] }) -export class NavigationModule {} +export class NavigationModule { + constructor(private iconService: IconService) { + this.iconService.registerAll([ + UserFilledIcon, + SettingsIcon, + HelpIcon, + NotificationIcon, + LaunchIcon, + DashboardIcon, + ClusterIcon, + MultiClusterIcon, + BlockIcon, + ObjectIcon, + FileIcon, + ObservabilityIcon, + AdminIcon, + LockedIcon, + LogoutIcon + ]); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.html index 2ef529e142f..1f25dabec4b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.html @@ -1,427 +1,333 @@ <div class="cd-navbar-main"> - <cd-pwd-expiration-notification></cd-pwd-expiration-notification> - <cd-telemetry-notification></cd-telemetry-notification> - <cd-motd></cd-motd> + <!-- ************************ --> + <!-- NOTIFICATIONS --> + <!-- ************************ --> <cd-notifications-sidebar></cd-notifications-sidebar> - <div class="cd-navbar-top"> - <nav class="navbar navbar-expand-md navbar-dark cd-navbar-brand"> - <button class="btn btn-link py-0" - (click)="showMenuSidebar = !showMenuSidebar" - aria-label="toggle sidebar visibility"> - <i [ngClass]="[icons.bars, icons.large2x]" - aria-hidden="true"></i> - </button> - - <a class="navbar-brand ms-2" - routerLink="/dashboard"> - <img src="assets/Ceph_Ceph_Logo_with_text_white.svg" - alt="Ceph" /> - </a> - - <button type="button" - class="navbar-toggler" - (click)="toggleRightSidebar()"> - <span i18n - class="sr-only">Toggle navigation</span> - <span> - <i [ngClass]="[icons.navicon, icons.large]"></i> - </span> - </button> - - <div class="collapse navbar-collapse" - [ngClass]="{'show': rightSidebarOpen}"> - <ng-container *ngIf="clustersMap?.size > 1"> - <div ngbDropdown - placement="bottom-left" - class="d-inline-block ms-5"> - <button ngbDropdownToggle - class="btn btn-outline-light cd-context-bar" - i18n-title - title="Selected Cluster:"> - <span class="dropdown-text"> {{ selectedCluster?.name }} </span> - <span>- {{ selectedCluster?.cluster_alias }} - {{ selectedCluster?.user }}</span> - </button> - <div ngbDropdownMenu> - <ng-container *ngFor="let cluster of clustersMap | keyvalue"> - <button ngbDropdownItem - (click)="onClusterSelection(cluster.value)" - [disabled]="cluster.value.cluster_connection_status === 1"> - <div class="dropdown-text">{{ cluster.value.name }}</div> - <div *ngIf="cluster.value.cluster_alias" - class="text-secondary">{{ cluster.value.cluster_alias }} - {{ cluster.value.user }}</div> - </button> - </ng-container> - </div> - </div> + <!-- ************************ --> + <!-- HEADER --> + <!-- ************************ --> + <cds-header name="Ceph Dashboard" + class="cd-navbar-top" + [brand]="brandTemplate"> + <cds-hamburger [active]="showMenuSidebar" + data-testid="main-menu-toggler" + (selected)="showMenuSidebar = !showMenuSidebar"></cds-hamburger> + <!-- ************************* --> + <!-- CLUSTER SWITCHER TEMPLATE --> + <!-- 
************************* -->
+ <cds-header-navigation class="cluster-switcher"
+ *ngIf="clustersMap?.size > 1">
+ <cds-header-menu [title]="currentClusterName"
+ data-testid="selected-cluster">
+ <ng-container *ngFor="let cluster of clustersMap | keyvalue; trackBy:trackByFn ">
+ <cds-header-item (click)="onClusterSelection(cluster.value)"
+ [class.disabled]="cluster.value.cluster_connection_status === 1"
+ data-testid="select-a-cluster">
+ {{ cluster.value.name }} - {{ cluster.value?.cluster_alias }} - {{ cluster.value?.user }}
+ </cds-header-item>
 </ng-container>
- <ul class="nav navbar-nav cd-navbar-utility my-2 my-md-0">
- <ng-container *ngTemplateOutlet="cd_utilities"> </ng-container>
- </ul>
- </div>
- </nav>
- </div>
+ </cds-header-menu>
+ </cds-header-navigation>
+ <cds-header-global>
+ <cds-header-navigation>
+ <cd-language-selector class="d-flex"></cd-language-selector>
+ </cds-header-navigation>
+ <div class="cds--btn cds--btn--icon-only cds--header__action">
+ <cd-notifications (click)="toggleRightSidebar()"></cd-notifications>
+ </div>
+ <div class="cds--btn cds--btn--icon-only cds--header__action">
+ <cd-dashboard-help></cd-dashboard-help>
+ </div>
+ <div class="cds--btn cds--btn--icon-only cds--header__action">
+ <cd-administration></cd-administration>
+ </div>
+ <div class="cds--btn cds--btn--icon-only cds--header__action">
+ <cd-identity></cd-identity>
+ </div>
+ </cds-header-global>
+ </cds-header>
+ <!-- ***************************** -->
+ <!-- LOGO BRAND TEMPLATE -->
+ <!-- ***************************** -->
+ <ng-template #brandTemplate>
+ <a class="cds--header__name navbar-brand ms-3"
+ routerLink="/dashboard">
+ <img src="assets/Ceph_Ceph_Logo_with_text_white.svg"
+ alt="Ceph" />
+ </a>
+ </ng-template>
+ <!-- **************************************** -->
+ <!-- WRAPPER AROUND SIDENAV AND MAIN CONTENT -->
+ <!-- **************************************** -->
 <div class="wrapper">
 <!-- Content -->
 <nav id="sidebar"
 [ngClass]="{'active': !showMenuSidebar}">
- <ngx-simplebar [options]="simplebar">
- <ul class="list-unstyled components cd-navbar-primary">
- <ng-container *ngTemplateOutlet="cd_menu"> </ng-container>
- </ul>
- </ngx-simplebar>
+ <ng-container *ngTemplateOutlet="cd_menu"></ng-container>
 </nav>
- <!-- Page Content -->
 <div id="content"
 [ngClass]="{'active': !showMenuSidebar}">
 <ng-content></ng-content>
 </div>
 </div>
-
- <ng-template #cd_utilities>
- <li class="nav-item">
- <cd-language-selector class="cd-navbar"></cd-language-selector>
- </li>
- <li class="nav-item">
- <cd-notifications class="cd-navbar"
- (click)="toggleRightSidebar()"></cd-notifications>
- </li>
- <li class="nav-item">
- <cd-dashboard-help class="cd-navbar"></cd-dashboard-help>
- </li>
- <li class="nav-item">
- <cd-administration class="cd-navbar"></cd-administration>
- </li>
- <li class="nav-item">
- <cd-identity class="cd-navbar"></cd-identity>
- </li>
- </ng-template>
-
+ <!-- ************************ -->
+ <!-- SIDENAV -->
+ <!-- ************************ -->
 <ng-template #cd_menu>
 <ng-container *ngIf="enabledFeature$ | async as enabledFeature">
- <!-- Dashboard -->
- <li routerLinkActive="active"
- class="nav-item tc_menuitem_dashboard">
- <a routerLink="/dashboard"
- class="nav-link">
+ <div cdsTheme="theme">
+ <cds-sidenav [expanded]="showMenuSidebar"
+ class="mt-5">
+ <!-- Dashboard -->
+ <cds-sidenav-item route="/dashboard"
+ [useRouter]="true"
+ title="Dashboard"
+ i18n-title
+ class="nav-item tc_menuitem_dashboard">
+ <svg cdsIcon="template"
+ icon
+ size="20"></svg>
 <span i18n>
- 
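<!-- Editor's note: the migration pattern repeated through the rest of this template
     swaps the ngbCollapse-based <ul> sidebar for Carbon shell components:
     cds-sidenav-menu replaces each collapsible group, and cds-sidenav-item with
     route="..." plus [useRouter]="true" stands in for the old
     routerLink/routerLinkActive anchors, delegating navigation to the Angular
     router. The <svg cdsIcon="..." icon> children use the icons registered in
     NavigationModule. -->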
<i [ngClass]="[icons.areaChart]"></i> Dashboard</span> - <i - *ngIf="summaryData?.health_status !== 'HEALTH_OK'" - [ngClass]="[icons.circle]" - [ngStyle]="summaryData?.health_status | healthColor"> - </i> - </a> - </li> - <!-- Multi-cluster Dashboard --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_multi_cluster"> - <a (click)="toggleSubMenu('multiCluster')" - class="nav-link dropdown-toggle" - [attr.aria-expanded]="displayedSubMenu.multiCluster" - aria-controls="multi-cluster-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.sitemap]"></i> - Multi-Cluster - </ng-container> - </a> - <ul class="list-unstyled" - id="multi-cluster-nav" - [ngbCollapse]="!displayedSubMenu.multiCluster"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_multiCluster_overview"> - <a i18n - routerLink="/multi-cluster/overview">Overview</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_multiCluster_manage_clusters"> - <a i18n - routerLink="/multi-cluster/manage-clusters">Manage Clusters</a> - </li> - </ul> - </li> - <!-- Cluster --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_cluster" - *ngIf="permissions.hosts.read || permissions.monitor.read || - permissions.osd.read || permissions.pool.read"> - <a (click)="toggleSubMenu('cluster')" - class="nav-link dropdown-toggle" - [attr.aria-expanded]="displayedSubMenu.cluster" - aria-controls="cluster-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.sitemap]"></i> - Cluster - </ng-container> - </a> - <ul class="list-unstyled" - id="cluster-nav" - [ngbCollapse]="!displayedSubMenu.cluster"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_pool" - *ngIf="permissions.pool.read"> - <a i18n - routerLink="/pool">Pools</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_hosts" - *ngIf="permissions.hosts.read"> - <a i18n - routerLink="/hosts">Hosts</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_osds" - *ngIf="permissions.osd.read"> - <a i18n - routerLink="/osd">OSDs</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_inventory" - *ngIf="permissions.hosts.read"> - <a i18n - routerLink="/inventory">Physical Disks</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_crush" - *ngIf="permissions.osd.read"> - <a i18n - routerLink="/crush-map">CRUSH map</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_cluster_monitor" - *ngIf="permissions.monitor.read"> - <a i18n - routerLink="/monitor/">Monitors</a> - </li> - </ul> - </li> - - <!-- Block Storage --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_block" - *ngIf="(permissions.rbdImage.read || permissions.rbdMirroring.read || permissions.iscsi.read) && - (enabledFeature.rbd || enabledFeature.mirroring || enabledFeature.iscsi)"> - <a class="nav-link dropdown-toggle" - (click)="toggleSubMenu('block')" - [attr.aria-expanded]="displayedSubMenu.block" - aria-controls="block-nav" - role="button" - [ngStyle]="blockHealthColor()"> - <ng-container i18n> - <i [ngClass]="[icons.database]"></i> - Block - </ng-container> - </a> - - <ul class="list-unstyled" - id="block-nav" - [ngbCollapse]="!displayedSubMenu.block"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_block_images" - *ngIf="permissions.rbdImage.read && enabledFeature.rbd"> - <a i18n - 
routerLink="/block/rbd">Images</a>
- </li>
-
- <li routerLinkActive="active"
- class="tc_submenuitem tc_submenuitem_block_mirroring"
- *ngIf="permissions.rbdMirroring.read && enabledFeature.mirroring">
- <a routerLink="/block/mirroring">
- <ng-container i18n>Mirroring</ng-container>
+ </cds-sidenav-item>
+ <!-- Multi-cluster Dashboard -->
+ <cds-sidenav-menu title="Multi-Cluster"
+ i18n-title>
+ <svg cdsIcon="edge-cluster"
+ icon
+ size="20"></svg>
+ <cds-sidenav-item route="/multi-cluster/overview"
+ title="Overview"
+ i18n-title
+ [useRouter]="true"
+ class="tc_submenuitem tc_submenuitem_multiCluster_overview"><span i18n>Overview</span></cds-sidenav-item>
+ <cds-sidenav-item route="/multi-cluster/manage-clusters"
+ title="Manage Clusters"
+ i18n-title
+ [useRouter]="true"
+ class="tc_submenuitem tc_submenuitem_multiCluster_manage_clusters"><span i18n>Manage Clusters</span></cds-sidenav-item>
+ </cds-sidenav-menu>
+ <!-- Cluster -->
+ <cds-sidenav-menu title="Cluster"
+ i18n-title
+ *ngIf="permissions.hosts.read || permissions.monitor.read || permissions.osd.read || permissions.pool.read"
+ class="tc_menuitem_cluster">
+ <svg cdsIcon="web-services--cluster"
+ icon
+ size="20"></svg>
+ <cds-sidenav-item route="/pool"
+ [useRouter]="true"
+ title="Pools"
+ i18n-title
+ *ngIf="permissions.pool.read"
+ class="tc_submenuitem tc_submenuitem_cluster_pool"><span i18n>Pools</span></cds-sidenav-item>
+ <cds-sidenav-item route="/hosts"
+ [useRouter]="true"
+ title="Hosts"
+ i18n-title
+ *ngIf="permissions.hosts.read"
+ class="tc_submenuitem tc_submenuitem_cluster_hosts"><span i18n>Hosts</span></cds-sidenav-item>
+ <cds-sidenav-item route="/osd"
+ [useRouter]="true"
+ title="OSDs"
+ i18n-title
+ *ngIf="permissions.osd.read"
+ class="tc_submenuitem tc_submenuitem_cluster_osds"><span i18n>OSDs</span></cds-sidenav-item>
+ <cds-sidenav-item route="/inventory"
+ [useRouter]="true"
+ title="Physical Disks"
+ i18n-title
+ *ngIf="permissions.hosts.read"
+ class="tc_submenuitem tc_submenuitem_cluster_inventory"><span i18n>Physical Disks</span></cds-sidenav-item>
+ <cds-sidenav-item route="/crush-map"
+ [useRouter]="true"
+ title="CRUSH Map"
+ i18n-title
+ *ngIf="permissions.osd.read"
+ class="tc_submenuitem tc_submenuitem_cluster_crush"><span i18n>CRUSH Map</span></cds-sidenav-item>
+ <cds-sidenav-item route="/monitor"
+ [useRouter]="true"
+ title="Monitors"
+ i18n-title
+ *ngIf="permissions.monitor.read"
+ class="tc_submenuitem tc_submenuitem_cluster_monitor"><span i18n>Monitors</span></cds-sidenav-item>
+ </cds-sidenav-menu>
+ <!-- Block Storage -->
+ <cds-sidenav-menu title="Block"
+ i18n-title
+ *ngIf="(permissions.rbdImage.read || permissions.rbdMirroring.read || permissions.iscsi.read) && (enabledFeature.rbd || enabledFeature.mirroring || enabledFeature.iscsi)"
+ class="tc_menuitem_block">
+ <svg cdsIcon="datastore"
+ icon
+ size="20"></svg>
+ <cds-sidenav-item route="/block/rbd"
+ [useRouter]="true"
+ title="Images"
+ i18n-title
+ *ngIf="permissions.rbdImage.read && enabledFeature.rbd"
+ class="tc_submenuitem tc_submenuitem_block_images"><span i18n>Images</span></cds-sidenav-item>
+ <cds-sidenav-item route="/block/mirroring"
+ [useRouter]="true"
+ title="Mirroring"
+ i18n-title
+ *ngIf="permissions.rbdMirroring.read && enabledFeature.mirroring"
+ class="tc_submenuitem tc_submenuitem_block_mirroring">
+ <span i18n>Mirroring
 <small *ngIf="summaryData?.rbd_mirroring?.warnings !== 0"
 class="badge badge-warning">{{ summaryData?.rbd_mirroring?.warnings }}</small>
 <small 
*ngIf="summaryData?.rbd_mirroring?.errors !== 0" class="badge badge-danger">{{ summaryData?.rbd_mirroring?.errors }}</small> - </a> - </li> - - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_block_iscsi" - *ngIf="permissions.iscsi.read && enabledFeature.iscsi"> - <a i18n - routerLink="/block/iscsi">iSCSI</a> - </li> - </ul> - </li> - - <!-- Object Storage --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_rgw" - *ngIf="permissions.rgw.read && enabledFeature.rgw"> - <a class="nav-link dropdown-toggle" - (click)="toggleSubMenu('object')" - [attr.aria-expanded]="displayedSubMenu.object" - aria-controls="gateway-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.cubes]"></i> - Object - </ng-container> - </a> - <ul class="list-unstyled" - id="gateway-nav" - [ngbCollapse]="!displayedSubMenu.object"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_overview"> - <a i18n - routerLink="/rgw/overview">Overview</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_buckets"> - <a i18n - routerLink="/rgw/bucket">Buckets</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_users"> - <a i18n - routerLink="/rgw/user">Users</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_buckets"> - <a i18n - routerLink="/rgw/multisite">Multi-site</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_daemons"> - <a i18n - routerLink="/rgw/daemon">Gateways</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_rgw_nfs" - *ngIf="permissions.nfs.read && enabledFeature.nfs"> - <a i18n - class="nav-link" - routerLink="/nfs">NFS</a> - </li> - </ul> - </li> - - <!-- Filesystem --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_file" - *ngIf="permissions.nfs.read && enabledFeature.nfs - || permissions.cephfs.read && enabledFeature.cephfs"> - <a class="nav-link dropdown-toggle" - (click)="toggleSubMenu('file')" - [attr.aria-expanded]="displayedSubMenu.file" - aria-controls="filesystem-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.text]"></i> - File - </ng-container> - </a> - <ul class="list-unstyled" - id="filesystem-nav" - [ngbCollapse]="!displayedSubMenu.file"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_file_cephfs" - *ngIf="permissions.cephfs.read && enabledFeature.cephfs"> - <a i18n - class="nav-link" - routerLink="/cephfs">File Systems</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_file_nfs" - *ngIf="permissions.nfs.read && enabledFeature.nfs"> - <a i18n - class="nav-link" - routerLink="/nfs">NFS</a> - </li> - </ul> - </li> - - - <!-- Observability --> - <li routerLinkActive="active" - class="nav-item tc_menuitem_observe" - *ngIf="permissions.log.read || permissions.prometheus.read"> - <a class="nav-link dropdown-toggle" - (click)="toggleSubMenu('observe')" - [attr.aria-expanded]="displayedSubMenu.observe" - aria-controls="observe-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.eye]"></i> - Observability - </ng-container> - </a> - <ul class="list-unstyled" - id="observe-nav" - [ngbCollapse]="!displayedSubMenu.observe"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_observe_log" - *ngIf="permissions.log.read"> - <a i18n - routerLink="/logs">Logs</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_observe_monitoring" 
- *ngIf="permissions.prometheus.read"> - <a routerLink="/monitoring"> - <ng-container i18n>Alerts</ng-container> + </span> + </cds-sidenav-item> + <cds-sidenav-item route="/block/iscsi" + [useRouter]="true" + title="iSCSI" + i18n-title + *ngIf="permissions.iscsi.read && enabledFeature.iscsi" + class="tc_submenuitem tc_submenuitem_block_iscsi"><span i18n>iSCSI</span></cds-sidenav-item> + <cds-sidenav-item route="/block/nvmeof" + [useRouter]="true" + title="NVMe/TCP" + i18n-title><span i18n>NVMe/TCP</span></cds-sidenav-item> + </cds-sidenav-menu> + <!-- Object Storage --> + <cds-sidenav-menu title="Object" + i18n-title + *ngIf="permissions.rgw.read && enabledFeature.rgw" + class="nav-item tc_menuitem_rgw"> + <svg cdsIcon="object-storage" + icon + size="20"></svg> + <cds-sidenav-item route="/rgw/overview" + title="Overview" + i18n-title + [useRouter]="true" + class="tc_submenuitem tc_submenuitem_rgw_overview"><span i18n>Overview</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/bucket" + title="Buckets" + i18n-title + [useRouter]="true" + class="tc_submenuitem tc_submenuitem_rgw_buckets"><span i18n>Buckets</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/user" + title="Users" + i18n-title + [useRouter]="true" + class="tc_submenuitem tc_submenuitem_rgw_users"><span i18n>Users</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/multisite" + title="Multi-site" + i18n-title + [useRouter]="true" + class="tc_submenuitem tc_submenuitem_rgw_buckets"><span i18n>Multi-site</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/daemon" + title="Gateways" + i18n-title + [useRouter]="true" + class="tc_submenuitem tc_submenuitem_rgw_daemons"><span i18n>Gateways</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/nfs" + [useRouter]="true" + title="NFS" + i18n-title + *ngIf="permissions.nfs.read && enabledFeature.nfs" + class="tc_submenuitem tc_submenuitem_rgw_nfs"><span i18n>NFS</span></cds-sidenav-item> + <cds-sidenav-item route="/rgw/configuration" + [useRouter]="true" + title="Configuration" + i18n-title + class="tc_submenuitem tc_submenuitem_rgw_configuration"><span i18n>Configuration</span></cds-sidenav-item> + </cds-sidenav-menu> + <!-- Filesystem --> + <cds-sidenav-menu title="File" + i18n-title + *ngIf="permissions.nfs.read && enabledFeature.nfs || permissions.cephfs.read && enabledFeature.cephfs" + class="tc_menuitem_file"> + <svg cdsIcon="file-storage" + icon + size="20"></svg> + <cds-sidenav-item route="/cephfs/fs" + [useRouter]="true" + title="File Systems" + i18n-title + *ngIf="permissions.cephfs.read && enabledFeature.cephfs" + class="tc_submenuitem tc_submenuitem_file_cephfs"><span i18n>File Systems</span></cds-sidenav-item> + <cds-sidenav-item route="/cephfs/nfs" + [useRouter]="true" + title="NFS" + i18n-title + *ngIf="permissions.nfs.read && enabledFeature.nfs" + class="tc_submenuitem tc_submenuitem_file_nfs"><span i18n>NFS</span></cds-sidenav-item> + </cds-sidenav-menu> + <!-- Observability --> + <cds-sidenav-menu title="Observability" + i18n-title + *ngIf="permissions.log.read || permissions.prometheus.read" + class="tc_menuitem_observe"> + <svg cdsIcon="observed--hail" + icon + size="20"></svg> + <cds-sidenav-item route="/logs" + [useRouter]="true" + title="Logs" + i18n-title + *ngIf="permissions.log.read" + class="tc_submenuitem tc_submenuitem_observe_log"><span i18n>Logs</span></cds-sidenav-item> + <cds-sidenav-item route="/monitoring" + [useRouter]="true" + title="Alerts" + i18n-title + *ngIf="permissions.prometheus.read" + class="tc_submenuitem 
tc_submenuitem_observe_monitoring"> + <span i18n> + <ng-container>Alerts</ng-container> <small *ngIf="prometheusAlertService.activeCriticalAlerts > 0" class="badge badge-danger ms-1">{{ prometheusAlertService.activeCriticalAlerts }}</small> <small *ngIf="prometheusAlertService.activeWarningAlerts > 0" class="badge badge-warning ms-1">{{ prometheusAlertService.activeWarningAlerts }}</small> - </a> - </li> - </ul> - </li> - <li routerLinkActive="active" - class="nav-item tc_menuitem_admin" - *ngIf="permissions.configOpt.read || - permissions.hosts.read"> - <a class="nav-link dropdown-toggle" - (click)="toggleSubMenu('admin')" - [attr.aria-expanded]="displayedSubMenu.admin" - aria-controls="admin-nav" - role="button"> - <ng-container i18n> - <i [ngClass]="[icons.cogs]"></i> - Administration - </ng-container> - </a> - <ul class="list-unstyled" - id="admin-nav" - [ngbCollapse]="!displayedSubMenu.admin"> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_admin_services" - *ngIf="permissions.hosts.read"> - <a i18n - routerLink="/services/">Services</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_admin_upgrade" - *ngIf="permissions.configOpt.read"> - <a i18n - routerLink="/upgrade">Upgrade</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_admin_users" - *ngIf="permissions.configOpt.read"> - <a i18n - routerLink="/ceph-users">Ceph Users</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_admin_modules" - *ngIf="permissions.configOpt.read"> - <a i18n - routerLink="/mgr-modules">Manager Modules</a> - </li> - <li routerLinkActive="active" - class="tc_submenuitem tc_submenuitem_admin_configuration" - *ngIf="permissions.configOpt.read"> - <a i18n - routerLink="/configuration">Configuration</a> - </li> - </ul> - </li> + </span> + </cds-sidenav-item> + </cds-sidenav-menu> + <!-- Administration --> + <cds-sidenav-menu title="Administration" + i18n-title + *ngIf="permissions.configOpt.read || permissions.hosts.read" + class="tc_menuitem_admin"> + <svg cdsIcon="network--admin-control" + icon + size="20"></svg> + <cds-sidenav-item route="/services/" + [useRouter]="true" + title="Services" + i18n-title + *ngIf="permissions.hosts.read" + class="tc_submenuitem tc_submenuitem_admin_services"><span i18n>Services</span></cds-sidenav-item> + <cds-sidenav-item route="/upgrade" + [useRouter]="true" + title="Upgrade" + i18n-title + *ngIf="permissions.configOpt.read" + class="tc_submenuitem tc_submenuitem_admin_upgrade"><span i18n>Upgrade</span></cds-sidenav-item> + <cds-sidenav-item route="/ceph-users" + [useRouter]="true" + title="Ceph Users" + i18n-title + *ngIf="permissions.configOpt.read" + class="tc_submenuitem tc_submenuitem_admin_users"><span i18n>Ceph Users</span></cds-sidenav-item> + <cds-sidenav-item route="/mgr-modules" + [useRouter]="true" + title="Manager Modules" + i18n-title + *ngIf="permissions.configOpt.read" + class="tc_submenuitem tc_submenuitem_admin_modules"><span i18n>Manager Modules</span></cds-sidenav-item> + <cds-sidenav-item route="/configuration" + [useRouter]="true" + title="Configuration" + i18n-title + *ngIf="permissions.configOpt.read" + class="tc_submenuitem tc_submenuitem_admin_configuration"><span i18n>Configuration</span></cds-sidenav-item> + </cds-sidenav-menu> + </cds-sidenav> + </div> </ng-container> </ng-template> - -</div> + </div> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.scss 
b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.scss index 4d2c829e93d..56dc7e749a3 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.scss @@ -5,16 +5,33 @@ --------------------------------------------------- */ .cd-navbar-main { - display: flex; - flex: 1; - flex-direction: column; height: 100%; + overflow: hidden; } /* --------------------------------------------------- NAVBAR STYLE --------------------------------------------------- */ +.navbar-brand, +.navbar-brand:hover { + color: vv.$gray-200; + height: auto; + padding: 0; +} + +.navbar-brand > img { + height: 25px; +} + +cds-header-item { + width: 500px; +} + +.cluster-switcher { + margin-left: 6rem; +} + ::ng-deep cd-navigation .cd-navbar-top { .cd-navbar-brand { background: vv.$secondary; @@ -160,15 +177,7 @@ SIDEBAR STYLE --------------------------------------------------- */ -$sidebar-width: 200px; - -.cd-navbar-primary .active > a, -.cd-navbar-primary > .active > a:focus, -.cd-navbar-primary > .active > a:hover { - background-color: vv.$primary !important; - border: 0 !important; - color: vv.$white !important; -} +$sidebar-width: 20.8rem; .wrapper { display: flex; @@ -176,9 +185,7 @@ $sidebar-width: 200px; width: 100%; #sidebar { - background: vv.$secondary; bottom: 0; - color: vv.$white; height: auto; left: 0; overflow-y: auto; @@ -190,92 +197,6 @@ $sidebar-width: 200px; &.active { margin-left: -$sidebar-width; } - - ul { - &.component { - margin: 0; - padding: 20px 0; - } - - p { - color: vv.$white; - padding: 10px; - } - - li a { - color: vv.$white; - display: block; - font-size: 1.3em; - padding: 10px 23px 10px 10px; - text-decoration: none; - - &:hover { - background: vv.$primary; - color: vv.$white; - } - - > .badge { - margin-left: 5px; - } - - i.fa.fa-circle { - animation: blink 2s ease-in infinite; - font-size: 0.875em; - margin-top: 4px; - position: absolute; - right: 35px; - } - - @keyframes blink { - from, - to { - opacity: 1; - } - - 50% { - opacity: 0; - } - } - } - - li.active > a, - li > a a[aria-expanded='true'] { - color: vv.$white; - } - } - } - - a.dropdown-toggle { - position: relative; - - &::after { - border: 0; - content: '\f054'; - font-family: 'ForkAwesome'; - font-size: 1rem; - margin-top: 2px; - position: absolute; - right: 20px; - transition: transform 0.3s ease-in-out; - } - - &[aria-expanded='true']::after { - transform: rotate(90deg); - } - } - - ul ul a { - background: lighten(vv.$secondary, 10); - font-size: 1.1em !important; - padding-left: 35px !important; - } - - .cd-navbar-primary a:focus { - outline: none; - } - - ngx-simplebar { - height: 100%; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.spec.ts index 92d9a28878a..9bf0bde51e2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.spec.ts @@ -24,6 +24,7 @@ import { AdministrationComponent } from '../administration/administration.compon import { IdentityComponent } from '../identity/identity.component'; import { NgbModule } from '@ng-bootstrap/ng-bootstrap'; import { DashboardHelpComponent } from 
'../dashboard-help/dashboard-help.component'; +import { DialogModule, GridModule, ThemeModule, UIShellModule } from 'carbon-components-angular'; function everythingPermittedExcept(disabledPermissions: string[] = []): any { const permissions: Permissions = new Permissions({}); @@ -71,7 +72,11 @@ describe('NavigationComponent', () => { ToastrModule.forRoot(), RouterTestingModule, SimplebarAngularModule, - NgbModule + NgbModule, + UIShellModule, + ThemeModule, + DialogModule, + GridModule ], providers: [AuthStorageService, SummaryService, FeatureTogglesService, PrometheusAlertService] }); @@ -214,56 +219,4 @@ describe('NavigationComponent', () => { }); } }); - - describe('showTopNotification', () => { - const notification1 = 'notificationName1'; - const notification2 = 'notificationName2'; - - beforeEach(() => { - component.notifications = []; - }); - - it('should show notification', () => { - component.showTopNotification(notification1, true); - expect(component.notifications.includes(notification1)).toBeTruthy(); - expect(component.notifications.length).toBe(1); - }); - - it('should not add a second notification if it is already shown', () => { - component.showTopNotification(notification1, true); - component.showTopNotification(notification1, true); - expect(component.notifications.includes(notification1)).toBeTruthy(); - expect(component.notifications.length).toBe(1); - }); - - it('should add a second notification if the first one is different', () => { - component.showTopNotification(notification1, true); - component.showTopNotification(notification2, true); - expect(component.notifications.includes(notification1)).toBeTruthy(); - expect(component.notifications.includes(notification2)).toBeTruthy(); - expect(component.notifications.length).toBe(2); - }); - - it('should hide an active notification', () => { - component.showTopNotification(notification1, true); - expect(component.notifications.includes(notification1)).toBeTruthy(); - expect(component.notifications.length).toBe(1); - component.showTopNotification(notification1, false); - expect(component.notifications.length).toBe(0); - }); - - it('should not fail if it tries to hide an inactive notification', () => { - expect(() => component.showTopNotification(notification1, false)).not.toThrow(); - expect(component.notifications.length).toBe(0); - }); - - it('should keep other notifications if it hides one', () => { - component.showTopNotification(notification1, true); - component.showTopNotification(notification2, true); - expect(component.notifications.length).toBe(2); - component.showTopNotification(notification2, false); - expect(component.notifications.length).toBe(1); - expect(component.notifications.includes(notification1)).toBeTruthy(); - }); - }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.ts index 6f52dc6cf33..55c9ab7e88d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/navigation/navigation.component.ts @@ -1,4 +1,4 @@ -import { Component, HostBinding, OnDestroy, OnInit } from '@angular/core'; +import { Component, OnDestroy, OnInit } from '@angular/core'; import { Router } from '@angular/router'; import * as _ from 'lodash'; @@ -6,7 +6,6 @@ import { Subscription } from 'rxjs'; import { MultiClusterService } from 
'~/app/shared/api/multi-cluster.service'; import { SettingsService } from '~/app/shared/api/settings.service'; -import { Icons } from '~/app/shared/enum/icons.enum'; import { MultiCluster } from '~/app/shared/models/multi-cluster'; import { Permissions } from '~/app/shared/models/permissions'; import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; @@ -15,10 +14,8 @@ import { FeatureTogglesMap$, FeatureTogglesService } from '~/app/shared/services/feature-toggles.service'; -import { MotdNotificationService } from '~/app/shared/services/motd-notification.service'; import { PrometheusAlertService } from '~/app/shared/services/prometheus-alert.service'; import { SummaryService } from '~/app/shared/services/summary.service'; -import { TelemetryNotificationService } from '~/app/shared/services/telemetry-notification.service'; @Component({ selector: 'cd-navigation', @@ -26,17 +23,12 @@ import { TelemetryNotificationService } from '~/app/shared/services/telemetry-no styleUrls: ['./navigation.component.scss'] }) export class NavigationComponent implements OnInit, OnDestroy { - notifications: string[] = []; clusterDetails: any[] = []; - @HostBinding('class') get class(): string { - return 'top-notification-' + this.notifications.length; - } permissions: Permissions; enabledFeature$: FeatureTogglesMap$; clusterTokenStatus: object = {}; summaryData: any; - icons = Icons; rightSidebarOpen = false; // rightSidebar only opens when width is less than 768px showMenuSidebar = true; @@ -48,7 +40,13 @@ export class NavigationComponent implements OnInit, OnDestroy { private subs = new Subscription(); clustersMap: Map<string, any> = new Map<string, any>(); - selectedCluster: object; + selectedCluster: { + name: string; + cluster_alias: string; + user: string; + cluster_connection_status?: number; + }; + currentClusterName: string; constructor( private authStorageService: AuthStorageService, @@ -56,9 +54,7 @@ export class NavigationComponent implements OnInit, OnDestroy { private router: Router, private summaryService: SummaryService, private featureToggles: FeatureTogglesService, - private telemetryNotificationService: TelemetryNotificationService, public prometheusAlertService: PrometheusAlertService, - private motdNotificationService: MotdNotificationService, private cookieService: CookiesService, private settingsService: SettingsService ) { @@ -84,6 +80,7 @@ export class NavigationComponent implements OnInit, OnDestroy { }); this.selectedCluster = this.clustersMap.get(`${resp['current_url']}-${resp['current_user']}`) || {}; + this.currentClusterName = `${this.selectedCluster?.name} - ${this.selectedCluster?.cluster_alias} - ${this.selectedCluster?.user}`; } }) ); @@ -93,26 +90,6 @@ export class NavigationComponent implements OnInit, OnDestroy { this.summaryData = summary; }) ); - /* - Note: If you're going to add more top notifications please do not forget to increase - the number of generated css-classes in section topNotification settings in the scss - file. 
- */ - this.subs.add( - this.authStorageService.isPwdDisplayed$.subscribe((isDisplayed) => { - this.showTopNotification('isPwdDisplayed', isDisplayed); - }) - ); - this.subs.add( - this.telemetryNotificationService.update.subscribe((visible: boolean) => { - this.showTopNotification('telemetryNotificationEnabled', visible); - }) - ); - this.subs.add( - this.motdNotificationService.motd$.subscribe((motd: any) => { - this.showTopNotification('motdNotificationEnabled', _.isPlainObject(motd)); - }) - ); this.subs.add( this.multiClusterService.subscribeClusterTokenStatus((resp: object) => { this.clusterTokenStatus = resp; @@ -167,19 +144,6 @@ export class NavigationComponent implements OnInit, OnDestroy { this.rightSidebarOpen = !this.rightSidebarOpen; } - showTopNotification(name: string, isDisplayed: boolean) { - if (isDisplayed) { - if (!this.notifications.includes(name)) { - this.notifications.push(name); - } - } else { - const index = this.notifications.indexOf(name); - if (index >= 0) { - this.notifications.splice(index, 1); - } - } - } - onClusterSelection(value: object) { this.multiClusterService.setCluster(value).subscribe( (resp: any) => { @@ -225,4 +189,8 @@ export class NavigationComponent implements OnInit, OnDestroy { } ); } + + trackByFn(item: any) { + return item; + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/notifications/notifications.component.html b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/notifications/notifications.component.html index f5eae4f890d..f120234b9cd 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/notifications/notifications.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/core/navigation/notifications/notifications.component.html @@ -2,7 +2,9 @@ title="Tasks and Notifications" [ngClass]="{ 'running': hasRunningTasks }" (click)="toggleSidebar()"> - <i [ngClass]="[icons.bell]"></i> + <svg cdsIcon="notification" + size="20" + title="notification"></svg> <span class="dot" *ngIf="hasNotifications"> </span> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/erasure-code-profile.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/erasure-code-profile.service.ts index d2bd131a464..988a13de2a9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/erasure-code-profile.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/erasure-code-profile.service.ts @@ -84,6 +84,13 @@ export class ErasureCodeProfileService { domain. For instance, if the failure domain is host no two chunks will be stored on the same host. It is used to create a CRUSH rule step such as step chooseleaf host.`, + crushNumFailureDomains: $localize` Number of failure domains to map. Results in a CRUSH MSR rule being created. + Must be specified if crush-osds-per-failure-domain is specified.`, + + crushOsdsPerFailureDomain: $localize`Maximum number of OSDs to place in each failure domain -- + defaults to 1. Using a value greater than one will cause a CRUSH MSR rule to be created. 
+ Must be specified if crush-num-failure-domains is specified.`, + crushDeviceClass: $localize`Restrict placement to devices of a specific class (e.g., ssd or hdd), using the crush device class names in the CRUSH map.`, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/multi-cluster.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/multi-cluster.service.ts index 5a03abd22ff..9c2dcda4d8d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/multi-cluster.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/multi-cluster.service.ts @@ -2,7 +2,7 @@ import { HttpClient, HttpParams } from '@angular/common/http'; import { Injectable } from '@angular/core'; import { BehaviorSubject, Observable, Subscription } from 'rxjs'; import { TimerService } from '../services/timer.service'; -import { filter } from 'rxjs/operators'; +import { filter, first } from 'rxjs/operators'; import { SummaryService } from '../services/summary.service'; import { Router } from '@angular/router'; @@ -48,7 +48,7 @@ export class MultiClusterService { startClusterTokenStatusPolling() { let clustersTokenMap = new Map<string, { token: string; user: string }>(); - const dataSubscription = this.subscribe((resp: any) => { + const dataSubscription = this.subscribeOnce((resp: any) => { const clustersConfig = resp['config']; let tempMap = new Map<string, { token: string; user: string }>(); if (clustersConfig) { @@ -92,13 +92,22 @@ export class MultiClusterService { } refreshTokenStatus() { - this.subscribe((resp: any) => { + this.subscribeOnce((resp: any) => { const clustersConfig = resp['config']; let tempMap = this.getTempMap(clustersConfig); return this.checkTokenStatus(tempMap).subscribe(this.getClusterTokenStatusObserver()); }); } + subscribeOnce(next: (data: any) => void, error?: (error: any) => void) { + return this.msData$ + .pipe( + filter((value) => !!value), + first() + ) + .subscribe(next, error); + } + subscribe(next: (data: any) => void, error?: (error: any) => void) { return this.msData$.pipe(filter((value) => !!value)).subscribe(next, error); } @@ -116,6 +125,7 @@ export class MultiClusterService { } editCluster( + name: string, url: any, clusterAlias: string, username: string, @@ -123,6 +133,7 @@ export class MultiClusterService { ssl_certificate = '' ) { return this.http.put('api/multi-cluster/edit_cluster', { + name: name, url, cluster_alias: clusterAlias, username: username, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nfs.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nfs.service.ts index 9b4e4a0a288..1fcce26e50a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nfs.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nfs.service.ts @@ -3,7 +3,7 @@ import { Injectable } from '@angular/core'; import { Observable, throwError } from 'rxjs'; -import { NfsFSAbstractionLayer } from '~/app/ceph/nfs/models/nfs.fsal'; +import { NfsFSAbstractionLayer, SUPPORTED_FSAL } from '~/app/ceph/nfs/models/nfs.fsal'; import { ApiClient } from '~/app/shared/api/api-client'; export interface Directory { @@ -34,12 +34,12 @@ export class NfsService extends ApiClient { nfsFsal: NfsFSAbstractionLayer[] = [ { - value: 'CEPH', + value: SUPPORTED_FSAL.CEPH, descr: $localize`CephFS`, disabled: false }, { - value: 'RGW', + value: SUPPORTED_FSAL.RGW, descr: $localize`Object Gateway`, disabled: false } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts 
b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts new file mode 100644 index 00000000000..dd6aba7cf6c --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts @@ -0,0 +1,33 @@ +import { TestBed } from '@angular/core/testing'; +import { HttpClientTestingModule, HttpTestingController } from '@angular/common/http/testing'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { NvmeofService } from '../../shared/api/nvmeof.service'; + +describe('NvmeofService', () => { + let service: NvmeofService; + let httpTesting: HttpTestingController; + + configureTestBed({ + providers: [NvmeofService], + imports: [HttpClientTestingModule] + }); + + beforeEach(() => { + service = TestBed.inject(NvmeofService); + httpTesting = TestBed.inject(HttpTestingController); + }); + + afterEach(() => { + httpTesting.verify(); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); + + it('should call listGateways', () => { + service.listGateways().subscribe(); + const req = httpTesting.expectOne('api/nvmeof/gateway'); + expect(req.request.method).toBe('GET'); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts new file mode 100644 index 00000000000..8d5b8a3830c --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts @@ -0,0 +1,47 @@ +import { Injectable } from '@angular/core'; +import { HttpClient } from '@angular/common/http'; + +import _ from 'lodash'; +import { Observable, of as observableOf } from 'rxjs'; +import { catchError, mapTo } from 'rxjs/operators'; + +const BASE_URL = 'api/nvmeof'; + +@Injectable({ + providedIn: 'root' +}) +export class NvmeofService { + constructor(private http: HttpClient) {} + + listGateways() { + return this.http.get(`${BASE_URL}/gateway`); + } + + listSubsystems() { + return this.http.get(`${BASE_URL}/subsystem`); + } + + getSubsystem(subsystemNQN: string) { + return this.http.get(`${BASE_URL}/subsystem/${subsystemNQN}`); + } + + createSubsystem(request: { nqn: string; max_namespaces?: number; enable_ha: boolean }) { + return this.http.post(`${BASE_URL}/subsystem`, request, { observe: 'response' }); + } + + deleteSubsystem(subsystemNQN: string) { + return this.http.delete(`${BASE_URL}/subsystem/${subsystemNQN}`, { + observe: 'response' + }); + } + + isSubsystemPresent(subsystemNqn: string): Observable<boolean> { + return this.getSubsystem(subsystemNqn).pipe( + mapTo(true), + catchError((e) => { + e?.preventDefault(); + return observableOf(false); + }) + ); + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts index 34461bf6314..f2ed4d7cc9e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts @@ -11,6 +11,7 @@ import { DeploymentOptions } from '../models/osd-deployment-options'; import { OsdSettings } from '../models/osd-settings'; import { SmartDataResponseV1 } from '../models/smart'; import { DeviceService } from '../services/device.service'; +import { CdFormGroup } from '../forms/cd-form-group'; @Injectable({ providedIn: 'root' @@ -20,6 +21,8 @@ export class OsdService { private uiPath = 'ui-api/osd'; osdDevices: InventoryDeviceType[] = []; + selectedFormValues: CdFormGroup; + isDeployementModeSimple: boolean = true; 
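+  // Form state shared through this service by the OSD creation flow: the last
+  // submitted form values and whether the simple deployment mode is selected
+  // (presumed from the field names; the consumers of these fields are outside
+  // this hunk).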
osdRecvSpeedModalPriorities = { KNOWN_PRIORITIES: [ diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts index eaed2c4abac..ec0da64df99 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.spec.ts @@ -62,11 +62,12 @@ describe('RgwBucketService', () => { 'qwerty1', null, null, - 'private' + 'private', + 'true' ) .subscribe(); const req = httpTesting.expectOne( - `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&tags=null&bucket_policy=null&canned_acl=private&${RgwHelper.DAEMON_QUERY_PARAM}` + `api/rgw/bucket?bucket=foo&uid=bar&zonegroup=default&lock_enabled=false&lock_mode=COMPLIANCE&lock_retention_period_days=5&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&tags=null&bucket_policy=null&canned_acl=private&replication=true&${RgwHelper.DAEMON_QUERY_PARAM}` ); expect(req.request.method).toBe('POST'); }); @@ -88,11 +89,13 @@ describe('RgwBucketService', () => { '10', null, null, - 'private' + 'private', + 'true', + null ) .subscribe(); const req = httpTesting.expectOne( - `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10&tags=null&bucket_policy=null&canned_acl=private` + `api/rgw/bucket/foo?${RgwHelper.DAEMON_QUERY_PARAM}&bucket_id=bar&uid=baz&versioning_state=Enabled&encryption_state=true&encryption_type=aws%253Akms&key_id=qwerty1&mfa_delete=Enabled&mfa_token_serial=1&mfa_token_pin=223344&lock_mode=GOVERNANCE&lock_retention_period_days=10&tags=null&bucket_policy=null&canned_acl=private&replication=true&lifecycle=null` ); expect(req.request.method).toBe('PUT'); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts index ddeeadf5e49..595b02ec276 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-bucket.service.ts @@ -62,7 +62,8 @@ export class RgwBucketService extends ApiClient { key_id: string, tags: string, bucketPolicy: string, - cannedAcl: string + cannedAcl: string, + replication: string ) { return this.rgwDaemonService.request((params: HttpParams) => { const paramsObject = { @@ -78,6 +79,7 @@ export class RgwBucketService extends ApiClient { tags: tags, bucket_policy: bucketPolicy, canned_acl: cannedAcl, + replication: replication, daemon_name: params.get('daemon_name') }; @@ -106,7 +108,9 @@ export class RgwBucketService extends ApiClient { lockRetentionPeriodDays: string, tags: string, bucketPolicy: string, - cannedAcl: string + cannedAcl: string, + replication: string, + lifecycle: string ) { return this.rgwDaemonService.request((params: HttpParams) => { params = params.appendAll({ @@ -123,7 +127,9 @@ export class RgwBucketService extends ApiClient { lock_retention_period_days: lockRetentionPeriodDays, tags: tags, bucket_policy: bucketPolicy, - canned_acl: cannedAcl + canned_acl: cannedAcl, + replication: replication, + lifecycle: lifecycle }); return 
this.http.put(`${this.url}/${bucket}`, null, { params: params }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.spec.ts new file mode 100644 index 00000000000..424eb21e41b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.spec.ts @@ -0,0 +1,92 @@ +import { HttpClientTestingModule, HttpTestingController } from '@angular/common/http/testing'; +import { TestBed } from '@angular/core/testing'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { RgwMultisiteService } from './rgw-multisite.service'; + +const mockSyncPolicyData: any = [ + { + id: 'test', + data_flow: {}, + pipes: [], + status: 'enabled', + bucketName: 'test' + }, + { + id: 'test', + data_flow: {}, + pipes: [], + status: 'enabled' + } +]; + +describe('RgwMultisiteService', () => { + let service: RgwMultisiteService; + let httpTesting: HttpTestingController; + + configureTestBed({ + providers: [RgwMultisiteService], + imports: [HttpClientTestingModule] + }); + + beforeEach(() => { + service = TestBed.inject(RgwMultisiteService); + httpTesting = TestBed.inject(HttpTestingController); + }); + + afterEach(() => { + httpTesting.verify(); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); + + it('should fetch all the sync policy related or un-related to a bucket', () => { + service.getSyncPolicy('', '', true).subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy?all_policy=true'); + expect(req.request.method).toBe('GET'); + req.flush(mockSyncPolicyData); + }); + + it('should create Sync Policy Group w/o bucket_name', () => { + const postData = { group_id: 'test', status: 'enabled' }; + service.createSyncPolicyGroup(postData).subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy-group'); + expect(req.request.method).toBe('POST'); + expect(req.request.body).toEqual(postData); + req.flush(null); + }); + + it('should create Sync Policy Group with bucket_name', () => { + const postData = { group_id: 'test', status: 'enabled', bucket_name: 'test' }; + service.createSyncPolicyGroup(postData).subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy-group'); + expect(req.request.method).toBe('POST'); + expect(req.request.body).toEqual(postData); + req.flush(null); + }); + + it('should modify Sync Policy Group', () => { + const postData = { group_id: 'test', status: 'enabled', bucket_name: 'test' }; + service.modifySyncPolicyGroup(postData).subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy-group'); + expect(req.request.method).toBe('PUT'); + expect(req.request.body).toEqual(postData); + req.flush(null); + }); + + it('should remove Sync Policy Group', () => { + const group_id = 'test'; + service.removeSyncPolicyGroup(group_id).subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy-group/' + group_id); + expect(req.request.method).toBe('DELETE'); + req.flush(null); + }); + + it('should fetch the sync policy group with given group_id and bucket_name', () => { + service.getSyncPolicyGroup('test', 'test').subscribe(); + const req = httpTesting.expectOne('api/rgw/multisite/sync-policy-group/test?bucket_name=test'); + expect(req.request.method).toBe('GET'); + req.flush(mockSyncPolicyData[1]); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts 
b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts
index da789d29fc4..cc03042815e 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts
@@ -30,4 +30,45 @@ export class RgwMultisiteService {
   getSyncStatus() {
     return this.http.get(`${this.url}/sync_status`);
   }
+
+  status() {
+    return this.http.get(`${this.uiUrl}/status`);
+  }
+
+  getSyncPolicy(bucketName?: string, zonegroup?: string, fetchAllPolicy = false) {
+    let params = new HttpParams();
+    if (bucketName) {
+      params = params.append('bucket_name', bucketName);
+    }
+    if (zonegroup) {
+      params = params.append('zonegroup_name', zonegroup);
+    }
+    // fetchAllPolicy: if true, fetches every sync policy, whether or not it is linked to a bucket
+    params = params.append('all_policy', fetchAllPolicy);
+    return this.http.get(`${this.url}/sync-policy`, { params });
+  }
+
+  getSyncPolicyGroup(group_id: string, bucket_name?: string) {
+    let params = new HttpParams();
+    if (bucket_name) {
+      params = params.append('bucket_name', bucket_name);
+    }
+    return this.http.get(`${this.url}/sync-policy-group/${group_id}`, { params });
+  }
+
+  createSyncPolicyGroup(payload: { group_id: string; status: string; bucket_name?: string }) {
+    return this.http.post(`${this.url}/sync-policy-group`, payload);
+  }
+
+  modifySyncPolicyGroup(payload: { group_id: string; status: string; bucket_name?: string }) {
+    return this.http.put(`${this.url}/sync-policy-group`, payload);
+  }
+
+  removeSyncPolicyGroup(group_id: string, bucket_name?: string) {
+    let params = new HttpParams();
+    if (bucket_name) {
+      params = params.append('bucket_name', bucket_name);
+    }
+    return this.http.delete(`${this.url}/sync-policy-group/${group_id}`, { params });
+  }
 }
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.html
index 30f8b530a59..58761eead54 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.html
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.html
@@ -1,43 +1,50 @@
-<ngb-alert type="{{ bootstrapClass }}"
-           [dismissible]="dismissible"
-           (closed)="onClose()"
-           [ngClass]="spacingClass">
-  <table>
-    <ng-container *ngIf="size === 'normal'; else slim">
-      <tr>
-        <td *ngIf="showIcon"
-            rowspan="2"
-            class="alert-panel-icon">
-          <i [ngClass]="[icons.large3x]"
-             class="alert-{{ bootstrapClass }} {{ typeIcon }}"
-             aria-hidden="true"></i>
-        </td>
-        <td *ngIf="showTitle"
-            class="alert-panel-title">{{ title }}</td>
-      </tr>
-      <tr>
-        <td class="alert-panel-text">
-          <ng-container *ngTemplateOutlet="content"></ng-container>
-        </td>
-      </tr>
-    </ng-container>
-    <ng-template #slim>
-      <tr>
-        <td *ngIf="showIcon"
-            class="alert-panel-icon">
-          <i class="alert-{{ bootstrapClass }} {{ typeIcon }}"
-             aria-hidden="true"></i>
-        </td>
-        <td *ngIf="showTitle"
-            class="alert-panel-title">{{ title }}</td>
-        <td class="alert-panel-text">
-          <ng-container *ngTemplateOutlet="content"></ng-container>
-        </td>
-      </tr>
-    </ng-template>
-  </table>
-</ngb-alert>
+<cds-actionable-notification
+  class="mb-1"
+  [notificationObj]="notificationContent"
+  *ngIf="size === 'slim'; else normal">
+</cds-actionable-notification>
+
+<ng-template #normal>
+  <ngb-alert type="{{ bootstrapClass }}"
+             [dismissible]="dismissible"
+
(closed)="onClose()" + [ngClass]="spacingClass"> + <table> + <ng-container *ngIf="size === 'normal'"> + <tr> + <td *ngIf="showIcon" + rowspan="2" + class="alert-panel-icon"> + <i [ngClass]="[icons.large3x]" + class="alert-{{ bootstrapClass }} {{ typeIcon }}" + aria-hidden="true"></i> + </td> + <td *ngIf="showTitle" + class="alert-panel-title">{{ title }}</td> + </tr> + <tr> + <td class="alert-panel-text"> + <ng-container *ngTemplateOutlet="content"></ng-container> + </td> + </tr> + </ng-container> + </table> + </ngb-alert> +</ng-template> <ng-template #content> <ng-content></ng-content> </ng-template> + +<ng-template #closeTpl> + <button cdsActionableButton + cdsButton="ghost" + size="md" + title="Close" + (click)="onClose()" + *ngIf="dismissible"> + <svg class="cds--btn__icon" + cdsIcon="close" + size="16"></svg> + </button> +</ng-template> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.scss b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.scss index 6b89d6d3e38..98541e9bfda 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.scss +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.scss @@ -10,3 +10,7 @@ .alert { margin-bottom: 0; } + +cds-actionable-notification { + max-width: 100%; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.ts index cc2024baa23..3402eea5742 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/alert-panel/alert-panel.component.ts @@ -1,4 +1,13 @@ -import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; +import { + Component, + EventEmitter, + Input, + OnInit, + Output, + TemplateRef, + ViewChild +} from '@angular/core'; +import { NotificationContent, NotificationType } from 'carbon-components-angular'; import { Icons } from '~/app/shared/enum/icons.enum'; @@ -8,6 +17,11 @@ import { Icons } from '~/app/shared/enum/icons.enum'; styleUrls: ['./alert-panel.component.scss'] }) export class AlertPanelComponent implements OnInit { + @ViewChild('content', { static: true }) + alertContent: TemplateRef<any>; + @ViewChild('closeTpl', { static: true }) + closeTpl: TemplateRef<any>; + @Input() title = ''; @Input() @@ -36,7 +50,18 @@ export class AlertPanelComponent implements OnInit { icons = Icons; + notificationContent: NotificationContent; + ngOnInit() { + const type: NotificationType = this.type === 'danger' ? 
'error' : this.type; + this.notificationContent = { + type: type, + template: this.alertContent, + actionsTemplate: this.closeTpl, + showClose: false, + title: this.title + }; + switch (this.type) { case 'warning': this.title = this.title || $localize`Warning`; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts index d6943b0c71a..e6ccde0a362 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/components.module.ts @@ -15,6 +15,12 @@ import { import { ClickOutsideModule } from 'ng-click-outside'; import { NgChartsModule } from 'ng2-charts'; import { SimplebarAngularModule } from 'simplebar-angular'; +import { + UIShellModule, + ButtonModule, + NotificationModule, + IconModule +} from 'carbon-components-angular'; import { MotdComponent } from '~/app/shared/components/motd/motd.component'; import { DirectivesModule } from '../directives/directives.module'; @@ -76,7 +82,11 @@ import { UpgradableComponent } from './upgradable/upgradable.component'; SimplebarAngularModule, RouterModule, NgbDatepickerModule, - NgbTimepickerModule + NgbTimepickerModule, + UIShellModule, + ButtonModule, + NotificationModule, + IconModule ], declarations: [ SparklineComponent, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.ts index 1b4af6cd69f..aafbd604232 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/form-modal/form-modal.component.ts @@ -23,6 +23,8 @@ export class FormModalComponent implements OnInit { submitButtonText: string; onSubmit: Function; + updateAsyncValidators?: Function; + // Internal formGroup: CdFormGroup; @@ -57,13 +59,22 @@ export class FormModalComponent implements OnInit { if (field.asyncValidators) { asyncValidators = asyncValidators.concat(field.asyncValidators); } - return new UntypedFormControl( + + const control = new UntypedFormControl( _.defaultTo( field.type === 'binary' ? 
this.dimlessBinaryPipe.transform(field.value) : field.value, null ), { validators, asyncValidators } ); + + if (field.valueChangeListener) { + control.valueChanges.subscribe((value) => { + const validatorToUpdate = this.updateAsyncValidators(value); + this.updateValidation(field.dependsOn, validatorToUpdate); + }); + } + return control; } getError(field: CdFormModalFieldConfig): string { @@ -114,4 +125,10 @@ export class FormModalComponent implements OnInit { this.onSubmit(values); } } + + updateValidation(name?: string, validator?: AsyncValidatorFn[]) { + const field = this.formGroup.get(name); + field.setAsyncValidators(validator); + field.updateValueAndValidity(); + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/language-selector/language-selector.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/language-selector/language-selector.component.html index be98eaa6f94..649b8b45a4d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/components/language-selector/language-selector.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/components/language-selector/language-selector.component.html @@ -1,22 +1,6 @@ -<div ngbDropdown - display="dynamic" - placement="bottom-right"> - <a ngbDropdownToggle - i18n-title - id="toggle-language-button" - title="Select a Language" - role="button"> - {{ allLanguages[selectedLanguage] }} - </a> - <div ngbDropdownMenu - role="listbox" - aria-labelledby="toggle-language-button"> - <ng-container *ngFor="let lang of supportedLanguages | keyvalue"> - <button ngbDropdownItem - role="option" - (click)="changeLanguage(lang.key)"> - {{ lang.value }} - </button> - </ng-container> - </div> -</div> +<cds-header-menu [title]="allLanguages[selectedLanguage]"> + <ng-container *ngFor="let lang of supportedLanguages | keyvalue"> + <cds-header-item (click)="changeLanguage(lang.key)">{{ lang.value }}</cds-header-item> + </ng-container> +</cds-header-menu> + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts index 185c778bc1b..24915507fd9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/constants/app.constants.ts @@ -148,6 +148,7 @@ export class ActionLabelsI18n { DISCONNECT: string; RECONNECT: string; AUTHORIZE: string; + EXPAND_CLUSTER: string; constructor() { /* Create a new item */ @@ -234,6 +235,7 @@ export class ActionLabelsI18n { this.CONNECT = $localize`Connect`; this.DISCONNECT = $localize`Disconnect`; this.RECONNECT = $localize`Reconnect`; + this.EXPAND_CLUSTER = $localize`Expand Cluster`; } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html index f977273b0cf..a856a4c4870 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.html @@ -317,6 +317,20 @@ <span>{{ value | map:column?.customTemplateConfig }}</span> </ng-template> +<ng-template #tooltipTpl + let-column="column" + let-value="value"> + <span *ngFor="let item of (value | array);"> + <span + i18n + i18n-ngbTooltip + class="{{(column?.customTemplateConfig?.map && column?.customTemplateConfig?.map[item]?.class) ? 
column.customTemplateConfig.map[item].class : ''}}"
+      ngbTooltip="{{(column?.customTemplateConfig?.map && column?.customTemplateConfig?.map[item]?.tooltip) ? column.customTemplateConfig.map[item].tooltip : ''}}">
+      {{ item }}
+    </span>
+  </span>
+</ng-template>
+
 <ng-template #truncateTpl
              let-column="column"
              let-value="value">
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.ts
index 6e39f4bff13..905646b55b8 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/datatable/table/table.component.ts
@@ -75,6 +75,8 @@ export class TableComponent implements AfterContentChecked, OnInit, OnChanges, O
   rowSelectionTpl: TemplateRef<any>;
   @ViewChild('pathTpl', { static: true })
   pathTpl: TemplateRef<any>;
+  @ViewChild('tooltipTpl', { static: true })
+  tooltipTpl: TemplateRef<any>;
   // This is the array with the items to be shown.
   @Input()
@@ -612,6 +614,7 @@ export class TableComponent implements AfterContentChecked, OnInit, OnChanges, O
     this.cellTemplates.truncate = this.truncateTpl;
     this.cellTemplates.timeAgo = this.timeAgoTpl;
     this.cellTemplates.path = this.pathTpl;
+    this.cellTemplates.tooltip = this.tooltipTpl;
   }
   useCustomClass(value: any): string {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/cell-template.enum.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/cell-template.enum.ts
index 2790f974978..5c4072f7f1f 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/cell-template.enum.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/cell-template.enum.ts
@@ -60,5 +60,17 @@ export enum CellTemplate {
   This template truncates a path to a shorter format and shows the whole path
   in a tooltip eg: /var/lib/ceph/osd/ceph-0 -> /var/.../ceph-0
   */
-  path = 'path'
+  path = 'path',
+  /*
+  This template is used to attach a tooltip to the given column value
+  // {
+  //   ...
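+  //   name: 'Status',   // illustrative fields only; any column may opt in
+  //   prop: 'status',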
+  //   cellTransformation: CellTemplate.tooltip,
+  //   customTemplateConfig: {
+  //     map?: {
+  //       [key: any]: { class?: string, tooltip: string }
+  //     }
+  //   }
+  // }
+  */
+  tooltip = 'tooltip'
 }
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/icons.enum.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/icons.enum.ts
index be454076b86..8f90a51cf86 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/icons.enum.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/enum/icons.enum.ts
@@ -27,7 +27,7 @@ export enum Icons {
   right = 'fa fa-arrow-right', // Mark in
   down = 'fa fa-arrow-down', // Mark Down
   erase = 'fa fa-eraser', // Purge  color: bd.$white;
-
+  expand = 'fa fa-expand', // Expand cluster
   user = 'fa fa-user', // User, Initiators
   users = 'fa fa-users', // Users, Groups
   share = 'fa fa-share-alt', // share
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.spec.ts
index a0954990be5..011d7011fa4 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.spec.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.spec.ts
@@ -6,7 +6,7 @@ import { of as observableOf } from 'rxjs';
 import { RgwBucketService } from '~/app/shared/api/rgw-bucket.service';
 import { CdFormGroup } from '~/app/shared/forms/cd-form-group';
-import { CdValidators } from '~/app/shared/forms/cd-validators';
+import { CdValidators, DUE_TIMER } from '~/app/shared/forms/cd-validators';
 import { FormHelper } from '~/testing/unit-test-helper';
 let mockBucketExists = observableOf(true);
@@ -771,7 +771,7 @@ describe('CdValidators', () => {
   describe('bucket', () => {
     const testValidator = (name: string, valid: boolean, expectedError?: string) => {
       formHelper.setValue('x', name, true);
-      tick();
+      tick(DUE_TIMER);
       if (valid) {
         formHelper.expectValid('x');
       } else {
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.ts
index 78171f650f5..c72b0b89e12 100644
--- a/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.ts
+++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/forms/cd-validators.ts
@@ -20,6 +20,8 @@ export function isEmptyInputValue(value: any): boolean {
 export type existsServiceFn = (value: any, ...args: any[]) => Observable<boolean>;
+export const DUE_TIMER = 500;
+
 export class CdValidators {
   /**
    * Validator that performs email validation. In contrast to the Angular
@@ -192,6 +194,10 @@ export class CdValidators {
           result = value.length >= prerequisite['arg1'];
         }
         break;
+      case 'minValue':
+        if (_.isNumber(value)) {
+          result = value >= prerequisite['arg1'];
+        }
     }
     return result;
   }
@@ -347,9 +353,12 @@
    * boolean 'true' if the given value exists, otherwise 'false'.
    * @param serviceFnThis {any} The object to be used as the 'this' object
    * when calling the serviceFn function. Defaults to null.
-   * @param {number|Date} dueTime The delay time to wait before the
-   * serviceFn call is executed. This is useful to prevent calls on
-   * every keystroke. Defaults to 500.
+   * @param usernameFn {Function} Specifically used in the rgw user form to
+   * validate the tenant$username format
+   * @param uidField {boolean} Specifically used in the rgw user form to
+   * validate the tenant$username format
+   * @param extraArgs {...any} Any extra arguments that need to be passed
+   * to the serviceFn function.
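+   * @example
+   * // Illustrative use only; actual call sites supply a real exists-service:
+   * // new FormControl('', [], [CdValidators.unique(service.exists, service)])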
* @return {AsyncValidatorFn} Returns an asynchronous validator function * that returns an error map with the `notUnique` property if the * validation check succeeds, otherwise `null`. @@ -377,7 +386,7 @@ export class CdValidators { } } - return observableTimer().pipe( + return observableTimer(DUE_TIMER).pipe( switchMapTo(serviceFn.call(serviceFnThis, uName, ...extraArgs)), map((resp: boolean) => { if (!resp) { @@ -480,7 +489,7 @@ export class CdValidators { if (_.isFunction(usernameFn)) { username = usernameFn(); } - return observableTimer(500).pipe( + return observableTimer(DUE_TIMER).pipe( switchMapTo(_.invoke(userServiceThis, 'validatePassword', control.value, username)), map((resp: { valid: boolean; credits: number; valuation: string }) => { if (_.isFunction(callback)) { @@ -601,13 +610,12 @@ export class CdValidators { if (control.pristine || !control.value) { return observableOf({ required: true }); } - return rgwBucketService - .exists(control.value) - .pipe( - map((existenceResult: boolean) => - existenceResult === requiredExistenceResult ? null : { bucketNameNotAllowed: true } - ) - ); + return observableTimer(DUE_TIMER).pipe( + switchMapTo(rgwBucketService.exists(control.value)), + map((existenceResult: boolean) => + existenceResult === requiredExistenceResult ? null : { bucketNameNotAllowed: true } + ) + ); }; } @@ -622,4 +630,40 @@ export class CdValidators { } }; } + + static xml(): ValidatorFn { + return (control: AbstractControl): Record<string, boolean> | null => { + if (!control.value) return null; + const parser = new DOMParser(); + const xml = parser.parseFromString(control.value, 'application/xml'); + const errorNode = xml.querySelector('parsererror'); + if (errorNode) { + return { invalidXml: true }; + } + return null; + }; + } + + static jsonOrXml(): ValidatorFn { + return (control: AbstractControl): Record<string, boolean> | null => { + if (!control.value) return null; + + if (control.value.trim().startsWith('<')) { + const parser = new DOMParser(); + const xml = parser.parseFromString(control.value, 'application/xml'); + const errorNode = xml.querySelector('parsererror'); + if (errorNode) { + return { invalidXml: true }; + } + return null; + } else { + try { + JSON.parse(control.value); + return null; + } catch (e) { + return { invalidJson: true }; + } + } + }; + } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/breadcrumbs.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/breadcrumbs.ts index 10e799929da..9f0fc49786d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/breadcrumbs.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/breadcrumbs.ts @@ -32,13 +32,14 @@ export class BreadcrumbsResolver implements Resolve<IBreadcrumb[]> { ): Observable<IBreadcrumb[]> | Promise<IBreadcrumb[]> | IBreadcrumb[] { const data = route.routeConfig.data; const path = data.path === null ? null : this.getFullPath(route); + const disableSplit = data.disableSplit || false; const text = typeof data.breadcrumbs === 'string' ? 
data.breadcrumbs : data.breadcrumbs.text || data.text || path; - const crumbs: IBreadcrumb[] = [{ text: text, path: path }]; + const crumbs: IBreadcrumb[] = [{ text: text, path: path, disableSplit: disableSplit }]; return of(crumbs); } @@ -56,4 +57,5 @@ export class BreadcrumbsResolver implements Resolve<IBreadcrumb[]> { export interface IBreadcrumb { text: string; path: string; + disableSplit?: boolean; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-form-modal-field-config.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-form-modal-field-config.ts index a899e6daa69..58dc6619590 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-form-modal-field-config.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-form-modal-field-config.ts @@ -13,6 +13,11 @@ export class CdFormModalFieldConfig { validators: ValidatorFn[]; asyncValidators?: AsyncValidatorFn[]; + // Used when you want to dynamically update the + // async validators based on the field value + valueChangeListener?: boolean; + dependsOn?: string; + // --- Specific field properties --- typeConfig?: { [prop: string]: any; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/erasure-code-profile.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/erasure-code-profile.ts index ea9985ccd49..c5e744632ac 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/erasure-code-profile.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/erasure-code-profile.ts @@ -12,6 +12,8 @@ export class ErasureCodeProfile { 'crush-root'?: string; 'crush-locality'?: string; 'crush-failure-domain'?: string; + 'crush-num-failure-domains'?: number; + 'crush-osds-per-failure-domain'?: number; 'crush-device-class'?: string; 'directory'?: string; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/nvmeof.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/nvmeof.ts new file mode 100644 index 00000000000..e383d4a1dfc --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/nvmeof.ts @@ -0,0 +1,21 @@ +export interface NvmeofGateway { + cli_version: string; + version: string; + name: string; + group: string; + addr: string; + port: string; + load_balancing_group: string; + spdk_version: string; +} + +export interface NvmeofSubsystem { + nqn: string; + serial_number: string; + model_number: string; + min_cntlid: number; + max_cntlid: number; + namespace_count: number; + subtype: string; + max_namespaces: number; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permission.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permission.spec.ts index fb2c90469cc..213fb416ea5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permission.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permission.spec.ts @@ -8,6 +8,7 @@ describe('cd-notification classes', () => { grafana: { create: false, delete: false, read: false, update: false }, hosts: { create: false, delete: false, read: false, update: false }, iscsi: { create: false, delete: false, read: false, update: false }, + nvmeof: { create: false, delete: false, read: false, update: false }, log: { create: false, delete: false, read: false, update: false }, manager: { create: false, delete: false, read: false, update: false }, monitor: { create: false, delete: false, read: false, update: false }, @@ -29,6 +30,7 @@ describe('cd-notification classes', () => { grafana: ['create', 'read', 
'update', 'delete'], hosts: ['create', 'read', 'update', 'delete'], iscsi: ['create', 'read', 'update', 'delete'], + 'nvme-of': ['create', 'read', 'update', 'delete'], log: ['create', 'read', 'update', 'delete'], manager: ['create', 'read', 'update', 'delete'], monitor: ['create', 'read', 'update', 'delete'], @@ -46,6 +48,7 @@ describe('cd-notification classes', () => { grafana: { create: true, delete: true, read: true, update: true }, hosts: { create: true, delete: true, read: true, update: true }, iscsi: { create: true, delete: true, read: true, update: true }, + nvmeof: { create: true, delete: true, read: true, update: true }, log: { create: true, delete: true, read: true, update: true }, manager: { create: true, delete: true, read: true, update: true }, monitor: { create: true, delete: true, read: true, update: true }, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permissions.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permissions.ts index 3f2c87ed1a0..5e9fe4aae47 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permissions.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/permissions.ts @@ -19,6 +19,7 @@ export class Permissions { monitor: Permission; rbdImage: Permission; iscsi: Permission; + nvmeof: Permission; rbdMirroring: Permission; rgw: Permission; cephfs: Permission; @@ -37,6 +38,7 @@ export class Permissions { this.monitor = new Permission(serverPermissions['monitor']); this.rbdImage = new Permission(serverPermissions['rbd-image']); this.iscsi = new Permission(serverPermissions['iscsi']); + this.nvmeof = new Permission(serverPermissions['nvme-of']); this.rbdMirroring = new Permission(serverPermissions['rbd-mirroring']); this.rgw = new Permission(serverPermissions['rgw']); this.cephfs = new Permission(serverPermissions['cephfs']); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/rgw-encryption-config-keys.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/rgw-encryption-config-keys.ts new file mode 100644 index 00000000000..90fccb8184b --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/rgw-encryption-config-keys.ts @@ -0,0 +1,21 @@ +export enum rgwEncryptionConfigKeys { + auth = 'Authentication Method', + encryption_type = 'Encryption Type', + backend = 'Backend', + prefix = 'Prefix', + namespace = 'Namespace', + secret_engine = 'Secret Engine', + addr = 'Address', + token_file = 'Token File', + ssl_cacert = 'SSL CA Certificate', + ssl_clientcert = 'SSL Client Certificate', + ssl_clientkey = 'SSL Client Key', + verify_ssl = 'Verify SSL', + ca_path = 'CA Path', + client_cert = 'Client Certificate', + client_key = 'Client Key', + kms_key_template = 'KMS Key Template', + password = 'Password', + s3_key_template = 'S3 Key Template', + username = 'Username' +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/pipes.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/pipes.module.ts index 53f8f9f309f..025eb72a47b 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/pipes.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/pipes.module.ts @@ -38,6 +38,7 @@ import { UpperFirstPipe } from './upper-first.pipe'; import { OctalToHumanReadablePipe } from './octal-to-human-readable.pipe'; import { PathPipe } from './path.pipe'; import { PluralizePipe } from './pluralize.pipe'; +import { XmlPipe } from './xml.pipe'; @NgModule({ imports: [CommonModule], @@ -78,7 +79,8 @@ import { PluralizePipe } 
from './pluralize.pipe'; OsdSummaryPipe, OctalToHumanReadablePipe, PathPipe, - PluralizePipe + PluralizePipe, + XmlPipe ], exports: [ ArrayPipe, @@ -117,7 +119,8 @@ import { PluralizePipe } from './pluralize.pipe'; OsdSummaryPipe, OctalToHumanReadablePipe, PathPipe, - PluralizePipe + PluralizePipe, + XmlPipe ], providers: [ ArrayPipe, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.spec.ts new file mode 100644 index 00000000000..47ddb2ee17e --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.spec.ts @@ -0,0 +1,22 @@ +import { TestBed } from '@angular/core/testing'; +import { configureTestBed } from '~/testing/unit-test-helper'; +import { JsonToXmlService } from '../services/json-to-xml.service'; +import { XmlPipe } from './xml.pipe'; + +describe('XmlPipe', () => { + let pipe: XmlPipe; + let jsonToXmlService: JsonToXmlService; + + configureTestBed({ + providers: [JsonToXmlService] + }); + + beforeEach(() => { + jsonToXmlService = TestBed.inject(JsonToXmlService); + pipe = new XmlPipe(jsonToXmlService); + }); + + it('create an instance', () => { + expect(pipe).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts new file mode 100644 index 00000000000..59d7572e9f0 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts @@ -0,0 +1,16 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { JsonToXmlService } from '../services/json-to-xml.service'; + +@Pipe({ + name: 'xml' +}) +export class XmlPipe implements PipeTransform { + constructor(private jsonToXmlService: JsonToXmlService) {} + + transform(value: string, valueFormat: string = 'json'): string { + if (valueFormat === 'json') { + value = this.jsonToXmlService.format(value); + } + return value; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.spec.ts new file mode 100644 index 00000000000..5035dae9b1f --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.spec.ts @@ -0,0 +1,44 @@ +import { TestBed } from '@angular/core/testing'; + +import { JsonToXmlService } from './json-to-xml.service'; + +describe('JsonToXmlService', () => { + let service: JsonToXmlService; + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(JsonToXmlService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); + + it('should transform JSON formatted string to XML string', () => { + const json: string = `{ + "foo": "bar", + "items": [ + { + "name": "item1", + "value": "value1" + }, + { + "name": "item2", + "value": "value2" + } + ] + }`; + const expectedXml = `<foo>bar</foo> +<items> + <name>item1</name> + <value>value1</value> +</items> +<items> + <name>item2</name> + <value>value2</value> +</items> +`; + expect(JSON.parse(json)).toBeTruthy(); + expect(service.format(json)).toBe(expectedXml); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts new file mode 100644 index 00000000000..8f1d128c0c5 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts @@ -0,0 
+1,40 @@ +import { Injectable } from '@angular/core'; + +@Injectable({ + providedIn: 'root' +}) +export class JsonToXmlService { + constructor() {} + + format(json: any, indentSize: number = 2, currentIndent: number = 0): string { + if (!json) return null; + let xml = ''; + if (typeof json === 'string') { + json = JSON.parse(json); + } + + for (const key in json) { + if (json.hasOwnProperty(key)) { + const value = json[key]; + const indentation = ' '.repeat(currentIndent); + + if (Array.isArray(value)) { + value.forEach((item) => { + xml += + `${indentation}<${key}>\n` + + this.format(item, indentSize, currentIndent + indentSize) + + `${indentation}</${key}>\n`; + }); + } else if (typeof value === 'object') { + xml += + `${indentation}<${key}>\n` + + this.format(value, indentSize, currentIndent + indentSize) + + `${indentation}</${key}>\n`; + } else { + xml += `${indentation}<${key}>${value}</${key}>\n`; + } + } + } + return xml; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts index a7e2cf8f421..71621072783 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts @@ -324,6 +324,16 @@ export class TaskMessageService { metadata.bucket_names.length > 1 ? 'selected buckets' : metadata.bucket_names[0] }`; }), + 'rgw/multisite/sync-policy/delete': this.newTaskMessage( + this.commonOperations.delete, + (metadata) => { + return $localize`${ + metadata.group_names.length > 1 + ? 'selected policy groups' + : `policy group '${metadata.group_names[0]}'` + }`; + } + ), // iSCSI target tasks 'iscsi/target/create': this.newTaskMessage(this.commonOperations.create, (metadata) => this.iscsiTarget(metadata) @@ -334,6 +344,13 @@ export class TaskMessageService { 'iscsi/target/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) => this.iscsiTarget(metadata) ), + // NVME/TCP tasks + 'nvmeof/subsystem/create': this.newTaskMessage(this.commonOperations.create, (metadata) => + this.nvmeofSubsystem(metadata) + ), + 'nvmeof/subsystem/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) => + this.nvmeofSubsystem(metadata) + ), 'nfs/create': this.newTaskMessage(this.commonOperations.create, (metadata) => this.nfs(metadata) ), @@ -459,6 +476,10 @@ export class TaskMessageService { return $localize`target '${metadata.target_iqn}'`; } + nvmeofSubsystem(metadata: any) { + return $localize`subsystem '${metadata.nqn}'`; + } + nfs(metadata: any) { return $localize`NFS '${metadata.cluster_id}\:${ metadata.export_id ? 
metadata.export_id : metadata.path diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-json-formatter.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-json-formatter.service.ts index 0e696022aff..d2f4fb5b05b 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-json-formatter.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-json-formatter.service.ts @@ -7,7 +7,7 @@ export class TextAreaJsonFormatterService { constructor() {} format(textArea: ElementRef<any>): void { - const value = textArea.nativeElement.value; + const value = textArea?.nativeElement?.value; try { const formatted = JSON.stringify(JSON.parse(value), null, 2); textArea.nativeElement.value = formatted; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.spec.ts new file mode 100644 index 00000000000..8e91a604515 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.spec.ts @@ -0,0 +1,16 @@ +import { TestBed } from '@angular/core/testing'; + +import { TextAreaXmlFormatterService } from './text-area-xml-formatter.service'; + +describe('TextAreaXmlFormatterService', () => { + let service: TextAreaXmlFormatterService; + + beforeEach(() => { + TestBed.configureTestingModule({}); + service = TestBed.inject(TextAreaXmlFormatterService); + }); + + it('should be created', () => { + expect(service).toBeTruthy(); + }); +}); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.ts new file mode 100644 index 00000000000..ff9d63ff200 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/text-area-xml-formatter.service.ts @@ -0,0 +1,23 @@ +import { ElementRef, Injectable } from '@angular/core'; + +@Injectable({ + providedIn: 'root' +}) +export class TextAreaXmlFormatterService { + constructor() {} + + format(textArea: ElementRef<any>): void { + if (!textArea.nativeElement?.value) return; + const value = textArea.nativeElement.value; + const parser = new DOMParser(); + const formatted = parser.parseFromString(value, 'application/xml'); + const lineNumber = formatted.getElementsByTagName('*').length; + const pixelPerLine = 20; + const pixels = lineNumber * pixelPerLine; + textArea.nativeElement.style.height = pixels + 'px'; + const errorNode = formatted.querySelector('parsererror'); + if (errorNode) { + return; + } + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/styles.scss b/src/pybind/mgr/dashboard/frontend/src/styles.scss index 484756d4025..f4cdd7981f5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles.scss @@ -1,6 +1,6 @@ /* You can add global styles to this file, and also import other style files */ @use './src/styles/defaults' as *; - +@import './src/styles/carbon-defaults.scss'; // Angular2-Tree Component @import '@circlon/angular-tree-component/css/angular-tree-component.css'; @@ -164,12 +164,6 @@ tags-input .tags { pointer-events: none; } -a { - &:hover { - text-decoration: underline; - } -} - .clickable, a { cursor: pointer; diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss b/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss 
new file mode 100644 index 00000000000..89f002572a7 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss @@ -0,0 +1,103 @@ +@use '@carbon/styles/scss/config' with ( + $font-path: '~@ibm/plex', + $flex-grid-columns: 16, + $use-flexbox-grid: true, +); +@use './themes/default'; +@use '@carbon/styles/scss/compat/themes' as compat; +@use '@carbon/styles/scss/themes'; +@use '@carbon/styles/scss/theme' with ( + $theme: default.$theme, + $fallback: compat.$g90, +); +@use '@carbon/styles'; +@use '@carbon/type'; + +/********************************************************************************** +These are meant to be temporary style overrides. +The sizing of some Carbon components clash with a requirement +of one third party component - the data table - that needs +to set the body's font-size at 12px. +Once this component is removed we should be ok to remove the overrides below +**********************************************************************************/ + +/****************************************** +Side nav +******************************************/ + +$sidenav-block-size: 2.7rem; + +.cds--side-nav__submenu { + block-size: $sidenav-block-size; +} + +a.cds--side-nav__link { + min-block-size: $sidenav-block-size; +} + +.cds--side-nav__menu a.cds--side-nav__link { + block-size: $sidenav-block-size; +} + +.cds--side-nav__submenu-title, +a.cds--side-nav__link > .cds--side-nav__link-text { + font-size: calc(type.type-scale(4) + 0.5px); +} + +.cds--side-nav__icon > svg { + block-size: 20px; + inline-size: 20px; +} + +.cds--side-nav--expanded { + min-width: 20.8rem !important; +} + +.cds--side-nav__navigation { + min-width: 4.2rem; +} + +.cds--side-nav__navigation { + left: -4.8rem; + transition: 250ms ease; +} + +.cds--side-nav--expanded { + left: 0; + transition: 250ms ease; +} +/****************************************** +Header +******************************************/ +$header-block-size: 3.9rem; + +a.cds--header__menu-item, +.cds--header__action, +.cds--header { + block-size: $header-block-size; + font-size: calc(type.type-scale(4) + 0.5px); +} + +button.cds--header__menu-trigger.cds--header__action.cds--header__menu-toggle { + inline-size: $header-block-size; +} + +button.cds--overflow-menu { + block-size: $header-block-size; + inline-size: calc($header-block-size - 1rem); +} + +/****************************************** +Modals +******************************************/ + +.modal-dialog { + margin-top: 5rem !important; +} + +/****************************************** +Overflow menu +******************************************/ +.cds--overflow-menu.cds--overflow-menu--open { + box-shadow: none; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_basics.scss b/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_basics.scss index 6ca04c3d8a4..ed987be9f4d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_basics.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles/ceph-custom/_basics.scss @@ -7,6 +7,7 @@ html { html, body { + // WARNING: This was clashing with Carbon's font-size font-size: 12px; height: 100%; width: 100%; diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss index 8147d9381ce..16cbcac3e32 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles/defaults/_bootstrap-defaults.scss @@ 
-110,8 +110,9 @@ $code-block-bg: #f7f7f9 !default; // Typography -$font-family-sans-serif: 'Helvetica Neue', Helvetica, Arial, 'Noto Sans', sans-serif, - 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji' !default; +// WARNING: This was clashing with Carbon's font-family +// $font-family-sans-serif: 'Helvetica Neue', Helvetica, Arial, 'Noto Sans', sans-serif, +// 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol', 'Noto Color Emoji' !default; // Card diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/themes/_default.scss b/src/pybind/mgr/dashboard/frontend/src/styles/themes/_default.scss new file mode 100644 index 00000000000..cdb1e986f8e --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/styles/themes/_default.scss @@ -0,0 +1,20 @@ +@use './src/styles/vendor/variables' as vv; + +$theme: ( + text-disabled: vv.$gray-500, + text-error: vv.$danger, + text-helper: vv.$body-color, + text-inverse: vv.$black, + text-on-color: vv.$white, + text-on-color-disabled: vv.$gray-700, + text-placeholder: vv.$gray-700, + text-primary: vv.$body-bg-alt, + text-secondary: vv.$body-bg-alt, + btn-primary: vv.$primary, + border-interactive: vv.$primary, + background: vv.$secondary, + layer-01: vv.$secondary, + icon-primary: vv.$gray-100, + icon-secondary: vv.$gray-300, + link-primary: vv.$primary +); diff --git a/src/pybind/mgr/dashboard/frontend/src/typings.d.ts b/src/pybind/mgr/dashboard/frontend/src/typings.d.ts index ef5c7bd6205..0ca84068d29 100644 --- a/src/pybind/mgr/dashboard/frontend/src/typings.d.ts +++ b/src/pybind/mgr/dashboard/frontend/src/typings.d.ts @@ -3,3 +3,5 @@ declare var module: NodeModule; interface NodeModule { id: string; } + +declare module '@carbon/icons/*'; diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 2149537c157..341a4f00f1b 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -32,7 +32,7 @@ from .grafana import push_local_dashboards from .services import nvmeof_cli # noqa # pylint: disable=unused-import from .services.auth import AuthManager, AuthManagerTool, JwtManager from .services.exception import dashboard_exception_handler -from .services.rgw_client import configure_rgw_credentials +from .services.service import RgwServiceManager from .services.sso import SSO_COMMANDS, handle_sso_command from .settings import handle_option_command, options_command_list, options_schema_list from .tools import NotificationQueue, RequestLoggingTool, TaskManager, \ @@ -417,7 +417,8 @@ class Module(MgrModule, CherryPyConfig): @CLIWriteCommand("dashboard set-rgw-credentials") def set_rgw_credentials(self): try: - configure_rgw_credentials() + rgw_service_manager = RgwServiceManager() + rgw_service_manager.configure_rgw_credentials() except Exception as error: return -errno.EINVAL, '', str(error) diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml index 99c4ee54f0b..ecf305d77ac 100644 --- a/src/pybind/mgr/dashboard/openapi.yaml +++ b/src/pybind/mgr/dashboard/openapi.yaml @@ -635,6 +635,8 @@ paths: force: default: false type: boolean + image_mirror_mode: + type: string metadata: type: string mirror_mode: @@ -3114,6 +3116,55 @@ paths: - jwt: [] tags: - Cephfs + /api/cephfs/{fs_id}/rename-path: + put: + description: "\n Rename a file or directory.\n :param fs_id: The\ + \ filesystem identifier.\n :param src_path: The path to the existing\ + \ file or directory.\n :param dst_path: The new name of the file or\ + \ directory.\n " + 
parameters: + - in: path + name: fs_id + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + properties: + dst_path: + type: string + src_path: + type: string + required: + - src_path + - dst_path + type: object + responses: + '200': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Resource updated. + '202': + content: + application/vnd.ceph.api.v1.0+json: + type: object + description: Operation is still executing. Please check the task queue. + '400': + description: Operation exception. Please check the response body for details. + '401': + description: Unauthenticated access. Please login first. + '403': + description: Unauthorized access. Please check your permissions. + '500': + description: Unexpected error. Please check the response body for the stack + trace. + security: + - jwt: [] + tags: + - Cephfs /api/cephfs/{fs_id}/snapshot: delete: description: "\n Remove a snapshot.\n :param fs_id: The filesystem\ @@ -4204,11 +4255,15 @@ paths: type: string name: type: string + pool_type: + default: replication + type: string + profile: + type: string root: type: string required: - name - - root - failure_domain type: object responses: @@ -5008,6 +5063,9 @@ paths: balancer: description: '' type: string + btime: + description: '' + type: string compat: description: '' properties: @@ -5127,6 +5185,7 @@ paths: - failed - metadata_pool - epoch + - btime - stopped - max_mds - compat @@ -7244,6 +7303,8 @@ paths: properties: cluster_alias: type: string + name: + type: string ssl_certificate: type: string url: @@ -7254,6 +7315,7 @@ paths: default: false type: boolean required: + - name - url - cluster_alias - username @@ -10273,6 +10335,8 @@ paths: type: string flags: type: string + rbd_mirroring: + type: string type: object responses: '200': @@ -10613,6 +10677,9 @@ paths: type: string placement_target: type: string + replication: + default: 'false' + type: string tags: type: string uid: @@ -10919,6 +10986,8 @@ paths: type: string key_id: type: string + lifecycle: + type: string lock_mode: type: string lock_retention_period_days: @@ -10931,6 +11000,8 @@ paths: type: string mfa_token_serial: type: string + replication: + type: string tags: type: string uid: @@ -11324,6 +11395,11 @@ paths: name: zonegroup_name schema: type: string + - allowEmptyValue: true + in: query + name: all_policy + schema: + type: string responses: '200': content: @@ -11649,7 +11725,6 @@ paths: - realm_token - zone_name - port - - placement_spec type: object responses: '201': diff --git a/src/pybind/mgr/dashboard/run-frontend-e2e-tests.sh b/src/pybind/mgr/dashboard/run-frontend-e2e-tests.sh index a481a983f40..8c4c1735596 100755 --- a/src/pybind/mgr/dashboard/run-frontend-e2e-tests.sh +++ b/src/pybind/mgr/dashboard/run-frontend-e2e-tests.sh @@ -33,7 +33,21 @@ start_ceph() { # Set SSL verify to False ceph_all dashboard set-rgw-api-ssl-verify False - CYPRESS_BASE_URL=$(ceph mgr services | jq -r .dashboard) + # Set test_orchestrator as orch backend + ceph mgr module enable test_orchestrator + ceph orch set backend test_orchestrator + + CYPRESS_BASE_URL="" + retry=0 + while [[ -z "${CYPRESS_BASE_URL}" || "${CYPRESS_BASE_URL}" == "null" ]]; do + CYPRESS_BASE_URL=$(ceph mgr services | jq -r .dashboard) + if [ $retry -eq 10 ]; then + echo "ERROR: Could not get the dashboard URL" + stop 1 + fi + retry=$((retry + 1)) + sleep 1 + done CYPRESS_CEPH2_URL=$(ceph2 mgr services | jq -r .dashboard) # start rbd-mirror daemon in the cluster diff --git 
a/src/pybind/mgr/dashboard/services/ceph_service.py b/src/pybind/mgr/dashboard/services/ceph_service.py index 53cd0e7ad93..0a34e39a713 100644 --- a/src/pybind/mgr/dashboard/services/ceph_service.py +++ b/src/pybind/mgr/dashboard/services/ceph_service.py @@ -2,6 +2,7 @@ import json import logging +from abc import ABC, abstractmethod import rados from mgr_module import CommandResult @@ -10,7 +11,7 @@ from mgr_util import get_most_recent_rate, get_time_series_rates, name_to_config from .. import mgr try: - from typing import Any, Dict, Optional, Union + from typing import Any, Dict, List, Optional, Union except ImportError: pass # For typing only @@ -24,6 +25,45 @@ class SendCommandError(rados.Error): super(SendCommandError, self).__init__(err, errno) +class BackendConfig(ABC): + @abstractmethod + def get_config_keys(self) -> List[str]: + pass + + @abstractmethod + def get_required_keys(self) -> List[str]: + pass + + @abstractmethod + def get_key_pattern(self, enc_type: str) -> str: + pass + + +class VaultConfig(BackendConfig): + def get_config_keys(self) -> List[str]: + return ['addr', 'auth', 'namespace', 'prefix', 'secret_engine', + 'token_file', 'ssl_cacert', 'ssl_clientcert', 'ssl_clientkey', + 'verify_ssl'] + + def get_required_keys(self) -> List[str]: + return ['auth', 'prefix', 'secret_engine', 'addr'] + + def get_key_pattern(self, enc_type: str) -> str: + return 'rgw_crypt_{backend}_{key}' if enc_type == 'SSE_KMS' else 'rgw_crypt_sse_s3_{backend}_{key}' # noqa E501 #pylint: disable=line-too-long + + +class KmipConfig(BackendConfig): + def get_config_keys(self) -> List[str]: + return ['addr', 'ca_path', 'client_cert', 'client_key', 'kms_key_template', + 'password', 's3_key_template', 'username'] + + def get_required_keys(self) -> List[str]: + return ['addr', 'username', 'password'] + + def get_key_pattern(self, enc_type: str) -> str: + return 'rgw_crypt_{backend}_{key}' if enc_type == 'SSE_KMS' else 'rgw_crypt_sse_s3_{backend}_{key}' # noqa E501 #pylint: disable=line-too-long + + # pylint: disable=too-many-public-methods class CephService(object): @@ -183,64 +223,59 @@ class CephService(object): return None @classmethod - def get_encryption_config(cls, daemon_name): - kms_vault_configured = False - s3_vault_configured = False - kms_backend: str = '' - sse_s3_backend: str = '' - vault_stats = [] - full_daemon_name = 'rgw.' 
+ daemon_name + def get_encryption_config(cls, daemon_name: str) -> Dict[str, List[Dict[str, Any]]]: + # Define backends with their respective configuration classes + backends: Dict[str, Dict[str, BackendConfig]] = { + 'SSE_KMS': { + 'vault': VaultConfig(), + 'kmip': KmipConfig() + }, + 'SSE_S3': { + 'vault': VaultConfig() + } + } - kms_backend = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), - key='rgw_crypt_s3_kms_backend') - sse_s3_backend = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), - key='rgw_crypt_sse_s3_backend') - - if kms_backend.strip() == 'vault': - kms_vault_auth: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_vault_auth') - kms_vault_engine: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_vault_secret_engine') - kms_vault_address: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_vault_addr') - kms_vault_token: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_vault_token_file') # noqa E501 #pylint: disable=line-too-long - if (kms_vault_auth.strip() != "" and kms_vault_engine.strip() != "" and kms_vault_address.strip() != ""): # noqa E501 #pylint: disable=line-too-long - if(kms_vault_auth == 'token' and kms_vault_token.strip() == ""): - kms_vault_configured = False - else: - kms_vault_configured = True - - if sse_s3_backend.strip() == 'vault': - s3_vault_auth: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_sse_s3_vault_auth') - s3_vault_engine: str = CephService.send_command('mon', - 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_sse_s3_vault_secret_engine') # noqa E501 #pylint: disable=line-too-long - s3_vault_address: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_sse_s3_vault_addr') - s3_vault_token: str = CephService.send_command('mon', 'config get', - who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long - key='rgw_crypt_sse_s3_vault_token_file') # noqa E501 #pylint: disable=line-too-long - - if (s3_vault_auth.strip() != "" and s3_vault_engine.strip() != "" and s3_vault_address.strip() != ""): # noqa E501 #pylint: disable=line-too-long - if(s3_vault_auth == 'token' and s3_vault_token.strip() == ""): - s3_vault_configured = False - else: - s3_vault_configured = True + # Final configuration values + config_values: Dict[str, List[Dict[str, Any]]] = { + 'SSE_KMS': [], + 'SSE_S3': [] + } + + full_daemon_name = 'rgw.' 
+ daemon_name - vault_stats.append(kms_vault_configured) - vault_stats.append(s3_vault_configured) - return vault_stats + for enc_type, backend_list in backends.items(): + for backend_name, backend in backend_list.items(): + config_keys = backend.get_config_keys() + required_keys = backend.get_required_keys() + key_pattern = backend.get_key_pattern(enc_type) + + # Check if all required configurations are present and not empty + all_required_configs_present = True + for key in required_keys: + config_key = key_pattern.format(backend=backend_name, key=key) + value = CephService.send_command('mon', 'config get', + who=name_to_config_section(full_daemon_name), + key=config_key) + if not (isinstance(value, str) and value.strip()): + all_required_configs_present = False + break + + # If all required configurations are present, gather all config values + if all_required_configs_present: + config_dict = {} + for key in config_keys: + config_key = key_pattern.format(backend=backend_name, key=key) + value = CephService.send_command('mon', 'config get', + who=name_to_config_section(full_daemon_name), # noqa E501 #pylint: disable=line-too-long + key=config_key) + if value: + config_dict[key] = value.strip() if isinstance(value, str) else value + config_dict['backend'] = backend_name + config_dict['encryption_type'] = enc_type + config_dict['unique_id'] = enc_type + '-' + backend_name + config_values[enc_type].append(config_dict) + + return config_values @classmethod def set_encryption_config(cls, encryption_type, kms_provider, auth_method, diff --git a/src/pybind/mgr/dashboard/services/cephfs.py b/src/pybind/mgr/dashboard/services/cephfs.py index ffbf9d0c816..6a3cd6b72ba 100644 --- a/src/pybind/mgr/dashboard/services/cephfs.py +++ b/src/pybind/mgr/dashboard/services/cephfs.py @@ -298,3 +298,12 @@ class CephFS(object): rfiles = int(self.cfs.getxattr(path, 'ceph.dir.rfiles')) rsubdirs = int(self.cfs.getxattr(path, 'ceph.dir.rsubdirs')) return {'bytes': rbytes, 'files': rfiles, 'subdirs': rsubdirs} + + def rename_path(self, src_path, dst_path) -> None: + """ + Rename a file or directory. + :param src_path: the path to the existing file or directory. + :param dst_path: the new name of the file or directory.
+ """ + logger.info("Renaming: from %s to %s", src_path, dst_path) + self.cfs.rename(src_path, dst_path) diff --git a/src/pybind/mgr/dashboard/services/nvmeof_client.py b/src/pybind/mgr/dashboard/services/nvmeof_client.py index 5dee7dfcfbc..019ecf0267c 100644 --- a/src/pybind/mgr/dashboard/services/nvmeof_client.py +++ b/src/pybind/mgr/dashboard/services/nvmeof_client.py @@ -24,12 +24,23 @@ else: def __init__(self): logger.info("Initiating nvmeof gateway connection...") - - self.gateway_addr = list( - NvmeofGatewaysConfig.get_gateways_config()["gateways"].values() - )[0]["service_url"] - self.channel = grpc.insecure_channel("{}".format(self.gateway_addr)) - logger.info("Found nvmeof gateway at %s", self.gateway_addr) + service_name, self.gateway_addr = NvmeofGatewaysConfig.get_service_info() + + root_ca_cert = NvmeofGatewaysConfig.get_root_ca_cert(service_name) + client_key = NvmeofGatewaysConfig.get_client_key(service_name) + client_cert = NvmeofGatewaysConfig.get_client_cert(service_name) + + if root_ca_cert and client_key and client_cert: + logger.info('Securely connecting to: %s', self.gateway_addr) + credentials = grpc.ssl_channel_credentials( + root_certificates=root_ca_cert, + private_key=client_key, + certificate_chain=client_cert, + ) + self.channel = grpc.secure_channel(self.gateway_addr, credentials) + else: + logger.info("Insecurely connecting to: %s", self.gateway_addr) + self.channel = grpc.insecure_channel(self.gateway_addr) self.stub = pb2_grpc.GatewayStub(self.channel) def make_namedtuple_from_object(cls: Type[NamedTuple], obj: Any) -> NamedTuple: diff --git a/src/pybind/mgr/dashboard/services/nvmeof_conf.py b/src/pybind/mgr/dashboard/services/nvmeof_conf.py index 901098ea566..3879e308db4 100644 --- a/src/pybind/mgr/dashboard/services/nvmeof_conf.py +++ b/src/pybind/mgr/dashboard/services/nvmeof_conf.py @@ -2,7 +2,11 @@ import json +from orchestrator import OrchestratorError + from .. 
import mgr +from ..exceptions import DashboardException +from ..services.orchestrator import OrchClient class NvmeofGatewayAlreadyExists(Exception): @@ -58,3 +62,53 @@ class NvmeofGatewaysConfig(object): raise NvmeofGatewayDoesNotExist(name) del config['gateways'][name] cls._save_config(config) + + @classmethod + def get_service_info(cls): + try: + config = cls.get_gateways_config() + service_name = list(config['gateways'].keys())[0] + addr = config['gateways'][service_name]['service_url'] + return service_name, addr + except (KeyError, IndexError) as e: + raise DashboardException( + msg=f'NVMe-oF configuration is not set: {e}', + ) + + @classmethod + def get_client_cert(cls, service_name: str): + client_cert = cls.from_cert_store('nvmeof_client_cert', service_name) + return client_cert.encode() if client_cert else None + + @classmethod + def get_client_key(cls, service_name: str): + client_key = cls.from_cert_store('nvmeof_client_key', service_name, key=True) + return client_key.encode() if client_key else None + + @classmethod + def get_root_ca_cert(cls, service_name: str): + try: + root_ca_cert = cls.from_cert_store('nvmeof_root_ca_cert', service_name) + # If root_ca_cert is not set, use server_cert as root_ca_cert + return root_ca_cert.encode() if root_ca_cert else cls.get_server_cert(service_name) + except DashboardException: + # If root_ca_cert is not set, use server_cert as root_ca_cert + return cls.get_server_cert(service_name) + + @classmethod + def get_server_cert(cls, service_name: str): + server_cert = cls.from_cert_store('nvmeof_server_cert', service_name) + return server_cert.encode() if server_cert else None + + @classmethod + def from_cert_store(cls, entity: str, service_name: str, key=False): + try: + orch = OrchClient.instance() + if orch.available(): + if key: + return orch.cert_store.get_key(entity, service_name) + return orch.cert_store.get_cert(entity, service_name) + return None + except OrchestratorError as e: + raise DashboardException( + msg=f'Failed to get {entity} for {service_name}: {e}', + ) diff --git a/src/pybind/mgr/dashboard/services/orchestrator.py b/src/pybind/mgr/dashboard/services/orchestrator.py index 97776dec335..1f77b3c0ab5 100644 --- a/src/pybind/mgr/dashboard/services/orchestrator.py +++ b/src/pybind/mgr/dashboard/services/orchestrator.py @@ -130,11 +130,9 @@ class ServiceManager(ResourceManager): service_ids = [service_ids] completion_list = [ - self.api.service_action('reload', service_type, service_name, - service_id) - for service_name, service_id in service_ids + self.api.service_action('restart', f'{service_type}.{service_id}') + for service_id in service_ids ] - self.api.orchestrator_wait(completion_list) for c in completion_list: raise_if_exception(c) @@ -207,6 +205,19 @@ class HardwareManager(ResourceManager): return self.api.node_proxy_common(category, hostname=hostname) +class CertStoreManager(ResourceManager): + + @wait_api_result + def get_cert(self, entity: str, service_name: Optional[str] = None, + hostname: Optional[str] = None) -> str: + return self.api.cert_store_get_cert(entity, service_name, hostname) + + @wait_api_result + def get_key(self, entity: str, service_name: Optional[str] = None, + hostname: Optional[str] = None) -> str: + return self.api.cert_store_get_key(entity, service_name, hostname) + + class OrchClient(object): _instance = None @@ -228,6 +239,7 @@ class OrchClient(object): self.daemons = DaemonManager(self.api) self.upgrades = UpgradeManager(self.api) self.hardware = HardwareManager(self.api) + self.cert_store = CertStoreManager(self.api) def available(self, features: Optional[List[str]] = None) -> bool: available =
self.status()['available'] diff --git a/src/pybind/mgr/dashboard/services/rbd.py b/src/pybind/mgr/dashboard/services/rbd.py index ec65b1fd5e1..31fdb7c9818 100644 --- a/src/pybind/mgr/dashboard/services/rbd.py +++ b/src/pybind/mgr/dashboard/services/rbd.py @@ -559,8 +559,8 @@ class RbdService(object): @ttl_cache_invalidator(RBD_IMAGE_REFS_CACHE_REFERENCE) def set(cls, image_spec, name=None, size=None, features=None, configuration=None, metadata=None, enable_mirror=None, primary=None, - force=False, resync=False, mirror_mode=None, schedule_interval='', - remove_scheduling=False): + force=False, resync=False, mirror_mode=None, image_mirror_mode=None, + schedule_interval='', remove_scheduling=False): # pylint: disable=too-many-branches pool_name, namespace, image_name = parse_image_spec(image_spec) @@ -574,15 +574,22 @@ class RbdService(object): if size and size != image.size(): image.resize(size) + if image_mirror_mode is not None and mirror_mode is not None: + if image_mirror_mode != mirror_mode: + RbdMirroringService.disable_image(image_name, pool_name, namespace) + mirror_image_info = image.mirror_image_get_info() - if enable_mirror and mirror_image_info['state'] == rbd.RBD_MIRROR_IMAGE_DISABLED: + if (enable_mirror is True + and mirror_image_info['state'] == rbd.RBD_MIRROR_IMAGE_DISABLED): RbdMirroringService.enable_image( image_name, pool_name, namespace, - MIRROR_IMAGE_MODE[mirror_mode]) + MIRROR_IMAGE_MODE[mirror_mode] + ) elif (enable_mirror is False - and mirror_image_info['state'] == rbd.RBD_MIRROR_IMAGE_ENABLED): + and mirror_image_info['state'] == rbd.RBD_MIRROR_IMAGE_ENABLED): RbdMirroringService.disable_image( - image_name, pool_name, namespace) + image_name, pool_name, namespace + ) # check enable/disable features if features is not None: diff --git a/src/pybind/mgr/dashboard/services/rgw_client.py b/src/pybind/mgr/dashboard/services/rgw_client.py index fb6c83d60cb..2fb30b67f43 100644 --- a/src/pybind/mgr/dashboard/services/rgw_client.py +++ b/src/pybind/mgr/dashboard/services/rgw_client.py @@ -2,25 +2,31 @@ # pylint: disable=C0302 # pylint: disable=too-many-branches # pylint: disable=too-many-lines - +import ast import ipaddress import json import logging import os import re +import time import xml.etree.ElementTree as ET # noqa: N814 from enum import Enum from subprocess import SubprocessError +from urllib.parse import urlparse +import requests from mgr_util import build_url, name_to_config_section from .. 
import mgr from ..awsauth import S3Auth +from ..controllers.multi_cluster import MultiCluster from ..exceptions import DashboardException from ..rest_client import RequestException, RestClient from ..settings import Settings from ..tools import dict_contains_path, dict_get, json_str_to_object, str_to_bool from .ceph_service import CephService +from .orchestrator import OrchClient +from .service import RgwServiceManager try: from typing import Any, Dict, List, Optional, Tuple, Union @@ -29,20 +35,16 @@ except ImportError: logger = logging.getLogger('rgw_client') +_SYNC_GROUP_ID = 'dashboard_admin_group' +_SYNC_FLOW_ID = 'dashboard_admin_flow' +_SYNC_PIPE_ID = 'dashboard_admin_pipe' + class NoRgwDaemonsException(Exception): def __init__(self): super().__init__('No RGW service is running.') -class NoCredentialsException(Exception): - def __init__(self): - super(NoCredentialsException, self).__init__( - 'No RGW credentials found, ' - 'please consult the documentation on how to enable RGW for ' - 'the dashboard.') - - class RgwAdminException(Exception): pass @@ -55,6 +57,7 @@ class RgwDaemon: ssl: bool realm_name: str zonegroup_name: str + zonegroup_id: str zone_name: str @@ -74,6 +77,7 @@ def _get_daemons() -> Dict[str, RgwDaemon]: daemon.name = daemon_map[key]['metadata']['id'] daemon.realm_name = daemon_map[key]['metadata']['realm_name'] daemon.zonegroup_name = daemon_map[key]['metadata']['zonegroup_name'] + daemon.zonegroup_id = daemon_map[key]['metadata']['zonegroup_id'] daemon.zone_name = daemon_map[key]['metadata']['zone_name'] daemons[daemon.name] = daemon logger.info('Found RGW daemon with configuration: host=%s, port=%d, ssl=%s', @@ -210,78 +214,6 @@ def _parse_frontend_config(config) -> Tuple[int, bool]: raise LookupError('Failed to determine RGW port from "{}"'.format(config)) -def _parse_secrets(user: str, data: dict) -> Tuple[str, str]: - for key in data.get('keys', []): - if key.get('user') == user and data.get('system') in ['true', True]: - access_key = key.get('access_key') - secret_key = key.get('secret_key') - return access_key, secret_key - return '', '' - - -def _get_user_keys(user: str, realm: Optional[str] = None) -> Tuple[str, str]: - access_key = '' - secret_key = '' - rgw_user_info_cmd = ['user', 'info', '--uid', user] - cmd_realm_option = ['--rgw-realm', realm] if realm else [] - if realm: - rgw_user_info_cmd += cmd_realm_option - try: - _, out, err = mgr.send_rgwadmin_command(rgw_user_info_cmd) - if out: - access_key, secret_key = _parse_secrets(user, out) - if not access_key: - rgw_create_user_cmd = [ - 'user', 'create', - '--uid', user, - '--display-name', 'Ceph Dashboard', - '--system', - ] + cmd_realm_option - _, out, err = mgr.send_rgwadmin_command(rgw_create_user_cmd) - if out: - access_key, secret_key = _parse_secrets(user, out) - if not access_key: - logger.error('Unable to create rgw user "%s": %s', user, err) - except SubprocessError as error: - logger.exception(error) - - return access_key, secret_key - - -def configure_rgw_credentials(): - logger.info('Configuring dashboard RGW credentials') - user = 'dashboard' - realms = [] - access_key = '' - secret_key = '' - try: - _, out, err = mgr.send_rgwadmin_command(['realm', 'list']) - if out: - realms = out.get('realms', []) - if err: - logger.error('Unable to list RGW realms: %s', err) - if realms: - realm_access_keys = {} - realm_secret_keys = {} - for realm in realms: - realm_access_key, realm_secret_key = _get_user_keys(user, realm) - if realm_access_key: - realm_access_keys[realm] = realm_access_key - 
realm_secret_keys[realm] = realm_secret_key - if realm_access_keys: - access_key = json.dumps(realm_access_keys) - secret_key = json.dumps(realm_secret_keys) - else: - access_key, secret_key = _get_user_keys(user) - - assert access_key and secret_key - Settings.RGW_API_ACCESS_KEY = access_key - Settings.RGW_API_SECRET_KEY = secret_key - except (AssertionError, SubprocessError) as error: - logger.exception(error) - raise NoCredentialsException - - # pylint: disable=R0904 class RgwClient(RestClient): _host = None @@ -342,11 +274,27 @@ class RgwClient(RestClient): # The API access key and secret key are mandatory for a minimal configuration. if not (Settings.RGW_API_ACCESS_KEY and Settings.RGW_API_SECRET_KEY): - configure_rgw_credentials() + rgw_service_manager = RgwServiceManager() + rgw_service_manager.configure_rgw_credentials() + daemon_keys = RgwClient._daemons.keys() if not daemon_name: - # Select 1st daemon: - daemon_name = next(iter(RgwClient._daemons.keys())) + if len(daemon_keys) > 1: + try: + multisite = RgwMultisite() + default_zonegroup = multisite.get_all_zonegroups_info()['default_zonegroup'] + + # Iterate through _daemons.values() to find the daemon with the + # matching zonegroup_id + for daemon in RgwClient._daemons.values(): + if daemon.zonegroup_id == default_zonegroup: + daemon_name = daemon.name + break + except Exception: # pylint: disable=broad-except + daemon_name = next(iter(daemon_keys)) + else: + # Handle the case where there is only one or no key in _daemons + daemon_name = next(iter(daemon_keys)) # Discard all cached instances if any rgw setting has changed if RgwClient._rgw_settings_snapshot != RgwClient._rgw_settings(): @@ -354,29 +302,29 @@ RgwClient.drop_instance() if daemon_name not in RgwClient._config_instances: - connection_info = RgwClient._get_daemon_connection_info(daemon_name) - RgwClient._config_instances[daemon_name] = RgwClient(connection_info['access_key'], + connection_info = RgwClient._get_daemon_connection_info(daemon_name) # type: ignore + RgwClient._config_instances[daemon_name] = RgwClient(connection_info['access_key'], # type: ignore # noqa E501 #pylint: disable=line-too-long connection_info['secret_key'], - daemon_name) + daemon_name) # type: ignore - if not userid or userid == RgwClient._config_instances[daemon_name].userid: - return RgwClient._config_instances[daemon_name] + if not userid or userid == RgwClient._config_instances[daemon_name].userid: # type: ignore + return RgwClient._config_instances[daemon_name] # type: ignore if daemon_name not in RgwClient._user_instances \ or userid not in RgwClient._user_instances[daemon_name]: # Get the access and secret keys for the specified user.
- keys = RgwClient._config_instances[daemon_name].get_user_keys(userid) + keys = RgwClient._config_instances[daemon_name].get_user_keys(userid) # type: ignore if not keys: raise RequestException( "User '{}' does not have any keys configured.".format( userid)) instance = RgwClient(keys['access_key'], keys['secret_key'], - daemon_name, + daemon_name, # type: ignore userid) - RgwClient._user_instances.update({daemon_name: {userid: instance}}) + RgwClient._user_instances.update({daemon_name: {userid: instance}}) # type: ignore - return RgwClient._user_instances[daemon_name][userid] + return RgwClient._user_instances[daemon_name][userid] # type: ignore @staticmethod def admin_instance(daemon_name: Optional[str] = None) -> 'RgwClient': @@ -605,6 +553,9 @@ class RgwClient(RestClient): return realm_info['name'] return None + def get_default_zonegroup(self): + return self.daemon.zonegroup_name + @RestClient.api_get('/{bucket_name}?versioning') def get_bucket_versioning(self, bucket_name, request=None): """ @@ -740,6 +691,83 @@ class RgwClient(RestClient): raise DashboardException(msg=str(e), component='rgw') return result + @RestClient.api_get('/{bucket_name}?lifecycle') + def get_lifecycle(self, bucket_name, request=None): + # pylint: disable=unused-argument + try: + result = request() # type: ignore + result = {'LifecycleConfiguration': result} + except RequestException as e: + if e.content: + content = json_str_to_object(e.content) + if content.get( + 'Code') == 'NoSuchLifecycleConfiguration': + return None + raise DashboardException(msg=str(e), component='rgw') + return result + + @staticmethod + def dict_to_xml(data): + if not data or data == '{}': + return '' + if isinstance(data, str): + try: + data = json.loads(data) + except json.JSONDecodeError: + raise DashboardException('Could not load json string') + + def transform(data): + xml: str = '' + if isinstance(data, dict): + for key, value in data.items(): + if isinstance(value, list): + for item in value: + if key == 'Rules': + key = 'Rule' + xml += f'<{key}>\n{transform(item)}</{key}>\n' + elif isinstance(value, dict): + xml += f'<{key}>\n{transform(value)}</{key}>\n' + else: + xml += f'<{key}>{str(value)}</{key}>\n' + + elif isinstance(data, list): + for item in data: + xml += transform(item) + else: + xml += f'{data}' + + return xml + + return transform(data) + + @RestClient.api_put('/{bucket_name}?lifecycle') + def set_lifecycle(self, bucket_name, lifecycle, request=None): + # pylint: disable=unused-argument + lifecycle = lifecycle.strip() + if lifecycle.startswith('{'): + lifecycle = RgwClient.dict_to_xml(lifecycle) + try: + if lifecycle and '<LifecycleConfiguration>' not in str(lifecycle): + lifecycle = f'<LifecycleConfiguration>{lifecycle}</LifecycleConfiguration>' + result = request(data=lifecycle) # type: ignore + except RequestException as e: + if e.content: + content = json_str_to_object(e.content) + if content.get("Code") == "MalformedXML": + msg = "Invalid Lifecycle document" + raise DashboardException(msg=msg, component='rgw') + raise DashboardException(msg=str(e), component='rgw') + return result + + @RestClient.api_delete('/{bucket_name}?lifecycle') + def delete_lifecycle(self, bucket_name, request=None): + # pylint: disable=unused-argument + try: + result = request() + except RequestException as e: + raise DashboardException(msg=str(e), component='rgw') + return result + @RestClient.api_get('/{bucket_name}?object-lock') def get_bucket_locking(self, bucket_name, request=None): # type: (str, Optional[object]) -> dict @@ 
-984,6 +1012,51 @@ class RgwClient(RestClient): raise DashboardException(msg=msg, component='rgw') return retention_period_days, retention_period_years + @RestClient.api_put('/{bucket_name}?replication') + def set_bucket_replication(self, bucket_name, replication: bool, request=None): + # Generate the minimum replication configuration + # required for enabling the replication + root = ET.Element('ReplicationConfiguration', + xmlns='http://s3.amazonaws.com/doc/2006-03-01/') + role = ET.SubElement(root, 'Role') + role.text = f'{bucket_name}_replication_role' + + rule = ET.SubElement(root, 'Rule') + rule_id = ET.SubElement(rule, 'ID') + rule_id.text = _SYNC_PIPE_ID + + status = ET.SubElement(rule, 'Status') + status.text = 'Enabled' if replication else 'Disabled' + + filter_elem = ET.SubElement(rule, 'Filter') + prefix = ET.SubElement(filter_elem, 'Prefix') + prefix.text = '' + + destination = ET.SubElement(rule, 'Destination') + + bucket = ET.SubElement(destination, 'Bucket') + bucket.text = bucket_name + + replication_config = ET.tostring(root, encoding='utf-8', method='xml').decode() + + try: + request(data=replication_config) + except RequestException as e: + raise DashboardException(msg=str(e), component='rgw') + + @RestClient.api_get('/{bucket_name}?replication') + def get_bucket_replication(self, bucket_name, request=None): + # pylint: disable=unused-argument + try: + result = request() + return result + except RequestException as e: + if e.content: + content = json_str_to_object(e.content) + if content.get('Code') == 'ReplicationConfigurationNotFoundError': + return None + raise e + class SyncStatus(Enum): enabled = 'enabled' @@ -1073,6 +1146,177 @@ class RgwMultisite: except SubprocessError as error: raise DashboardException(error, http_status_code=500, component='rgw') + def replace_hostname(self, endpoint, hostname_to_ip): + # Replace the hostname in the endpoint URL with its corresponding IP address. + parsed_url = urlparse(endpoint) + hostname = parsed_url.hostname + if hostname in hostname_to_ip: + return endpoint.replace(hostname, hostname_to_ip[hostname]) + return endpoint + + def setup_multisite_replication(self, realm_name: str, zonegroup_name: str, + zonegroup_endpoints: str, zone_name: str, + zone_endpoints: str, username: str, + cluster_fsid: Optional[str] = None): + + # Set up multisite replication for Ceph RGW. + logger.info("Starting multisite replication setup") + orch = OrchClient.instance() + + def get_updated_endpoints(endpoints): + # Update endpoint URLs by replacing hostnames with IP addresses.
+ logger.debug("Updating endpoints: %s", endpoints) + try: + hostname_to_ip = {host['hostname']: host['addr'] for host in (h.to_json() for h in orch.hosts.list())} # noqa E501 # pylint: disable=line-too-long + updated_endpoints = [self.replace_hostname(endpoint, hostname_to_ip) for endpoint in endpoints.split(',')] # noqa E501 # pylint: disable=line-too-long + logger.debug("Updated endpoints: %s", updated_endpoints) + return updated_endpoints + except Exception as e: + logger.error("Failed to update endpoints: %s", e) + raise + + zonegroup_ip_url = ','.join(get_updated_endpoints(zonegroup_endpoints)) + zone_ip_url = ','.join(get_updated_endpoints(zone_endpoints)) + try: + # Create the realm and zonegroup + logger.info("Creating realm: %s", realm_name) + self.create_realm(realm_name=realm_name, default=True) + logger.info("Creating zonegroup: %s", zonegroup_name) + self.create_zonegroup(realm_name=realm_name, zonegroup_name=zonegroup_name, + default=True, master=True, endpoints=zonegroup_ip_url) + except Exception as e: + logger.error("Failed to create realm or zonegroup: %s", e) + raise + try: + # Create the zone and system user, then modify the zone with user credentials + logger.info("Creating zone: %s", zone_name) + if self.create_zone(zone_name=zone_name, zonegroup_name=zonegroup_name, + default=True, master=True, endpoints=zone_ip_url, + access_key=None, secret_key=None): + logger.info("Creating system user: %s", username) + user_details = self.create_system_user(username, zone_name) + if user_details: + keys = user_details['keys'][0] + logger.info("Modifying zone with user credentials: %s", username) + self.modify_zone(zone_name=zone_name, zonegroup_name=zonegroup_name, + default='true', master='true', endpoints=zone_ip_url, + access_key=keys['access_key'], + secret_key=keys['secret_key']) + except Exception as e: + logger.error("Failed to create zone or system user: %s", e) + raise + try: + # Restart RGW daemons and set credentials + logger.info("Restarting RGW daemons and setting credentials") + rgw_service_manager = RgwServiceManager() + rgw_service_manager.restart_rgw_daemons_and_set_credentials() + except Exception as e: + logger.error("Failed to restart RGW daemons: %s", e) + raise + try: + # Get realm tokens and import to another cluster if specified + logger.info("Getting realm tokens") + realm_token_info = CephService.get_realm_tokens() + + if cluster_fsid and realm_token_info: + logger.info("Importing realm token to cluster: %s", cluster_fsid) + self.import_realm_token_to_cluster(cluster_fsid, realm_name, + realm_token_info, username) + except Exception as e: + logger.error("Failed to get realm tokens or import to cluster: %s", e) + raise + logger.info("Multisite replication setup completed") + return realm_token_info + + def import_realm_token_to_cluster(self, cluster_fsid, realm_name, realm_token_info, username): + logger.info("Importing realm token to cluster: %s", cluster_fsid) + try: + for realm_token in realm_token_info: + if realm_token['realm'] == realm_name: + realm_export_token = realm_token['token'] + break + else: + raise ValueError(f"Realm {realm_name} not found in realm tokens") + multi_cluster_config_str = str(mgr.get_module_option_ex('dashboard', 'MULTICLUSTER_CONFIG')) # noqa E501 # pylint: disable=line-too-long + multi_cluster_config = ast.literal_eval(multi_cluster_config_str) + for fsid, clusters in multi_cluster_config['config'].items(): + if fsid == cluster_fsid: + for cluster_info in clusters: + cluster_token = cluster_info.get('token') + cluster_url 
= cluster_info.get('url') + break + else: + raise ValueError(f"No cluster token found for fsid: {cluster_fsid}") + break + else: + raise ValueError(f"Cluster fsid {cluster_fsid} not found in multi-cluster config") + if cluster_token: + placement_spec: Dict[str, Dict] = {"placement": {}} + payload = { + 'realm_token': realm_export_token, + 'zone_name': 'new_replicated_zone', + 'port': 81, + 'placement_spec': placement_spec + } + + if not cluster_url.endswith('/'): + cluster_url += '/' + + path = 'api/rgw/realm/import_realm_token' + try: + multi_cluster_instance = MultiCluster() + # pylint: disable=protected-access + response = multi_cluster_instance._proxy(method='POST', base_url=cluster_url, + path=path, payload=payload, + token=cluster_token) + logger.info("Successfully imported realm token to cluster: %s", cluster_fsid) + self.check_user_in_second_cluster(cluster_url, cluster_token, username) + return response + except requests.RequestException as e: + logger.error("Could not reach %s: %s", cluster_url, e) + raise DashboardException(f"Could not reach {cluster_url}: {e}", + http_status_code=404, component='dashboard') + except json.JSONDecodeError as e: + logger.error("Error parsing Dashboard API response: %s", e.msg) + raise DashboardException(f"Error parsing Dashboard API response: {e.msg}", + component='dashboard') + except Exception as e: + logger.error("Failed to import realm token to cluster: %s", e) + raise + + def check_user_in_second_cluster(self, cluster_url, cluster_token, username): + logger.info("Checking for user %s in the second cluster", username) + path = 'api/rgw/zone/get_user_list?zoneName=new_replicated_zone' + user_found = False + start_time = time.time() + while not user_found: + if time.time() - start_time > 120: # Timeout after 2 minutes + logger.error("Timeout reached while waiting for user %s to appear \ + in the second cluster", username) + raise DashboardException(code='user_replication_timeout', + msg="Timeout reached while waiting for \ + user %s to appear in the second cluster." 
% username) + try: + multi_cluster_instance = MultiCluster() + # pylint: disable=protected-access + user_content = multi_cluster_instance._proxy(method='GET', base_url=cluster_url, + path=path, token=cluster_token) + logger.info("User content in the second cluster: %s", user_content) + for user in user_content: + if user['user_id'] == username: + user_found = True + logger.info("User %s found in the second cluster", username) + # pylint: disable=protected-access + restart_daemons_content = multi_cluster_instance._proxy(method='PUT', base_url=cluster_url, # noqa E501 # pylint: disable=line-too-long + path='ui-api/rgw/multisite/setup-rgw-credentials', # noqa E501 # pylint: disable=line-too-long + token=cluster_token) # noqa E501 # pylint: disable=line-too-long + logger.info("Restarted RGW daemons in the second cluster: %s", restart_daemons_content) # noqa E501 # pylint: disable=line-too-long + break + except requests.RequestException as e: + logger.error("Error checking user in the second cluster: %s", e) + logger.info("User %s not found yet, retrying in 5 seconds", username) + time.sleep(5) + def create_realm(self, realm_name: str, default: bool): rgw_realm_create_cmd = ['realm', 'create'] cmd_create_realm_options = ['--rgw-realm', realm_name] @@ -1655,8 +1899,8 @@ class RgwMultisite: rgw_realm_list = self.list_realms() rgw_zonegroup_list = self.list_zonegroups() rgw_zone_list = self.list_zones() - if len(rgw_realm_list['realms']) < 1 and len(rgw_zonegroup_list['zonegroups']) < 1 \ - and len(rgw_zone_list['zones']) < 1: + if len(rgw_realm_list['realms']) < 1 and len(rgw_zonegroup_list['zonegroups']) <= 1 \ + and len(rgw_zone_list['zones']) <= 1: is_multisite_configured = False return is_multisite_configured @@ -1772,10 +2016,13 @@ class RgwMultisite: except SubprocessError as error: raise DashboardException(error, http_status_code=500, component='rgw') - def get_sync_policy_group(self, group_id: str, bucket_name: str = ''): + def get_sync_policy_group(self, group_id: str, bucket_name: str = '', + zonegroup_name: str = ''): rgw_sync_policy_cmd = ['sync', 'group', 'get', '--group-id', group_id] if bucket_name: rgw_sync_policy_cmd += ['--bucket', bucket_name] + if zonegroup_name: + rgw_sync_policy_cmd += ['--rgw-zonegroup', zonegroup_name] try: exit_code, out, err = mgr.send_rgwadmin_command(rgw_sync_policy_cmd) if exit_code > 0: @@ -1922,3 +2169,30 @@ class RgwMultisite: http_status_code=500, component='rgw') except SubprocessError as error: raise DashboardException(error, http_status_code=500, component='rgw') + + def create_dashboard_admin_sync_group(self, zonegroup_name: str = ''): + + zonegroup_info = self.get_zonegroup(zonegroup_name) + zone_names = [] + for zones in zonegroup_info['zones']: + zone_names.append(zones['name']) + + # create a sync policy group with status allowed + self.create_sync_policy_group(_SYNC_GROUP_ID, SyncStatus.allowed.value) + # create a sync flow with source and destination zones + self.create_sync_flow(_SYNC_GROUP_ID, _SYNC_FLOW_ID, + SyncFlowTypes.symmetrical.value, + zones=zone_names) + # create a sync pipe with source and destination zones + self.create_sync_pipe(_SYNC_GROUP_ID, _SYNC_PIPE_ID, source_zones=['*'], + destination_zones=['*'], destination_buckets=['*']) + # period update --commit + self.update_period() + + def policy_group_exists(self, group_name: str, zonegroup_name: str): + try: + _ = self.get_sync_policy_group( + group_id=group_name, zonegroup_name=zonegroup_name) + return True + except DashboardException: + return False diff --git 
a/src/pybind/mgr/dashboard/services/service.py b/src/pybind/mgr/dashboard/services/service.py new file mode 100644 index 00000000000..792604dcc59 --- /dev/null +++ b/src/pybind/mgr/dashboard/services/service.py @@ -0,0 +1,179 @@ +import json +import logging +import time +from subprocess import SubprocessError + +try: + from typing import Optional, Tuple +except ImportError: + pass # For typing only + +from .. import mgr +from ..exceptions import DashboardException +from ..settings import Settings +from .orchestrator import OrchClient + +logger = logging.getLogger('service') + + +class NoCredentialsException(Exception): + def __init__(self): + super(NoCredentialsException, self).__init__( + 'No RGW credentials found, ' + 'please consult the documentation on how to enable RGW for ' + 'the dashboard.') + + +def verify_service_restart(service_type: str, service_id: str): + orch = OrchClient.instance() + service_name = f'{service_type}.{service_id}' + + logger.info("Getting initial service info for: %s", service_name) + info = orch.services.get(service_name)[0].to_dict() + last_refreshed = info['status']['last_refresh'] + + logger.info("Restarting service: %s", service_name) + orch.services.reload(service_type, service_id) + + logger.info("Waiting for service refresh: %s", service_name) + wait_for_refresh(orch, service_name, last_refreshed) + + logger.info("Checking daemon status for: %s", service_name) + daemon_status = wait_for_daemon_to_start(orch, service_name) + return daemon_status + + +def wait_for_refresh(orch, service_name, last_refreshed): + logger.info("Waiting for service %s to refresh", service_name) + + while True: + updated_info = orch.services.get(service_name)[0].to_dict() + if updated_info['status']['last_refresh'] != last_refreshed: + logger.info("Service %s refreshed", service_name) + break + time.sleep(1) # avoid busy-waiting on the orchestrator + + +def wait_for_daemon_to_start(orch, service_name): + start_time = time.time() + logger.info("Waiting for daemon %s to start", service_name) + + while True: + daemons = [d.to_dict() for d in orch.services.list_daemons(service_name=service_name)] + all_running = True + + for daemon in daemons: + daemon_state = daemon['status_desc'] + logger.debug("Daemon %s state: %s", daemon['daemon_id'], daemon_state) + + if daemon_state in ('unknown', 'error', 'stopped'): + logger.error("Failed to restart daemon %s for service %s. State is %s", daemon['daemon_id'], service_name, daemon_state) # noqa E501 # pylint: disable=line-too-long + raise DashboardException( + code='daemon_restart_failed', + msg="Failed to restart the daemon %s. Daemon state is %s." % (service_name, daemon_state) # noqa E501 # pylint: disable=line-too-long + ) + if daemon_state != 'running': + all_running = False + + if all_running: + logger.info("All daemons for service %s are running", service_name) + return True + + if time.time() - start_time > 10: + logger.error("Timeout reached while waiting for daemon %s to start", service_name) + raise DashboardException( + code='daemon_restart_timeout', + msg="Timeout reached while waiting for daemon %s to start." % service_name + ) + time.sleep(1) # poll daemon states once per second + + +class RgwServiceManager: + def restart_rgw_daemons_and_set_credentials(self): + # Restart RGW daemons and set credentials.
+ logger.info("Restarting RGW daemons and setting credentials") + orch = OrchClient.instance() + services, _ = orch.services.list(service_type='rgw', offset=0) + + all_daemons_up = True + for service in services: + logger.info("Verifying service restart for: %s", service['service_id']) + daemons_up = verify_service_restart('rgw', service['service_id']) + if not daemons_up: + logger.error("Service %s restart verification failed", service['service_id']) + all_daemons_up = False + + if all_daemons_up: + logger.info("All daemons are up, configuring RGW credentials") + self.configure_rgw_credentials() + else: + logger.error("Not all daemons are up, skipping RGW credentials configuration") + + def _parse_secrets(self, user: str, data: dict) -> Tuple[str, str]: + for key in data.get('keys', []): + if key.get('user') == user and data.get('system') in ['true', True]: + access_key = key.get('access_key') + secret_key = key.get('secret_key') + return access_key, secret_key + return '', '' + + def _get_user_keys(self, user: str, realm: Optional[str] = None) -> Tuple[str, str]: + access_key = '' + secret_key = '' + rgw_user_info_cmd = ['user', 'info', '--uid', user] + cmd_realm_option = ['--rgw-realm', realm] if realm else [] + if realm: + rgw_user_info_cmd += cmd_realm_option + try: + _, out, err = mgr.send_rgwadmin_command(rgw_user_info_cmd) + if out: + access_key, secret_key = self._parse_secrets(user, out) + if not access_key: + rgw_create_user_cmd = [ + 'user', 'create', + '--uid', user, + '--display-name', 'Ceph Dashboard', + '--system', + ] + cmd_realm_option + _, out, err = mgr.send_rgwadmin_command(rgw_create_user_cmd) + if out: + access_key, secret_key = self._parse_secrets(user, out) + if not access_key: + logger.error('Unable to create rgw user "%s": %s', user, err) + except SubprocessError as error: + logger.exception(error) + + return access_key, secret_key + + def configure_rgw_credentials(self): + logger.info('Configuring dashboard RGW credentials') + user = 'dashboard' + realms = [] + access_key = '' + secret_key = '' + try: + _, out, err = mgr.send_rgwadmin_command(['realm', 'list']) + if out: + realms = out.get('realms', []) + if err: + logger.error('Unable to list RGW realms: %s', err) + if realms: + realm_access_keys = {} + realm_secret_keys = {} + for realm in realms: + realm_access_key, realm_secret_key = self._get_user_keys(user, realm) + if realm_access_key: + realm_access_keys[realm] = realm_access_key + realm_secret_keys[realm] = realm_secret_key + if realm_access_keys: + access_key = json.dumps(realm_access_keys) + secret_key = json.dumps(realm_secret_keys) + else: + access_key, secret_key = self._get_user_keys(user) + + assert access_key and secret_key + Settings.RGW_API_ACCESS_KEY = access_key + Settings.RGW_API_SECRET_KEY = secret_key + except (AssertionError, SubprocessError) as error: + logger.exception(error) + raise NoCredentialsException diff --git a/src/pybind/mgr/dashboard/tests/__init__.py b/src/pybind/mgr/dashboard/tests/__init__.py index ece3ef721bf..3061fe9dc93 100644 --- a/src/pybind/mgr/dashboard/tests/__init__.py +++ b/src/pybind/mgr/dashboard/tests/__init__.py @@ -302,6 +302,7 @@ class RgwStub(Stub): 'id': 'daemon1', 'realm_name': 'realm1', 'zonegroup_name': 'zonegroup1', + 'zonegroup_id': 'zonegroup1-id', 'zone_name': 'zone1', 'hostname': 'daemon1.server.lan' } @@ -313,6 +314,7 @@ class RgwStub(Stub): 'id': 'daemon2', 'realm_name': 'realm2', 'zonegroup_name': 'zonegroup2', + 'zonegroup_id': 'zonegroup2-id', 'zone_name': 'zone2', 'hostname': 
'daemon2.server.lan' } diff --git a/src/pybind/mgr/dashboard/tests/test_rgw.py b/src/pybind/mgr/dashboard/tests/test_rgw.py index b84838d1016..d01187c4e24 100644 --- a/src/pybind/mgr/dashboard/tests/test_rgw.py +++ b/src/pybind/mgr/dashboard/tests/test_rgw.py @@ -3,7 +3,7 @@ from unittest.mock import Mock, call, patch from .. import mgr from ..controllers.rgw import Rgw, RgwDaemon, RgwUser from ..rest_client import RequestException -from ..services.rgw_client import RgwClient +from ..services.rgw_client import RgwClient, RgwMultisite from ..tests import ControllerTestCase, RgwStub @@ -93,6 +93,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'id': 'daemon1', 'realm_name': 'realm1', 'zonegroup_name': 'zg1', + 'zonegroup_id': 'zg1-id', 'zone_name': 'zone1', 'frontend_config#0': 'beast port=80' }, @@ -101,6 +102,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'id': 'daemon2', 'realm_name': 'realm2', 'zonegroup_name': 'zg2', + 'zonegroup_id': 'zg2-id', 'zone_name': 'zone2', 'frontend_config#0': 'beast ssl_port=443 ssl_certificate=config:/config' }, @@ -109,6 +111,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'id': 'daemon3', 'realm_name': 'realm3', 'zonegroup_name': 'zg3', + 'zonegroup_id': 'zg3-id', 'zone_name': 'zone3', 'frontend_config#0': 'beast ssl_endpoint=0.0.0.0:8080 ssl_certificate=config:/config' @@ -118,6 +121,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'id': 'daemon4', 'realm_name': 'realm4', 'zonegroup_name': 'zg4', + 'zonegroup_id': 'zg4-id', 'zone_name': 'zone4', 'frontend_config#0': 'beast ssl_certificate=config:/config' }, @@ -126,6 +130,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'id': 'daemon5', 'realm_name': 'realm5', 'zonegroup_name': 'zg5', + 'zonegroup_id': 'zg5-id', 'zone_name': 'zone5', 'frontend_config#0': 'beast endpoint=0.0.0.0:8445 ssl_certificate=config:/config' @@ -139,6 +144,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'server_hostname': 'host1', 'realm_name': 'realm1', 'zonegroup_name': 'zg1', + 'zonegroup_id': 'zg1-id', 'zone_name': 'zone1', 'default': True, 'port': 80 }, @@ -149,6 +155,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'server_hostname': 'host1', 'realm_name': 'realm2', 'zonegroup_name': 'zg2', + 'zonegroup_id': 'zg2-id', 'zone_name': 'zone2', 'default': False, 'port': 443, @@ -160,6 +167,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'server_hostname': 'host1', 'realm_name': 'realm3', 'zonegroup_name': 'zg3', + 'zonegroup_id': 'zg3-id', 'zone_name': 'zone3', 'default': False, 'port': 8080, @@ -171,6 +179,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'server_hostname': 'host1', 'realm_name': 'realm4', 'zonegroup_name': 'zg4', + 'zonegroup_id': 'zg4-id', 'zone_name': 'zone4', 'default': False, 'port': None, @@ -182,6 +191,7 @@ class RgwDaemonControllerTestCase(ControllerTestCase): 'server_hostname': 'host1', 'realm_name': 'realm5', 'zonegroup_name': 'zg5', + 'zonegroup_id': 'zg5-id', 'zone_name': 'zone5', 'default': False, 'port': 8445, @@ -193,6 +203,105 @@ class RgwDaemonControllerTestCase(ControllerTestCase): self.assertStatus(200) self.assertJsonBody([]) + @patch('dashboard.services.rgw_client.RgwClient._get_user_id', Mock( + return_value='dummy_admin')) + @patch('dashboard.services.ceph_service.CephService.send_command') + @patch.object(RgwMultisite, 'get_all_zonegroups_info', Mock( + return_value={'default_zonegroup': 'zonegroup2-id'})) + def test_default_zonegroup_when_multiple_daemons(self, send_command): + 
send_command.return_value = '' + RgwStub.get_daemons() + RgwStub.get_settings() + metadata_return_values = [ + { + 'ceph_version': 'ceph version master (dev)', + 'id': 'daemon1', + 'realm_name': 'realm1', + 'zonegroup_name': 'zg1', + 'zonegroup_id': 'zg1-id', + 'zone_name': 'zone1', + 'frontend_config#0': 'beast port=80' + }, + { + 'ceph_version': 'ceph version master (dev)', + 'id': 'daemon2', + 'realm_name': 'realm2', + 'zonegroup_name': 'zg2', + 'zonegroup_id': 'zg2-id', + 'zone_name': 'zone2', + 'frontend_config#0': 'beast ssl_port=443' + } + ] + list_servers_return_value = [{ + 'hostname': 'host1', + 'services': [ + {'id': '5297', 'type': 'rgw'}, + {'id': '5356', 'type': 'rgw'}, + ] + }] + + mgr.list_servers.return_value = list_servers_return_value + mgr.get_metadata.side_effect = metadata_return_values + self._get('/test/api/rgw/daemon') + self.assertStatus(200) + + self.assertJsonBody([{ + 'id': 'daemon1', + 'service_map_id': '5297', + 'version': 'ceph version master (dev)', + 'server_hostname': 'host1', + 'realm_name': 'realm1', + 'zonegroup_name': 'zg1', + 'zonegroup_id': 'zg1-id', + 'zone_name': 'zone1', + 'default': False, + 'port': 80 + }, + { + 'id': 'daemon2', + 'service_map_id': '5356', + 'version': 'ceph version master (dev)', + 'server_hostname': 'host1', + 'realm_name': 'realm2', + 'zonegroup_name': 'zg2', + 'zonegroup_id': 'zg2-id', + 'zone_name': 'zone2', + 'default': True, + 'port': 443, + }]) + + # Change the default zonegroup and test if the correct daemon gets picked up + RgwMultisite().get_all_zonegroups_info.return_value = {'default_zonegroup': 'zonegroup1-id'} + mgr.list_servers.return_value = list_servers_return_value + mgr.get_metadata.side_effect = metadata_return_values + self._get('/test/api/rgw/daemon') + self.assertStatus(200) + + self.assertJsonBody([{ + 'id': 'daemon1', + 'service_map_id': '5297', + 'version': 'ceph version master (dev)', + 'server_hostname': 'host1', + 'realm_name': 'realm1', + 'zonegroup_name': 'zg1', + 'zonegroup_id': 'zg1-id', + 'zone_name': 'zone1', + 'default': True, + 'port': 80 + }, + { + 'id': 'daemon2', + 'service_map_id': '5356', + 'version': 'ceph version master (dev)', + 'server_hostname': 'host1', + 'realm_name': 'realm2', + 'zonegroup_name': 'zg2', + 'zonegroup_id': 'zg2-id', + 'zone_name': 'zone2', + 'default': False, + 'port': 443, + }]) + class RgwUserControllerTestCase(ControllerTestCase): @classmethod diff --git a/src/pybind/mgr/dashboard/tests/test_rgw_client.py b/src/pybind/mgr/dashboard/tests/test_rgw_client.py index 4949ba36bf2..f2d34ca5458 100644 --- a/src/pybind/mgr/dashboard/tests/test_rgw_client.py +++ b/src/pybind/mgr/dashboard/tests/test_rgw_client.py @@ -6,8 +6,8 @@ from unittest.mock import Mock, patch from .. 
import mgr from ..exceptions import DashboardException -from ..services.rgw_client import NoCredentialsException, \ - NoRgwDaemonsException, RgwClient, _parse_frontend_config +from ..services.rgw_client import NoRgwDaemonsException, RgwClient, _parse_frontend_config +from ..services.service import NoCredentialsException from ..settings import Settings from ..tests import CLICommandTestMixin, RgwStub @@ -355,3 +355,47 @@ class RgwClientHelperTest(TestCase): _parse_frontend_config('mongoose port=8080') self.assertEqual(str(ctx.exception), 'Failed to determine RGW port from "mongoose port=8080"') + + +class TestDictToXML(TestCase): + def test_empty_dict(self): + result = RgwClient.dict_to_xml({}) + self.assertEqual(result, '') + + def test_empty_string(self): + result = RgwClient.dict_to_xml("") + self.assertEqual(result, '') + + def test_invalid_json_string(self): + with self.assertRaises(DashboardException): + RgwClient.dict_to_xml("invalid json") + + def test_simple_dict(self): + data = {"name": "Foo", "age": 30} + expected_xml = "<name>Foo</name>\n<age>30</age>\n" + result = RgwClient.dict_to_xml(data) + self.assertEqual(result, expected_xml) + + def test_nested_dict(self): + data = {"person": {"name": "Foo", "age": 30}} + expected_xml = "<person>\n<name>Foo</name>\n<age>30</age>\n</person>\n" + result = RgwClient.dict_to_xml(data) + self.assertEqual(result, expected_xml) + + def test_list_in_dict(self): + data = {"names": ["Foo", "Boo"]} + expected_xml = "<names>\nFoo</names>\n<names>\nBoo</names>\n" + result = RgwClient.dict_to_xml(data) + self.assertEqual(result, expected_xml) + + def test_rules_list_in_dict(self): + data = {"Rules": [{"id": 1}, {"id": 2}]} + expected_xml = "<Rule>\n<id>1</id>\n</Rule>\n<Rule>\n<id>2</id>\n</Rule>\n" + result = RgwClient.dict_to_xml(data) + self.assertEqual(result, expected_xml) + + def test_json_string(self): + data = '{"name": "Foo", "age": 30}' + expected_xml = "<name>Foo</name>\n<age>30</age>\n" + result = RgwClient.dict_to_xml(data) + self.assertEqual(result, expected_xml) diff --git a/src/pybind/mgr/dashboard/tox.ini b/src/pybind/mgr/dashboard/tox.ini index 0dfeaf60c89..fdb2dd75809 100644 --- a/src/pybind/mgr/dashboard/tox.ini +++ b/src/pybind/mgr/dashboard/tox.ini @@ -170,7 +170,7 @@ setenv = check: OPENAPI_FILE_TMP={envtmpdir}/{env:OPENAPI_FILE} commands = python3 -m dashboard.controllers.docs {env:OPENAPI_FILE_TMP:{env:OPENAPI_FILE}} - check: diff {env:OPENAPI_FILE} {env:OPENAPI_FILE_TMP} + check: diff -au {env:OPENAPI_FILE} {env:OPENAPI_FILE_TMP} [testenv:openapi-doc] description = Generate Sphinx documentation from OpenAPI specification diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index 60c9ce06ca9..36f591332d1 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -84,6 +84,7 @@ PG_STATES = [ NFS_GANESHA_SUPPORTED_FSALS = ['CEPH', 'RGW'] NFS_POOL_NAME = '.nfs' + class CephReleases(IntEnum): argonaut = 1 bobtail = 2 @@ -106,6 +107,7 @@ class CephReleases(IntEnum): squid = 19 maximum = 20 + class NotifyType(str, Enum): mon_map = 'mon_map' pg_summary = 'pg_summary' @@ -165,8 +167,12 @@ class HandleCommandResult(NamedTuple): stderr: str = "" # Typically used for error messages. 
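The TestDictToXML cases above pin down the conversion contract for RgwClient.dict_to_xml: scalar values become <key>value</key> lines, nested dicts are wrapped in their key's tag, lists repeat the tag once per element, a top-level "Rules" key is emitted as repeated <Rule> elements, and a JSON string input is parsed first. A standalone sketch consistent with those expectations (ValueError stands in here for the DashboardException the real method raises):

    import json
    from typing import Union

    def dict_to_xml(data: Union[dict, str]) -> str:
        # rough dict/JSON -> XML conversion matching the test expectations above
        if not data:
            return ''
        if isinstance(data, str):
            try:
                data = json.loads(data)
            except json.JSONDecodeError:
                raise ValueError('Unable to parse input as JSON')
        out = ''
        for key, value in data.items():
            if key == 'Rules':
                key = 'Rule'  # S3 configuration documents expect repeated <Rule> elements
            if isinstance(value, dict):
                out += f'<{key}>\n{dict_to_xml(value)}</{key}>\n'
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        out += f'<{key}>\n{dict_to_xml(item)}</{key}>\n'
                    else:
                        out += f'<{key}>\n{item}</{key}>\n'
            else:
                out += f'<{key}>{value}</{key}>\n'
        return out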
-class MonCommandFailed(RuntimeError): pass -class MgrDBNotReady(RuntimeError): pass +class MonCommandFailed(RuntimeError): + pass + + +class MgrDBNotReady(RuntimeError): + pass class OSDMap(ceph_module.BasePyOSDMap): @@ -355,6 +361,7 @@ class CRUSHMap(ceph_module.BasePyCRUSH): HandlerFuncType = Callable[..., Tuple[int, str, str]] + def _extract_target_func( f: HandlerFuncType ) -> Tuple[HandlerFuncType, Dict[str, Any]]: @@ -531,16 +538,23 @@ def CLICheckNonemptyFileInput(desc: str) -> Callable[[HandlerFuncType], HandlerF # Delete new line separator at EOF (it may have been added by a text editor). kwargs['inbuf'] = kwargs['inbuf'].rstrip('\r\n').rstrip('\n') if not kwargs['inbuf'] or not kwargs['inbuf'].strip(): - return -errno.EINVAL, '', f'{ERROR_MSG_EMPTY_INPUT_FILE}: Please add {desc} to '\ - 'the file' + return ( + -errno.EINVAL, + '', + f'{ERROR_MSG_EMPTY_INPUT_FILE}: Please add {desc} to ' + 'the file' + ) return func(*args, **kwargs) check.__signature__ = inspect.signature(func) # type: ignore[attr-defined] return check return CheckFileInput + # If the mgr loses its lock on the database because e.g. the pgs were # transiently down, then close it and allow it to be reopened. MAX_DBCLEANUP_RETRIES = 3 + + def MgrModuleRecoverDB(func: Callable) -> Callable: @functools.wraps(func) def check(self: MgrModule, *args: Any, **kwargs: Any) -> Any: @@ -550,16 +564,17 @@ def MgrModuleRecoverDB(func: Callable) -> Callable: return func(self, *args, **kwargs) except sqlite3.DatabaseError as e: self.log.error(f"Caught fatal database error: {e}") - retries = retries+1 + retries = retries + 1 if retries > MAX_DBCLEANUP_RETRIES: raise - self.log.debug(f"attempting reopen of database") + self.log.debug("attempting reopen of database") self.close_db() - self.open_db(); + self.open_db() # allow retry of func(...) check.__signature__ = inspect.signature(func) # type: ignore[attr-defined] return check + def CLIRequiresDB(func: HandlerFuncType) -> HandlerFuncType: @functools.wraps(func) def check(self: MgrModule, *args: Any, **kwargs: Any) -> Tuple[int, str, str]: @@ -569,6 +584,7 @@ def CLIRequiresDB(func: HandlerFuncType) -> HandlerFuncType: check.__signature__ = inspect.signature(func) # type: ignore[attr-defined] return check + def _get_localized_key(prefix: str, key: str) -> str: return '{}/{}'.format(prefix, key) @@ -584,12 +600,14 @@ if TYPE_CHECKING: # common/options.h: value_t OptionValue = Optional[Union[bool, int, float, str]] + class OptionLevel(IntEnum): BASIC = 0 ADVANCED = 1 DEV = 2 UNKNOWN = 3 + class Option(Dict): """ Helper class to declare options for MODULE_OPTIONS list. 
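The MgrModuleRecoverDB hunk above is a small but important resilience pattern: if libcephsqlite reports a fatal error (for example, because the PGs backing the mgr pool were transiently down and the mgr lost its lock), the decorator closes and reopens the database and retries the wrapped call a bounded number of times. Its shape, reduced to a standalone sketch that assumes MgrModule-style open_db()/close_db() helpers on the wrapped object:

    import functools
    import sqlite3
    from typing import Any, Callable

    MAX_DBCLEANUP_RETRIES = 3  # same bound as in mgr_module.py

    def recover_db(func: Callable[..., Any]) -> Callable[..., Any]:
        @functools.wraps(func)
        def wrapped(self: Any, *args: Any, **kwargs: Any) -> Any:
            retries = 0
            while True:
                try:
                    return func(self, *args, **kwargs)
                except sqlite3.DatabaseError:
                    retries += 1
                    if retries > MAX_DBCLEANUP_RETRIES:
                        raise
                    self.close_db()  # drop the possibly-stale connection
                    self.open_db()   # reconnect, then retry func(...)
        return wrapped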
@@ -744,9 +762,9 @@ class MgrModuleLoggingMixin(object): # remove existing handlers: rm_handlers = [ h for h in self._root_logger.handlers - if (isinstance(h, CPlusPlusHandler) or - isinstance(h, FileHandler) or - isinstance(h, ClusterLogHandler))] + if (isinstance(h, CPlusPlusHandler) + or isinstance(h, FileHandler) + or isinstance(h, ClusterLogHandler))] for h in rm_handlers: self._root_logger.removeHandler(h) self.log_to_file = False @@ -972,7 +990,7 @@ class API: class DecoratorClass: _ATTR_TOKEN = f'__ATTR_{attr.upper()}__' - def __init__(self, value: Any=default) -> None: + def __init__(self, value: Any = default) -> None: self.value = value def __call__(self, func: Callable) -> Any: @@ -997,8 +1015,8 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): MODULE_OPTION_DEFAULTS = {} # type: Dict[str, Any] # Database Schema - SCHEMA = None # type: Optional[List[str]] - SCHEMA_VERSIONED = None # type: Optional[List[List[str]]] + SCHEMA = None # type: Optional[List[str]] + SCHEMA_VERSIONED = None # type: Optional[List[List[str]]] # Priority definitions for perf counters PRIO_CRITICAL = 10 @@ -1058,7 +1076,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): # for backwards compatibility self._logger = self.getLogger() - self._db = None # type: Optional[sqlite3.Connection] + self._db = None # type: Optional[sqlite3.Connection] self._version = self._ceph_get_version() @@ -1184,15 +1202,15 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): def create_skeleton_schema(self, db: sqlite3.Connection) -> None: SQL = [ - """ - CREATE TABLE IF NOT EXISTS MgrModuleKV ( - key TEXT PRIMARY KEY, - value NOT NULL - ) WITHOUT ROWID; - """, - """ - INSERT OR IGNORE INTO MgrModuleKV (key, value) VALUES ('__version', 0); - """, + """ + CREATE TABLE IF NOT EXISTS MgrModuleKV ( + key TEXT PRIMARY KEY, + value NOT NULL + ) WITHOUT ROWID; + """, + """ + INSERT OR IGNORE INTO MgrModuleKV (key, value) VALUES ('__version', 0); + """, ] for sql in SQL: @@ -1291,10 +1309,10 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): if not self.have_enough_osds(): return None self.create_mgr_pool() - uri = f"file:///{self.MGR_POOL_NAME}:{self.module_name}/main.db?vfs=ceph"; + uri = f"file:///{self.MGR_POOL_NAME}:{self.module_name}/main.db?vfs=ceph" self.log.debug(f"using uri {uri}") try: - db = sqlite3.connect(uri, check_same_thread=False, uri=True, autocommit=False) # type: ignore[call-arg] + db = sqlite3.connect(uri, check_same_thread=False, uri=True, autocommit=False) # type: ignore[call-arg] except TypeError: db = sqlite3.connect(uri, check_same_thread=False, uri=True, isolation_level=None) # if libcephsqlite reconnects, update the addrv for blocklist @@ -1322,10 +1340,10 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): return self._db db_allowed = self.get_ceph_option("mgr_pool") if not db_allowed: - raise MgrDBNotReady(); + raise MgrDBNotReady() self._db = self.open_db() if self._db is None: - raise MgrDBNotReady(); + raise MgrDBNotReady() return self._db @property @@ -1439,7 +1457,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): All these structures have their own JSON representations: experiment or look at the C++ ``dump()`` methods to learn about them. 
""" - obj = self._ceph_get(data_name) + obj = self._ceph_get(data_name) if isinstance(obj, bytes): obj = json.loads(obj) @@ -1761,11 +1779,11 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): return r def get_quiesce_leader_gid(self, fscid: str) -> Optional[int]: - leader_gid : Optional[int] = None + leader_gid: Optional[int] = None for fs in self.get("fs_map")['filesystems']: if fscid != fs["id"]: continue - + # quiesce leader is the lowest rank # with the highest state mdsmap = fs["mdsmap"] @@ -1802,7 +1820,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): command: str, tag: str, inbuf: Optional[str] = None, - *, # kw-only args go below + *, # kw-only args go below one_shot: bool = False) -> None: """ Called by the plugin to send a command to the mon @@ -2138,10 +2156,19 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): @API.expose @profile_method() - def get_unlabeled_perf_counters(self, prio_limit: int = PRIO_USEFUL, - services: Sequence[str] = ("mds", "mon", "osd", - "rbd-mirror", "cephfs-mirror", "rgw", - "tcmu-runner")) -> Dict[str, dict]: + def get_unlabeled_perf_counters( + self, + prio_limit: int = PRIO_USEFUL, + services: Sequence[str] = ( + "mds", + "mon", + "osd", + "rbd-mirror", + "cephfs-mirror", + "rgw", + "tcmu-runner", + ), + ) -> Dict[str, dict]: """ Return the perf counters currently known to this ceph-mgr instance, filtered by priority equal to or greater than `prio_limit`. @@ -2397,7 +2424,6 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): return self._ceph_remove_mds_perf_query(query_id) @API.expose - def reregister_mds_perf_queries(self) -> None: """ Re-register MDS perf queries. @@ -2435,11 +2461,11 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): stdout_as_json: bool = True) -> Tuple[int, Union[str, dict], str]: try: cmd = [ - 'radosgw-admin', - '-c', str(self.get_ceph_conf_path()), - '-k', str(self.get_ceph_option('keyring')), - '-n', f'mgr.{self.get_mgr_id()}', - ] + args + 'radosgw-admin', + '-c', str(self.get_ceph_conf_path()), + '-k', str(self.get_ceph_option('keyring')), + '-n', f'mgr.{self.get_mgr_id()}', + ] + args self.log.debug('Executing %s', str(cmd)) result = subprocess.run( # pylint: disable=subprocess-run-check cmd, diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py index 05ec6496682..3497fcdb655 100644 --- a/src/pybind/mgr/mgr_util.py +++ b/src/pybind/mgr/mgr_util.py @@ -1,7 +1,7 @@ import os if 'UNITTEST' in os.environ: - import tests + import tests # noqa import bcrypt import cephfs @@ -13,7 +13,7 @@ import time import logging import sys from ipaddress import ip_address -from threading import Lock, Condition, Event +from threading import Lock, Condition from typing import no_type_check, NewType import urllib from functools import wraps @@ -70,6 +70,7 @@ class CephfsConnectionException(Exception): def __str__(self) -> str: return "{0} ({1})".format(self.errno, self.error_str) + class RTimer(Timer): """ recurring timer variant of Timer @@ -85,6 +86,7 @@ class RTimer(Timer): logger.error("task exception: %s", e) raise + @contextlib.contextmanager def lock_timeout_log(lock: Lock, timeout: int = 5) -> Iterator[None]: start = time.time() @@ -145,7 +147,7 @@ class CephfsConnectionPool(object): fs_id = None try: fs_id = self.get_fs_id() - except: + except: # noqa # the filesystem does not exist now -- connection is not valid. 
pass logger.debug("self.fs_id={0}, fs_id={1}".format(self.fs_id, fs_id)) @@ -333,7 +335,6 @@ class CephfsClient(Generic[Module_T]): return fs_list - @contextlib.contextmanager def open_filesystem(fsc: CephfsClient, fs_name: str) -> Generator["cephfs.LibCephFS", None, None]: """ @@ -516,7 +517,7 @@ def create_self_signed_cert(organisation: str = 'Ceph', :param organisation: String representing the Organisation(O) RDN (default='Ceph') :param common_name: String representing the Common Name(CN) RDN (default='mgr') - :param dname: Optional dictionary containing RDNs to use for crt/key generation + :param dname: Optional dictionary containing RDNs to use for crt/key generation :return: ssl crt and key in utf-8 format @@ -600,10 +601,11 @@ def verify_cacrt(cert_fname): raise ServerConfigException( 'Invalid certificate {}: {}'.format(cert_fname, str(e))) -def get_cert_issuer_info(crt: str) -> Tuple[Optional[str],Optional[str]]: + +def get_cert_issuer_info(crt: str) -> Tuple[Optional[str], Optional[str]]: """Basic validation of a ca cert""" - from OpenSSL import crypto, SSL + from OpenSSL import crypto, SSL # noqa try: crt_buffer = crt.encode("ascii") if isinstance(crt, str) else crt (org_name, cn) = (None, None) @@ -618,6 +620,7 @@ def get_cert_issuer_info(crt: str) -> Tuple[Optional[str],Optional[str]]: except (ValueError, crypto.Error) as e: raise ServerConfigException(f'Invalid certificate key: {e}') + def verify_tls(crt, key): # type: (str, str) -> None verify_cacrt_content(crt) @@ -648,7 +651,6 @@ def verify_tls(crt, key): raise ServerConfigException(f'Invalid cert/key pair: {e}') - def verify_tls_files(cert_fname, pkey_fname): # type: (str, str) -> None """Basic checks for TLS certificate and key files @@ -716,6 +718,7 @@ def get_most_recent_rate(rates: Optional[List[Tuple[float, float]]]) -> float: return 0.0 return rates[-1][1] + def get_time_series_rates(data: List[Tuple[float, float]]) -> List[Tuple[float, float]]: """ Rates from time series data @@ -744,6 +747,7 @@ def get_time_series_rates(data: List[Tuple[float, float]]) -> List[Tuple[float, return [(data2[0], _derivative(data1, data2) if data1 is not None else 0.0) for data1, data2 in _pairwise(data)] + def name_to_config_section(name: str) -> ConfEntity: """ Map from daemon names to ceph entity names (as seen in config) @@ -840,12 +844,12 @@ def to_pretty_timedelta(n: datetime.timedelta) -> str: if n < datetime.timedelta(hours=48): return str(int(n.total_seconds()) // 3600) + 'h' if n < datetime.timedelta(days=14): - return str(int(n.total_seconds()) // (3600*24)) + 'd' - if n < datetime.timedelta(days=7*12): - return str(int(n.total_seconds()) // (3600*24*7)) + 'w' - if n < datetime.timedelta(days=365*2): - return str(int(n.total_seconds()) // (3600*24*30)) + 'M' - return str(int(n.total_seconds()) // (3600*24*365)) + 'y' + return str(int(n.total_seconds()) // (3600 * 24)) + 'd' + if n < datetime.timedelta(days=7 * 12): + return str(int(n.total_seconds()) // (3600 * 24 * 7)) + 'w' + if n < datetime.timedelta(days=365 * 2): + return str(int(n.total_seconds()) // (3600 * 24 * 30)) + 'M' + return str(int(n.total_seconds()) // (3600 * 24 * 365)) + 'y' def profile_method(skip_attribute: bool = False) -> Callable[[Callable[..., T]], Callable[..., T]]: diff --git a/src/pybind/mgr/nfs/utils.py b/src/pybind/mgr/nfs/utils.py index ba3190a9644..269079c1ccf 100644 --- a/src/pybind/mgr/nfs/utils.py +++ b/src/pybind/mgr/nfs/utils.py @@ -5,6 +5,7 @@ from typing import List, Tuple, TYPE_CHECKING from object_format import ErrorResponseBase import 
orchestrator +from orchestrator import NoOrchestrator import cephfs from mgr_util import CephfsClient, open_filesystem @@ -67,7 +68,11 @@ def available_clusters(mgr: 'Module') -> List[str]: return value: ['vstart'] ''' # TODO check cephadm cluster list with rados pool conf objects - completion = mgr.describe_service(service_type='nfs') + try: + completion = mgr.describe_service(service_type='nfs') + except NoOrchestrator: + log.exception("No orchestrator configured") + return [] orchestrator.raise_if_exception(completion) assert completion.result is not None return [cluster.spec.service_id for cluster in completion.result diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index d0570caf0f4..bb1bb5385d8 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -42,6 +42,7 @@ from ceph.deployment.service_spec import ( RGWSpec, SMBSpec, SNMPGatewaySpec, + MgmtGatewaySpec, ServiceSpec, TunedProfileSpec, ) @@ -557,6 +558,28 @@ class Orchestrator(object): """ raise NotImplementedError() + def cert_store_cert_ls(self) -> OrchResult[Dict[str, Any]]: + raise NotImplementedError() + + def cert_store_key_ls(self) -> OrchResult[Dict[str, Any]]: + raise NotImplementedError() + + def cert_store_get_cert( + self, + entity: str, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> OrchResult[str]: + raise NotImplementedError() + + def cert_store_get_key( + self, + entity: str, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> OrchResult[str]: + raise NotImplementedError() + @handle_orch_error def apply(self, specs: Sequence["GenericSpec"], no_overwrite: bool = False) -> List[str]: """ @@ -584,6 +607,7 @@ class Orchestrator(object): 'snmp-gateway': self.apply_snmp_gateway, 'host': self.add_host, 'smb': self.apply_smb, + 'mgmt-gateway': self.apply_mgmt_gateway, } def merge(l: OrchResult[List[str]], r: OrchResult[str]) -> OrchResult[List[str]]: # noqa: E741 @@ -825,6 +849,10 @@ class Orchestrator(object): """Update an existing snmp gateway service""" raise NotImplementedError() + def apply_mgmt_gateway(self, spec: MgmtGatewaySpec) -> OrchResult[str]: + """Update an existing cluster gateway service""" + raise NotImplementedError() + def apply_smb(self, spec: SMBSpec) -> OrchResult[str]: """Update a smb gateway service""" raise NotImplementedError() @@ -908,6 +936,7 @@ def daemon_type_to_service(dtype: str) -> str: 'keepalived': 'ingress', 'iscsi': 'iscsi', 'nvmeof': 'nvmeof', + 'mgmt-gateway': 'mgmt-gateway', 'rbd-mirror': 'rbd-mirror', 'cephfs-mirror': 'cephfs-mirror', 'nfs': 'nfs', @@ -943,6 +972,7 @@ def service_to_daemon_types(stype: str) -> List[str]: 'ingress': ['haproxy', 'keepalived'], 'iscsi': ['iscsi'], 'nvmeof': ['nvmeof'], + 'mgmt-gateway': ['mgmt-gateway'], 'rbd-mirror': ['rbd-mirror'], 'cephfs-mirror': ['cephfs-mirror'], 'nfs': ['nfs'], diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 4969e1f5eb7..8cde0a4a2b0 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -46,6 +46,7 @@ from ._interface import ( RGWSpec, SMBSpec, SNMPGatewaySpec, + MgmtGatewaySpec, ServiceDescription, TunedProfileSpec, _cli_read_command, @@ -1136,6 +1137,61 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, return HandleCommandResult(stdout=table.get_string()) + def _process_cert_store_json(self, d: Dict[str, Any], level: int = 0) -> str: + result_str = '' + indent = ' ' * 
level + for k, v in d.items(): + if isinstance(v, dict): + result_str += f'{indent}{k}\n' + result_str += self._process_cert_store_json(v, level + 1) + else: + result_str += f'{indent}{k} - {v}\n' + return result_str + + @_cli_read_command('orch cert-store cert ls') + def _cert_store_cert_ls(self, format: Format = Format.plain) -> HandleCommandResult: + completion = self.cert_store_cert_ls() + cert_ls = raise_if_exception(completion) + if format != Format.plain: + return HandleCommandResult(stdout=to_format(cert_ls, format, many=False, cls=None)) + else: + result_str = self._process_cert_store_json(cert_ls, 0) + return HandleCommandResult(stdout=result_str) + + @_cli_read_command('orch cert-store key ls') + def _cert_store_key_ls(self, format: Format = Format.plain) -> HandleCommandResult: + completion = self.cert_store_key_ls() + key_ls = raise_if_exception(completion) + if format != Format.plain: + return HandleCommandResult(stdout=to_format(key_ls, format, many=False, cls=None)) + else: + result_str = self._process_cert_store_json(key_ls, 0) + return HandleCommandResult(stdout=result_str) + + @_cli_read_command('orch cert-store get cert') + def _cert_store_get_cert( + self, + entity: str, + _end_positional_: int = 0, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> HandleCommandResult: + completion = self.cert_store_get_cert(entity, service_name, hostname) + cert = raise_if_exception(completion) + return HandleCommandResult(stdout=cert) + + @_cli_read_command('orch cert-store get key') + def _cert_store_get_key( + self, + entity: str, + _end_positional_: int = 0, + service_name: Optional[str] = None, + hostname: Optional[str] = None + ) -> HandleCommandResult: + completion = self.cert_store_get_key(entity, service_name, hostname) + key = raise_if_exception(completion) + return HandleCommandResult(stdout=key) + def _get_credentials(self, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> Tuple[str, str]: _username = username @@ -1227,11 +1283,11 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, """ if inbuf and all_available_devices: - return HandleCommandResult(-errno.EINVAL, '-i infile and --all-available-devices are mutually exclusive') + return HandleCommandResult(-errno.EINVAL, stderr='-i infile and --all-available-devices are mutually exclusive') if not inbuf and not all_available_devices: # one parameter must be present - return HandleCommandResult(-errno.EINVAL, '--all-available-devices is required') + return HandleCommandResult(-errno.EINVAL, stderr='--all-available-devices is required') if inbuf: if unmanaged is not None: @@ -1746,6 +1802,32 @@ Usage: return self._apply_misc([spec], dry_run, format, no_overwrite) + @_cli_write_command('orch apply mgmt-gateway') + def _apply_mgmt_gateway(self, + port: Optional[int] = None, + disable_https: Optional[bool] = False, + placement: Optional[str] = None, + unmanaged: bool = False, + dry_run: bool = False, + format: Format = Format.plain, + no_overwrite: bool = False, + inbuf: Optional[str] = None) -> HandleCommandResult: + """Add a cluster gateway service (cephadm only)""" + if inbuf: + raise OrchestratorValidationError('unrecognized command -i; -h or --help for usage') + + spec = MgmtGatewaySpec( + placement=PlacementSpec.from_string(placement), + unmanaged=unmanaged, + port=port, + disable_https=disable_https, + preview_only=dry_run + ) + + spec.validate() # force any validation exceptions to be caught correctly + + return self._apply_misc([spec], 
dry_run, format, no_overwrite) + @_cli_write_command('orch apply nvmeof') def _apply_nvmeof(self, pool: str, diff --git a/src/pybind/mgr/rgw/module.py b/src/pybind/mgr/rgw/module.py index 1b589541932..156529f1379 100644 --- a/src/pybind/mgr/rgw/module.py +++ b/src/pybind/mgr/rgw/module.py @@ -101,7 +101,14 @@ def check_orchestrator(func: FuncT) -> FuncT: class Module(orchestrator.OrchestratorClientMixin, MgrModule): - MODULE_OPTIONS: List[Option] = [] + MODULE_OPTIONS: List[Option] = [ + Option( + 'secondary_zone_period_retry_limit', + type='int', + default=5, + desc='RGW module period update retry limit for secondary site' + ), + ] # These are "native" Ceph options that this module cares about. NATIVE_OPTIONS: List[Option] = [] @@ -115,6 +122,9 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): # ensure config options members are initialized; see config_notify() self.config_notify() + if TYPE_CHECKING: + self.secondary_zone_period_retry_limit = 5 + with self.lock: self.inited = True self.env = EnvArgs(RGWAMOrchMgr(self)) @@ -310,10 +320,12 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): inbuf: Optional[str] = None) -> HandleCommandResult: """Bootstrap new rgw zone that syncs with zone on another cluster in the same realm""" - created_zones = self.rgw_zone_create(zone_name, realm_token, port, placement, - start_radosgw, zone_endpoints, inbuf) - - return HandleCommandResult(retval=0, stdout=f"Zones {', '.join(created_zones)} created successfully") + try: + created_zones = self.rgw_zone_create(zone_name, realm_token, port, placement, + start_radosgw, zone_endpoints, self.secondary_zone_period_retry_limit, inbuf) + return HandleCommandResult(retval=0, stdout=f"Zones {', '.join(created_zones)} created successfully") + except RGWAMException as e: + return HandleCommandResult(retval=e.retcode, stderr=f'Failed to create zone: {str(e)}') def rgw_zone_create(self, zone_name: Optional[str] = None, @@ -322,13 +334,14 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): placement: Optional[Union[str, Dict[str, Any]]] = None, start_radosgw: Optional[bool] = True, zone_endpoints: Optional[str] = None, - inbuf: Optional[str] = None) -> Any: + secondary_zone_period_retry_limit: Optional[int] = None, + inbuf: Optional[str] = None) -> List[str]: if inbuf: try: rgw_specs = self._parse_rgw_specs(inbuf) except RGWSpecParsingError as e: - return HandleCommandResult(retval=-errno.EINVAL, stderr=f'{e}') + raise RGWAMException(str(e)) elif (zone_name and realm_token): token = RealmToken.from_base64_str(realm_token) if isinstance(placement, dict): @@ -343,18 +356,19 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): zone_endpoints=zone_endpoints)] else: err_msg = 'Invalid arguments: either pass a spec with -i or provide the zone_name and realm_token.' 
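The rgw module change in the lines that follow rearranges error handling: rgw_zone_create used to hand HandleCommandResult objects back from deep inside the helper, mixing CLI plumbing into library code; it now raises RGWAMException everywhere and lets the command handler translate the failure into a result exactly once. A toy version of that shape (names simplified; the real exception also carries retcode, stdout, and stderr):

    import errno
    from typing import List, Tuple

    class RGWAMException(Exception):
        def __init__(self, msg: str, retcode: int = -errno.EINVAL) -> None:
            super().__init__(msg)
            self.retcode = retcode

    def zone_create(zone_name: str = '', realm_token: str = '') -> List[str]:
        # library helper: raise on failure instead of returning a CLI result
        if not (zone_name and realm_token):
            raise RGWAMException('provide the zone_name and realm_token')
        return [zone_name]

    def cmd_zone_create(zone_name: str = '', realm_token: str = '') -> Tuple[int, str, str]:
        # CLI entry point: the single place failures become (retval, stdout, stderr)
        try:
            zones = zone_create(zone_name, realm_token)
            return 0, f"Zones {', '.join(zones)} created successfully", ''
        except RGWAMException as e:
            return e.retcode, '', f'Failed to create zone: {e}'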
- return HandleCommandResult(retval=-errno.EINVAL, stdout='', stderr=err_msg) + raise RGWAMException(err_msg) try: created_zones = [] for rgw_spec in rgw_specs: - RGWAM(self.env).zone_create(rgw_spec, start_radosgw) + RGWAM(self.env).zone_create(rgw_spec, start_radosgw, secondary_zone_period_retry_limit) if rgw_spec.rgw_zone is not None: created_zones.append(rgw_spec.rgw_zone) return created_zones except RGWAMException as e: - self.log.error('cmd run exception: (%d) %s' % (e.retcode, e.message)) - return HandleCommandResult(retval=e.retcode, stdout=e.stdout, stderr=e.stderr) + err_msg = 'cmd run exception: (%d) %s' % (e.retcode, e.message) + self.log.error(err_msg) + raise e return created_zones @CLICommand('rgw realm reconcile', perm='rw') diff --git a/src/pybind/mgr/smb/enums.py b/src/pybind/mgr/smb/enums.py index 6e19c882dad..175af98d499 100644 --- a/src/pybind/mgr/smb/enums.py +++ b/src/pybind/mgr/smb/enums.py @@ -41,15 +41,12 @@ class AuthMode(_StrEnum): class JoinSourceType(_StrEnum): - PASSWORD = 'password' - HTTP_URI = 'http_uri' RESOURCE = 'resource' class UserGroupSourceType(_StrEnum): - INLINE = 'inline' - HTTP_URI = 'http_uri' RESOURCE = 'resource' + EMPTY = 'empty' class ConfigNS(_StrEnum): @@ -57,3 +54,26 @@ class ConfigNS(_StrEnum): SHARES = 'shares' USERS_AND_GROUPS = 'users_and_groups' JOIN_AUTHS = 'join_auths' + + +class LoginCategory(_StrEnum): + USER = 'user' + GROUP = 'group' + + +class LoginAccess(_StrEnum): + ADMIN = 'admin' + NONE = 'none' + READ_ONLY = 'read' + READ_ONLY_SHORT = 'r' + READ_WRITE = 'read-write' + READ_WRITE_SHORT = 'rw' + + def expand(self) -> 'LoginAccess': + """Expand abbreviated enum values into their full forms.""" + # the extra LoginAccess(...) calls are to appease mypy + if self == self.READ_ONLY_SHORT: + return LoginAccess(self.READ_ONLY) + if self == self.READ_WRITE_SHORT: + return LoginAccess(self.READ_WRITE) + return self diff --git a/src/pybind/mgr/smb/fs.py b/src/pybind/mgr/smb/fs.py index 5a1ba2aabc7..dc9613f21fc 100644 --- a/src/pybind/mgr/smb/fs.py +++ b/src/pybind/mgr/smb/fs.py @@ -1,8 +1,9 @@ -from typing import List, Optional +from typing import Dict, List, Optional, Tuple import logging import posixpath import stat +import time import cephfs from mgr_util import CephfsClient, Module_T, open_filesystem @@ -54,20 +55,19 @@ class CephFSPathResolver: map to real paths in the cephfs volume and determine if those paths exist. """ - def __init__(self, mgr: Module_T) -> None: + def __init__( + self, mgr: Module_T, *, client: Optional[CephfsClient] = None + ) -> None: self._mgr = mgr - self._cephfs_client = CephfsClient(mgr) + self._cephfs_client = client or CephfsClient(mgr) - def resolve( - self, volume: str, subvolumegroup: str, subvolume: str, path: str + def resolve_subvolume_path( + self, volume: str, subvolumegroup: str, subvolume: str ) -> str: - """Given a volume, subvolumegroup, subvolume, and path, return the real - path within the file system. subvolumegroup and subvolume may be empty - strings when no subvolume is being used. + """Given a volume, subvolumegroup, and subvolume, return the real path + within the file system. subvolumegroup and subvolume may be empty strings + when no subvolume is being used. 
""" - path = path.lstrip('/') - if not (subvolumegroup or subvolume): - return f'/{path}' cmd = { 'prefix': 'fs subvolume getpath', 'vol_name': volume, @@ -79,8 +79,23 @@ class CephFSPathResolver: ret, data, status = self._mgr.mon_command(cmd) if ret != 0: raise CephFSSubvolumeResolutionError(status) - log.debug('Mapped subvolume to path: %r', data) - return posixpath.join(data.strip(), path) + log.info('Mapped subvolume to path: %r', data) + return data.strip() + + def resolve( + self, volume: str, subvolumegroup: str, subvolume: str, path: str + ) -> str: + """Given a volume, subvolumegroup, subvolume, and path, return the real + path within the file system. subvolumegroup and subvolume may be empty + strings when no subvolume is being used. + """ + path = path.lstrip('/') + if not (subvolumegroup or subvolume): + return f'/{path}' + subvolume_path = self.resolve_subvolume_path( + volume, subvolumegroup, subvolume + ) + return posixpath.join(subvolume_path, path) def resolve_exists( self, volume: str, subvolumegroup: str, subvolume: str, path: str @@ -106,3 +121,86 @@ class CephFSPathResolver: raise NotADirectoryError(volpath) log.debug('Verified that %r exists in %r', volpath, volume) return volpath + + +class _TTLCache: + def __init__(self, maxsize: int = 512, ttl: float = 300.0) -> None: + self.cache: Dict[Tuple[str, str, str], Tuple[str, float]] = {} + self.maxsize: int = maxsize + self.ttl: float = ttl + + def _evict(self) -> None: + """Evicts items that have expired or if cache size exceeds maxsize.""" + current_time: float = time.monotonic() + keys_to_evict: list[Tuple[str, str, str]] = [ + key + for key, (_, timestamp) in self.cache.items() + if current_time - timestamp > self.ttl + ] + for key in keys_to_evict: + del self.cache[key] + + # Further evict if cache size exceeds maxsize + if len(self.cache) > self.maxsize: + for key in list(self.cache.keys())[ + : len(self.cache) - self.maxsize + ]: + del self.cache[key] + + def get(self, key: Tuple[str, str, str]) -> Optional[str]: + """Retrieve item from cache if it exists and is not expired.""" + self._evict() # Ensure expired items are removed + if key in self.cache: + value, _ = self.cache[key] + return value + return None + + def set(self, key: Tuple[str, str, str], value: str) -> None: + """Set item in cache, evicting expired or excess items.""" + self._evict() # Ensure expired items are removed + self.cache[key] = (value, time.monotonic()) + + def clear(self) -> None: + """Clear all items in the cache.""" + self.cache.clear() + + def __len__(self) -> int: + """Return the number of items currently in the cache.""" + return len(self.cache) + + +class CachingCephFSPathResolver(CephFSPathResolver): + """ + A subclass of CephFSPathResolver that adds caching to the resolve method + to improve performance by reducing redundant path resolutions. + + This implementation uses a TTL (Time-To-Live) cache rather than an LRU (Least + Recently Used) cache. The TTL cache is preferred in this scenario because + the validity of cached paths is time-sensitive, and we want to ensure that + paths are refreshed after a certain period regardless of access frequency. + Rlock can be used to synchronize access to the cache, but that is something + not required for now & can be later tested. + """ + + def __init__( + self, mgr: Module_T, *, client: Optional[CephfsClient] = None + ) -> None: + super().__init__(mgr, client=client) + # Initialize a TTL cache. 
+ self._cache = _TTLCache(maxsize=512, ttl=5) + + def resolve_subvolume_path( + self, volume: str, subvolumegroup: str, subvolume: str + ) -> str: + cache_key = (volume, subvolumegroup, subvolume) + cached_path = self._cache.get(cache_key) + if cached_path: + log.debug("Cache hit for key: %r", cache_key) + return cached_path + + log.debug("Cache miss for key: %r", cache_key) + resolved_path = super().resolve_subvolume_path( + volume, subvolumegroup, subvolume + ) + self._cache.set(cache_key, resolved_path) + return resolved_path diff --git a/src/pybind/mgr/smb/handler.py b/src/pybind/mgr/smb/handler.py index 387d0a41283..f230d7952d7 100644 --- a/src/pybind/mgr/smb/handler.py +++ b/src/pybind/mgr/smb/handler.py @@ -3,6 +3,7 @@ from typing import ( Collection, Dict, Iterable, + Iterator, List, Optional, Set, @@ -22,27 +23,31 @@ from .enums import ( CephFSStorageProvider, Intent, JoinSourceType, + LoginAccess, + LoginCategory, State, UserGroupSourceType, ) from .internal import ( ClusterEntry, JoinAuthEntry, - ResourceEntry, ShareEntry, UsersAndGroupsEntry, + resource_entry, + resource_key, ) from .proto import ( AccessAuthorizer, ConfigEntry, ConfigStore, + EntryKey, OrchSubmitter, PathResolver, Simplified, - checked, ) from .resources import SMBResource from .results import ErrorResult, Result, ResultGroup +from .utils import checked, ynbool ClusterRef = Union[resources.Cluster, resources.RemovedCluster] ShareRef = Union[resources.Share, resources.RemovedShare] @@ -180,6 +185,99 @@ class _Matcher: ) +class _Staging: + def __init__(self, store: ConfigStore) -> None: + self.destination_store = store + self.incoming: Dict[EntryKey, SMBResource] = {} + self.deleted: Dict[EntryKey, SMBResource] = {} + self._store_keycache: Set[EntryKey] = set() + self._virt_keycache: Set[EntryKey] = set() + + def stage(self, resource: SMBResource) -> None: + self._virt_keycache = set() + ekey = resource_key(resource) + if resource.intent == Intent.REMOVED: + self.deleted[ekey] = resource + else: + self.deleted.pop(ekey, None) + self.incoming[ekey] = resource + + def _virtual_keys(self) -> Collection[EntryKey]: + if self._virt_keycache: + return self._virt_keycache + self._virt_keycache = set(self._store_keys()) - set( + self.deleted + ) | set(self.incoming) + return self._virt_keycache + + def _store_keys(self) -> Collection[EntryKey]: + if not self._store_keycache: + self._store_keycache = set(self.destination_store) + return self._store_keycache + + def __iter__(self) -> Iterator[EntryKey]: + return iter(self._virtual_keys()) + + def namespaces(self) -> Collection[str]: + return {k[0] for k in self} + + def contents(self, ns: str) -> Collection[str]: + return {kname for kns, kname in self if kns == ns} + + def is_new(self, resource: SMBResource) -> bool: + ekey = resource_key(resource) + return ekey not in self._store_keys() + + def get_cluster(self, cluster_id: str) -> resources.Cluster: + ekey = (str(ClusterEntry.namespace), cluster_id) + if ekey in self.incoming: + res = self.incoming[ekey] + assert isinstance(res, resources.Cluster) + return res + return ClusterEntry.from_store( + self.destination_store, cluster_id + ).get_cluster() + + def get_join_auth(self, auth_id: str) -> resources.JoinAuth: + ekey = (str(JoinAuthEntry.namespace), auth_id) + if ekey in self.incoming: + res = self.incoming[ekey] + assert isinstance(res, resources.JoinAuth) + return res + return JoinAuthEntry.from_store( + self.destination_store, auth_id + ).get_join_auth() + + def get_users_and_groups(self, ug_id: str) -> 
resources.UsersAndGroups: + ekey = (str(UsersAndGroupsEntry.namespace), ug_id) + if ekey in self.incoming: + res = self.incoming[ekey] + assert isinstance(res, resources.UsersAndGroups) + return res + return UsersAndGroupsEntry.from_store( + self.destination_store, ug_id + ).get_users_and_groups() + + def save(self) -> ResultGroup: + results = ResultGroup() + for res in self.deleted.values(): + results.append(self._save(res)) + for res in self.incoming.values(): + results.append(self._save(res)) + return results + + def _save(self, resource: SMBResource) -> Result: + entry = resource_entry(self.destination_store, resource) + if resource.intent == Intent.REMOVED: + removed = entry.remove() + state = State.REMOVED if removed else State.NOT_PRESENT + else: + state = entry.create_or_update(resource) + log.debug('saved resource: %r; state: %s', resource, state) + result = Result(resource, success=True, status={'state': state}) + return result + + class ClusterConfigHandler: """The central class for ingesting and handling smb configuration change requests. @@ -244,23 +342,36 @@ class ClusterConfigHandler: f' orch {self._orch!r}' ) - def apply(self, inputs: Iterable[SMBResource]) -> ResultGroup: + def apply( + self, inputs: Iterable[SMBResource], *, create_only: bool = False + ) -> ResultGroup: + """Apply resource configuration changes. + Set `create_only` to disable changing existing resource values. + """ log.debug('applying changes to internal data store') results = ResultGroup() - for resource in self._order_inputs(inputs): - try: - result = self._update_resource(resource) - except ErrorResult as err: - result = err - except Exception as err: - log.exception("error updating resource") - result = ErrorResult(resource, msg=str(err)) + staging = _Staging(self.internal_store) + try: + incoming = order_resources(inputs) + for resource in incoming: + staging.stage(resource) + for resource in incoming: + results.append( + self._check(resource, staging, create_only=create_only) + ) + except ErrorResult as err: + results.append(err) + except Exception as err: + log.exception("error updating resource") + result = ErrorResult(resource, msg=str(err)) results.append(result) if results.success: log.debug( 'successfully updated %s resources. syncing changes to public stores', len(list(results)), ) + results = staging.save() + _prune_linked_entries(staging) self._sync_modified(results) return results @@ -300,7 +411,7 @@ class ClusterConfigHandler: for cluster_id in self.cluster_ids(): if (resources.Cluster, cluster_id) in matcher: out.append(self._cluster_entry(cluster_id).get_cluster()) - for share_id in cluster_shares[cluster_id]: + for share_id in cluster_shares.get(cluster_id, []): if (resources.Share, cluster_id, share_id) in matcher: out.append( self._share_entry( @@ -324,58 +435,44 @@ class ClusterConfigHandler: log.debug("search found %d resources", len(out)) return out - def _order_inputs( - self, inputs: Iterable[SMBResource] - ) -> List[SMBResource]: - """Sort resource objects by type so that the user can largely input - objects freely but that references map out cleanly. 
- """ - - def _keyfunc(r: SMBResource) -> int: - if isinstance(r, resources.RemovedShare): - return -2 - if isinstance(r, resources.RemovedCluster): - return -1 - if isinstance(r, resources.Share): - return 2 - if isinstance(r, resources.Cluster): - return 1 - return 0 - - return sorted(inputs, key=_keyfunc) - - def _update_resource(self, resource: SMBResource) -> Result: - """Update the internal store with a new resource object.""" - entry: ResourceEntry - log.debug('updating resource: %r', resource) - if isinstance( - resource, (resources.Cluster, resources.RemovedCluster) - ): - self._check_cluster(resource) - entry = self._cluster_entry(resource.cluster_id) - elif isinstance(resource, (resources.Share, resources.RemovedShare)): - self._check_share(resource) - entry = self._share_entry(resource.cluster_id, resource.share_id) - elif isinstance(resource, resources.JoinAuth): - self._check_join_auths(resource) - entry = self._join_auth_entry(resource.auth_id) - elif isinstance(resource, resources.UsersAndGroups): - self._check_users_and_groups(resource) - entry = self._users_and_groups_entry(resource.users_groups_id) - else: - raise TypeError('not a valid smb resource') - state = self._save(entry, resource) - result = Result(resource, success=True, status={'state': state}) - log.debug('saved resource: %r; state: %s', resource, state) + def _check( + self, + resource: SMBResource, + staging: _Staging, + *, + create_only: bool = False, + ) -> Result: + """Check/validate a staged resource.""" + log.debug('staging resource: %r', resource) + if create_only: + if not staging.is_new(resource): + return Result( + resource, + success=False, + msg='a resource with the same ID already exists', + ) + try: + if isinstance( + resource, (resources.Cluster, resources.RemovedCluster) + ): + _check_cluster(resource, staging) + elif isinstance( + resource, (resources.Share, resources.RemovedShare) + ): + _check_share(resource, staging, self._path_resolver) + elif isinstance(resource, resources.JoinAuth): + _check_join_auths(resource, staging) + elif isinstance(resource, resources.UsersAndGroups): + _check_users_and_groups(resource, staging) + else: + raise TypeError('not a valid smb resource') + except ErrorResult as err: + log.debug('rejected resource: %r', resource) + return err + log.debug('checked resource: %r', resource) + result = Result(resource, success=True, status={'checked': True}) return result - def _save(self, entry: ResourceEntry, resource: SMBResource) -> State: - # Returns the Intent indicating the previous state. 
- if resource.intent == Intent.REMOVED: - removed = entry.remove() - return State.REMOVED if removed else State.NOT_PRESENT - return entry.create_or_update(resource) - def _sync_clusters( self, modified_cluster_ids: Optional[Collection[str]] = None ) -> None: @@ -572,92 +669,6 @@ class ClusterConfigHandler: external.rm_cluster(self.priv_store, cluster_id) external.rm_cluster(self.public_store, cluster_id) - def _check_cluster(self, cluster: ClusterRef) -> None: - """Check that the cluster resource can be updated.""" - if cluster.intent == Intent.REMOVED: - share_ids = ShareEntry.ids(self.internal_store) - clusters_used = {cid for cid, _ in share_ids} - if cluster.cluster_id in clusters_used: - raise ErrorResult( - cluster, - msg="cluster in use by shares", - status={ - 'clusters': [ - shid - for cid, shid in share_ids - if cid == cluster.cluster_id - ] - }, - ) - return - assert isinstance(cluster, resources.Cluster) - cluster.validate() - - def _check_share(self, share: ShareRef) -> None: - """Check that the share resource can be updated.""" - if share.intent == Intent.REMOVED: - return - assert isinstance(share, resources.Share) - share.validate() - if share.cluster_id not in ClusterEntry.ids(self.internal_store): - raise ErrorResult( - share, - msg="no matching cluster id", - status={"cluster_id": share.cluster_id}, - ) - assert share.cephfs is not None - try: - self._path_resolver.resolve_exists( - share.cephfs.volume, - share.cephfs.subvolumegroup, - share.cephfs.subvolume, - share.cephfs.path, - ) - except (FileNotFoundError, NotADirectoryError): - raise ErrorResult( - share, msg="path is not a valid directory in volume" - ) - - def _check_join_auths(self, join_auth: resources.JoinAuth) -> None: - """Check that the JoinAuth resource can be updated.""" - if join_auth.intent == Intent.PRESENT: - return # adding is always ok - refs_in_use: Dict[str, List[str]] = {} - for cluster_id in ClusterEntry.ids(self.internal_store): - cluster = self._cluster_entry(cluster_id).get_cluster() - for ref in _auth_refs(cluster): - refs_in_use.setdefault(ref, []).append(cluster_id) - log.debug('refs_in_use: %r', refs_in_use) - if join_auth.auth_id in refs_in_use: - raise ErrorResult( - join_auth, - msg='join auth resource in use by clusters', - status={ - 'clusters': refs_in_use[join_auth.auth_id], - }, - ) - - def _check_users_and_groups( - self, users_and_groups: resources.UsersAndGroups - ) -> None: - """Check that the UsersAndGroups resource can be updated.""" - if users_and_groups.intent == Intent.PRESENT: - return # adding is always ok - refs_in_use: Dict[str, List[str]] = {} - for cluster_id in ClusterEntry.ids(self.internal_store): - cluster = self._cluster_entry(cluster_id).get_cluster() - for ref in _ug_refs(cluster): - refs_in_use.setdefault(ref, []).append(cluster_id) - log.debug('refs_in_use: %r', refs_in_use) - if users_and_groups.users_groups_id in refs_in_use: - raise ErrorResult( - users_and_groups, - msg='users and groups resource in use by clusters', - status={ - 'clusters': refs_in_use[users_and_groups.users_groups_id], - }, - ) - def _cluster_entry(self, cluster_id: str) -> ClusterEntry: return ClusterEntry.from_store(self.internal_store, cluster_id) @@ -716,6 +727,210 @@ class ClusterConfigHandler: ) +def order_resources( + resource_objs: Iterable[SMBResource], +) -> List[SMBResource]: + """Sort resource objects by type so that the user can largely input + objects freely but that references map out cleanly. 
+ """ + + def _keyfunc(r: SMBResource) -> int: + if isinstance(r, resources.RemovedShare): + return -2 + if isinstance(r, resources.RemovedCluster): + return -1 + if isinstance(r, resources.Share): + return 2 + if isinstance(r, resources.Cluster): + return 1 + return 0 + + return sorted(resource_objs, key=_keyfunc) + + +def _check_cluster(cluster: ClusterRef, staging: _Staging) -> None: + """Check that the cluster resource can be updated.""" + if cluster.intent == Intent.REMOVED: + share_ids = ShareEntry.ids(staging) + clusters_used = {cid for cid, _ in share_ids} + if cluster.cluster_id in clusters_used: + raise ErrorResult( + cluster, + msg="cluster in use by shares", + status={ + 'clusters': [ + shid + for cid, shid in share_ids + if cid == cluster.cluster_id + ] + }, + ) + return + assert isinstance(cluster, resources.Cluster) + cluster.validate() + for auth_ref in _auth_refs(cluster): + auth = staging.get_join_auth(auth_ref) + if ( + auth.linked_to_cluster + and auth.linked_to_cluster != cluster.cluster_id + ): + raise ErrorResult( + cluster, + msg="join auth linked to different cluster", + status={ + 'other_cluster_id': auth.linked_to_cluster, + }, + ) + for ug_ref in _ug_refs(cluster): + ug = staging.get_users_and_groups(ug_ref) + if ( + ug.linked_to_cluster + and ug.linked_to_cluster != cluster.cluster_id + ): + raise ErrorResult( + cluster, + msg="users and groups linked to different cluster", + status={ + 'other_cluster_id': ug.linked_to_cluster, + }, + ) + + +def _check_share( + share: ShareRef, staging: _Staging, resolver: PathResolver +) -> None: + """Check that the share resource can be updated.""" + if share.intent == Intent.REMOVED: + return + assert isinstance(share, resources.Share) + share.validate() + if share.cluster_id not in ClusterEntry.ids(staging): + raise ErrorResult( + share, + msg="no matching cluster id", + status={"cluster_id": share.cluster_id}, + ) + assert share.cephfs is not None + try: + resolver.resolve_exists( + share.cephfs.volume, + share.cephfs.subvolumegroup, + share.cephfs.subvolume, + share.cephfs.path, + ) + except (FileNotFoundError, NotADirectoryError): + raise ErrorResult( + share, msg="path is not a valid directory in volume" + ) + + +def _check_join_auths( + join_auth: resources.JoinAuth, staging: _Staging +) -> None: + """Check that the JoinAuth resource can be updated.""" + if join_auth.intent == Intent.PRESENT: + return _check_join_auths_present(join_auth, staging) + return _check_join_auths_removed(join_auth, staging) + + +def _check_join_auths_removed( + join_auth: resources.JoinAuth, staging: _Staging +) -> None: + cids = set(ClusterEntry.ids(staging)) + refs_in_use: Dict[str, List[str]] = {} + for cluster_id in cids: + cluster = staging.get_cluster(cluster_id) + for ref in _auth_refs(cluster): + refs_in_use.setdefault(ref, []).append(cluster_id) + log.debug('refs_in_use: %r', refs_in_use) + if join_auth.auth_id in refs_in_use: + raise ErrorResult( + join_auth, + msg='join auth resource in use by clusters', + status={ + 'clusters': refs_in_use[join_auth.auth_id], + }, + ) + + +def _check_join_auths_present( + join_auth: resources.JoinAuth, staging: _Staging +) -> None: + if join_auth.linked_to_cluster: + cids = set(ClusterEntry.ids(staging)) + if join_auth.linked_to_cluster not in cids: + raise ErrorResult( + join_auth, + msg='linked_to_cluster id not valid', + status={ + 'unknown_id': join_auth.linked_to_cluster, + }, + ) + + +def _check_users_and_groups( + users_and_groups: resources.UsersAndGroups, staging: _Staging +) -> None: + 
"""Check that the UsersAndGroups resource can be updated.""" + if users_and_groups.intent == Intent.PRESENT: + return _check_users_and_groups_present(users_and_groups, staging) + return _check_users_and_groups_removed(users_and_groups, staging) + + +def _check_users_and_groups_removed( + users_and_groups: resources.UsersAndGroups, staging: _Staging +) -> None: + refs_in_use: Dict[str, List[str]] = {} + cids = set(ClusterEntry.ids(staging)) + for cluster_id in cids: + cluster = staging.get_cluster(cluster_id) + for ref in _ug_refs(cluster): + refs_in_use.setdefault(ref, []).append(cluster_id) + log.debug('refs_in_use: %r', refs_in_use) + if users_and_groups.users_groups_id in refs_in_use: + raise ErrorResult( + users_and_groups, + msg='users and groups resource in use by clusters', + status={ + 'clusters': refs_in_use[users_and_groups.users_groups_id], + }, + ) + + +def _check_users_and_groups_present( + users_and_groups: resources.UsersAndGroups, staging: _Staging +) -> None: + if users_and_groups.linked_to_cluster: + cids = set(ClusterEntry.ids(staging)) + if users_and_groups.linked_to_cluster not in cids: + raise ErrorResult( + users_and_groups, + msg='linked_to_cluster id not valid', + status={ + 'unknown_id': users_and_groups.linked_to_cluster, + }, + ) + + +def _prune_linked_entries(staging: _Staging) -> None: + cids = set(ClusterEntry.ids(staging)) + for auth_id in JoinAuthEntry.ids(staging): + join_auth = staging.get_join_auth(auth_id) + if ( + join_auth.linked_to_cluster + and join_auth.linked_to_cluster not in cids + ): + JoinAuthEntry.from_store( + staging.destination_store, auth_id + ).remove() + for ug_id in UsersAndGroupsEntry.ids(staging): + ug = staging.get_users_and_groups(ug_id) + if ug.linked_to_cluster and ug.linked_to_cluster not in cids: + UsersAndGroupsEntry.from_store( + staging.destination_store, ug_id + ).remove() + + def _auth_refs(cluster: resources.Cluster) -> Collection[str]: if cluster.auth_mode != AuthMode.ACTIVE_DIRECTORY: return set() @@ -739,11 +954,6 @@ def _ug_refs(cluster: resources.Cluster) -> Collection[str]: } -def _ynbool(value: bool) -> str: - """Convert a bool to an smb.conf compatible string.""" - return 'Yes' if value else 'No' - - def _generate_share( share: resources.Share, resolver: PathResolver, cephx_entity: str ) -> Dict[str, Dict[str, str]]: @@ -763,7 +973,7 @@ def _generate_share( share.cephfs.subvolume, share.cephfs.path, ) - return { + cfg = { # smb.conf options 'options': { 'path': path, @@ -771,12 +981,56 @@ def _generate_share( 'ceph:config_file': '/etc/ceph/ceph.conf', 'ceph:filesystem': share.cephfs.volume, 'ceph:user_id': cephx_entity, - 'read only': _ynbool(share.readonly), - 'browseable': _ynbool(share.browseable), + 'read only': ynbool(share.readonly), + 'browseable': ynbool(share.browseable), 'kernel share modes': 'no', 'x:ceph:id': f'{share.cluster_id}.{share.share_id}', } } + # extend share with user+group login access lists + _generate_share_login_control(share, cfg) + # extend share with custom options + custom_opts = share.cleaned_custom_smb_share_options + if custom_opts: + cfg['options'].update(custom_opts) + cfg['options']['x:ceph:has_custom_options'] = 'yes' + return cfg + + +def _generate_share_login_control( + share: resources.Share, cfg: Simplified +) -> None: + valid_users: List[str] = [] + invalid_users: List[str] = [] + read_list: List[str] = [] + write_list: List[str] = [] + admin_users: List[str] = [] + for entry in share.login_control or []: + if entry.category == LoginCategory.GROUP: + name = 
f'@{entry.name}' + else: + name = entry.name + if entry.access == LoginAccess.NONE: + invalid_users.append(name) + continue + elif entry.access == LoginAccess.ADMIN: + admin_users.append(name) + elif entry.access == LoginAccess.READ_ONLY: + read_list.append(name) + elif entry.access == LoginAccess.READ_WRITE: + write_list.append(name) + if share.restrict_access: + valid_users.append(name) + if valid_users: + cfg['options']['valid users'] = ' '.join(valid_users) + if invalid_users: + cfg['options']['invalid users'] = ' '.join(invalid_users) + if read_list: + cfg['options']['read list'] = ' '.join(read_list) + if write_list: + cfg['options']['write list'] = ' '.join(write_list) + if admin_users: + cfg['options']['admin users'] = ' '.join(admin_users) def _generate_config( @@ -801,7 +1055,7 @@ def _generate_config( for share in shares } - return { + cfg: Dict[str, Any] = { 'samba-container-config': 'v0', 'configs': { cluster.cluster_id: { @@ -827,6 +1081,14 @@ def _generate_config( }, 'shares': share_configs, } + # insert global custom options + custom_opts = cluster.cleaned_custom_smb_global_options + if custom_opts: + # isolate custom config opts into a section for cleanliness + gname = f'{cluster.cluster_id}_custom' + cfg['configs'][cluster.cluster_id]['globals'].append(gname) + cfg['globals'][gname] = {'options': dict(custom_opts)} + return cfg def _generate_smb_service_spec( @@ -911,8 +1173,6 @@ def _save_pending_join_auths( for idx, src in enumerate(checked(cluster.domain_settings).join_sources): if src.source_type == JoinSourceType.RESOURCE: javalues = checked(arefs[src.ref].auth) - elif src.source_type == JoinSourceType.PASSWORD: - javalues = checked(src.auth) else: raise ValueError( f'unsupported join source type: {src.source_type}' @@ -936,9 +1196,8 @@ def _save_pending_users_and_groups( if ugsv.source_type == UserGroupSourceType.RESOURCE: ugvalues = augs[ugsv.ref].values assert ugvalues - elif ugsv.source_type == UserGroupSourceType.INLINE: - ugvalues = ugsv.values - assert ugvalues + elif ugsv.source_type == UserGroupSourceType.EMPTY: + continue else: raise ValueError( f'unsupported users/groups source type: {ugsv.source_type}' diff --git a/src/pybind/mgr/smb/internal.py b/src/pybind/mgr/smb/internal.py index d40561b08db..3571ed44400 100644 --- a/src/pybind/mgr/smb/internal.py +++ b/src/pybind/mgr/smb/internal.py @@ -5,13 +5,54 @@ from typing import Collection, Tuple, Type, TypeVar from . 
import resources from .enums import AuthMode, ConfigNS, State -from .proto import ConfigEntry, ConfigStore, Self, Simplifiable, one +from .proto import ( + ConfigEntry, + ConfigStore, + ConfigStoreListing, + EntryKey, + Self, + Simplifiable, +) from .resources import SMBResource from .results import ErrorResult +from .utils import one T = TypeVar('T') +def cluster_key(cluster_id: str) -> EntryKey: + """Return store entry key for a cluster entry.""" + return str(ConfigNS.CLUSTERS), cluster_id + + +def share_key(cluster_id: str, share_id: str) -> EntryKey: + """Return store entry key for a share entry.""" + return str(ConfigNS.SHARES), f'{cluster_id}.{share_id}' + + +def join_auth_key(auth_id: str) -> EntryKey: + """Return store entry key for a join auth entry.""" + return str(ConfigNS.JOIN_AUTHS), auth_id + + +def users_and_groups_key(users_groups_id: str) -> EntryKey: + """Return store entry key for a users-and-groups entry.""" + return str(ConfigNS.USERS_AND_GROUPS), users_groups_id + + +def resource_key(resource: SMBResource) -> EntryKey: + """Return a store entry key for an smb resource object.""" + if isinstance(resource, (resources.Cluster, resources.RemovedCluster)): + return cluster_key(resource.cluster_id) + elif isinstance(resource, (resources.Share, resources.RemovedShare)): + return share_key(resource.cluster_id, resource.share_id) + elif isinstance(resource, resources.JoinAuth): + return join_auth_key(resource.auth_id) + elif isinstance(resource, resources.UsersAndGroups): + return users_and_groups_key(resource.users_groups_id) + raise TypeError('not a valid smb resource') + + class ResourceEntry: """Base class for resource entry getter/setter objects.""" @@ -61,7 +102,7 @@ class ClusterEntry(ResourceEntry): return cls(cluster_id, store[str(cls.namespace), cluster_id]) @classmethod - def ids(cls, store: ConfigStore) -> Collection[str]: + def ids(cls, store: ConfigStoreListing) -> Collection[str]: return store.contents(str(cls.namespace)) def get_cluster(self) -> resources.Cluster: @@ -118,7 +159,7 @@ class ShareEntry(ResourceEntry): return cls(key, store[str(cls.namespace), key]) @classmethod - def ids(cls, store: ConfigStore) -> Collection[Tuple[str, str]]: + def ids(cls, store: ConfigStoreListing) -> Collection[Tuple[str, str]]: return [_split(k) for k in store.contents(str(cls.namespace))] def get_share(self) -> resources.Share: @@ -135,7 +176,7 @@ class JoinAuthEntry(ResourceEntry): return cls(auth_id, store[str(cls.namespace), auth_id]) @classmethod - def ids(cls, store: ConfigStore) -> Collection[str]: + def ids(cls, store: ConfigStoreListing) -> Collection[str]: return store.contents(str(cls.namespace)) def get_join_auth(self) -> resources.JoinAuth: @@ -154,13 +195,30 @@ class UsersAndGroupsEntry(ResourceEntry): return cls(auth_id, store[str(cls.namespace), auth_id]) @classmethod - def ids(cls, store: ConfigStore) -> Collection[str]: + def ids(cls, store: ConfigStoreListing) -> Collection[str]: return store.contents(str(cls.namespace)) def get_users_and_groups(self) -> resources.UsersAndGroups: return self.get_resource_type(resources.UsersAndGroups) +def resource_entry( + store: ConfigStore, resource: SMBResource +) -> ResourceEntry: + """Return a bound store entry object given a resource object.""" + if isinstance(resource, (resources.Cluster, resources.RemovedCluster)): + return ClusterEntry.from_store(store, resource.cluster_id) + elif isinstance(resource, (resources.Share, resources.RemovedShare)): + return ShareEntry.from_store( + store, resource.cluster_id, 
resource.share_id + ) + elif isinstance(resource, resources.JoinAuth): + return JoinAuthEntry.from_store(store, resource.auth_id) + elif isinstance(resource, resources.UsersAndGroups): + return UsersAndGroupsEntry.from_store(store, resource.users_groups_id) + raise TypeError('not a valid smb resource') + + def _split(share_key: str) -> Tuple[str, str]: cluster_id, share_id = share_key.split('.', 1) return cluster_id, share_id diff --git a/src/pybind/mgr/smb/module.py b/src/pybind/mgr/smb/module.py index fff7fc46925..43ad681769a 100644 --- a/src/pybind/mgr/smb/module.py +++ b/src/pybind/mgr/smb/module.py @@ -6,7 +6,16 @@ import orchestrator from ceph.deployment.service_spec import PlacementSpec, SMBSpec from mgr_module import MgrModule, Option -from . import cli, fs, handler, mon_store, rados_store, resources +from . import ( + cli, + fs, + handler, + mon_store, + rados_store, + resources, + results, + utils, +) from .enums import AuthMode, JoinSourceType, UserGroupSourceType from .proto import AccessAuthorizer, Simplified @@ -43,7 +52,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): self._public_store = ( public_store or rados_store.RADOSConfigStore.init(self) ) - path_resolver = path_resolver or fs.CephFSPathResolver(self) + path_resolver = path_resolver or fs.CachingCephFSPathResolver(self) # Why the honk is the cast needed but path_resolver doesn't need it?? # Sometimes mypy drives me batty. authorizer = cast( @@ -59,11 +68,18 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): ) @cli.SMBCommand('apply', perm='rw') - def apply_resources(self, inbuf: str) -> handler.ResultGroup: + def apply_resources(self, inbuf: str) -> results.ResultGroup: """Create, update, or remove smb configuration resources based on YAML or JSON specs """ - return self._handler.apply(resources.load_text(inbuf)) + try: + return self._handler.apply(resources.load_text(inbuf)) + except resources.InvalidResourceError as err: + # convert the exception into a result and return it as the only + # item in the result group + return results.ResultGroup( + [results.InvalidResourceResult(err.resource_data, str(err))] + ) @cli.SMBCommand('cluster ls', perm='r') def cluster_ls(self) -> List[str]: @@ -82,10 +98,11 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): define_user_pass: Optional[List[str]] = None, custom_dns: Optional[List[str]] = None, placement: Optional[str] = None, - ) -> handler.Result: + ) -> results.Result: """Create an smb cluster""" domain_settings = None user_group_settings = None + to_apply: List[resources.SMBResource] = [] if domain_realm or domain_join_ref or domain_join_user_pass: join_sources: List[resources.JoinSource] = [] @@ -108,13 +125,21 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): 'a domain join username & password value' ' must contain a "%" separator' ) + rname = utils.rand_name(cluster_id) join_sources.append( resources.JoinSource( - source_type=JoinSourceType.PASSWORD, + source_type=JoinSourceType.RESOURCE, + ref=rname, + ) + ) + to_apply.append( + resources.JoinAuth( + auth_id=rname, auth=resources.JoinAuthValues( username=username, password=password, ), + linked_to_cluster=cluster_id, ) ) domain_settings = resources.DomainSettings( @@ -140,15 +165,22 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): for unpw in define_user_pass or []: username, password = unpw.split('%', 1) users.append({'name': username, 'password': password}) - user_group_settings += [ + rname = utils.rand_name(cluster_id) + 
user_group_settings.append( resources.UserGroupSource( - source_type=UserGroupSourceType.INLINE, + source_type=UserGroupSourceType.RESOURCE, ref=rname + ) + ) + to_apply.append( + resources.UsersAndGroups( + users_groups_id=rname, values=resources.UserGroupSettings( users=users, groups=[], ), + linked_to_cluster=cluster_id, ) - ] + ) pspec = resources.WrappedPlacementSpec.wrap( PlacementSpec.from_string(placement) @@ -161,10 +193,11 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): custom_dns=custom_dns, placement=pspec, ) - return self._handler.apply([cluster]).one() + to_apply.append(cluster) + return self._handler.apply(to_apply, create_only=True).squash(cluster) @cli.SMBCommand('cluster rm', perm='rw') - def cluster_rm(self, cluster_id: str) -> handler.Result: + def cluster_rm(self, cluster_id: str) -> results.Result: """Remove an smb cluster""" cluster = resources.RemovedCluster(cluster_id=cluster_id) return self._handler.apply([cluster]).one() @@ -190,7 +223,7 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): share_name: str = '', subvolume: str = '', readonly: bool = False, - ) -> handler.Result: + ) -> results.Result: """Create an smb share""" share = resources.Share( cluster_id=cluster_id, @@ -203,10 +236,10 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule): subvolume=subvolume, ), ) - return self._handler.apply([share]).one() + return self._handler.apply([share], create_only=True).one() @cli.SMBCommand('share rm', perm='rw') - def share_rm(self, cluster_id: str, share_id: str) -> handler.Result: + def share_rm(self, cluster_id: str, share_id: str) -> results.Result: """Remove an smb share""" share = resources.RemovedShare( cluster_id=cluster_id, share_id=share_id diff --git a/src/pybind/mgr/smb/proto.py b/src/pybind/mgr/smb/proto.py index 96aed6c4174..ffcc647a48e 100644 --- a/src/pybind/mgr/smb/proto.py +++ b/src/pybind/mgr/smb/proto.py @@ -9,7 +9,6 @@ from typing import ( List, Optional, Tuple, - TypeVar, ) import sys @@ -18,7 +17,7 @@ from ceph.deployment.service_spec import SMBSpec # this uses a version check as opposed to a try/except because this # form makes mypy happy and try/except doesn't. -if sys.version_info >= (3, 8): +if sys.version_info >= (3, 8): # pragma: no cover from typing import Protocol elif TYPE_CHECKING: # pragma: no cover # typing_extensions will not be available for the real mgr server @@ -29,7 +28,7 @@ else: # pragma: no cover pass -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 11): # pragma: no cover from typing import Self elif TYPE_CHECKING: # pragma: no cover # typing_extensions will not be available for the real mgr server @@ -78,13 +77,8 @@ class ConfigEntry(Protocol): ... # pragma: no cover -class ConfigStore(Protocol): - """A protocol for describing a configuration data store capable of - retaining and tracking configuration entry objects. - """ - - def __getitem__(self, key: EntryKey) -> ConfigEntry: - ... # pragma: no cover +class ConfigStoreListing(Protocol): + """A protocol for describing the content-listing methods of a config store.""" def namespaces(self) -> Collection[str]: ... # pragma: no cover @@ -95,6 +89,15 @@ class ConfigStore(Protocol): def __iter__(self) -> Iterator[EntryKey]: ... # pragma: no cover + +class ConfigStore(ConfigStoreListing, Protocol): + """A protocol for describing a configuration data store capable of + retaining and tracking configuration entry objects. + """ + + def __getitem__(self, key: EntryKey) -> ConfigEntry: + ... 
# pragma: no cover + def remove(self, ns: EntryKey) -> bool: ... # pragma: no cover @@ -151,27 +154,3 @@ class AccessAuthorizer(Protocol): self, volume: str, entity: str, caps: str = '' ) -> None: ... # pragma: no cover - - -T = TypeVar('T') - - -# TODO: move to a utils.py -def one(lst: List[T]) -> T: - if len(lst) != 1: - raise ValueError("list does not contain exactly one element") - return lst[0] - - -class IsNoneError(ValueError): - pass - - -def checked(v: Optional[T]) -> T: - """Ensures the provided value is not a None or raises a IsNoneError. - Intended use is similar to an `assert v is not None` but more usable in - one-liners and list/dict/etc comprehensions. - """ - if v is None: - raise IsNoneError('value is None') - return v diff --git a/src/pybind/mgr/smb/resourcelib.py b/src/pybind/mgr/smb/resourcelib.py index 6d3a7ff63c7..ea7a82263bd 100644 --- a/src/pybind/mgr/smb/resourcelib.py +++ b/src/pybind/mgr/smb/resourcelib.py @@ -83,6 +83,7 @@ from typing import ( Callable, Dict, Hashable, + Iterator, List, Optional, Tuple, @@ -91,6 +92,7 @@ from typing import ( import dataclasses import logging import sys +from contextlib import contextmanager from itertools import chain from .proto import Self, Simplified @@ -304,6 +306,7 @@ class Resource: self.resource_cls = cls self.fields: Dict[str, Field] = {} self._on_condition: Optional[Callable[..., bool]] = None + self._on_construction_error: Optional[Callable[..., Exception]] = None for fld in dataclasses.fields(self.resource_cls): self.fields[fld.name] = Field.create(fld) @@ -317,6 +320,12 @@ class Resource: """Set a condition function.""" self._on_condition = cond + def on_construction_error(self, cond: Callable[..., Exception]) -> None: + """Set a function to handle/convert exceptions that occur while + constructing objects from simplified data. + """ + self._on_construction_error = cond + def type_name(self) -> str: """Return the name of the type managed by this resource.""" return self.resource_cls.__name__ @@ -330,16 +339,29 @@ class Resource: """Given a dict-based unstructured data object return the structured object-based equivalent. """ - kw = {} - for fld in self.fields.values(): - value = self._object_field_from_simplified(fld, data) - if value is not _unset: - kw[fld.name] = value - obj = self.resource_cls(**kw) - validate = getattr(obj, 'validate', None) - if validate: - validate() - return obj + with self._structuring_error_hook(self.resource_cls, data): + kw = {} + for fld in self.fields.values(): + value = self._object_field_from_simplified(fld, data) + if value is not _unset: + kw[fld.name] = value + obj = self.resource_cls(**kw) + validate = getattr(obj, 'validate', None) + if validate: + validate() + return obj + + @contextmanager + @_xt + def _structuring_error_hook( + self, resource_cls: Any, data: Simplified + ) -> Iterator[None]: + try: + yield + except Exception as err: + if self._on_construction_error: + raise self._on_construction_error(err, data) from err + raise @_xt def _object_field_from_simplified( @@ -628,6 +650,7 @@ def load(data: Simplified) -> List[Any]: """ # Given a bare list/iterator. Assume it contains loadable objects. 
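+ # note: a bare str is itself iterable; the assert below rejects str/bytes so a string blob is not recursively loaded one character at a time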
if not isinstance(data, dict): + assert not isinstance(data, (str, bytes)) return list(chain.from_iterable(load(v) for v in data)) # Given a "list object" if _RESOURCE_TYPE not in data and _RESOURCES in data: diff --git a/src/pybind/mgr/smb/resources.py b/src/pybind/mgr/smb/resources.py index aad57ff79f0..7ef25f4f0a2 100644 --- a/src/pybind/mgr/smb/resources.py +++ b/src/pybind/mgr/smb/resources.py @@ -1,10 +1,12 @@ -from typing import Dict, List, Optional, Union, cast +from typing import Dict, List, Optional, Tuple, Union, cast +import errno import json import yaml from ceph.deployment.service_spec import PlacementSpec +from object_format import ErrorResponseBase from . import resourcelib, validation from .enums import ( @@ -12,9 +14,12 @@ from .enums import ( CephFSStorageProvider, Intent, JoinSourceType, + LoginAccess, + LoginCategory, UserGroupSourceType, ) -from .proto import Self, Simplified, checked +from .proto import Self, Simplified +from .utils import checked def _get_intent(data: Simplified) -> Intent: @@ -32,6 +37,51 @@ def _present(data: Simplified) -> bool: return _get_intent(data) == Intent.PRESENT +class InvalidResourceError(ValueError, ErrorResponseBase): + def __init__(self, msg: str, data: Simplified) -> None: + super().__init__(msg) + self.resource_data = data + + def to_simplified(self) -> Simplified: + return { + 'resource': self.resource_data, + 'msg': str(self), + 'success': False, + } + + def format_response(self) -> Tuple[int, str, str]: + data = json.dumps(self.to_simplified()) + return -errno.EINVAL, data, "Invalid resource" + + @classmethod + def wrap(cls, err: Exception, data: Simplified) -> Exception: + if isinstance(err, ValueError) and not isinstance( + err, resourcelib.ResourceTypeError + ): + return cls(str(err), data) + return err + + +class InvalidInputError(ValueError, ErrorResponseBase): + summary_max = 1024 + + def __init__(self, msg: str, content: str) -> None: + super().__init__(msg) + self.content = content + + def to_simplified(self) -> Simplified: + return { + 'input': self.content[: self.summary_max], + 'truncated_input': len(self.content) > self.summary_max, + 'msg': str(self), + 'success': False, + } + + def format_response(self) -> Tuple[int, str, str]: + data = json.dumps(self.to_simplified()) + return -errno.EINVAL, data, "Invalid input" + + class _RBase: # mypy doesn't currently (well?) support class decorators adding methods # so we use a base class to add this method to all our resource classes. 
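+ # (the method in question is to_simplified(), judging by its use throughout the resources and tests below)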
@@ -85,6 +135,19 @@ class CephFSStorage(_RBase): return rc +@resourcelib.component() +class LoginAccessEntry(_RBase): + name: str + category: LoginCategory = LoginCategory.USER + access: LoginAccess = LoginAccess.READ_ONLY + + def __post_init__(self) -> None: + self.access = self.access.expand() + + def validate(self) -> None: + validation.check_access_name(self.name) + + @resourcelib.resource('ceph.smb.share') class RemovedShare(_RBase): """Represents a share that has / will be removed.""" @@ -104,6 +167,7 @@ class RemovedShare(_RBase): @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: rc.on_condition(_removed) + rc.on_construction_error(InvalidResourceError.wrap) return rc @@ -119,6 +183,9 @@ class Share(_RBase): readonly: bool = False browseable: bool = True cephfs: Optional[CephFSStorage] = None + custom_smb_share_options: Optional[Dict[str, str]] = None + login_control: Optional[List[LoginAccessEntry]] = None + restrict_access: bool = False def __post_init__(self) -> None: # if name is not given explicitly, take it from the share_id @@ -138,6 +205,11 @@ class Share(_RBase): # currently only cephfs is supported if self.cephfs is None: raise ValueError('a cephfs configuration is required') + validation.check_custom_options(self.custom_smb_share_options) + if self.restrict_access and not self.login_control: + raise ValueError( + 'a share with restricted access must define at least one login_control entry' + ) @property def checked_cephfs(self) -> CephFSStorage: @@ -146,9 +218,15 @@ class Share(_RBase): @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: + rc.restrict_access.quiet = True rc.on_condition(_present) + rc.on_construction_error(InvalidResourceError.wrap) return rc + @property + def cleaned_custom_smb_share_options(self) -> Optional[Dict[str, str]]: + return validation.clean_custom_options(self.custom_smb_share_options) + @resourcelib.component() class JoinAuthValues(_RBase): @@ -162,18 +240,17 @@ class JoinAuthValues(_RBase): class JoinSource(_RBase): """Represents data that can be used to join a system to Active Directory.""" - source_type: JoinSourceType - auth: Optional[JoinAuthValues] = None - uri: str = '' + source_type: JoinSourceType = JoinSourceType.RESOURCE ref: str = '' def validate(self) -> None: - if self.ref: + if not self.ref: + raise ValueError('reference value must be specified') + else: validation.check_id(self.ref) @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: - rc.uri.quiet = True rc.ref.quiet = True return rc @@ -190,40 +267,21 @@ class UserGroupSettings(_RBase): class UserGroupSource(_RBase): """Represents data used to set up user/group settings for an instance.""" - source_type: UserGroupSourceType - values: Optional[UserGroupSettings] = None - uri: str = '' + source_type: UserGroupSourceType = UserGroupSourceType.RESOURCE ref: str = '' def validate(self) -> None: - if self.source_type == UserGroupSourceType.INLINE: - pfx = 'inline User/Group configuration' - if self.values is None: - raise ValueError(pfx + ' requires values') - if self.uri: - raise ValueError(pfx + ' does not take a uri') - if self.ref: - raise ValueError(pfx + ' does not take a ref value') - if self.source_type == UserGroupSourceType.HTTP_URI: - pfx = 'http User/Group configuration' - if not self.uri: - raise ValueError(pfx + ' requires a uri') - if self.values: - raise ValueError(pfx + ' does not take inline values') - if self.ref: - raise 
ValueError(pfx + ' does not take a ref value') if self.source_type == UserGroupSourceType.RESOURCE: - pfx = 'resource reference User/Group configuration' if not self.ref: - raise ValueError(pfx + ' requires a ref value') - if self.uri: - raise ValueError(pfx + ' does not take a uri') - if self.values: - raise ValueError(pfx + ' does not take inline values') + raise ValueError('reference value must be specified') + else: + validation.check_id(self.ref) + else: + if self.ref: + raise ValueError('ref may not be specified') @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: - rc.uri.quiet = True rc.ref.quiet = True return rc @@ -246,6 +304,7 @@ class RemovedCluster(_RBase): @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: rc.on_condition(_removed) + rc.on_construction_error(InvalidResourceError.wrap) return rc def validate(self) -> None: @@ -272,8 +331,7 @@ class WrappedPlacementSpec(PlacementSpec): # improperly typed. They are improperly typed because typing.Self # didn't exist and the old correct way is a PITA to write (and # remember). Thus a lot of classmethods are return the exact class - # which is technically incorrect. This fine class is guilty of the same - # sin. :-) + # which is technically incorrect. return cast(Self, cls.from_json(data)) @classmethod @@ -297,6 +355,7 @@ class Cluster(_RBase): domain_settings: Optional[DomainSettings] = None user_group_settings: Optional[List[UserGroupSource]] = None custom_dns: Optional[List[str]] = None + custom_smb_global_options: Optional[Dict[str, str]] = None # embedded orchestration placement spec placement: Optional[WrappedPlacementSpec] = None @@ -324,12 +383,18 @@ class Cluster(_RBase): raise ValueError( 'domain settings not supported for user auth mode' ) + validation.check_custom_options(self.custom_smb_global_options) @resourcelib.customize def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: rc.on_condition(_present) + rc.on_construction_error(InvalidResourceError.wrap) return rc + @property + def cleaned_custom_smb_global_options(self) -> Optional[Dict[str, str]]: + return validation.clean_custom_options(self.custom_smb_global_options) + @resourcelib.resource('ceph.smb.join.auth') class JoinAuth(_RBase): @@ -338,11 +403,22 @@ class JoinAuth(_RBase): auth_id: str intent: Intent = Intent.PRESENT auth: Optional[JoinAuthValues] = None + # linked resources can only be used by the resource they are linked to + # and are automatically removed when the "parent" resource is removed + linked_to_cluster: Optional[str] = None def validate(self) -> None: if not self.auth_id: raise ValueError('auth_id requires a value') validation.check_id(self.auth_id) + if self.linked_to_cluster is not None: + validation.check_id(self.linked_to_cluster) + + @resourcelib.customize + def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: + rc.linked_to_cluster.quiet = True + rc.on_construction_error(InvalidResourceError.wrap) + return rc @resourcelib.resource('ceph.smb.usersgroups') @@ -352,11 +428,22 @@ class UsersAndGroups(_RBase): users_groups_id: str intent: Intent = Intent.PRESENT values: Optional[UserGroupSettings] = None + # linked resources can only be used by the resource they are linked to + # and are automatically removed when the "parent" resource is removed + linked_to_cluster: Optional[str] = None def validate(self) -> None: if not self.users_groups_id: raise ValueError('users_groups_id requires a value') 
validation.check_id(self.users_groups_id) + if self.linked_to_cluster is not None: + validation.check_id(self.linked_to_cluster) + + @resourcelib.customize + def _customize_resource(rc: resourcelib.Resource) -> resourcelib.Resource: + rc.linked_to_cluster.quiet = True + rc.on_construction_error(InvalidResourceError.wrap) + return rc # SMBResource is a union of all valid top-level smb resource types. @@ -370,19 +457,27 @@ SMBResource = Union[ ] -def load_text(blob: str) -> List[SMBResource]: +def load_text( + blob: str, *, input_sample_max: int = 1024 +) -> List[SMBResource]: """Given JSON or YAML return a list of SMBResource objects deserialized from the input. """ + json_err = None try: - data = yaml.safe_load(blob) - except ValueError: - pass - try: + # apparently JSON is not always a strict subset of YAML + # therefore trying to parse as JSON first is not a waste: + # https://john-millikin.com/json-is-not-a-yaml-subset data = json.loads(blob) - except ValueError: - pass - return load(data) + except ValueError as err: + json_err = err + try: + data = yaml.safe_load(blob) if json_err else data + except (ValueError, yaml.parser.ParserError) as err: + raise InvalidInputError(str(err), blob) from err + if not isinstance(data, (list, dict)): + raise InvalidInputError("input must be an object or list", blob) + return load(cast(Simplified, data)) def load(data: Simplified) -> List[SMBResource]: diff --git a/src/pybind/mgr/smb/results.py b/src/pybind/mgr/smb/results.py index 4b958fd7a5e..b62d6e66377 100644 --- a/src/pybind/mgr/smb/results.py +++ b/src/pybind/mgr/smb/results.py @@ -1,9 +1,10 @@ -from typing import Iterator, List, Optional +from typing import Iterable, Iterator, List, Optional import errno -from .proto import Simplified, one +from .proto import Simplified from .resources import SMBResource +from .utils import one _DOMAIN = 'domain' @@ -56,13 +57,38 @@ class ErrorResult(Result, Exception): super().__init__(src, success=False, msg=msg, status=status) +class InvalidResourceResult(Result): + def __init__( + self, + resource_data: Simplified, + msg: str = '', + status: Optional[Simplified] = None, + ) -> None: + self.resource_data = resource_data + self.success = False + self.msg = msg + self.status = status + + def to_simplified(self) -> Simplified: + ds: Simplified = {} + ds['resource'] = self.resource_data + ds['success'] = self.success + if self.msg: + ds['msg'] = self.msg + if self.status: + ds.update(self.status) + return ds + + class ResultGroup: """Result of applying multiple smb resource updates to the system.""" # Compatible with object formatter, thus suitable for being returned # directly to mgr module.
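+ # (initial_results lets a caller seed the group up front; module.apply_resources uses it to wrap an InvalidResourceError into a one-item group)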
- def __init__(self) -> None: - self._contents: List[Result] = [] + def __init__( + self, initial_results: Optional[Iterable[Result]] = None + ) -> None: + self._contents: List[Result] = list(initial_results or []) def append(self, result: Result) -> None: self._contents.append(result) @@ -70,6 +96,23 @@ class ResultGroup: def one(self) -> Result: return one(self._contents) + def squash(self, target: SMBResource) -> Result: + match: Optional[Result] = None + others: List[Result] = [] + for result in self._contents: + if result.src == target: + match = result + else: + others.append(result) + if match: + match.success = self.success + match.status = {} if match.status is None else match.status + match.status['additional_results'] = [ + r.to_simplified() for r in others + ] + return match + raise ValueError('no matching result for resource found') + def __iter__(self) -> Iterator[Result]: return iter(self._contents) diff --git a/src/pybind/mgr/smb/tests/test_enums.py b/src/pybind/mgr/smb/tests/test_enums.py index f3f0f4eeb8b..1ebd40d238c 100644 --- a/src/pybind/mgr/smb/tests/test_enums.py +++ b/src/pybind/mgr/smb/tests/test_enums.py @@ -18,9 +18,28 @@ import smb.enums (smb.enums.State.UPDATED, "updated"), (smb.enums.AuthMode.USER, "user"), (smb.enums.AuthMode.ACTIVE_DIRECTORY, "active-directory"), - (smb.enums.JoinSourceType.PASSWORD, "password"), - (smb.enums.UserGroupSourceType.INLINE, "inline"), ], ) def test_stringified(value, strval): assert str(value) == strval + + +def test_login_access_expand(): + assert smb.enums.LoginAccess.ADMIN.expand() == smb.enums.LoginAccess.ADMIN + assert ( + smb.enums.LoginAccess.READ_ONLY.expand() + == smb.enums.LoginAccess.READ_ONLY + ) + assert ( + smb.enums.LoginAccess.READ_ONLY_SHORT.expand() + == smb.enums.LoginAccess.READ_ONLY + ) + assert ( + smb.enums.LoginAccess.READ_WRITE.expand() + == smb.enums.LoginAccess.READ_WRITE + ) + assert ( + smb.enums.LoginAccess.READ_WRITE_SHORT.expand() + == smb.enums.LoginAccess.READ_WRITE + ) + assert smb.enums.LoginAccess.NONE.expand() == smb.enums.LoginAccess.NONE diff --git a/src/pybind/mgr/smb/tests/test_fs.py b/src/pybind/mgr/smb/tests/test_fs.py new file mode 100644 index 00000000000..5653ccfd081 --- /dev/null +++ b/src/pybind/mgr/smb/tests/test_fs.py @@ -0,0 +1,175 @@ +import time +import unittest +from unittest import mock + +import pytest + +import smb.fs +from smb.fs import _TTLCache + + +def test_mocked_fs_authorizer(): + def mmcmd(cmd): + assert cmd['filesystem'] == 'cephfs' + if 'kaboom' in cmd['entity']: + return -5, 'oops', 'fail' + return 0, 'ok', 'nice' + + m = mock.MagicMock() + m.mon_command.side_effect = mmcmd + + fsauth = smb.fs.FileSystemAuthorizer(m) + fsauth.authorize_entity('cephfs', 'client.smb.foo') + with pytest.raises(smb.fs.AuthorizationGrantError): + fsauth.authorize_entity('cephfs', 'client.smb.kaboom') + + +def test_mocked_fs_path_resolver(monkeypatch): + # we have to "re-patch" whatever cephfs module gets mocked with because + # the ObjectNotFound attribute is not an exception in the test environment + monkeypatch.setattr('cephfs.ObjectNotFound', KeyError) + + def mmcmd(cmd): + if cmd['prefix'] == 'fs subvolume getpath': + if cmd['vol_name'] == 'cephfs' and cmd['sub_name'] == 'beta': + return 0, '/volumes/cool/path/f00d-600d', '' + return -5, '', 'eek' + + m = mock.MagicMock() + m.mon_command.side_effect = mmcmd + + fspr = smb.fs.CephFSPathResolver(m, client=m) + + # resolve + path = fspr.resolve('cephfs', '', '', '/zowie') + assert path == '/zowie' + + path = fspr.resolve('cephfs', 
'alpha', 'beta', '/zowie') + assert path == '/volumes/cool/path/f00d-600d/zowie' + + with pytest.raises(smb.fs.CephFSSubvolumeResolutionError): + path = fspr.resolve('ouch', 'alpha', 'beta', '/zowie') + + # resolve_exists + m.connection_pool.get_fs_handle.return_value.statx.return_value = { + 'mode': 0o41777 + } + path = fspr.resolve_exists('cephfs', 'alpha', 'beta', '/zowie') + assert path == '/volumes/cool/path/f00d-600d/zowie' + + m.connection_pool.get_fs_handle.return_value.statx.return_value = { + 'mode': 0o101777 + } + with pytest.raises(NotADirectoryError): + fspr.resolve_exists('cephfs', 'alpha', 'beta', '/zowie') + + m.connection_pool.get_fs_handle.return_value.statx.side_effect = ( + mock.MagicMock(side_effect=OSError('nope')) + ) + with pytest.raises(FileNotFoundError): + fspr.resolve_exists('cephfs', 'alpha', 'beta', '/zowie') + + +class TestTTLCache(unittest.TestCase): + def setUp(self): + self.cache = _TTLCache( + ttl=1, maxsize=3 + ) # Short TTL and small size for testing + + def test_cache_set_and_get(self): + self.cache.set(('key1', 'key2', 'key3'), ('value1', 'val', 'test')) + self.assertEqual( + self.cache.get(('key1', 'key2', 'key3')), + ('value1', 'val', 'test'), + ) + + def test_cache_expiry(self): + self.cache.set(('key1', 'key2', 'key3'), ('value1', 'val', 'test')) + time.sleep(1.5) # Wait for the TTL to expire + self.assertIsNone(self.cache.get(('key1', 'key2', 'key3'))) + + def test_cache_eviction(self): + # Fill the cache to maxsize + self.cache.set(('key1', 'key2', 'key3'), ('value1', 'val', 'test')) + self.cache.set(('key4', 'key5', 'key6'), ('value2', 'val', 'test')) + self.cache.set(('key7', 'key8', 'key9'), ('value3', 'val', 'test')) + + # Add another entry to trigger eviction of the oldest + self.cache.set(('key10', 'key11', 'key12'), ('value4', 'val', 'test')) + + # Ensure oldest entry is evicted + self.assertIsNone(self.cache.get(('key1', 'key2', 'key3'))) + + # Ensure other entries are present + self.assertEqual( + self.cache.get(('key4', 'key5', 'key6')), + ('value2', 'val', 'test'), + ) + self.assertEqual( + self.cache.get(('key7', 'key8', 'key9')), + ('value3', 'val', 'test'), + ) + self.assertEqual( + self.cache.get(('key10', 'key11', 'key12')), + ('value4', 'val', 'test'), + ) + + def test_cache_clear(self): + self.cache.set(('key1', 'key2', 'key3'), ('value1', 'val', 'test')) + self.cache.clear() + self.assertIsNone(self.cache.get(('key1', 'key2', 'key3'))) + + +def test_caching_fs_path_resolver(monkeypatch): + monkeypatch.setattr('cephfs.ObjectNotFound', KeyError) + + def mmcmd(cmd): + if cmd['prefix'] == 'fs subvolume getpath': + if ( + cmd['vol_name'] == 'cached_cephfs' + and cmd['sub_name'] == 'cached_beta' + ): + return 0, '/volumes/cool/path/f00d-600d', '' + return -5, '', 'cached_eek' + + m = mock.MagicMock() + m.mon_command.side_effect = mmcmd + + fspr = smb.fs.CachingCephFSPathResolver(m, client=m) + + # Resolve a path (cache miss) + path = fspr.resolve( + 'cached_cephfs', 'cached_alpha', 'cached_beta', '/zowie' + ) + assert path == '/volumes/cool/path/f00d-600d/zowie' + assert len(fspr._cache) == 1 + assert m.mon_command.call_count == 1 + + # Resolve the same path again (cache hit) + path = fspr.resolve( + 'cached_cephfs', 'cached_alpha', 'cached_beta', '/zowie' + ) + assert path == '/volumes/cool/path/f00d-600d/zowie' + + # Ensure cache size remains the same + assert len(fspr._cache) == 1 + assert m.mon_command.call_count == 1 + + path = fspr.resolve('cached_cephfs', '', '', '/zowie') + assert path == '/zowie' + + # If subvolume is empty 
cache size should remain the same + assert len(fspr._cache) == 1 + assert m.mon_command.call_count == 1 + + # Clear cache and validate + fspr._cache.clear() + assert len(fspr._cache) == 0 + + # Re-resolve to repopulate cache + path = fspr.resolve( + 'cached_cephfs', 'cached_alpha', 'cached_beta', '/zowie' + ) + assert path == '/volumes/cool/path/f00d-600d/zowie' + assert len(fspr._cache) == 1 + assert m.mon_command.call_count == 2 diff --git a/src/pybind/mgr/smb/tests/test_handler.py b/src/pybind/mgr/smb/tests/test_handler.py index 270f3e72bf9..99d1bfd1654 100644 --- a/src/pybind/mgr/smb/tests/test_handler.py +++ b/src/pybind/mgr/smb/tests/test_handler.py @@ -31,11 +31,7 @@ def test_internal_apply_cluster(thandler): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -50,11 +46,7 @@ def test_cluster_add(thandler): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -72,11 +64,7 @@ def test_internal_apply_cluster_and_share(thandler): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -109,8 +97,7 @@ def test_internal_apply_remove_cluster(thandler): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -141,8 +128,7 @@ def test_internal_apply_remove_shares(thandler): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], }, @@ -222,8 +208,7 @@ def test_internal_apply_add_joinauth(thandler): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -254,8 +239,7 @@ def test_internal_apply_add_usergroups(thandler): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -286,8 +270,7 @@ def test_generate_config_basic(thandler): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], }, @@ -338,15 +321,21 @@ def test_generate_config_ad(thandler): 'realm': 'dom1.example.com', 'join_sources': [ { - 'source_type': 'password', - 'auth': { - 'username': 'testadmin', - 'password': 'Passw0rd', - }, + 'source_type': 'resource', + 'ref': 'foo1', } ], }, }, + 'join_auths.foo1': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo1', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, 'shares.foo.s1': { 'resource_type': 'ceph.smb.share', 'cluster_id': 'foo', @@ -383,6 +372,164 @@ def test_generate_config_ad(thandler): assert cfg['globals']['foo']['options']['realm'] == 'dom1.example.com' +def test_generate_config_with_login_control(thandler): + thandler.internal_store.overwrite( + { + 'clusters.foo': { + 
'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'active-directory', + 'intent': 'present', + 'domain_settings': { + 'realm': 'dom1.example.com', + 'join_sources': [ + { + 'source_type': 'resource', + 'ref': 'foo1', + } + ], + }, + }, + 'join_auths.foo1': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo1', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, + 'shares.foo.s1': { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 's1', + 'intent': 'present', + 'name': 'Ess One', + 'readonly': False, + 'browseable': True, + 'cephfs': { + 'volume': 'cephfs', + 'path': '/', + 'provider': 'samba-vfs', + }, + 'login_control': [ + { + 'name': 'dom1\\alan', + 'category': 'user', + 'access': 'read', + }, + { + 'name': 'dom1\\betsy', + 'category': 'user', + 'access': 'read-write', + }, + { + 'name': 'dom1\\chuck', + 'category': 'user', + 'access': 'admin', + }, + { + 'name': 'dom1\\ducky', + 'category': 'user', + 'access': 'none', + }, + { + 'name': 'dom1\\eggbert', + 'category': 'user', + 'access': 'read', + }, + { + 'name': 'dom1\\guards', + 'category': 'group', + 'access': 'read-write', + }, + ], + }, + } + ) + + cfg = thandler.generate_config('foo') + assert cfg + assert cfg['shares']['Ess One']['options'] + shopts = cfg['shares']['Ess One']['options'] + assert shopts['invalid users'] == 'dom1\\ducky' + assert shopts['read list'] == 'dom1\\alan dom1\\eggbert' + assert shopts['write list'] == 'dom1\\betsy @dom1\\guards' + assert shopts['admin users'] == 'dom1\\chuck' + + +def test_generate_config_with_login_control_restricted(thandler): + thandler.internal_store.overwrite( + { + 'clusters.foo': { + 'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'active-directory', + 'intent': 'present', + 'domain_settings': { + 'realm': 'dom1.example.com', + 'join_sources': [ + { + 'source_type': 'resource', + 'ref': 'foo1', + } + ], + }, + }, + 'join_auths.foo1': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo1', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, + 'shares.foo.s1': { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 's1', + 'intent': 'present', + 'name': 'Ess One', + 'readonly': False, + 'browseable': True, + 'cephfs': { + 'volume': 'cephfs', + 'path': '/', + 'provider': 'samba-vfs', + }, + 'restrict_access': True, + 'login_control': [ + { + 'name': 'dom1\\alan', + 'category': 'user', + 'access': 'read', + }, + { + 'name': 'dom1\\betsy', + 'category': 'user', + 'access': 'read-write', + }, + { + 'name': 'dom1\\chuck', + 'category': 'user', + 'access': 'none', + }, + ], + }, + } + ) + + cfg = thandler.generate_config('foo') + assert cfg + assert cfg['shares']['Ess One']['options'] + shopts = cfg['shares']['Ess One']['options'] + assert shopts['invalid users'] == 'dom1\\chuck' + assert shopts['valid users'] == 'dom1\\alan dom1\\betsy' + assert shopts['read list'] == 'dom1\\alan' + assert shopts['write list'] == 'dom1\\betsy' + + def test_error_result(): share = smb.resources.Share( cluster_id='foo', @@ -427,14 +574,6 @@ def test_apply_no_matching_cluster_error(thandler): assert not rg.success -def test_one(): - assert smb.proto.one(['a']) == 'a' - with pytest.raises(ValueError): - smb.proto.one([]) - with pytest.raises(ValueError): - smb.proto.one(['a', 'b']) - - def test_apply_full_cluster_create(thandler): to_apply = [ smb.resources.JoinAuth( @@ -566,52 +705,6 @@ def 
test_apply_update_password(thandler): assert jdata == {'username': 'testadmin', 'password': 'Zm9vYmFyCg'} -def test_apply_update_cluster_inline_pw(thandler): - test_apply_full_cluster_create(thandler) - to_apply = [ - smb.resources.Cluster( - cluster_id='mycluster1', - auth_mode=smb.enums.AuthMode.ACTIVE_DIRECTORY, - domain_settings=smb.resources.DomainSettings( - realm='MYDOMAIN.EXAMPLE.ORG', - join_sources=[ - smb.resources.JoinSource( - source_type=smb.enums.JoinSourceType.RESOURCE, - ref='join1', - ), - smb.resources.JoinSource( - source_type=smb.enums.JoinSourceType.PASSWORD, - auth=smb.resources.JoinAuthValues( - username='Jimmy', - password='j4mb0ree!', - ), - ), - ], - ), - ), - ] - - results = thandler.apply(to_apply) - assert results.success, results.to_simplified() - assert len(list(results)) == 1 - - assert 'mycluster1' in thandler.public_store.namespaces() - ekeys = list(thandler.public_store.contents('mycluster1')) - assert len(ekeys) == 5 - assert 'cluster-info' in ekeys - assert 'config.smb' in ekeys - assert 'spec.smb' in ekeys - assert 'join.0.json' in ekeys - assert 'join.1.json' in ekeys - - # we changed the password value. the store should reflect that - jdata = thandler.public_store['mycluster1', 'join.0.json'].get() - assert jdata == {'username': 'testadmin', 'password': 'Passw0rd'} - # we changed the password value. the store should reflect that - jdata2 = thandler.public_store['mycluster1', 'join.1.json'].get() - assert jdata2 == {'username': 'Jimmy', 'password': 'j4mb0ree!'} - - def test_apply_add_second_cluster(thandler): test_apply_full_cluster_create(thandler) to_apply = [ @@ -622,15 +715,20 @@ def test_apply_add_second_cluster(thandler): realm='YOURDOMAIN.EXAMPLE.ORG', join_sources=[ smb.resources.JoinSource( - source_type=smb.enums.JoinSourceType.PASSWORD, - auth=smb.resources.JoinAuthValues( - username='Jimmy', - password='j4mb0ree!', - ), + source_type=smb.enums.JoinSourceType.RESOURCE, + ref='coolcluster', ), ], ), ), + smb.resources.JoinAuth( + auth_id='coolcluster', + auth=smb.resources.JoinAuthValues( + username='Jimmy', + password='j4mb0ree!', + ), + linked_to_cluster='coolcluster', + ), smb.resources.Share( cluster_id='coolcluster', share_id='images', @@ -643,7 +741,7 @@ def test_apply_add_second_cluster(thandler): results = thandler.apply(to_apply) assert results.success, results.to_simplified() - assert len(list(results)) == 2 + assert len(list(results)) == 3 assert 'mycluster1' in thandler.public_store.namespaces() ekeys = list(thandler.public_store.contents('mycluster1')) @@ -865,13 +963,14 @@ def test_apply_remove_all_clusters(thandler): def test_all_resources(thandler): test_apply_add_second_cluster(thandler) rall = thandler.all_resources() - assert len(rall) == 6 + assert len(rall) == 7 assert rall[0].resource_type == 'ceph.smb.cluster' assert rall[1].resource_type == 'ceph.smb.share' assert rall[2].resource_type == 'ceph.smb.share' assert rall[3].resource_type == 'ceph.smb.cluster' assert rall[4].resource_type == 'ceph.smb.share' assert rall[5].resource_type == 'ceph.smb.join.auth' + assert rall[6].resource_type == 'ceph.smb.join.auth' @pytest.mark.parametrize( @@ -962,6 +1061,10 @@ def test_all_resources(thandler): 'resource_type': 'ceph.smb.join.auth', 'auth_id': 'join1', }, + { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'coolcluster', + }, ], ), # cluster with id @@ -1051,3 +1154,382 @@ def test_matching_resources(thandler, params): def test_invalid_resource_match_strs(thandler, txt): with pytest.raises(ValueError): 
thandler.matching_resources([txt]) + + +def test_apply_cluster_linked_auth(thandler): + to_apply = [ + smb.resources.JoinAuth( + auth_id='join1', + auth=smb.resources.JoinAuthValues( + username='testadmin', + password='Passw0rd', + ), + linked_to_cluster='mycluster1', + ), + smb.resources.Cluster( + cluster_id='mycluster1', + auth_mode=smb.enums.AuthMode.ACTIVE_DIRECTORY, + domain_settings=smb.resources.DomainSettings( + realm='MYDOMAIN.EXAMPLE.ORG', + join_sources=[ + smb.resources.JoinSource( + source_type=smb.enums.JoinSourceType.RESOURCE, + ref='join1', + ), + ], + ), + custom_dns=['192.168.76.204'], + ), + smb.resources.Share( + cluster_id='mycluster1', + share_id='homedirs', + name='Home Directries', + cephfs=smb.resources.CephFSStorage( + volume='cephfs', + subvolume='homedirs', + path='/', + ), + ), + ] + results = thandler.apply(to_apply) + assert results.success, results.to_simplified() + assert len(list(results)) == 3 + assert ('clusters', 'mycluster1') in thandler.internal_store.data + assert ('shares', 'mycluster1.homedirs') in thandler.internal_store.data + assert ('join_auths', 'join1') in thandler.internal_store.data + + to_apply = [ + smb.resources.RemovedCluster( + cluster_id='mycluster1', + ), + smb.resources.RemovedShare( + cluster_id='mycluster1', + share_id='homedirs', + ), + ] + results = thandler.apply(to_apply) + assert results.success, results.to_simplified() + assert len(list(results)) == 2 + assert ('clusters', 'mycluster1') not in thandler.internal_store.data + assert ( + 'shares', + 'mycluster1.homedirs', + ) not in thandler.internal_store.data + assert ('join_auths', 'join1') not in thandler.internal_store.data + + +def test_apply_cluster_bad_linked_auth(thandler): + to_apply = [ + smb.resources.JoinAuth( + auth_id='join1', + auth=smb.resources.JoinAuthValues( + username='testadmin', + password='Passw0rd', + ), + linked_to_cluster='mycluster2', + ), + smb.resources.Cluster( + cluster_id='mycluster1', + auth_mode=smb.enums.AuthMode.ACTIVE_DIRECTORY, + domain_settings=smb.resources.DomainSettings( + realm='MYDOMAIN.EXAMPLE.ORG', + join_sources=[ + smb.resources.JoinSource( + source_type=smb.enums.JoinSourceType.RESOURCE, + ref='join1', + ), + ], + ), + custom_dns=['192.168.76.204'], + ), + ] + results = thandler.apply(to_apply) + assert not results.success + rs = results.to_simplified() + assert len(rs['results']) == 2 + assert rs['results'][0]['msg'] == 'linked_to_cluster id not valid' + assert rs['results'][1]['msg'] == 'join auth linked to different cluster' + + +def test_apply_cluster_bad_linked_ug(thandler): + to_apply = [ + smb.resources.UsersAndGroups( + users_groups_id='ug1', + values=smb.resources.UserGroupSettings( + users=[{"username": "foo"}], + groups=[], + ), + linked_to_cluster='mycluster2', + ), + smb.resources.Cluster( + cluster_id='mycluster1', + auth_mode=smb.enums.AuthMode.USER, + user_group_settings=[ + smb.resources.UserGroupSource( + source_type=smb.resources.UserGroupSourceType.RESOURCE, + ref='ug1', + ), + ], + ), + ] + results = thandler.apply(to_apply) + assert not results.success + rs = results.to_simplified() + assert len(rs['results']) == 2 + assert rs['results'][0]['msg'] == 'linked_to_cluster id not valid' + assert ( + rs['results'][1]['msg'] + == 'users and groups linked to different cluster' + ) + + +def test_apply_with_create_only(thandler): + test_apply_full_cluster_create(thandler) + + to_apply = [ + smb.resources.Cluster( + cluster_id='mycluster1', + auth_mode=smb.enums.AuthMode.ACTIVE_DIRECTORY, + 
domain_settings=smb.resources.DomainSettings( + realm='MYDOMAIN.EXAMPLE.ORG', + join_sources=[ + smb.resources.JoinSource( + source_type=smb.enums.JoinSourceType.RESOURCE, + ref='join1', + ), + ], + ), + custom_dns=['192.168.76.204'], + ), + smb.resources.Share( + cluster_id='mycluster1', + share_id='homedirs', + name='Altered Home Directries', + cephfs=smb.resources.CephFSStorage( + volume='cephfs', + subvolume='homedirs', + path='/', + ), + ), + smb.resources.Share( + cluster_id='mycluster1', + share_id='foodirs', + name='Foo Directries', + cephfs=smb.resources.CephFSStorage( + volume='cephfs', + subvolume='homedirs', + path='/foo', + ), + ), + ] + results = thandler.apply(to_apply, create_only=True) + assert not results.success + rs = results.to_simplified() + assert len(rs['results']) == 3 + assert ( + rs['results'][0]['msg'] + == 'a resource with the same ID already exists' + ) + assert ( + rs['results'][1]['msg'] + == 'a resource with the same ID already exists' + ) + + # no changes to the store + assert ( + 'shares', + 'mycluster1.foodirs', + ) not in thandler.internal_store.data + assert ('shares', 'mycluster1.homedirs') in thandler.internal_store.data + assert ( + thandler.internal_store.data[('shares', 'mycluster1.homedirs')][ + 'name' + ] + == 'Home Directries' + ) + + # foodirs share is new, it can be applied separately + to_apply = [ + smb.resources.Share( + cluster_id='mycluster1', + share_id='foodirs', + name='Foo Directries', + cephfs=smb.resources.CephFSStorage( + volume='cephfs', + subvolume='homedirs', + path='/foo', + ), + ), + ] + results = thandler.apply(to_apply, create_only=True) + assert results.success + rs = results.to_simplified() + assert len(rs['results']) == 1 + assert ( + 'shares', + 'mycluster1.foodirs', + ) in thandler.internal_store.data + + +def test_remove_in_use_cluster(thandler): + thandler.internal_store.overwrite( + { + 'clusters.foo': { + 'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'active-directory', + 'intent': 'present', + 'domain_settings': { + 'realm': 'dom1.example.com', + 'join_sources': [ + { + 'source_type': 'resource', + 'ref': 'foo1', + } + ], + }, + }, + 'join_auths.foo1': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo1', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, + 'shares.foo.s1': { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 's1', + 'intent': 'present', + 'name': 'Ess One', + 'readonly': False, + 'browseable': True, + 'cephfs': { + 'volume': 'cephfs', + 'path': '/', + 'provider': 'samba-vfs', + }, + }, + } + ) + + to_apply = [ + smb.resources.RemovedCluster( + cluster_id='foo', + ), + ] + results = thandler.apply(to_apply) + rs = results.to_simplified() + assert not results.success + assert 'cluster in use' in rs['results'][0]['msg'] + + +def test_remove_in_use_join_auth(thandler): + thandler.internal_store.overwrite( + { + 'clusters.foo': { + 'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'active-directory', + 'intent': 'present', + 'domain_settings': { + 'realm': 'dom1.example.com', + 'join_sources': [ + { + 'source_type': 'resource', + 'ref': 'foo1', + } + ], + }, + }, + 'join_auths.foo1': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo1', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, + 'shares.foo.s1': { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 's1', + 'intent': 'present', + 
'name': 'Ess One', + 'readonly': False, + 'browseable': True, + 'cephfs': { + 'volume': 'cephfs', + 'path': '/', + 'provider': 'samba-vfs', + }, + }, + } + ) + + to_apply = [ + smb.resources.JoinAuth( + auth_id='foo1', + intent=smb.enums.Intent.REMOVED, + ), + ] + results = thandler.apply(to_apply) + rs = results.to_simplified() + assert not results.success + assert 'resource in use' in rs['results'][0]['msg'] + + +def test_remove_in_use_ug(thandler): + thandler.internal_store.overwrite( + { + 'clusters.foo': { + 'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'user', + 'intent': 'present', + 'user_group_settings': [ + { + 'source_type': 'resource', + 'ref': 'foo1', + } + ], + }, + 'users_and_groups.foo1': { + 'resource_type': 'ceph.smb.usersgroups', + 'users_groups_id': 'foo1', + 'intent': 'present', + 'values': { + 'users': [{"username": "foo"}], + 'groups': [], + }, + }, + 'shares.foo.s1': { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 's1', + 'intent': 'present', + 'name': 'Ess One', + 'readonly': False, + 'browseable': True, + 'cephfs': { + 'volume': 'cephfs', + 'path': '/', + 'provider': 'samba-vfs', + }, + }, + } + ) + + to_apply = [ + smb.resources.UsersAndGroups( + users_groups_id='foo1', + intent=smb.enums.Intent.REMOVED, + ), + ] + results = thandler.apply(to_apply) + rs = results.to_simplified() + assert not results.success + assert 'resource in use' in rs['results'][0]['msg'] diff --git a/src/pybind/mgr/smb/tests/test_resources.py b/src/pybind/mgr/smb/tests/test_resources.py index 6fce09c2698..d8edfafe5d4 100644 --- a/src/pybind/mgr/smb/tests/test_resources.py +++ b/src/pybind/mgr/smb/tests/test_resources.py @@ -117,10 +117,6 @@ domain_settings: join_sources: - source_type: resource ref: bob - - source_type: password - auth: - username: Administrator - password: fallb4kP4ssw0rd --- resource_type: ceph.smb.share cluster_id: chacha @@ -168,13 +164,10 @@ def test_load_yaml_resource_yaml1(): assert cluster.intent == enums.Intent.PRESENT assert cluster.auth_mode == enums.AuthMode.ACTIVE_DIRECTORY assert cluster.domain_settings.realm == 'CEPH.SINK.TEST' - assert len(cluster.domain_settings.join_sources) == 2 + assert len(cluster.domain_settings.join_sources) == 1 jsrc = cluster.domain_settings.join_sources assert jsrc[0].source_type == enums.JoinSourceType.RESOURCE assert jsrc[0].ref == 'bob' - assert jsrc[1].source_type == enums.JoinSourceType.PASSWORD - assert jsrc[1].auth.username == 'Administrator' - assert jsrc[1].auth.password == 'fallb4kP4ssw0rd' assert isinstance(loaded[1], smb.resources.Share) assert isinstance(loaded[2], smb.resources.Share) @@ -427,7 +420,7 @@ domain_settings: "exc_type": ValueError, "error": "not supported", }, - # u/g inline missing + # u/g empty with extra ref { "yaml": """ resource_type: ceph.smb.cluster @@ -435,12 +428,13 @@ cluster_id: randolph intent: present auth_mode: user user_group_settings: - - source_type: inline + - source_type: empty + ref: xyz """, "exc_type": ValueError, - "error": "requires values", + "error": "ref may not be", }, - # u/g inline extra uri + # u/g resource missing { "yaml": """ resource_type: ceph.smb.cluster @@ -448,121 +442,168 @@ cluster_id: randolph intent: present auth_mode: user user_group_settings: - - source_type: inline - values: - users: [] - groups: [] - uri: http://foo.bar.example.com/baz.txt + - source_type: resource """, "exc_type": ValueError, - "error": "does not take", + "error": "reference value must be", }, - # u/g inline extra ref + # missing name 
field in login_control { "yaml": """ -resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: inline - values: - users: [] - groups: [] - ref: xyz +resource_type: ceph.smb.share +cluster_id: floop +share_id: ploof +cephfs: + volume: abc + path: /share1 + subvolume: foo +login_control: + - nmae: frink + access: r """, "exc_type": ValueError, - "error": "does not take", + "error": "field: name", }, - # u/g uri missing + # bad value in access field in login_control { "yaml": """ -resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: http_uri +resource_type: ceph.smb.share +cluster_id: floop +share_id: ploof +cephfs: + volume: abc + path: /share1 + subvolume: foo +login_control: + - name: frink + access: rwx """, "exc_type": ValueError, - "error": "requires", + "error": "rwx", }, - # u/g uri extra values + # bad value in category field in login_control { "yaml": """ -resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: http_uri - values: - users: [] - groups: [] - uri: http://foo.bar.example.com/baz.txt +resource_type: ceph.smb.share +cluster_id: floop +share_id: ploof +cephfs: + volume: abc + path: /share1 + subvolume: foo +login_control: + - category: admins + name: frink + access: admin """, "exc_type": ValueError, - "error": "does not take", + "error": "admins", }, - # u/g uri extra ref + # bad value in category field in login_control { "yaml": """ -resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: http_uri - uri: http://boop.example.net - ref: xyz +resource_type: ceph.smb.share +cluster_id: floop +share_id: ploof +cephfs: + volume: abc + path: /share1 + subvolume: foo +restrict_access: true """, "exc_type": ValueError, - "error": "does not take", + "error": "restricted access", }, - # u/g resource missing + # removed share, no cluster id value { "yaml": """ -resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: resource +resource_type: ceph.smb.share +cluster_id: "" +share_id: whammo +intent: removed +""", + "exc_type": ValueError, + "error": "cluster_id", + }, + # removed share, no share id value + { + "yaml": """ +resource_type: ceph.smb.share +cluster_id: whammo +share_id: "" +intent: removed +""", + "exc_type": ValueError, + "error": "share_id", + }, + # share w/o cephfs sub-obj + { + "yaml": """ +resource_type: ceph.smb.share +cluster_id: whammo +share_id: blammo """, "exc_type": ValueError, - "error": "requires", + "error": "cephfs", }, - # u/g resource extra values + # ad cluster, invalid join source, no ref { "yaml": """ resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: resource - ref: xyz - uri: http://example.net/foo +cluster_id: whammo +auth_mode: active-directory +domain_settings: + realm: FOO.EXAMPLE.NET + join_sources: + - {} """, "exc_type": ValueError, - "error": "does not take", + "error": "reference value", }, - # u/g resource extra resource + # removed cluster, no cluster_id value { "yaml": """ resource_type: ceph.smb.cluster -cluster_id: randolph -intent: present -auth_mode: user -user_group_settings: - - source_type: resource - ref: xyz - values: - users: [] - groups: [] +cluster_id: "" +intent: removed """, "exc_type": ValueError, 
- "error": "does not take", + "error": "cluster_id", + }, + # u&g, missing id value + { + "yaml": """ +resource_type: ceph.smb.usersgroups +users_groups_id: "" +""", + "exc_type": ValueError, + "error": "users_groups_id", + }, + # u&g, bad linked_to_cluster value + { + "yaml": """ +resource_type: ceph.smb.usersgroups +users_groups_id: wobble +linked_to_cluster: ~~~ +values: + users: + - name: charlie + password: 7unaF1sh + - name: lucky + password: CH4rmz + groups: [] +""", + "exc_type": ValueError, + "error": "not a valid", + }, + # join auth, missing id value + { + "yaml": """ +resource_type: ceph.smb.join.auth +auth_id: "" +""", + "exc_type": ValueError, + "error": "auth_id", }, ], ) @@ -636,3 +677,164 @@ placement: assert sd assert 'placement' in sd assert sd['placement'] == {'count': 3, 'label': 'ilovesmb'} + + +def test_share_with_login_control_1(): + import yaml + + yaml_str = """ +resource_type: ceph.smb.share +cluster_id: rhumba +share_id: shake +name: Shake It +cephfs: + volume: abc + path: /shake1 + subvolume: foo +login_control: + - name: bob + access: read +""" + data = yaml.safe_load_all(yaml_str) + loaded = smb.resources.load(data) + assert loaded + share = loaded[0] + assert share.login_control + assert len(share.login_control) == 1 + assert share.login_control[0].name == 'bob' + assert share.login_control[0].category == enums.LoginCategory.USER + assert share.login_control[0].access == enums.LoginAccess.READ_ONLY + + +def test_share_with_login_control_2(): + import yaml + + yaml_str = """ +resource_type: ceph.smb.share +cluster_id: rhumba +share_id: shake +name: Shake It +cephfs: + volume: abc + path: /shake1 + subvolume: foo +login_control: + - name: alice + access: r + - name: itstaff + category: group + access: rw + - name: caldor + category: user + access: admin + - name: delbard + access: none +""" + data = yaml.safe_load_all(yaml_str) + loaded = smb.resources.load(data) + assert loaded + share = loaded[0] + assert share.login_control + assert len(share.login_control) == 4 + assert share.login_control[0].name == 'alice' + assert share.login_control[0].category == enums.LoginCategory.USER + assert share.login_control[0].access == enums.LoginAccess.READ_ONLY + assert share.login_control[1].name == 'itstaff' + assert share.login_control[1].category == enums.LoginCategory.GROUP + assert share.login_control[1].access == enums.LoginAccess.READ_WRITE + assert share.login_control[2].name == 'caldor' + assert share.login_control[2].category == enums.LoginCategory.USER + assert share.login_control[2].access == enums.LoginAccess.ADMIN + assert share.login_control[3].name == 'delbard' + assert share.login_control[3].category == enums.LoginCategory.USER + assert share.login_control[3].access == enums.LoginAccess.NONE + + +@pytest.mark.parametrize( + "params", + [ + # single share json + { + "txt": """ +{ + "resource_type": "ceph.smb.share", + "cluster_id": "foo", + "share_id": "bar", + "cephfs": {"volume": "zippy", "path": "/"} +} +""", + 'simplified': [ + { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 'bar', + 'intent': 'present', + 'name': 'bar', + 'cephfs': { + 'volume': 'zippy', + 'path': '/', + 'provider': 'samba-vfs', + }, + 'browseable': True, + 'readonly': False, + } + ], + }, + # single share yaml + { + "txt": """ +resource_type: ceph.smb.share +cluster_id: foo +share_id: bar +cephfs: {volume: zippy, path: /} +""", + 'simplified': [ + { + 'resource_type': 'ceph.smb.share', + 'cluster_id': 'foo', + 'share_id': 'bar', + 'intent': 'present', + 
'name': 'bar', + 'cephfs': { + 'volume': 'zippy', + 'path': '/', + 'provider': 'samba-vfs', + }, + 'browseable': True, + 'readonly': False, + } + ], + }, + # invalid share yaml + { + "txt": """ +resource_type: ceph.smb.share +""", + 'exc_type': ValueError, + 'error': 'missing', + }, + # invalid input + { + "txt": """ +: +""", + 'exc_type': ValueError, + 'error': 'parsing', + }, + # invalid json, but useless yaml + { + "txt": """ +slithy +""", + 'exc_type': ValueError, + 'error': 'input', + }, + ], +) +def test_load_text(params): + if 'simplified' in params: + loaded = smb.resources.load_text(params['txt']) + assert params['simplified'] == [r.to_simplified() for r in loaded] + else: + with pytest.raises(params['exc_type'], match=params['error']): + smb.resources.load_text(params['txt']) diff --git a/src/pybind/mgr/smb/tests/test_smb.py b/src/pybind/mgr/smb/tests/test_smb.py index 03648750360..8943123f9d1 100644 --- a/src/pybind/mgr/smb/tests/test_smb.py +++ b/src/pybind/mgr/smb/tests/test_smb.py @@ -39,11 +39,7 @@ def test_internal_apply_cluster(tmodule): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -58,11 +54,7 @@ def test_cluster_add_cluster_ls(tmodule): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -80,11 +72,7 @@ def test_internal_apply_cluster_and_share(tmodule): auth_mode=smb.enums.AuthMode.USER, user_group_settings=[ smb.resources.UserGroupSource( - source_type=smb.resources.UserGroupSourceType.INLINE, - values=smb.resources.UserGroupSettings( - users=[], - groups=[], - ), + source_type=smb.resources.UserGroupSourceType.EMPTY, ), ], ) @@ -117,8 +105,7 @@ def test_internal_apply_remove_cluster(tmodule): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -149,8 +136,7 @@ def test_internal_apply_remove_shares(tmodule): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], }, @@ -230,8 +216,7 @@ def test_internal_apply_add_joinauth(tmodule): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -262,8 +247,7 @@ def test_internal_apply_add_usergroups(tmodule): 'intent': 'present', 'user_group_settings': [ { - 'source_type': 'inline', - 'values': {'users': [], 'groups': []}, + 'source_type': 'empty', } ], } @@ -296,15 +280,21 @@ def _example_cfg_1(tmodule): 'realm': 'dom1.example.com', 'join_sources': [ { - 'source_type': 'password', - 'auth': { - 'username': 'testadmin', - 'password': 'Passw0rd', - }, + 'source_type': 'resource', + 'ref': 'foo', } ], }, }, + 'join_auths.foo': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, 'shares.foo.s1': { 'resource_type': 'ceph.smb.share', 'cluster_id': 'foo', @@ -490,15 +480,24 @@ def test_cluster_create_ad1(tmodule): assert len(result.src.domain_settings.join_sources) == 1 assert ( 
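smb.resources.load_text can accept either JSON or YAML through a single parser because every JSON document is also valid YAML; the two distinct error fragments asserted in these cases ('parsing' vs 'input') separate text that fails to parse from text that parses to something useless, like the bare scalar slithy. A rough sketch of that split, assuming only PyYAML (the real load_text body is not part of this hunk):

    import yaml

    def load_text_sketch(txt: str):
        try:
            docs = list(yaml.safe_load_all(txt))
        except yaml.YAMLError as err:
            raise ValueError(f'parsing error: {err}') from err
        for doc in docs:
            if doc is not None and not isinstance(doc, dict):
                # e.g. 'slithy': valid YAML, useless as a resource
                raise ValueError(f'input is not an object: {doc!r}')
        return [d for d in docs if d is not None]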
result.src.domain_settings.join_sources[0].source_type - == smb.enums.JoinSourceType.PASSWORD + == smb.enums.JoinSourceType.RESOURCE ) + assert result.src.domain_settings.join_sources[0].ref.startswith('fizzle') + assert 'additional_results' in result.status + assert len(result.status['additional_results']) == 1 assert ( - result.src.domain_settings.join_sources[0].auth.username - == 'Administrator' + result.status['additional_results'][0]['resource']['resource_type'] + == 'ceph.smb.join.auth' ) assert ( - result.src.domain_settings.join_sources[0].auth.password == 'Passw0rd' + result.status['additional_results'][0]['resource'][ + 'linked_to_cluster' + ] + == 'fizzle' ) + assert result.status['additional_results'][0]['resource'][ + 'auth_id' + ].startswith('fizzle') def test_cluster_create_ad2(tmodule): @@ -556,6 +555,24 @@ def test_cluster_create_user1(tmodule): assert len(result.src.user_group_settings) == 1 +def test_cluster_create_user2(tmodule): + _example_cfg_1(tmodule) + + result = tmodule.cluster_create( + 'dizzle', + smb.enums.AuthMode.USER, + define_user_pass=['alice%123letmein', 'bob%1n0wh4t1t15'], + ) + assert result.success + assert result.status['state'] == 'created' + assert result.src.cluster_id == 'dizzle' + assert len(result.src.user_group_settings) == 1 + assert ( + result.src.user_group_settings[0].source_type + == smb.enums.UserGroupSourceType.RESOURCE + ) + + def test_cluster_create_badpass(tmodule): _example_cfg_1(tmodule) @@ -619,11 +636,8 @@ def test_cmd_show_resource_json(tmodule): "realm": "dom1.example.com", "join_sources": [ { - "source_type": "password", - "auth": { - "username": "testadmin", - "password": "Passw0rd" - } + "source_type": "resource", + "ref": "foo" } ] } @@ -649,9 +663,132 @@ intent: present domain_settings: realm: dom1.example.com join_sources: - - source_type: password - auth: - username: testadmin - password: Passw0rd + - source_type: resource + ref: foo """.strip() ) + + +def test_apply_invalid_res(tmodule): + result = tmodule.apply_resources( + """ +resource_type: ceph.smb.cluster +cluster_id: "" +auth_mode: doop +""" + ) + assert not result.success + assert 'doop' in result.to_simplified()['results'][0]['msg'] + + +def test_show_all(tmodule): + _example_cfg_1(tmodule) + out = tmodule.show() + assert 'resources' in out + res = out['resources'] + assert len(res) == 4 + assert {r['resource_type'] for r in res} == { + 'ceph.smb.cluster', + 'ceph.smb.share', + 'ceph.smb.join.auth', + } + + +def test_show_shares(tmodule): + _example_cfg_1(tmodule) + out = tmodule.show(['ceph.smb.share']) + assert 'resources' in out + res = out['resources'] + assert len(res) == 2 + assert {r['resource_type'] for r in res} == { + 'ceph.smb.share', + } + + +def test_show_shares_in_cluster(tmodule): + _example_cfg_1(tmodule) + out = tmodule.show(['ceph.smb.share.foo']) + assert 'resources' in out + res = out['resources'] + assert len(res) == 2 + assert {r['resource_type'] for r in res} == { + 'ceph.smb.share', + } + assert {r['cluster_id'] for r in res} == {'foo'} + + +def test_show_specific_share(tmodule): + _example_cfg_1(tmodule) + out = tmodule.show(['ceph.smb.share.foo.s1']) + assert 'resources' not in out + assert out['resource_type'] == 'ceph.smb.share' + assert out['cluster_id'] == 'foo' + assert out['share_id'] == 's1' + + +def test_show_nomatches(tmodule): + _example_cfg_1(tmodule) + out = tmodule.show(['ceph.smb.share.foo.whoops']) + assert 'resources' in out + assert out['resources'] == [] + + +def test_show_invalid_input(tmodule): + 
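test_cluster_create_ad1 now expects cluster_create to emit a linked ceph.smb.join.auth resource instead of embedding credentials in the cluster, with an auth_id derived from the cluster_id via rand_name (added in smb/utils.py later in this diff). A sketch of that derivation and linkage; the dict layouts follow the assertions above, while the surrounding orchestration code is not shown in this diff:

    import random
    import string

    def rand_name(prefix: str, max_len: int = 18, suffix_len: int = 8) -> str:
        # same shape as smb.utils.rand_name below
        trunc = prefix[: max_len - suffix_len]
        suffix = ''.join(
            random.choice(string.ascii_lowercase) for _ in range(suffix_len))
        return f'{trunc}{suffix}'

    cluster_id = 'fizzle'
    auth_id = rand_name(cluster_id)   # e.g. 'fizzlekqzvwbya'
    join_auth = {
        'resource_type': 'ceph.smb.join.auth',
        'auth_id': auth_id,
        'linked_to_cluster': cluster_id,
        'auth': {'username': 'testadmin', 'password': 'Passw0rd'},
    }
    join_source = {'source_type': 'resource', 'ref': auth_id}
    assert auth_id.startswith('fizzle')
    assert len(auth_id) == len(cluster_id) + 8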
_example_cfg_1(tmodule) + with pytest.raises(smb.cli.InvalidInputValue): + tmodule.show(['ceph.smb.export']) + + +def test_show_cluster_without_shares(tmodule): + # this cluster will have no shares associated with it + tmodule._internal_store.overwrite( + { + 'clusters.foo': { + 'resource_type': 'ceph.smb.cluster', + 'cluster_id': 'foo', + 'auth_mode': 'active-directory', + 'intent': 'present', + 'domain_settings': { + 'realm': 'dom1.example.com', + 'join_sources': [ + { + 'source_type': 'resource', + 'ref': 'foo', + } + ], + }, + }, + 'join_auths.foo': { + 'resource_type': 'ceph.smb.join.auth', + 'auth_id': 'foo', + 'intent': 'present', + 'auth': { + 'username': 'testadmin', + 'password': 'Passw0rd', + }, + }, + } + ) + + res, body, status = tmodule.show.command(['ceph.smb.cluster.foo']) + assert res == 0 + assert ( + body.strip() + == """ +{ + "resource_type": "ceph.smb.cluster", + "cluster_id": "foo", + "auth_mode": "active-directory", + "intent": "present", + "domain_settings": { + "realm": "dom1.example.com", + "join_sources": [ + { + "source_type": "resource", + "ref": "foo" + } + ] + } +} + """.strip() + ) diff --git a/src/pybind/mgr/smb/tests/test_utils.py b/src/pybind/mgr/smb/tests/test_utils.py new file mode 100644 index 00000000000..99f9ce53faa --- /dev/null +++ b/src/pybind/mgr/smb/tests/test_utils.py @@ -0,0 +1,43 @@ +import pytest + +import smb.utils + + +def test_one(): + assert smb.utils.one(['a']) == 'a' + with pytest.raises(ValueError): + smb.utils.one([]) + with pytest.raises(ValueError): + smb.utils.one(['a', 'b']) + + +def test_rand_name(): + name = smb.utils.rand_name('bob') + assert name.startswith('bob') + assert len(name) == 11 + name = smb.utils.rand_name('carla') + assert name.startswith('carla') + assert len(name) == 13 + name = smb.utils.rand_name('dangeresque') + assert name.startswith('dangeresqu') + assert len(name) == 18 + name = smb.utils.rand_name('fhqwhgadsfhqwhgadsfhqwhgads') + assert name.startswith('fhqwhgadsf') + assert len(name) == 18 + name = smb.utils.rand_name('') + assert len(name) == 8 + + +def test_checked(): + assert smb.utils.checked('foo') == 'foo' + assert smb.utils.checked(77) == 77 + assert smb.utils.checked(0) == 0 + with pytest.raises(smb.utils.IsNoneError): + smb.utils.checked(None) + + +def test_ynbool(): + assert smb.utils.ynbool(True) == 'Yes' + assert smb.utils.ynbool(False) == 'No' + # for giggles + assert smb.utils.ynbool(0) == 'No' diff --git a/src/pybind/mgr/smb/tests/test_validation.py b/src/pybind/mgr/smb/tests/test_validation.py index 6210c179c88..248b68966cf 100644 --- a/src/pybind/mgr/smb/tests/test_validation.py +++ b/src/pybind/mgr/smb/tests/test_validation.py @@ -75,3 +75,58 @@ def test_valid_path(value, valid): else: with pytest.raises(ValueError): smb.validation.check_path(value) + + +def _ovr(value): + value[ + smb.validation.CUSTOM_CAUTION_KEY + ] = smb.validation.CUSTOM_CAUTION_VALUE + return value + + +@pytest.mark.parametrize( + "value,errmatch", + [ + ({"foo": "bar"}, "lack"), + (_ovr({"foo": "bar"}), ""), + (_ovr({"foo": "bar", "zip": "zap"}), ""), + (_ovr({"mod:foo": "bar", "zip": "zap"}), ""), + (_ovr({"foo\n": "bar"}), "newlines"), + (_ovr({"foo": "bar\n"}), "newlines"), + (_ovr({"[foo]": "bar\n"}), "brackets"), + ], +) +def test_check_custom_options(value, errmatch): + if not errmatch: + smb.validation.check_custom_options(value) + else: + with pytest.raises(ValueError, match=errmatch): + smb.validation.check_custom_options(value) + + +def test_clean_custom_options(): + orig = {'foo': 'bar', 'big': 'bad', 
'bugs': 'bongo'} + updated = _ovr(dict(orig)) + smb.validation.check_custom_options(updated) + assert smb.validation.clean_custom_options(updated) == orig + assert smb.validation.clean_custom_options(None) is None + + +@pytest.mark.parametrize( + "value,ok,err_match", + [ + ("tim", True, ""), + ("britons\\arthur", True, ""), + ("lance a lot", False, "spaces, tabs, or newlines"), + ("tabs\ta\tlot", False, "spaces, tabs, or newlines"), + ("bed\nivere", False, "spaces, tabs, or newlines"), + ("runawa" + ("y" * 122), True, ""), + ("runawa" + ("y" * 123), False, "128"), + ], +) +def test_check_access_name(value, ok, err_match): + if ok: + smb.validation.check_access_name(value) + else: + with pytest.raises(ValueError, match=err_match): + smb.validation.check_access_name(value) diff --git a/src/pybind/mgr/smb/utils.py b/src/pybind/mgr/smb/utils.py new file mode 100644 index 00000000000..2646815f112 --- /dev/null +++ b/src/pybind/mgr/smb/utils.py @@ -0,0 +1,46 @@ +"""Assorted utility functions for smb mgr module.""" +from typing import List, Optional, TypeVar + +import random +import string + +T = TypeVar('T') + + +def one(lst: List[T]) -> T: + """Given a list, ensure that the list contains exactly one item and return + it. A ValueError will be raised in the case that the list does not contain + exactly one item. + """ + if len(lst) != 1: + raise ValueError("list does not contain exactly one element") + return lst[0] + + +class IsNoneError(ValueError): + """A ValueError subclass raised by ``checked`` function.""" + + pass + + +def checked(v: Optional[T]) -> T: + """Ensures the provided value is not a None or raises a IsNoneError. + Intended use is similar to an `assert v is not None` but more usable in + one-liners and list/dict/etc comprehensions. + """ + if v is None: + raise IsNoneError('value is None') + return v + + +def ynbool(value: bool) -> str: + """Convert a bool to an smb.conf-style boolean string.""" + return 'Yes' if value else 'No' + + +def rand_name(prefix: str, max_len: int = 18, suffix_len: int = 8) -> str: + trunc = prefix[: (max_len - suffix_len)] + suffix = ''.join( + random.choice(string.ascii_lowercase) for _ in range(suffix_len) + ) + return f'{trunc}{suffix}' diff --git a/src/pybind/mgr/smb/validation.py b/src/pybind/mgr/smb/validation.py index 3e04650dea9..f9607cddcdf 100644 --- a/src/pybind/mgr/smb/validation.py +++ b/src/pybind/mgr/smb/validation.py @@ -1,3 +1,5 @@ +from typing import Dict, Optional + import posixpath import re @@ -24,7 +26,7 @@ def valid_id(value: str) -> bool: def check_id(value: str) -> None: """Raise ValueError if value is not a valid ID.""" if not valid_id(value): - raise ValueError(f"{value:!r} is not a valid ID") + raise ValueError(f"{value!r} is not a valid ID") def valid_share_name(value: str) -> bool: @@ -35,7 +37,7 @@ def valid_share_name(value: str) -> bool: def check_share_name(value: str) -> None: """Raise ValueError if value is not a valid share name.""" if not valid_share_name(value): - raise ValueError(f"{value:!r} is not a valid share name") + raise ValueError(f"{value!r} is not a valid share name") # alias for normpath so other smb libs can just import validation module @@ -60,3 +62,53 @@ def check_path(value: str) -> None: """Raise ValueError if value is not a valid share path.""" if not valid_path(value): raise ValueError(f'{value!r} is not a valid share path') + + +CUSTOM_CAUTION_KEY = '_allow_customization' +CUSTOM_CAUTION_VALUE = ( + 'i-take-responsibility-for-all-samba-configuration-errors' +) + + +def check_custom_options(opts: 
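checked() is aimed at spots where a None has logically been ruled out but the type checker cannot see it, such as inside comprehensions, and one() complements it for results that must be singletons. A small usage sketch against the helpers defined above (the variable names and data are invented):

    from typing import List, Optional

    import smb.utils

    maybe_refs: List[Optional[str]] = ['a1', 'b2']
    # strips Optional per element; raises IsNoneError on a stray None
    refs: List[str] = [smb.utils.checked(r) for r in maybe_refs]

    # collapse a result list that must contain exactly one entry
    row = smb.utils.one([('share1', 'ok')])
    assert row == ('share1', 'ok')
    assert smb.utils.ynbool(bool(refs)) == 'Yes'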
Optional[Dict[str, str]]) -> None: + """Raise ValueError if a custom configuration options dict is not valid.""" + if opts is None: + return + if opts.get(CUSTOM_CAUTION_KEY) != CUSTOM_CAUTION_VALUE: + raise ValueError( + 'options lack custom override permission key and value' + f' (review documentation pertaining to {CUSTOM_CAUTION_KEY})' + ) + for key, value in opts.items(): + if '[' in key or ']' in key: + raise ValueError( + f'custom option key may not contain square brackets: {key!r}' + ) + if '\n' in key: + raise ValueError( + f'custom option key may not contain newlines: {key!r}' + ) + if '\n' in value: + raise ValueError( + f'custom option value may not contain newlines: {key!r}' + ) + + +def clean_custom_options( + opts: Optional[Dict[str, str]] +) -> Optional[Dict[str, str]]: + """Return a version of the custom options dictionary cleaned of special + validation parameters. + """ + if opts is None: + return None + return {k: v for k, v in opts.items() if k != CUSTOM_CAUTION_KEY} + + +def check_access_name(name: str) -> None: + if ' ' in name or '\t' in name or '\n' in name: + raise ValueError( + 'login name may not contain spaces, tabs, or newlines' + ) + if len(name) > 128: + raise ValueError('login name may not exceed 128 characters') diff --git a/src/pybind/mgr/tox.ini b/src/pybind/mgr/tox.ini index 53a0a776626..4190bcf44db 100644 --- a/src/pybind/mgr/tox.ini +++ b/src/pybind/mgr/tox.ini @@ -1,13 +1,13 @@ [tox] envlist = - py3 - mypy - fix flake8 + mypy jinjalint nooptional check-black check-isort + py3 + py37 skipsdist = true skip_missing_interpreters = true @@ -124,30 +124,6 @@ setenv = {[testenv]setenv} deps = {[testenv]deps} commands = {[testenv]commands} -[testenv:fix] -basepython = python3 -deps = - autopep8 -modules = - alerts \ - balancer \ - cephadm \ - cli_api \ - crash \ - devicehealth \ - diskprediction_local \ - insights \ - iostat \ - nfs \ - orchestrator \ - prometheus \ - rgw \ - status \ - telemetry -commands = - python --version - autopep8 {[autopep8]addopts} \ - {posargs:{[testenv:fix]modules}} [testenv:pylint] deps = @@ -172,7 +148,10 @@ modules = hello \ iostat \ localpool \ + mgr_module.py \ + mgr_util.py \ nfs \ + object_format.py \ orchestrator \ prometheus \ rbd_support \ diff --git a/src/pybind/mgr/volumes/fs/operations/clone_index.py b/src/pybind/mgr/volumes/fs/operations/clone_index.py index f5a850638d8..1f16500a6bd 100644 --- a/src/pybind/mgr/volumes/fs/operations/clone_index.py +++ b/src/pybind/mgr/volumes/fs/operations/clone_index.py @@ -48,24 +48,26 @@ class CloneIndex(Index): raise IndexException(-e.args[0], e.args[1]) def get_oldest_clone_entry(self, exclude=[]): - min_ctime_entry = None - exclude_tracking_ids = [v[0] for v in exclude] - log.debug("excluded tracking ids: {0}".format(exclude_tracking_ids)) - for entry in list_one_entry_at_a_time(self.fs, self.path): - dname = entry.d_name - dpath = os.path.join(self.path, dname) - st = self.fs.lstat(dpath) - if dname not in exclude_tracking_ids and stat.S_ISLNK(st.st_mode): - if min_ctime_entry is None or st.st_ctime < min_ctime_entry[1].st_ctime: - min_ctime_entry = (dname, st) - if min_ctime_entry: - try: + try: + min_ctime_entry = None + exclude_tracking_ids = [v[0] for v in exclude] + log.debug("excluded tracking ids: {0}".format(exclude_tracking_ids)) + for entry in list_one_entry_at_a_time(self.fs, self.path): + dname = entry.d_name + dpath = os.path.join(self.path, dname) + st = self.fs.lstat(dpath) + if dname not in exclude_tracking_ids and stat.S_ISLNK(st.st_mode): + if min_ctime_entry 
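Putting the two halves together: a caller passes the caution key through check_custom_options and then strips it with clean_custom_options before the options reach the smb.conf renderer. A usage sketch of the functions above (the sample option is invented):

    from smb import validation

    opts = {
        validation.CUSTOM_CAUTION_KEY: validation.CUSTOM_CAUTION_VALUE,
        'server min protocol': 'SMB2',
    }
    validation.check_custom_options(opts)   # caution key present, no newlines/brackets
    rendered = validation.clean_custom_options(opts)
    assert rendered == {'server min protocol': 'SMB2'}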
is None or st.st_ctime < min_ctime_entry[1].st_ctime: + min_ctime_entry = (dname, st) + if min_ctime_entry: linklen = min_ctime_entry[1].st_size sink_path = self.fs.readlink(os.path.join(self.path, min_ctime_entry[0]), CloneIndex.PATH_MAX) return (min_ctime_entry[0], sink_path[:linklen]) - except cephfs.Error as e: - raise IndexException(-e.args[0], e.args[1]) - return None + return None + except cephfs.Error as e: + log.debug('Exception cephfs.Error has been caught. Printing ' + f'the exception - {e}') + raise IndexException(-e.args[0], e.args[1]) def find_clone_entry_index(self, sink_path): try: diff --git a/src/pybind/rbd/c_rbd.pxd b/src/pybind/rbd/c_rbd.pxd index c451baac58e..c39e945b465 100644 --- a/src/pybind/rbd/c_rbd.pxd +++ b/src/pybind/rbd/c_rbd.pxd @@ -86,6 +86,10 @@ cdef extern from "rbd/librbd.h" nogil: char *group_name char *group_snap_name + ctypedef struct rbd_snap_trash_namespace_t: + rbd_snap_namespace_type_t original_namespace_type; + char *original_name; + ctypedef enum rbd_snap_mirror_state_t: _RBD_SNAP_MIRROR_STATE_PRIMARY "RBD_SNAP_MIRROR_STATE_PRIMARY" _RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED" @@ -329,6 +333,9 @@ cdef extern from "rbd/librbd.h" nogil: int rbd_clone3(rados_ioctx_t p_ioctx, const char *p_name, const char *p_snapname, rados_ioctx_t c_ioctx, const char *c_name, rbd_image_options_t c_opts) + int rbd_clone4(rados_ioctx_t p_ioctx, const char *p_name, + uint64_t p_snap_id, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts) int rbd_remove_with_progress(rados_ioctx_t io, const char *name, librbd_progress_fn_t cb, void *cbdata) int rbd_rename(rados_ioctx_t src_io_ctx, const char *srcname, @@ -546,8 +553,11 @@ cdef extern from "rbd/librbd.h" nogil: size_t snap_group_namespace_size) void rbd_snap_group_namespace_cleanup(rbd_snap_group_namespace_t *group_spec, size_t snap_group_namespace_size) - int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, - char *original_name, size_t max_length) + int rbd_snap_get_trash_namespace2(rbd_image_t image, uint64_t snap_id, + rbd_snap_trash_namespace_t *trash_snap, + size_t trash_snap_size) + void rbd_snap_trash_namespace_cleanup(rbd_snap_trash_namespace_t *trash_snap, + size_t trash_snap_size) int rbd_snap_get_mirror_namespace( rbd_image_t image, uint64_t snap_id, rbd_snap_mirror_namespace_t *mirror_ns, @@ -662,6 +672,8 @@ cdef extern from "rbd/librbd.h" nogil: int rbd_group_create(rados_ioctx_t p, const char *name) int rbd_group_remove(rados_ioctx_t p, const char *name) int rbd_group_list(rados_ioctx_t p, char *names, size_t *size) + int rbd_group_get_id(rados_ioctx_t p, const char *group_name, + char *group_id, size_t *size) int rbd_group_rename(rados_ioctx_t p, const char *src, const char *dest) void rbd_group_info_cleanup(rbd_group_info_t *group_info, size_t group_info_size) diff --git a/src/pybind/rbd/mock_rbd.pxi b/src/pybind/rbd/mock_rbd.pxi index 15f2c78e3b1..bc6132ce6b9 100644 --- a/src/pybind/rbd/mock_rbd.pxi +++ b/src/pybind/rbd/mock_rbd.pxi @@ -90,6 +90,10 @@ cdef nogil: char *group_name char *group_snap_name + ctypedef struct rbd_snap_trash_namespace_t: + rbd_snap_namespace_type_t original_namespace_type; + char *original_name; + ctypedef enum rbd_snap_mirror_state_t: _RBD_SNAP_MIRROR_STATE_PRIMARY "RBD_SNAP_MIRROR_STATE_PRIMARY" _RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED "RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED" @@ -350,6 +354,10 @@ cdef nogil: const char *p_snapname, rados_ioctx_t c_ioctx, const char *c_name, rbd_image_options_t c_opts): pass 
+ int rbd_clone4(rados_ioctx_t p_ioctx, const char *p_name, + uint64_t p_snap_id, rados_ioctx_t c_ioctx, + const char *c_name, rbd_image_options_t c_opts): + pass int rbd_remove_with_progress(rados_ioctx_t io, const char *name, librbd_progress_fn_t cb, void *cbdata): pass @@ -673,8 +681,12 @@ cdef nogil: void rbd_snap_group_namespace_cleanup(rbd_snap_group_namespace_t *group_spec, size_t snap_group_namespace_size): pass - int rbd_snap_get_trash_namespace(rbd_image_t image, uint64_t snap_id, - char *original_name, size_t max_length): + int rbd_snap_get_trash_namespace2(rbd_image_t image, uint64_t snap_id, + rbd_snap_trash_namespace_t *trash_snap, + size_t trash_snap_size): + pass + void rbd_snap_trash_namespace_cleanup(rbd_snap_trash_namespace_t *trash_snap, + size_t trash_snap_size): pass int rbd_snap_get_mirror_namespace( rbd_image_t image, uint64_t snap_id, @@ -841,6 +853,9 @@ cdef nogil: pass int rbd_group_list(rados_ioctx_t p, char *names, size_t *size): pass + int rbd_group_get_id(rados_ioctx_t p, const char *group_name, + char *group_id, size_t *size): + pass int rbd_group_rename(rados_ioctx_t p, const char *src, const char *dest): pass void rbd_group_info_cleanup(rbd_group_info_t *group_info, diff --git a/src/pybind/rbd/rbd.pyx b/src/pybind/rbd/rbd.pyx index df176a410a9..d023e231bcf 100644 --- a/src/pybind/rbd/rbd.pyx +++ b/src/pybind/rbd/rbd.pyx @@ -29,7 +29,7 @@ try: from collections.abc import Iterable except ImportError: from collections import Iterable -from datetime import datetime +from datetime import datetime, timezone import errno from itertools import chain import time @@ -632,7 +632,7 @@ class RBD(object): if ret < 0: raise make_ex(ret, 'error creating image') - def clone(self, p_ioctx, p_name, p_snapname, c_ioctx, c_name, + def clone(self, p_ioctx, p_name, p_snapshot, c_ioctx, c_name, features=None, order=None, stripe_unit=None, stripe_count=None, data_pool=None, clone_format=None): """ @@ -642,7 +642,7 @@ class RBD(object): :type ioctx: :class:`rados.Ioctx` :param p_name: the parent image name :type name: str - :param p_snapname: the parent image snapshot name + :param p_snapshot: the parent image snapshot name or id :type name: str :param c_ioctx: the child context that represents the new clone :type ioctx: :class:`rados.Ioctx` @@ -666,7 +666,6 @@ class RBD(object): :raises: :class:`FunctionNotSupported` :raises: :class:`ArgumentOutOfRange` """ - p_snapname = cstr(p_snapname, 'p_snapname') p_name = cstr(p_name, 'p_name') c_name = cstr(c_name, 'c_name') data_pool = cstr(data_pool, 'data_pool', opt=True) @@ -674,9 +673,18 @@ class RBD(object): rados_ioctx_t _p_ioctx = convert_ioctx(p_ioctx) rados_ioctx_t _c_ioctx = convert_ioctx(c_ioctx) char *_p_name = p_name - char *_p_snapname = p_snapname + char *_p_snap_name + uint64_t _p_snap_id char *_c_name = c_name rbd_image_options_t opts + if isinstance(p_snapshot, str): + p_snap_name = cstr(p_snapshot, 'p_snapshot') + _p_snap_name = p_snap_name + elif isinstance(p_snapshot, int): + p_snap_name = None + _p_snap_id = p_snapshot + else: + raise TypeError("p_snapshot must be a string or an integer") rbd_image_options_create(&opts) try: @@ -698,9 +706,14 @@ class RBD(object): if clone_format is not None: rbd_image_options_set_uint64(opts, RBD_IMAGE_OPTION_CLONE_FORMAT, clone_format) - with nogil: - ret = rbd_clone3(_p_ioctx, _p_name, _p_snapname, - _c_ioctx, _c_name, opts) + if p_snap_name is not None: + with nogil: + ret = rbd_clone3(_p_ioctx, _p_name, _p_snap_name, + _c_ioctx, _c_name, opts) + else: + with nogil: + ret = 
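On the Python side the updated RBD.clone() keeps one entry point and dispatches on the type of p_snapshot: a str resolves by snapshot name through rbd_clone3, an int by snapshot id through the new rbd_clone4, and anything else raises TypeError. A usage sketch; the pool, image, and snap values are invented and a reachable cluster is assumed:

    import rados
    import rbd

    with rados.Rados(conffile='/etc/ceph/ceph.conf') as cluster:
        with cluster.open_ioctx('rbd') as ioctx:
            rbd_inst = rbd.RBD()
            # by snapshot name, as before (rbd_clone3 underneath)
            rbd_inst.clone(ioctx, 'parent', 'snap1', ioctx, 'child-by-name')
            # by snapshot id, new with this change (rbd_clone4 underneath)
            rbd_inst.clone(ioctx, 'parent', 14, ioctx, 'child-by-id')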
rbd_clone4(_p_ioctx, _p_name, _p_snap_id, + _c_ioctx, _c_name, opts) finally: rbd_image_options_destroy(opts) if ret < 0: @@ -922,8 +935,10 @@ class RBD(object): 'id' : decode_cstr(c_info.id), 'name' : decode_cstr(c_info.name), 'source' : __source_string[c_info.source], - 'deletion_time' : datetime.utcfromtimestamp(c_info.deletion_time), - 'deferment_end_time' : datetime.utcfromtimestamp(c_info.deferment_end_time) + 'deletion_time' : datetime.fromtimestamp(c_info.deletion_time, + tz=timezone.utc), + 'deferment_end_time' : datetime.fromtimestamp(c_info.deferment_end_time, + tz=timezone.utc) } rbd_trash_get_cleanup(&c_info) return info @@ -2283,7 +2298,8 @@ cdef class MirrorImageStatusIterator(object): site_status = { 'state' : s_status.state, 'description' : decode_cstr(s_status.description), - 'last_update' : datetime.utcfromtimestamp(s_status.last_update), + 'last_update' : datetime.fromtimestamp(s_status.last_update, + tz=timezone.utc), 'up' : s_status.up, } mirror_uuid = decode_cstr(s_status.mirror_uuid) @@ -2642,6 +2658,29 @@ cdef class Group(object): def __exit__(self, type_, value, traceback): return False + + def id(self): + """ + Get group's id. + + :returns: str - group id + """ + cdef: + size_t size = 32 + char *id = NULL + try: + while True: + id = <char *>realloc_chk(id, size) + with nogil: + ret = rbd_group_get_id(self._ioctx, self._name, id, &size) + if ret >= 0: + break + elif ret != -errno.ERANGE: + raise make_ex(ret, 'error getting id for group %s' % self._name, + group_errno_to_exception) + return decode_cstr(id) + finally: + free(id) def add_image(self, image_ioctx, image_name): """ @@ -3164,9 +3203,23 @@ cdef class Image(object): Get spec of the cloned image's parent :returns: dict - contains the following keys: + + * ``pool_id`` (int) - parent pool id + * ``pool_name`` (str) - parent pool name + * ``pool_namespace`` (str) - parent pool namespace + + * ``image_id`` (str) - parent image id + * ``image_name`` (str) - parent image name + + * ``trash`` (bool) - True if parent image is in trash bin + + * ``snap_id`` (int) - parent snapshot id + + * ``snap_namespace_type`` (int) - parent snapshot namespace type + * ``snap_name`` (str) - parent snapshot name :raises: :class:`ImageNotFound` if the image doesn't have a parent @@ -3179,9 +3232,14 @@ cdef class Image(object): if ret != 0: raise make_ex(ret, 'error getting parent info for image %s' % self.name) - result = {'pool_name': decode_cstr(parent_spec.pool_name), + result = {'pool_id': parent_spec.pool_id, + 'pool_name': decode_cstr(parent_spec.pool_name), 'pool_namespace': decode_cstr(parent_spec.pool_namespace), + 'image_id': decode_cstr(parent_spec.image_id), 'image_name': decode_cstr(parent_spec.image_name), + 'trash': parent_spec.trash, + 'snap_id': snap_spec.id, + 'snap_namespace_type': snap_spec.namespace_type, 'snap_name': decode_cstr(snap_spec.name)} rbd_linked_image_spec_cleanup(&parent_spec) @@ -3759,7 +3817,7 @@ cdef class Image(object): ret = rbd_snap_get_timestamp(self.image, _snap_id, ×tamp) if ret != 0: raise make_ex(ret, 'error getting snapshot timestamp for image: %s, snap_id: %d' % (self.name, snap_id)) - return datetime.utcfromtimestamp(timestamp.tv_sec) + return datetime.fromtimestamp(timestamp.tv_sec, tz=timezone.utc) @requires_not_closed def remove_snap_limit(self): @@ -4081,7 +4139,7 @@ written." 
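Group.id() above follows the usual librbd size-negotiation convention: call with a guessed buffer, and while the call returns -ERANGE the C side reports the size it actually needs back through the size_t pointer, so the caller reallocates and retries. The same loop in plain Python terms; fill is a stand-in for a call like rbd_group_get_id, not a real binding:

    import errno

    def read_with_growing_buffer(fill, initial_size: int = 32) -> bytes:
        size = initial_size
        while True:
            buf = bytearray(size)
            # stand-in contract: returns (retcode, required_size), fills buf on success
            ret, size = fill(buf, size)
            if ret >= 0:
                return bytes(buf).split(b'\0', 1)[0]   # NUL-terminated C string
            if ret != -errno.ERANGE:
                raise OSError(-ret, 'call failed')
            # on -ERANGE, loop again with the larger size reported by the callee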
% (self.name, ret, length)) ret = rbd_get_create_timestamp(self.image, ×tamp) if ret != 0: raise make_ex(ret, 'error getting create timestamp for image: %s' % (self.name)) - return datetime.utcfromtimestamp(timestamp.tv_sec) + return datetime.fromtimestamp(timestamp.tv_sec, tz=timezone.utc) @requires_not_closed def access_timestamp(self): @@ -4094,7 +4152,7 @@ written." % (self.name, ret, length)) ret = rbd_get_access_timestamp(self.image, ×tamp) if ret != 0: raise make_ex(ret, 'error getting access timestamp for image: %s' % (self.name)) - return datetime.fromtimestamp(timestamp.tv_sec) + return datetime.fromtimestamp(timestamp.tv_sec, tz=timezone.utc) @requires_not_closed def modify_timestamp(self): @@ -4107,7 +4165,7 @@ written." % (self.name, ret, length)) ret = rbd_get_modify_timestamp(self.image, ×tamp) if ret != 0: raise make_ex(ret, 'error getting modify timestamp for image: %s' % (self.name)) - return datetime.fromtimestamp(timestamp.tv_sec) + return datetime.fromtimestamp(timestamp.tv_sec, tz=timezone.utc) @requires_not_closed def flatten(self, on_progress=None): @@ -4678,7 +4736,8 @@ written." % (self.name, ret, length)) site_status = { 'state' : s_status.state, 'description' : decode_cstr(s_status.description), - 'last_update' : datetime.utcfromtimestamp(s_status.last_update), + 'last_update' : datetime.fromtimestamp(s_status.last_update, + tz=timezone.utc), 'up' : s_status.up, } mirror_uuid = decode_cstr(s_status.mirror_uuid) @@ -5135,28 +5194,26 @@ written." % (self.name, ret, length)) :type key: int :returns: dict - contains the following keys: + * ``original_namespace_type`` (int) - original snap namespace type + * ``original_name`` (str) - original snap name """ cdef: + rbd_snap_trash_namespace_t trash_snap uint64_t _snap_id = snap_id - size_t _size = 512 - char *_name = NULL - try: - while True: - _name = <char*>realloc_chk(_name, _size); - with nogil: - ret = rbd_snap_get_trash_namespace(self.image, _snap_id, - _name, _size) - if ret >= 0: - break - elif ret != -errno.ERANGE: - raise make_ex(ret, 'error getting snapshot trash ' - 'namespace image: %s, snap_id: %d' % (self.name, snap_id)) - return { - 'original_name' : decode_cstr(_name) - } - finally: - free(_name) + with nogil: + ret = rbd_snap_get_trash_namespace2(self.image, _snap_id, + &trash_snap, sizeof(trash_snap)) + if ret != 0: + raise make_ex(ret, 'error getting snapshot trash ' + 'namespace for image: %s, snap_id: %d' % + (self.name, snap_id)) + result = { + 'original_namespace_type' : trash_snap.original_namespace_type, + 'original_name' : decode_cstr(trash_snap.original_name) + } + rbd_snap_trash_namespace_cleanup(&trash_snap, sizeof(trash_snap)) + return result @requires_not_closed def snap_get_mirror_namespace(self, snap_id): @@ -5650,8 +5707,10 @@ cdef class TrashIterator(object): 'id' : decode_cstr(self.entries[i].id), 'name' : decode_cstr(self.entries[i].name), 'source' : TrashIterator.__source_string[self.entries[i].source], - 'deletion_time' : datetime.utcfromtimestamp(self.entries[i].deletion_time), - 'deferment_end_time' : datetime.utcfromtimestamp(self.entries[i].deferment_end_time) + 'deletion_time' : datetime.fromtimestamp(self.entries[i].deletion_time, + tz=timezone.utc), + 'deferment_end_time' : datetime.fromtimestamp(self.entries[i].deferment_end_time, + tz=timezone.utc) } def __dealloc__(self): diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index b91b62b02ac..4b88cf80442 100644 --- 
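Every utcfromtimestamp() call site above changes for the same reason: datetime.utcfromtimestamp() returns a naive datetime and is deprecated as of Python 3.12, whereas fromtimestamp(ts, tz=timezone.utc) yields a timezone-aware value for the same instant:

    from datetime import datetime, timezone

    ts = 1_700_000_000
    aware = datetime.fromtimestamp(ts, tz=timezone.utc)   # tz-aware, not deprecated
    assert aware.tzinfo is timezone.utc
    # the old spelling produced the same wall-clock fields, minus tzinfo:
    #   datetime.utcfromtimestamp(ts) == aware.replace(tzinfo=None)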
a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -765,6 +765,7 @@ class ServiceSpec(object): 'elasticsearch', 'grafana', 'ingress', + 'mgmt-gateway', 'iscsi', 'jaeger-agent', 'jaeger-collector', @@ -819,6 +820,7 @@ class ServiceSpec(object): 'nvmeof': NvmeofServiceSpec, 'alertmanager': AlertManagerSpec, 'ingress': IngressSpec, + 'mgmt-gateway': MgmtGatewaySpec, 'container': CustomContainerSpec, 'grafana': GrafanaSpec, 'node-exporter': MonitoringSpec, @@ -1331,6 +1333,7 @@ class NvmeofServiceSpec(ServiceSpec): server_cert: Optional[str] = None, client_key: Optional[str] = None, client_cert: Optional[str] = None, + root_ca_cert: Optional[str] = None, spdk_path: Optional[str] = None, tgt_path: Optional[str] = None, spdk_timeout: Optional[float] = 60.0, @@ -1408,13 +1411,15 @@ class NvmeofServiceSpec(ServiceSpec): #: ``bdevs_per_cluster`` number of bdevs per cluster self.bdevs_per_cluster = bdevs_per_cluster #: ``server_key`` gateway server key - self.server_key = server_key or './server.key' + self.server_key = server_key #: ``server_cert`` gateway server certificate - self.server_cert = server_cert or './server.crt' + self.server_cert = server_cert #: ``client_key`` client key - self.client_key = client_key or './client.key' + self.client_key = client_key #: ``client_cert`` client certificate - self.client_cert = client_cert or './client.crt' + self.client_cert = client_cert + #: ``root_ca_cert`` CA cert for server/client certs + self.root_ca_cert = root_ca_cert #: ``spdk_path`` path to SPDK self.spdk_path = spdk_path or '/usr/local/bin/nvmf_tgt' #: ``tgt_path`` nvmeof target path @@ -1469,9 +1474,15 @@ class NvmeofServiceSpec(ServiceSpec): raise SpecValidationError('Cannot add NVMEOF: No Pool specified') if self.enable_auth: - if not any([self.server_key, self.server_cert, self.client_key, self.client_cert]): - raise SpecValidationError( - 'enable_auth is true but client/server certificates are missing') + if not all([self.server_key, self.server_cert, self.client_key, + self.client_cert, self.root_ca_cert]): + err_msg = 'enable_auth is true but ' + for cert_key_attr in ['server_key', 'server_cert', 'client_key', + 'client_cert', 'root_ca_cert']: + if not getattr(self, cert_key_attr): + err_msg += f'{cert_key_attr}, ' + err_msg += 'attribute(s) not set in the spec' + raise SpecValidationError(err_msg) if self.transports not in ['tcp']: raise SpecValidationError('Invalid transport. 
Valid values are tcp') @@ -1755,6 +1766,135 @@ class IngressSpec(ServiceSpec): yaml.add_representer(IngressSpec, ServiceSpec.yaml_representer) + +class MgmtGatewaySpec(ServiceSpec): + def __init__(self, + service_type: str = 'mgmt-gateway', + service_id: Optional[str] = None, + config: Optional[Dict[str, str]] = None, + networks: Optional[List[str]] = None, + placement: Optional[PlacementSpec] = None, + disable_https: Optional[bool] = False, + port: Optional[int] = None, + ssl_certificate: Optional[str] = None, + ssl_certificate_key: Optional[str] = None, + ssl_prefer_server_ciphers: Optional[str] = None, + ssl_session_tickets: Optional[str] = None, + ssl_session_timeout: Optional[str] = None, + ssl_session_cache: Optional[str] = None, + server_tokens: Optional[str] = None, + ssl_stapling: Optional[str] = None, + ssl_stapling_verify: Optional[str] = None, + ssl_protocols: Optional[List[str]] = None, + ssl_ciphers: Optional[List[str]] = None, + preview_only: bool = False, + unmanaged: bool = False, + extra_container_args: Optional[GeneralArgList] = None, + extra_entrypoint_args: Optional[GeneralArgList] = None, + custom_configs: Optional[List[CustomConfig]] = None, + ): + assert service_type == 'mgmt-gateway' + + super(MgmtGatewaySpec, self).__init__( + 'mgmt-gateway', service_id=service_id, + placement=placement, config=config, + networks=networks, + preview_only=preview_only, + extra_container_args=extra_container_args, + extra_entrypoint_args=extra_entrypoint_args, + custom_configs=custom_configs + ) + #: Flag to disable HTTPS. If True, the server will use insecure HTTP + self.disable_https = disable_https + #: The port number on which the server will listen + self.port = port + #: A multi-line string that contains the SSL certificate + self.ssl_certificate = ssl_certificate + #: A multi-line string that contains the SSL key + self.ssl_certificate_key = ssl_certificate_key + #: Prefer server ciphers over client ciphers: on | off + self.ssl_prefer_server_ciphers = ssl_prefer_server_ciphers + #: Flag to enable or disable TLS session tickets: on | off + self.ssl_session_tickets = ssl_session_tickets + #: The duration for SSL session timeout. Syntax: time (e.g.: 5m) + self.ssl_session_timeout = ssl_session_timeout + #: Duration an SSL/TLS session is cached: off | none | [builtin[:size]] [shared:name:size] + self.ssl_session_cache = ssl_session_cache + #: Flag to control server tokens in responses: on | off | build | string + self.server_tokens = server_tokens + #: Flag to enable or disable SSL stapling: on | off + self.ssl_stapling = ssl_stapling + #: Flag to control verification of SSL stapling: on | off + self.ssl_stapling_verify = ssl_stapling_verify + #: A list of supported SSL protocols (as supported by nginx) + self.ssl_protocols = ssl_protocols + #: List of supported secure SSL ciphers. Changing this list may reduce system security. 
+ self.ssl_ciphers = ssl_ciphers + + def get_port_start(self) -> List[int]: + ports = [] + if self.port is not None: + ports.append(cast(int, self.port)) + return ports + + def validate(self) -> None: + super(MgmtGatewaySpec, self).validate() + self._validate_port(self.port) + self._validate_certificate(self.ssl_certificate, "ssl_certificate") + self._validate_private_key(self.ssl_certificate_key, "ssl_certificate_key") + self._validate_boolean_switch(self.ssl_prefer_server_ciphers, "ssl_prefer_server_ciphers") + self._validate_boolean_switch(self.ssl_session_tickets, "ssl_session_tickets") + self._validate_session_timeout(self.ssl_session_timeout) + self._validate_session_cache(self.ssl_session_cache) + self._validate_server_tokens(self.server_tokens) + self._validate_boolean_switch(self.ssl_stapling, "ssl_stapling") + self._validate_boolean_switch(self.ssl_stapling_verify, "ssl_stapling_verify") + self._validate_ssl_protocols(self.ssl_protocols) + + def _validate_port(self, port: Optional[int]) -> None: + if port is not None and not (1 <= port <= 65535): + raise SpecValidationError(f"Invalid port: {port}. Must be between 1 and 65535.") + + def _validate_certificate(self, cert: Optional[str], name: str) -> None: + if cert is not None and not isinstance(cert, str): + raise SpecValidationError(f"Invalid {name}. Must be a string.") + + def _validate_private_key(self, key: Optional[str], name: str) -> None: + if key is not None and not isinstance(key, str): + raise SpecValidationError(f"Invalid {name}. Must be a string.") + + def _validate_boolean_switch(self, value: Optional[str], name: str) -> None: + if value is not None and value not in ['on', 'off']: + raise SpecValidationError(f"Invalid {name}: {value}. Supported values: on | off.") + + def _validate_session_timeout(self, timeout: Optional[str]) -> None: + if timeout is not None and not re.match(r'^\d+[smhd]$', timeout): + raise SpecValidationError(f"Invalid SSL Session Timeout: {timeout}. " + "Value must be a number followed by 's', 'm', 'h', or 'd'.") + + def _validate_session_cache(self, cache: Optional[str]) -> None: + valid_caches = ['none', 'off', 'builtin', 'shared'] + if cache is not None and not any(cache.startswith(vc) for vc in valid_caches): + raise SpecValidationError(f"Invalid SSL Session Cache: {cache}. Supported values are: " + "off | none | [builtin[:size]] [shared:name:size]") + + def _validate_server_tokens(self, tokens: Optional[str]) -> None: + if tokens is not None and tokens not in ['on', 'off', 'build', 'string']: + raise SpecValidationError(f"Invalid Server Tokens: {tokens}. Must be one of " + "['on', 'off', 'build', 'string'].") + + def _validate_ssl_protocols(self, protocols: Optional[List[str]]) -> None: + if protocols is None: + return + valid_protocols = ['TLSv1.2', 'TLSv1.3'] + for protocol in protocols: + if protocol not in valid_protocols: + raise SpecValidationError(f"Invalid SSL Protocol: {protocol}. " + f"Must be one of {valid_protocols}.")
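For reference, a spec that satisfies every validator above; the values are illustrative, and ssl_certificate/ssl_certificate_key are omitted since they are only type-checked here:

    from ceph.deployment.service_spec import MgmtGatewaySpec, PlacementSpec

    spec = MgmtGatewaySpec(
        placement=PlacementSpec(count=1),
        port=8443,
        ssl_protocols=['TLSv1.2', 'TLSv1.3'],
        ssl_session_timeout='5m',             # matches ^\d+[smhd]$
        ssl_session_cache='shared:SSL:10m',   # one of the accepted prefixes
        server_tokens='off',
        ssl_stapling='on',
        ssl_stapling_verify='on',
    )
    spec.validate()   # raises SpecValidationError on any bad field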
+ + +yaml.add_representer(MgmtGatewaySpec, ServiceSpec.yaml_representer) + + class InitContainerSpec(object): """An init container is not a service that lives on its own, but rather is used to run and exit prior to a service container starting in order diff --git a/src/python-common/ceph/rgw/rgwam_core.py b/src/python-common/ceph/rgw/rgwam_core.py index 333f4901585..312d66362ec 100644 --- a/src/python-common/ceph/rgw/rgwam_core.py +++ b/src/python-common/ceph/rgw/rgwam_core.py @@ -786,7 +786,7 @@ class RGWAM: "secret": secret}) return realms_info - def zone_create(self, rgw_spec, start_radosgw): + def zone_create(self, rgw_spec, start_radosgw, secondary_zone_period_retry_limit=5): if not rgw_spec.rgw_realm_token: raise RGWAMException('missing realm token') @@ -818,12 +818,30 @@ class RGWAM: zonegroup = period.get_master_zonegroup() if not zonegroup: - raise RGWAMException('Cannot find master zonegroup of realm {realm_name}') + raise RGWAMException(f'Cannot find master zonegroup of realm {realm_name}') zone = self.create_zone(realm, zonegroup, rgw_spec.rgw_zone, False, # secondary zone access_key, secret, endpoints=rgw_spec.zone_endpoints) - self.update_period(realm, zonegroup, zone) + + # Adding a retry limit for period update in case the default 10s timeout is not sufficient + rgw_limit = 0 + + while rgw_limit < int(secondary_zone_period_retry_limit): + try: + self.update_period(realm, zonegroup, zone) + break + except RGWAMException as e: + logging.info(f'Failed to update period (attempt {rgw_limit + 1} of ' + f'{secondary_zone_period_retry_limit}); retrying') + rgw_limit += 1 + if rgw_limit == secondary_zone_period_retry_limit: + raise RGWAMException(f'Period update failed for zone {zone}. ' + f'Exception raised during period update: {e.message}')
+ continue + + # By default the above operation is expected to complete within the 10s timeout, + # but updating a secondary site can take longer because of pool creation period = RGWPeriod(period_info) logging.debug(period.to_json()) @@ -837,7 +855,7 @@ realm_token_b = secondary_realm_token.to_json().encode('utf-8') realm_token_s = base64.b64encode(realm_token_b).decode('utf-8') rgw_spec.update_endpoints = True - rgw_spec.rgw_token = realm_token_s + rgw_spec.rgw_realm_token = realm_token_s rgw_spec.rgw_zonegroup = zonegroup.name # master zonegroup is used self.env.mgr.apply_rgw(rgw_spec) diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt index 13e8970aeaf..cf065cedcf8 100644 --- a/src/rgw/CMakeLists.txt +++ b/src/rgw/CMakeLists.txt @@ -63,6 +63,7 @@ set(librgw_common_srcs rgw_acl_swift.cc rgw_aio.cc rgw_aio_throttle.cc + rgw_asio_thread.cc rgw_auth.cc rgw_auth_s3.cc rgw_arn.cc @@ -70,6 +71,7 @@ set(librgw_common_srcs rgw_bucket.cc rgw_bucket_layout.cc rgw_cache.cc + rgw_cksum_pipe.cc rgw_common.cc rgw_compression.cc rgw_cors.cc @@ -304,15 +306,17 @@ target_link_libraries(rgw_common RapidJSON::RapidJSON Boost::context ${FMT_LIB} - OpenSSL::SSL) + OpenSSL::SSL + BLAKE3::blake3) target_include_directories(rgw_common PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/services" PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw" PUBLIC "${LUA_INCLUDE_DIR}") -if(WITH_RADOSGW_D4N) - target_include_directories(rgw_common SYSTEM PUBLIC "${CMAKE_SOURCE_DIR}/src/boost_redis/include") -endif() +# work around https://github.com/Cyan4973/xxHash/issues/943 for debug builds +target_compile_definitions(rgw_common PUBLIC + $<$<CONFIG:Debug>:XXH_NO_INLINE_HINTS=1> + $<$<CONFIG:RelWithDebInfo>:XXH_NO_INLINE_HINTS=1>) if(WITH_RADOSGW_KAFKA_ENDPOINT) # used by rgw_kafka.cc diff --git a/src/rgw/driver/d4n/d4n_directory.cc b/src/rgw/driver/d4n/d4n_directory.cc index 35b34a2c1d9..1fd9437691d 100644 --- a/src/rgw/driver/d4n/d4n_directory.cc +++ b/src/rgw/driver/d4n/d4n_directory.cc @@ -171,6 +171,7 @@ int ObjectDirectory::get(CacheObj* object, optional_yield y) return 0; } +/* Note: This method is not compatible for use on Ubuntu systems. 
*/ int BlockDirectory::copy(CacheBlock* block, std::string copyName, std::string copyBucketName, optional_yield y) { std::string key = build_index(block); diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.cc b/src/rgw/driver/d4n/rgw_sal_d4n.cc index 3b6124478a0..712cebbfc05 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.cc +++ b/src/rgw/driver/d4n/rgw_sal_d4n.cc @@ -110,107 +110,8 @@ int D4NFilterBucket::create(const DoutPrefixProvider* dpp, return next->create(dpp, params, y); } -int D4NFilterObject::copy_object(const ACLOwner& owner, - const rgw_user& remote_user, - req_info* info, - const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, - rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, - const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, - ceph::real_time* mtime, - const ceph::real_time* mod_ptr, - const ceph::real_time* unmod_ptr, - bool high_precision_time, - const char* if_match, - const char* if_nomatch, - AttrsMod attrs_mod, - bool copy_if_newer, - Attrs& attrs, - RGWObjCategory category, - uint64_t olh_epoch, - boost::optional<ceph::real_time> delete_at, - std::string* version_id, - std::string* tag, - std::string* etag, - void (*progress_cb)(off_t, void *), - void* progress_data, - const DoutPrefixProvider* dpp, - optional_yield y) -{ - rgw::d4n::CacheObj obj = rgw::d4n::CacheObj{ - .objName = this->get_key().get_oid(), - .bucketName = src_bucket->get_name() - }; - - if (driver->get_obj_dir()->copy(&obj, dest_object->get_name(), dest_bucket->get_name(), y) < 0) - ldpp_dout(dpp, 10) << "D4NFilterObject::" << __func__ << "(): BlockDirectory copy method failed." << dendl; - - /* Append additional metadata to attributes */ - rgw::sal::Attrs baseAttrs = this->get_attrs(); - buffer::list bl; - - bl.append(to_iso_8601(*mtime)); - baseAttrs.insert({"mtime", bl}); - bl.clear(); - - if (version_id != NULL) { - bl.append(*version_id); - baseAttrs.insert({"version_id", bl}); - bl.clear(); - } - - if (!etag->empty()) { - bl.append(*etag); - baseAttrs.insert({"etag", bl}); - bl.clear(); - } - - if (attrs_mod == rgw::sal::ATTRSMOD_REPLACE) { /* Replace */ - rgw::sal::Attrs::iterator iter; - - for (const auto& pair : attrs) { - iter = baseAttrs.find(pair.first); - - if (iter != baseAttrs.end()) { - iter->second = pair.second; - } else { - baseAttrs.insert({pair.first, pair.second}); - } - } - } else if (attrs_mod == rgw::sal::ATTRSMOD_MERGE) { /* Merge */ - baseAttrs.insert(attrs.begin(), attrs.end()); - } - - /* - int copy_attrsReturn = driver->get_cache_driver()->copy_attrs(this->get_key().get_oid(), dest_object->get_key().get_oid(), &baseAttrs); - - if (copy_attrsReturn < 0) { - ldpp_dout(dpp, 20) << "D4N Filter: Cache copy attributes operation failed." << dendl; - } else { - int copy_dataReturn = driver->get_cache_driver()->copy_data(this->get_key().get_oid(), dest_object->get_key().get_oid()); - - if (copy_dataReturn < 0) { - ldpp_dout(dpp, 20) << "D4N Filter: Cache copy data operation failed." << dendl; - } else { - ldpp_dout(dpp, 20) << "D4N Filter: Cache copy object operation succeeded." 
<< dendl; - } - }*/ - - return next->copy_object(owner, remote_user, info, source_zone, - nextObject(dest_object), - nextBucket(dest_bucket), - nextBucket(src_bucket), - dest_placement, src_mtime, mtime, - mod_ptr, unmod_ptr, high_precision_time, if_match, - if_nomatch, attrs_mod, copy_if_newer, attrs, - category, olh_epoch, delete_at, version_id, tag, - etag, progress_cb, progress_data, dpp, y); -} - int D4NFilterObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) + Attrs* delattrs, optional_yield y, uint32_t flags) { if (setattrs != NULL) { /* Ensure setattrs and delattrs do not overlap */ @@ -241,7 +142,7 @@ int D4NFilterObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattr ldpp_dout(dpp, 10) << "D4NFilterObject::" << __func__ << "(): CacheDriver delete_attrs method failed." << dendl; } - return next->set_obj_attrs(dpp, setattrs, delattrs, y); + return next->set_obj_attrs(dpp, setattrs, delattrs, y, flags); } int D4NFilterObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, @@ -910,6 +811,7 @@ int D4NFilterWriter::process(bufferlist&& data, uint64_t offset) int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -929,7 +831,7 @@ int D4NFilterWriter::complete(size_t accounted_size, const std::string& etag, ldpp_dout(save_dpp, 10) << "D4NFilterWriter::" << __func__ << "(): ObjectDirectory set method failed." << dendl; /* Retrieve complete set of attrs */ - int ret = next->complete(accounted_size, etag, mtime, set_mtime, attrs, + int ret = next->complete(accounted_size, etag, mtime, set_mtime, attrs, cksum, delete_at, if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); obj->get_obj_attrs(rctx.y, save_dpp, NULL); diff --git a/src/rgw/driver/d4n/rgw_sal_d4n.h b/src/rgw/driver/d4n/rgw_sal_d4n.h index 42436b92d1d..e7d041d2a19 100644 --- a/src/rgw/driver/d4n/rgw_sal_d4n.h +++ b/src/rgw/driver/d4n/rgw_sal_d4n.h @@ -177,25 +177,9 @@ class D4NFilterObject : public FilterObject { driver(_driver) {} virtual ~D4NFilterObject() = default; - virtual int copy_object(const ACLOwner& owner, - const rgw_user& remote_user, - req_info* info, const rgw_zone_id& source_zone, - rgw::sal::Object* dest_object, rgw::sal::Bucket* dest_bucket, - rgw::sal::Bucket* src_bucket, - const rgw_placement_rule& dest_placement, - ceph::real_time* src_mtime, ceph::real_time* mtime, - const ceph::real_time* mod_ptr, const ceph::real_time* unmod_ptr, - bool high_precision_time, - const char* if_match, const char* if_nomatch, - AttrsMod attrs_mod, bool copy_if_newer, Attrs& attrs, - RGWObjCategory category, uint64_t olh_epoch, - boost::optional<ceph::real_time> delete_at, - std::string* version_id, std::string* tag, std::string* etag, - void (*progress_cb)(off_t, void *), void* progress_data, - const DoutPrefixProvider* dpp, optional_yield y) override; virtual const std::string &get_name() const override { return next->get_name(); } virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) override; + Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const 
char* attr_name, bufferlist& attr_val, @@ -235,14 +219,15 @@ class D4NFilterWriter : public FilterWriter { virtual int prepare(optional_yield y); virtual int process(bufferlist&& data, uint64_t offset) override; virtual int complete(size_t accounted_size, const std::string& etag, - ceph::real_time *mtime, ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, - ceph::real_time delete_at, - const char *if_match, const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, bool *canceled, - const req_context& rctx, - uint32_t flags) override; + ceph::real_time *mtime, ceph::real_time set_mtime, + std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, bool *canceled, + const req_context& rctx, + uint32_t flags) override; bool is_atomic() { return atomic; }; const DoutPrefixProvider* dpp() { return save_dpp; } }; diff --git a/src/rgw/driver/daos/rgw_sal_daos.cc b/src/rgw/driver/daos/rgw_sal_daos.cc index b105d9684b7..6d83c7d159a 100644 --- a/src/rgw/driver/daos/rgw_sal_daos.cc +++ b/src/rgw/driver/daos/rgw_sal_daos.cc @@ -898,7 +898,7 @@ int DaosObject::load_obj_state(const DoutPrefixProvider* dpp, DaosObject::~DaosObject() { close(nullptr); } int DaosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) { + Attrs* delattrs, optional_yield y, uint32_t flags) { ldpp_dout(dpp, 20) << "DEBUG: DaosObject::set_obj_attrs()" << dendl; // TODO handle target_obj // Get object's metadata (those stored in rgw_bucket_dir_entry) @@ -957,7 +957,7 @@ int DaosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, bufferlist bl; rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); + return set_obj_attrs(dpp, nullptr, &rmattr, y, rgw::sal::FLAG_LOG_OP); } bool DaosObject::is_expired() { @@ -1596,6 +1596,7 @@ int DaosMultipartUpload::init(const DoutPrefixProvider* dpp, optional_yield y, multipart_upload_info upload_info; upload_info.dest_placement = dest_placement; + upload_info.cksum_type = cksum_type; ent.encode(bl); encode(attrs, bl); @@ -1968,6 +1969,7 @@ int DaosMultipartUpload::get_info(const DoutPrefixProvider* dpp, // Now decode the placement rule decode(upload_info, iter); + cksum_type = upload_info.cksum_type; placement = upload_info.dest_placement; *rule = &placement; diff --git a/src/rgw/driver/daos/rgw_sal_daos.h b/src/rgw/driver/daos/rgw_sal_daos.h index 8b40c645caf..8cff96088ee 100644 --- a/src/rgw/driver/daos/rgw_sal_daos.h +++ b/src/rgw/driver/daos/rgw_sal_daos.h @@ -620,7 +620,7 @@ class DaosObject : public StoreObject { virtual int load_obj_state(const DoutPrefixProvider *dpp, optional_yield y, bool follow_olh = true) override; virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) override; + Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, @@ -742,6 +742,7 @@ class DaosAtomicWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time* mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char* 
if_match, const char* if_nomatch, const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, @@ -788,6 +789,7 @@ class DaosMultipartWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time* mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char* if_match, const char* if_nomatch, const std::string* user_data, rgw_zone_set* zones_trace, bool* canceled, @@ -810,6 +812,10 @@ class DaosMultipartPart : public StoreMultipartPart { virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } + friend class DaosMultipartUpload; }; diff --git a/src/rgw/driver/motr/rgw_sal_motr.cc b/src/rgw/driver/motr/rgw_sal_motr.cc index 740d82a330c..b584ca30360 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.cc +++ b/src/rgw/driver/motr/rgw_sal_motr.cc @@ -1182,7 +1182,7 @@ MotrObject::~MotrObject() { // return read_op.prepare(dpp); // } -int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) +int MotrObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) { // TODO: implement ldpp_dout(dpp, 20) <<__func__<< ": MotrObject::set_obj_attrs()" << dendl; @@ -1238,7 +1238,7 @@ int MotrObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, op } set_atomic(); state.attrset[attr_name] = attr_val; - return set_obj_attrs(dpp, &state.attrset, nullptr, y); + return set_obj_attrs(dpp, &state.attrset, nullptr, y, rgw::sal::FLAG_LOG_OP); } int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) @@ -1249,7 +1249,7 @@ int MotrObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr set_atomic(); rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); + return set_obj_attrs(dpp, nullptr, &rmattr, y, rgw::sal::FLAG_LOG_OP); } bool MotrObject::is_expired() { diff --git a/src/rgw/driver/motr/rgw_sal_motr.h b/src/rgw/driver/motr/rgw_sal_motr.h index caf7c8667f7..4d9c189993b 100644 --- a/src/rgw/driver/motr/rgw_sal_motr.h +++ b/src/rgw/driver/motr/rgw_sal_motr.h @@ -677,7 +677,7 @@ class MotrObject : public StoreObject { virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; @@ -791,6 +791,7 @@ class MotrAtomicWriter : public StoreWriter { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, 
bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -839,6 +840,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -888,6 +890,9 @@ public: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } RGWObjManifest& get_manifest() { return info.manifest; } diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc index 312422109c4..145a5789653 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.cc +++ b/src/rgw/driver/posix/rgw_sal_posix.cc @@ -1543,7 +1543,7 @@ int POSIXObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, } int POSIXObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) + Attrs* delattrs, optional_yield y, uint32_t flags) { if (delattrs) { for (auto& it : *delattrs) { @@ -2175,7 +2175,7 @@ int POSIXObject::generate_attrs(const DoutPrefixProvider* dpp, optional_yield y) int POSIXObject::generate_mp_etag(const DoutPrefixProvider* dpp, optional_yield y) { - int64_t count = 0; + int32_t count = 0; char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE]; char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; std::string etag; @@ -2231,7 +2231,7 @@ int POSIXObject::generate_mp_etag(const DoutPrefixProvider* dpp, optional_yield buf_to_hex((unsigned char *)etag_buf, sizeof(etag_buf), final_etag_str); snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, - "-%lld", (long long)count); + "-%" PRId32, count); etag = final_etag_str; ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; @@ -2408,7 +2408,7 @@ int POSIXObject::copy(const DoutPrefixProvider *dpp, optional_yield y, return ret; } - ret = dobj->set_obj_attrs(dpp, &get_attrs(), NULL, y); + ret = dobj->set_obj_attrs(dpp, &get_attrs(), NULL, y, rgw::sal::FLAG_LOG_OP); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: could not write attrs to dest object " << dobj->get_name() << dendl; @@ -2509,6 +2509,7 @@ int POSIXMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, meta_obj = get_meta_obj(); + mp_obj.upload_info.cksum_type = cksum_type; mp_obj.upload_info.dest_placement = dest_placement; bufferlist bl; @@ -2516,7 +2517,7 @@ int POSIXMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, attrs[RGW_POSIX_ATTR_MPUPLOAD] = bl; - return meta_obj->set_obj_attrs(dpp, &attrs, nullptr, y); + return meta_obj->set_obj_attrs(dpp, &attrs, nullptr, y, rgw::sal::FLAG_LOG_OP); } int POSIXMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext *cct, @@ -2796,9 +2797,12 @@ int POSIXMultipartWriter::process(bufferlist&& data, uint64_t offset) return obj->write(offset, data, dpp, null_yield); } -int POSIXMultipartWriter::complete(size_t accounted_size, const std::string& etag, +int POSIXMultipartWriter::complete( + size_t accounted_size, + const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& 
attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -2828,6 +2832,7 @@ int POSIXMultipartWriter::complete(size_t accounted_size, const std::string& eta info.num = part_num; info.etag = etag; + info.cksum = cksum; info.mtime = set_mtime; bufferlist bl; @@ -2867,6 +2872,7 @@ int POSIXAtomicWriter::process(bufferlist&& data, uint64_t offset) int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h index ac83173a00d..7483139da33 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.h +++ b/src/rgw/driver/posix/rgw_sal_posix.h @@ -341,7 +341,7 @@ public: virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override; virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) override; + Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, @@ -479,19 +479,24 @@ struct POSIXUploadPartInfo { uint32_t num{0}; std::string etag; ceph::real_time mtime; + std::optional<rgw::cksum::Cksum> cksum; void encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 1, bl); encode(num, bl); encode(etag, bl); encode(mtime, bl); + encode(cksum, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START(1, bl); + DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); decode(num, bl); decode(etag, bl); decode(mtime, bl); + if (struct_v > 1) { + decode(cksum, bl); + } DECODE_FINISH(bl); } }; @@ -514,8 +519,12 @@ public: virtual uint64_t get_size() { return shadow->get_size(); } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.mtime; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } - int load(const DoutPrefixProvider* dpp, optional_yield y, POSIXDriver* driver, rgw_obj_key& key); + int load(const DoutPrefixProvider* dpp, optional_yield y, POSIXDriver* driver, + rgw_obj_key& key); friend class POSIXMultipartUpload; }; @@ -605,6 +614,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -645,6 +655,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/driver/posix/zpp_bits.h b/src/rgw/driver/posix/zpp_bits.h index 90e8916b0b0..c9e99ec59c2 100644 --- a/src/rgw/driver/posix/zpp_bits.h +++ 
b/src/rgw/driver/posix/zpp_bits.h @@ -3454,7 +3454,7 @@ struct [[nodiscard]] value_or_errc constexpr value_or_errc(value_or_errc && other) noexcept { - if (other.is_value()) { + if (other.success()) { if constexpr (!std::is_void_v<Type>) { if constexpr (!std::is_reference_v<Type>) { ::new (std::addressof(m_return_value)) diff --git a/src/rgw/driver/rados/cls_fifo_legacy.cc b/src/rgw/driver/rados/cls_fifo_legacy.cc index fad9404fdb3..7e614adae67 100644 --- a/src/rgw/driver/rados/cls_fifo_legacy.cc +++ b/src/rgw/driver/rados/cls_fifo_legacy.cc @@ -1171,7 +1171,7 @@ int FIFO::open(const DoutPrefixProvider *dpp, lr::IoCtx ioctx, std::string oid, fifo::info info; std::uint32_t size; std::uint32_t over; - int r = get_meta(dpp, ioctx, std::move(oid), objv, &info, &size, &over, 0, y, + int r = get_meta(dpp, ioctx, oid, objv, &info, &size, &over, 0, y, probe); if (r < 0) { if (!(probe && (r == -ENOENT || r == -ENODATA))) { diff --git a/src/rgw/driver/rados/rgw_bucket.cc b/src/rgw/driver/rados/rgw_bucket.cc index 08de61b172b..14219dad4a8 100644 --- a/src/rgw/driver/rados/rgw_bucket.cc +++ b/src/rgw/driver/rados/rgw_bucket.cc @@ -146,9 +146,6 @@ bool rgw_bucket_object_check_filter(const std::string& oid) int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, rgw::sal::Bucket* bucket, rgw_obj_key& key, optional_yield y) { - if (key.instance.empty()) { - key.instance = "null"; - } std::unique_ptr<rgw::sal::Object> object = bucket->get_object(key); diff --git a/src/rgw/driver/rados/rgw_bucket_sync.cc b/src/rgw/driver/rados/rgw_bucket_sync.cc index dafbb6df46f..1e7316d4271 100644 --- a/src/rgw/driver/rados/rgw_bucket_sync.cc +++ b/src/rgw/driver/rados/rgw_bucket_sync.cc @@ -984,6 +984,19 @@ void RGWBucketSyncPolicyHandler::get_pipes(std::set<rgw_sync_bucket_pipe> *_sour } } +bool RGWBucketSyncPolicyHandler::bucket_exports_object(const std::string& obj_name, const RGWObjTags& tags) const { + if (bucket_exports_data()) { + for (auto& entry : target_pipes.pipe_map) { + auto& filter = entry.second.params.source.filter; + if (filter.check_prefix(obj_name) && filter.check_tags(tags.get_tags())) { + return true; + } + } + } + + return false; +} + bool RGWBucketSyncPolicyHandler::bucket_exports_data() const { if (!bucket) { diff --git a/src/rgw/driver/rados/rgw_bucket_sync.h b/src/rgw/driver/rados/rgw_bucket_sync.h index d425ecf1732..eb3226b7047 100644 --- a/src/rgw/driver/rados/rgw_bucket_sync.h +++ b/src/rgw/driver/rados/rgw_bucket_sync.h @@ -402,6 +402,7 @@ public: return target_hints; } + bool bucket_exports_object(const std::string& obj_name, const RGWObjTags& tags) const; bool bucket_exports_data() const; bool bucket_imports_data() const; diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc index 41d5bed4a55..5b69c5725ff 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.cc +++ b/src/rgw/driver/rados/rgw_cr_rados.cc @@ -717,13 +717,13 @@ int RGWRadosBILogTrimCR::request_complete() return r; } -int send_sync_notification(const DoutPrefixProvider* dpp, - rgw::sal::RadosStore* store, - rgw::sal::Bucket* bucket, - rgw::sal::Object* obj, - const rgw::sal::Attrs& attrs, - uint64_t obj_size, - const rgw::notify::EventTypeList& event_types) { +void send_sync_notification(const DoutPrefixProvider* dpp, + rgw::sal::RadosStore* store, + rgw::sal::Bucket* bucket, + rgw::sal::Object* obj, + const rgw::sal::Attrs& attrs, + uint64_t obj_size, + const rgw::notify::EventTypeList& event_types) { // send notification that object was successfully synced std::string 
user_id = "rgw sync"; std::string req_id = "0"; @@ -738,7 +738,6 @@ int send_sync_notification(const DoutPrefixProvider* dpp, ldpp_dout(dpp, 1) << "ERROR: " << __func__ << ": caught buffer::error couldn't decode TagSet " << dendl; - return -EIO; } } // bucket attrs are required for notification and since its not loaded, @@ -748,7 +747,7 @@ int send_sync_notification(const DoutPrefixProvider* dpp, ldpp_dout(dpp, 1) << "ERROR: failed to load bucket attrs for bucket:" << bucket->get_name() << " with error ret= " << r << " . Not sending notification" << dendl; - return r; + return; } rgw::notify::reservation_t notify_res(dpp, store, obj, nullptr, bucket, user_id, bucket->get_tenant(), req_id, @@ -772,7 +771,6 @@ int send_sync_notification(const DoutPrefixProvider* dpp, << ret << dendl; } } - return ret; } int RGWAsyncFetchRemoteObj::_send_request(const DoutPrefixProvider *dpp) @@ -938,6 +936,7 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) if (versioned) { del_op->params.versioning_status = BUCKET_VERSIONED; } + del_op->params.olh_epoch = versioned_epoch; del_op->params.marker_version_id = marker_version_id; del_op->params.obj_owner.id = rgw_user(owner); @@ -945,6 +944,7 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) del_op->params.mtime = timestamp; del_op->params.high_precision_time = true; del_op->params.zones_trace = &zones_trace; + del_op->params.null_verid = false; ret = del_op->delete_obj(dpp, null_yield, true); if (ret < 0) { diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc index dbea56d4de7..151fd7fe84e 100644 --- a/src/rgw/driver/rados/rgw_data_sync.cc +++ b/src/rgw/driver/rados/rgw_data_sync.cc @@ -277,7 +277,7 @@ public: return io_block(0); } yield { - op_ret = http_op->wait(shard_info, null_yield); + op_ret = http_op->wait(dpp, shard_info, null_yield); http_op->put(); } @@ -377,7 +377,7 @@ public: } yield { timer.reset(); - op_ret = http_op->wait(&response, null_yield); + op_ret = http_op->wait(dpp, &response, null_yield); http_op->put(); } @@ -495,7 +495,7 @@ public: } int request_complete() override { - int ret = http_op->wait(result, null_yield); + int ret = http_op->wait(sync_env->dpp, result, null_yield); http_op->put(); if (ret < 0 && ret != -ENOENT) { ldpp_dout(sync_env->dpp, 5) << "ERROR: failed to list remote datalog shard, ret=" << ret << dendl; @@ -4391,6 +4391,7 @@ class RGWBucketSyncSingleEntryCR : public RGWCoroutine { rgw_obj_key key; bool versioned; + bool null_verid; std::optional<uint64_t> versioned_epoch; rgw_bucket_entry_owner owner; real_time timestamp; @@ -4418,6 +4419,7 @@ public: RGWBucketSyncSingleEntryCR(RGWDataSyncCtx *_sc, rgw_bucket_sync_pipe& _sync_pipe, const rgw_obj_key& _key, bool _versioned, + bool _null_verid, std::optional<uint64_t> _versioned_epoch, real_time& _timestamp, const rgw_bucket_entry_owner& _owner, @@ -4426,7 +4428,8 @@ public: RGWSyncTraceNodeRef& _tn_parent) : RGWCoroutine(_sc->cct), sc(_sc), sync_env(_sc->env), sync_pipe(_sync_pipe), bs(_sync_pipe.info.source_bs), - key(_key), versioned(_versioned), versioned_epoch(_versioned_epoch), + key(_key), versioned(_versioned), + null_verid(_null_verid),versioned_epoch(_versioned_epoch), owner(_owner), timestamp(_timestamp), op(_op), op_state(_op_state), @@ -4504,6 +4507,9 @@ public: if (op == CLS_RGW_OP_UNLINK_INSTANCE) { versioned = true; } + if (null_verid) { + key.instance = "null"; + } tn->log(10, SSTR("removing obj: " << sc->source_zone << "/" << bs.bucket << "/" << key << "[" << 
versioned_epoch.value_or(0) << "]")); call(data_sync_module->remove_object(dpp, sc, sync_pipe, key, timestamp, versioned, versioned_epoch.value_or(0), &zones_trace)); // our copy of the object is more recent, continue as if it succeeded @@ -4714,6 +4720,7 @@ int RGWBucketFullSyncCR::operate(const DoutPrefixProvider *dpp) using SyncCR = RGWBucketSyncSingleEntryCR<rgw_obj_key, rgw_obj_key>; yield spawn(new SyncCR(sc, sync_pipe, entry->key, false, /* versioned, only matters for object removal */ + false, entry->versioned_epoch, entry->mtime, entry->owner, entry->get_modify_op(), CLS_RGW_STATE_COMPLETE, entry->key, &marker_tracker, zones_trace, tn), @@ -5122,7 +5129,7 @@ int RGWBucketShardIncrementalSyncCR::operate(const DoutPrefixProvider *dpp) tn->log(20, SSTR("entry->timestamp=" << entry->timestamp)); using SyncCR = RGWBucketSyncSingleEntryCR<string, rgw_obj_key>; spawn(new SyncCR(sc, sync_pipe, key, - entry->is_versioned(), versioned_epoch, + entry->is_versioned(), entry->is_null_verid(), versioned_epoch, entry->timestamp, owner, entry->op, entry->state, cur_id, &marker_tracker, entry->zones_trace, tn), false); diff --git a/src/rgw/driver/rados/rgw_lc_tier.cc b/src/rgw/driver/rados/rgw_lc_tier.cc index eea5c9b01a7..64c55700eb2 100644 --- a/src/rgw/driver/rados/rgw_lc_tier.cc +++ b/src/rgw/driver/rados/rgw_lc_tier.cc @@ -269,7 +269,7 @@ static int cloud_tier_get_object(RGWLCCloudTierCtx& tier_ctx, bool head, } /* fetch headers */ - ret = tier_ctx.conn.complete_request(in_req, nullptr, nullptr, nullptr, nullptr, &headers, null_yield); + ret = tier_ctx.conn.complete_request(tier_ctx.dpp, in_req, nullptr, nullptr, nullptr, nullptr, &headers, null_yield); if (ret < 0 && ret != -ENOENT) { ldpp_dout(tier_ctx.dpp, 20) << "ERROR: " << __func__ << "(): conn.complete_request() returned ret=" << ret << dendl; return ret; @@ -704,8 +704,7 @@ RGWGetDataCB *RGWLCCloudStreamPut::get_cb() { } int RGWLCCloudStreamPut::complete_request() { - int ret = conn.complete_request(out_req, etag, &obj_properties.mtime, null_yield); - return ret; + return conn.complete_request(dpp, out_req, etag, &obj_properties.mtime, null_yield); } /* Read local copy and write to Cloud endpoint */ diff --git a/src/rgw/driver/rados/rgw_log_backing.cc b/src/rgw/driver/rados/rgw_log_backing.cc index 325d0510901..110a54015a3 100644 --- a/src/rgw/driver/rados/rgw_log_backing.cc +++ b/src/rgw/driver/rados/rgw_log_backing.cc @@ -444,13 +444,17 @@ bs::error_code logback_generations::write(const DoutPrefixProvider *dpp, entries encode(e, bl); op.write_full(bl); cls_version_inc(op); + auto oldv = version; + l.unlock(); auto r = rgw_rados_operate(dpp, ioctx, oid, &op, y); if (r == 0) { + if (oldv != version) { + return { ECANCELED, bs::system_category() }; + } entries_ = std::move(e); version.inc(); return {}; } - l.unlock(); if (r < 0 && r != -ECANCELED) { ldpp_dout(dpp, -1) << __PRETTY_FUNCTION__ << ":" << __LINE__ << ": failed reading oid=" << oid @@ -609,17 +613,19 @@ bs::error_code logback_generations::remove_empty(const DoutPrefixProvider *dpp, if (ec) return ec; auto tries = 0; entries_t new_entries; - std::unique_lock l(m); - ceph_assert(!entries_.empty()); + entries_t es; + auto now = ceph::real_clock::now(); { - auto i = lowest_nomempty(entries_); - if (i == entries_.begin()) { - return {}; + std::unique_lock l(m); + ceph_assert(!entries_.empty()); + { + auto i = lowest_nomempty(entries_); + if (i == entries_.begin()) { + return {}; + } } + l.unlock(); } - entries_t es; - auto now = ceph::real_clock::now(); - l.unlock(); do { 
std::copy_if(entries_.cbegin(), entries_.cend(), std::inserter(es, es.end()), @@ -646,7 +652,7 @@ bs::error_code logback_generations::remove_empty(const DoutPrefixProvider *dpp, es2.erase(i); } } - l.lock(); + std::unique_lock l(m); es.clear(); ec = write(dpp, std::move(es2), std::move(l), y); ++tries; diff --git a/src/rgw/driver/rados/rgw_log_backing.h b/src/rgw/driver/rados/rgw_log_backing.h index 6cda9a4ca97..737d6725eb4 100644 --- a/src/rgw/driver/rados/rgw_log_backing.h +++ b/src/rgw/driver/rados/rgw_log_backing.h @@ -269,7 +269,7 @@ class LazyFIFO { // FIFO supports multiple clients by design, so it's safe to // race to create them. std::unique_ptr<rgw::cls::fifo::FIFO> fifo_tmp; - auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, &fifo, y); + auto r = rgw::cls::fifo::FIFO::create(dpp, ioctx, oid, &fifo_tmp, y); if (r) { return r; } diff --git a/src/rgw/driver/rados/rgw_notify.cc b/src/rgw/driver/rados/rgw_notify.cc index dd94d3155f8..6c1fe53d7e3 100644 --- a/src/rgw/driver/rados/rgw_notify.cc +++ b/src/rgw/driver/rados/rgw_notify.cc @@ -26,50 +26,6 @@ namespace rgw::notify { -struct event_entry_t { - rgw_pubsub_s3_event event; - std::string push_endpoint; - std::string push_endpoint_args; - std::string arn_topic; - ceph::coarse_real_time creation_time; - uint32_t time_to_live = DEFAULT_GLOBAL_VALUE; - uint32_t max_retries = DEFAULT_GLOBAL_VALUE; - uint32_t retry_sleep_duration = DEFAULT_GLOBAL_VALUE; - - void encode(bufferlist& bl) const { - ENCODE_START(3, 1, bl); - encode(event, bl); - encode(push_endpoint, bl); - encode(push_endpoint_args, bl); - encode(arn_topic, bl); - encode(creation_time, bl); - encode(time_to_live, bl); - encode(max_retries, bl); - encode(retry_sleep_duration, bl); - ENCODE_FINISH(bl); - } - - void decode(bufferlist::const_iterator& bl) { - DECODE_START(3, bl); - decode(event, bl); - decode(push_endpoint, bl); - decode(push_endpoint_args, bl); - decode(arn_topic, bl); - if (struct_v > 1) { - decode(creation_time, bl); - } else { - creation_time = ceph::coarse_real_clock::zero(); - } - if (struct_v > 2) { - decode(time_to_live, bl); - decode(max_retries, bl); - decode(retry_sleep_duration, bl); - } - DECODE_FINISH(bl); - } -}; -WRITE_CLASS_ENCODER(event_entry_t) - static inline std::ostream& operator<<(std::ostream& out, const event_entry_t& e) { std::string host; @@ -239,8 +195,13 @@ private: // processing of a specific entry // return whether processing was successful (true) or not (false) - EntryProcessingResult process_entry(const ConfigProxy& conf, persistency_tracker& entry_persistency_tracker, - const cls_queue_entry& entry, boost::asio::yield_context yield) { + EntryProcessingResult process_entry( + const ConfigProxy& conf, + persistency_tracker& entry_persistency_tracker, + const cls_queue_entry& entry, + RGWPubSubEndpoint* const push_endpoint, + const rgw_pubsub_topic& topic, + boost::asio::yield_context yield) { event_entry_t event_entry; auto iter = entry.data.cbegin(); try { @@ -253,7 +214,12 @@ private: if (event_entry.creation_time == ceph::coarse_real_clock::zero()) { return EntryProcessingResult::Migrating; } - + // overwrite the event entry values from the topics object fetched. + event_entry.event.opaque_data = topic.opaque_data; + event_entry.arn_topic = topic.dest.arn_topic; + event_entry.time_to_live = topic.dest.time_to_live; + event_entry.max_retries = topic.dest.max_retries; + event_entry.retry_sleep_duration = topic.dest.retry_sleep_duration; const auto topic_persistency_ttl = event_entry.time_to_live != DEFAULT_GLOBAL_VALUE ? 
event_entry.time_to_live : conf->rgw_topic_persistency_time_to_live; const auto topic_persistency_max_retries = event_entry.max_retries != DEFAULT_GLOBAL_VALUE ? @@ -282,31 +248,19 @@ private: << " retry_number: " << entry_persistency_tracker.retires_num << " current time: " << time_now << dendl; - try { - // TODO move endpoint creation to queue level - const auto push_endpoint = RGWPubSubEndpoint::create(event_entry.push_endpoint, event_entry.arn_topic, - RGWHTTPArgs(event_entry.push_endpoint_args, this), - cct); - ldpp_dout(this, 20) << "INFO: push endpoint created: " << event_entry.push_endpoint << - " for entry: " << entry.marker << dendl; - const auto ret = push_endpoint->send(event_entry.event, yield); - if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: push entry marker: " << entry.marker - << " failed. error: " << ret - << " (will retry) for event with " << event_entry - << dendl; - return EntryProcessingResult::Failure; - } else { - ldpp_dout(this, 5) << "INFO: push entry marker: " << entry.marker - << " ok for event with " << event_entry << dendl; - if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); - return EntryProcessingResult::Successful; - } - } catch (const RGWPubSubEndpoint::configuration_error& e) { - ldpp_dout(this, 5) << "WARNING: failed to create push endpoint: " - << event_entry.push_endpoint << " for entry: " << entry.marker << ". error: " << e.what() << " (will retry) " << dendl; + const auto ret = push_endpoint->send(this, event_entry.event, yield); + if (ret < 0) { + ldpp_dout(this, 5) << "WARNING: push entry marker: " << entry.marker + << " failed. error: " << ret + << " (will retry) for event with " << event_entry + << dendl; return EntryProcessingResult::Failure; } + ldpp_dout(this, 5) << "INFO: push entry marker: " << entry.marker + << " ok for event with " << event_entry << dendl; + if (perfcounter) + perfcounter->inc(l_rgw_pubsub_push_ok); + return EntryProcessingResult::Successful; } // clean stale reservation from queue @@ -369,6 +323,42 @@ private: return ret; } + int get_topic_info(const std::string& queue_name, + const cls_queue_entry& queue_entry, + rgw_pubsub_topic& topic, + boost::asio::yield_context yield) { + std::string queue_topic_tenant; + std::string queue_topic_name; + parse_topic_metadata_key(queue_name, queue_topic_tenant, queue_topic_name); + rgw_pubsub_topic topic_info; + RGWPubSub ps(&rados_store, queue_topic_tenant, site); + int ret = ps.get_topic(this, queue_topic_name, topic_info, yield, nullptr); + if (ret < 0) { + ldpp_dout(this, 1) << "WARNING: failed to fetch topic: " + << queue_topic_name << " error: " << ret + << ". using cached topic attributes!" << dendl; + event_entry_t event_entry; + auto iter = queue_entry.data.cbegin(); + try { + decode(event_entry, iter); + } catch (buffer::error& err) { + ldpp_dout(this, 1) << "ERROR: failed to decode entry. 
error: " + << err.what() << dendl; + return -EIO; + } + topic_info.dest.push_endpoint = event_entry.push_endpoint; + topic_info.dest.push_endpoint_args = event_entry.push_endpoint_args; + topic_info.dest.arn_topic = event_entry.arn_topic; + topic_info.dest.arn_topic = event_entry.arn_topic; + topic_info.dest.time_to_live = event_entry.time_to_live; + topic_info.dest.max_retries = event_entry.max_retries; + topic_info.dest.retry_sleep_duration = event_entry.retry_sleep_duration; + topic_info.opaque_data = event_entry.event.opaque_data; + } + topic = std::move(topic_info); + return 0; + } + // processing of a specific queue void process_queue(const std::string& queue_name, boost::asio::yield_context yield) { constexpr auto max_elements = 1024; @@ -448,7 +438,25 @@ private: // log when queue is not idle ldpp_dout(this, 20) << "INFO: found: " << total_entries << " entries in: " << queue_name << ". end marker is: " << end_marker << dendl; - + rgw_pubsub_topic topic_info; + if (get_topic_info(queue_name, entries.front(), topic_info, yield) < 0) { + continue; + } + RGWPubSubEndpoint::Ptr push_endpoint; + try { + push_endpoint = RGWPubSubEndpoint::create( + topic_info.dest.push_endpoint, topic_info.dest.arn_topic, + RGWHTTPArgs(topic_info.dest.push_endpoint_args, this), cct); + ldpp_dout(this, 20) + << "INFO: push endpoint created: " << topic_info.dest.push_endpoint + << dendl; + } catch (const RGWPubSubEndpoint::configuration_error& e) { + ldpp_dout(this, 5) << "WARNING: failed to create push endpoint: " + << topic_info.dest.push_endpoint + << ". error: " << e.what() + << " (will retry sending events) " << dendl; + continue; + } is_idle = false; auto has_error = false; auto remove_entries = false; @@ -463,11 +471,16 @@ private: entries_persistency_tracker& notifs_persistency_tracker = topics_persistency_tracker[queue_name]; boost::asio::spawn(yield, std::allocator_arg, make_stack_allocator(), - [this, ¬ifs_persistency_tracker, &queue_name, entry_idx, total_entries, &end_marker, - &remove_entries, &has_error, &waiter, &entry, &needs_migration_vector](boost::asio::yield_context yield) { + [this, ¬ifs_persistency_tracker, &queue_name, entry_idx, + total_entries, &end_marker, &remove_entries, &has_error, &waiter, + &entry, &needs_migration_vector, + push_endpoint = push_endpoint.get(), + &topic_info](boost::asio::yield_context yield) { const auto token = waiter.make_token(); auto& persistency_tracker = notifs_persistency_tracker[entry.marker]; - auto result = process_entry(this->get_cct()->_conf, persistency_tracker, entry, yield); + auto result = + process_entry(this->get_cct()->_conf, persistency_tracker, + entry, push_endpoint, topic_info, yield); if (result == EntryProcessingResult::Successful || result == EntryProcessingResult::Expired || result == EntryProcessingResult::Migrating) { ldpp_dout(this, 20) << "INFO: processing of entry: " << entry.marker @@ -767,9 +780,11 @@ public: throw err; } }); - const auto rc = ceph_pthread_setname(workers.back().native_handle(), - (WORKER_THREAD_NAME+std::to_string(worker_id)).c_str()); - ceph_assert(rc == 0); + const auto thread_name = WORKER_THREAD_NAME+std::to_string(worker_id); + if (const auto rc = ceph_pthread_setname(workers.back().native_handle(), thread_name.c_str()); rc != 0) { + ldpp_dout(this, 1) << "ERROR: failed to set notification manager thread name to: " << thread_name + << ". 
error: " << rc << dendl; + } } ldpp_dout(this, 10) << "INfO: started notification manager with: " << worker_count << " workers" << dendl; } @@ -1111,7 +1126,7 @@ int publish_reserve(const DoutPrefixProvider* dpp, // either the topic is deleted but the corresponding notification // still exist or in v2 mode the notification could have synced first // but topic is not synced yet. - return 0; + continue; } ldpp_dout(res.dpp, 1) << "WARN: Using the stored topic from bucket notification struct." @@ -1251,7 +1266,7 @@ int publish_commit(rgw::sal::Object* obj, dpp->get_cct()); ldpp_dout(res.dpp, 20) << "INFO: push endpoint created: " << topic.cfg.dest.push_endpoint << dendl; - const auto ret = push_endpoint->send(event_entry.event, res.yield); + const auto ret = push_endpoint->send(dpp, event_entry.event, res.yield); if (ret < 0) { ldpp_dout(dpp, 1) << "ERROR: failed to push sync notification event with error: " diff --git a/src/rgw/driver/rados/rgw_pubsub_push.cc b/src/rgw/driver/rados/rgw_pubsub_push.cc index b5b97c9ba62..4e73eeb770a 100644 --- a/src/rgw/driver/rados/rgw_pubsub_push.cc +++ b/src/rgw/driver/rados/rgw_pubsub_push.cc @@ -9,6 +9,7 @@ #include "common/Formatter.h" #include "common/iso_8601.h" #include "common/async/completion.h" +#include "rgw_asio_thread.h" #include "rgw_common.h" #include "rgw_data_sync.h" #include "rgw_pubsub.h" @@ -88,7 +89,8 @@ public: } } - int send(const rgw_pubsub_s3_event& event, optional_yield y) override { + int send(const DoutPrefixProvider* dpp, const rgw_pubsub_s3_event& event, + optional_yield y) override { std::shared_lock lock(s_http_manager_mutex); if (!s_http_manager) { ldout(cct, 1) << "ERROR: send failed. http endpoint manager not running" << dendl; @@ -114,7 +116,7 @@ public: if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending); auto rc = s_http_manager->add_request(&request); if (rc == 0) { - rc = request.wait(y); + rc = request.wait(dpp, y); } if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending); // TODO: use read_bl to process return code and handle according to ack level @@ -144,7 +146,7 @@ class Waiter { mutable std::condition_variable cond; public: - int wait(optional_yield y) { + int wait(const DoutPrefixProvider* dpp, optional_yield y) { std::unique_lock l{lock}; if (done) { return ret; @@ -160,6 +162,8 @@ public: }, token, yield.get_executor()); return -ec.value(); } + maybe_warn_about_blocking(dpp); + cond.wait(l, [this]{return (done==true);}); return ret; } @@ -247,7 +251,7 @@ public: } } - int send(const rgw_pubsub_s3_event& event, optional_yield y) override { + int send(const DoutPrefixProvider* dpp, const rgw_pubsub_s3_event& event, optional_yield y) override { if (ack_level == ack_level_t::None) { return amqp::publish(conn_id, topic, json_format_pubsub_event(event)); } else { @@ -262,7 +266,7 @@ public: // failed to publish, does not wait for reply return rc; } - return w->wait(y); + return w->wait(dpp, y); } } @@ -289,8 +293,7 @@ private: }; const std::string topic; const ack_level_t ack_level; - std::string conn_name; - + kafka::connection_id_t conn_id; ack_level_t get_ack_level(const RGWHTTPArgs& args) { bool exists; @@ -311,38 +314,36 @@ public: const RGWHTTPArgs& args) : topic(_topic), ack_level(get_ack_level(args)) { - if (!kafka::connect(conn_name, _endpoint, - get_bool(args, "use-ssl", false), - get_bool(args, "verify-ssl", true), - args.get_optional("ca-location"), - args.get_optional("mechanism"), - args.get_optional("user-name"), - args.get_optional("password"))) { - throw configuration_error("Kafka: 
failed to create connection to: " + _endpoint); - } - } - - int send(const rgw_pubsub_s3_event& event, optional_yield y) override { + if (!kafka::connect( + conn_id, _endpoint, get_bool(args, "use-ssl", false), + get_bool(args, "verify-ssl", true), args.get_optional("ca-location"), + args.get_optional("mechanism"), args.get_optional("user-name"), + args.get_optional("password"))) { + throw configuration_error("Kafka: failed to create connection to: " + + _endpoint); + } + } + + int send(const DoutPrefixProvider* dpp, const rgw_pubsub_s3_event& event, + optional_yield y) override { if (ack_level == ack_level_t::None) { - return kafka::publish(conn_name, topic, json_format_pubsub_event(event)); + return kafka::publish(conn_id, topic, json_format_pubsub_event(event)); } else { auto w = std::make_unique<Waiter>(); - const auto rc = kafka::publish_with_confirm(conn_name, - topic, - json_format_pubsub_event(event), - [wp = w.get()](int r) { wp->finish(r); } - ); + const auto rc = kafka::publish_with_confirm( + conn_id, topic, json_format_pubsub_event(event), + [wp = w.get()](int r) { wp->finish(r); }); if (rc < 0) { // failed to publish, does not wait for reply return rc; } - return w->wait(y); + return w->wait(dpp, y); } } std::string to_str() const override { std::string str("Kafka Endpoint"); - str += "\nBroker: " + conn_name; + str += "\nBroker: " + to_string(conn_id); str += "\nTopic: " + topic; return str; } diff --git a/src/rgw/driver/rados/rgw_pubsub_push.h b/src/rgw/driver/rados/rgw_pubsub_push.h index bacebfba44c..84207c5a1d7 100644 --- a/src/rgw/driver/rados/rgw_pubsub_push.h +++ b/src/rgw/driver/rados/rgw_pubsub_push.h @@ -8,6 +8,7 @@ #include "include/common_fwd.h" #include "common/async/yield_context.h" +class DoutPrefixProvider; class RGWHTTPArgs; struct rgw_pubsub_s3_event; @@ -28,7 +29,9 @@ public: // this method is used in order to send notification and wait for completion // in async manner via a coroutine when invoked in the frontend environment - virtual int send(const rgw_pubsub_s3_event& event, optional_yield y) = 0; + virtual int send(const DoutPrefixProvider* dpp, + const rgw_pubsub_s3_event& event, + optional_yield y) = 0; // present as string virtual std::string to_str() const = 0; diff --git a/src/rgw/driver/rados/rgw_putobj_processor.cc b/src/rgw/driver/rados/rgw_putobj_processor.cc index d41678cdb06..79be7b3209b 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.cc +++ b/src/rgw/driver/rados/rgw_putobj_processor.cc @@ -338,19 +338,21 @@ int AtomicObjectProcessor::prepare(optional_yield y) return 0; } -int AtomicObjectProcessor::complete(size_t accounted_size, - const std::string& etag, - ceph::real_time *mtime, - ceph::real_time set_mtime, - rgw::sal::Attrs& attrs, - ceph::real_time delete_at, - const char *if_match, - const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, - bool *pcanceled, - const req_context& rctx, - uint32_t flags) +int AtomicObjectProcessor::complete( + size_t accounted_size, + const std::string& etag, + ceph::real_time *mtime, + ceph::real_time set_mtime, + rgw::sal::Attrs& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, + const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -486,19 +488,21 @@ int MultipartObjectProcessor::prepare(optional_yield y) return prepare_head(); } -int 
MultipartObjectProcessor::complete(size_t accounted_size, - const std::string& etag, - ceph::real_time *mtime, - ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, - ceph::real_time delete_at, - const char *if_match, - const char *if_nomatch, - const std::string *user_data, - rgw_zone_set *zones_trace, - bool *pcanceled, - const req_context& rctx, - uint32_t flags) +int MultipartObjectProcessor::complete( + size_t accounted_size, + const std::string& etag, + ceph::real_time *mtime, + ceph::real_time set_mtime, + std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, + const char *if_match, + const char *if_nomatch, + const std::string *user_data, + rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -541,6 +545,7 @@ int MultipartObjectProcessor::complete(size_t accounted_size, } info.num = part_num; info.etag = etag; + info.cksum = cksum; info.size = actual_size; info.accounted_size = accounted_size; info.modified = real_clock::now(); @@ -701,11 +706,16 @@ int AppendObjectProcessor::prepare(optional_yield y) return 0; } -int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, ceph::real_time *mtime, - ceph::real_time set_mtime, rgw::sal::Attrs& attrs, - ceph::real_time delete_at, const char *if_match, const char *if_nomatch, - const string *user_data, rgw_zone_set *zones_trace, bool *pcanceled, - const req_context& rctx, uint32_t flags) +int AppendObjectProcessor::complete( + size_t accounted_size, + const string &etag, ceph::real_time *mtime, + ceph::real_time set_mtime, rgw::sal::Attrs& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, const char *if_match, + const char *if_nomatch, + const string *user_data, rgw_zone_set *zones_trace, + bool *pcanceled, + const req_context& rctx, uint32_t flags) { int r = writer.drain(); if (r < 0) @@ -764,7 +774,8 @@ int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c } r = obj_op.write_meta(actual_size + cur_size, accounted_size + *cur_accounted_size, - attrs, rctx, writer.get_trace(), flags & rgw::sal::FLAG_LOG_OP); + attrs, rctx, writer.get_trace(), + flags & rgw::sal::FLAG_LOG_OP); if (r < 0) { return r; } diff --git a/src/rgw/driver/rados/rgw_putobj_processor.h b/src/rgw/driver/rados/rgw_putobj_processor.h index 655428f83e9..3f3b0b31fcf 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.h +++ b/src/rgw/driver/rados/rgw_putobj_processor.h @@ -193,6 +193,7 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -241,6 +242,7 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -279,7 +281,9 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { int prepare(optional_yield y) override; int complete(size_t accounted_size, const std::string& etag, 
ceph::real_time *mtime, ceph::real_time set_mtime, - std::map<std::string, bufferlist>& attrs, ceph::real_time delete_at, + std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, + ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, const req_context& rctx, diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index ec1bd23f59c..b010a63d443 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -23,6 +23,7 @@ #include "common/BackTrace.h" #include "common/ceph_time.h" +#include "rgw_cksum.h" #include "rgw_sal.h" #include "rgw_zone.h" #include "rgw_cache.h" @@ -3055,7 +3056,7 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx, /* Need to remove the archived copy. */ ret = delete_obj(dpp, obj_ctx, archive_binfo, archive_obj, - archive_binfo.versioning_status(), y); + archive_binfo.versioning_status(), y, false); return ret; }; @@ -4083,7 +4084,7 @@ int RGWRados::stat_remote_obj(const DoutPrefixProvider *dpp, return ret; } - ret = conn->complete_request(in_stream_req, nullptr, &set_mtime, psize, + ret = conn->complete_request(dpp, in_stream_req, nullptr, &set_mtime, psize, nullptr, pheaders, y); if (ret < 0) { if (ret == -EIO && tries < NUM_ENPOINT_IOERROR_RETRIES - 1) { @@ -4318,7 +4319,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, goto set_err_state; } - ret = conn->complete_request(in_stream_req, &etag, &set_mtime, + ret = conn->complete_request(rctx.dpp, in_stream_req, &etag, &set_mtime, &accounted_size, nullptr, nullptr, rctx.y); if (ret < 0) { if (ret == -EIO && tries < NUM_ENPOINT_IOERROR_RETRIES - 1) { @@ -4489,8 +4490,9 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, for (i = 0; i < MAX_COMPLETE_RETRY; i++) { bool canceled = false; ret = processor.complete(accounted_size, etag, mtime, set_mtime, - attrs, delete_at, nullptr, nullptr, nullptr, - zones_trace, &canceled, rctx, rgw::sal::FLAG_LOG_OP); + attrs, rgw::cksum::no_cksum, delete_at, nullptr, nullptr, + nullptr, zones_trace, &canceled, rctx, + rgw::sal::FLAG_LOG_OP); if (ret < 0) { goto set_err_state; } @@ -4578,7 +4580,7 @@ int RGWRados::copy_obj_to_remote_dest(const DoutPrefixProvider *dpp, return ret; } - ret = rest_master_conn->complete_request(out_stream_req, etag, mtime, y); + ret = rest_master_conn->complete_request(dpp, out_stream_req, etag, mtime, y); if (ret < 0) { if (ret == -EIO && tries < NUM_ENPOINT_IOERROR_RETRIES - 1) { ldpp_dout(dpp, 20) << __func__ << "(): failed to put_obj_async_init. retries=" << tries << dendl; @@ -5047,7 +5049,8 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, } const req_context rctx{dpp, y, nullptr}; - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + rgw::cksum::no_cksum, delete_at, nullptr, nullptr, nullptr, nullptr, nullptr, rctx, log_op ? 
rgw::sal::FLAG_LOG_OP : 0); } @@ -5473,7 +5476,7 @@ static int resync_encrypted_multipart(const DoutPrefixProvider* dpp, }; return store->set_attrs(dpp, &obj_ctx, bucket_info, state.obj, - add_attrs, nullptr, y, set_mtime); + add_attrs, nullptr, y, true, set_mtime); } static void try_resync_encrypted_multipart(const DoutPrefixProvider* dpp, @@ -5738,7 +5741,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi } result.delete_marker = dirent.is_delete_marker(); r = store->unlink_obj_instance(dpp, target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, - y, params.zones_trace, add_log); + y, params.bilog_flags, params.null_verid, params.zones_trace, add_log); if (r < 0) { return r; } @@ -5834,6 +5837,11 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi index_op.set_zones_trace(params.zones_trace); index_op.set_bilog_flags(params.bilog_flags); + if (params.null_verid) { + index_op.set_bilog_flags(params.bilog_flags | RGW_BILOG_NULL_VERSION); + } + + r = index_op.prepare(dpp, CLS_RGW_OP_DEL, &state->write_tag, y, log_op); if (r < 0) return r; @@ -5885,6 +5893,7 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw_obj& obj, int versioning_status, optional_yield y,// versioning flags defined in enum RGWBucketFlags + bool null_verid, uint16_t bilog_flags, const real_time& expiration_time, rgw_zone_set *zones_trace, @@ -5898,6 +5907,7 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, del_op.params.bilog_flags = bilog_flags; del_op.params.expiration_time = expiration_time; del_op.params.zones_trace = zones_trace; + del_op.params.null_verid = null_verid; return del_op.delete_obj(y, dpp, log_op ? rgw::sal::FLAG_LOG_OP : 0); } @@ -6472,13 +6482,14 @@ int RGWRados::set_attr(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBuc { map<string, bufferlist> attrs; attrs[name] = bl; - return set_attrs(dpp, octx, bucket_info, obj, attrs, NULL, y); + return set_attrs(dpp, octx, bucket_info, obj, attrs, NULL, y, true); } int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBucketInfo& bucket_info, const rgw_obj& src_obj, map<string, bufferlist>& attrs, map<string, bufferlist>* rmattrs, optional_yield y, + bool log_op, ceph::real_time set_mtime /* = zero() */) { rgw_obj obj = src_obj; @@ -6550,7 +6561,7 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu string tag; append_rand_alpha(cct, tag, tag, 32); state->write_tag = tag; - r = index_op.prepare(dpp, CLS_RGW_OP_ADD, &state->write_tag, y); + r = index_op.prepare(dpp, CLS_RGW_OP_ADD, &state->write_tag, y, log_op); if (r < 0) return r; @@ -6594,9 +6605,9 @@ int RGWRados::set_attrs(const DoutPrefixProvider *dpp, RGWObjectCtx* octx, RGWBu int64_t poolid = ioctx.get_id(); r = index_op.complete(dpp, poolid, epoch, state->size, state->accounted_size, mtime, etag, content_type, storage_class, owner, - RGWObjCategory::Main, nullptr, y); + RGWObjCategory::Main, nullptr, y, nullptr, false, log_op); } else { - int ret = index_op.cancel(dpp, nullptr, y); + int ret = index_op.cancel(dpp, nullptr, y, log_op); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: complete_update_index_cancel() returned ret=" << ret << dendl; } @@ -6639,12 +6650,15 @@ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y, } // navigate to the requested part in the manifest RGWObjManifest::obj_iterator end = manifest->obj_end(dpp); - if (end.get_cur_part_id() == 0) { // not multipart 
+ const int last_part_id = end.get_cur_part_id(); + if (last_part_id == 0) { // not multipart ldpp_dout(dpp, 20) << "object does not have a multipart manifest" << dendl; return -ERR_INVALID_PART; } if (parts_count) { - *parts_count = end.get_cur_part_id() - 1; + // when a multipart upload only contains a single part, the last part id + // is off by one. don't let parts_count go to 0 + *parts_count = std::max(1, last_part_id - 1); } ldpp_dout(dpp, 20) << "seeking to part #" << part_num << " in the object manifest" << dendl; @@ -6704,7 +6718,7 @@ static int get_part_obj_state(const DoutPrefixProvider* dpp, optional_yield y, do { ++iter; gen.create_next(iter.get_ofs() - part_offset); - } while (iter.get_cur_part_id() == part_num); + } while (iter != end && iter.get_cur_part_id() == part_num); // update the object size sm->state.size = part_manifest.get_obj_size(); @@ -6736,6 +6750,15 @@ int RGWRados::Object::Read::prepare(optional_yield y, const DoutPrefixProvider * if (r < 0) return r; + if (manifest /* params.parts_count */) { + RGWObjManifest::obj_iterator end = manifest->obj_end(dpp); + auto cur_part_id = end.get_cur_part_id(); + if (cur_part_id != 0 ) { + /* end.get_cur_part_id() returns 0 for non-multipart manifests */ + params.parts_count = (cur_part_id == 1) ? 1 : cur_part_id - 1; + } + } + if (!astate->exists) { return -ENOENT; } @@ -7769,7 +7792,7 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, } // if taking of lock succeeded } // block to encapsulate recovery from incomplete reshard - ret = reshard_wait->wait(y); + ret = reshard_wait->wait(dpp, y); if (ret < 0) { ldpp_dout(dpp, 0) << __func__ << " ERROR: bucket is still resharding, please retry" << dendl; @@ -7841,6 +7864,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, optional_yield y, + uint16_t bilog_flags, rgw_zone_set *_zones_trace, bool log_op) { rgw_rados_ref ref; @@ -7865,7 +7889,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, op.assert_exists(); // bucket index shard must exist cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); cls_rgw_bucket_unlink_instance(op, key, op_tag, - olh_tag, olh_epoch, log_op, zones_trace); + olh_tag, olh_epoch, log_op, bilog_flags, zones_trace); return rgw_rados_operate(dpp, ref.ioctx, ref.obj.oid, &op, y); }, y); if (r < 0) { @@ -8068,8 +8092,10 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, bufferlist& olh_tag, std::map<uint64_t, std::vector<rgw_bucket_olh_log_entry> >& log, uint64_t *plast_ver, - optional_yield y, rgw_zone_set* zones_trace, - bool log_op) + optional_yield y, + bool null_verid, + rgw_zone_set* zones_trace, + bool log_op) { if (log.empty()) { return 0; @@ -8182,7 +8208,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, liter != remove_instances.end(); ++liter) { cls_rgw_obj_key& key = *liter; rgw_obj obj_instance(bucket, key); - int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, y, RGW_BILOG_FLAG_VERSIONED_OP, ceph::real_time(), zones_trace, log_op); + int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, y, null_verid, RGW_BILOG_FLAG_VERSIONED_OP, ceph::real_time(), zones_trace, log_op); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: delete_obj() returned " << ret << " obj_instance=" << obj_instance << dendl; return ret; @@ -8286,7 +8312,7 @@ int RGWRados::clear_olh(const DoutPrefixProvider *dpp, /* * read olh log and apply it 
*/ -int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y, rgw_zone_set *zones_trace, bool log_op) +int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y, rgw_zone_set *zones_trace, bool null_verid, bool log_op) { map<uint64_t, vector<rgw_bucket_olh_log_entry> > log; bool is_truncated; @@ -8297,7 +8323,7 @@ int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, R if (ret < 0) { return ret; } - ret = apply_olh_log(dpp, obj_ctx, *state, bucket_info, obj, state->olh_tag, log, &ver_marker, y, zones_trace, log_op); + ret = apply_olh_log(dpp, obj_ctx, *state, bucket_info, obj, state->olh_tag, log, &ver_marker, y, null_verid, zones_trace, log_op); if (ret < 0) { return ret; } @@ -8399,7 +8425,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, } int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, - uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace, bool log_op) + uint64_t olh_epoch, optional_yield y, uint16_t bilog_flags, bool null_verid, rgw_zone_set *zones_trace, bool log_op) { string op_tag; @@ -8438,7 +8464,13 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o std::this_thread::sleep_for(cct->_conf->rgw_debug_inject_latency_bi_unlink * std::chrono::seconds{1}); } - ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, y, zones_trace, log_op); + if (null_verid) { + bilog_flags = bilog_flags | RGW_BILOG_FLAG_VERSIONED_OP | RGW_BILOG_NULL_VERSION; + } else { + bilog_flags = bilog_flags | RGW_BILOG_FLAG_VERSIONED_OP; + } + + ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, y, bilog_flags, zones_trace, log_op); if (ret < 0) { olh_cancel_modification(dpp, bucket_info, *state, olh_obj, op_tag, y); ldpp_dout(dpp, 20) << "bucket_index_unlink_instance() target_obj=" << target_obj << " returned " << ret << dendl; @@ -8448,7 +8480,7 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. 
- int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_op); + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, null_verid, log_op); if (r < 0 && r != -ECANCELED) { ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; } @@ -8462,7 +8494,7 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o return -EIO; } - ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, log_op); + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y, zones_trace, null_verid, log_op); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ return 0; } diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 2a2b4d85edc..278d1182b4d 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -841,6 +841,7 @@ public: struct DeleteParams { rgw_owner bucket_owner; // for quota stats update int versioning_status; // versioning flags defined in enum RGWBucketFlags + bool null_verid; ACLOwner obj_owner; // needed for creation of deletion marker uint64_t olh_epoch; std::string marker_version_id; @@ -854,7 +855,7 @@ public: bool abortmp; uint64_t parts_accounted_size; - DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {} + DeleteParams() : versioning_status(0), null_verid(false), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr), abortmp(false), parts_accounted_size(0) {} } params; struct DeleteResult { @@ -968,6 +969,10 @@ public: bilog_flags = flags; } + int get_bilog_flags() { + return bilog_flags; + } + void set_zones_trace(rgw_zone_set *_zones_trace) { zones_trace = _zones_trace; } @@ -1259,6 +1264,7 @@ public: const RGWBucketInfo& bucket_info, const rgw_obj& obj, int versioning_status, optional_yield y, // versioning flags defined in enum RGWBucketFlags + bool null_verid, uint16_t bilog_flags = 0, const ceph::real_time& expiration_time = ceph::real_time(), rgw_zone_set *zones_trace = nullptr, @@ -1284,6 +1290,7 @@ public: std::map<std::string, bufferlist>& attrs, std::map<std::string, bufferlist>* rmattrs, optional_yield y, + bool log_op, ceph::real_time set_mtime = ceph::real_clock::zero()); int get_obj_state(const DoutPrefixProvider *dpp, RGWObjectCtx *rctx, @@ -1353,6 +1360,7 @@ public: const rgw_obj& obj_instance, const std::string& op_tag, const std::string& olh_tag, uint64_t olh_epoch, optional_yield y, + uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int bucket_index_read_olh_log(const DoutPrefixProvider *dpp, @@ -1363,9 +1371,9 @@ public: int bucket_index_clear_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const std::string& olh_tag, const rgw_obj& obj_instance, optional_yield y); int apply_olh_log(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, const rgw_obj& obj, bufferlist& obj_tag, std::map<uint64_t, std::vector<rgw_bucket_olh_log_entry> >& log, - uint64_t *plast_ver, optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_op = true); + uint64_t *plast_ver, optional_yield y, bool null_verid, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, 
const rgw_obj& obj, optional_yield y, - rgw_zone_set *zones_trace = nullptr, bool log_op = true); + rgw_zone_set *zones_trace = nullptr, bool null_verid = false, bool log_op = true); int clear_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, const rgw_obj& obj, @@ -1389,7 +1397,7 @@ public: int repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y); int unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, - uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_op = true); + uint64_t olh_epoch, optional_yield y, uint16_t bilog_flags, bool null_verid, rgw_zone_set *zones_trace = nullptr, bool log_op = true); void check_pending_olh_entries(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist>& pending_entries, std::map<std::string, bufferlist> *rm_pending_entries); int remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::map<std::string, bufferlist>& pending_attrs, optional_yield y); diff --git a/src/rgw/driver/rados/rgw_reshard.cc b/src/rgw/driver/rados/rgw_reshard.cc index 3d38aa29aa0..9bd4aad89a1 100644 --- a/src/rgw/driver/rados/rgw_reshard.cc +++ b/src/rgw/driver/rados/rgw_reshard.cc @@ -7,6 +7,7 @@ #include "rgw_zone.h" #include "driver/rados/rgw_bucket.h" +#include "rgw_asio_thread.h" #include "rgw_reshard.h" #include "rgw_sal.h" #include "rgw_sal_rados.h" @@ -1261,7 +1262,7 @@ int RGWReshard::clear_bucket_resharding(const DoutPrefixProvider *dpp, const str return 0; } -int RGWReshardWait::wait(optional_yield y) +int RGWReshardWait::wait(const DoutPrefixProvider* dpp, optional_yield y) { std::unique_lock lock(mutex); @@ -1285,6 +1286,7 @@ int RGWReshardWait::wait(optional_yield y) waiters.erase(waiters.iterator_to(waiter)); return -ec.value(); } + maybe_warn_about_blocking(dpp); cond.wait_for(lock, duration); diff --git a/src/rgw/driver/rados/rgw_reshard.h b/src/rgw/driver/rados/rgw_reshard.h index 818bf216495..a2097318827 100644 --- a/src/rgw/driver/rados/rgw_reshard.h +++ b/src/rgw/driver/rados/rgw_reshard.h @@ -266,7 +266,7 @@ public: ~RGWReshardWait() { ceph_assert(going_down); } - int wait(optional_yield y); + int wait(const DoutPrefixProvider* dpp, optional_yield y); // unblock any threads waiting on reshard void stop(); }; diff --git a/src/rgw/driver/rados/rgw_rest_user.cc b/src/rgw/driver/rados/rgw_rest_user.cc index 34112c94727..71bb8c4c039 100644 --- a/src/rgw/driver/rados/rgw_rest_user.cc +++ b/src/rgw/driver/rados/rgw_rest_user.cc @@ -157,6 +157,7 @@ void RGWOp_User_Create::execute(optional_yield y) std::string op_mask_str; std::string default_placement_str; std::string placement_tags_str; + std::string default_storage_class_str; bool gen_key; bool suspended; @@ -188,6 +189,7 @@ void RGWOp_User_Create::execute(optional_yield y) RESTArgs::get_bool(s, "exclusive", false, &exclusive); RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str); RESTArgs::get_string(s, "default-placement", default_placement_str, &default_placement_str); + RESTArgs::get_string(s, "default-storage-class", default_storage_class_str, &default_storage_class_str); RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); RESTArgs::get_string(s, "account-id", "", &op_state.account_id); RESTArgs::get_string(s, "path", "", &op_state.path); @@ -251,7 +253,10 @@ void 
RGWOp_User_Create::execute(optional_yield y) if (!default_placement_str.empty()) { rgw_placement_rule target_rule; - target_rule.from_str(default_placement_str); + target_rule.name = default_placement_str; + if (!default_storage_class_str.empty()){ + target_rule.storage_class = default_storage_class_str; + } if (!driver->valid_placement(target_rule)) { ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl; op_ret = -EINVAL; @@ -309,6 +314,7 @@ void RGWOp_User_Modify::execute(optional_yield y) std::string op_mask_str; std::string default_placement_str; std::string placement_tags_str; + std::string default_storage_class_str; bool gen_key; bool suspended; @@ -336,6 +342,7 @@ void RGWOp_User_Modify::execute(optional_yield y) RESTArgs::get_bool(s, "account-root", false, &account_root); RESTArgs::get_string(s, "op-mask", op_mask_str, &op_mask_str); RESTArgs::get_string(s, "default-placement", default_placement_str, &default_placement_str); + RESTArgs::get_string(s, "default-storage-class", default_storage_class_str, &default_storage_class_str); RESTArgs::get_string(s, "placement-tags", placement_tags_str, &placement_tags_str); RESTArgs::get_string(s, "account-id", "", &op_state.account_id); RESTArgs::get_string(s, "path", "", &op_state.path); @@ -404,7 +411,10 @@ void RGWOp_User_Modify::execute(optional_yield y) if (!default_placement_str.empty()) { rgw_placement_rule target_rule; - target_rule.from_str(default_placement_str); + target_rule.name = default_placement_str; + if (!default_storage_class_str.empty()){ + target_rule.storage_class = default_storage_class_str; + } if (!driver->valid_placement(target_rule)) { ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << target_rule.to_str() << dendl; op_ret = -EINVAL; diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index eace9df5f3f..aa63743a249 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -59,6 +59,8 @@ #include "services/svc_meta.h" #include "services/svc_meta_be_sobj.h" #include "services/svc_cls.h" +#include "services/svc_bilog_rados.h" +#include "services/svc_bi_rados.h" #include "services/svc_zone.h" #include "services/svc_tier_rados.h" #include "services/svc_quota.h" @@ -2254,6 +2256,40 @@ RadosObject::~RadosObject() delete rados_ctx; } +bool RadosObject::is_sync_completed(const DoutPrefixProvider* dpp, + const ceph::real_time& obj_mtime) +{ + const auto& bucket_info = get_bucket()->get_info(); + if (bucket_info.is_indexless()) { + ldpp_dout(dpp, 0) << "ERROR: Trying to check object replication status for object in an indexless bucket. 
obj=" << get_key() << dendl; + return false; + } + + const auto& log_layout = bucket_info.layout.logs.front(); + const uint32_t shard_count = num_shards(log_to_index_layout(log_layout)); + + std::string marker; + bool truncated; + list<rgw_bi_log_entry> entries; + + const int shard_id = RGWSI_BucketIndex_RADOS::bucket_shard_index(get_key(), shard_count); + + int ret = store->svc()->bilog_rados->log_list(dpp, bucket_info, log_layout, shard_id, + marker, 1, entries, &truncated); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: Failed to retrieve bilog info for obj=" << get_key() << dendl; + return false; + } + + if (entries.empty()) { + return true; + } + + const rgw_bi_log_entry& earliest_marker = entries.front(); + return earliest_marker.timestamp > obj_mtime; +} + int RadosObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh) { RGWObjState *pstate{nullptr}; @@ -2286,15 +2322,19 @@ int RadosObject::read_attrs(const DoutPrefixProvider* dpp, RGWRados::Object::Rea return read_op.prepare(y, dpp); } -int RadosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) +int RadosObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) { Attrs empty; + const bool log_op = flags & rgw::sal::FLAG_LOG_OP; + // make a tiny adjustment to the existing mtime so that fetch_remote_obj() + // won't return ERR_NOT_MODIFIED when syncing the modified object + const auto mtime = log_op ? state.mtime + std::chrono::nanoseconds(1) : state.mtime; return store->getRados()->set_attrs(dpp, rados_ctx, bucket->get_info(), get_obj(), setattrs ? *setattrs : empty, delattrs ? delattrs : nullptr, - y); + y, log_op, mtime); } int RadosObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj) @@ -2318,7 +2358,7 @@ int RadosObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, o state.obj = target; set_atomic(); state.attrset[attr_name] = attr_val; - r = set_obj_attrs(dpp, &state.attrset, nullptr, y); + r = set_obj_attrs(dpp, &state.attrset, nullptr, y, rgw::sal::FLAG_LOG_OP); /* Restore target */ state.obj = save; @@ -2332,7 +2372,7 @@ int RadosObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* att set_atomic(); rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); + return set_obj_attrs(dpp, nullptr, &rmattr, y, rgw::sal::FLAG_LOG_OP); } bool RadosObject::is_expired() { @@ -2485,7 +2525,7 @@ int RadosObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y set_atomic(); map<string, bufferlist> attrs; attrs[RGW_ATTR_ACL] = bl; - r = set_obj_attrs(dpp, &attrs, nullptr, y); + r = set_obj_attrs(dpp, &attrs, nullptr, y, rgw::sal::FLAG_LOG_OP); if (r < 0) { ldpp_dout(dpp, 0) << "ERROR: modify attr failed " << cpp_strerror(-r) << dendl; return r; @@ -2807,6 +2847,7 @@ int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, option parent_op.params.zones_trace = params.zones_trace; parent_op.params.abortmp = params.abortmp; parent_op.params.parts_accounted_size = params.parts_accounted_size; + parent_op.params.null_verid = params.null_verid; int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP); if (ret < 0) @@ -3094,6 +3135,7 @@ int RadosMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, multipart_upload_info upload_info; upload_info.dest_placement = dest_placement; + upload_info.cksum_type = cksum_type; if (obj_legal_hold) { 
upload_info.obj_legal_hold_exist = true; @@ -3422,6 +3464,7 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield return 0; } + /* Handle caching */ if (rule) { if (!placement.empty()) { *rule = &placement; @@ -3434,6 +3477,14 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield } } + if (attrs) { + if (!cached_attrs.empty()) { + *attrs = cached_attrs; + if (!rule || *rule != nullptr) + return 0; + } + } + /* We need either attributes or placement, so we need a read */ std::unique_ptr<rgw::sal::Object> meta_obj; meta_obj = get_meta_obj(); @@ -3454,11 +3505,13 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield return ret; } + /* Cache attrs filled in by prepare */ + cached_attrs = meta_obj->get_attrs(); + extract_span_context(meta_obj->get_attrs(), trace_ctx); if (attrs) { - /* Attrs are filled in by prepare */ - *attrs = meta_obj->get_attrs(); + *attrs = cached_attrs; if (!rule || *rule != nullptr) { /* placement was cached; don't actually read */ return 0; @@ -3486,6 +3539,7 @@ int RadosMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield ldpp_dout(dpp, 0) << "ERROR: failed to decode multipart upload info" << dendl; return -EIO; } + cksum_type = upload_info.cksum_type; placement = upload_info.dest_placement; upload_information = upload_info; *rule = &placement; @@ -3685,6 +3739,7 @@ int RadosAtomicWriter::process(bufferlist&& data, uint64_t offset) int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3692,8 +3747,9 @@ int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } int RadosAppendWriter::prepare(optional_yield y) @@ -3709,6 +3765,7 @@ int RadosAppendWriter::process(bufferlist&& data, uint64_t offset) int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3716,8 +3773,9 @@ int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } int RadosMultipartWriter::prepare(optional_yield y) @@ -3730,9 +3788,12 @@ int RadosMultipartWriter::process(bufferlist&& data, uint64_t offset) return processor.process(std::move(data), offset); } -int RadosMultipartWriter::complete(size_t accounted_size, const std::string& etag, +int RadosMultipartWriter::complete( + size_t 
accounted_size, + const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -3740,8 +3801,9 @@ int RadosMultipartWriter::complete(size_t accounted_size, const std::string& eta const req_context& rctx, uint32_t flags) { - return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); + return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, + cksum, delete_at, if_match, if_nomatch, + user_data, zones_trace, canceled, rctx, flags); } bool RadosZoneGroup::placement_target_exists(std::string& target) const diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index f880319b56e..6d4465093bc 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -591,10 +591,13 @@ class RadosObject : public StoreObject { StoreObject::set_compressed(); } + + virtual bool is_sync_completed(const DoutPrefixProvider* dpp, + const ceph::real_time& obj_mtime) override; /* For rgw_admin.cc */ RGWObjState& get_state() { return state; } virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; @@ -773,6 +776,9 @@ public: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return info.cksum; + } /* For RadosStore code */ RGWObjManifest& get_manifest() { return info.manifest; } @@ -789,6 +795,7 @@ class RadosMultipartUpload : public StoreMultipartUpload { rgw_placement_rule placement; RGWObjManifest manifest; multipart_upload_info upload_information; + rgw::sal::Attrs cached_attrs; public: RadosMultipartUpload(RadosStore* _store, Bucket* _bucket, const std::string& oid, @@ -977,6 +984,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1026,6 +1034,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1072,6 +1081,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time 
set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/driver/rados/rgw_sync.cc b/src/rgw/driver/rados/rgw_sync.cc index a9ea2ecf549..bd730dfd6c2 100644 --- a/src/rgw/driver/rados/rgw_sync.cc +++ b/src/rgw/driver/rados/rgw_sync.cc @@ -505,7 +505,7 @@ public: return io_block(0); } yield { - op_ret = http_op->wait(shard_info, null_yield); + op_ret = http_op->wait(dpp, shard_info, null_yield); http_op->put(); } @@ -586,7 +586,7 @@ public: } int request_complete() override { - int ret = http_op->wait(result, null_yield); + int ret = http_op->wait(sync_env->dpp, result, null_yield); http_op->put(); if (ret < 0 && ret != -ENOENT) { ldpp_dout(sync_env->dpp, 5) << "ERROR: failed to list remote mdlog shard, ret=" << ret << dendl; @@ -1089,7 +1089,7 @@ public: return io_block(0); } yield { - op_ret = http_op->wait(pbl, null_yield); + op_ret = http_op->wait(dpp, pbl, null_yield); http_op->put(); } @@ -1481,6 +1481,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine { bool done_with_period = false; int total_entries = 0; + string old_mdlog_marker; RGWSyncTraceNodeRef tn; public: @@ -1832,6 +1833,7 @@ public: if (mdlog_marker <= max_marker || !truncated) { /* we're at the tip, try to bring more entries */ ldpp_dout(sync_env->dpp, 20) << __func__ << ":" << __LINE__ << ": shard_id=" << shard_id << " syncing mdlog for shard_id=" << shard_id << dendl; + old_mdlog_marker = mdlog_marker; yield call(new RGWCloneMetaLogCoroutine(sync_env, mdlog, period, shard_id, mdlog_marker, &mdlog_marker)); @@ -1902,7 +1904,8 @@ public: tn->log(10, SSTR(*this << ": done with period")); break; } - if (mdlog_marker == max_marker && can_adjust_marker) { + if (mdlog_marker == old_mdlog_marker && can_adjust_marker) { + tn->log(20, SSTR("mdlog_marker=" << mdlog_marker << " old_mdlog_marker=" << old_mdlog_marker)); tn->unset_flag(RGW_SNS_FLAG_ACTIVE); yield wait(utime_t(cct->_conf->rgw_meta_sync_poll_interval, 0)); } @@ -2544,7 +2547,7 @@ int RGWCloneMetaLogCoroutine::state_send_rest_request(const DoutPrefixProvider * int RGWCloneMetaLogCoroutine::state_receive_rest_response() { - op_ret = http_op->wait(&data, null_yield); + op_ret = http_op->wait(sync_env->dpp, &data, null_yield); if (op_ret < 0 && op_ret != -EIO) { error_stream << "http operation failed: " << http_op->to_str() << " status=" << http_op->get_http_status() << std::endl; ldpp_dout(sync_env->dpp, 5) << "failed to wait for op, ret=" << op_ret << dendl; diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc index eec4a799115..0af353b866f 100644 --- a/src/rgw/driver/rados/rgw_tools.cc +++ b/src/rgw/driver/rados/rgw_tools.cc @@ -11,8 +11,8 @@ #include "rgw_tools.h" #include "rgw_acl_s3.h" #include "rgw_aio_throttle.h" +#include "rgw_asio_thread.h" #include "rgw_compression.h" -#include "common/BackTrace.h" #define dout_subsys ceph_subsys_rgw @@ -212,13 +212,7 @@ int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, con } return -ec.value(); } - // work on asio threads should be asynchronous, so warn when they block - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; -#ifdef _BACKTRACE_LOGGING - ldpp_dout(dpp, 20) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl; -#endif - } + maybe_warn_about_blocking(dpp); return ioctx.operate(oid, op, nullptr, flags); } @@ -232,12 +226,7 @@ int 
rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, con librados::async_operate(yield, ioctx, oid, op, flags, trace_info, yield[ec]); return -ec.value(); } - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; -#ifdef _BACKTRACE_LOGGING - ldpp_dout(dpp, 20) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl; -#endif - } + maybe_warn_about_blocking(dpp); return ioctx.operate(oid, op, flags, trace_info); } @@ -255,12 +244,7 @@ int rgw_rados_notify(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, cons } return -ec.value(); } - if (is_asio_thread) { - ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; -#ifdef _BACKTRACE_LOGGING - ldpp_dout(dpp, 20) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl; -#endif - } + maybe_warn_about_blocking(dpp); return ioctx.notify2(oid, bl, timeout_ms, pbl); } diff --git a/src/rgw/driver/rados/rgw_tools.h b/src/rgw/driver/rados/rgw_tools.h index aa365deb42a..257e513a9f7 100644 --- a/src/rgw/driver/rados/rgw_tools.h +++ b/src/rgw/driver/rados/rgw_tools.h @@ -90,10 +90,6 @@ const char *rgw_find_mime_by_ext(std::string& ext); void rgw_filter_attrset(std::map<std::string, bufferlist>& unfiltered_attrset, const std::string& check_prefix, std::map<std::string, bufferlist> *attrset); -/// indicates whether the current thread is in boost::asio::io_context::run(), -/// used to log warnings if synchronous librados calls are made -extern thread_local bool is_asio_thread; - /// perform the rados operation, using the yield context when given int rgw_rados_operate(const DoutPrefixProvider *dpp, librados::IoCtx& ioctx, const std::string& oid, librados::ObjectReadOperation *op, bufferlist* pbl, diff --git a/src/rgw/driver/rados/rgw_user.cc b/src/rgw/driver/rados/rgw_user.cc index e154813aac1..7a8161e5741 100644 --- a/src/rgw/driver/rados/rgw_user.cc +++ b/src/rgw/driver/rados/rgw_user.cc @@ -1552,6 +1552,7 @@ static void rename_swift_keys(const rgw_user& user, user.to_str(user_id); auto modify_keys = std::move(keys); + keys = {}; for ([[maybe_unused]] auto& [k, key] : modify_keys) { std::string id = user_id + ":" + key.subuser; key.id = id; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index f03d4c5f565..aa2c92fbcab 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -32,6 +32,8 @@ extern "C" { #include "cls/rgw/cls_rgw_types.h" #include "cls/rgw/cls_rgw_client.h" +#include "cls/2pc_queue/cls_2pc_queue_types.h" +#include "cls/2pc_queue/cls_2pc_queue_client.h" #include "include/utime.h" #include "include/str_list.h" @@ -327,6 +329,7 @@ void usage() cout << " topic get get a bucket notifications topic\n"; cout << " topic rm remove a bucket notifications topic\n"; cout << " topic stats get a bucket notifications persistent topic stats (i.e. 
reservations, entries & size)\n"; + cout << " topic dump dump (in JSON format) all pending bucket notifications of a persistent topic\n"; cout << " script put upload a Lua script to a context\n"; cout << " script get get the Lua script of a context\n"; cout << " script rm remove the Lua scripts of a context\n"; @@ -867,6 +870,7 @@ enum class OPT { PUBSUB_NOTIFICATION_GET, PUBSUB_NOTIFICATION_RM, PUBSUB_TOPIC_STATS, + PUBSUB_TOPIC_DUMP, SCRIPT_PUT, SCRIPT_GET, SCRIPT_RM, @@ -1115,6 +1119,7 @@ static SimpleCmd::Commands all_cmds = { { "notification get", OPT::PUBSUB_NOTIFICATION_GET }, { "notification rm", OPT::PUBSUB_NOTIFICATION_RM }, { "topic stats", OPT::PUBSUB_TOPIC_STATS }, + { "topic dump", OPT::PUBSUB_TOPIC_DUMP }, { "script put", OPT::SCRIPT_PUT }, { "script get", OPT::SCRIPT_GET }, { "script rm", OPT::SCRIPT_RM }, @@ -4326,6 +4331,7 @@ int main(int argc, const char **argv) OPT::PUBSUB_TOPIC_GET, OPT::PUBSUB_NOTIFICATION_GET, OPT::PUBSUB_TOPIC_STATS , + OPT::PUBSUB_TOPIC_DUMP , OPT::SCRIPT_GET, }; @@ -4426,6 +4432,7 @@ int main(int argc, const char **argv) && opt_cmd != OPT::PUBSUB_TOPIC_RM && opt_cmd != OPT::PUBSUB_NOTIFICATION_RM && opt_cmd != OPT::PUBSUB_TOPIC_STATS + && opt_cmd != OPT::PUBSUB_TOPIC_DUMP && opt_cmd != OPT::SCRIPT_PUT && opt_cmd != OPT::SCRIPT_GET && opt_cmd != OPT::SCRIPT_RM @@ -11270,9 +11277,10 @@ next: return ENOENT; } + auto ioctx = static_cast<rgw::sal::RadosStore*>(driver)->getRados()->get_notif_pool_ctx(); rgw::notify::rgw_topic_stats stats; ret = rgw::notify::get_persistent_queue_stats( - dpp(), static_cast<rgw::sal::RadosStore *>(driver)->getRados()->get_notif_pool_ctx(), + dpp(), ioctx, topic.dest.persistent_queue, stats, null_yield); if (ret < 0) { cerr << "ERROR: could not get persistent queue: " << cpp_strerror(-ret) << std::endl; @@ -11281,6 +11289,67 @@ next: encode_json("", stats, formatter.get()); formatter->flush(cout); } + + if (opt_cmd == OPT::PUBSUB_TOPIC_DUMP) { + if (topic_name.empty()) { + cerr << "ERROR: topic name was not provided (via --topic)" << std::endl; + return EINVAL; + } + const std::string& account = !account_id.empty() ? account_id : tenant; + RGWPubSub ps(driver, account, *site); + + rgw_pubsub_topic topic; + ret = ps.get_topic(dpp(), topic_name, topic, null_yield, nullptr); + if (ret < 0) { + cerr << "ERROR: could not get topic. error: " << cpp_strerror(-ret) << std::endl; + return -ret; + } + + if (topic.dest.persistent_queue.empty()) { + cerr << "ERROR: topic does not have a persistent queue" << std::endl; + return ENOENT; + } + + auto ioctx = static_cast<rgw::sal::RadosStore*>(driver)->getRados()->get_notif_pool_ctx(); + std::string marker; + std::string end_marker; + librados::ObjectReadOperation rop; + std::vector<cls_queue_entry> queue_entries; + bool truncated = true; + formatter->open_array_section("eventEntries"); + while (truncated) { + bufferlist bl; + int rc; + cls_2pc_queue_list_entries(rop, marker, max_entries, &bl, &rc); + ioctx.operate(topic.dest.persistent_queue, &rop, nullptr); + if (rc < 0 ) { + cerr << "ERROR: could not list entries from queue. error: " << cpp_strerror(-ret) << std::endl; + return -rc; + } + rc = cls_2pc_queue_list_entries_result(bl, queue_entries, &truncated, end_marker); + if (rc < 0) { + cerr << "ERROR: failed to parse list entries from queue (skipping). 
error: " << cpp_strerror(-ret) << std::endl; + return -rc; + } + + std::for_each(queue_entries.cbegin(), + queue_entries.cend(), + [&formatter](const auto& queue_entry) { + rgw::notify::event_entry_t event_entry; + bufferlist::const_iterator iter{&queue_entry.data}; + try { + event_entry.decode(iter); + encode_json("", event_entry, formatter.get()); + } catch (const buffer::error& e) { + cerr << "ERROR: failed to decode queue entry. error: " << e.what() << std::endl; + } + }); + formatter->flush(cout); + marker = end_marker; + } + formatter->close_section(); + formatter->flush(cout); + } if (opt_cmd == OPT::SCRIPT_PUT) { if (!str_script_ctx) { diff --git a/src/rgw/rgw_amqp.cc b/src/rgw/rgw_amqp.cc index ea824b8295a..0e4025516a0 100644 --- a/src/rgw/rgw_amqp.cc +++ b/src/rgw/rgw_amqp.cc @@ -513,10 +513,10 @@ bool new_state(connection_t* conn, const connection_id_t& conn_id) { /// struct used for holding messages in the message queue struct message_wrapper_t { - connection_id_t conn_id; - std::string topic; - std::string message; - reply_callback_t cb; + const connection_id_t conn_id; + const std::string topic; + const std::string message; + const reply_callback_t cb; message_wrapper_t(const connection_id_t& _conn_id, const std::string& _topic, @@ -836,8 +836,11 @@ public: // when a new connection is added. connections.max_load_factor(10.0); // give the runner thread a name for easier debugging - const auto rc = ceph_pthread_setname(runner.native_handle(), "amqp_manager"); - ceph_assert(rc==0); + const char* thread_name = "amqp_manager"; + if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { + ldout(cct, 1) << "ERROR: failed to set amqp manager thread name to: " << thread_name + << ". error: " << rc << dendl; + } } // non copyable diff --git a/src/rgw/rgw_amqp.h b/src/rgw/rgw_amqp.h index c363f4d7403..99bd3aef1ed 100644 --- a/src/rgw/rgw_amqp.h +++ b/src/rgw/rgw_amqp.h @@ -26,10 +26,10 @@ void shutdown(); // key class for the connection list struct connection_id_t { std::string host; - int port; + int port = 0; std::string vhost; std::string exchange; - bool ssl; + bool ssl = false; connection_id_t() = default; connection_id_t(const amqp_connection_info& info, const std::string& _exchange); }; diff --git a/src/rgw/rgw_appmain.cc b/src/rgw/rgw_appmain.cc index 9969811083e..8273ac1c96b 100644 --- a/src/rgw/rgw_appmain.cc +++ b/src/rgw/rgw_appmain.cc @@ -26,6 +26,7 @@ #include "include/str_list.h" #include "include/stringify.h" #include "rgw_main.h" +#include "rgw_asio_thread.h" #include "rgw_common.h" #include "rgw_sal.h" #include "rgw_sal_config.h" diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc index ace3b7aff49..bd04165ff19 100644 --- a/src/rgw/rgw_asio_frontend.cc +++ b/src/rgw/rgw_asio_frontend.cc @@ -25,6 +25,7 @@ #include "rgw_asio_client.h" #include "rgw_asio_frontend.h" +#include "rgw_asio_thread.h" #ifdef WITH_RADOSGW_BEAST_OPENSSL #include <boost/asio/ssl.hpp> @@ -323,7 +324,7 @@ void handle_connection(boost::asio::io_context& context, // if we failed before reading the entire message, discard any remaining // bytes before reading the next while (!expect_continue && !parser.is_done()) { - static std::array<char, 1024> discard_buffer; + static std::array<char, 1024*1024> discard_buffer; auto& body = parser.get().body(); body.size = discard_buffer.size(); diff --git a/src/rgw/rgw_asio_thread.cc b/src/rgw/rgw_asio_thread.cc new file mode 100644 index 00000000000..a1dfd59e528 --- /dev/null +++ b/src/rgw/rgw_asio_thread.cc @@ 
-0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "rgw_asio_thread.h" + +#include "common/BackTrace.h" +#include "common/dout.h" +#include "include/ceph_assert.h" + +thread_local bool is_asio_thread = false; + +void maybe_warn_about_blocking(const DoutPrefixProvider* dpp) +{ + // work on asio threads should be asynchronous, so warn when they block + if (!is_asio_thread) { + return; + } + + // for validation, tests can assert that no requests block + const auto& conf = dpp->get_cct()->_conf; + ceph_assert_always(!conf->rgw_asio_assert_yielding); + + // otherwise just log the warning and optional backtrace + ldpp_dout(dpp, 20) << "WARNING: blocking librados call" << dendl; +#ifdef _BACKTRACE_LOGGING + ldpp_dout(dpp, 20) << "BACKTRACE: " << ClibBackTrace(0) << dendl; +#endif +} diff --git a/src/rgw/rgw_asio_thread.h b/src/rgw/rgw_asio_thread.h new file mode 100644 index 00000000000..cafe071fdc0 --- /dev/null +++ b/src/rgw/rgw_asio_thread.h @@ -0,0 +1,26 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +class DoutPrefixProvider; + +/// indicates whether the current thread is in boost::asio::io_context::run(), +/// used to log warnings if synchronous librados calls are made +extern thread_local bool is_asio_thread; + +/// call when an operation will block the calling thread due to an empty +/// optional_yield. a warning is logged when is_asio_thread is true +void maybe_warn_about_blocking(const DoutPrefixProvider* dpp); diff --git a/src/rgw/rgw_auth_keystone.cc b/src/rgw/rgw_auth_keystone.cc index 0bcd1a32b0f..9a0a973df43 100644 --- a/src/rgw/rgw_auth_keystone.cc +++ b/src/rgw/rgw_auth_keystone.cc @@ -88,7 +88,7 @@ admin_token_retry: validate.set_url(url); - ret = validate.process(y); + ret = validate.process(dpp, y); /* NULL terminate for debug output. 
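// ---- editorial sketch; not part of the patch ---------------------------------
// The new rgw_asio_thread helper centralizes the "blocking librados call on an
// asio thread" warning that rgw_tools.cc previously open-coded three times.
// Callers follow one shape: prefer the asynchronous path when an optional_yield
// is available, and announce the synchronous fallback first. A minimal caller,
// with do_async_op()/do_sync_op() as illustrative stand-ins:
int operate(const DoutPrefixProvider* dpp, optional_yield y)
{
  if (y) {
    return do_async_op(y);          // preferred: suspends the coroutine
  }
  maybe_warn_about_blocking(dpp);   // logs at level 20, and aborts outright
                                    // when rgw_asio_assert_yielding is set
  return do_sync_op();              // last resort: blocks this thread
}
// --------------------------------------------------------------------------------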
*/ token_body_bl.append(static_cast<char>(0)); @@ -464,7 +464,7 @@ EC2Engine::get_from_keystone(const DoutPrefixProvider* dpp, const std::string_vi validate.set_send_length(os.str().length()); /* send request */ - ret = validate.process(y); + ret = validate.process(dpp, y); /* if the supplied signature is wrong, we will get 401 from Keystone */ if (validate.get_http_status() == @@ -544,7 +544,7 @@ auto EC2Engine::get_secret_from_keystone(const DoutPrefixProvider* dpp, secret.set_verify_ssl(cct->_conf->rgw_keystone_verify_ssl); /* send request */ - ret = secret.process(y); + ret = secret.process(dpp, y); /* if the supplied access key isn't found, we will get 404 from Keystone */ if (secret.get_http_status() == diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc index 61c2118b672..d521f60f6b1 100644 --- a/src/rgw/rgw_auth_s3.cc +++ b/src/rgw/rgw_auth_s3.cc @@ -1469,7 +1469,7 @@ inline void AWSv4ComplMulti::extract_trailing_headers( /* populate trailer map with expected headers and their values, if sent */ trailer_map.insert(trailer_map_t::value_type(k, v)); /* populate to req_info.env as well */ - put_prop(ys_header_mangle(k), v); + put_prop(ys_header_mangle(fmt::format("HTTP-{}", k)), v); }); consumed += get<2>(ex_header); } /* one trailer */ diff --git a/src/rgw/rgw_basic_types.h b/src/rgw/rgw_basic_types.h index cd56db1081b..d09f06a4a98 100644 --- a/src/rgw/rgw_basic_types.h +++ b/src/rgw/rgw_basic_types.h @@ -21,6 +21,7 @@ #pragma once #include <string> +#include <optional> #include <fmt/format.h> #include "include/types.h" @@ -31,6 +32,7 @@ #include "rgw_user_types.h" #include "rgw_bucket_types.h" #include "rgw_obj_types.h" +#include "rgw_cksum.h" #include "driver/rados/rgw_obj_manifest.h" // FIXME: subclass dependency @@ -258,6 +260,7 @@ struct RGWUploadPartInfo { ceph::real_time modified; RGWObjManifest manifest; RGWCompressionInfo cs_info; + std::optional<rgw::cksum::Cksum> cksum; // Previous part obj prefixes. Recorded here for later cleanup. std::set<std::string> past_prefixes; @@ -265,7 +268,7 @@ struct RGWUploadPartInfo { RGWUploadPartInfo() : num(0), size(0) {} void encode(bufferlist& bl) const { - ENCODE_START(5, 2, bl); + ENCODE_START(6, 2, bl); encode(num, bl); encode(size, bl); encode(etag, bl); @@ -274,10 +277,11 @@ struct RGWUploadPartInfo { encode(cs_info, bl); encode(accounted_size, bl); encode(past_prefixes, bl); + encode(cksum, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(5, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl); decode(num, bl); decode(size, bl); decode(etag, bl); @@ -293,6 +297,9 @@ struct RGWUploadPartInfo { if (struct_v >= 5) { decode(past_prefixes, bl); } + if (struct_v >= 6) { + decode(cksum, bl); + } DECODE_FINISH(bl); } void dump(Formatter *f) const; diff --git a/src/rgw/rgw_blake3_digest.h b/src/rgw/rgw_blake3_digest.h new file mode 100644 index 00000000000..9dc51596ee3 --- /dev/null +++ b/src/rgw/rgw_blake3_digest.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
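// ---- editorial note; not part of the patch ------------------------------------
// The RGWUploadPartInfo change above follows Ceph's standard recipe for adding
// a field to an encoded struct: bump the version in ENCODE_START (5 -> 6),
// append the new member last, and guard its decode on struct_v so payloads
// written by older daemons still parse. Skeleton of the pattern, for a
// hypothetical struct gaining `cksum` at version 6 with compat version 2:
void encode(bufferlist& bl) const {
  ENCODE_START(6, 2, bl);        // version 6, decodable by compat >= 2
  /* ... all pre-existing members, in their original order ... */
  encode(cksum, bl);             // appended last; old decoders stop early
  ENCODE_FINISH(bl);
}
void decode(bufferlist::const_iterator& bl) {
  DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
  /* ... pre-existing members ... */
  if (struct_v >= 6) {
    decode(cksum, bl);           // absent in version-5 payloads
  }
  DECODE_FINISH(bl);
}
// ---------------------------------------------------------------------------------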
+ * + */ + +#pragma once + +#include <stdint.h> +#include <stdio.h> +#include "BLAKE3/c/blake3.h" + +namespace rgw { namespace digest { + +class Blake3 { + private: + blake3_hasher h; + + public: + static constexpr uint16_t digest_size = BLAKE3_OUT_LEN /* 32 bytes */; + + Blake3() { Restart(); } + + void Restart() { blake3_hasher_init(&h); } + + void Update(const unsigned char *data, uint64_t len) { + blake3_hasher_update(&h, data, len); + } + + void Final(unsigned char* digest) { + blake3_hasher_finalize(&h, digest, digest_size); + } +}; /* Blake3 */ + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum.h b/src/rgw/rgw_cksum.h new file mode 100644 index 00000000000..955b553f27d --- /dev/null +++ b/src/rgw/rgw_cksum.h @@ -0,0 +1,226 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <boost/algorithm/string/case_conv.hpp> +#include <boost/algorithm/string/predicate.hpp> +#include <cstdint> +#include <cstring> +#include <optional> +#include <stdint.h> +#include <string> +#include <string_view> +#include <array> +#include <iterator> +#include <boost/algorithm/string.hpp> +#include "fmt/format.h" +#include "common/armor.h" +#include <boost/algorithm/hex.hpp> +#include "rgw_hex.h" +#include "rgw_b64.h" + +#include "include/buffer.h" +#include "include/encoding.h" + +namespace rgw { namespace cksum { + + enum class Type : uint16_t + { + none = 0, + crc32, /* !cryptographic, but AWS supports */ + crc32c, /* !cryptographic, but AWS supports */ + xxh3, /* !cryptographic, but strong and very fast */ + sha1, /* unsafe, but AWS supports */ + sha256, + sha512, + blake3, + }; + + static constexpr uint16_t FLAG_NONE = 0x0000; + static constexpr uint16_t FLAG_AWS_CKSUM = 0x0001; + + class Desc + { + public: + const Type type; + const char* name; + const uint16_t digest_size; + const uint16_t armored_size; + const uint16_t flags; + + constexpr uint16_t to_armored_size(uint16_t sz) { + return sz / 3 * 4 + 4; + } + + constexpr Desc(Type _type, const char* _name, uint16_t _size, + uint16_t _flags) + : type(_type), name(_name), + digest_size(_size), + armored_size(to_armored_size(digest_size)), + flags(_flags) + {} + + constexpr bool aws() const { + return (flags & FLAG_AWS_CKSUM); + } + }; /* Desc */ + + namespace ba = boost::algorithm; + + class Cksum { + public: + static constexpr std::array<Desc, 8> checksums = + { + Desc(Type::none, "none", 0, FLAG_NONE), + Desc(Type::crc32, "crc32", 4, FLAG_AWS_CKSUM), + Desc(Type::crc32c, "crc32c", 4, FLAG_AWS_CKSUM), + Desc(Type::xxh3, "xxh3", 8, FLAG_NONE), + Desc(Type::sha1, "sha1", 20, FLAG_AWS_CKSUM), + Desc(Type::sha256, "sha256", 32, FLAG_AWS_CKSUM), + Desc(Type::sha512, "sha512", 64, FLAG_NONE), + Desc(Type::blake3, "blake3", 32, FLAG_NONE), + }; + + static constexpr uint16_t max_digest_size = 64; + using value_type = std::array<unsigned char, max_digest_size>; + + Type type; + value_type digest; + + Cksum(Type _type = Type::none) : type(_type) {} + Cksum(Type _type, const char* _armored_text) + : type(_type) { + const auto& ckd = checksums[uint16_t(type)]; + (void) ceph_unarmor((char*) digest.begin(), + (char*) digest.begin() + ckd.digest_size, + _armored_text, + _armored_text + 
std::strlen(_armored_text)); + } + + const char* type_string() const { + return (Cksum::checksums[uint16_t(type)]).name; + } + + const bool aws() const { + return (Cksum::checksums[uint16_t(type)]).aws(); + } + + std::string aws_name() const { + return fmt::format("x-amz-checksum-{}", type_string()); + } + + std::string rgw_name() const { + return fmt::format("x-rgw-checksum-{}", type_string()); + } + + std::string header_name() const { + return (aws()) ? aws_name() : rgw_name(); + } + + std::string element_name() const { + std::string ts{type_string()}; + return fmt::format("Checksum{}", boost::to_upper_copy(ts)); + } + + std::string_view raw() const { + const auto& ckd = checksums[uint16_t(type)]; + return std::string_view((char*) digest.begin(), ckd.digest_size); + } + + std::string to_armor() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + hs.resize(ckd.armored_size); + ceph_armor((char*) hs.data(), (char*) hs.data() + ckd.armored_size, + (char*) digest.begin(), (char*) digest.begin() + + ckd.digest_size); + return hs; + } + + std::string hex() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + hs.reserve(ckd.digest_size * 2 + 1); + ba::hex_lower(digest.begin(), digest.begin() + ckd.digest_size, + std::back_inserter(hs)); + return hs; + } + + std::string to_base64() const { + return rgw::to_base64(hex()); + } + + std::string to_string() const { + std::string hs; + const auto& ckd = checksums[uint16_t(type)]; + return fmt::format("{{{}}}{}", ckd.name, to_base64()); + } + + void encode(buffer::list& bl) const { + const auto& ckd = checksums[uint16_t(type)]; + ENCODE_START(1, 1, bl); + encode(uint16_t(type), bl); + encode(ckd.digest_size, bl); + bl.append((char*)digest.data(), ckd.digest_size); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& p) { + DECODE_START(1, p); + uint16_t tt; + decode(tt, p); + type = cksum::Type(tt); + decode(tt, p); /* <= max_digest_size */ + p.copy(tt, (char*)digest.data()); + DECODE_FINISH(p); + } + }; /* Cksum */ + WRITE_CLASS_ENCODER(Cksum); + + static inline const std::optional<rgw::cksum::Cksum> no_cksum{std::nullopt}; + + static inline std::string to_string(const Type type) { + std::string hs; + const auto& ckd = Cksum::checksums[uint16_t(type)]; + return ckd.name; + } + + static inline Type parse_cksum_type(const char* name) + { + for (const auto& ck : Cksum::checksums) { + if (boost::iequals(ck.name, name)) + return ck.type; + } + return Type::none; + } /* parse_cksum_type */ + + static inline Type parse_cksum_type_hdr(const std::string_view hdr_name) { + auto pos = hdr_name.find("x-amz-checksum-", 0); + if (pos == std::string::npos) { + return Type::none; + } + constexpr int8_t psz = sizeof("x-amz-checksum-") - 1; + if ((hdr_name.size() - psz) > 0 ) { + std::string ck_name{hdr_name.substr(psz)}; + return parse_cksum_type(ck_name.c_str()); + } + return Type::none; + } /* parse_cksum_type_hdr */ + + static inline bool is_checksum_hdr(const std::string_view hdr_name) { + return hdr_name == "x-amz-checksum-algorithm" || + parse_cksum_type_hdr(hdr_name) != Type::none; + } /* is_cksum_hdr */ + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum_digest.h b/src/rgw/rgw_cksum_digest.h new file mode 100644 index 00000000000..ba7e3bd58c6 --- /dev/null +++ b/src/rgw/rgw_cksum_digest.h @@ -0,0 +1,134 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. 
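// ---- editorial note; not part of the patch --------------------------------------
// A worked check of Desc::to_armored_size() above (sz / 3 * 4 + 4, integer
// division): base64 emits 4 characters per 3-byte input group, so the exact
// padded length is 4 * ceil(sz / 3). For the digest sizes registered in
// Cksum::checksums -- none divisible by 3 -- the formula lands exactly on it:
//   crc32/crc32c  :  4 bytes ->  4/3*4 + 4 =  8
//   xxh3          :  8 bytes ->  8/3*4 + 4 = 12
//   sha1          : 20 bytes -> 20/3*4 + 4 = 28
//   sha256/blake3 : 32 bytes -> 32/3*4 + 4 = 44
//   sha512        : 64 bytes -> 64/3*4 + 4 = 88
// (For sizes divisible by 3 it would over-allocate by 4 bytes, which is still
// safe for ceph_armor's output plus a terminator.)
// -----------------------------------------------------------------------------------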
+ * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <boost/variant.hpp> +#include <boost/blank.hpp> +#include "common/ceph_crypto.h" +#include "rgw_blake3_digest.h" +#include "rgw_crc_digest.h" +#include "rgw_xxh_digest.h" + +#include "rgw_cksum.h" + +namespace rgw { namespace cksum { + + class Digest { + public: + virtual void Restart() = 0; + virtual void Update (const unsigned char *input, size_t length) = 0; + virtual void Update(const ceph::buffer::list& bl) = 0; + virtual void Final (unsigned char *digest) = 0; + virtual ~Digest() {} + }; + + template<class T> + class TDigest : public Digest + { + T d; + public: + TDigest() {} + TDigest(TDigest&& rhs) noexcept + : d(std::move(rhs.d)) + {} + void Restart() override { d.Restart(); } + void Update(const unsigned char* data, uint64_t len) override { + d.Update(data, len); + } + void Update(const ceph::buffer::list& bl) { + for (auto& p : bl.buffers()) { + d.Update((const unsigned char *)p.c_str(), p.length()); + } + } + void Final(unsigned char* digest) override { + d.Final(digest); + } + }; + + typedef TDigest<rgw::digest::Blake3> Blake3; + typedef TDigest<rgw::digest::Crc32> Crc32; + typedef TDigest<rgw::digest::Crc32c> Crc32c; + typedef TDigest<rgw::digest::XXH3> XXH3; + typedef TDigest<ceph::crypto::SHA1> SHA1; + typedef TDigest<ceph::crypto::SHA256> SHA256; + typedef TDigest<ceph::crypto::SHA512> SHA512; + + typedef boost::variant<boost::blank, + Blake3, + Crc32, + Crc32c, + XXH3, + SHA1, + SHA256, + SHA512> DigestVariant; + + struct get_digest_ptr : public boost::static_visitor<Digest*> + { + get_digest_ptr() {}; + Digest* operator()(const boost::blank& b) const { return nullptr; } + Digest* operator()(Blake3& digest) const { return &digest; } + Digest* operator()(Crc32& digest) const { return &digest; } + Digest* operator()(Crc32c& digest) const { return &digest; } + Digest* operator()(XXH3& digest) const { return &digest; } + Digest* operator()(SHA1& digest) const { return &digest; } + Digest* operator()(SHA256& digest) const { return &digest; } + Digest* operator()(SHA512& digest) const { return &digest; } + }; + + static inline Digest* get_digest(DigestVariant& ev) + { + return boost::apply_visitor(get_digest_ptr{}, ev); + } + + static inline DigestVariant digest_factory(const Type cksum_type) + { + switch (cksum_type) { + case Type::blake3: + return Blake3(); + break; + case Type::sha256: + return SHA256(); + break; + case Type::crc32: + return Crc32(); + break; + case Type::crc32c: + return Crc32c(); + break; + case Type::xxh3: + return XXH3(); + break; + case Type::sha512: + return SHA512(); + break; + case Type::sha1: + return SHA1(); + break; + case Type::none: + break; + }; + return boost::blank(); + } /* digest_factory */ + + static inline Cksum finalize_digest(Digest* digest, Type type) + { + Cksum cksum(type); + if (digest) { + auto data = cksum.digest.data(); + digest->Final(data); + } + return cksum; + } + +}} /* namespace */ diff --git a/src/rgw/rgw_cksum_pipe.cc b/src/rgw/rgw_cksum_pipe.cc new file mode 100644 index 00000000000..e06957e2715 --- /dev/null +++ b/src/rgw/rgw_cksum_pipe.cc @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * 
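// ---- editorial sketch; not part of the patch --------------------------------------
// Typical use of the digest machinery above: select an implementation at runtime
// with digest_factory(), drive it through the type-erased Digest*, then collapse
// the state into a Cksum. Assuming the interfaces declared in this header:
rgw::cksum::Cksum checksum_of(const ceph::buffer::list& bl,
                              rgw::cksum::Type type)
{
  using namespace rgw::cksum;
  DigestVariant dv = digest_factory(type);  // e.g. Type::sha256 -> SHA256
  Digest* digest = get_digest(dv);          // nullptr when type == Type::none
  if (digest) {
    digest->Update(bl);                     // hashes every buffer segment
  }
  return finalize_digest(digest, type);     // tolerates digest == nullptr
}
// -------------------------------------------------------------------------------------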
This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "rgw_cksum_pipe.h" +#include <memory> +#include <string> +#include <fmt/format.h> +#include <boost/algorithm/string.hpp> +#include "rgw_common.h" +#include "common/dout.h" +#include "rgw_client_io.h" + +namespace rgw::putobj { + + RGWPutObj_Cksum::RGWPutObj_Cksum(rgw::sal::DataProcessor* next, + rgw::cksum::Type _typ, + cksum_hdr_t&& _hdr) + : Pipe(next), + _type(_typ), + dv(rgw::cksum::digest_factory(_type)), + _digest(cksum::get_digest(dv)), cksum_hdr(_hdr) + {} + + std::unique_ptr<RGWPutObj_Cksum> RGWPutObj_Cksum::Factory( + rgw::sal::DataProcessor* next, const RGWEnv& env) + { + /* look for matching headers */ + auto algo_header = cksum_algorithm_hdr(env); + if (algo_header.first) { + if (algo_header.second) { + auto cksum_type = cksum::parse_cksum_type(algo_header.second); + return + std::make_unique<RGWPutObj_Cksum>( + next, cksum_type, std::move(algo_header)); + } + /* malformed checksum algorithm header(s) */ + throw rgw::io::Exception(EINVAL, std::system_category()); + } + /* no checksum header */ + return std::unique_ptr<RGWPutObj_Cksum>(); + } + + int RGWPutObj_Cksum::process(ceph::buffer::list &&data, uint64_t logical_offset) + { + for (const auto& ptr : data.buffers()) { + _digest->Update(reinterpret_cast<const unsigned char*>(ptr.c_str()), + ptr.length()); + } + return Pipe::process(std::move(data), logical_offset); + } + +} // namespace rgw::putobj diff --git a/src/rgw/rgw_cksum_pipe.h b/src/rgw/rgw_cksum_pipe.h new file mode 100644 index 00000000000..fddcd283c84 --- /dev/null +++ b/src/rgw/rgw_cksum_pipe.h @@ -0,0 +1,146 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <cstdint> +#include <utility> +#include <tuple> +#include <cstring> +#include <boost/algorithm/string/case_conv.hpp> +#include "rgw_cksum_digest.h" +#include "rgw_common.h" +#include "rgw_putobj.h" + +namespace rgw::putobj { + + namespace cksum = rgw::cksum; + using cksum_hdr_t = std::pair<const char*, const char*>; + + static inline const cksum_hdr_t cksum_algorithm_hdr(const RGWEnv& env) { + /* If the individual checksum value you provide through + x-amz-checksum-algorithm doesn't match the checksum algorithm + you set through x-amz-sdk-checksum-algorithm, Amazon S3 ignores + any provided ChecksumAlgorithm parameter and uses the checksum + algorithm that matches the provided value in + x-amz-checksum-algorithm. 
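// ---- editorial sketch; not part of the patch ----------------------------------------
// RGWPutObj_Cksum is a pass-through filter: process() feeds each buffer into the
// digest and forwards it unchanged. Wiring it into a put-object filter chain
// therefore looks roughly as follows (hedged sketch; `processor` and the request
// environment accessor `s->info.env` are stand-ins for the surrounding
// put-object plumbing, and Factory() throws on a malformed algorithm header):
rgw::sal::DataProcessor* filter = processor;        // current tail of the chain
std::unique_ptr<rgw::putobj::RGWPutObj_Cksum> cksum_filter =
    rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env);
if (cksum_filter) {            // non-null only when a well-formed
  filter = cksum_filter.get(); // x-amz-*checksum-algorithm header was sent
}
// ... stream the request body through `filter` ...
if (cksum_filter) {
  auto [valid, cksum] = cksum_filter->verify(*s->info.env);
  if (!valid) {
    // computed digest differs from the client's x-amz-checksum-<algo> value
  }
}
// ----------------------------------------------------------------------------------------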
+ https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html + */ + for (const auto hk : {"HTTP_X_AMZ_CHECKSUM_ALGORITHM", + "HTTP_X_AMZ_SDK_CHECKSUM_ALGORITHM"}) { + auto hv = env.get(hk); + if (hv) { + return cksum_hdr_t(hk, hv); + } + } + return cksum_hdr_t(nullptr, nullptr); + } /* cksum_algorithm_hdr */ + + using GetHeaderCksumResult = std::pair<cksum::Cksum, std::string_view>; + + static inline GetHeaderCksumResult get_hdr_cksum(const RGWEnv& env) { + cksum::Type cksum_type; + auto algo_hdr = cksum_algorithm_hdr(env); + if (algo_hdr.first) { + if (algo_hdr.second) { + cksum_type = cksum::parse_cksum_type(algo_hdr.second); + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", algo_hdr.second); + auto hv = env.get(hk.c_str()); + if (hv) { + return + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + std::string_view(hv, std::strlen(hv))); + } + } + } + return GetHeaderCksumResult(cksum::Cksum(cksum_type), ""); + } /* get_hdr_cksum */ + + /* CompleteMultipartUpload can have a checksum value but unlike + * PutObject, it won't have a checksum algorithm header, so we + * need to search for one */ + static inline GetHeaderCksumResult find_hdr_cksum(const RGWEnv& env) { + cksum::Type cksum_type; + for (int16_t ix = int16_t(cksum::Type::crc32); + ix <= uint16_t(cksum::Type::blake3); ++ix) { + cksum_type = cksum::Type(ix); + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", + boost::to_upper_copy(to_string(cksum_type))); + auto hv = env.get(hk.c_str()); + if (hv) { + return + GetHeaderCksumResult(cksum::Cksum(cksum_type, hv), + std::string_view(hv, std::strlen(hv))); + } + } + return GetHeaderCksumResult(cksum::Cksum(cksum_type), ""); + } /* find_hdr_cksum */ + + // PutObj filter for streaming checksums + class RGWPutObj_Cksum : public rgw::putobj::Pipe { + + cksum::Type _type; + cksum::DigestVariant dv; + cksum::Digest* _digest; + cksum::Cksum _cksum; + cksum_hdr_t cksum_hdr; + + public: + + using VerifyResult = std::tuple<bool, const cksum::Cksum&>; + + static std::unique_ptr<RGWPutObj_Cksum> Factory( + rgw::sal::DataProcessor* next, const RGWEnv&); + + RGWPutObj_Cksum(rgw::sal::DataProcessor* next, rgw::cksum::Type _type, + cksum_hdr_t&& _hdr); + RGWPutObj_Cksum(RGWPutObj_Cksum& rhs) = delete; + ~RGWPutObj_Cksum() {} + + cksum::Type type() { return _type; } + cksum::Digest* digest() const { return _digest; } + const cksum::Cksum& cksum() { return _cksum; }; + + const cksum_hdr_t& header() const { + return cksum_hdr; + } + + const cksum::Cksum& finalize() { + _cksum = finalize_digest(_digest, _type); + return _cksum; + } + + const char* expected(const RGWEnv& env) { + auto hk = fmt::format("HTTP_X_AMZ_CHECKSUM_{}", cksum_hdr.second); + auto hv = env.get(hk.c_str()); + return hv; + } + + VerifyResult verify(const RGWEnv& env) { + if (_cksum.type == cksum::Type::none) [[likely]] { + (void) finalize(); + } + auto hv = expected(env); + auto cv = _cksum.to_armor(); + return VerifyResult(cksum_hdr.first && + hv && !std::strcmp(hv, cv.c_str()), + _cksum); + } + + int process(bufferlist &&data, uint64_t logical_offset) override; + + }; /* RGWPutObj_Cksum */ + +} // namespace rgw::putobj diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc index 4c920309348..987475bf390 100644 --- a/src/rgw/rgw_common.cc +++ b/src/rgw/rgw_common.cc @@ -175,8 +175,6 @@ rgw_http_errors rgw_http_iam_errors({ using namespace std; using namespace ceph::crypto; -thread_local bool is_asio_thread = false; - rgw_err:: rgw_err() { diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 2002ae51ec9..c4fdd83d7fa 
100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -17,6 +17,7 @@ #pragma once #include <array> +#include <cstdint> #include <string_view> #include <atomic> #include <unordered_map> @@ -25,6 +26,7 @@ #include <boost/container/flat_map.hpp> #include <boost/container/flat_set.hpp> +#include "common/dout_fmt.h" #include "common/ceph_crypto.h" #include "common/random_string.h" #include "common/tracer.h" @@ -48,6 +50,7 @@ #include "include/rados/librados.hpp" #include "rgw_public_access.h" #include "rgw_sal_fwd.h" +#include "rgw_hex.h" namespace ceph { class Formatter; @@ -81,7 +84,8 @@ using ceph::crypto::MD5; #define RGW_ATTR_RATELIMIT RGW_ATTR_PREFIX "ratelimit" #define RGW_ATTR_LC RGW_ATTR_PREFIX "lc" #define RGW_ATTR_CORS RGW_ATTR_PREFIX "cors" -#define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" +#define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" +#define RGW_ATTR_CKSUM RGW_ATTR_PREFIX "cksum" #define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets" #define RGW_ATTR_META_PREFIX RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX #define RGW_ATTR_CONTENT_TYPE RGW_ATTR_PREFIX "content_type" @@ -1027,6 +1031,8 @@ enum RGWBucketFlags { class RGWSI_Zone; +#include "rgw_cksum.h" + struct RGWBucketInfo { rgw_bucket bucket; rgw_owner owner; @@ -1054,6 +1060,8 @@ struct RGWBucketInfo { std::map<std::string, uint32_t> mdsearch_config; + rgw::cksum::Type cksum_type = rgw::cksum::Type::none; + // resharding cls_rgw_reshard_status reshard_status{cls_rgw_reshard_status::NOT_RESHARDING}; std::string new_bucket_instance_id; @@ -1064,7 +1072,6 @@ struct RGWBucketInfo { void encode(bufferlist& bl) const; void decode(bufferlist::const_iterator& bl); - void dump(Formatter *f) const; static void generate_test_instances(std::list<RGWBucketInfo*>& o); @@ -1511,25 +1518,33 @@ struct multipart_upload_info bool obj_legal_hold_exist{false}; RGWObjectRetention obj_retention; RGWObjectLegalHold obj_legal_hold; + rgw::cksum::Type cksum_type {rgw::cksum::Type::none}; void encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); + ENCODE_START(3, 1, bl); encode(dest_placement, bl); encode(obj_retention_exist, bl); encode(obj_legal_hold_exist, bl); encode(obj_retention, bl); encode(obj_legal_hold, bl); + uint16_t ct{uint16_t(cksum_type)}; + encode(ct, bl); ENCODE_FINISH(bl); } void decode(bufferlist::const_iterator& bl) { - DECODE_START(2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 1, 1, bl); decode(dest_placement, bl); if (struct_v >= 2) { decode(obj_retention_exist, bl); decode(obj_legal_hold_exist, bl); decode(obj_retention, bl); decode(obj_legal_hold, bl); + if (struct_v >= 3) { + uint16_t ct; + decode(ct, bl); + cksum_type = rgw::cksum::Type(ct); + } } else { obj_retention_exist = false; obj_legal_hold_exist = false; @@ -1550,61 +1565,6 @@ struct multipart_upload_info }; WRITE_CLASS_ENCODER(multipart_upload_info) -static inline void buf_to_hex(const unsigned char* const buf, - const size_t len, - char* const str) -{ - str[0] = '\0'; - for (size_t i = 0; i < len; i++) { - ::sprintf(&str[i*2], "%02x", static_cast<int>(buf[i])); - } -} - -template<size_t N> static inline std::array<char, N * 2 + 1> -buf_to_hex(const std::array<unsigned char, N>& buf) -{ - static_assert(N > 0, "The input array must be at least one element long"); - - std::array<char, N * 2 + 1> hex_dest; - buf_to_hex(buf.data(), N, hex_dest.data()); - return hex_dest; -} - -static inline int hexdigit(char c) -{ - if (c >= '0' && c <= '9') - return (c - '0'); - c = toupper(c); - if (c >= 'A' && c <= 'F') - return c - 'A' + 0xa; - return -EINVAL; -} - -static inline int 
hex_to_buf(const char *hex, char *buf, int len) -{ - int i = 0; - const char *p = hex; - while (*p) { - if (i >= len) - return -EINVAL; - buf[i] = 0; - int d = hexdigit(*p); - if (d < 0) - return d; - buf[i] = d << 4; - p++; - if (!*p) - return -EINVAL; - d = hexdigit(*p); - if (d < 0) - return d; - buf[i] += d; - i++; - p++; - } - return i; -} - static inline int rgw_str_to_bool(const char *s, int def_val) { if (!s) diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc index 9e69c0876ef..3b789b8b859 100644 --- a/src/rgw/rgw_coroutine.cc +++ b/src/rgw/rgw_coroutine.cc @@ -4,6 +4,7 @@ #include "include/Context.h" #include "common/ceph_json.h" #include "rgw_coroutine.h" +#include "rgw_asio_thread.h" // re-include our assert to clobber the system one; fix dout: #include "include/ceph_assert.h" @@ -615,6 +616,8 @@ void RGWCoroutinesManager::io_complete(RGWCoroutine *cr, const rgw_io_id& io_id) int RGWCoroutinesManager::run(const DoutPrefixProvider *dpp, list<RGWCoroutinesStack *>& stacks) { + maybe_warn_about_blocking(dpp); + int ret = 0; int blocked_count = 0; int interval_wait_count = 0; diff --git a/src/rgw/rgw_cr_rest.cc b/src/rgw/rgw_cr_rest.cc index 04920a1551b..fce41decb41 100644 --- a/src/rgw/rgw_cr_rest.cc +++ b/src/rgw/rgw_cr_rest.cc @@ -84,7 +84,8 @@ RGWStreamReadHTTPResourceCRF::~RGWStreamReadHTTPResourceCRF() { if (req) { req->cancel(); - req->wait(null_yield); + auto dpp = NoDoutPrefix{cct, ceph_subsys_rgw}; + req->wait(&dpp, null_yield); delete req; } } @@ -188,7 +189,8 @@ RGWStreamWriteHTTPResourceCRF::~RGWStreamWriteHTTPResourceCRF() { if (req) { req->cancel(); - req->wait(null_yield); + auto dpp = NoDoutPrefix{cct, ceph_subsys_rgw}; + req->wait(&dpp, null_yield); delete req; } } diff --git a/src/rgw/rgw_cr_rest.h b/src/rgw/rgw_cr_rest.h index 531d7ee68ef..4b9c0de445a 100644 --- a/src/rgw/rgw_cr_rest.h +++ b/src/rgw/rgw_cr_rest.h @@ -90,14 +90,13 @@ public: - virtual int wait_result() { - return http_op->wait(result, null_yield); + virtual int wait_result(const DoutPrefixProvider* dpp) { + return http_op->wait(dpp, result, null_yield); } int request_complete() override { - int ret; - - ret = wait_result(); + auto dpp = NoDoutPrefix{cct, ceph_subsys_rgw}; + int ret = wait_result(&dpp); auto op = std::move(http_op); // release ref on return if (ret < 0) { @@ -138,8 +137,8 @@ class RGWReadRESTResourceCR : public RGWReadRawRESTResourceCR { : RGWReadRawRESTResourceCR(_cct, _conn, _http_manager, _path, params, hdrs), result(_result) {} - int wait_result() override { - return http_op->wait(result, null_yield); + int wait_result(const DoutPrefixProvider* dpp) override { + return http_op->wait(dpp, result, null_yield); } }; @@ -207,12 +206,13 @@ class RGWSendRawRESTResourceCR: public RGWSimpleCoroutine { } int request_complete() override { + auto dpp = NoDoutPrefix{cct, ceph_subsys_rgw}; int ret; if (result || err_result) { - ret = http_op->wait(result, null_yield, err_result); + ret = http_op->wait(&dpp, result, null_yield, err_result); } else { bufferlist bl; - ret = http_op->wait(&bl, null_yield); + ret = http_op->wait(&dpp, &bl, null_yield); } auto op = std::move(http_op); // release ref on return if (ret < 0) { @@ -366,9 +366,9 @@ public: } int request_complete() override { - int ret; + auto dpp = NoDoutPrefix{cct, ceph_subsys_rgw}; bufferlist bl; - ret = http_op->wait(&bl, null_yield); + int ret = http_op->wait(&dpp, &bl, null_yield); auto op = std::move(http_op); // release ref on return if (ret < 0) { error_stream << "http operation failed: " << op->to_str() @@ 
-517,6 +517,7 @@ public: class RGWStreamWriteHTTPResourceCRF : public RGWStreamWriteResourceCRF { protected: + CephContext *cct; RGWCoroutinesEnv *env; RGWCoroutine *caller; RGWHTTPManager *http_manager; @@ -546,10 +547,13 @@ public: RGWStreamWriteHTTPResourceCRF(CephContext *_cct, RGWCoroutinesEnv *_env, RGWCoroutine *_caller, - RGWHTTPManager *_http_manager) : env(_env), - caller(_caller), - http_manager(_http_manager), - write_drain_notify_cb(this) {} + RGWHTTPManager *_http_manager) + : cct(_cct), + env(_env), + caller(_caller), + http_manager(_http_manager), + write_drain_notify_cb(this) + {} virtual ~RGWStreamWriteHTTPResourceCRF(); int init() override { diff --git a/src/rgw/rgw_crc_digest.h b/src/rgw/rgw_crc_digest.h new file mode 100644 index 00000000000..8e97df56b48 --- /dev/null +++ b/src/rgw/rgw_crc_digest.h @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <bit> +#include <array> +#include <concepts> +#include <algorithm> +#include <stdio.h> +#include "include/crc32c.h" +#include <boost/crc.hpp> + +namespace rgw { namespace digest { + + /* crib impl of c++23 std::byteswap from + * https://en.cppreference.com/w/cpp/numeric/byteswap */ + template <std::integral T> constexpr T byteswap(T value) noexcept { + static_assert(std::has_unique_object_representations_v<T>, + "T may not have padding bits"); + auto value_representation = + std::bit_cast<std::array<std::byte, sizeof(T)>>(value); + std::ranges::reverse(value_representation); + return std::bit_cast<T>(value_representation); + } /* byteswap */ + + /* impl. using boost::crc, as configured by imtzw */ + class Crc32 { + private: + using crc32_type = boost::crc_optimal< + 32, 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, true, true>; + crc32_type crc; + + public: + static constexpr uint16_t digest_size = 4; + + Crc32() { Restart(); } + + void Restart() { crc.reset(); } + + void Update(const unsigned char *data, uint64_t len) { + crc.process_bytes(data, len); + } + + void Final(unsigned char* digest) { + /* XXX crc32 and cksfb utilities both treat the byteswapped result + * as canonical--possibly this needs to be omitted when BigEndian? 
*/ + uint32_t final = crc(); + if constexpr (std::endian::native != std::endian::big) { + final = rgw::digest::byteswap(final); + } + memcpy((char*) digest, &final, sizeof(final)); + } + }; /* Crc32 */ + + /* use Ceph hw-specialized crc32c (0x1EDC6F41) */ + class Crc32c { + private: + uint32_t crc; + + public: + static constexpr uint16_t digest_size = 4; + static constexpr uint32_t initial_value = 0xffffffff; + + Crc32c() { Restart(); } + + void Restart() { crc = initial_value; } + + void Update(const unsigned char *data, uint64_t len) { + crc = ceph_crc32c(crc, data, len); + } + + void Final(unsigned char* digest) { + crc = crc ^ 0xffffffff; + if constexpr (std::endian::native != std::endian::big) { + crc = rgw::digest::byteswap(crc); + } + memcpy((char*) digest, &crc, sizeof(crc)); + } + }; /* Crc32c */ +}} /* namespace */ diff --git a/src/rgw/rgw_data_access.cc b/src/rgw/rgw_data_access.cc index 06e13c3890c..74461a69399 100644 --- a/src/rgw/rgw_data_access.cc +++ b/src/rgw/rgw_data_access.cc @@ -1,10 +1,12 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp +#include <optional> #include "rgw_data_access.h" #include "rgw_acl_s3.h" #include "rgw_aio_throttle.h" #include "rgw_compression.h" +#include "rgw_cksum.h" #include "common/BackTrace.h" #define dout_subsys ceph_subsys_rgw @@ -208,12 +210,13 @@ int RGWDataAccess::Object::put(bufferlist& data, const req_context rctx{dpp, y, nullptr}; return processor->complete(obj_size, etag, - &mtime, mtime, - attrs, delete_at, - nullptr, nullptr, - puser_data, - nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + &mtime, mtime, attrs, + rgw::cksum::no_cksum, + delete_at, + nullptr, nullptr, + puser_data, + nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); } void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy) diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index 66e883e7257..16d0047ff53 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -2018,9 +2018,9 @@ namespace rgw { } op_ret = processor->complete(state->obj_size, etag, &mtime, real_time(), attrs, - (delete_at ? *delete_at : real_time()), - if_match, if_nomatch, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + rgw::cksum::no_cksum, (delete_at ? *delete_at : real_time()), + if_match, if_nomatch, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret != 0) { /* revert attr updates */ rgw_fh->set_mtime(omtime); diff --git a/src/rgw/rgw_file_int.h b/src/rgw/rgw_file_int.h index d61ef33c8a8..0a1db645207 100644 --- a/src/rgw/rgw_file_int.h +++ b/src/rgw/rgw_file_int.h @@ -37,6 +37,7 @@ #include "rgw_aio_throttle.h" #include "rgw_compression.h" #include "rgw_perf_counters.h" +#include "rgw_cksum.h" /* XXX diff --git a/src/rgw/rgw_hex.h b/src/rgw/rgw_hex.h new file mode 100644 index 00000000000..ceb29ff47f6 --- /dev/null +++ b/src/rgw/rgw_hex.h @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
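
For reference, both digest classes above follow the Restart/Update/Final shape of the other RGW digests, so they slot into the same templated checksum plumbing. A minimal standalone sketch of computing and hex-encoding a CRC (assumes only the header above plus the buf_to_hex helper that rgw_hex.h introduces just below; include paths are illustrative):

    #include <cstdint>
    #include <string>
    #include "rgw_crc_digest.h"  // Crc32 / Crc32c shown above
    #include "rgw_hex.h"         // buf_to_hex(), added just below

    std::string crc32c_hex(const unsigned char* data, uint64_t len) {
      rgw::digest::Crc32c d;     // rgw::digest::Crc32 works identically
      d.Update(data, len);
      unsigned char raw[rgw::digest::Crc32c::digest_size];
      d.Final(raw);              // byteswapped to big-endian on LE hosts
      char hex[2 * sizeof(raw) + 1];
      buf_to_hex(raw, sizeof(raw), hex);
      return std::string(hex);
    }
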
+ * + */ + +#pragma once + +#include <array> +#include <stdint.h> +#include <stddef.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> + +static inline void buf_to_hex(const unsigned char* const buf, + const size_t len, + char* const str) +{ + str[0] = '\0'; + for (size_t i = 0; i < len; i++) { + ::sprintf(&str[i*2], "%02x", static_cast<int>(buf[i])); + } +} + +template<size_t N> static inline std::array<char, N * 2 + 1> +buf_to_hex(const std::array<unsigned char, N>& buf) +{ + static_assert(N > 0, "The input array must be at least one element long"); + + std::array<char, N * 2 + 1> hex_dest; + buf_to_hex(buf.data(), N, hex_dest.data()); + return hex_dest; +} + +static inline int hexdigit(char c) +{ + if (c >= '0' && c <= '9') + return (c - '0'); + c = toupper(c); + if (c >= 'A' && c <= 'F') + return c - 'A' + 0xa; + return -EINVAL; +} + +static inline int hex_to_buf(const char *hex, char *buf, int len) +{ + int i = 0; + const char *p = hex; + while (*p) { + if (i >= len) + return -EINVAL; + buf[i] = 0; + int d = hexdigit(*p); + if (d < 0) + return d; + buf[i] = d << 4; + p++; + if (!*p) + return -EINVAL; + d = hexdigit(*p); + if (d < 0) + return d; + buf[i] += d; + i++; + p++; + } + return i; +} /* hex_to_buf */ diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc index 50c0f6a775e..9f448bfc355 100644 --- a/src/rgw/rgw_http_client.cc +++ b/src/rgw/rgw_http_client.cc @@ -9,6 +9,7 @@ #include <curl/easy.h> #include <curl/multi.h> +#include "rgw_asio_thread.h" #include "rgw_common.h" #include "rgw_http_client.h" #include "rgw_http_errors.h" @@ -62,29 +63,28 @@ struct rgw_http_req_data : public RefCountedObject { } template <typename Executor, typename CompletionToken> - auto async_wait(const Executor& ex, CompletionToken&& token) { + auto async_wait(const Executor& ex, std::unique_lock<ceph::mutex>& lock, + CompletionToken&& token) { return boost::asio::async_initiate<CompletionToken, Signature>( - [this] (auto handler, auto ex) { - std::unique_lock l{lock}; + [this, &lock] (auto handler, auto ex) { completion = Completion::create(ex, std::move(handler)); + lock.unlock(); // unlock before suspend }, token, ex); } - int wait(optional_yield y) { + int wait(const DoutPrefixProvider* dpp, optional_yield y) { + std::unique_lock l{lock}; if (done) { return ret; } if (y) { auto& yield = y.get_yield_context(); boost::system::error_code ec; - async_wait(yield.get_executor(), yield[ec]); + async_wait(yield.get_executor(), l, yield[ec]); return -ec.value(); } - // work on asio threads should be asynchronous, so warn when they block - if (is_asio_thread) { - dout(20) << "WARNING: blocking http request" << dendl; - } - std::unique_lock l{lock}; + maybe_warn_about_blocking(dpp); + cond.wait(l, [this]{return done==true;}); return ret; } @@ -533,9 +533,9 @@ static bool is_upload_request(const string& method) /* * process a single simple one off request */ -int RGWHTTPClient::process(optional_yield y) +int RGWHTTPClient::process(const DoutPrefixProvider* dpp, optional_yield y) { - return RGWHTTP::process(this, y); + return RGWHTTP::process(dpp, this, y); } string RGWHTTPClient::to_str() @@ -647,9 +647,9 @@ bool RGWHTTPClient::is_done() /* * wait for async request to complete */ -int RGWHTTPClient::wait(optional_yield y) +int RGWHTTPClient::wait(const DoutPrefixProvider* dpp, optional_yield y) { - return req_data->wait(y); + return req_data->wait(dpp, y); } void RGWHTTPClient::cancel() @@ -1213,7 +1213,7 @@ int RGWHTTP::send(RGWHTTPClient *req) { return 0; } -int 
RGWHTTP::process(RGWHTTPClient *req, optional_yield y) { +int RGWHTTP::process(const DoutPrefixProvider* dpp, RGWHTTPClient *req, optional_yield y) { if (!req) { return 0; } @@ -1222,6 +1222,6 @@ int RGWHTTP::process(RGWHTTPClient *req, optional_yield y) { return r; } - return req->wait(y); + return req->wait(dpp, y); } diff --git a/src/rgw/rgw_http_client.h b/src/rgw/rgw_http_client.h index ef188390326..51833585c83 100644 --- a/src/rgw/rgw_http_client.h +++ b/src/rgw/rgw_http_client.h @@ -151,9 +151,9 @@ public: req_timeout = timeout; } - int process(optional_yield y); + int process(const DoutPrefixProvider* dpp, optional_yield y); - int wait(optional_yield y); + int wait(const DoutPrefixProvider* dpp, optional_yield y); void cancel(); bool is_done(); @@ -349,5 +349,6 @@ class RGWHTTP { public: static int send(RGWHTTPClient *req); - static int process(RGWHTTPClient *req, optional_yield y); + static int process(const DoutPrefixProvider* dpp, RGWHTTPClient *req, + optional_yield y); }; diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc index 9a356d9c6f0..d3586e4900b 100644 --- a/src/rgw/rgw_kafka.cc +++ b/src/rgw/rgw_kafka.cc @@ -13,6 +13,7 @@ #include <thread> #include <atomic> #include <mutex> +#include <boost/functional/hash.hpp> #include <boost/lockfree/queue.hpp> #include "common/dout.h" @@ -70,6 +71,47 @@ inline std::string status_to_string(int s) { } } +connection_id_t::connection_id_t( + const std::string& _broker, + const std::string& _user, + const std::string& _password, + const boost::optional<const std::string&>& _ca_location, + const boost::optional<const std::string&>& _mechanism, + bool _ssl) + : broker(_broker), user(_user), password(_password), ssl(_ssl) { + if (_ca_location.has_value()) { + ca_location = _ca_location.get(); + } + if (_mechanism.has_value()) { + mechanism = _mechanism.get(); + } +} + +// equality operator and hasher functor are needed +// so that connection_id_t could be used as key in unordered_map +bool operator==(const connection_id_t& lhs, const connection_id_t& rhs) { + return lhs.broker == rhs.broker && lhs.user == rhs.user && + lhs.password == rhs.password && lhs.ca_location == rhs.ca_location && + lhs.mechanism == rhs.mechanism && lhs.ssl == rhs.ssl; +} + +struct connection_id_hasher { + std::size_t operator()(const connection_id_t& k) const { + std::size_t h = 0; + boost::hash_combine(h, k.broker); + boost::hash_combine(h, k.user); + boost::hash_combine(h, k.password); + boost::hash_combine(h, k.ca_location); + boost::hash_combine(h, k.mechanism); + boost::hash_combine(h, k.ssl); + return h; + } +}; + +std::string to_string(const connection_id_t& id) { + return id.broker + ":" + id.user; +} + // convert int status to errno - both RGW and librdkafka values inline int status_to_errno(int s) { if (s == 0) return 0; @@ -165,8 +207,9 @@ void message_callback(rd_kafka_t* rk, const rd_kafka_message_t* rkmessage, void* ldout(conn->cct, 20) << "Kafka run: ack received with result=" << rd_kafka_err2str(result) << dendl; } else { - ldout(conn->cct, 1) << "Kafka run: nack received with result=" << - rd_kafka_err2str(result) << dendl; + ldout(conn->cct, 1) << "Kafka run: nack received with result=" + << rd_kafka_err2str(result) + << " for broker: " << conn->broker << dendl; } if (!rkmessage->_private) { @@ -327,18 +370,21 @@ conf_error: // struct used for holding messages in the message queue struct message_wrapper_t { - std::string conn_name; + connection_id_t conn_id; std::string topic; std::string message; const reply_callback_t cb; - - 
message_wrapper_t(const std::string& _conn_name, - const std::string& _topic, - const std::string& _message, - reply_callback_t _cb) : conn_name(_conn_name), topic(_topic), message(_message), cb(_cb) {} + + message_wrapper_t(const connection_id_t& _conn_id, + const std::string& _topic, + const std::string& _message, + reply_callback_t _cb) + : conn_id(_conn_id), topic(_topic), message(_message), cb(_cb) {} }; -typedef std::unordered_map<std::string, connection_t_ptr> ConnectionList; +typedef std:: + unordered_map<connection_id_t, connection_t_ptr, connection_id_hasher> + ConnectionList; typedef boost::lockfree::queue<message_wrapper_t*, boost::lockfree::fixed_sized<true>> MessageQueue; class Manager { @@ -361,7 +407,7 @@ private: // TODO use rd_kafka_produce_batch for better performance void publish_internal(message_wrapper_t* message) { const std::unique_ptr<message_wrapper_t> msg_deleter(message); - const auto conn_it = connections.find(message->conn_name); + const auto conn_it = connections.find(message->conn_id); if (conn_it == connections.end()) { ldout(cct, 1) << "Kafka publish: connection was deleted while message was in the queue" << dendl; if (message->cb) { @@ -425,7 +471,9 @@ private: tag); if (rc == -1) { const auto err = rd_kafka_last_error(); - ldout(conn->cct, 1) << "Kafka publish: failed to produce: " << rd_kafka_err2str(err) << dendl; + ldout(conn->cct, 1) << "Kafka publish: failed to produce for topic: " + << message->topic + << ", with error: " << rd_kafka_err2str(err) << dendl; // immediately invoke callback on error if needed if (message->cb) { message->cb(-rd_kafka_err2errno(err)); @@ -527,8 +575,11 @@ public: // when a new connection is added. connections.max_load_factor(10.0); // give the runner thread a name for easier debugging - const auto rc = ceph_pthread_setname(runner.native_handle(), "kafka_manager"); - ceph_assert(rc==0); + const char* thread_name = "kafka_manager"; + if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { + ldout(cct, 1) << "ERROR: failed to set kafka manager thread name to: " << thread_name + << ". 
error: " << rc << dendl; + } } // non copyable @@ -541,14 +592,14 @@ public: } // connect to a broker, or reuse an existing connection if already connected - bool connect(std::string& broker, - const std::string& url, - bool use_ssl, - bool verify_ssl, - boost::optional<const std::string&> ca_location, - boost::optional<const std::string&> mechanism, - boost::optional<const std::string&> topic_user_name, - boost::optional<const std::string&> topic_password) { + bool connect(connection_id_t& conn_id, + const std::string& url, + bool use_ssl, + bool verify_ssl, + boost::optional<const std::string&> ca_location, + boost::optional<const std::string&> mechanism, + boost::optional<const std::string&> topic_user_name, + boost::optional<const std::string&> topic_password) { if (stopped) { ldout(cct, 1) << "Kafka connect: manager is stopped" << dendl; return false; @@ -556,6 +607,7 @@ public: std::string user; std::string password; + std::string broker; if (!parse_url_authority(url, broker, user, password)) { // TODO: increment counter ldout(cct, 1) << "Kafka connect: URL parsing failed" << dendl; @@ -584,14 +636,17 @@ public: ldout(cct, 1) << "Kafka connect: user/password are only allowed over secure connection" << dendl; return false; } - + connection_id_t tmp_id(broker, user, password, ca_location, mechanism, + use_ssl); std::lock_guard lock(connections_lock); - const auto it = connections.find(broker); + const auto it = connections.find(tmp_id); // note that ssl vs. non-ssl connection to the same host are two separate connections if (it != connections.end()) { // connection found - return even if non-ok - ldout(cct, 20) << "Kafka connect: connection found" << dendl; - return it->second.get(); + ldout(cct, 20) << "Kafka connect: connection found: " << to_string(tmp_id) + << dendl; + conn_id = std::move(tmp_id); + return true; } // connection not found, creating a new one @@ -607,20 +662,24 @@ public: return false; } ++connection_count; - connections.emplace(broker, std::move(conn)); + connections.emplace(tmp_id, std::move(conn)); - ldout(cct, 10) << "Kafka connect: new connection is created. Total connections: " << connection_count << dendl; + ldout(cct, 10) << "Kafka connect: new connection is created: " + << to_string(tmp_id) + << " . 
Total connections: " << connection_count << dendl; + conn_id = std::move(tmp_id); return true; } // TODO publish with confirm is needed in "none" case as well, cb should be invoked publish is ok (no ack) - int publish(const std::string& conn_name, - const std::string& topic, - const std::string& message) { + int publish(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message) { if (stopped) { return -ESRCH; } - auto message_wrapper = std::make_unique<message_wrapper_t>(conn_name, topic, message, nullptr); + auto message_wrapper = + std::make_unique<message_wrapper_t>(conn_id, topic, message, nullptr); if (messages.push(message_wrapper.get())) { std::ignore = message_wrapper.release(); ++queued; @@ -628,15 +687,16 @@ public: } return -EBUSY; } - - int publish_with_confirm(const std::string& conn_name, - const std::string& topic, - const std::string& message, - reply_callback_t cb) { + + int publish_with_confirm(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message, + reply_callback_t cb) { if (stopped) { return -ESRCH; } - auto message_wrapper = std::make_unique<message_wrapper_t>(conn_name, topic, message, cb); + auto message_wrapper = + std::make_unique<message_wrapper_t>(conn_id, topic, message, cb); if (messages.push(message_wrapper.get())) { std::ignore = message_wrapper.release(); ++queued; @@ -707,31 +767,35 @@ void shutdown() { s_manager = nullptr; } -bool connect(std::string& broker, const std::string& url, bool use_ssl, bool verify_ssl, - boost::optional<const std::string&> ca_location, - boost::optional<const std::string&> mechanism, - boost::optional<const std::string&> user_name, - boost::optional<const std::string&> password) { +bool connect(connection_id_t& conn_id, + const std::string& url, + bool use_ssl, + bool verify_ssl, + boost::optional<const std::string&> ca_location, + boost::optional<const std::string&> mechanism, + boost::optional<const std::string&> user_name, + boost::optional<const std::string&> password) { std::shared_lock lock(s_manager_mutex); if (!s_manager) return false; - return s_manager->connect(broker, url, use_ssl, verify_ssl, ca_location, mechanism, user_name, password); + return s_manager->connect(conn_id, url, use_ssl, verify_ssl, ca_location, + mechanism, user_name, password); } -int publish(const std::string& conn_name, - const std::string& topic, - const std::string& message) { +int publish(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message) { std::shared_lock lock(s_manager_mutex); if (!s_manager) return -ESRCH; - return s_manager->publish(conn_name, topic, message); + return s_manager->publish(conn_id, topic, message); } -int publish_with_confirm(const std::string& conn_name, - const std::string& topic, - const std::string& message, - reply_callback_t cb) { +int publish_with_confirm(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message, + reply_callback_t cb) { std::shared_lock lock(s_manager_mutex); if (!s_manager) return -ESRCH; - return s_manager->publish_with_confirm(conn_name, topic, message, cb); + return s_manager->publish_with_confirm(conn_id, topic, message, cb); } size_t get_connection_count() { diff --git a/src/rgw/rgw_kafka.h b/src/rgw/rgw_kafka.h index a6a38ed81ab..b7aa0d15759 100644 --- a/src/rgw/rgw_kafka.h +++ b/src/rgw/rgw_kafka.h @@ -21,28 +21,47 @@ bool init(CephContext* cct); // shutdown the kafka manager void shutdown(); +// key class for the connection list +struct connection_id_t { + 
std::string broker; + std::string user; + std::string password; + std::string ca_location; + std::string mechanism; + bool ssl = false; + connection_id_t() = default; + connection_id_t(const std::string& _broker, + const std::string& _user, + const std::string& _password, + const boost::optional<const std::string&>& _ca_location, + const boost::optional<const std::string&>& _mechanism, + bool _ssl); +}; + +std::string to_string(const connection_id_t& id); + // connect to a kafka endpoint -bool connect(std::string& broker, - const std::string& url, - bool use_ssl, - bool verify_ssl, - boost::optional<const std::string&> ca_location, - boost::optional<const std::string&> mechanism, - boost::optional<const std::string&> user_name, - boost::optional<const std::string&> password); +bool connect(connection_id_t& conn_id, + const std::string& url, + bool use_ssl, + bool verify_ssl, + boost::optional<const std::string&> ca_location, + boost::optional<const std::string&> mechanism, + boost::optional<const std::string&> user_name, + boost::optional<const std::string&> password); // publish a message over a connection that was already created -int publish(const std::string& conn_name, - const std::string& topic, - const std::string& message); +int publish(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message); // publish a message over a connection that was already created // and pass a callback that will be invoked (async) when broker confirms // receiving the message -int publish_with_confirm(const std::string& conn_name, - const std::string& topic, - const std::string& message, - reply_callback_t cb); +int publish_with_confirm(const connection_id_t& conn_id, + const std::string& topic, + const std::string& message, + reply_callback_t cb); // convert the integer status returned from the "publish" function to a string std::string status_to_string(int s); diff --git a/src/rgw/rgw_keystone.cc b/src/rgw/rgw_keystone.cc index 2fa35d74f6f..8a0873a047c 100644 --- a/src/rgw/rgw_keystone.cc +++ b/src/rgw/rgw_keystone.cc @@ -212,7 +212,7 @@ int Service::issue_admin_token_request(const DoutPrefixProvider *dpp, token_req.set_url(token_url); - const int ret = token_req.process(y); + const int ret = token_req.process(dpp, y); /* Detect rejection earlier than during the token parsing step. 
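
The equality operator and hasher defined earlier exist so connection_id_t can key the manager's connection table. A small sketch of that usage (connection_t_ptr and connection_id_hasher are the file-local names from rgw_kafka.cc shown above):

    #include <unordered_map>
    // connection_id_t from rgw_kafka.h; connection_t_ptr and
    // connection_id_hasher as defined in rgw_kafka.cc
    using ConnectionList =
        std::unordered_map<connection_id_t, connection_t_ptr,
                           connection_id_hasher>;

    bool has_connection(const ConnectionList& conns,
                        const connection_id_t& id) {
      // find() hashes all six fields via boost::hash_combine, then falls
      // back to operator== on collision; both must agree on identity
      return conns.find(id) != conns.end();
    }
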
*/ if (token_req.get_http_status() == @@ -290,7 +290,7 @@ int Service::get_keystone_barbican_token(const DoutPrefixProvider *dpp, token_req.set_url(token_url); ldpp_dout(dpp, 20) << "Requesting secret from barbican url=" << token_url << dendl; - const int ret = token_req.process(y); + const int ret = token_req.process(dpp, y); if (ret < 0) { ldpp_dout(dpp, 20) << "Barbican process error:" << token_bl.c_str() << dendl; return ret; diff --git a/src/rgw/rgw_kmip_client.cc b/src/rgw/rgw_kmip_client.cc index e801972ea80..9b75e8eca74 100644 --- a/src/rgw/rgw_kmip_client.cc +++ b/src/rgw/rgw_kmip_client.cc @@ -4,6 +4,7 @@ #include "common/Thread.h" #include "include/compat.h" #include "common/errno.h" +#include "rgw_asio_thread.h" #include "rgw_common.h" #include "rgw_kmip_client.h" @@ -15,10 +16,14 @@ RGWKMIPManager *rgw_kmip_manager; int -RGWKMIPTransceiver::wait(optional_yield y) +RGWKMIPTransceiver::wait(const DoutPrefixProvider* dpp, optional_yield y) { if (done) return ret; + + // TODO: when given a coroutine yield context, suspend instead of blocking + maybe_warn_about_blocking(dpp); + std::unique_lock l{lock}; if (!done) cond.wait(l); @@ -39,12 +44,12 @@ RGWKMIPTransceiver::send() } int -RGWKMIPTransceiver::process(optional_yield y) +RGWKMIPTransceiver::process(const DoutPrefixProvider* dpp, optional_yield y) { int r = send(); if (r < 0) return r; - return wait(y); + return wait(dpp, y); } RGWKMIPTransceiver::~RGWKMIPTransceiver() diff --git a/src/rgw/rgw_kmip_client.h b/src/rgw/rgw_kmip_client.h index 2992921136e..410bb7d57fe 100644 --- a/src/rgw/rgw_kmip_client.h +++ b/src/rgw/rgw_kmip_client.h @@ -3,6 +3,7 @@ #pragma once +class DoutPrefixProvider; class RGWKMIPManager; class RGWKMIPTransceiver { @@ -35,7 +36,7 @@ public: ceph::mutex lock = ceph::make_mutex("rgw_kmip_req::lock"); ceph::condition_variable cond; - int wait(optional_yield y); + int wait(const DoutPrefixProvider* dpp, optional_yield y); RGWKMIPTransceiver(CephContext * const cct, kmip_operation operation) : cct(cct), @@ -46,7 +47,7 @@ public: ~RGWKMIPTransceiver(); int send(); - int process(optional_yield y); + int process(const DoutPrefixProvider* dpp, optional_yield y); }; class RGWKMIPManager { diff --git a/src/rgw/rgw_kms.cc b/src/rgw/rgw_kms.cc index d5854ccea92..02f7837272c 100644 --- a/src/rgw/rgw_kms.cc +++ b/src/rgw/rgw_kms.cc @@ -306,7 +306,7 @@ protected: secret_req.set_client_key(kctx.ssl_clientkey()); } - res = secret_req.process(y); + res = secret_req.process(dpp, y); // map 401 to EACCES instead of EPERM if (secret_req.get_http_status() == @@ -782,8 +782,8 @@ private: protected: KmipGetTheKey(CephContext *cct) : cct(cct) {} KmipGetTheKey& keyid_to_keyname(std::string_view key_id); - KmipGetTheKey& get_uniqueid_for_keyname(optional_yield y); - int get_key_for_uniqueid(optional_yield y, std::string &); + KmipGetTheKey& get_uniqueid_for_keyname(const DoutPrefixProvider* dpp, optional_yield y); + int get_key_for_uniqueid(const DoutPrefixProvider* dpp, optional_yield y, std::string &); friend KmipSecretEngine; }; @@ -808,12 +808,13 @@ KmipGetTheKey::keyid_to_keyname(std::string_view key_id) } KmipGetTheKey& -KmipGetTheKey::get_uniqueid_for_keyname(optional_yield y) +KmipGetTheKey::get_uniqueid_for_keyname(const DoutPrefixProvider* dpp, + optional_yield y) { RGWKMIPTransceiver secret_req(cct, RGWKMIPTransceiver::LOCATE); secret_req.name = work.data(); - ret = secret_req.process(y); + ret = secret_req.process(dpp, y); if (ret < 0) { failed = true; } else if (!secret_req.outlist->string_count) { @@ -834,12 +835,13 
@@ KmipGetTheKey::get_uniqueid_for_keyname(optional_yield y) } int -KmipGetTheKey::get_key_for_uniqueid(optional_yield y, std::string& actual_key) +KmipGetTheKey::get_key_for_uniqueid(const DoutPrefixProvider* dpp, + optional_yield y, std::string& actual_key) { if (failed) return ret; RGWKMIPTransceiver secret_req(cct, RGWKMIPTransceiver::GET); secret_req.unique_id = work.data(); - ret = secret_req.process(y); + ret = secret_req.process(dpp, y); if (ret < 0) { failed = true; } else { @@ -866,8 +868,8 @@ public: int r; r = KmipGetTheKey{cct} .keyid_to_keyname(key_id) - .get_uniqueid_for_keyname(y) - .get_key_for_uniqueid(y, actual_key); + .get_uniqueid_for_keyname(dpp, y) + .get_key_for_uniqueid(dpp, y, actual_key); return r; } }; @@ -937,7 +939,7 @@ static int request_key_from_barbican(const DoutPrefixProvider *dpp, secret_req.append_header("Accept", "application/octet-stream"); secret_req.append_header("X-Auth-Token", barbican_token); - res = secret_req.process(y); + res = secret_req.process(dpp, y); // map 401 to EACCES instead of EPERM if (secret_req.get_http_status() == RGWHTTPTransceiver::HTTP_STATUS_UNAUTHORIZED) { diff --git a/src/rgw/rgw_lc.cc b/src/rgw/rgw_lc.cc index a654713eafb..0d510d6642d 100644 --- a/src/rgw/rgw_lc.cc +++ b/src/rgw/rgw_lc.cc @@ -538,6 +538,35 @@ static bool pass_size_limit_checks(const DoutPrefixProvider *dpp, lc_op_ctx& oc) static std::string lc_id = "rgw lifecycle"; static std::string lc_req_id = "0"; +static void send_notification(const DoutPrefixProvider* dpp, + rgw::sal::Driver* driver, + rgw::sal::Object* obj, + rgw::sal::Bucket* bucket, + const std::string& etag, + uint64_t size, + const std::string& version_id, + const rgw::notify::EventTypeList& event_types) { + // notification supported only for RADOS driver for now + auto notify = driver->get_notification( + dpp, obj, nullptr, event_types, bucket, lc_id, + const_cast<std::string&>(bucket->get_tenant()), lc_req_id, null_yield); + + int ret = notify->publish_reserve(dpp, nullptr); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: notify publish_reserve failed, with error: " + << ret << " for lc object: " << obj->get_name() + << " for event_types: " << event_types << dendl; + return; + } + ret = notify->publish_commit(dpp, size, ceph::real_clock::now(), etag, + version_id); + if (ret < 0) { + ldpp_dout(dpp, 5) << "WARNING: notify publish_commit failed, with error: " + << ret << " for lc object: " << obj->get_name() + << " for event_types: " << event_types << dendl; + } +} + /* do all zones in the zone group process LC? 
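
The new send_notification() helper wraps the publish_reserve/publish_commit pair that the lifecycle paths previously open-coded, and is now called only after the delete or transition succeeds. A sketch of the call shape (the notify_expired wrapper is hypothetical; field names follow lc_op_ctx as used below):

    // hypothetical wrapper: notify only after the delete already succeeded;
    // publish_reserve() and publish_commit() both happen inside
    static void notify_expired(const DoutPrefixProvider* dpp, lc_op_ctx& oc,
                               rgw::sal::Object* obj, const std::string& etag,
                               uint64_t size, const std::string& version_id) {
      send_notification(dpp, oc.driver, obj, oc.bucket, etag, size, version_id,
                        {rgw::notify::ObjectExpirationCurrent});
    }
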
*/ static bool zonegroup_lc_check(const DoutPrefixProvider *dpp, rgw::sal::Zone* zone) { @@ -571,7 +600,6 @@ static int remove_expired_obj(const DoutPrefixProvider* dpp, auto& meta = o.meta; int ret; auto version_id = obj_key.instance; // deep copy, so not cleared below - std::unique_ptr<rgw::sal::Notification> notify; /* per discussion w/Daniel, Casey, and Eric, we *do need* * a new sal object handle, based on the following decision @@ -593,6 +621,7 @@ static int remove_expired_obj(const DoutPrefixProvider* dpp, if (obj->get_attr(RGW_ATTR_ETAG, bl)) { etag = rgw_bl_str(bl); } + auto size = obj->get_size(); std::unique_ptr<rgw::sal::Object::DeleteOp> del_op = obj->get_delete_op(); @@ -603,20 +632,6 @@ static int remove_expired_obj(const DoutPrefixProvider* dpp, del_op->params.bucket_owner = bucket_info.owner; del_op->params.unmod_since = meta.mtime; - // notification supported only for RADOS driver for now - notify = driver->get_notification( - dpp, obj.get(), nullptr, event_types, oc.bucket, lc_id, - const_cast<std::string&>(oc.bucket->get_tenant()), lc_req_id, null_yield); - - ret = notify->publish_reserve(dpp, nullptr); - if ( ret < 0) { - ldpp_dout(dpp, 1) - << "ERROR: notify reservation failed, deferring delete of object k=" - << o.key - << dendl; - return ret; - } - uint32_t flags = (!remove_indeed || !zonegroup_lc_check(dpp, oc.driver->get_zone())) ? rgw::sal::FLAG_LOG_OP : 0; ret = del_op->delete_obj(dpp, null_yield, flags); @@ -624,14 +639,8 @@ static int remove_expired_obj(const DoutPrefixProvider* dpp, ldpp_dout(dpp, 1) << fmt::format("ERROR: {} failed, with error: {}", __func__, ret) << dendl; } else { - // send request to notification manager - int publish_ret = notify->publish_commit(dpp, obj->get_size(), - ceph::real_clock::now(), - etag, - version_id); - if (publish_ret < 0) { - ldpp_dout(dpp, 5) << "WARNING: notify publish_commit failed, with error: " << publish_ret << dendl; - } + send_notification(dpp, driver, obj.get(), oc.bucket, etag, size, version_id, + event_types); } return ret; @@ -880,8 +889,6 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target, params.ns = RGW_OBJ_NS_MULTIPART; params.access_list_filter = MultipartMetaFilter; - const auto event_type = rgw::notify::ObjectExpirationAbortMPU; - auto pf = [&](RGWLC::LCWorker *wk, WorkQ *wq, WorkItem &wi) { int ret{0}; auto wt = boost::get<std::tuple<lc_op, rgw_bucket_dir_entry>>(wi); @@ -901,36 +908,13 @@ int RGWLC::handle_multipart_expiration(rgw::sal::Bucket* target, if (sal_obj->get_attr(RGW_ATTR_ETAG, bl)) { etag = rgw_bl_str(bl); } - - std::unique_ptr<rgw::sal::Notification> notify - = driver->get_notification( - this, sal_obj.get(), nullptr, {event_type}, target, lc_id, - const_cast<std::string&>(target->get_tenant()), lc_req_id, - null_yield); - auto version_id = obj.key.instance; - - ret = notify->publish_reserve(this, nullptr); - if (ret < 0) { - ldpp_dout(wk->get_lc(), 0) - << "ERROR: reserving persistent notification for " - "abort_multipart_upload, ret=" - << ret << ", thread:" << wq->thr_name() - << ", deferring mpu cleanup for meta:" << obj.key << dendl; - return ret; - } + auto size = sal_obj->get_size(); ret = mpu->abort(this, cct, null_yield); if (ret == 0) { - int publish_ret = notify->publish_commit( - this, sal_obj->get_size(), - ceph::real_clock::now(), - etag, - version_id); - if (publish_ret < 0) { - ldpp_dout(wk->get_lc(), 5) - << "WARNING: notify publish_commit failed, with error: " << ret - << dendl; - } + const auto event_type = rgw::notify::ObjectExpirationAbortMPU; + 
send_notification(this, driver, sal_obj.get(), target, etag, size, + obj.key.instance, {event_type}); if (perfcounter) { perfcounter->inc(l_rgw_lc_abort_mpu, 1); } @@ -1247,7 +1231,7 @@ public: int process(lc_op_ctx& oc) override { auto& o = oc.o; int r = remove_expired_obj(oc.dpp, oc, true, - {rgw::notify::ObjectExpirationNoncurrent}); + {rgw::notify::ObjectExpirationNonCurrent}); if (r < 0) { ldpp_dout(oc.dpp, 0) << "ERROR: remove_expired_obj (non-current expiration) " << oc.bucket << ":" << o.key @@ -1390,7 +1374,7 @@ public: << "flags: " << oc.o.flags << dendl; } else { ret = remove_expired_obj(oc.dpp, oc, true, - {rgw::notify::ObjectTransitionNoncurrent}); + {rgw::notify::ObjectTransitionNonCurrent}); ldpp_dout(oc.dpp, 20) << "delete_tier_obj Object(key:" << oc.o.key << ") not current " << "versioned_epoch: " << oc.o.versioned_epoch @@ -1420,31 +1404,7 @@ public: if (obj->get_attr(RGW_ATTR_ETAG, bl)) { etag = rgw_bl_str(bl); } - - rgw::notify::EventTypeList event_types; - if (bucket->versioned() && oc.o.is_current() && !oc.o.is_delete_marker()) { - event_types.insert(event_types.end(), - {rgw::notify::ObjectTransitionCurrent, - rgw::notify::LifecycleTransition}); - } else { - event_types.push_back(rgw::notify::ObjectTransitionNoncurrent); - } - - std::unique_ptr<rgw::sal::Notification> notify = - oc.driver->get_notification( - oc.dpp, obj.get(), nullptr, event_types, bucket, lc_id, - const_cast<std::string&>(oc.bucket->get_tenant()), lc_req_id, - null_yield); - auto version_id = oc.o.key.instance; - - ret = notify->publish_reserve(oc.dpp, nullptr); - if (ret < 0) { - ldpp_dout(oc.dpp, 1) - << "ERROR: notify reservation failed, deferring transition of object k=" - << oc.o.key - << dendl; - return ret; - } + auto size = obj->get_size(); ret = oc.obj->transition_to_cloud(oc.bucket, oc.tier.get(), oc.o, oc.env.worker->get_cloud_targets(), @@ -1453,15 +1413,17 @@ public: if (ret < 0) { return ret; } else { - // send request to notification manager - int publish_ret = notify->publish_commit(oc.dpp, obj->get_size(), - ceph::real_clock::now(), - etag, - version_id); - if (publish_ret < 0) { - ldpp_dout(oc.dpp, 5) << - "WARNING: notify publish_commit failed, with error: " << publish_ret << dendl; + rgw::notify::EventTypeList event_types; + if (bucket->versioned() && oc.o.is_current() && + !oc.o.is_delete_marker()) { + event_types.insert(event_types.end(), + {rgw::notify::ObjectTransitionCurrent, + rgw::notify::LifecycleTransition}); + } else { + event_types.push_back(rgw::notify::ObjectTransitionNonCurrent); } + send_notification(oc.dpp, oc.driver, obj.get(), oc.bucket, etag, size, + oc.o.key.instance, event_types); } if (delete_object) { diff --git a/src/rgw/rgw_lua_background.cc b/src/rgw/rgw_lua_background.cc index 93c509a78cc..ef97a5d6f65 100644 --- a/src/rgw/rgw_lua_background.cc +++ b/src/rgw/rgw_lua_background.cc @@ -83,9 +83,11 @@ void Background::start() { } started = true; runner = std::thread(&Background::run, this); - const auto rc = ceph_pthread_setname(runner.native_handle(), - "lua_background"); - ceph_assert(rc == 0); + const char* thread_name = "lua_background"; + if (const auto rc = ceph_pthread_setname(runner.native_handle(), thread_name); rc != 0) { + ldout(cct, 1) << "ERROR: failed to set lua background thread name to: " << thread_name + << ". 
error: " << rc << dendl; + } } void Background::pause() { diff --git a/src/rgw/rgw_notify_event_type.cc b/src/rgw/rgw_notify_event_type.cc index 9baa5df798b..577b368d65f 100644 --- a/src/rgw/rgw_notify_event_type.cc +++ b/src/rgw/rgw_notify_event_type.cc @@ -31,7 +31,7 @@ namespace rgw::notify { case ObjectExpirationCurrent: return "s3:ObjectLifecycle:Expiration:Current"; case ObjectExpirationNoncurrent: - return "s3:ObjectLifecycle:Expiration:Noncurrent"; + return "s3:ObjectLifecycle:Expiration:NonCurrent"; case ObjectExpirationDeleteMarker: return "s3:ObjectLifecycle:Expiration:DeleteMarker"; case ObjectExpirationAbortMPU: @@ -41,7 +41,7 @@ namespace rgw::notify { case ObjectTransitionCurrent: return "s3:ObjectLifecycle:Transition:Current"; case ObjectTransitionNoncurrent: - return "s3:ObjectLifecycle:Transition:Noncurrent"; + return "s3:ObjectLifecycle:Transition:NonCurrent"; case ObjectSynced: return "s3:ObjectSynced:*"; case ObjectSyncedCreate: @@ -99,8 +99,10 @@ namespace rgw::notify { return ObjectExpiration; if (s == "s3:ObjectLifecycle:Expiration:Current") return ObjectExpirationCurrent; + if (s == "s3:ObjectLifecycle:Expiration:NonCurrent") + return ObjectExpirationNonCurrent; if (s == "s3:ObjectLifecycle:Expiration:Noncurrent") - return ObjectExpirationNoncurrent; + return ObjectExpirationNonCurrent; if (s == "s3:ObjectLifecycle:Expiration:DeleteMarker") return ObjectExpirationDeleteMarker; if (s == "s3:ObjectLifecycle:Expiration:AbortMultipartUpload") @@ -109,8 +111,10 @@ namespace rgw::notify { return ObjectTransition; if (s == "s3:ObjectLifecycle:Transition:Current") return ObjectTransitionCurrent; + if (s == "s3:ObjectLifecycle:Transition:NonCurrent") + return ObjectTransitionNonCurrent; if (s == "s3:ObjectLifecycle:Transition:Noncurrent") - return ObjectTransitionNoncurrent; + return ObjectTransitionNonCurrent; if (s == "s3:ObjectSynced:*") return ObjectSynced; if (s == "s3:ObjectSynced:Create") diff --git a/src/rgw/rgw_notify_event_type.h b/src/rgw/rgw_notify_event_type.h index 34a73d22997..4850572a937 100644 --- a/src/rgw/rgw_notify_event_type.h +++ b/src/rgw/rgw_notify_event_type.h @@ -20,11 +20,13 @@ namespace rgw::notify { ObjectExpiration = 0xF00, ObjectExpirationCurrent = 0x100, ObjectExpirationNoncurrent = 0x200, + ObjectExpirationNonCurrent = 0x200, ObjectExpirationDeleteMarker = 0x400, ObjectExpirationAbortMPU = 0x800, ObjectTransition = 0xF000, ObjectTransitionCurrent = 0x1000, ObjectTransitionNoncurrent = 0x2000, + ObjectTransitionNonCurrent = 0x2000, ObjectSynced = 0xF0000, ObjectSyncedCreate = 0x10000, ObjectSyncedDelete = 0x20000, diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index e1c7ee484f4..31a74e183e6 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab ft=cpp #include <errno.h> +#include <optional> #include <stdlib.h> #include <system_error> #include <unistd.h> @@ -12,16 +13,20 @@ #include <boost/algorithm/string/predicate.hpp> #include <boost/optional.hpp> #include <boost/utility/in_place_factory.hpp> +#include <fmt/format.h> #include "include/scope_guard.h" #include "common/Clock.h" #include "common/armor.h" +#include "common/async/spawn_throttle.h" #include "common/errno.h" #include "common/mime.h" #include "common/utf8.h" #include "common/ceph_json.h" #include "common/static_ptr.h" #include "common/perf_counters_key.h" +#include "rgw_cksum_digest.h" +#include "rgw_common.h" #include "rgw_tracer.h" #include "rgw_rados.h" @@ -54,9 +59,11 @@ #include "rgw_sal.h" #include "rgw_sal_rados.h" #include 
"rgw_torrent.h" +#include "rgw_cksum_pipe.h" #include "rgw_lua_data_filter.h" #include "rgw_lua.h" #include "rgw_iam_managed_policy.h" +#include "rgw_bucket_sync.h" #include "services/svc_zone.h" #include "services/svc_quota.h" @@ -908,6 +915,27 @@ void rgw_build_iam_environment(rgw::sal::Driver* driver, } } +void handle_replication_status_header( + const DoutPrefixProvider *dpp, + rgw::sal::Attrs& attrs, + req_state* s, + const ceph::real_time &obj_mtime) { + auto attr_iter = attrs.find(RGW_ATTR_OBJ_REPLICATION_STATUS); + if (attr_iter != attrs.end() && attr_iter->second.to_str() == "PENDING") { + if (s->object->is_sync_completed(dpp, obj_mtime)) { + s->object->set_atomic(); + rgw::sal::Attrs setattrs, rmattrs; + bufferlist bl; + bl.append("COMPLETED"); + setattrs[RGW_ATTR_OBJ_REPLICATION_STATUS] = std::move(bl); + int ret = s->object->set_obj_attrs(dpp, &setattrs, &rmattrs, s->yield, 0); + if (ret == 0) { + ldpp_dout(dpp, 20) << *s->object << " has amz-replication-status header set to COMPLETED" << dendl; + } + } + } +} + /* * GET on CloudTiered objects is processed only when sent from the sync client. * In all other cases, fail with `ERR_INVALID_OBJECT_STATE`. @@ -2296,6 +2324,7 @@ void RGWGetObj::execute(optional_yield y) } #endif + op_ret = rgw_compression_info_from_attrset(attrs, need_decompress, cs_info); if (op_ret < 0) { ldpp_dout(this, 0) << "ERROR: failed to decode compression info, cannot decompress" << dendl; @@ -2314,6 +2343,8 @@ void RGWGetObj::execute(optional_yield y) filter = &*decompress; } + handle_replication_status_header(this, attrs, s, lastmod); + attr_iter = attrs.find(RGW_ATTR_OBJ_REPLICATION_TRACE); if (attr_iter != attrs.end()) { try { @@ -2331,6 +2362,7 @@ void RGWGetObj::execute(optional_yield y) } catch (const buffer::error&) {} } + if (get_type() == RGW_OP_GET_OBJ && get_data) { op_ret = handle_cloudtier_obj(attrs, sync_cloudtiered); if (op_ret < 0) { @@ -3919,11 +3951,8 @@ int RGWPutObj::verify_permission(optional_yield y) rgw_add_to_iam_environment(s->env, "s3:x-amz-acl", s->canned_acl); - if (obj_tags != nullptr && obj_tags->count() > 0){ - auto tags = obj_tags->get_tags(); - for (const auto& kv: tags){ - rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second); - } + for (const auto& kv: obj_tags.get_tags()) { + rgw_add_to_iam_environment(s->env, "s3:RequestObjectTag/"+kv.first, kv.second); } // add server-side encryption headers @@ -4183,7 +4212,7 @@ void RGWPutObj::execute(optional_yield y) s->object.get(), s->src_object.get(), s, rgw::notify::ObjectCreatedPut, y); if(!multipart) { - op_ret = res->publish_reserve(this, obj_tags.get()); + op_ret = res->publish_reserve(this, &obj_tags); if (op_ret < 0) { return; } @@ -4297,9 +4326,13 @@ void RGWPutObj::execute(optional_yield y) std::optional<RGWPutObj_Compress> compressor; std::optional<RGWPutObj_Torrent> torrent; + /* XXX Cksum::DigestVariant was designed to avoid allocation, but going with + * factory method to avoid issues with move assignment when wrapped */ + std::unique_ptr<rgw::putobj::RGWPutObj_Cksum> cksum_filter; std::unique_ptr<rgw::sal::DataProcessor> encrypt; std::unique_ptr<rgw::sal::DataProcessor> run_lua; + /* data processor filters--last filter runs first */ if (!append) { // compression and encryption only apply to full object uploads op_ret = get_encrypt_filter(&encrypt, filter); if (op_ret < 0) { @@ -4327,7 +4360,8 @@ void RGWPutObj::execute(optional_yield y) if (torrent = get_torrent_filter(filter); torrent) { filter = &*torrent; } - // run lua script before 
data is compressed and encrypted - last filter runs first + /* checksum and lua filters must run before the compression and encryption + * filters; checksum first (probably?) */ op_ret = get_lua_filter(&run_lua, filter); if (op_ret < 0) { return; @@ -4335,7 +4369,18 @@ void RGWPutObj::execute(optional_yield y) if (run_lua) { filter = &*run_lua; } - } + /* optional streaming checksum */ + try { + cksum_filter = + rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env); + } catch (const rgw::io::Exception& e) { + op_ret = -e.code().value(); + return; + } + if (cksum_filter) { + filter = &*cksum_filter; + } + } /* !append */ tracepoint(rgw_op, before_data_transfer, s->req_id.c_str()); do { bufferlist data; @@ -4449,6 +4494,19 @@ void RGWPutObj::execute(optional_yield y) } } + RGWBucketSyncPolicyHandlerRef policy_handler; + op_ret = driver->get_sync_policy_handler(this, std::nullopt, s->bucket->get_key(), &policy_handler, s->yield); + + if (op_ret < 0) { + ldpp_dout(this, 0) << "failed to read sync policy for bucket: " << s->bucket << dendl; + return; + } + if (policy_handler && policy_handler->bucket_exports_object(s->object->get_name(), obj_tags)) { + bufferlist repl_bl; + repl_bl.append("PENDING"); + emplace_attr(RGW_ATTR_OBJ_REPLICATION_STATUS, std::move(repl_bl)); + } + if (slo_info) { bufferlist manifest_bl; encode(*slo_info, manifest_bl); @@ -4462,13 +4520,47 @@ void RGWPutObj::execute(optional_yield y) bl.append(etag.c_str(), etag.size()); emplace_attr(RGW_ATTR_ETAG, std::move(bl)); + if (cksum_filter) { + const auto& hdr = cksum_filter->header(); + auto cksum_verify = + cksum_filter->verify(*s->info.env); // valid or no supplied cksum + cksum = get<1>(cksum_verify); + if (std::get<0>(cksum_verify)) { + buffer::list cksum_bl; + + ldpp_dout_fmt(this, 16, + "{} checksum verified " + "\n\tcomputed={} == \n\texpected={}", + hdr.second, + cksum->to_armor(), + cksum_filter->expected(*s->info.env)); + + cksum->encode(cksum_bl); + emplace_attr(RGW_ATTR_CKSUM, std::move(cksum_bl)); + } else { + /* content checksum mismatch */ + auto computed_ck = cksum->to_armor(); + auto expected_ck = cksum_filter->expected(*s->info.env); + + ldpp_dout_fmt(this, 4, + "{} content checksum mismatch" + "\n\tcalculated={} != \n\texpected={}", + hdr.second, + computed_ck, + (!!expected_ck) ? expected_ck : "(checksum unavailable)"); + + op_ret = -ERR_INVALID_REQUEST; + return; + } + } + populate_with_generic_attrs(s, attrs); op_ret = rgw_get_request_metadata(this, s->cct, s->info, attrs); if (op_ret < 0) { return; } encode_delete_at_attr(delete_at, attrs); - encode_obj_tags_attr(obj_tags.get(), attrs); + encode_obj_tags_attr(obj_tags, attrs); rgw_cond_decode_objtags(s, attrs); /* Add a custom metadata to expose the information whether an object @@ -4496,10 +4588,13 @@ void RGWPutObj::execute(optional_yield y) tracepoint(rgw_op, processor_complete_enter, s->req_id.c_str()); const req_context rctx{this, s->yield, s->trace.get()}; - op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, - (delete_at ? *delete_at : real_time()), if_match, if_nomatch, - (user_data.empty() ? nullptr : &user_data), nullptr, nullptr, - rctx, complete_flags); + + op_ret = + processor->complete(s->obj_size, etag, &mtime, real_time(), attrs, + cksum, (delete_at ? *delete_at : real_time()), + if_match, if_nomatch, + (user_data.empty() ? 
nullptr : &user_data), + nullptr, nullptr, rctx, complete_flags); tracepoint(rgw_op, processor_complete_exit, s->req_id.c_str()); if (op_ret < 0) { return; @@ -4511,7 +4606,7 @@ void RGWPutObj::execute(optional_yield y) ldpp_dout(this, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; // too late to rollback operation, hence op_ret is not set here } -} +} /* RGWPutObj::execute() */ int RGWPostObj::init_processing(optional_yield y) { @@ -4557,7 +4652,8 @@ void RGWPostObj::execute(optional_yield y) // make reservation for notification if needed std::unique_ptr<rgw::sal::Notification> res - = driver->get_notification(s->object.get(), s->src_object.get(), s, rgw::notify::ObjectCreatedPost, y); + = driver->get_notification(s->object.get(), s->src_object.get(), s, + rgw::notify::ObjectCreatedPost, y); op_ret = res->publish_reserve(this); if (op_ret < 0) { return; @@ -4608,10 +4704,13 @@ void RGWPostObj::execute(optional_yield y) return; } + std::unique_ptr<rgw::putobj::RGWPutObj_Cksum> cksum_filter; + std::unique_ptr<rgw::sal::DataProcessor> encrypt; + /* No filters by default. */ rgw::sal::DataProcessor *filter = processor.get(); - std::unique_ptr<rgw::sal::DataProcessor> encrypt; + /* last filter runs first */ op_ret = get_encrypt_filter(&encrypt, filter); if (op_ret < 0) { return; @@ -4632,6 +4731,20 @@ void RGWPostObj::execute(optional_yield y) } } + /* XXX no lua filter? */ + + /* optional streaming checksum */ + try { + cksum_filter = + rgw::putobj::RGWPutObj_Cksum::Factory(filter, *s->info.env); + } catch (const rgw::io::Exception& e) { + op_ret = -e.code().value(); + return; + } + if (cksum_filter) { + filter = &*cksum_filter; + } + bool again; do { ceph::bufferlist data; @@ -4646,6 +4759,7 @@ void RGWPostObj::execute(optional_yield y) break; } + /* XXXX we should modernize to use component buffers? */ hash.Update((const unsigned char *)data.c_str(), data.length()); op_ret = filter->process(std::move(data), ofs); if (op_ret < 0) { @@ -4717,14 +4831,42 @@ void RGWPostObj::execute(optional_yield y) emplace_attr(RGW_ATTR_COMPRESSION, std::move(tmp)); } + if (cksum_filter) { + auto cksum_verify = + cksum_filter->verify(*s->info.env); // valid or no supplied cksum + cksum = get<1>(cksum_verify); + if (std::get<0>(cksum_verify)) { + buffer::list cksum_bl; + cksum->encode(cksum_bl); + emplace_attr(RGW_ATTR_CKSUM, std::move(cksum_bl)); + } else { + /* content checksum mismatch */ + const auto &hdr = cksum_filter->header(); + + ldpp_dout_fmt(this, 4, + "{} content checksum mismatch" + "\n\tcalculated={} != \n\texpected={}", + hdr.second, + cksum->to_armor(), + cksum_filter->expected(*s->info.env)); + + op_ret = -ERR_INVALID_REQUEST; + return; + } + } + const req_context rctx{this, s->yield, s->trace.get()}; - op_ret = processor->complete(s->obj_size, etag, nullptr, real_time(), attrs, - (delete_at ? *delete_at : real_time()), - nullptr, nullptr, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + op_ret = processor->complete(s->obj_size, etag, nullptr, real_time(), + attrs, cksum, + (delete_at ? *delete_at : real_time()), + nullptr, nullptr, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret < 0) { return; } + + /* XXX shouldn't we have an op-counter update here? 
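
Both upload paths build their filter stack back to front: each filter is constructed over the current head and then becomes the new head, so the filter installed last sees the client bytes first. A toy model of that chaining convention (self-contained; the Proc/Wrap/Sink names are invented and only illustrate why the checksum filter, installed last, digests plaintext before compression and encryption):

    #include <iostream>
    #include <string>

    struct Proc {
      virtual ~Proc() = default;
      virtual void process(std::string s) = 0;
    };
    struct Sink : Proc {             // stands in for the terminal processor
      void process(std::string s) override { std::cout << "store: " << s << "\n"; }
    };
    struct Wrap : Proc {             // stands in for one DataProcessor filter
      Proc* next; std::string tag;
      Wrap(Proc* n, std::string t) : next(n), tag(std::move(t)) {}
      void process(std::string s) override {
        std::cout << tag << "\n";    // this filter sees the bytes first...
        next->process(std::move(s)); // ...then forwards them inward
      }
    };

    int main() {
      Sink sink;
      Proc* filter = &sink;
      Wrap encrypt{filter, "encrypt"};   filter = &encrypt;  // installed first, runs last
      Wrap compress{filter, "compress"}; filter = &compress;
      Wrap cksum{filter, "cksum"};       filter = &cksum;    // installed last, runs first
      filter->process("client bytes");   // prints: cksum, compress, encrypt, store
    }
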
*/ + + } while (is_next_file_to_upload()); // send request to notification manager @@ -4733,8 +4875,7 @@ void RGWPostObj::execute(optional_yield y) ldpp_dout(this, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; // too late to rollback operation, hence op_ret is not set here } -} - +} /* RGWPostObj::execute() */ void RGWPutMetadataAccount::filter_out_temp_url(map<string, bufferlist>& add_attrs, const set<string>& rmattr_names, @@ -5006,7 +5147,7 @@ void RGWPutMetadataObject::execute(optional_yield y) } } - op_ret = s->object->set_obj_attrs(this, &attrs, &rmattrs, s->yield); + op_ret = s->object->set_obj_attrs(this, &attrs, &rmattrs, s->yield, rgw::sal::FLAG_LOG_OP); } int RGWDeleteObj::handle_slo_manifest(bufferlist& bl, optional_yield y) @@ -5117,9 +5258,12 @@ void RGWDeleteObj::execute(optional_yield y) if (!rgw::sal::Object::empty(s->object.get())) { uint64_t obj_size = 0; std::string etag; + bool null_verid; { int state_loaded = -1; bool check_obj_lock = s->object->have_instance() && s->bucket->get_info().obj_lock_enabled(); + null_verid = (s->object->get_instance() == "null"); + op_ret = state_loaded = s->object->load_obj_state(this, s->yield, true); if (op_ret < 0) { if (need_object_expiration() || multipart_delete) { @@ -5216,6 +5360,7 @@ void RGWDeleteObj::execute(optional_yield y) del_op->params.high_precision_time = s->system_request; del_op->params.olh_epoch = epoch; del_op->params.marker_version_id = version_id; + del_op->params.null_verid = null_verid; op_ret = del_op->delete_obj(this, y, rgw::sal::FLAG_LOG_OP); if (op_ret >= 0) { @@ -6218,7 +6363,6 @@ void RGWInitMultipart::execute(optional_yield y) { multipart_trace = tracing::rgw::tracer.start_trace(tracing::rgw::MULTIPART, s->trace_enabled); bufferlist aclbl, tracebl; - rgw::sal::Attrs attrs; op_ret = get_params(y); if (op_ret < 0) { @@ -6253,6 +6397,7 @@ void RGWInitMultipart::execute(optional_yield y) upload_id); upload->obj_legal_hold = obj_legal_hold; upload->obj_retention = obj_retention; + upload->cksum_type = cksum_algo; op_ret = upload->init(this, s->yield, s->owner, s->dest_placement, attrs); if (op_ret == 0) { @@ -6285,9 +6430,111 @@ void RGWCompleteMultipart::pre_exec() rgw_bucket_object_pre_exec(s); } +static inline int +try_sum_part_cksums(const DoutPrefixProvider *dpp, + CephContext *cct, + rgw::sal::MultipartUpload* upload, + RGWMultiCompleteUpload* parts, + std::optional<rgw::cksum::Cksum>& out_cksum, + optional_yield y) +{ + /* 1. need checksum-algorithm header (if invalid, fail) + 2. conditional on have-checksum, + 3. need digest for supplied algo + 4. iterate over parts, confirm each has same algo, if not, fail + 5. for each part-checksum, accumulate bytes into new checksum + 6. return armored and append "-<nparts>" + 7. verify -- if invalid, fail */ + + /* rgw_sal.h says that list_parts is called for the side effect of loading + * the parts of an upload into "cache"--the api is strange and truncated + * flag suggests that it needs to be called multiple times to handle large + * uploads--but does not explain how that affects the hidden cache; I'm + * assuming it turns over? 
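
Steps 5-6 above implement the AWS "checksum of checksums": digest the concatenation of each part's raw checksum bytes, then armor the result and append "-<nparts>". A standalone sketch of the same arithmetic, using OpenSSL's legacy SHA-256 API in place of the in-tree rgw::cksum digests and hex in place of base64 armoring (both substitutions are for brevity):

    #include <openssl/sha.h>
    #include <cstdio>
    #include <string>
    #include <vector>

    // combine raw (binary) per-part checksums the way S3 reports multipart
    // checksums: digest-of-digests, rendered as "<digest>-<nparts>"
    std::string combine_part_cksums(const std::vector<std::string>& raw_parts) {
      SHA256_CTX ctx;
      SHA256_Init(&ctx);
      for (const auto& raw : raw_parts) {            // step 5: accumulate
        SHA256_Update(&ctx, raw.data(), raw.size());
      }
      unsigned char md[SHA256_DIGEST_LENGTH];
      SHA256_Final(md, &ctx);
      char hex[2 * SHA256_DIGEST_LENGTH + 1] = {0};  // step 6 (hex, not base64)
      for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) {
        snprintf(hex + 2 * i, 3, "%02x", md[i]);
      }
      return std::string(hex) + "-" + std::to_string(raw_parts.size());
    }
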
*/ + + int op_ret = 0; + bool truncated = false; + int marker = 0; + auto num_parts = int(parts->parts.size()); + + rgw::cksum::Type& cksum_type = upload->cksum_type; + + int again_count{0}; + again: + op_ret = upload->list_parts(dpp, cct, num_parts, marker, + &marker, &truncated, y); + if (op_ret < 0) { + return op_ret; + } + + if (truncated) { + + ldpp_dout_fmt(dpp, 20, + "WARNING: {} upload->list_parts {} {} truncated, " + "again_count={}!", + __func__, num_parts, marker, again_count); + + truncated = false; + ++again_count; + goto again; + } + + if (cksum_type == rgw::cksum::Type::none) [[unlikely]] { + /* ordinary, no-checksum case */ + return 0; + } + + rgw::cksum::DigestVariant dv = rgw::cksum::digest_factory(cksum_type); + rgw::cksum::Digest* digest = rgw::cksum::get_digest(dv); + + /* returns the parts (currently?) in cache */ + auto parts_ix{0}; + auto& parts_map = upload->get_parts(); + for (auto& part : parts_map) { + ++parts_ix; + auto& part_cksum = part.second->get_cksum(); + + ldpp_dout_fmt(dpp, 16, + "INFO: {} iterate part: {} {} {}", + __func__, parts_ix, part_cksum->type_string(), + part_cksum->to_armor()); + + if ((part_cksum->type != cksum_type)) { + /* if parts have inconsistent checksum, fail now */ + + ldpp_dout_fmt(dpp, 14, + "ERROR: multipart part checksum type mismatch\n\tcomplete " + "multipart header={} part={}", + to_string(part_cksum->type), to_string(cksum_type)); + + op_ret = -ERR_INVALID_REQUEST; + return op_ret; + } + + /* the checksum of the final object is a checksum (of the same type, + * presumably) of the concatenated checksum bytes of the parts, plus + * "-<num-parts>". See + * https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#large-object-checksums + */ + auto ckr = part_cksum->raw(); + digest->Update((unsigned char *)ckr.data(), ckr.length()); + } /* all-parts */ + + /* we cannot verify this checksum, only compute it */ + out_cksum = rgw::cksum::finalize_digest(digest, cksum_type); + + ldpp_dout_fmt(dpp, 16, + "INFO: {} combined checksum {} {}-{}", + __func__, + out_cksum->type_string(), + out_cksum->to_armor(), num_parts); + + return op_ret; +} /* try_sum_part_cksums */ void RGWCompleteMultipart::execute(optional_yield y) { - RGWMultiCompleteUpload *parts; + RGWMultiCompleteUpload* parts; RGWMultiXMLParser parser; std::unique_ptr<rgw::sal::MultipartUpload> upload; off_t ofs = 0; @@ -6327,7 +6574,6 @@ void RGWCompleteMultipart::execute(optional_yield y) return; } - if ((int)parts->parts.size() > s->cct->_conf->rgw_multipart_part_upload_limit) { op_ret = -ERANGE; @@ -6335,6 +6581,25 @@ void RGWCompleteMultipart::execute(optional_yield y) } upload = s->bucket->get_multipart_upload(s->object->get_name(), upload_id); + ldpp_dout(this, 16) << + fmt::format("INFO: {}->get_multipart_upload for obj {}, {} cksum_type {}", + s->bucket->get_name(), + s->object->get_name(), upload_id, + (!!upload) ? 
to_string(upload->cksum_type) : 0) + << dendl; + + rgw_placement_rule* dest_placement; + op_ret = upload->get_info(this, s->yield, &dest_placement); + if (op_ret < 0) { + /* XXX this fails consistently when !checksum */ + ldpp_dout(this, 0) << + "WARNING: MultipartUpload::get_info() for placement failed " + << "ret=" << op_ret << dendl; + if (upload->cksum_type != rgw::cksum::Type::none) { + op_ret = -ERR_INTERNAL_ERROR; + return; + } + } RGWCompressionInfo cs_info; bool compressed = false; @@ -6346,8 +6611,8 @@ void RGWCompleteMultipart::execute(optional_yield y) meta_obj->set_in_extra_data(true); meta_obj->set_hash_source(s->object->get_name()); - /*take a cls lock on meta_obj to prevent racing completions (or retries) - from deleting the parts*/ + /* take a cls lock on meta_obj to prevent racing completions (or retries) + from deleting the parts*/ int max_lock_secs_mp = s->cct->_conf.get_val<int64_t>("rgw_mp_lock_max_time"); utime_t dur(max_lock_secs_mp, 0); @@ -6377,6 +6642,14 @@ void RGWCompleteMultipart::execute(optional_yield y) extract_span_context(meta_obj->get_attrs(), trace_ctx); multipart_trace = tracing::rgw::tracer.add_span(name(), trace_ctx); + /* checksum computation */ + if (upload->cksum_type != rgw::cksum::Type::none) { + op_ret = try_sum_part_cksums(this, s->cct, upload.get(), parts, cksum, y); + if (op_ret < 0) { + return; + } + } + if (s->bucket->versioning_enabled()) { if (!version_id.empty()) { s->object->set_instance(version_id); @@ -6385,17 +6658,48 @@ void RGWCompleteMultipart::execute(optional_yield y) version_id = s->object->get_instance(); } } - s->object->set_attrs(meta_obj->get_attrs()); + + auto& target_attrs = meta_obj->get_attrs(); + + if (cksum) { + armored_cksum = + fmt::format("{}-{}", cksum->to_armor(), parts->parts.size()); + + /* validate computed checksum against supplied checksum, if present */ + auto [hdr_cksum, supplied_cksum] = + rgw::putobj::find_hdr_cksum(*(s->info.env)); + + ldpp_dout_fmt(this, 10, + "INFO: client supplied checksum {}: {}", + hdr_cksum.header_name(), supplied_cksum); + + if (! 
(supplied_cksum.empty()) && + (supplied_cksum != armored_cksum)) { + op_ret = -ERR_INVALID_REQUEST; + return; + } + + buffer::list cksum_bl; + cksum->encode(cksum_bl); + target_attrs.emplace(RGW_ATTR_CKSUM, std::move(cksum_bl)); + } /* cksum */ + + s->object->set_attrs(target_attrs); // make reservation for notification if needed std::unique_ptr<rgw::sal::Notification> res; - res = driver->get_notification(s->object.get(), nullptr, s, rgw::notify::ObjectCreatedCompleteMultipartUpload, y); + res = driver->get_notification( + s->object.get(), nullptr, s, + rgw::notify::ObjectCreatedCompleteMultipartUpload, y); op_ret = res->publish_reserve(this); if (op_ret < 0) { return; } - op_ret = upload->complete(this, y, s->cct, parts->parts, remove_objs, accounted_size, compressed, cs_info, ofs, s->req_id, s->owner, olh_epoch, s->object.get()); + op_ret = + upload->complete(this, y, s->cct, parts->parts, remove_objs, accounted_size, + compressed, cs_info, ofs, s->req_id, s->owner, olh_epoch, + s->object.get()); if (op_ret < 0) { ldpp_dout(this, 0) << "ERROR: upload complete failed ret=" << op_ret << dendl; return; @@ -6442,8 +6746,9 @@ bool RGWCompleteMultipart::check_previously_completed(const RGWMultiCompleteUplo char petag[CEPH_CRYPTO_MD5_DIGESTSIZE]; hex_to_buf(partetag.c_str(), petag, CEPH_CRYPTO_MD5_DIGESTSIZE); hash.Update((const unsigned char *)petag, sizeof(petag)); - ldpp_dout(this, 20) << __func__ << "() re-calculating multipart etag: part: " - << index << ", etag: " << partetag << dendl; + ldpp_dout(this, 20) + << __func__ << "() re-calculating multipart etag: part: " + << index << ", etag: " << partetag << dendl; } unsigned char final_etag[CEPH_CRYPTO_MD5_DIGESTSIZE]; @@ -6560,6 +6865,21 @@ void RGWListMultipart::execute(optional_yield y) if (op_ret < 0) return; + iter = attrs.find(RGW_ATTR_CKSUM); + if (iter != attrs.end()) { + auto bliter = iter->second.cbegin(); + try { + rgw::cksum::Cksum tcksum; + tcksum.decode(bliter); + cksum = std::move(tcksum); + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: could not decode stored cksum, caught buffer::error" << dendl; + op_ret = -EIO; + } + } + if (op_ret < 0) + return; + op_ret = upload->list_parts(this, s->cct, max_parts, marker, NULL, &truncated, y); } @@ -6671,26 +6991,22 @@ void RGWDeleteMultiObj::write_ops_log_entry(rgw_log_entry& entry) const { entry.delete_multi_obj_meta.objects = std::move(ops_log_entries); } -void RGWDeleteMultiObj::wait_flush(optional_yield y, - boost::asio::deadline_timer *formatter_flush_cond, - std::function<bool()> predicate) +void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_yield y) { - if (y && formatter_flush_cond) { - auto yc = y.get_yield_context(); - while (!predicate()) { - boost::system::error_code error; - formatter_flush_cond->async_wait(yc[error]); - rgw_flush_formatter(s, s->formatter); + // add the object key to the dout prefix so we can trace concurrent calls + struct ObjectPrefix : public DoutPrefixPipe { + const rgw_obj_key& o; + ObjectPrefix(const DoutPrefixProvider& dpp, const rgw_obj_key& o) + : DoutPrefixPipe(dpp), o(o) {} + void add_prefix(std::ostream& out) const override { + out << o << ' '; } - } -} + } prefix{*this, o}; + const DoutPrefixProvider* dpp = &prefix; -void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_yield y, - boost::asio::deadline_timer *formatter_flush_cond) -{ std::unique_ptr<rgw::sal::Object> obj = bucket->get_object(o); if (o.empty()) { - send_partial_response(o, false, "", -EINVAL, 
formatter_flush_cond); + send_partial_response(o, false, "", -EINVAL); return; } @@ -6698,11 +7014,11 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ const auto action = o.instance.empty() ? rgw::IAM::s3DeleteObject : rgw::IAM::s3DeleteObjectVersion; - if (!verify_bucket_permission(this, s, ARN(obj->get_obj()), s->user_acl, + if (!verify_bucket_permission(dpp, s, ARN(obj->get_obj()), s->user_acl, s->bucket_acl, s->iam_policy, s->iam_identity_policies, s->session_policies, action)) { - send_partial_response(o, false, "", -EACCES, formatter_flush_cond); + send_partial_response(o, false, "", -EACCES); return; } @@ -6712,7 +7028,7 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ if (!rgw::sal::Object::empty(obj.get())) { int state_loaded = -1; bool check_obj_lock = obj->have_instance() && bucket->get_info().obj_lock_enabled(); - const auto ret = state_loaded = obj->load_obj_state(this, y, true); + const auto ret = state_loaded = obj->load_obj_state(dpp, y, true); if (ret < 0) { if (ret == -ENOENT) { @@ -6720,7 +7036,7 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ check_obj_lock = false; } else { // Something went wrong. - send_partial_response(o, false, "", ret, formatter_flush_cond); + send_partial_response(o, false, "", ret); return; } } else { @@ -6730,9 +7046,9 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ if (check_obj_lock) { ceph_assert(state_loaded == 0); - int object_lock_response = verify_object_lock(this, obj->get_attrs(), bypass_perm, bypass_governance_mode); + int object_lock_response = verify_object_lock(dpp, obj->get_attrs(), bypass_perm, bypass_governance_mode); if (object_lock_response != 0) { - send_partial_response(o, false, "", object_lock_response, formatter_flush_cond); + send_partial_response(o, false, "", object_lock_response); return; } } @@ -6745,9 +7061,9 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ rgw::notify::ObjectRemovedDelete; std::unique_ptr<rgw::sal::Notification> res = driver->get_notification(obj.get(), s->src_object.get(), s, event_type, y); - op_ret = res->publish_reserve(this); + op_ret = res->publish_reserve(dpp); if (op_ret < 0) { - send_partial_response(o, false, "", op_ret, formatter_flush_cond); + send_partial_response(o, false, "", op_ret); return; } @@ -6760,67 +7076,59 @@ void RGWDeleteMultiObj::handle_individual_object(const rgw_obj_key& o, optional_ del_op->params.bucket_owner = s->bucket_owner.id; del_op->params.marker_version_id = version_id; - op_ret = del_op->delete_obj(this, y, rgw::sal::FLAG_LOG_OP); + op_ret = del_op->delete_obj(dpp, y, rgw::sal::FLAG_LOG_OP); if (op_ret == -ENOENT) { op_ret = 0; } if (op_ret == 0) { // send request to notification manager - int ret = res->publish_commit(this, obj_size, ceph::real_clock::now(), etag, version_id); + int ret = res->publish_commit(dpp, obj_size, ceph::real_clock::now(), etag, version_id); if (ret < 0) { - ldpp_dout(this, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; + ldpp_dout(dpp, 1) << "ERROR: publishing notification failed, with error: " << ret << dendl; // too late to rollback operation, hence op_ret is not set here } } - send_partial_response(o, del_op->result.delete_marker, del_op->result.version_id, op_ret, formatter_flush_cond); + send_partial_response(o, del_op->result.delete_marker, del_op->result.version_id, op_ret); } void RGWDeleteMultiObj::execute(optional_yield 
y) { - RGWMultiDelDelete *multi_delete; - vector<rgw_obj_key>::iterator iter; - RGWMultiDelXMLParser parser; - uint32_t aio_count = 0; - const uint32_t max_aio = std::max<uint32_t>(1, s->cct->_conf->rgw_multi_obj_del_max_aio); - char* buf; - std::optional<boost::asio::deadline_timer> formatter_flush_cond; - if (y) { - auto ex = y.get_yield_context().get_executor(); - formatter_flush_cond = std::make_optional<boost::asio::deadline_timer>(ex); - } - - buf = data.c_str(); + const char* buf = data.c_str(); if (!buf) { op_ret = -EINVAL; - goto error; + return; } + RGWMultiDelXMLParser parser; if (!parser.init()) { op_ret = -EINVAL; - goto error; + return; } if (!parser.parse(buf, data.length(), 1)) { - op_ret = -EINVAL; - goto error; + s->err.message = "Failed to parse xml input"; + op_ret = -ERR_MALFORMED_XML; + return; } - multi_delete = static_cast<RGWMultiDelDelete *>(parser.find_first("Delete")); + auto multi_delete = static_cast<RGWMultiDelDelete *>(parser.find_first("Delete")); if (!multi_delete) { - op_ret = -EINVAL; - goto error; - } else { -#define DELETE_MULTI_OBJ_MAX_NUM 1000 - int max_num = s->cct->_conf->rgw_delete_multi_obj_max_num; - if (max_num < 0) { - max_num = DELETE_MULTI_OBJ_MAX_NUM; - } - int multi_delete_object_num = multi_delete->objects.size(); - if (multi_delete_object_num > max_num) { - op_ret = -ERR_MALFORMED_XML; - goto error; - } + s->err.message = "Missing required element Delete"; + op_ret = -ERR_MALFORMED_XML; + return; + } + + constexpr int DEFAULT_MAX_NUM = 1000; + int max_num = s->cct->_conf->rgw_delete_multi_obj_max_num; + if (max_num < 0) { + max_num = DEFAULT_MAX_NUM; + } + const int multi_delete_object_num = multi_delete->objects.size(); + if (multi_delete_object_num > max_num) { + s->err.message = fmt::format("Object count limit {} exceeded", max_num); + op_ret = -ERR_MALFORMED_XML; + return; } if (multi_delete->is_quiet()) @@ -6837,53 +7145,38 @@ void RGWDeleteMultiObj::execute(optional_yield y) if (has_versioned && !s->mfa_verified) { ldpp_dout(this, 5) << "NOTICE: multi-object delete request with a versioned object, mfa auth not provided" << dendl; op_ret = -ERR_MFA_REQUIRED; - goto error; + return; } } begin_response(); - if (multi_delete->objects.empty()) { - goto done; - } - for (iter = multi_delete->objects.begin(); - iter != multi_delete->objects.end(); - ++iter) { - rgw_obj_key obj_key = *iter; - if (y) { - wait_flush(y, &*formatter_flush_cond, [&aio_count, max_aio] { - return aio_count < max_aio; - }); - aio_count++; - boost::asio::spawn(y.get_yield_context(), [this, &aio_count, obj_key, &formatter_flush_cond] (boost::asio::yield_context yield) { - handle_individual_object(obj_key, yield, &*formatter_flush_cond); - aio_count--; - }, [] (std::exception_ptr eptr) { - if (eptr) std::rethrow_exception(eptr); - }); - } else { - handle_individual_object(obj_key, y, nullptr); - } - } - if (formatter_flush_cond) { - wait_flush(y, &*formatter_flush_cond, [this, n=multi_delete->objects.size()] { - return n == ops_log_entries.size(); - }); + // process up to max_aio object deletes in parallel + const uint32_t max_aio = std::max<uint32_t>(1, s->cct->_conf->rgw_multi_obj_del_max_aio); + auto group = ceph::async::spawn_throttle{y, max_aio}; + + for (const auto& key : multi_delete->objects) { + boost::asio::spawn(group.get_executor(), + [this, &key] (boost::asio::yield_context yield) { + handle_individual_object(key, yield); + }, group); + + rgw_flush_formatter(s, s->formatter); } + group.wait();
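The spawn_throttle used above caps the number of in-flight per-object deletes at max_aio and lets group.wait() block until every spawned child finishes. As an illustration of the same bounded fan-out idea with nothing but C++20 standard primitives (the patch itself uses cooperative stackful asio coroutines on one executor, not kernel threads), a minimal sketch:

```cpp
#include <cstdio>
#include <semaphore>
#include <thread>
#include <vector>

int main() {
  constexpr int max_aio = 4;               // stand-in for rgw_multi_obj_del_max_aio
  std::counting_semaphore<max_aio> slots{max_aio};
  std::vector<std::jthread> workers;

  for (int key = 0; key < 16; ++key) {     // 16 hypothetical object keys
    slots.acquire();                       // block while max_aio deletes are in flight
    workers.emplace_back([key, &slots] {
      std::printf("deleting object %d\n", key);  // handle_individual_object() stand-in
      slots.release();                     // free a slot for the next spawn
    });
  }
  return 0;  // group.wait() equivalent: jthreads join on destruction
}
```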
/* set the return code to zero, errors at this point will be dumped to the response */ op_ret = 0; -done: // will likely segfault if begin_response() has not been called end_response(); - return; +} -error: +void RGWDeleteMultiObj::send_response() +{ + // if we haven't already written a response, send the error response send_status(); - return; - } bool RGWBulkDelete::Deleter::verify_permission(RGWBucketInfo& binfo, @@ -7400,12 +7693,15 @@ int RGWBulkUploadOp::handle_file(const std::string_view path, attrs.emplace(RGW_ATTR_COMPRESSION, std::move(tmp)); } + /* XXX I don't think bulk upload can support checksums */ + /* Complete the transaction. */ const req_context rctx{this, s->yield, s->trace.get()}; op_ret = processor->complete(size, etag, nullptr, ceph::real_time(), - attrs, ceph::real_time() /* delete_at */, - nullptr, nullptr, nullptr, nullptr, nullptr, - rctx, rgw::sal::FLAG_LOG_OP); + attrs, rgw::cksum::no_cksum, + ceph::real_time() /* delete_at */, + nullptr, nullptr, nullptr, nullptr, nullptr, + rctx, rgw::sal::FLAG_LOG_OP); if (op_ret < 0) { ldpp_dout(this, 20) << "processor::complete returned op_ret=" << op_ret << dendl; } @@ -7625,7 +7921,7 @@ void RGWRMAttrs::execute(optional_yield y) s->object->set_atomic(); - op_ret = s->object->set_obj_attrs(this, nullptr, &attrs, y); + op_ret = s->object->set_obj_attrs(this, nullptr, &attrs, y, rgw::sal::FLAG_LOG_OP); if (op_ret < 0) { ldpp_dout(this, 0) << "ERROR: failed to delete obj attrs, obj=" << s->object << " ret=" << op_ret << dendl; @@ -7662,7 +7958,7 @@ void RGWSetAttrs::execute(optional_yield y) if (!rgw::sal::Object::empty(s->object.get())) { rgw::sal::Attrs a(attrs); - op_ret = s->object->set_obj_attrs(this, &a, nullptr, y); + op_ret = s->object->set_obj_attrs(this, &a, nullptr, y, rgw::sal::FLAG_LOG_OP); } else { op_ret = s->bucket->merge_and_store_attrs(this, attrs, y); } diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 8fe5540e96d..5801d1a0d11 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -34,6 +34,7 @@ #include "common/ceph_json.h" #include "common/ceph_time.h" +#include "rgw_cksum.h" #include "rgw_common.h" #include "rgw_dmclock.h" #include "rgw_sal.h" @@ -1227,7 +1228,7 @@ protected: std::string etag; bool chunked_upload; RGWAccessControlPolicy policy; - std::unique_ptr <RGWObjTags> obj_tags; + RGWObjTags obj_tags; const char *dlo_manifest; RGWSLOInfo *slo_info; rgw::sal::Attrs attrs; @@ -1253,6 +1254,8 @@ protected: RGWObjectRetention *obj_retention; RGWObjectLegalHold *obj_legal_hold; + std::optional<rgw::cksum::Cksum> cksum; + public: RGWPutObj() : ofs(0), supplied_md5_b64(NULL), @@ -1332,6 +1335,7 @@ protected: RGWAccessControlPolicy policy; std::map<std::string, bufferlist> attrs; boost::optional<ceph::real_time> delete_at; + std::optional<rgw::cksum::Cksum> cksum; /* Must be called after get_data() or the result is undefined.
*/ virtual std::string get_current_filename() const = 0; @@ -1846,6 +1850,8 @@ protected: //object lock std::optional<RGWObjectRetention> obj_retention = std::nullopt; std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt; + rgw::sal::Attrs attrs; + rgw::cksum::Type cksum_algo{rgw::cksum::Type::none}; public: RGWInitMultipart() {} @@ -1872,6 +1878,10 @@ protected: jspan_ptr multipart_trace; ceph::real_time upload_time; std::unique_ptr<rgw::sal::Notification> res; + std::unique_ptr<rgw::sal::Object> meta_obj; + std::optional<rgw::cksum::Cksum> cksum; + std::optional<std::string> armored_cksum; + off_t ofs = 0; public: RGWCompleteMultipart() {} @@ -1915,6 +1925,7 @@ protected: RGWAccessControlPolicy policy; bool truncated; rgw_placement_rule* placement; + std::optional<rgw::cksum::Cksum> cksum; public: RGWListMultipart() { @@ -2027,24 +2038,7 @@ class RGWDeleteMultiObj : public RGWOp { * Handles the deletion of an individual object and uses * set_partial_response to record the outcome. */ - void handle_individual_object(const rgw_obj_key& o, - optional_yield y, - boost::asio::deadline_timer *formatter_flush_cond); - - /** - * When the request is being executed in a coroutine, performs - * the actual formatter flushing and is responsible for the - * termination condition (when when all partial object responses - * have been sent). Note that the formatter flushing must be handled - * on the coroutine that invokes the execute method vs. the - * coroutines that are spawned to handle individual objects because - * the flush logic uses a yield context that was captured - * and saved on the req_state vs. one that is passed on the stack. - * This is a no-op in the case where we're not executing as a coroutine. - */ - void wait_flush(optional_yield y, - boost::asio::deadline_timer *formatter_flush_cond, - std::function<bool()> predicate); + void handle_individual_object(const rgw_obj_key& o, optional_yield y); protected: std::vector<delete_multi_obj_entry> ops_log_entries; @@ -2067,13 +2061,14 @@ public: int verify_permission(optional_yield y) override; void pre_exec() override; void execute(optional_yield y) override; + void send_response() override; virtual int get_params(optional_yield y) = 0; virtual void send_status() = 0; virtual void begin_response() = 0; virtual void send_partial_response(const rgw_obj_key& key, bool delete_marker, - const std::string& marker_version_id, int ret, - boost::asio::deadline_timer *formatter_flush_cond) = 0; + const std::string& marker_version_id, + int ret) = 0; virtual void end_response() = 0; const char* name() const override { return "multi_object_delete"; } RGWOpType get_type() override { return RGW_OP_DELETE_MULTI_OBJ; } @@ -2168,7 +2163,12 @@ inline int rgw_get_request_metadata(const DoutPrefixProvider *dpp, "x-amz-server-side-encryption-customer-algorithm", "x-amz-server-side-encryption-customer-key", "x-amz-server-side-encryption-customer-key-md5", - "x-amz-storage-class" + /* XXX agreed w/cbodley that probably a cleanup is needed here--we probably + * don't want to store these, esp. 
under user.rgw */ + "x-amz-storage-class", + "x-amz-content-sha256", + "x-amz-checksum-algorithm", + "x-amz-date" }; size_t valid_meta_count = 0; @@ -2234,18 +2234,14 @@ inline void encode_delete_at_attr(boost::optional<ceph::real_time> delete_at, attrs[RGW_ATTR_DELETE_AT] = delatbl; } /* encode_delete_at_attr */ -inline void encode_obj_tags_attr(RGWObjTags* obj_tags, std::map<std::string, bufferlist>& attrs) +inline void encode_obj_tags_attr(const RGWObjTags& obj_tags, std::map<std::string, bufferlist>& attrs) { - if (obj_tags == nullptr){ - // we assume the user submitted a tag format which we couldn't parse since - // this wouldn't be parsed later by get/put obj tags, lets delete if the - // attr was populated + if (obj_tags.empty()) { return; } - bufferlist tagsbl; - obj_tags->encode(tagsbl); - attrs[RGW_ATTR_TAGS] = tagsbl; + obj_tags.encode(tagsbl); + attrs[RGW_ATTR_TAGS] = std::move(tagsbl); } inline int encode_dlo_manifest_attr(const char * const dlo_manifest, diff --git a/src/rgw/rgw_opa.cc b/src/rgw/rgw_opa.cc index 7422615aec9..0bda4d62a51 100644 --- a/src/rgw/rgw_opa.cc +++ b/src/rgw/rgw_opa.cc @@ -71,7 +71,7 @@ int rgw_opa_authorize(RGWOp *& op, req.set_send_length(ss.str().length()); /* send request */ - ret = req.process(null_yield); + ret = req.process(op, s->yield); if (ret < 0) { ldpp_dout(op, 2) << "OPA process error:" << bl.c_str() << dendl; return ret; diff --git a/src/rgw/rgw_policy_s3.cc b/src/rgw/rgw_policy_s3.cc index e017cc8871d..1c183644b30 100644 --- a/src/rgw/rgw_policy_s3.cc +++ b/src/rgw/rgw_policy_s3.cc @@ -7,6 +7,7 @@ #include "rgw_policy_s3.h" #include "rgw_common.h" #include "rgw_crypt_sanitize.h" +#include "rgw_cksum.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rgw @@ -101,15 +102,20 @@ bool RGWPolicyEnv::get_value(const string& s, string& val, map<string, bool, lts return get_var(var, val); } - -bool RGWPolicyEnv::match_policy_vars(map<string, bool, ltstr_nocase>& policy_vars, string& err_msg) +bool RGWPolicyEnv::match_policy_vars( + map<string, bool, ltstr_nocase>& policy_vars, string& err_msg) { map<string, string, ltstr_nocase>::iterator iter; string ignore_prefix = "x-ignore-"; for (iter = vars.begin(); iter != vars.end(); ++iter) { const string& var = iter->first; - if (strncasecmp(ignore_prefix.c_str(), var.c_str(), ignore_prefix.size()) == 0) + if (strncasecmp(ignore_prefix.c_str(), var.c_str(), + ignore_prefix.size()) == 0) { + continue; + } + if (rgw::cksum::is_checksum_hdr(var)) { continue; + } if (policy_vars.count(var) == 0) { err_msg = "Policy missing condition: "; err_msg.append(iter->first); @@ -118,7 +124,7 @@ bool RGWPolicyEnv::match_policy_vars(map<string, bool, ltstr_nocase>& policy_var } } return true; -} +} /* match_policy_vars */ RGWPolicy::~RGWPolicy() { diff --git a/src/rgw/rgw_process.cc b/src/rgw/rgw_process.cc index 10e544b577d..8c93f77278b 100644 --- a/src/rgw/rgw_process.cc +++ b/src/rgw/rgw_process.cc @@ -351,7 +351,7 @@ int process_request(const RGWProcessEnv& penv, goto done; } req->op = op; - ldpp_dout(op, 10) << "op=" << typeid(*op).name() << dendl; + ldpp_dout(op, 10) << "op=" << typeid(*op).name() << " " << dendl; s->op_type = op->get_type(); try { diff --git a/src/rgw/rgw_pubsub.cc b/src/rgw/rgw_pubsub.cc index 08118f57b36..120f21062b4 100644 --- a/src/rgw/rgw_pubsub.cc +++ b/src/rgw/rgw_pubsub.cc @@ -378,6 +378,23 @@ void rgw_pubsub_s3_event::dump(Formatter *f) const { encode_json("opaqueData", opaque_data, f); } +namespace rgw::notify { + void event_entry_t::dump(Formatter *f) const { + 
Formatter::ObjectSection s(*f, "entry"); + { + Formatter::ObjectSection sub_s(*f, "event"); + event.dump(f); + } + encode_json("pushEndpoint", push_endpoint, f); + encode_json("pushEndpointArgs", push_endpoint_args, f); + encode_json("topic", arn_topic, f); + encode_json("creationTime", creation_time, f); + encode_json("TTL", time_to_live, f); + encode_json("maxRetries", max_retries, f); + encode_json("retrySleepDuration", retry_sleep_duration, f); + } +} + void rgw_pubsub_topic::dump(Formatter *f) const { encode_json("owner", owner, f); @@ -1081,21 +1098,23 @@ int RGWPubSub::remove_topic_v2(const DoutPrefixProvider* dpp, << dendl; return 0; } - ret = driver->remove_topic_v2(name, tenant, objv_tracker, y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 1) << "ERROR: failed to remove topic info: ret=" << ret - << dendl; - return ret; - } const rgw_pubsub_dest& dest = topic.dest; if (!dest.push_endpoint.empty() && dest.persistent && !dest.persistent_queue.empty()) { ret = driver->remove_persistent_topic(dpp, y, dest.persistent_queue); if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 1) << "WARNING: failed to remove queue for " + ldpp_dout(dpp, 1) << "ERROR: failed to remove queue for " "persistent topic: " << cpp_strerror(ret) << dendl; - } // not fatal + return ret; + } + } + + ret = driver->remove_topic_v2(name, tenant, objv_tracker, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 1) << "ERROR: failed to remove topic info: ret=" << ret + << dendl; + return ret; } return 0; } @@ -1127,7 +1146,15 @@ int RGWPubSub::remove_topic(const DoutPrefixProvider *dpp, const std::string& na if (t == topics.topics.end()) { return -ENOENT; } - const rgw_pubsub_dest dest = std::move(t->second.dest); + if (!t->second.dest.push_endpoint.empty() && t->second.dest.persistent && + !t->second.dest.persistent_queue.empty()) { + ret = driver->remove_persistent_topic(dpp, y, t->second.dest.persistent_queue); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 1) << "ERROR: failed to remove queue for " + "persistent topic: " << cpp_strerror(ret) << dendl; + return ret; + } + } topics.topics.erase(t); ret = write_topics_v1(dpp, topics, &objv_tracker, y); @@ -1135,14 +1162,5 @@ int RGWPubSub::remove_topic(const DoutPrefixProvider *dpp, const std::string& na ldpp_dout(dpp, 1) << "ERROR: failed to remove topics info: ret=" << ret << dendl; return ret; } - - if (!dest.push_endpoint.empty() && dest.persistent && - !dest.persistent_queue.empty()) { - ret = driver->remove_persistent_topic(dpp, y, dest.persistent_queue); - if (ret < 0 && ret != -ENOENT) { - ldpp_dout(dpp, 1) << "WARNING: failed to remove queue for " - "persistent topic: " << cpp_strerror(ret) << dendl; - } // not fatal - } return 0; } diff --git a/src/rgw/rgw_pubsub.h b/src/rgw/rgw_pubsub.h index 3835407eb45..b7ce443af03 100644 --- a/src/rgw/rgw_pubsub.h +++ b/src/rgw/rgw_pubsub.h @@ -672,12 +672,56 @@ public: }; namespace rgw::notify { - // Denotes that the topic has not overridden the global configurations for (time_to_live / max_retries / retry_sleep_duration) // defaults: (rgw_topic_persistency_time_to_live / rgw_topic_persistency_max_retries / rgw_topic_persistency_sleep_duration) constexpr uint32_t DEFAULT_GLOBAL_VALUE = UINT32_MAX; // Used in case the topic is using the default global value for dumping in a formatter constexpr static const std::string_view DEFAULT_CONFIG{"None"}; + struct event_entry_t { + rgw_pubsub_s3_event event; + std::string push_endpoint; + std::string push_endpoint_args; + std::string arn_topic; + ceph::coarse_real_time creation_time; + 
uint32_t time_to_live = DEFAULT_GLOBAL_VALUE; + uint32_t max_retries = DEFAULT_GLOBAL_VALUE; + uint32_t retry_sleep_duration = DEFAULT_GLOBAL_VALUE; + + void encode(bufferlist& bl) const { + ENCODE_START(3, 1, bl); + encode(event, bl); + encode(push_endpoint, bl); + encode(push_endpoint_args, bl); + encode(arn_topic, bl); + encode(creation_time, bl); + encode(time_to_live, bl); + encode(max_retries, bl); + encode(retry_sleep_duration, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(3, bl); + decode(event, bl); + decode(push_endpoint, bl); + decode(push_endpoint_args, bl); + decode(arn_topic, bl); + if (struct_v > 1) { + decode(creation_time, bl); + } else { + creation_time = ceph::coarse_real_clock::zero(); + } + if (struct_v > 2) { + decode(time_to_live, bl); + decode(max_retries, bl); + decode(retry_sleep_duration, bl); + } + DECODE_FINISH(bl); + } + + void dump(Formatter *f) const; + }; + WRITE_CLASS_ENCODER(event_entry_t) } std::string topic_to_unique(const std::string& topic, diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index 377e8c74701..f55064ff53f 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -363,7 +363,7 @@ class RGWOwnerStatsCache : public RGWQuotaCache<rgw_owner> { // option, so we can assume it won't change while the RGW server // is running, so we'll handle it once before we loop double sync_interval_factor = 1.0; - const uint64_t debug_interval = cct->_conf->rgw_reshard_debug_interval; + const int64_t debug_interval = cct->_conf->rgw_reshard_debug_interval; if (debug_interval >= 1) { constexpr double secs_per_day = 60 * 60 * 24; sync_interval_factor = debug_interval / secs_per_day; diff --git a/src/rgw/rgw_ratelimit.h b/src/rgw/rgw_ratelimit.h index 2639d4d4274..0db1813f050 100644 --- a/src/rgw/rgw_ratelimit.h +++ b/src/rgw/rgw_ratelimit.h @@ -286,7 +286,8 @@ class ActiveRateLimiter : public DoutPrefix { void start() { ldpp_dout(this, 20) << "starting ratelimit_gc thread" << dendl; runner = std::thread(&ActiveRateLimiter::replace_active, this); - const auto rc = ceph_pthread_setname(runner.native_handle(), "ratelimit_gc"); - ceph_assert(rc==0); + if (const auto rc = ceph_pthread_setname(runner.native_handle(), "ratelimit_gc"); rc != 0) { + ldpp_dout(this, 1) << "ERROR: failed to set ratelimit_gc thread name. error: " << rc << dendl; + } } }; diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc index 3a0652e82e2..102f05d7862 100644 --- a/src/rgw/rgw_rest.cc +++ b/src/rgw/rgw_rest.cc @@ -2164,6 +2164,11 @@ int RGWREST::preprocess(req_state *s, rgw::io::BasicClient* cio) << " s->info.domain=" << s->info.domain << " s->info.request_uri=" << s->info.request_uri << dendl; + } else if (s3website_enabled && api_priority_s3website > api_priority_s3) { + // If the Host header is missing, but the s3website API is enabled and has + // a higher priority than the regular S3 API, then we should still treat + // the request as a website request. 
+ s->prot_flags |= RGW_REST_WEBSITE; } if (s->info.domain.empty()) { diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc index 941856e6006..45b5e3076f4 100644 --- a/src/rgw/rgw_rest_client.cc +++ b/src/rgw/rgw_rest_client.cc @@ -452,7 +452,7 @@ int RGWRESTSimpleRequest::forward_request(const DoutPrefixProvider *dpp, const R method = new_info.method; url = new_url; - int r = process(y); + int r = process(dpp, y); if (r < 0){ if (r == -EINVAL){ // curl_easy has errored, generally means the service is not available @@ -922,14 +922,15 @@ int RGWRESTStreamRWRequest::send(RGWHTTPManager *mgr) return RGWHTTPStreamRWRequest::send(mgr); } -int RGWHTTPStreamRWRequest::complete_request(optional_yield y, +int RGWHTTPStreamRWRequest::complete_request(const DoutPrefixProvider* dpp, + optional_yield y, string *etag, real_time *mtime, uint64_t *psize, map<string, string> *pattrs, map<string, string> *pheaders) { - int ret = wait(y); + int ret = wait(dpp, y); if (ret < 0) { return ret; } diff --git a/src/rgw/rgw_rest_client.h b/src/rgw/rgw_rest_client.h index 923f8cc1783..ffedcc17a94 100644 --- a/src/rgw/rgw_rest_client.h +++ b/src/rgw/rgw_rest_client.h @@ -168,7 +168,7 @@ public: virtual int send(RGWHTTPManager *mgr); - int complete_request(optional_yield y, + int complete_request(const DoutPrefixProvider* dpp, optional_yield y, std::string *etag = nullptr, real_time *mtime = nullptr, uint64_t *psize = nullptr, diff --git a/src/rgw/rgw_rest_conn.cc b/src/rgw/rgw_rest_conn.cc index 39d171dfb52..770ccae2990 100644 --- a/src/rgw/rgw_rest_conn.cc +++ b/src/rgw/rgw_rest_conn.cc @@ -257,10 +257,11 @@ int RGWRESTConn::put_obj_async_init(const DoutPrefixProvider *dpp, const rgw_own return 0; } -int RGWRESTConn::complete_request(RGWRESTStreamS3PutObj *req, string& etag, +int RGWRESTConn::complete_request(const DoutPrefixProvider* dpp, + RGWRESTStreamS3PutObj *req, string& etag, real_time *mtime, optional_yield y) { - int ret = req->complete_request(y, &etag, mtime); + int ret = req->complete_request(dpp, y, &etag, mtime); if (ret == -EIO) { ldout(cct, 5) << __func__ << ": complete_request() returned ret=" << ret << dendl; set_url_unconnectable(req->get_url_orig()); @@ -408,7 +409,8 @@ done_err: return r; } -int RGWRESTConn::complete_request(RGWRESTStreamRWRequest *req, +int RGWRESTConn::complete_request(const DoutPrefixProvider* dpp, + RGWRESTStreamRWRequest *req, string *etag, real_time *mtime, uint64_t *psize, @@ -416,7 +418,7 @@ int RGWRESTConn::complete_request(RGWRESTStreamRWRequest *req, map<string, string> *pheaders, optional_yield y) { - int ret = req->complete_request(y, etag, mtime, psize, pattrs, pheaders); + int ret = req->complete_request(dpp, y, etag, mtime, psize, pattrs, pheaders); if (ret == -EIO) { ldout(cct, 5) << __func__ << ": complete_request() returned ret=" << ret << dendl; set_url_unconnectable(req->get_url_orig()); @@ -467,7 +469,7 @@ int RGWRESTConn::get_resource(const DoutPrefixProvider *dpp, return ret; } - ret = req.complete_request(y); + ret = req.complete_request(dpp, y); if (ret == -EIO) { set_url_unconnectable(url); if (tries < NUM_ENPOINT_IOERROR_RETRIES - 1) { @@ -521,7 +523,7 @@ int RGWRESTConn::send_resource(const DoutPrefixProvider *dpp, const std::string& return ret; } - ret = req.complete_request(y); + ret = req.complete_request(dpp, y); if (ret == -EIO) { set_url_unconnectable(url); if (tries < NUM_ENPOINT_IOERROR_RETRIES - 1) { @@ -580,7 +582,7 @@ int RGWRESTReadResource::read(const DoutPrefixProvider *dpp, optional_yield y) return ret; } - ret = 
req.complete_request(y); + ret = req.complete_request(dpp, y); if (ret == -EIO) { conn->set_url_unconnectable(req.get_url_orig()); ldpp_dout(dpp, 20) << __func__ << ": complete_request() returned ret=" << ret << dendl; @@ -647,7 +649,7 @@ int RGWRESTSendResource::send(const DoutPrefixProvider *dpp, bufferlist& outbl, return ret; } - ret = req.complete_request(y); + ret = req.complete_request(dpp, y); if (ret == -EIO) { conn->set_url_unconnectable(req.get_url_orig()); ldpp_dout(dpp, 20) << __func__ << ": complete_request() returned ret=" << ret << dendl; diff --git a/src/rgw/rgw_rest_conn.h b/src/rgw/rgw_rest_conn.h index b2d1affb640..7abf86a3d3f 100644 --- a/src/rgw/rgw_rest_conn.h +++ b/src/rgw/rgw_rest_conn.h @@ -140,7 +140,8 @@ public: int put_obj_send_init(const rgw_obj& obj, const rgw_http_param_pair *extra_params, RGWRESTStreamS3PutObj **req); int put_obj_async_init(const DoutPrefixProvider *dpp, const rgw_owner& uid, const rgw_obj& obj, std::map<std::string, bufferlist>& attrs, RGWRESTStreamS3PutObj **req); - int complete_request(RGWRESTStreamS3PutObj *req, std::string& etag, + int complete_request(const DoutPrefixProvider* dpp, + RGWRESTStreamS3PutObj *req, std::string& etag, ceph::real_time *mtime, optional_yield y); struct get_obj_params { @@ -178,7 +179,8 @@ public: bool prepend_metadata, bool get_op, bool rgwx_stat, bool sync_manifest, bool skip_decrypt, rgw_zone_set_entry *dst_zone_trace, bool sync_cloudtiered, bool send, RGWHTTPStreamRWRequest::ReceiveCB *cb, RGWRESTStreamRWRequest **req); - int complete_request(RGWRESTStreamRWRequest *req, + int complete_request(const DoutPrefixProvider* dpp, + RGWRESTStreamRWRequest *req, std::string *etag, ceph::real_time *mtime, uint64_t *psize, @@ -344,8 +346,8 @@ public: return req.get_http_status(); } - int wait(bufferlist *pbl, optional_yield y) { - int ret = req.wait(y); + int wait(const DoutPrefixProvider* dpp, bufferlist *pbl, optional_yield y) { + int ret = req.wait(dpp, y); if (ret < 0) { if (ret == -EIO) { conn->set_url_unconnectable(req.get_url_orig()); @@ -361,7 +363,7 @@ public: } template <class T> - int wait(T *dest, optional_yield y); + int wait(const DoutPrefixProvider* dpp, T *dest, optional_yield y); template <class T> int fetch(const DoutPrefixProvider *dpp, T *dest, optional_yield y); @@ -398,9 +400,10 @@ int RGWRESTReadResource::fetch(const DoutPrefixProvider *dpp, T *dest, optional_ } template <class T> -int RGWRESTReadResource::wait(T *dest, optional_yield y) +int RGWRESTReadResource::wait(const DoutPrefixProvider* dpp, T *dest, + optional_yield y) { - int ret = req.wait(y); + int ret = req.wait(dpp, y); if (ret < 0) { if (ret == -EIO) { conn->set_url_unconnectable(req.get_url_orig()); @@ -472,8 +475,9 @@ public: } template <class E = int> - int wait(bufferlist *pbl, optional_yield y, E *err_result = nullptr) { - int ret = req.wait(y); + int wait(const DoutPrefixProvider* dpp, bufferlist *pbl, + optional_yield y, E *err_result = nullptr) { + int ret = req.wait(dpp, y); *pbl = bl; if (ret == -EIO) { @@ -488,13 +492,15 @@ public: } template <class T, class E = int> - int wait(T *dest, optional_yield y, E *err_result = nullptr); + int wait(const DoutPrefixProvider* dpp, T *dest, + optional_yield y, E *err_result = nullptr); }; template <class T, class E> -int RGWRESTSendResource::wait(T *dest, optional_yield y, E *err_result) +int RGWRESTSendResource::wait(const DoutPrefixProvider* dpp, T *dest, + optional_yield y, E *err_result) { - int ret = req.wait(y); + int ret = req.wait(dpp, y); if (ret == -EIO) { 
conn->set_url_unconnectable(req.get_url_orig()); } diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc index f3437269473..a3784ca95b0 100644 --- a/src/rgw/rgw_rest_pubsub.cc +++ b/src/rgw/rgw_rest_pubsub.cc @@ -422,6 +422,8 @@ void RGWPSCreateTopicOp::execute(optional_yield y) { << op_ret << dendl; return; } + } else if (already_persistent) { // redundant call to CreateTopic + dest.persistent_queue = topic->dest.persistent_queue; } const RGWPubSub ps(driver, get_account_or_tenant(s->owner.id), *s->penv.site); op_ret = ps.create_topic(this, topic_name, dest, topic_arn.to_string(), @@ -882,7 +884,7 @@ void RGWPSSetTopicAttributesOp::execute(optional_yield y) { << op_ret << dendl; return; } - } else if (already_persistent) { + } else if (already_persistent && !topic_needs_queue(dest)) { // changing the persistent topic to non-persistent. op_ret = driver->remove_persistent_topic(this, y, result.dest.persistent_queue); if (op_ret != -ENOENT && op_ret < 0) { diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc index 171ace9162f..baead29ebcf 100644 --- a/src/rgw/rgw_rest_s3.cc +++ b/src/rgw/rgw_rest_s3.cc @@ -1,6 +1,7 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab ft=cpp +#include <boost/algorithm/string/case_conv.hpp> #include <cstdint> #include <errno.h> #include <array> @@ -15,6 +16,8 @@ #include "common/safe_io.h" #include "common/errno.h" #include "auth/Crypto.h" +#include "rgw_cksum.h" +#include "rgw_common.h" #include <boost/algorithm/string.hpp> #include <boost/algorithm/string/replace.hpp> #include <boost/algorithm/string/predicate.hpp> @@ -68,7 +71,7 @@ #include "rgw_rest_iam.h" #include "rgw_sts.h" #include "rgw_sal_rados.h" - +#include "rgw_cksum_pipe.h" #include "rgw_s3select.h" #define dout_context g_ceph_context @@ -306,6 +309,12 @@ int RGWGetObj_ObjStore_S3::get_params(optional_yield y) dst_zone_trace = s->info.args.get(RGW_SYS_PARAM_PREFIX "if-not-replicated-to"); get_torrent = s->info.args.exists("torrent"); + auto checksum_mode_hdr = + s->info.env->get_optional("HTTP_X_AMZ_CHECKSUM_MODE"); + checksum_mode = + (checksum_mode_hdr && + boost::algorithm::iequals(*checksum_mode_hdr, "enabled")); + // optional part number auto optstr = s->info.args.get_optional("partNumber"); if (optstr) { @@ -473,7 +482,7 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, } catch (const buffer::error&) {} } - if (multipart_parts_count) { + if (multipart_parts_count && *multipart_parts_count > 0) { dump_header(s, "x-amz-mp-parts-count", *multipart_parts_count); } @@ -491,6 +500,26 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs, } } + if (checksum_mode) { + if (auto i = attrs.find(RGW_ATTR_CKSUM); i != attrs.end()) { + try { + rgw::cksum::Cksum cksum; + decode(cksum, i->second); + if (multipart_parts_count && multipart_parts_count > 0) { + dump_header(s, cksum.header_name(), + fmt::format("{}-{}", cksum.to_armor(), *multipart_parts_count)); + } else { + dump_header(s, cksum.header_name(), cksum.to_armor()); + } + } catch (buffer::error& err) { + ldpp_dout(this, 0) << "ERROR: failed to decode rgw::cksum::Cksum" + << dendl; + /* XXX agreed to handle this case as if there is no checksum + * to avoid data unavailable */ + } + } + } /* checksum_mode */ + for (struct response_attr_param *p = resp_attr_params; p->param; p++) { bool exists; string val = s->info.args.get(p->param, &exists); @@ -2591,8 +2620,7 @@ int RGWPutObj_ObjStore_S3::get_params(optional_yield y) /* 
handle object tagging */ auto tag_str = s->info.env->get("HTTP_X_AMZ_TAGGING"); if (tag_str){ - obj_tags = std::make_unique<RGWObjTags>(); - ret = obj_tags->set_from_string(tag_str); + ret = obj_tags.set_from_string(tag_str); if (ret < 0){ ldpp_dout(this,0) << "setting obj tags failed with " << ret << dendl; if (ret == -ERR_INVALID_TAG){ @@ -2714,12 +2742,18 @@ void RGWPutObj_ObjStore_S3::send_response() dump_content_length(s, 0); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); + if (cksum && cksum->aws()) { + dump_header(s, cksum->header_name(), cksum->to_armor()); + } for (auto &it : crypt_http_responses) dump_header(s, it.first, it.second); } else { dump_errno(s); dump_header_if_nonempty(s, "x-amz-version-id", version_id); dump_header_if_nonempty(s, "x-amz-expiration", expires); + if (cksum) { + dump_header(s, cksum->header_name(), cksum->to_armor()); + } end_header(s, this, to_mime_type(s->format)); dump_start(s); struct tm tmp; @@ -2930,21 +2964,33 @@ int RGWPostObj_ObjStore_S3::get_params(optional_yield y) } while (!done); for (auto &p: parts) { - if (! boost::istarts_with(p.first, "x-amz-server-side-encryption")) { - continue; - } - bufferlist &d { p.second.data }; - std::string v { rgw_trim_whitespace(std::string_view(d.c_str(), d.length())) }; - rgw_set_amz_meta_header(s->info.crypt_attribute_map, p.first, v, OVERWRITE); - } + if (boost::istarts_with(p.first, "x-amz-server-side-encryption")) { + bufferlist &d { p.second.data }; + std::string v { rgw_trim_whitespace(std::string_view(d.c_str(), d.length())) }; + rgw_set_amz_meta_header(s->info.crypt_attribute_map, p.first, v, OVERWRITE); + } + /* checksum headers */ + auto& k = p.first; + auto cksum_type = rgw::cksum::parse_cksum_type_hdr(k); + if (cksum_type != rgw::cksum::Type::none) { + put_prop("HTTP_X_AMZ_CHECKSUM_ALGORITHM", + boost::to_upper_copy(to_string(cksum_type))); + bufferlist& d = p.second.data; + std::string v { + rgw_trim_whitespace(std::string_view(d.c_str(), d.length()))}; + put_prop(ys_header_mangle(fmt::format("HTTP-{}", k)), v); + } + } /* each part */ + int r = get_encryption_defaults(s); if (r < 0) { - ldpp_dout(this, 5) << __func__ << "(): get_encryption_defaults() returned ret=" << r << dendl; + ldpp_dout(this, 5) + << __func__ << "(): get_encryption_defaults() returned ret=" << r << dendl; return r; } ldpp_dout(this, 20) << "adding bucket to policy env: " << s->bucket->get_name() - << dendl; + << dendl; env.add_var("bucket", s->bucket->get_name()); string object_str; @@ -2977,7 +3023,8 @@ int RGWPostObj_ObjStore_S3::get_params(optional_yield y) if (! 
storage_class.empty()) { s->dest_placement.storage_class = storage_class; if (!driver->valid_placement(s->dest_placement)) { - ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " << s->dest_placement.to_str() << dendl; + ldpp_dout(this, 0) << "NOTICE: invalid dest placement: " + << s->dest_placement.to_str() << dendl; err_msg = "The storage class you specified is not valid"; return -EINVAL; } @@ -3027,14 +3074,11 @@ int RGWPostObj_ObjStore_S3::get_params(optional_yield y) if (r < 0) return r; - min_len = post_policy.min_length; max_len = post_policy.max_length; - - return 0; -} +} /* RGWPostObj_Objstore_S3::get_params() */ int RGWPostObj_ObjStore_S3::get_tags() { @@ -3978,6 +4022,11 @@ int RGWInitMultipart_ObjStore_S3::get_params(optional_yield y) return -ERR_INVALID_REQUEST; } + auto algo_hdr = rgw::putobj::cksum_algorithm_hdr(*(s->info.env)); + if (algo_hdr.second) { + cksum_algo = rgw::cksum::parse_cksum_type(algo_hdr.second); + } + return 0; } @@ -3995,6 +4044,10 @@ void RGWInitMultipart_ObjStore_S3::send_response() dump_time_header(s, "x-amz-abort-date", abort_date); dump_header_if_nonempty(s, "x-amz-abort-rule-id", rule_id); } + if (cksum_algo != rgw::cksum::Type::none) { + dump_header(s, "x-amz-checksum-algorithm", + boost::to_upper_copy(to_string(cksum_algo))); + } end_header(s, this, to_mime_type(s->format)); if (op_ret == 0) { dump_start(s); @@ -4057,6 +4110,9 @@ void RGWCompleteMultipart_ObjStore_S3::send_response() s->formatter->dump_string("Bucket", s->bucket_name); s->formatter->dump_string("Key", s->object->get_name()); s->formatter->dump_string("ETag", etag); + if (armored_cksum) { + s->formatter->dump_string(cksum->element_name(), *armored_cksum); + } s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); } @@ -4108,9 +4164,16 @@ void RGWListMultipart_ObjStore_S3::send_response() ACLOwner& owner = policy.get_owner(); dump_owner(s, owner.id, owner.display_name); + /* TODO: missing initiator: + Container element that identifies who initiated the multipart upload. If the initiator is an AWS account, this element provides the same information as the Owner element. 
If the initiator is an IAM User, this element provides the user ARN and display name, see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html */ + + if (cksum && cksum->aws()) { + s->formatter->dump_string("ChecksumAlgorithm", + boost::to_upper_copy(std::string(cksum->type_string()))); + } + for (; iter != upload->get_parts().end(); ++iter) { rgw::sal::MultipartPart* part = iter->second.get(); - s->formatter->open_object_section("Part"); dump_time(s, "LastModified", part->get_mtime()); @@ -4118,6 +4181,11 @@ void RGWListMultipart_ObjStore_S3::send_response() s->formatter->dump_unsigned("PartNumber", part->get_num()); s->formatter->dump_format("ETag", "\"%s\"", part->get_etag().c_str()); s->formatter->dump_unsigned("Size", part->get_size()); + auto& part_cksum = part->get_cksum(); + if (part_cksum && part_cksum->aws()) { + s->formatter->dump_string(part_cksum->element_name(), + part_cksum->to_armor()); + } s->formatter->close_section(); } s->formatter->close_section(); @@ -4228,8 +4296,7 @@ void RGWDeleteMultiObj_ObjStore_S3::begin_response() void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(const rgw_obj_key& key, bool delete_marker, const string& marker_version_id, - int ret, - boost::asio::deadline_timer *formatter_flush_cond) + int ret) { if (!key.empty()) { delete_multi_obj_entry ops_log_entry; @@ -4275,17 +4342,11 @@ void RGWDeleteMultiObj_ObjStore_S3::send_partial_response(const rgw_obj_key& key } ops_log_entries.push_back(std::move(ops_log_entry)); - if (formatter_flush_cond) { - formatter_flush_cond->cancel(); - } else { - rgw_flush_formatter(s, s->formatter); - } } } void RGWDeleteMultiObj_ObjStore_S3::end_response() { - s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); } diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h index d15ddaba35a..d86123a2525 100644 --- a/src/rgw/rgw_rest_s3.h +++ b/src/rgw/rgw_rest_s3.h @@ -42,6 +42,7 @@ protected: // Serving a custom error page from an object is really a 200 response with // just the status line altered. 
int custom_http_ret = 0; + bool checksum_mode{false}; std::map<std::string, std::string> crypt_http_responses; int override_range_hdr(const rgw::auth::StrategyRegistry& auth_registry, optional_yield y); public: @@ -302,6 +303,12 @@ class RGWPostObj_ObjStore_S3 : public RGWPostObj_ObjStore { std::string get_current_filename() const override; std::string get_current_content_type() const override; + inline void put_prop(const std::string_view k, const std::string_view v) { + /* assume the caller will mangle the key name, if required */ + auto& map = const_cast<env_map_t&>(s->info.env->get_map()); + map.insert(env_map_t::value_type(k, v)); + } + public: RGWPostObj_ObjStore_S3() {} ~RGWPostObj_ObjStore_S3() override {} @@ -518,8 +525,8 @@ public: void send_status() override; void begin_response() override; void send_partial_response(const rgw_obj_key& key, bool delete_marker, - const std::string& marker_version_id, int ret, - boost::asio::deadline_timer *formatter_flush_cond) override; + const std::string& marker_version_id, + int ret) override; void end_response() override; }; diff --git a/src/rgw/rgw_rest_sts.cc b/src/rgw/rgw_rest_sts.cc index b9c23aa159c..6df53f612cd 100644 --- a/src/rgw/rgw_rest_sts.cc +++ b/src/rgw/rgw_rest_sts.cc @@ -315,7 +315,7 @@ WebTokenEngine::get_cert_url(const string& iss, const DoutPrefixProvider *dpp, o //Headers openidc_req.append_header("Content-Type", "application/x-www-form-urlencoded"); - int res = openidc_req.process(y); + int res = openidc_req.process(dpp, y); if (res < 0) { ldpp_dout(dpp, 10) << "HTTP request res: " << res << dendl; throw -EINVAL; @@ -353,7 +353,7 @@ WebTokenEngine::validate_signature(const DoutPrefixProvider* dpp, const jwt::dec //Headers cert_req.append_header("Content-Type", "application/x-www-form-urlencoded"); - int res = cert_req.process(y); + int res = cert_req.process(dpp, y); if (res < 0) { ldpp_dout(dpp, 10) << "HTTP request res: " << res << dendl; throw -EINVAL; diff --git a/src/rgw/rgw_s3select.cc b/src/rgw/rgw_s3select.cc index 1b7dced2782..d989147cdc7 100644 --- a/src/rgw/rgw_s3select.cc +++ b/src/rgw/rgw_s3select.cc @@ -173,6 +173,7 @@ void aws_response_handler::init_success_response() void aws_response_handler::send_continuation_response() { + m_fp_chunk_encoding(); set_continue_buffer(); continue_result.resize(header_crc_size, '\0'); get_buffer()->clear(); @@ -203,6 +204,7 @@ void aws_response_handler::init_stats_response() void aws_response_handler::init_end_response() { + m_fp_chunk_encoding(); sql_result.resize(header_crc_size, '\0'); get_buffer()->clear(); header_size = create_header_end(); @@ -212,12 +214,13 @@ void aws_response_handler::init_end_response() rgw_flush_formatter_and_reset(s, s->formatter); } -void aws_response_handler::send_error_response(const char* error_message) +void aws_response_handler::send_error_response(const char* error_code, const char* error_message, const char* resource_id) { - //currently not in use. 
need to change the s3-test, this error-response raises a boto3 exception + m_fp_chunk_encoding(); + std::string out_error_msg = std::string(error_code) + " :" + std::string(error_message) + " :" + std::string(resource_id); error_result.resize(header_crc_size, '\0'); get_buffer()->clear(); - header_size = create_error_header_records(error_message); + header_size = create_error_header_records(out_error_msg.data()); error_result.append(get_buffer()->c_str(), header_size); int buff_len = create_message(header_size,&error_result); @@ -230,14 +233,17 @@ void aws_response_handler::send_success_response() #ifdef PAYLOAD_TAG sql_result.append(END_PAYLOAD_LINE); #endif + m_fp_chunk_encoding(); int buff_len = create_message(m_success_header_size); s->formatter->write_bin_data(sql_result.data(), buff_len); rgw_flush_formatter_and_reset(s, s->formatter); } -void aws_response_handler::send_error_response_rgw_formatter(const char* error_code, - const char* error_message, - const char* resource_id) +static constexpr const char* empty_error="--"; + +void aws_response_handler::send_error_response_rgw_formatter(const char* error_code = empty_error, + const char* error_message = empty_error, + const char* resource_id = empty_error) { set_req_state_err(s, 0); dump_errno(s, 400); @@ -254,6 +260,7 @@ void aws_response_handler::send_error_response_rgw_formatter(const char* error_c void aws_response_handler::send_progress_response() { + m_fp_chunk_encoding(); std::string progress_payload = fmt::format("<?xml version=\"1.0\" encoding=\"UTF-8\"?><Progress><BytesScanned>{}</BytesScanned><BytesProcessed>{}</BytesProcessed><BytesReturned>{}</BytesReturned></Progress>" , get_processed_size(), get_processed_size(), get_total_bytes_returned()); sql_result.append(progress_payload); @@ -264,6 +271,7 @@ void aws_response_handler::send_progress_response() void aws_response_handler::send_stats_response() { + m_fp_chunk_encoding(); std::string stats_payload = fmt::format("<?xml version=\"1.0\" encoding=\"UTF-8\"?><Stats><BytesScanned>{}</BytesScanned><BytesProcessed>{}</BytesProcessed><BytesReturned>{}</BytesReturned></Stats>" , get_processed_size(), get_processed_size(), get_total_bytes_returned()); sql_result.append(stats_payload); @@ -304,12 +312,10 @@ RGWSelectObj_ObjStore_S3::RGWSelectObj_ObjStore_S3(): return 0; }; fp_s3select_result_format = [this](std::string& result) { - fp_chunked_transfer_encoding(); m_aws_response_handler.send_success_response(); return 0; }; fp_s3select_continue = [this](std::string& result) { - fp_chunked_transfer_encoding(); m_aws_response_handler.send_continuation_response(); return 0; }; @@ -330,6 +336,7 @@ RGWSelectObj_ObjStore_S3::RGWSelectObj_ObjStore_S3(): } chunk_number++; }; + } RGWSelectObj_ObjStore_S3::~RGWSelectObj_ObjStore_S3() @@ -429,7 +436,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_csv(const char* query, const char* if (s3select_syntax.get_error_description().empty() == false) { //error-flow (syntax-error) - m_aws_response_handler.send_error_response_rgw_formatter(s3select_syntax_error,s3select_syntax.get_error_description().c_str(),s3select_resource_id); + m_aws_response_handler.send_error_response(s3select_syntax_error,s3select_syntax.get_error_description().c_str(),s3select_resource_id); ldpp_dout(this, 10) << "s3-select query: failed to parse the following query {" << query << "}" << dendl; ldpp_dout(this, 10) << "s3-select query: syntax-error {" << s3select_syntax.get_error_description() << "}" << dendl; return -1; }
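The create_message()/header_crc_size plumbing above frames every SELECT reply as an AWS event-stream message: a 12-byte prelude (total length, headers length, CRC32 of those eight bytes), then the headers, the payload, and a trailing CRC32 of everything before it. The sketch below is an editorial illustration of that published framing, not code from this patch; it assumes zlib's crc32() and string-typed headers only (value type byte 7), and the header names shown are the conventional ones.

```cpp
#include <zlib.h>   // crc32()
#include <cstdint>
#include <string>

static void put_be32(std::string& out, uint32_t v) {
  out.push_back(static_cast<char>(v >> 24));
  out.push_back(static_cast<char>(v >> 16));
  out.push_back(static_cast<char>(v >> 8));
  out.push_back(static_cast<char>(v));
}

// one string-valued header: name_len(1) | name | type 7 | value_len(2, BE) | value
static std::string event_header(const std::string& name, const std::string& value) {
  std::string h;
  h.push_back(static_cast<char>(name.size()));
  h += name;
  h.push_back(7);  // header value type: string
  h.push_back(static_cast<char>(value.size() >> 8));
  h.push_back(static_cast<char>(value.size()));
  h += value;
  return h;
}

static std::string event_message(const std::string& headers, const std::string& payload) {
  const uint32_t total =
      static_cast<uint32_t>(12 + headers.size() + payload.size() + 4);
  std::string msg;
  put_be32(msg, total);                                   // total byte length
  put_be32(msg, static_cast<uint32_t>(headers.size()));   // headers byte length
  put_be32(msg, static_cast<uint32_t>(                    // CRC of the 8-byte prelude
      crc32(0L, reinterpret_cast<const Bytef*>(msg.data()), 8)));
  msg += headers;
  msg += payload;
  put_be32(msg, static_cast<uint32_t>(                    // CRC of the whole message
      crc32(0L, reinterpret_cast<const Bytef*>(msg.data()),
            static_cast<uInt>(msg.size()))));
  return msg;
}

int main() {
  std::string hdrs = event_header(":message-type", "event") +
                     event_header(":event-type", "Records");
  std::string frame = event_message(hdrs, "a,b,c\n");
  return frame.size() == 12 + hdrs.size() + 6 + 4 ? 0 : 1;
}
```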
@@ -446,7 +453,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_csv(const char* query, const char* if (status < 0) { //error flow(processing-time) - m_aws_response_handler.send_error_response_rgw_formatter(s3select_processTime_error,m_s3_csv_object.get_error_description().c_str(),s3select_resource_id); + m_aws_response_handler.send_error_response(s3select_processTime_error,m_s3_csv_object.get_error_description().data(),s3select_resource_id); ldpp_dout(this, 10) << "s3-select query: failed to process query; {" << m_s3_csv_object.get_error_description() << "}" << dendl; return -1; @@ -458,7 +465,6 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_csv(const char* query, const char* } ldpp_dout(this, 10) << "s3-select: complete chunk processing : chunk length = " << input_length << dendl; if (enable_progress == true) { - fp_chunked_transfer_encoding(); m_aws_response_handler.init_progress_response(); m_aws_response_handler.send_progress_response(); } @@ -491,8 +497,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_parquet(const char* query) } if (s3select_syntax.get_error_description().empty() == false) { //the SQL statement failed the syntax parser - fp_chunked_transfer_encoding(); - m_aws_response_handler.send_error_response(m_s3_parquet_object.get_error_description().c_str()); + m_aws_response_handler.send_error_response(s3select_syntax_error,m_s3_parquet_object.get_error_description().c_str(),s3select_resource_id); ldpp_dout(this, 10) << "s3-select query: failed to parse query; {" << s3select_syntax.get_error_description() << "}" << dendl; status = -1; @@ -502,8 +507,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_parquet(const char* query) status = m_s3_parquet_object.run_s3select_on_object(m_aws_response_handler.get_sql_result()); if (status < 0) { - fp_chunked_transfer_encoding(); - m_aws_response_handler.send_error_response(m_s3_parquet_object.get_error_description().c_str()); + m_aws_response_handler.send_error_response(s3select_processTime_error,m_s3_parquet_object.get_error_description().c_str(),s3select_resource_id); return -1; } @@ -516,7 +520,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_json(const char* query, const char { int status = 0; - m_s3_csv_object.set_external_system_functions(fp_s3select_continue, + m_s3_json_object.set_external_system_functions(fp_s3select_continue, fp_s3select_result_format, fp_result_header_format, fp_debug_mesg); @@ -537,7 +541,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_json(const char* query, const char s3select_syntax.parse_query(m_sql_query.c_str()); if (s3select_syntax.get_error_description().empty() == false) { //SQL statement is wrong(syntax).
- m_aws_response_handler.send_error_response_rgw_formatter(s3select_syntax_error, + m_aws_response_handler.send_error_response(s3select_syntax_error, s3select_syntax.get_error_description().c_str(), s3select_resource_id); ldpp_dout(this, 10) << "s3-select query: failed to parse query; {" << s3select_syntax.get_error_description() << "}" << dendl; @@ -559,7 +563,7 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_json(const char* query, const char } catch(base_s3select_exception& e) { ldpp_dout(this, 10) << "S3select: failed to process JSON object: " << e.what() << dendl; m_aws_response_handler.get_sql_result().append(e.what()); - m_aws_response_handler.send_error_response_rgw_formatter(s3select_processTime_error, + m_aws_response_handler.send_error_response(s3select_processTime_error, e.what(), s3select_resource_id); return -EINVAL; @@ -568,13 +572,12 @@ int RGWSelectObj_ObjStore_S3::run_s3select_on_json(const char* query, const char m_aws_response_handler.update_total_bytes_returned(length_post_processing - length_before_processing); if (status < 0) { //error flow(processing-time) - m_aws_response_handler.send_error_response_rgw_formatter(s3select_processTime_error, + m_aws_response_handler.send_error_response(s3select_processTime_error, m_s3_json_object.get_error_description().c_str(), s3select_resource_id); ldpp_dout(this, 10) << "s3-select query: failed to process query; {" << m_s3_json_object.get_error_description() << "}" << dendl; return -EINVAL; } - fp_chunked_transfer_encoding(); if (length_post_processing-length_before_processing != 0) { m_aws_response_handler.send_success_response(); @@ -726,6 +729,21 @@ void RGWSelectObj_ObjStore_S3::execute(optional_yield y) #ifdef _ARROW_EXIST m_rgw_api.m_y = &y; #endif + + if (!m_aws_response_handler.is_set()) { + m_aws_response_handler.set(s, this, fp_chunked_transfer_encoding); + } + + if(s->cct->_conf->rgw_disable_s3select == true) + { + std::string error_msg="s3select is disabled by the rgw_disable_s3select configuration parameter"; + ldpp_dout(this, 10) << error_msg << dendl; + m_aws_response_handler.send_error_response_rgw_formatter(error_msg.data()); + + op_ret = -ERR_INVALID_REQUEST; + return; + } + if (m_parquet_type) { //parquet processing range_request(0, 4, parquet_magic, y); @@ -991,6 +1009,7 @@ int RGWSelectObj_ObjStore_S3::json_processing(bufferlist& bl, off_t ofs, off_t l int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_t len) { + if (m_scan_range_ind == false){ m_object_size_for_processing = s->obj_size; } @@ -1005,7 +1024,7 @@ int RGWSelectObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t ofs, off_ } } if (!m_aws_response_handler.is_set()) { - m_aws_response_handler.set(s, this); + m_aws_response_handler.set(s, this, fp_chunked_transfer_encoding); } if (len == 0 && s->obj_size != 0) { return 0; diff --git a/src/rgw/rgw_s3select_private.h b/src/rgw/rgw_s3select_private.h index f6b7b4d83d3..c40ac8837ff 100644 --- a/src/rgw/rgw_s3select_private.h +++ b/src/rgw/rgw_s3select_private.h @@ -94,6 +94,7 @@ private: void push_header(const char* header_name, const char* header_value); int create_message(u_int32_t header_len,std::string*); + std::function<void(void)> m_fp_chunk_encoding; public: aws_response_handler(req_state* ps, RGWOp* rgwop) : s(ps), m_rgwop(rgwop), total_bytes_returned{0}, processed_size{0} @@ -110,10 +111,11 @@ public: return true; } - void set(req_state* ps, RGWOp* rgwop) + void set(req_state* ps, RGWOp* rgwop, std::function<void(void)>& fp_chunk_encoding) { s = ps; m_rgwop =
rgwop; + m_fp_chunk_encoding = fp_chunk_encoding; } std::string& get_sql_result(); @@ -150,7 +152,9 @@ public: void init_stats_response(); - void send_error_response(const char* error_message); + void send_error_response(const char* error_code, + const char* error_message, + const char* resource_id); void send_success_response(); diff --git a/src/rgw/rgw_sal.h b/src/rgw/rgw_sal.h index 5fd20fda692..9bc23f2d0ae 100644 --- a/src/rgw/rgw_sal.h +++ b/src/rgw/rgw_sal.h @@ -15,10 +15,12 @@ #pragma once +#include <optional> #include <boost/intrusive_ptr.hpp> #include <boost/smart_ptr/intrusive_ref_counter.hpp> #include "common/tracer.h" +#include "rgw_cksum.h" #include "rgw_sal_fwd.h" #include "rgw_lua.h" #include "rgw_notify_event_type.h" @@ -179,6 +181,7 @@ class ObjectProcessor : public DataProcessor { virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1046,8 +1049,8 @@ class Object { /// If non-null, read data/attributes from the given multipart part. int* part_num{nullptr}; - /// If part_num is specified and the object is multipart, the total - /// number of multipart parts is assigned to this output parameter. + /// If the object is multipart, the total number of multipart + /// parts is assigned to this output parameter. std::optional<int> parts_count; } params; @@ -1081,6 +1084,7 @@ class Object { rgw_owner bucket_owner; //< bucket owner for usage/quota accounting ACLOwner obj_owner; //< acl owner for delete marker if necessary int versioning_status{0}; + bool null_verid{false}; uint64_t olh_epoch{0}; std::string marker_version_id; uint32_t bilog_flags{0}; @@ -1144,6 +1148,9 @@ class Object { virtual void set_compressed() = 0; /** Check if this object is compressed */ virtual bool is_compressed() = 0; + /** Check if object is synced */ + virtual bool is_sync_completed(const DoutPrefixProvider* dpp, + const ceph::real_time& obj_mtime) = 0; /** Invalidate cached info about this object, except atomic, prefetch, and * compressed */ virtual void invalidate() = 0; @@ -1157,7 +1164,7 @@ class Object { virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) = 0; /** Set attributes for this object from the backing store. Attrs can be set or * deleted. @note the attribute APIs may be revisited in the future. */ - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) = 0; + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) = 0; /** Get attributes for this object */ virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) = 0; /** Modify attributes for this object. 
*/ @@ -1339,6 +1346,8 @@ public: virtual const std::string& get_etag() = 0; /** Get the modification time of this part */ virtual ceph::real_time& get_mtime() = 0; + /** Get computed (or default/empty) checksum */ + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() = 0; }; /** @@ -1355,6 +1364,7 @@ public: //object lock std::optional<RGWObjectRetention> obj_retention = std::nullopt; std::optional<RGWObjectLegalHold> obj_legal_hold = std::nullopt; + rgw::cksum::Type cksum_type = rgw::cksum::Type::none; MultipartUpload() = default; virtual ~MultipartUpload() = default; @@ -1612,6 +1622,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_dbstore.cc b/src/rgw/rgw_sal_dbstore.cc index 8c415feddc9..cb62ebeccee 100644 --- a/src/rgw/rgw_sal_dbstore.cc +++ b/src/rgw/rgw_sal_dbstore.cc @@ -526,7 +526,7 @@ namespace rgw::sal { return read_op.prepare(dpp); } - int DBObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) + int DBObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) { Attrs empty; DB::Object op_target(store->getDB(), @@ -551,7 +551,7 @@ namespace rgw::sal { } set_atomic(); state.attrset[attr_name] = attr_val; - return set_obj_attrs(dpp, &state.attrset, nullptr, y); + return set_obj_attrs(dpp, &state.attrset, nullptr, y, rgw::sal::FLAG_LOG_OP); } int DBObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) @@ -561,7 +561,7 @@ namespace rgw::sal { set_atomic(); rmattr[attr_name] = bl; - return set_obj_attrs(dpp, nullptr, &rmattr, y); + return set_obj_attrs(dpp, nullptr, &rmattr, y, rgw::sal::FLAG_LOG_OP); } bool DBObject::is_expired() { @@ -1210,9 +1210,11 @@ namespace rgw::sal { return 0; } - int DBMultipartWriter::complete(size_t accounted_size, const std::string& etag, + int DBMultipartWriter::complete( + size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1234,6 +1236,7 @@ namespace rgw::sal { RGWUploadPartInfo info; info.num = part_num; info.etag = etag; + info.cksum = cksum; info.size = total_data_size; info.accounted_size = accounted_size; info.modified = real_clock::now(); @@ -1368,6 +1371,7 @@ namespace rgw::sal { int DBAtomicWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_dbstore.h b/src/rgw/rgw_sal_dbstore.h index b542028e53e..2dfc340315d 100644 --- a/src/rgw/rgw_sal_dbstore.h +++ b/src/rgw/rgw_sal_dbstore.h @@ -363,7 +363,9 @@ protected: virtual uint64_t get_size() { return info.accounted_size; } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.modified; } - + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + 
return info.cksum; + } }; class DBMPObj { @@ -541,8 +543,8 @@ protected: virtual RGWAccessControlPolicy& get_acl(void) override { return acls; } virtual int set_acl(const RGWAccessControlPolicy& acl) override { acls = acl; return 0; } + virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh = true) override; - virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, Attrs* delattrs, optional_yield y) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) override; virtual int delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, optional_yield y) override; @@ -633,6 +635,7 @@ protected: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -682,6 +685,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_filter.cc b/src/rgw/rgw_sal_filter.cc index ce2844f244b..8408620ba5c 100644 --- a/src/rgw/rgw_sal_filter.cc +++ b/src/rgw/rgw_sal_filter.cc @@ -1050,9 +1050,9 @@ int FilterObject::load_obj_state(const DoutPrefixProvider *dpp, } int FilterObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) + Attrs* delattrs, optional_yield y, uint32_t flags) { - return next->set_obj_attrs(dpp, setattrs, delattrs, y); + return next->set_obj_attrs(dpp, setattrs, delattrs, y, flags); } int FilterObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, @@ -1442,6 +1442,7 @@ int FilterWriter::process(bufferlist&& data, uint64_t offset) int FilterWriter::complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, @@ -1449,7 +1450,7 @@ int FilterWriter::complete(size_t accounted_size, const std::string& etag, const req_context& rctx, uint32_t flags) { - return next->complete(accounted_size, etag, mtime, set_mtime, attrs, + return next->complete(accounted_size, etag, mtime, set_mtime, attrs, cksum, delete_at, if_match, if_nomatch, user_data, zones_trace, canceled, rctx, flags); } diff --git a/src/rgw/rgw_sal_filter.h b/src/rgw/rgw_sal_filter.h index 7d26a466efe..664b37fa6a9 100644 --- a/src/rgw/rgw_sal_filter.h +++ b/src/rgw/rgw_sal_filter.h @@ -752,6 +752,8 @@ public: virtual bool is_prefetch_data() override { return next->is_prefetch_data(); } virtual void set_compressed() override { return next->set_compressed(); } virtual bool is_compressed() override { return next->is_compressed(); } + virtual bool is_sync_completed(const DoutPrefixProvider* dpp, + const 
ceph::real_time& obj_mtime) override { return next->is_sync_completed(dpp, obj_mtime); } virtual void invalidate() override { return next->invalidate(); } virtual bool empty() const override { return next->empty(); } virtual const std::string &get_name() const override { return next->get_name(); } @@ -759,7 +761,7 @@ public: virtual int load_obj_state(const DoutPrefixProvider *dpp, optional_yield y, bool follow_olh = true) override; virtual int set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, - Attrs* delattrs, optional_yield y) override; + Attrs* delattrs, optional_yield y, uint32_t flags) override; virtual int get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj = NULL) override; virtual int modify_obj_attrs(const char* attr_name, bufferlist& attr_val, @@ -876,6 +878,9 @@ public: virtual uint64_t get_size() override { return next->get_size(); } virtual const std::string& get_etag() override { return next->get_etag(); } virtual ceph::real_time& get_mtime() override { return next->get_mtime(); } + virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { + return next->get_cksum(); + } }; class FilterMultipartUpload : public MultipartUpload { @@ -1046,6 +1051,7 @@ public: virtual int complete(size_t accounted_size, const std::string& etag, ceph::real_time *mtime, ceph::real_time set_mtime, std::map<std::string, bufferlist>& attrs, + const std::optional<rgw::cksum::Cksum>& cksum, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, diff --git a/src/rgw/rgw_sal_store.h b/src/rgw/rgw_sal_store.h index a8788f4f51c..6084612d127 100644 --- a/src/rgw/rgw_sal_store.h +++ b/src/rgw/rgw_sal_store.h @@ -281,6 +281,8 @@ class StoreObject : public Object { virtual bool is_prefetch_data() override { return state.prefetch_data; } virtual void set_compressed() override { state.compressed = true; } virtual bool is_compressed() override { return state.compressed; } + virtual bool is_sync_completed(const DoutPrefixProvider* dpp, + const ceph::real_time& obj_mtime) override { return false; } virtual void invalidate() override { rgw_obj obj = state.obj; bool is_atomic = state.is_atomic; diff --git a/src/rgw/rgw_swift_auth.cc b/src/rgw/rgw_swift_auth.cc index af60a0e275d..032b3734bf9 100644 --- a/src/rgw/rgw_swift_auth.cc +++ b/src/rgw/rgw_swift_auth.cc @@ -479,7 +479,7 @@ ExternalTokenEngine::authenticate(const DoutPrefixProvider* dpp, ldpp_dout(dpp, 10) << "rgw_swift_validate_token url=" << url_buf << dendl; - int ret = validator.process(y); + int ret = validator.process(dpp, y); if (ret < 0) { throw ret; } diff --git a/src/rgw/rgw_sync_policy.cc b/src/rgw/rgw_sync_policy.cc index 0568262de67..b65752959e9 100644 --- a/src/rgw/rgw_sync_policy.cc +++ b/src/rgw/rgw_sync_policy.cc @@ -74,6 +74,14 @@ void rgw_sync_pipe_filter::set_prefix(std::optional<std::string> opt_prefix, } } +bool rgw_sync_pipe_filter::check_prefix(const std::string& obj_name) const +{ + if (prefix.has_value()) { + return boost::starts_with(obj_name, prefix.value()); + } + return true; +} + void rgw_sync_pipe_filter::set_tags(std::list<std::string>& tags_add, std::list<std::string>& tags_rm) { diff --git a/src/rgw/rgw_sync_policy.h b/src/rgw/rgw_sync_policy.h index ec9d1f2c623..062fb115324 100644 --- a/src/rgw/rgw_sync_policy.h +++ b/src/rgw/rgw_sync_policy.h @@ -244,6 +244,7 @@ struct rgw_sync_pipe_filter { bool check_tag(const std::string& k, const std::string& v) const; bool check_tags(const std::vector<std::string>& tags) const; bool 
check_tags(const RGWObjTags::tag_map_t& tags) const; + bool check_prefix(const std::string& obj_name) const; }; WRITE_CLASS_ENCODER(rgw_sync_pipe_filter) diff --git a/src/rgw/rgw_xxh_digest.h b/src/rgw/rgw_xxh_digest.h new file mode 100644 index 00000000000..fe78636fc51 --- /dev/null +++ b/src/rgw/rgw_xxh_digest.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat, Inc + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +#include <stdint.h> +#include <stdio.h> +#include "rgw_crc_digest.h" + +#define XXH_INLINE_ALL 1 /* required for streaming variants */ +#include "xxhash.h" + +namespace rgw { namespace digest { + + class XXH3 { + private: + XXH3_state_t s; + + public: + static constexpr uint16_t digest_size = 8; + + XXH3() { + XXH3_INITSTATE(&s); + Restart(); + } + + void Restart() { XXH3_64bits_reset(&s); } + + void Update(const unsigned char *data, uint64_t len) { + XXH3_64bits_update(&s, data, len); + } + + void Final(unsigned char* digest) { + XXH64_hash_t final = XXH3_64bits_digest(&s); + if constexpr (std::endian::native != std::endian::big) { + final = rgw::digest::byteswap(final); + } + memcpy((char*) digest, &final, sizeof(final)); + } + }; /* XXH3 */ +}} /* namespace */ diff --git a/src/script/backport-create-issue b/src/script/backport-create-issue index 9fb627c6fac..e265c164db0 100755 --- a/src/script/backport-create-issue +++ b/src/script/backport-create-issue @@ -39,9 +39,9 @@ from redminelib.exceptions import ResourceAttrError redmine_endpoint = "https://tracker.ceph.com" project_name = "Ceph" release_id = 16 -custom_field_tag = 'cf_3' -tag_separator = ' ' -tag_backport_processed = 'backport_processed' +CF_TAGS = 31 # Tags custom field: https://tracker.ceph.com/custom_fields/31/edit +TAG_SEPARATOR = ' ' +TAG_BACKPORT_PROCESSED = 'backport_processed' delay_seconds = 5 redmine_key_file="~/.redmine_key" redmine_key_env="REDMINE_API_KEY" @@ -87,7 +87,7 @@ def parse_arguments(): "them even if not in 'Pending Backport' status. " "Otherwise, process all issues in 'Pending Backport' " "status even if already processed " - f"(tag '{tag_backport_processed}' added)", + f"(tag '{TAG_BACKPORT_PROCESSED}' added)", action="store_true") return parser.parse_args() @@ -305,20 +305,34 @@ def mark_as_processed(r, issue): This script will add a custom Tag to indicate whether the tracker was already processed for backport tracker creation. 
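    The tag is appended to the existing space-separated value of the
    Tags custom field (CF_TAGS), and only when it is not already present.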
""" - custom_fields = list(issue['custom_fields'].values()) - for i, field in enumerate(custom_fields): - if field['name'] == 'Tags': - if tag_backport_processed not in field['value']: - if field['value']: - custom_fields[i]['value'] += (tag_separator + - tag_backport_processed) - else: - custom_fields[i]['value'] = tag_backport_processed - logging.info("%s adding tag '%s'", url(issue), - tag_backport_processed) - r.issue.update(issue.id, custom_fields=custom_fields) - return + logging.debug("custom_fields: %s", list(issue['custom_fields'])) + + tags_cf = next(filter(lambda x: x['id'] == CF_TAGS, issue['custom_fields']), None) + + if tags_cf is None: + tags = '' + else: + tags = tags_cf.value + if tags is None: + tags = '' + else: + tags.strip() + + if TAG_BACKPORT_PROCESSED not in tags: + if tags: + tags += f"{TAG_SEPARATOR}{TAG_BACKPORT_PROCESSED}" + else: + tags = TAG_BACKPORT_PROCESSED + + logging.info("%s adding tag '%s', now '%s'", url(issue), TAG_BACKPORT_PROCESSED, tags) + tags_cf = { + 'id': CF_TAGS, + 'value': tags, + } + r.issue.update(issue.id, custom_fields=[tags_cf]) + else: + logging.debug("%s already has tag '%s'", url(issue), TAG_BACKPORT_PROCESSED) def iterate_over_backports(r, issues, dry_run=False): counter = 0 @@ -377,7 +391,7 @@ if __name__ == '__main__': if args.force or args.resolve_parent: if args.force: logging.warn("--force option was given: ignoring '%s' tag!", - tag_backport_processed) + TAG_BACKPORT_PROCESSED) issues = redmine.issue.filter(project_id=ceph_project_id, status_id=pending_backport_status_id) else: @@ -385,9 +399,9 @@ if __name__ == '__main__': issues = redmine.issue.filter(project_id=ceph_project_id, status_id=pending_backport_status_id, **{ - custom_field_tag: + f"cf_{CF_TAGS}": '!~' + - tag_backport_processed}) + TAG_BACKPORT_PROCESSED}) if force_create: logging.info("Processing {} issues regardless of status" .format(len(issues))) diff --git a/src/script/ceph-backport.sh b/src/script/ceph-backport.sh index 4854edff8b3..f4f88499409 100755 --- a/src/script/ceph-backport.sh +++ b/src/script/ceph-backport.sh @@ -1570,6 +1570,7 @@ redmine_url="$(number_to_url "redmine" "${issue}")" debug "Considering Redmine issue: $redmine_url - is it in the Backport tracker?" 
remote_api_output="$(curl --silent "${redmine_url}.json")" +debug "$remote_api_output" tracker="$(echo "$remote_api_output" | jq -r '.issue.tracker.name')" if [ "$tracker" = "Backport" ]; then debug "Yes, $redmine_url is a Backport issue" @@ -1580,7 +1581,7 @@ else fi debug "Looking up release/milestone of $redmine_url" -milestone="$(echo "$remote_api_output" | jq -r '.issue.custom_fields[0].value')" +milestone="$(echo "$remote_api_output" | jq -r '.issue.custom_fields[] | select(.id == 16) | .value')" if [ "$milestone" ] ; then debug "Release/milestone: $milestone" else diff --git a/src/script/cpatch.py b/src/script/cpatch.py index fa1cecc5943..4ff9bd999d2 100755 --- a/src/script/cpatch.py +++ b/src/script/cpatch.py @@ -549,9 +549,9 @@ class Builder: if self._cached_py_site_packages is not None: return self._cached_py_site_packages # use the container image to probe for the correct python site-packages dir + py_vers = ['3.12', '3.11', '3.10', '3.9', '3.8', '3.6'] valid_site_packages = [ - "/usr/lib/python3.8/site-packages", - "/usr/lib/python3.6/site-packages", + f'/usr/lib/python{v}/site-packages' for v in py_vers ] cmd = [ self._ctx.engine, diff --git a/src/seastar b/src/seastar -Subproject 09a44e0d403a97db696837e75b4b61f592baf35 +Subproject 96a93ba0cf2d1013914aea203488d2c0cccc54c diff --git a/src/test/admin_socket.cc b/src/test/admin_socket.cc index a16a0cbb1f1..69a3cbefd0e 100644 --- a/src/test/admin_socket.cc +++ b/src/test/admin_socket.cc @@ -547,8 +547,8 @@ TEST_F(AdminSocketRaise, StopCont) ASSERT_EQ("", send_raise("CONT", 0.2)); ASSERT_EQ("", send_raise("STOP")); auto elapsed = system_clock::now() - then; - // give it a 1% slack - EXPECT_LE(milliseconds(198), duration_cast<milliseconds>(elapsed)); + // give it a 5% slack + EXPECT_LE(milliseconds(190), duration_cast<milliseconds>(elapsed)); } /* diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index b0db0d5dd77..32117fd0838 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -194,6 +194,7 @@ topic get get a bucket notifications topic topic rm remove a bucket notifications topic topic stats get a bucket notifications persistent topic stats (i.e. reservations, entries & size) + topic dump dump (in JSON format) all pending bucket notifications of a persistent topic script put upload a Lua script to a context script get get the Lua script of a context script rm remove the Lua scripts of a context diff --git a/src/test/cli/rbd/help.t b/src/test/cli/rbd/help.t index 866bd8f11c8..c981996ea9a 100644 --- a/src/test/cli/rbd/help.t +++ b/src/test/cli/rbd/help.t @@ -48,6 +48,7 @@ group image add Add an image to a group. group image list (... ls) List images in a group. group image remove (... rm) Remove an image from a group. + group info Show information about a group. group list (group ls) List rbd groups. group remove (group rm) Delete a group. group rename Rename a group within pool.
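The `group info` entry added above is backed by the librbd group APIs that src/test/librbd/test_Groups.cc exercises later in this diff. A minimal sketch of driving those calls from C++ follows; the client id, config lookup, and pool name `rbd` are illustrative assumptions, not taken from the diff:

#include <rados/librados.hpp>
#include <rbd/librbd.hpp>
#include <iostream>
#include <string>

int main() {
  librados::Rados rados;
  if (rados.init("admin") < 0)        // client id "admin" is an assumption
    return 1;
  rados.conf_read_file(nullptr);      // default ceph.conf search path
  if (rados.connect() < 0)
    return 1;

  librados::IoCtx ioctx;
  if (rados.ioctx_create("rbd", ioctx) < 0)  // pool name is an assumption
    return 1;

  librbd::RBD rbd;
  if (rbd.group_create(ioctx, "mygroup") < 0)
    return 1;

  std::string group_id;
  // group_get_id() is the API this series introduces
  if (rbd.group_get_id(ioctx, "mygroup", &group_id) == 0)
    std::cout << "group id: " << group_id << std::endl;

  rbd.group_remove(ioctx, "mygroup");
  rados.shutdown();
  return 0;
}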
@@ -221,9 +222,10 @@ rbd help clone usage: rbd clone [--pool <pool>] [--namespace <namespace>] [--image <image>] - [--snap <snap>] [--dest-pool <dest-pool>] - [--dest-namespace <dest-namespace>] [--dest <dest>] - [--order <order>] [--object-size <object-size>] + [--snap <snap>] [--snap-id <snap-id>] + [--dest-pool <dest-pool>] [--dest-namespace <dest-namespace>] + [--dest <dest>] [--order <order>] + [--object-size <object-size>] [--image-feature <image-feature>] [--image-shared] [--stripe-unit <stripe-unit>] [--stripe-count <stripe-count>] [--data-pool <data-pool>] @@ -248,6 +250,7 @@ --namespace arg source namespace name --image arg source image name --snap arg source snapshot name + --snap-id arg source snapshot id --dest-pool arg destination pool name --dest-namespace arg destination namespace name --dest arg destination image name @@ -972,6 +975,24 @@ -p [ --pool ] arg pool name unless overridden --image-id arg image id + rbd help group info + usage: rbd group info [--pool <pool>] [--namespace <namespace>] + [--group <group>] [--format <format>] [--pretty-format] + <group-spec> + + Show information about a group. + + Positional arguments + <group-spec> group specification + (example: [<pool-name>/[<namespace>/]]<group-name>) + + Optional arguments + -p [ --pool ] arg pool name + --namespace arg namespace name + --group arg group name + --format arg output format (plain, json, or xml) [default: plain] + --pretty-format pretty formatting (json and xml) + rbd help group list usage: rbd group list [--pool <pool>] [--namespace <namespace>] [--format <format>] [--pretty-format] diff --git a/src/test/client/CMakeLists.txt b/src/test/client/CMakeLists.txt index 718c52cb95a..b085a954fb7 100644 --- a/src/test/client/CMakeLists.txt +++ b/src/test/client/CMakeLists.txt @@ -5,6 +5,7 @@ if(${WITH_CEPHFS}) ops.cc nonblocking.cc commands.cc + syncio.cc ) target_link_libraries(ceph_test_client client diff --git a/src/test/client/syncio.cc b/src/test/client/syncio.cc new file mode 100644 index 00000000000..f40503a3909 --- /dev/null +++ b/src/test/client/syncio.cc @@ -0,0 +1,79 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2024 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <errno.h> + +#include <iostream> +#include <string> + +#include <fmt/format.h> + +#include "test/client/TestClient.h" + +TEST_F(TestClient, LlreadvLlwritevInvalidFileHandleSync) { + /* Test that providing a null or invalid file handle returns an error + as expected */ + Fh *fh_null = NULL; + char out_buf_0[] = "hello "; + char out_buf_1[] = "world\n"; + struct iovec iov_out[2] = { + {out_buf_0, sizeof(out_buf_0)}, + {out_buf_1, sizeof(out_buf_1)}, + }; + + char in_buf_0[sizeof(out_buf_0)]; + char in_buf_1[sizeof(out_buf_1)]; + struct iovec iov_in[2] = { + {in_buf_0, sizeof(in_buf_0)}, + {in_buf_1, sizeof(in_buf_1)}, + }; + + int64_t rc; + + rc = client->ll_writev(fh_null, iov_out, 2, 0); + ASSERT_EQ(rc, -CEPHFS_EBADF); + + rc = client->ll_readv(fh_null, iov_in, 2, 0); + ASSERT_EQ(rc, -CEPHFS_EBADF); + + // test after closing the file handle + int mypid = getpid(); + char filename[256]; + + client->unmount(); + TearDown(); + SetUp(); + + sprintf(filename, "test_llreadvllwritevinvalidfhfile%u", mypid); + + Inode *root, *file; + root = client->get_root(); + ASSERT_NE(root, (Inode *)NULL); + + Fh *fh; + struct ceph_statx stx; + + ASSERT_EQ(0, client->ll_createx(root, filename, 0666, + O_RDWR | O_CREAT | O_TRUNC, + &file, &fh, &stx, 0, 0, myperm)); + + client->ll_release(fh); + ASSERT_EQ(0, client->ll_unlink(root, filename, myperm)); + + rc = client->ll_writev(fh, iov_out, 2, 0); + ASSERT_EQ(rc, -CEPHFS_EBADF); + + rc = client->ll_readv(fh, iov_in, 2, 0); + ASSERT_EQ(rc, -CEPHFS_EBADF); +} diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index 428ef7b0147..11a7ea0e20c 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -312,6 +312,10 @@ target_link_libraries(unittest_dns_resolve global) add_ceph_unittest(unittest_dns_resolve) endif() +add_executable(unittest_dout_fmt test_dout_fmt.cc $<TARGET_OBJECTS:unit-main>) +target_link_libraries(unittest_dout_fmt global) +add_ceph_unittest(unittest_dout_fmt) + # We're getting an ICE when trying to compile this test using mingw-gcc and # recent Boost versions. Note that mingw-llvm works fine. if (NOT WIN32 OR (NOT(CMAKE_CXX_COMPILER_ID STREQUAL GNU))) diff --git a/src/test/common/test_dout_fmt.cc b/src/test/common/test_dout_fmt.cc new file mode 100644 index 00000000000..7d6b519fe35 --- /dev/null +++ b/src/test/common/test_dout_fmt.cc @@ -0,0 +1,57 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright contributors to the Ceph project + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING.
+ * + */ + +#include "common/dout_fmt.h" +#include <gtest/gtest.h> + +TEST(DoutFmt, SubDout) +{ + // expect level 0 to always be gathered + lsubdout_fmt(g_ceph_context, test, 0, "{}: {}", "value", 42); + // expect level 99 to be compiled out + lsubdout_fmt(g_ceph_context, test, 99, "{}: {}", "value", 42); +} + +#define dout_subsys ceph_subsys_test + +TEST(DoutFmt, Dout) +{ + ldout_fmt(g_ceph_context, 0, "{}: {}", "value", 42); + ldout_fmt(g_ceph_context, 99, "{}: {}", "value", 42); +} + +#define dout_context g_ceph_context + +TEST(DoutFmt, DoutContext) +{ + dout_fmt(0, "{}: {}", "value", 42); + dout_fmt(99, "{}: {}", "value", 42); +} + +#undef dout_prefix +#define dout_prefix *_dout << "prefix: " + +TEST(DoutFmt, DoutPrefix) +{ + ldout_fmt(g_ceph_context, 0, "{}: {}", "value", 42); + ldout_fmt(g_ceph_context, 99, "{}: {}", "value", 42); +} + +TEST(DoutFmt, DppDout) +{ + const DoutPrefix dpp{g_ceph_context, dout_subsys, "prefix: "}; + ldpp_dout_fmt(&dpp, 0, "{}: {}", "value", 42); + ldpp_dout_fmt(&dpp, 99, "{}: {}", "value", 42); +} diff --git a/src/test/crimson/seastore/onode_tree/test_value.h b/src/test/crimson/seastore/onode_tree/test_value.h index 98249f8c956..c913f85e6d6 100644 --- a/src/test/crimson/seastore/onode_tree/test_value.h +++ b/src/test/crimson/seastore/onode_tree/test_value.h @@ -176,7 +176,11 @@ class TestValue final : public Value { } }; - TestValue(NodeExtentManager& nm, const ValueBuilder& vb, Ref<tree_cursor_t>& p_cursor) + TestValue( + const hobject_t &hobj, + NodeExtentManager& nm, + const ValueBuilder& vb, + Ref<tree_cursor_t>& p_cursor) : Value(nm, vb, p_cursor) {} ~TestValue() override = default; diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc index 0e258b9a36c..0f05bae2e93 100644 --- a/src/test/crimson/seastore/test_object_data_handler.cc +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -26,7 +26,7 @@ class TestOnode final : public Onode { bool dirty = false; public: - TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {} + TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr, hobject_t()) {} const onode_layout_t &get_layout() const final { return layout; } diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 36e08001971..a638896a86e 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -14,6 +14,7 @@ #include "crimson/os/seastore/segment_manager.h" #include "test/crimson/seastore/test_block.h" +#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" using namespace crimson; using namespace crimson::os; @@ -2173,6 +2174,42 @@ TEST_P(tm_single_device_intergrity_check_test_t, remap_lazy_read) }); } +TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect) +{ + run_async([this] { + using namespace crimson::os::seastore::lba_manager::btree; + { + auto t = create_transaction(); + for (int i = 0; i < LEAF_NODE_CAPACITY; i++) { + auto extent = alloc_extent( + t, + i * 4096, + 4096, + 'a'); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_transaction(); + auto pin = get_pin(t, (LEAF_NODE_CAPACITY - 1) * 4096); + assert(pin->is_parent_valid()); + auto extent = alloc_extent(t, LEAF_NODE_CAPACITY * 4096, 4096, 'a'); + assert(!pin->is_parent_valid()); + pin = get_pin(t, LEAF_NODE_CAPACITY * 4096); + std::ignore = alloc_extent(t, (LEAF_NODE_CAPACITY + 1) * 4096, 4096, 'a'); + 
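+      // (descriptive note) the alloc_extent() call above overflows the leaf
+      // again; `pin` is expected to still see a valid parent, but one flagged
+      // as modified, so maybe_fix_pos() must re-anchor the pin's position
+      // before get_logical_extent() resolves the cached child below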
assert(pin->is_parent_valid()); + assert(pin->parent_modified()); + pin->maybe_fix_pos(); + auto v = pin->get_logical_extent(*t.t); + assert(v.has_child()); + auto extent2 = v.get_child_fut().unsafe_get0(); + assert(extent.get() == extent2.get()); + submit_transaction(std::move(t)); + } + }); +} + TEST_P(tm_single_device_test_t, random_writes_concurrent) { test_random_writes_concurrent(); diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h index c88bb15dced..f339b2baf69 100644 --- a/src/test/crimson/seastore/transaction_manager_test_state.h +++ b/src/test/crimson/seastore/transaction_manager_test_state.h @@ -183,7 +183,6 @@ class EphemeralTestState { #endif protected: - journal_type_t journal_type; size_t num_main_device_managers = 0; size_t num_cold_device_managers = 0; EphemeralDevicesRef devices; @@ -274,6 +273,7 @@ protected: Cache* cache; ExtentPlacementManager *epm; uint64_t seq = 0; + shard_stats_t shard_stats; TMTestState() : EphemeralTestState(1, 0) {} @@ -293,7 +293,8 @@ protected: "seastore_full_integrity_check", "false"); } #endif - tm = make_transaction_manager(p_dev, sec_devices, true); + shard_stats = {}; + tm = make_transaction_manager(p_dev, sec_devices, shard_stats, true); epm = tm->get_epm(); lba_manager = tm->get_lba_manager(); cache = tm->get_cache(); diff --git a/src/test/erasure-code/TestErasureCodeClay.cc b/src/test/erasure-code/TestErasureCodeClay.cc index cb474094894..a0e6ade8078 100644 --- a/src/test/erasure-code/TestErasureCodeClay.cc +++ b/src/test/erasure-code/TestErasureCodeClay.cc @@ -37,7 +37,7 @@ TEST(ErasureCodeClay, sanity_check_k) EXPECT_NE(std::string::npos, errors.str().find("must be >= 2")); } -TEST(ErasureCodeClay, encode_decode) +TEST(ErasureCodeClay, DISABLED_encode_decode) { ostringstream errors; ErasureCodeClay clay(g_conf().get_val<std::string>("erasure_code_dir")); @@ -134,7 +134,7 @@ TEST(ErasureCodeClay, encode_decode) } -TEST(ErasureCodeClay, encode_decode_aloof_nodes) +TEST(ErasureCodeClay, DISABLED_encode_decode_aloof_nodes) { ostringstream errors; ErasureCodeClay clay(g_conf().get_val<std::string>("erasure_code_dir")); @@ -243,7 +243,7 @@ TEST(ErasureCodeClay, encode_decode_aloof_nodes) } } -TEST(ErasureCodeClay, encode_decode_shortening_case) +TEST(ErasureCodeClay, DISABLED_encode_decode_shortening_case) { ostringstream errors; ErasureCodeClay clay(g_conf().get_val<std::string>("erasure_code_dir")); diff --git a/src/test/erasure-code/TestErasureCodeExample.cc b/src/test/erasure-code/TestErasureCodeExample.cc index b488a604b61..9e67b9c9ca7 100644 --- a/src/test/erasure-code/TestErasureCodeExample.cc +++ b/src/test/erasure-code/TestErasureCodeExample.cc @@ -194,12 +194,33 @@ TEST(ErasureCodeExample, decode) bufferlist out; EXPECT_EQ(0, example.decode_concat(encoded, &out)); bufferlist usable; + EXPECT_EQ(2u*encoded[0].length(), out.length()); usable.substr_of(out, 0, in.length()); EXPECT_TRUE(usable == in); + // partial chunk decode + map<int, bufferlist> partial_decode = encoded; + set<int> partial_want_to_read{want_to_encode, want_to_encode+1}; + EXPECT_EQ(1u, partial_want_to_read.size()); + out.clear(); + EXPECT_EQ(0, example.decode_concat(partial_want_to_read, + partial_decode, + &out)); + EXPECT_EQ(out.length(), encoded[0].length()); + + // partial degraded chunk decode + partial_decode = encoded; + partial_decode.erase(0); + EXPECT_EQ(1, partial_want_to_read.size()); + out.clear(); + EXPECT_EQ(0, example.decode_concat(partial_want_to_read, + 
partial_decode, + &out)); + EXPECT_EQ(out.length(), encoded[0].length()); + // cannot recover map<int, bufferlist> degraded; - degraded[0] = encoded[0]; + degraded[2] = encoded[2]; EXPECT_EQ(-ERANGE, example.decode_concat(degraded, &out)); } diff --git a/src/test/erasure-code/TestErasureCodeJerasure.cc b/src/test/erasure-code/TestErasureCodeJerasure.cc index 835f3c7b6c8..3946892c8aa 100644 --- a/src/test/erasure-code/TestErasureCodeJerasure.cc +++ b/src/test/erasure-code/TestErasureCodeJerasure.cc @@ -127,6 +127,33 @@ TYPED_TEST(ErasureCodeTest, encode_decode) EXPECT_EQ(0, memcmp(decoded[1].c_str(), in.c_str() + length, in.length() - length)); } + + // partial decode with the exact-sized decode_concat() + { + map<int, bufferlist> partial_decode = encoded; + // we have everything but want only the first chunk + set<int> partial_want_to_read = { 0 }; + EXPECT_EQ(1u, partial_want_to_read.size()); + bufferlist out; + EXPECT_EQ(0, jerasure.decode_concat(partial_want_to_read, + partial_decode, + &out)); + EXPECT_EQ(out.length(), partial_decode[0].length()); + } + + // partial degraded decode with the exact-sized decode_concat() + { + map<int, bufferlist> partial_decode = encoded; + // we have everything but what we really want + partial_decode.erase(0); + set<int> partial_want_to_read = { 0 }; + EXPECT_EQ(1u, partial_want_to_read.size()); + bufferlist out; + EXPECT_EQ(0, jerasure.decode_concat(partial_want_to_read, + partial_decode, + &out)); + EXPECT_EQ(out.length(), encoded[0].length()); + } } } diff --git a/src/test/libcephfs/CMakeLists.txt b/src/test/libcephfs/CMakeLists.txt index 2f3e0c7c299..6cbbbe246a5 100644 --- a/src/test/libcephfs/CMakeLists.txt +++ b/src/test/libcephfs/CMakeLists.txt @@ -10,7 +10,6 @@ if(WITH_LIBCEPHFS) main.cc deleg.cc monconfig.cc - vxattr.cc ) target_link_libraries(ceph_test_libcephfs ceph-common @@ -50,6 +49,21 @@ if(WITH_LIBCEPHFS) install(TARGETS ceph_test_libcephfs_suidsgid DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(ceph_test_libcephfs_vxattr + vxattr.cc + main.cc + ) + target_link_libraries(ceph_test_libcephfs_vxattr + ceph-common + cephfs + librados + ${UNITTEST_LIBS} + ${EXTRALIBS} + ${CMAKE_DL_LIBS} + ) + install(TARGETS ceph_test_libcephfs_vxattr + DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(ceph_test_libcephfs_newops main.cc newops.cc diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index 57c5eefa6d3..f2c87168633 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -3520,39 +3520,6 @@ TEST(LibCephFS, SetMountTimeout) { ceph_shutdown(cmount); } -TEST(LibCephFS, FsCrypt) { - struct ceph_mount_info *cmount; - ASSERT_EQ(ceph_create(&cmount, NULL), 0); - ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); - ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); - ASSERT_EQ(ceph_mount(cmount, NULL), 0); - - char test_xattr_file[NAME_MAX]; - sprintf(test_xattr_file, "test_fscrypt_%d", getpid()); - int fd = ceph_open(cmount, test_xattr_file, O_RDWR|O_CREAT, 0666); - ASSERT_GT(fd, 0); - - ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "ceph.fscrypt.auth", "foo", 3, CEPH_XATTR_CREATE)); - ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "ceph.fscrypt.file", "foo", 3, CEPH_XATTR_CREATE)); - - char buf[64]; - ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.auth", buf, sizeof(buf))); - ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.file", buf, sizeof(buf))); - ASSERT_EQ(0, ceph_close(cmount, fd)); - - ASSERT_EQ(0, ceph_unmount(cmount)); - ASSERT_EQ(0, ceph_mount(cmount, NULL)); - - fd = ceph_open(cmount, 
test_xattr_file, O_RDWR, 0666); - ASSERT_GT(fd, 0); - ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.auth", buf, sizeof(buf))); - ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.file", buf, sizeof(buf))); - - ASSERT_EQ(0, ceph_close(cmount, fd)); - ASSERT_EQ(0, ceph_unmount(cmount)); - ceph_shutdown(cmount); -} - TEST(LibCephFS, SnapdirAttrs) { struct ceph_mount_info *cmount; ASSERT_EQ(ceph_create(&cmount, NULL), 0); diff --git a/src/test/libcephfs/vxattr.cc b/src/test/libcephfs/vxattr.cc index 4d9eaf5e4d0..3d9c2b6d136 100644 --- a/src/test/libcephfs/vxattr.cc +++ b/src/test/libcephfs/vxattr.cc @@ -383,3 +383,75 @@ TEST(LibCephFS, GetAndSetDirRandom) { ceph_shutdown(cmount); } + +TEST(LibCephFS, FsCrypt) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, NULL), 0); + + char test_xattr_file[NAME_MAX]; + sprintf(test_xattr_file, "test_fscrypt_%d", getpid()); + int fd = ceph_open(cmount, test_xattr_file, O_RDWR|O_CREAT, 0666); + ASSERT_GT(fd, 0); + + ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "ceph.fscrypt.auth", "foo", 3, XATTR_CREATE)); + ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "ceph.fscrypt.file", "foo", 3, XATTR_CREATE)); + + char buf[64]; + ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.auth", buf, sizeof(buf))); + ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.file", buf, sizeof(buf))); + ASSERT_EQ(0, ceph_close(cmount, fd)); + + ASSERT_EQ(0, ceph_unmount(cmount)); + ASSERT_EQ(0, ceph_mount(cmount, NULL)); + + fd = ceph_open(cmount, test_xattr_file, O_RDWR, 0666); + ASSERT_GT(fd, 0); + ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.auth", buf, sizeof(buf))); + ASSERT_EQ(3, ceph_fgetxattr(cmount, fd, "ceph.fscrypt.file", buf, sizeof(buf))); + + ASSERT_EQ(0, ceph_close(cmount, fd)); + ASSERT_EQ(0, ceph_unmount(cmount)); + ceph_shutdown(cmount); +} + +#define ACL_EA_ACCESS "system.posix_acl_access" +#define ACL_EA_DEFAULT "system.posix_acl_default" + +TEST(LibCephFS, Removexattr) { + struct ceph_mount_info *cmount; + ASSERT_EQ(ceph_create(&cmount, NULL), 0); + ASSERT_EQ(ceph_conf_read_file(cmount, NULL), 0); + ASSERT_EQ(0, ceph_conf_parse_env(cmount, NULL)); + ASSERT_EQ(ceph_mount(cmount, NULL), 0); + + char test_xattr_file[NAME_MAX]; + sprintf(test_xattr_file, "test_removexattr_%d", getpid()); + int fd = ceph_open(cmount, test_xattr_file, O_RDWR|O_CREAT, 0666); + ASSERT_GT(fd, 0); + + // remove xattr + ASSERT_EQ(-CEPHFS_ENODATA, ceph_fremovexattr(cmount, fd, "user.remove.xattr")); + ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "user.remove.xattr", "foo", 3, XATTR_CREATE)); + ASSERT_EQ(0, ceph_fremovexattr(cmount, fd, "user.remove.xattr")); + + // remove xattr via setxattr & XATTR_REPLACE + ASSERT_EQ(-CEPHFS_ENODATA, ceph_fsetxattr(cmount, fd, "user.remove.xattr", nullptr, 0, XATTR_REPLACE)); + ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "user.remove.xattr", "foo", 3, XATTR_CREATE)); + ASSERT_EQ(0, ceph_fsetxattr(cmount, fd, "user.remove.xattr", nullptr, 0, XATTR_REPLACE)); + + // ACL_EA_ACCESS and ACL_EA_DEFAULT are special and always return success: + // if the attribute already exists, the first call removes it, and the + // second call still succeeds even though the attribute is now gone.
+ ASSERT_EQ(0, ceph_fremovexattr(cmount, fd, ACL_EA_ACCESS)); + ASSERT_EQ(0, ceph_fremovexattr(cmount, fd, ACL_EA_ACCESS)); + ASSERT_EQ(0, ceph_fremovexattr(cmount, fd, ACL_EA_DEFAULT)); + ASSERT_EQ(0, ceph_fremovexattr(cmount, fd, ACL_EA_DEFAULT)); + + ASSERT_EQ(0, ceph_close(cmount, fd)); + ASSERT_EQ(0, ceph_unmount(cmount)); + ceph_shutdown(cmount); +} + diff --git a/src/test/librados/io_cxx.cc b/src/test/librados/io_cxx.cc index 35568a8ba31..144a1147a2f 100644 --- a/src/test/librados/io_cxx.cc +++ b/src/test/librados/io_cxx.cc @@ -879,10 +879,11 @@ TEST_F(LibRadosIoECPP, RmXattrPP) { TEST_F(LibRadosIoECPP, CrcZeroWrite) { SKIP_IF_CRIMSON(); - set_allow_ec_overwrites(pool_name, true); + set_allow_ec_overwrites(); char buf[128]; memset(buf, 0xcc, sizeof(buf)); bufferlist bl; + bl.append(buf, sizeof(buf)); ASSERT_EQ(0, ioctx.write("foo", bl, 0, 0)); ASSERT_EQ(0, ioctx.write("foo", bl, 0, sizeof(buf))); @@ -890,7 +891,6 @@ TEST_F(LibRadosIoECPP, CrcZeroWrite) { ObjectReadOperation read; read.read(0, bl.length(), NULL, NULL); ASSERT_EQ(0, ioctx.operate("foo", &read, &bl)); - recreate_pool(); } TEST_F(LibRadosIoECPP, XattrListPP) { diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc index d9cb1c5b8b7..6425d3aac02 100644 --- a/src/test/librados/misc.cc +++ b/src/test/librados/misc.cc @@ -76,15 +76,15 @@ TEST(LibRadosMiscConnectFailure, ConnectTimeout) { ASSERT_EQ(0, rados_conf_set(cluster, "mon_host", "255.0.1.2:3456")); ASSERT_EQ(0, rados_conf_set(cluster, "key", "AQAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAA==")); - ASSERT_EQ(0, rados_conf_set(cluster, "client_mount_timeout", "2s")); + ASSERT_EQ(0, rados_conf_set(cluster, "client_mount_timeout", "5s")); utime_t start = ceph_clock_now(); ASSERT_EQ(-ETIMEDOUT, rados_connect(cluster)); utime_t end = ceph_clock_now(); utime_t dur = end - start; - ASSERT_GE(dur, utime_t(2, 0)); - ASSERT_LT(dur, utime_t(4, 0)); + ASSERT_GE(dur, utime_t(5, 0)); + ASSERT_LT(dur, utime_t(15, 0)); rados_shutdown(cluster); } diff --git a/src/test/librados/snapshots_cxx.cc b/src/test/librados/snapshots_cxx.cc index 3338b62056e..ab6ecdfbf01 100644 --- a/src/test/librados/snapshots_cxx.cc +++ b/src/test/librados/snapshots_cxx.cc @@ -153,7 +153,6 @@ TEST_F(LibRadosSnapshotsSelfManagedPP, SnapPP) { } TEST_F(LibRadosSnapshotsSelfManagedPP, RollbackPP) { - SKIP_IF_CRIMSON(); std::vector<uint64_t> my_snaps; IoCtx readioctx; ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), readioctx)); diff --git a/src/test/librados/testcase_cxx.cc b/src/test/librados/testcase_cxx.cc index 75c05cc2041..69230cb9e9d 100644 --- a/src/test/librados/testcase_cxx.cc +++ b/src/test/librados/testcase_cxx.cc @@ -3,6 +3,9 @@ #include "testcase_cxx.h" +#include <chrono> +#include <thread> + #include <errno.h> #include <fmt/format.h> #include "test_cxx.h" @@ -401,18 +404,32 @@ void RadosTestECPP::TearDown() cleanup_default_namespace(ioctx); cleanup_namespace(ioctx, nspace); } + if (ec_overwrites_set) { + ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster)); + ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster)); + ec_overwrites_set = false; + } ioctx.close(); } -void RadosTestECPP::recreate_pool() +void RadosTestECPP::set_allow_ec_overwrites() { - SKIP_IF_CRIMSON(); - ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster)); - ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster)); - SetUp(); -} + ec_overwrites_set = true; + ASSERT_EQ("", set_allow_ec_overwrites_pp(pool_name, cluster, true)); -void RadosTestECPP::set_allow_ec_overwrites(std::string pool, bool allow) -{ - 
ASSERT_EQ("", set_allow_ec_overwrites_pp(pool, cluster, allow)); -}
\ No newline at end of file + char buf[128]; + memset(buf, 0xcc, sizeof(buf)); + bufferlist bl; + bl.append(buf, sizeof(buf)); + + const std::string objname = "RadosTestECPP::set_allow_ec_overwrites:test_obj"; + ASSERT_EQ(0, ioctx.write(objname, bl, sizeof(buf), 0)); + const auto end = std::chrono::steady_clock::now() + std::chrono::seconds(120); + while (true) { + if (0 == ioctx.write(objname, bl, sizeof(buf), 0)) { + break; + } + ASSERT_LT(std::chrono::steady_clock::now(), end); + std::this_thread::sleep_for(std::chrono::seconds(2)); + } +} diff --git a/src/test/librados/testcase_cxx.h b/src/test/librados/testcase_cxx.h index 3fd5f9c6077..15b7df8171b 100644 --- a/src/test/librados/testcase_cxx.h +++ b/src/test/librados/testcase_cxx.h @@ -111,14 +111,14 @@ protected: }; class RadosTestECPP : public RadosTestPP { + bool ec_overwrites_set = false; public: RadosTestECPP(bool c=false) : cluster(s_cluster), cleanup(c) {} ~RadosTestECPP() override {} protected: static void SetUpTestCase(); static void TearDownTestCase(); - void recreate_pool(); - void set_allow_ec_overwrites(std::string pool, bool allow=true); + void set_allow_ec_overwrites(); static librados::Rados s_cluster; static std::string pool_name; diff --git a/src/test/librados/watch_notify_cxx.cc b/src/test/librados/watch_notify_cxx.cc index 808384bccbb..9427ad7ae98 100644 --- a/src/test/librados/watch_notify_cxx.cc +++ b/src/test/librados/watch_notify_cxx.cc @@ -368,7 +368,7 @@ TEST_P(LibRadosWatchNotifyPP, WatchNotify3) { notify_oid = "foo"; notify_ioctx = &ioctx; notify_cookies.clear(); - uint32_t timeout = 12; // configured timeout + uint32_t timeout = 26; // configured timeout char buf[128]; memset(buf, 0xcc, sizeof(buf)); bufferlist bl1; diff --git a/src/test/librados_test_stub/TestMemIoCtxImpl.cc b/src/test/librados_test_stub/TestMemIoCtxImpl.cc index 77ea14366cd..248fd5b8feb 100644 --- a/src/test/librados_test_stub/TestMemIoCtxImpl.cc +++ b/src/test/librados_test_stub/TestMemIoCtxImpl.cc @@ -479,7 +479,7 @@ int TestMemIoCtxImpl::selfmanaged_snap_rollback(const std::string& oid, for (TestMemCluster::FileSnapshots::reverse_iterator it = snaps.rbegin(); it != snaps.rend(); ++it) { TestMemCluster::SharedFile file = *it; - if (file->snap_id < get_snap_read()) { + if (file->snap_id < snapid) { if (versions == 0) { // already at the snapshot version return 0; diff --git a/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc b/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc index 943b8cc2dfa..50a87313447 100644 --- a/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc +++ b/src/test/librbd/exclusive_lock/test_mock_PostAcquireRequest.cc @@ -84,8 +84,6 @@ using ::testing::SetArgPointee; using ::testing::StrEq; using ::testing::WithArg; -static const std::string TEST_COOKIE("auto 123"); - class TestMockExclusiveLockPostAcquireRequest : public TestMockFixture { public: typedef PostAcquireRequest<MockTestImageCtx> MockPostAcquireRequest; diff --git a/src/test/librbd/exclusive_lock/test_mock_PreAcquireRequest.cc b/src/test/librbd/exclusive_lock/test_mock_PreAcquireRequest.cc index 5b4bce6dd58..ca98c8773e4 100644 --- a/src/test/librbd/exclusive_lock/test_mock_PreAcquireRequest.cc +++ b/src/test/librbd/exclusive_lock/test_mock_PreAcquireRequest.cc @@ -44,8 +44,6 @@ using ::testing::SetArgPointee; using ::testing::StrEq; using ::testing::WithArg; -static const std::string TEST_COOKIE("auto 123"); - class TestMockExclusiveLockPreAcquireRequest : public TestMockFixture { public: typedef 
PreAcquireRequest<MockTestImageCtx> MockPreAcquireRequest; diff --git a/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc b/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc index 466a3ab421e..f37939cf5ba 100644 --- a/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc +++ b/src/test/librbd/exclusive_lock/test_mock_PreReleaseRequest.cc @@ -70,8 +70,6 @@ using ::testing::Return; using ::testing::StrEq; using ::testing::WithArg; -static const std::string TEST_COOKIE("auto 123"); - class TestMockExclusiveLockPreReleaseRequest : public TestMockFixture { public: typedef ImageDispatch<MockTestImageCtx> MockImageDispatch; diff --git a/src/test/librbd/test_Groups.cc b/src/test/librbd/test_Groups.cc index 88b19146f16..16ba5d4f487 100644 --- a/src/test/librbd/test_Groups.cc +++ b/src/test/librbd/test_Groups.cc @@ -27,7 +27,6 @@ TEST_F(TestGroup, group_create) rados_ioctx_destroy(ioctx); } BOOST_SCOPE_EXIT_END; - librbd::RBD rbd; ASSERT_EQ(0, rbd_group_create(ioctx, "mygroup")); size_t size = 0; @@ -50,26 +49,62 @@ TEST_F(TestGroup, group_createPP) ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx)); librbd::RBD rbd; - ASSERT_EQ(0, rbd.group_create(ioctx, "mygroup")); + ASSERT_EQ(0, rbd.group_create(ioctx, "mygroupPP")); std::vector<std::string> groups; ASSERT_EQ(0, rbd.group_list(ioctx, &groups)); ASSERT_EQ(1U, groups.size()); - ASSERT_EQ("mygroup", groups[0]); + ASSERT_EQ("mygroupPP", groups[0]); groups.clear(); - ASSERT_EQ(0, rbd.group_rename(ioctx, "mygroup", "newgroup")); + ASSERT_EQ(0, rbd.group_rename(ioctx, "mygroupPP", "newgroupPP")); ASSERT_EQ(0, rbd.group_list(ioctx, &groups)); ASSERT_EQ(1U, groups.size()); - ASSERT_EQ("newgroup", groups[0]); + ASSERT_EQ("newgroupPP", groups[0]); - ASSERT_EQ(0, rbd.group_remove(ioctx, "newgroup")); + ASSERT_EQ(0, rbd.group_remove(ioctx, "newgroupPP")); groups.clear(); ASSERT_EQ(0, rbd.group_list(ioctx, &groups)); ASSERT_EQ(0U, groups.size()); } +TEST_F(TestGroup, group_get_id) +{ + rados_ioctx_t ioctx; + rados_ioctx_create(_cluster, _pool_name.c_str(), &ioctx); + BOOST_SCOPE_EXIT(ioctx) { + rados_ioctx_destroy(ioctx); + } BOOST_SCOPE_EXIT_END; + + ASSERT_EQ(0, rbd_group_create(ioctx, "group_get_id")); + + size_t size = 0; + ASSERT_EQ(-ERANGE, rbd_group_get_id(ioctx, "group_get_id", NULL, &size)); + ASSERT_GT(size, 0); + + char group_id[32]; + ASSERT_EQ(0, rbd_group_get_id(ioctx, "group_get_id", group_id, &size)); + ASSERT_EQ(strlen(group_id) + 1, size); + + ASSERT_EQ(0, rbd_group_remove(ioctx, "group_get_id")); +} + +TEST_F(TestGroup, group_get_idPP) +{ + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx)); + + librbd::RBD rbd; + ASSERT_EQ(0, rbd.group_create(ioctx, "group_get_idPP")); + + std::string group_id; + ASSERT_EQ(0, rbd.group_get_id(ioctx, "group_get_idPP", &group_id)); + ASSERT_FALSE(group_id.empty()); + + ASSERT_EQ(0, rbd.group_remove(ioctx, "group_get_idPP")); +} + TEST_F(TestGroup, add_image) { REQUIRE_FORMAT_V2(); @@ -159,7 +194,7 @@ TEST_F(TestGroup, add_imagePP) librados::IoCtx ioctx; ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx)); - const char *group_name = "mycg"; + const char *group_name = "mycgPP"; librbd::RBD rbd; ASSERT_EQ(0, rbd.group_create(ioctx, group_name)); @@ -240,7 +275,8 @@ TEST_F(TestGroup, add_snapshot) EXPECT_EQ(0, rbd_close(image)); } BOOST_SCOPE_EXIT_END; - ASSERT_EQ(10, rbd_write(image, 0, 10, orig_data)); + ASSERT_EQ(10, rbd_write2(image, 0, 10, orig_data, + LIBRADOS_OP_FLAG_FADVISE_FUA)); ASSERT_EQ(10, rbd_read(image, 0, 10, 
read_data)); ASSERT_EQ(0, memcmp(orig_data, read_data, 10)); @@ -309,11 +345,17 @@ TEST_F(TestGroup, add_snapshot) ASSERT_STREQ(snap_name, snaps[0].name); - ASSERT_EQ(10, rbd_write(image, 11, 10, test_data)); - ASSERT_EQ(10, rbd_read(image, 11, 10, read_data)); + ASSERT_EQ(10, rbd_write2(image, 9, 10, test_data, + LIBRADOS_OP_FLAG_FADVISE_FUA)); + ASSERT_EQ(10, rbd_read(image, 9, 10, read_data)); ASSERT_EQ(0, memcmp(test_data, read_data, 10)); + ASSERT_EQ(10, rbd_read(image, 0, 10, read_data)); + ASSERT_NE(0, memcmp(orig_data, read_data, 10)); ASSERT_EQ(0, rbd_group_snap_rollback(ioctx, group_name, snap_name)); + if (!is_feature_enabled(RBD_FEATURE_EXCLUSIVE_LOCK)) { + ASSERT_EQ(0, rbd_invalidate_cache(image)); + } ASSERT_EQ(10, rbd_read(image, 0, 10, read_data)); ASSERT_EQ(0, memcmp(orig_data, read_data, 10)); @@ -370,7 +412,7 @@ TEST_F(TestGroup, add_snapshotPP) librados::IoCtx ioctx; ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx)); - const char *group_name = "snap_group"; + const char *group_name = "snap_groupPP"; const char *snap_name = "snap_snapshot"; librbd::RBD rbd; @@ -384,7 +426,8 @@ TEST_F(TestGroup, add_snapshotPP) bufferlist expect_bl; bufferlist read_bl; expect_bl.append(std::string(512, '1')); - ASSERT_EQ((ssize_t)expect_bl.length(), image.write(0, expect_bl.length(), expect_bl)); + ASSERT_EQ(512, image.write2(0, expect_bl.length(), expect_bl, + LIBRADOS_OP_FLAG_FADVISE_FUA)); ASSERT_EQ(512, image.read(0, 512, read_bl)); ASSERT_TRUE(expect_bl.contents_equal(read_bl)); @@ -399,14 +442,17 @@ TEST_F(TestGroup, add_snapshotPP) bufferlist write_bl; write_bl.append(std::string(1024, '2')); - ASSERT_EQ(1024, image.write(513, write_bl.length(), write_bl)); - - read_bl.clear(); - ASSERT_EQ(1024, image.read(513, 1024, read_bl)); + ASSERT_EQ(1024, image.write2(256, write_bl.length(), write_bl, + LIBRADOS_OP_FLAG_FADVISE_FUA)); + ASSERT_EQ(1024, image.read(256, 1024, read_bl)); ASSERT_TRUE(write_bl.contents_equal(read_bl)); + ASSERT_EQ(512, image.read(0, 512, read_bl)); + ASSERT_FALSE(expect_bl.contents_equal(read_bl)); ASSERT_EQ(0, rbd.group_snap_rollback(ioctx, group_name, snap_name)); - + if (!is_feature_enabled(RBD_FEATURE_EXCLUSIVE_LOCK)) { + ASSERT_EQ(0, image.invalidate_cache()); + } ASSERT_EQ(512, image.read(0, 512, read_bl)); ASSERT_TRUE(expect_bl.contents_equal(read_bl)); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 5e82beb039d..cc0104b6373 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -3795,7 +3795,7 @@ TYPED_TEST(EncryptedFlattenTest, ZeroOverlap) } } -#endif +#endif // HAVE_LIBCRYPTSETUP TEST_F(TestLibRBD, TestIOWithIOHint) { @@ -7346,6 +7346,28 @@ TEST_F(TestLibRBD, FlushAioPP) ioctx.close(); } +struct diff_extent { + diff_extent(uint64_t _offset, uint64_t _length, bool _exists, + uint64_t object_size) : + offset(_offset), length(_length), exists(_exists) + { + if (object_size != 0) { + offset -= offset % object_size; + length = object_size; + } + } + uint64_t offset; + uint64_t length; + bool exists; + bool operator==(const diff_extent& o) const { + return offset == o.offset && length == o.length && exists == o.exists; + } +}; + +ostream& operator<<(ostream & o, const diff_extent& e) { + return o << '(' << e.offset << '~' << e.length << ' ' + << (e.exists ? 
"true" : "false") << ')'; +} int iterate_cb(uint64_t off, size_t len, int exists, void *arg) { @@ -7355,6 +7377,13 @@ int iterate_cb(uint64_t off, size_t len, int exists, void *arg) return 0; } +int vector_iterate_cb(uint64_t off, size_t len, int exists, void *arg) +{ + auto diff = static_cast<std::vector<diff_extent>*>(arg); + diff->push_back(diff_extent(off, len, exists, 0)); + return 0; +} + static int iterate_error_cb(uint64_t off, size_t len, int exists, void *arg) { return -EINVAL; @@ -7429,6 +7458,436 @@ template <typename T> class DiffIterateTest : public TestLibRBD { public: static const uint8_t whole_object = T::whole_object; + + void test_deterministic(uint64_t object_off, uint64_t len) { + rados_ioctx_t ioctx; + ASSERT_EQ(0, rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx)); + + rbd_image_t image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image(ioctx, name.c_str(), 20 << 20, &order)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + test_deterministic(image, object_off, len, 1); + + ASSERT_EQ(0, rbd_close(image)); + rados_ioctx_destroy(ioctx); + } + + void test_deterministic_pp(uint64_t object_off, uint64_t len) { + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); + + librbd::RBD rbd; + librbd::Image image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), 20 << 20, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + test_deterministic_pp(image, object_off, len, 1); + } + +#ifdef HAVE_LIBCRYPTSETUP + + void test_deterministic_luks1(uint64_t object_off, uint64_t len) { + rados_ioctx_t ioctx; + ASSERT_EQ(0, rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx)); + + rbd_image_t image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image(ioctx, name.c_str(), 24 << 20, &order)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + rbd_encryption_luks1_format_options_t fopts = { + RBD_ENCRYPTION_ALGORITHM_AES256, "some passphrase", 15}; + ASSERT_EQ(0, rbd_encryption_format(image, RBD_ENCRYPTION_FORMAT_LUKS1, + &fopts, sizeof(fopts))); + test_deterministic(image, object_off, len, 512); + + ASSERT_EQ(0, rbd_close(image)); + rados_ioctx_destroy(ioctx); + } + + void test_deterministic_luks1_pp(uint64_t object_off, uint64_t len) { + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); + + librbd::RBD rbd; + librbd::Image image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), 24 << 20, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + librbd::encryption_luks1_format_options_t fopts = { + RBD_ENCRYPTION_ALGORITHM_AES256, "some passphrase"}; + ASSERT_EQ(0, image.encryption_format(RBD_ENCRYPTION_FORMAT_LUKS1, &fopts, + sizeof(fopts))); + test_deterministic_pp(image, object_off, len, 512); + } + + void test_deterministic_luks2(uint64_t object_off, uint64_t len) { + rados_ioctx_t ioctx; + ASSERT_EQ(0, rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx)); + + rbd_image_t image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image(ioctx, name.c_str(), 36 << 20, &order)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + rbd_encryption_luks2_format_options_t fopts = { + RBD_ENCRYPTION_ALGORITHM_AES256, "some passphrase", 15}; + ASSERT_EQ(0, 
rbd_encryption_format(image, RBD_ENCRYPTION_FORMAT_LUKS2, + &fopts, sizeof(fopts))); + test_deterministic(image, object_off, len, 4096); + + ASSERT_EQ(0, rbd_close(image)); + rados_ioctx_destroy(ioctx); + } + + void test_deterministic_luks2_pp(uint64_t object_off, uint64_t len) { + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); + + librbd::RBD rbd; + librbd::Image image; + int order = 22; + std::string name = this->get_temp_image_name(); + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), 36 << 20, &order)); + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + librbd::encryption_luks2_format_options_t fopts = { + RBD_ENCRYPTION_ALGORITHM_AES256, "some passphrase"}; + ASSERT_EQ(0, image.encryption_format(RBD_ENCRYPTION_FORMAT_LUKS2, &fopts, + sizeof(fopts))); + test_deterministic_pp(image, object_off, len, 4096); + } + +#endif // HAVE_LIBCRYPTSETUP + +private: + void test_deterministic(rbd_image_t image, uint64_t object_off, + uint64_t len, uint64_t block_size) { + uint64_t off1 = 0; + uint64_t off2 = 4 << 20; + uint64_t size = 20 << 20; + uint64_t extent_len = round_up_to(object_off + len, block_size); + + rbd_image_info_t info; + ASSERT_EQ(0, rbd_stat(image, &info, sizeof(info))); + ASSERT_EQ(size, info.size); + ASSERT_EQ(5, info.num_objs); + ASSERT_EQ(4 << 20, info.obj_size); + ASSERT_EQ(22, info.order); + + uint64_t object_size = 0; + if (whole_object) { + object_size = 1 << info.order; + } + + std::vector<diff_extent> extents; + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(-ENOENT, rbd_diff_iterate2(image, "snap1", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_EQ(0, rbd_snap_create(image, "snap1")); + + std::string buf(len, '1'); + ASSERT_EQ(len, rbd_write(image, off1 + object_off, len, buf.data())); + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, rbd_snap_create(image, "snap2")); + + ASSERT_EQ(len, rbd_write(image, off2 + object_off, len, buf.data())); + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + ASSERT_EQ(0, rbd_snap_create(image, "snap3")); + + // 1. beginning of time -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 2. snap1 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 3. 
snap2 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 4. snap3 -> HEAD + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap3", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap3")); + + // 5. beginning of time -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 6. snap1 -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 7. snap2 -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 8. snap3 -> snap3 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap3", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap2")); + + // 9. beginning of time -> snap2 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 10. snap1 -> snap2 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 11. snap2 -> snap2 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 12. snap3 -> snap2 + ASSERT_EQ(-EINVAL, rbd_diff_iterate2(image, "snap3", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, rbd_snap_set(image, "snap1")); + + // 13. beginning of time -> snap1 + ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 14. snap1 -> snap1 + ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 15. snap2 -> snap1 + ASSERT_EQ(-EINVAL, rbd_diff_iterate2(image, "snap2", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + // 16. 
snap3 -> snap1 + ASSERT_EQ(-EINVAL, rbd_diff_iterate2(image, "snap3", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_PASSED(validate_object_map, image); + } + + void test_deterministic_pp(librbd::Image& image, uint64_t object_off, + uint64_t len, uint64_t block_size) { + uint64_t off1 = 8 << 20; + uint64_t off2 = 16 << 20; + uint64_t size = 20 << 20; + uint64_t extent_len = round_up_to(object_off + len, block_size); + + librbd::image_info_t info; + ASSERT_EQ(0, image.stat(info, sizeof(info))); + ASSERT_EQ(size, info.size); + ASSERT_EQ(5, info.num_objs); + ASSERT_EQ(4 << 20, info.obj_size); + ASSERT_EQ(22, info.order); + + uint64_t object_size = 0; + if (whole_object) { + object_size = 1 << info.order; + } + + std::vector<diff_extent> extents; + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(-ENOENT, image.diff_iterate2("snap1", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_EQ(0, image.snap_create("snap1")); + + ceph::bufferlist bl; + bl.append(std::string(len, '1')); + ASSERT_EQ(len, image.write(off1 + object_off, len, bl)); + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image.snap_create("snap2")); + + ASSERT_EQ(len, image.write(off2 + object_off, len, bl)); + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + ASSERT_EQ(0, image.snap_create("snap3")); + + // 1. beginning of time -> HEAD + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 2. snap1 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 3. snap2 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 4. snap3 -> HEAD + ASSERT_EQ(0, image.diff_iterate2("snap3", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap3")); + + // 5. beginning of time -> snap3 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 6. 
snap1 -> snap3 + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(2u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[1]); + extents.clear(); + + // 7. snap2 -> snap3 + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off2, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 8. snap3 -> snap3 + ASSERT_EQ(0, image.diff_iterate2("snap3", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap2")); + + // 9. beginning of time -> snap2 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 10. snap1 -> snap2 + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(off1, extent_len, true, object_size), extents[0]); + extents.clear(); + + // 11. snap2 -> snap2 + ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 12. snap3 -> snap2 + ASSERT_EQ(-EINVAL, image.diff_iterate2("snap3", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, image.snap_set("snap1")); + + // 13. beginning of time -> snap1 + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 14. snap1 -> snap1 + ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, whole_object, + vector_iterate_cb, &extents)); + ASSERT_EQ(0u, extents.size()); + + // 15. snap2 -> snap1 + ASSERT_EQ(-EINVAL, image.diff_iterate2("snap2", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + // 16. snap3 -> snap1 + ASSERT_EQ(-EINVAL, image.diff_iterate2("snap3", 0, size, true, + whole_object, vector_iterate_cb, + &extents)); + + ASSERT_PASSED(validate_object_map, image); + } }; template <bool _whole_object> @@ -7489,300 +7948,80 @@ TYPED_TEST(DiffIterateTest, DiffIterate) ioctx.close(); } -struct diff_extent { - diff_extent(uint64_t _offset, uint64_t _length, bool _exists, - uint64_t object_size) : - offset(_offset), length(_length), exists(_exists) - { - if (object_size != 0) { - offset -= offset % object_size; - length = object_size; - } - } - uint64_t offset; - uint64_t length; - bool exists; - bool operator==(const diff_extent& o) const { - return offset == o.offset && length == o.length && exists == o.exists; - } -}; - -ostream& operator<<(ostream & o, const diff_extent& e) { - return o << '(' << e.offset << '~' << e.length << ' ' << (e.exists ? 
"true" : "false") << ')'; -} - -int vector_iterate_cb(uint64_t off, size_t len, int exists, void *arg) -{ - //cout << "iterate_cb " << off << "~" << len << std::endl; - vector<diff_extent> *diff = static_cast<vector<diff_extent> *>(arg); - diff->push_back(diff_extent(off, len, exists, 0)); - return 0; -} - TYPED_TEST(DiffIterateTest, DiffIterateDeterministic) { REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); - rados_ioctx_t ioctx; - ASSERT_EQ(0, rados_ioctx_create(this->_cluster, this->m_pool_name.c_str(), - &ioctx)); - - rbd_image_t image; - int order = 22; - std::string name = this->get_temp_image_name(); - uint64_t size = 20 << 20; - - ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order)); - ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); - - uint64_t object_size = 0; - if (this->whole_object) { - object_size = 1 << order; - } - - std::vector<diff_extent> extents; - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_EQ(0, rbd_snap_create(image, "snap1")); - - std::string buf(256, '1'); - ASSERT_EQ(256, rbd_write(image, 0, 256, buf.data())); - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); - - ASSERT_EQ(0, rbd_snap_create(image, "snap2")); - - ASSERT_EQ(256, rbd_write(image, 1 << order, 256, buf.data())); - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - ASSERT_EQ(0, rbd_snap_create(image, "snap3")); - - // 1. beginning of time -> HEAD - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 2. snap1 -> HEAD - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 3. snap2 -> HEAD - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); - extents.clear(); - - // 4. snap3 -> HEAD - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap3", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, rbd_snap_set(image, "snap3")); - - // 5. beginning of time -> snap3 - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 6. 
snap1 -> snap3 - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 7. snap2 -> snap3 - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap2", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); - extents.clear(); - - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, rbd_snap_set(image, "snap2")); - - // 8. beginning of time -> snap2 - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); - - // 9. snap1 -> snap2 - ASSERT_EQ(0, rbd_diff_iterate2(image, "snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); - - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, rbd_snap_set(image, "snap1")); - - // 10. beginning of time -> snap1 - ASSERT_EQ(0, rbd_diff_iterate2(image, NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_PASSED(this->validate_object_map, image); - - ASSERT_EQ(0, rbd_close(image)); - rados_ioctx_destroy(ioctx); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic(0, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic((1 << 20) - 256, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic((1 << 20) - 128, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic(1 << 20, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic((4 << 20) - 256, 256)); } TYPED_TEST(DiffIterateTest, DiffIterateDeterministicPP) { REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); - librados::IoCtx ioctx; - ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - - librbd::RBD rbd; - librbd::Image image; - int order = 22; - std::string name = this->get_temp_image_name(); - uint64_t size = 20 << 20; - - ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); - ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); - - uint64_t object_size = 0; - if (this->whole_object) { - object_size = 1 << order; - } - - std::vector<diff_extent> extents; - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_EQ(0, image.snap_create("snap1")); - - ceph::bufferlist bl; - bl.append(std::string(256, '1')); - ASSERT_EQ(256, image.write(0, 256, bl)); - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); - - ASSERT_EQ(0, image.snap_create("snap2")); - - ASSERT_EQ(256, image.write(1 << order, 256, bl)); - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - ASSERT_EQ(0, 
image.snap_create("snap3")); - - // 1. beginning of time -> HEAD - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 2. snap1 -> HEAD - ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); - - // 3. snap2 -> HEAD - ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); - extents.clear(); - - // 4. snap3 -> HEAD - ASSERT_EQ(0, image.diff_iterate2("snap3", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); - - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, image.snap_set("snap3")); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_pp(0, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_pp((3 << 20) - 2, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_pp((3 << 20) - 1, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_pp(3 << 20, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_pp((4 << 20) - 2, 2)); +} - // 5. beginning of time -> snap3 - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); +#ifdef HAVE_LIBCRYPTSETUP - // 6. snap1 -> snap3 - ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(2u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[1]); - extents.clear(); +TYPED_TEST(DiffIterateTest, DiffIterateDeterministicLUKS1) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + REQUIRE(!is_feature_enabled(RBD_FEATURE_JOURNALING)); - // 7. snap2 -> snap3 - ASSERT_EQ(0, image.diff_iterate2("snap2", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(1 << order, 256, true, object_size), extents[0]); - extents.clear(); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1(0, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1((1 << 20) - 256, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1((1 << 20) - 128, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1(1 << 20, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1((4 << 20) - 256, 256)); +} - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, image.snap_set("snap2")); +TYPED_TEST(DiffIterateTest, DiffIterateDeterministicLUKS1PP) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + REQUIRE(!is_feature_enabled(RBD_FEATURE_JOURNALING)); - // 8. 
beginning of time -> snap2 - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1_pp(0, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1_pp((3 << 20) - 2, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1_pp((3 << 20) - 1, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1_pp(3 << 20, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks1_pp((4 << 20) - 2, 2)); +} - // 9. snap1 -> snap2 - ASSERT_EQ(0, image.diff_iterate2("snap1", 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(1u, extents.size()); - ASSERT_EQ(diff_extent(0, 256, true, object_size), extents[0]); - extents.clear(); +TYPED_TEST(DiffIterateTest, DiffIterateDeterministicLUKS2) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + REQUIRE(!is_feature_enabled(RBD_FEATURE_JOURNALING)); - ASSERT_PASSED(this->validate_object_map, image); - ASSERT_EQ(0, image.snap_set("snap1")); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2(0, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2((1 << 20) - 256, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2((1 << 20) - 128, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2(1 << 20, 256)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2((4 << 20) - 256, 256)); +} - // 10. beginning of time -> snap1 - ASSERT_EQ(0, image.diff_iterate2(NULL, 0, size, true, this->whole_object, - vector_iterate_cb, &extents)); - ASSERT_EQ(0u, extents.size()); +TYPED_TEST(DiffIterateTest, DiffIterateDeterministicLUKS2PP) +{ + REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); + REQUIRE(!is_feature_enabled(RBD_FEATURE_JOURNALING)); - ASSERT_PASSED(this->validate_object_map, image); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2_pp(0, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2_pp((3 << 20) - 2, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2_pp((3 << 20) - 1, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2_pp(3 << 20, 2)); + EXPECT_NO_FATAL_FAILURE(this->test_deterministic_luks2_pp((4 << 20) - 2, 2)); } +#endif // HAVE_LIBCRYPTSETUP + TYPED_TEST(DiffIterateTest, DiffIterateDiscard) { REQUIRE(!is_feature_enabled(RBD_FEATURE_STRIPINGV2)); @@ -8529,23 +8768,59 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnalignedSmall) { librbd::RBD rbd; librbd::Image image; - int order = 0; + int order = 22; std::string name = this->get_temp_image_name(); - ssize_t size = 10 << 20; + ssize_t data_end = 8 << 20; - ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), + data_end + (2 << 20), &order)); ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); ceph::bufferlist bl; - bl.append(std::string(size, '1')); - ASSERT_EQ(size, image.write(0, size, bl)); + bl.append(std::string(data_end, '1')); + ASSERT_EQ(data_end, image.write(0, data_end, bl)); std::vector<diff_extent> extents; + ASSERT_EQ(0, image.diff_iterate2(NULL, 0, 0, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(0u, extents.size()); + ASSERT_EQ(0, image.diff_iterate2(NULL, 5000005, 1234, true, this->whole_object, vector_iterate_cb, &extents)); ASSERT_EQ(1u, extents.size()); ASSERT_EQ(diff_extent(5000005, 1234, true, 0), extents[0]); + extents.clear(); + + 
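+  // probe the tail of the written data: zero-length ranges must report
+  // nothing, and ranges straddling data_end must be clipped to the bytes
+  // that were actually written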
ASSERT_EQ(0, image.diff_iterate2(NULL, data_end - 1, 0, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(0, image.diff_iterate2(NULL, data_end - 1, 1, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(data_end - 1, 1, true, 0), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image.diff_iterate2(NULL, data_end - 1, 2, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(data_end - 1, 1, true, 0), extents[0]); + extents.clear(); + + ASSERT_EQ(0, image.diff_iterate2(NULL, data_end, 0, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(0u, extents.size()); + + ASSERT_EQ(0, image.diff_iterate2(NULL, data_end, 1, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(0u, extents.size()); ASSERT_PASSED(this->validate_object_map, image); } @@ -8580,6 +8855,20 @@ TYPED_TEST(DiffIterateTest, DiffIterateUnaligned) ASSERT_EQ(diff_extent(8376263, 12345, true, 0), extents[0]); ASSERT_EQ(diff_extent(8388608, 4194304, true, 0), extents[1]); ASSERT_EQ(diff_extent(12582912, 54321, true, 0), extents[2]); + extents.clear(); + + // length is clipped up to end + ASSERT_EQ(0, image.diff_iterate2(NULL, size - 1, size, true, + this->whole_object, vector_iterate_cb, + &extents)); + ASSERT_EQ(1u, extents.size()); + ASSERT_EQ(diff_extent(size - 1, 1, true, 0), extents[0]); + extents.clear(); + + // offset past end + ASSERT_EQ(-EINVAL, image.diff_iterate2(NULL, size, size, true, + this->whole_object, + vector_iterate_cb, &extents)); ASSERT_PASSED(this->validate_object_map, image); } diff --git a/src/test/mds/CMakeLists.txt b/src/test/mds/CMakeLists.txt index f80abe75083..18ebb648e68 100644 --- a/src/test/mds/CMakeLists.txt +++ b/src/test/mds/CMakeLists.txt @@ -18,11 +18,10 @@ target_link_libraries(unittest_mds_sessionfilter mds osdc ceph-common global ${B add_executable(unittest_mds_quiesce_db TestQuiesceDb.cc ../../../src/mds/QuiesceDbManager.cc - ../../../src/mds/BoostUrlImpl.cc $<TARGET_OBJECTS:unit-main> ) add_ceph_unittest(unittest_mds_quiesce_db) -target_link_libraries(unittest_mds_quiesce_db ceph-common global) +target_link_libraries(unittest_mds_quiesce_db ceph-common global Boost::url) # unittest_mds_quiesce_agent add_executable(unittest_mds_quiesce_agent diff --git a/src/test/mds/TestQuiesceDb.cc b/src/test/mds/TestQuiesceDb.cc index 19c8d9b6163..b6319e69c7e 100644 --- a/src/test/mds/TestQuiesceDb.cc +++ b/src/test/mds/TestQuiesceDb.cc @@ -91,6 +91,19 @@ class QuiesceDbTest: public testing::Test { submit_condition.notify_all(); return ++cluster_membership->epoch; } + std::atomic<std::optional<bool>> has_work_override; + bool db_thread_has_work() const override { + if (auto has_work = has_work_override.load()) { + return *has_work; + } + return QuiesceDbManager::db_thread_has_work(); + } + + void spurious_submit_wakeup() + { + std::lock_guard l(submit_mutex); + submit_condition.notify_all(); + } }; epoch_t epoch = 0; @@ -113,6 +126,16 @@ class QuiesceDbTest: public testing::Test { return promise.get_future(); } + using ListingHook = std::function<bool(QuiesceInterface::PeerId, QuiesceDbListing&)>; + std::list<std::pair<ListingHook, std::promise<void>>> listing_hooks; + + std::future<void> add_listing_hook(ListingHook&& predicate) + { + std::lock_guard l(comms_mutex); + auto&& [_, promise] = listing_hooks.emplace_back(predicate, std::promise<void> {}); + return 
promise.get_future(); + } + void SetUp() override { for (QuiesceInterface::PeerId r = mds_gid_t(1); r < mds_gid_t(11); r++) { managers[r].reset(new TestQuiesceDbManager()); @@ -153,8 +176,18 @@ class QuiesceDbTest: public testing::Test { std::unique_lock l(comms_mutex); if (epoch == this->epoch) { if (this->managers.contains(recipient)) { + std::queue<std::promise<void>> done_hooks; dout(10) << "listing from " << me << " (leader=" << leader << ") to " << recipient << " for version " << listing.db_version << " with " << listing.sets.size() << " sets" << dendl; + for (auto it = listing_hooks.begin(); it != listing_hooks.end();) { + if (it->first(recipient, listing)) { + done_hooks.emplace(std::move(it->second)); + it = listing_hooks.erase(it); + } else { + it++; + } + } + ceph::bufferlist bl; encode(listing, bl); listing.clear(); @@ -163,6 +196,11 @@ class QuiesceDbTest: public testing::Test { this->managers[recipient]->submit_peer_listing({me, std::move(listing)}); comms_cond.notify_all(); + l.unlock(); + while (!done_hooks.empty()) { + done_hooks.front().set_value(); + done_hooks.pop(); + } return 0; } } @@ -562,7 +600,7 @@ TEST_F(QuiesceDbTest, QuiesceRequestValidation) return !testing::Test::HasFailure(); }; - const auto ops = std::array { QuiesceDbRequest::RootsOp::INCLUDE_OR_QUERY, QuiesceDbRequest::RootsOp::EXCLUDE_OR_RELEASE, QuiesceDbRequest::RootsOp::RESET_OR_CANCEL, QuiesceDbRequest::RootsOp::__INVALID }; + const auto ops = std::array { QuiesceDbRequest::RootsOp::INCLUDE_OR_QUERY, QuiesceDbRequest::RootsOp::EXCLUDE_OR_CANCEL, QuiesceDbRequest::RootsOp::RESET_OR_RELEASE, QuiesceDbRequest::RootsOp::__INVALID }; const auto strings = nullopt_and_default<std::string>(); const auto versions = nullopt_and_default<QuiesceSetVersion>(); const auto intervals = nullopt_and_default<QuiesceTimeInterval>(); @@ -766,7 +804,7 @@ TEST_F(QuiesceDbTest, SetModification) // cancel with no set_id should cancel all active sets ASSERT_EQ(OK(), run_request([](auto& r) { - r.control.roots_op = QuiesceDbRequest::RootsOp::RESET_OR_CANCEL; + r.cancel(); })); ASSERT_TRUE(db(mds_gid_t(1)).sets.at("set1").members.at("file:/root4").excluded); @@ -829,7 +867,7 @@ TEST_F(QuiesceDbTest, Timeouts) { ASSERT_EQ(OK(), run_request([](auto& r) { r.set_id = "set2"; - r.release_roots(); + r.release(); })); ASSERT_EQ(QuiesceState::QS_RELEASING, last_request->response.sets.at("set2").rstate.state); @@ -939,7 +977,7 @@ TEST_F(QuiesceDbTest, InterruptedQuiesceAwait) ASSERT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1"; r.expiration = sec(100); - r.timeout = sec(10); + r.timeout = sec(100); r.roots.emplace("root1"); })); @@ -1006,7 +1044,7 @@ TEST_F(QuiesceDbTest, InterruptedQuiesceAwait) ASSERT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1"; - r.reset_roots({}); + r.cancel(); })); EXPECT_EQ(ERR(ECANCELED), await3.wait_result()); @@ -1069,7 +1107,7 @@ TEST_F(QuiesceDbTest, RepeatedQuiesceAwait) { for (int i = 0; i < 2; i++) { ASSERT_EQ(ERR(EINPROGRESS), run_request([=](auto& r) { r.set_id = "set1"; - r.release_roots(); + r.release(); r.await = (expiration*2)/5; })); } @@ -1077,7 +1115,7 @@ TEST_F(QuiesceDbTest, RepeatedQuiesceAwait) { // NB: the ETIMEDOUT is the await result, while the set itself should be EXPIRED EXPECT_EQ(ERR(ETIMEDOUT), run_request([=](auto& r) { r.set_id = "set1"; - r.release_roots(); + r.release(); r.await = expiration; })); @@ -1090,7 +1128,7 @@ TEST_F(QuiesceDbTest, RepeatedQuiesceAwait) { EXPECT_EQ(ERR(EPERM), run_request([](auto& r) { r.set_id = "set1"; - r.release_roots(); + r.release(); 
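+      // the set has already expired at this point, so a further release must be rejected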
})); }
@@ -1115,7 +1153,7 @@ TEST_F(QuiesceDbTest, ReleaseAwait) for (auto&& set_id : { "set1", "set2" }) { ASSERT_EQ(ERR(EPERM), run_request([set_id](auto& r) { r.set_id = set_id;
- r.release_roots();
+ r.release();
r.await = sec(1); })) << "bad release-await " << set_id; }
@@ -1134,13 +1172,13 @@ TEST_F(QuiesceDbTest, ReleaseAwait) auto & release_await1 = start_request([](auto &r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
r.await = sec(100); }); auto& release_await2 = start_request([](auto& r) { r.set_id = "set2";
- r.release_roots();
+ r.release();
r.await = sec(100); });
@@ -1155,7 +1193,7 @@ TEST_F(QuiesceDbTest, ReleaseAwait) // we can request release again without any version bump EXPECT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
})); EXPECT_EQ(releasing_v1, last_request->response.sets.at("set1").version );
@@ -1163,7 +1201,7 @@ TEST_F(QuiesceDbTest, ReleaseAwait) // we can release-await with a short await timeout EXPECT_EQ(ERR(EINPROGRESS), run_request([](auto& r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
r.await = sec(0.1); }));
@@ -1197,7 +1235,7 @@ TEST_F(QuiesceDbTest, ReleaseAwait) // await again auto& release_await22 = start_request([](auto& r) { r.set_id = "set2";
- r.release_roots();
+ r.release();
r.await = sec(100); });
@@ -1235,18 +1273,18 @@ TEST_F(QuiesceDbTest, ReleaseAwait) // it should be OK to request release or release-await on a RELEASED set EXPECT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
})); EXPECT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
r.await = sec(0.1); })); // it's invalid to send a release without a set id EXPECT_EQ(ERR(EINVAL), run_request([](auto& r) {
- r.release_roots();
+ r.release();
})); }
@@ -1347,6 +1385,34 @@ TEST_F(QuiesceDbTest, LeaderShutdown) } /* ================================================================ */
+TEST_F(QuiesceDbTest, MultiRankBootstrap)
+{
+  // create a cluster with a peer that doesn't process messages
+  managers.at(mds_gid_t(2))->has_work_override = false;
+  ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1), mds_gid_t(2) }));
+
+  const QuiesceTimeInterval PEER_DISCOVERY_INTERVAL = std::chrono::milliseconds(1100);
+
+  // we should now be in the bootstrap loop,
+  // which should send discoveries to silent peers
+  // once per PEER_DISCOVERY_INTERVAL
+  for (int i = 0; i < 5; i++) {
+
+    if (i > 2) {
+      // throw a wrench into the works by disrupting the wait sleep in the bootstrap flow
+      managers.at(mds_gid_t(1))->spurious_submit_wakeup();
+    }
+
+    // wait for the next peer discovery request
+    auto saw_discovery = add_listing_hook([](auto recipient, auto const& listing) {
+      return recipient == mds_gid_t(2) && listing.db_version.set_version == 0;
+    });
+
+    EXPECT_EQ(std::future_status::ready, saw_discovery.wait_for(PEER_DISCOVERY_INTERVAL + std::chrono::milliseconds(100)));
+  }
+}
+
+/* ================================================================ */ TEST_F(QuiesceDbTest, MultiRankQuiesce) { ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1), mds_gid_t(2), mds_gid_t(3) }));
@@ -1453,7 +1519,7 @@ TEST_F(QuiesceDbTest, MultiRankRelease) // release roots ASSERT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1";
- r.release_roots();
+ r.release();
})); EXPECT_EQ(QS_RELEASING, last_request->response.sets.at("set1").rstate.state);
@@ -1463,7 +1529,7 @@ TEST_F(QuiesceDbTest, MultiRankRelease) auto &async_release = start_request([](auto& r) { r.set_id = 
"set2"; r.await = sec(100); - r.release_roots(); + r.release(); }); EXPECT_EQ(NA(), async_release.check_result()); @@ -1471,7 +1537,7 @@ TEST_F(QuiesceDbTest, MultiRankRelease) // shouldn't hurt to run release twice for set 1 ASSERT_EQ(OK(), run_request([](auto& r) { r.set_id = "set1"; - r.release_roots(); + r.release(); })); EXPECT_EQ(releasing_v, last_request->response.sets.at("set1").version); @@ -1522,7 +1588,7 @@ TEST_F(QuiesceDbTest, MultiRankRelease) ASSERT_EQ(OK(), run_request([set_id](auto& r) { r.set_id = set_id; r.await = sec(100); - r.release_roots(); + r.release(); })); ASSERT_EQ(ERR(EPERM), run_request([set_id](auto& r) { r.set_id = set_id; @@ -1659,4 +1725,80 @@ TEST_F(QuiesceDbTest, AckDuringEpochMismatch) r.set_id = "set1"; r.await = sec(10); })); -}
\ No newline at end of file +} + +/* ==================================== */ +TEST_F(QuiesceDbTest, QuiesceRootMerge) +{ + ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1) })); + managers.at(mds_gid_t(1))->reset_agent_callback(QUIESCING_AGENT_CB); + + ASSERT_EQ(OK(), run_request([](auto& r) { + r.set_id = "set1"; + r.timeout = sec(60); + r.expiration = sec(60); + r.await = sec(60); + r.include_roots({ "root1", "root2" }); + })); + + EXPECT_EQ(QS_QUIESCED, last_request->response.sets.at("set1").rstate.state); + auto set1_exp = last_request->response.sets.at("set1").expiration; + + // reset the agent callback to SILENT so that + // our sets stay RELEASING and QUIESCING forever + managers.at(mds_gid_t(1))->reset_agent_callback(SILENT_AGENT_CB); + + ASSERT_EQ(OK(), run_request([](auto& r) { + r.set_id = "set1"; + r.release(); + })); + + EXPECT_EQ(QS_RELEASING, last_request->response.sets.at("set1").rstate.state); + + ASSERT_EQ(OK(), run_request([=](auto& r) { + r.set_id = "set2"; + r.timeout = set1_exp*2; + r.expiration = set1_exp*2; + r.include_roots({ "root2", "root3" }); + })); + + EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set2").rstate.state); + + // at this point, we should expect to have root1 RELEASING, root3 QUIESCING + // and root2, which is shared, should take the min state (QUIESCING) and the max ttl + + auto agent_map = [this]() -> std::optional<QuiesceMap> { + std::promise<QuiesceMap> agent_map_promise; + auto agent_map_future = agent_map_promise.get_future(); + + managers.at(mds_gid_t(1))->reset_agent_callback([&agent_map_promise](QuiesceMap& map) -> bool { + try { + agent_map_promise.set_value(map); + } catch (std::future_error) { + // ignore this if we accidentally get called more than once + } + return false; + }); + + if (std::future_status::ready == agent_map_future.wait_for(std::chrono::seconds(10))) { + return agent_map_future.get(); + } + else { + return std::nullopt; + } + }(); + + ASSERT_TRUE(agent_map.has_value()); + EXPECT_EQ(3, agent_map->roots.size()); + + { + auto const & r1 = agent_map->roots.at("file:/root1"); + auto const & r2 = agent_map->roots.at("file:/root2"); + auto const & r3 = agent_map->roots.at("file:/root3"); + + EXPECT_EQ(QS_RELEASING, r1.state); + EXPECT_EQ(QS_QUIESCING, r2.state); + EXPECT_EQ(QS_QUIESCING, r3.state); + EXPECT_EQ(std::max(r1.ttl, r3.ttl), r2.ttl); + } +} diff --git a/src/test/neorados/cls.cc b/src/test/neorados/cls.cc index aadc8172098..6463ec2f96c 100644 --- a/src/test/neorados/cls.cc +++ b/src/test/neorados/cls.cc @@ -50,49 +50,3 @@ CORO_TEST_F(NeoRadosCls, DNE, NeoRadosTest) sys::errc::operation_not_supported); co_return; } - -CORO_TEST_F(NeoRadosCls, RemoteReads, NeoRadosTest) -{ - SKIP_IF_CRIMSON(); - static constexpr std::size_t object_size = 4096; - static constexpr std::array oids{"src_object.1"sv, "src_object.2"sv, - "src_object.3"sv}; - - std::array<char, object_size> buf; - buf.fill(1); - - for (const auto& oid : oids) { - buffer::list in; - in.append(buf.data(), buf.size()); - co_await execute(oid, WriteOp{}.write_full(std::move(in))); - } - - // Construct JSON request passed to "test_gather" method, and in - // turn, to "test_read" method - buffer::list in; - { - auto formatter = std::make_unique<JSONFormatter>(true); - formatter->open_object_section("foo"); - encode_json("src_objects", oids, formatter.get()); - encode_json("cls", "test_remote_reads", formatter.get()); - encode_json("method", "test_read", formatter.get()); - encode_json("pool", pool_name(), formatter.get()); - 
formatter->close_section(); - formatter->flush(in); - } - - static const auto target = "tgt_object"s; - - // Create target object by combining data gathered from source - // objects using "test_read" method - co_await execute(target, - WriteOp{}.exec("test_remote_reads", "test_gather", in)); - - - // Read target object and check its size. - buffer::list out; - co_await execute(target, ReadOp{}.read(0, 0, &out)); - EXPECT_EQ(3 * object_size, out.length()); - - co_return; -} diff --git a/src/test/objectstore/CMakeLists.txt b/src/test/objectstore/CMakeLists.txt index a24e627d8ea..bddff3f6727 100644 --- a/src/test/objectstore/CMakeLists.txt +++ b/src/test/objectstore/CMakeLists.txt @@ -6,7 +6,8 @@ install(TARGETS ceph_perf_objectstore add_library(store_test_fixture OBJECT store_test_fixture.cc) target_include_directories(store_test_fixture PRIVATE - $<TARGET_PROPERTY:GTest::GTest,INTERFACE_INCLUDE_DIRECTORIES>) + $<TARGET_PROPERTY:GTest::GTest,INTERFACE_INCLUDE_DIRECTORIES> + legacy-option-headers) add_executable(ceph_test_objectstore store_test.cc @@ -23,6 +24,8 @@ target_link_libraries(ceph_test_objectstore install(TARGETS ceph_test_objectstore DESTINATION ${CMAKE_INSTALL_BINDIR}) +add_subdirectory(allocsim) + add_executable(ceph_test_keyvaluedb test_kv.cc) target_link_libraries(ceph_test_keyvaluedb @@ -140,11 +143,9 @@ if(WITH_BLUESTORE) endif() # fragmentation simulator -add_library(ObjectStoreImitator OBJECT ObjectStoreImitator.cc) - add_executable(ceph_test_fragmentation_sim Fragmentation_simulator.cc - $<TARGET_OBJECTS:ObjectStoreImitator>) + ObjectStoreImitator.cc) add_ceph_unittest(ceph_test_fragmentation_sim) target_link_libraries(ceph_test_fragmentation_sim os global) diff --git a/src/test/objectstore/allocsim/CMakeLists.txt b/src/test/objectstore/allocsim/CMakeLists.txt new file mode 100644 index 00000000000..cbfbc698863 --- /dev/null +++ b/src/test/objectstore/allocsim/CMakeLists.txt @@ -0,0 +1,10 @@ + + +add_executable(replayer ops_replayer.cc) + +target_link_libraries(replayer + PRIVATE + fmt + librados + Boost::program_options +) diff --git a/src/test/objectstore/allocsim/ops_replayer.cc b/src/test/objectstore/allocsim/ops_replayer.cc new file mode 100644 index 00000000000..3cd92113b3a --- /dev/null +++ b/src/test/objectstore/allocsim/ops_replayer.cc @@ -0,0 +1,399 @@ +#include <algorithm> +#include <cassert> +#include <cstdlib> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <thread> +#include <condition_variable> +#include <cstdint> +#include <ctime> +#include <filesystem> +#include <mutex> +#include "include/rados/buffer_fwd.h" +#include "include/rados/librados.hpp" +#include <atomic> +#include <fmt/format.h> +#include <map> +#include <memory> +#include <random> +#include <string> +#include <iostream> +#include <vector> + +#include <boost/program_options/variables_map.hpp> +#include <boost/program_options/parsers.hpp> + +namespace po = boost::program_options; + + +using namespace std; +using namespace ceph; + + +static map<string, shared_ptr<string>> string_cache; +static std::atomic<uint64_t> in_flight_ops(0); +static std::condition_variable cv; +static std::mutex in_flight_mutex; + +enum op_type { + Write, + WriteFull, + Read, + Truncate, + Zero +}; + +struct Op { + time_t at; + op_type type; + uint64_t offset; + uint64_t length; + shared_ptr<string> object; + shared_ptr<string> collection; + shared_ptr<string> who; + librados::AioCompletion *completion; + bufferlist read_bl; + + Op( + time_t at, + op_type type, + uint64_t offset, + uint64_t 
length, + shared_ptr<string> object, + shared_ptr<string> collection, + shared_ptr<string> who + ) : at(at), type(type), offset(offset), length(length), object(object), collection(collection), who(who), completion(nullptr) {} + +}; + +struct ParserContext { + map<string, shared_ptr<string>> string_cache; + vector<Op> ops; + char *start; // starts and ends in new line or eof + char *end; + uint64_t max_buffer_size; +}; + +class MemoryStreamBuf : public std::streambuf { +public: + MemoryStreamBuf(const char* start, const char* end) { + this->setg(const_cast<char*>(start), const_cast<char*>(start), const_cast<char*>(end)); + } +}; + +class MemoryInputStream : public std::istream { + MemoryStreamBuf _buffer; +public: + MemoryInputStream(const char* start, const char* end) + : std::istream(&_buffer), _buffer(start, end) { + rdbuf(&_buffer); + } +}; + +void gen_buffer(bufferlist& bl, uint64_t size) { + std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size); + std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char> e; + std::generate(buffer.get(), buffer.get()+size, std::ref(e)); + bl.append(buffer.get(), size); +} + +void completion_cb(librados::completion_t cb, void *arg) { + Op *op = static_cast<Op*>(arg); + // Process the completed operation here + // std::cout << fmt::format("Completed op {} object={} range={}~{}", op->type, *op->object, op->offset, op->length) << std::endl; + + delete op->completion; + op->completion = nullptr; + if (op->type == Read) { + op->read_bl.clear(); + } + + { + std::lock_guard<std::mutex> lock(in_flight_mutex); + in_flight_ops--; + } + cv.notify_one(); +} + +void parse_entry_point(shared_ptr<ParserContext> context) { + cout << fmt::format("Starting parser thread start={:p} end={:p}", context->start, context->end) << endl; + + string date, time, who, type, range, object, collection; + MemoryInputStream fstream(context->start, context->end); + const char* date_format_first_column = "%Y-%m-%d"; + // we expect this input: + // 2024-05-10 12:06:24.990831+00:00 client.607247697.0:5632274 write 4096~4096 2:d03a455a:::08b0f2fd5f20f504e76c2dd3d24683a1:head 2.1c0b + while (fstream >> date){ + // cout << date << endl; + tm t; + char* res = strptime(date.c_str(), date_format_first_column, &t); + if (res == nullptr) { + fstream.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); + continue; + } + fstream >> time >> who >> type >> range >> object >> collection; + + date += " " + time; + // cout << date << endl; + // FIXME: this is wrong but it returns a reasonable bad timestamp :P + const char* date_format_full = "%Y-%m-%d %H:%M:%S.%f%z"; + res = strptime(date.c_str(), date_format_full, &t); + time_t at = mktime(&t); + + // cout << fmt::format("{} {} {} {} {} {} {}", date, at, who, type, range, object, collection) << endl; + + shared_ptr<string> who_ptr = make_shared<string>(who); + auto who_it = string_cache.find(who); + if (who_it == string_cache.end()) { + string_cache.insert({ who, who_ptr }); + } else { + who_ptr = who_it->second; + } + + shared_ptr<string> object_ptr = make_shared<string>(object); + auto object_it = string_cache.find(object); + if (object_it == string_cache.end()) { + string_cache.insert({ object, object_ptr }); + } else { + object_ptr = object_it->second; + } + + op_type ot; + if (type == "write") { + ot = Write; + } else if (type == "writefull") { + ot = WriteFull; + } else if (type == "read") { + ot = Read; + } else if (type == "sparse-read") { + ot = Read; + } else if (type == "truncate") { + ot = Truncate; + 
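+      // NB: truncate entries carry a single number (the offset);
+      // the length parse below is skipped for them, leaving length == 0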
} else if (type == "zero") {
+      ot = Zero;
+    } else {
+      cout << "invalid type " << type << endl;
+      exit(1);
+    }
+
+    shared_ptr<string> collection_ptr = make_shared<string>(collection);
+    auto collection_it = string_cache.find(collection);
+    if (collection_it == string_cache.end()) {
+      string_cache.insert({ collection, collection_ptr });
+    } else {
+      collection_ptr = collection_it->second;
+    }
+
+    uint64_t offset = 0, length = 0;
+    stringstream range_stream(range);
+    string offset_str, length_str;
+    getline(range_stream, offset_str, '~');
+    offset = stoll(offset_str);
+
+    if (ot != Truncate) {
+      // Truncate only has one number
+      getline(range_stream, length_str, '~');
+      length = stoll(length_str);
+    }
+
+    context->max_buffer_size = max(length, context->max_buffer_size);
+
+    context->ops.push_back(Op(at, ot, offset, length, object_ptr, collection_ptr, who_ptr));
+  }
+}
+
+void worker_thread_entry(uint64_t id, uint64_t nworker_threads, vector<Op> &ops, uint64_t max_buffer_size, uint64_t io_depth, librados::IoCtx* io) {
+  // Create a buffer big enough for every operation. We will take enough bytes from it for each operation.
+  bufferlist bl;
+  gen_buffer(bl, max_buffer_size);
+  hash<string> hasher;
+
+  cout << fmt::format("Starting thread {} with io_depth={} max_buffer_size={}", id, io_depth, max_buffer_size) << endl;
+  for (auto &op : ops) {
+    {
+      std::unique_lock<std::mutex> lock(in_flight_mutex);
+      cv.wait(lock, [&io_depth] { return in_flight_ops < io_depth; });
+    }
+    size_t key = hasher(*op.who) % nworker_threads;
+    if (key != id) {
+      continue;
+    }
+    // cout << fmt::format("Running op {} object={} range={}~{}", op.type, *op.object, op.offset, op.length) << endl;
+    op.completion = librados::Rados::aio_create_completion(static_cast<void*>(&op), completion_cb);
+    switch (op.type) {
+    case Write: {
+      bufferlist trimmed;
+      trimmed.substr_of(bl, 0, op.length);
+      int ret = io->aio_write(*op.object, op.completion, trimmed, op.length, op.offset);
+      if (ret != 0) {
+        cout << fmt::format("Error writing ecode={}", ret) << endl;
+      }
+      break;
+    }
+    case WriteFull: {
+      bufferlist trimmed;
+      trimmed.substr_of(bl, 0, op.length);
+      int ret = io->aio_write_full(*op.object, op.completion, trimmed);
+      if (ret != 0) {
+        cout << fmt::format("Error writing full ecode={}", ret) << endl;
+      }
+      break;
+    }
+    case Read: {
+      int ret = io->aio_read(*op.object, op.completion, &op.read_bl, op.length, op.offset);
+      if (ret != 0) {
+        cout << fmt::format("Error reading ecode={}", ret) << endl;
+      }
+      break;
+    }
+    case Truncate: {
+      librados::ObjectWriteOperation write_operation;
+      write_operation.truncate(op.offset);
+      int ret = io->aio_operate(*op.object, op.completion, &write_operation);
+      if (ret != 0) {
+        cout << fmt::format("Error truncating ecode={}", ret) << endl;
+      }
+      break;
+    }
+    case Zero: {
+      librados::ObjectWriteOperation write_operation;
+      write_operation.zero(op.offset, op.length);
+      int ret = io->aio_operate(*op.object, op.completion, &write_operation);
+      if (ret != 0) {
+        cout << fmt::format("Error zeroing ecode={}", ret) << endl;
+      }
+      break;
+    }
+    }
+    in_flight_ops++;
+  }
+}
+
+void usage(po::options_description &desc) {
+  cout << desc << std::endl;
+}
+
+int main(int argc, char** argv) {
+  vector<Op> ops;
+  librados::Rados cluster;
+  librados::IoCtx io;
+  uint64_t max_buffer_size = 0; // We can use a single buffer for writes and trim it at will. The buffer will be the size of the maximum length op. 
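+  // (see worker_thread_entry: per-op payloads are carved out of that one
+  // buffer with substr_of(), which references the underlying segments
+  // rather than copying them)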
+
+  // options
+  uint64_t io_depth = 8;
+  uint64_t nparser_threads = 16;
+  uint64_t nworker_threads = 16;
+  string file("input.txt");
+  string ceph_conf_path("./ceph.conf");
+  string pool("test_pool");
+
+  po::options_description po_options("Options");
+  po_options.add_options()
+    ("help,h", "produce help message")
+    (",i", po::value<string>(&file)->default_value("input.txt"), "Input file (output of op_scraper.py)")
+    ("ceph-conf", po::value<string>(&ceph_conf_path)->default_value("ceph.conf"), "Path to ceph conf")
+    ("io-depth", po::value<uint64_t>(&io_depth)->default_value(64), "I/O depth")
+    ("parser-threads", po::value<uint64_t>(&nparser_threads)->default_value(16), "Number of parser threads")
+    ("worker-threads", po::value<uint64_t>(&nworker_threads)->default_value(16), "Number of I/O worker threads")
+    ("pool", po::value<string>(&pool)->default_value("test_pool"), "Pool to use for I/O")
+    ;
+
+  po::options_description po_all("All options");
+  po_all.add(po_options);
+
+  po::variables_map vm;
+  po::parsed_options parsed = po::command_line_parser(argc, argv).options(po_all).allow_unregistered().run();
+  po::store(parsed, vm);
+  po::notify(vm);
+  if (vm.count("help")) {
+    usage(po_all);
+    exit(EXIT_SUCCESS);
+  }
+
+  // Parse input file
+  vector<std::thread> parser_threads;
+  vector<shared_ptr<ParserContext>> parser_contexts;
+  int fd = open(file.c_str(), O_RDONLY);
+  if (fd == -1) {
+    cout << "Error opening file" << endl;
+    exit(EXIT_FAILURE);
+  }
+  struct stat file_stat;
+  fstat(fd, &file_stat);
+  char* mapped_buffer = (char*)mmap(NULL, file_stat.st_size, PROT_READ, MAP_SHARED, fd, 0);
+  if (mapped_buffer == MAP_FAILED) {
+    cout << "error mapping buffer" << endl;
+    exit(EXIT_FAILURE);
+  }
+  uint64_t start_offset = 0;
+  uint64_t step_size = file_stat.st_size / nparser_threads;
+  for (uint64_t i = 0; i < nparser_threads; i++) {
+    char* end = mapped_buffer + start_offset + step_size;
+    while(*end != '\n') {
+      end--;
+    }
+    if (i == nparser_threads - 1) {
+      end = mapped_buffer + file_stat.st_size;
+    }
+    shared_ptr<ParserContext> context = make_shared<ParserContext>();
+    context->start = mapped_buffer + start_offset;
+    context->end = end;
+    context->max_buffer_size = 0;
+    parser_contexts.push_back(context);
+    parser_threads.push_back(std::thread(parse_entry_point, context));
+    start_offset += (end - mapped_buffer - start_offset);
+  }
+  for (auto& t : parser_threads) {
+    t.join();
+  }
+  // reduce
+  for (auto context : parser_contexts) {
+    string_cache.insert(context->string_cache.begin(), context->string_cache.end());
+    ops.insert(ops.end(), context->ops.begin(), context->ops.end());
+    max_buffer_size = max(context->max_buffer_size, max_buffer_size);
+    context->string_cache.clear();
+    context->ops.clear();
+  }
+
+  int ret = cluster.init2("client.admin", "ceph", 0);
+  if (ret < 0) {
+    std::cerr << "Couldn't init ceph! error " << ret << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::cout << "cluster init ready" << std::endl;
+
+  ret = cluster.conf_read_file(ceph_conf_path.c_str());
+  if (ret < 0) {
+    std::cerr << "Couldn't read the Ceph configuration file! error " << ret << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::cout << "cluster config ready" << std::endl;
+  ret = cluster.connect();
+  if (ret < 0) {
+    std::cerr << "Couldn't connect to cluster! error " << ret << std::endl;
+    return EXIT_FAILURE;
+  }
+  std::cout << "cluster connect ready" << std::endl;
+  ret = cluster.ioctx_create(pool.c_str(), io);
+  if (ret < 0) {
+    std::cerr << "Couldn't set up ioctx! 
error " << ret << std::endl; + exit(EXIT_FAILURE); + } + std::cout << fmt::format("pool {} ready", pool) << std::endl; + + + // process ops + vector<thread> worker_threads; + for (int i = 0; i < nworker_threads; i++) { + worker_threads.push_back(thread(worker_thread_entry, i, nworker_threads, std::ref(ops), max_buffer_size, io_depth, &io)); + } + for (auto& worker : worker_threads) { + worker.join(); + } + while (in_flight_ops > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + cout << ops.size() << endl; + return 0; +} diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index 4974fae125b..c14a5b02889 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -55,6 +55,20 @@ typedef boost::mt11213b gen_type; const uint64_t DEF_STORE_TEST_BLOCKDEV_SIZE = 10240000000; #define dout_context g_ceph_context +static uint64_t get_testing_seed(const char* function) { + char* random_seed = getenv("TEST_RANDOM_SEED"); + uint64_t testing_seed; + if (random_seed) { + testing_seed = atoi(random_seed); + } else { + testing_seed = time(NULL); + } + cout << "seed for " << function << " is " << testing_seed << std::endl; + return testing_seed; +} + +#define TEST_RANDOM_SEED get_testing_seed(__func__) + static bool bl_eq(bufferlist& expected, bufferlist& actual) { if (expected.contents_equal(actual)) @@ -785,7 +799,7 @@ TEST_P(StoreTest, SimpleColPreHashTest) { uint32_t pg_num = 128; boost::uniform_int<> pg_id_range(0, pg_num); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); int pg_id = pg_id_range(rng); int objs_per_folder = abs(merge_threshold) * 16 * g_ceph_context->_conf->filestore_split_multiple; @@ -5175,7 +5189,7 @@ void StoreTest::doSyntheticTest( uint64_t max_obj, uint64_t max_wr, uint64_t align) { MixedGenerator gen(555); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD)); SetVal(g_conf(), "bluestore_fsck_on_mount", "false"); @@ -5232,7 +5246,7 @@ void StoreTest::doSyntheticLimitedTest( uint64_t max_obj, uint64_t max_wr, uint64_t align) { MixedGenerator gen(555); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD)); SetVal(g_conf(), "bluestore_fsck_on_mount", "false"); @@ -5506,7 +5520,7 @@ TEST_P(StoreTestSpecificAUSize, SyntheticMatrixPreferDeferred) { TEST_P(StoreTest, AttrSynthetic) { MixedGenerator gen(447); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); coll_t cid(spg_t(pg_t(0,447),shard_id_t::NO_SHARD)); SyntheticWorkloadState test_obj(store.get(), &gen, &rng, cid, 40*1024, 4*1024, 0); @@ -7248,6 +7262,7 @@ TEST_P(DeferredReplayTest, DeferredReplay) { // SetVal(g_conf(), "bluestore_debug_omit_kv_commit", "true"); g_conf().apply_changes(nullptr); + ch.reset(nullptr); store->umount(); SetVal(g_conf(), "bluestore_debug_omit_kv_commit", "false"); g_conf().apply_changes(nullptr); @@ -7332,6 +7347,7 @@ TEST_P(DeferredReplayTest, DeferredReplayInReadOnly) { // SetVal(g_conf(), "bluestore_debug_omit_kv_commit", "true"); g_conf().apply_changes(nullptr); + ch.reset(nullptr); store->umount(); SetVal(g_conf(), "bluestore_debug_omit_kv_commit", "false"); g_conf().apply_changes(nullptr); @@ -7378,7 +7394,7 @@ void doMany4KWritesTest(ObjectStore* store, unsigned write_alignment) { MixedGenerator gen(555); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); coll_t cid(spg_t(pg_t(0,555), shard_id_t::NO_SHARD)); store_statfs_t res_stat; @@ -8831,6 +8847,152 
@@ TEST_P(StoreTestSpecificAUSize, DeferredDifferentChunks) { } } +TEST_P(StoreTestSpecificAUSize, DeferredAndClone) { + + if (string(GetParam()) != "bluestore") + return; + + size_t alloc_size = 4096; + size_t prefer_deferred_size = 65536; + + SetVal(g_conf(), "bluestore_block_db_create", "true"); + SetVal(g_conf(), "bluestore_block_db_size", stringify(1 << 30).c_str()); + + StartDeferred(alloc_size); + SetVal(g_conf(), "bluestore_prefer_deferred_size", + stringify(prefer_deferred_size).c_str()); + g_conf().apply_changes(nullptr); + + int r; + coll_t cid; + + ghobject_t hoid(hobject_t("test", "", CEPH_NOSNAP, 0, -1, "")); + hoid.hobj.pool = -1; + ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP))); + hoid2.hobj.pool = -1; + C_SaferCond c1; + + ObjectStore::CollectionHandle ch = store->create_new_collection(cid); + { + ObjectStore::Transaction t; + t.create_collection(cid, 0); + t.touch(cid, hoid); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + bufferlist bl; + bl.append(std::string(3, 'z')); + t.write(cid, hoid, 0, bl.length(), bl, + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + cerr << "Clone range object" << std::endl; + ObjectStore::Transaction t; + t.clone_range(cid, hoid, hoid2, 0, 3, 0); + t.register_on_commit(&c1); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + c1.wait(); + { + bufferlist bl, expected; + r = store->read(ch, hoid2, 0, 3, bl); + ASSERT_EQ(r, 3); + expected.append(string(3, 'z')); + ASSERT_TRUE(bl_eq(bl, expected)); + } + { + ObjectStore::Transaction t; + t.remove(cid, hoid); + t.remove(cid, hoid2); + t.remove_collection(cid); + cerr << "Cleaning" << std::endl; + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } +} + +TEST_P(StoreTestSpecificAUSize, DeferredAndClone2) { + + if (string(GetParam()) != "bluestore") + return; + + size_t alloc_size = 4096; + size_t prefer_deferred_size = 32768; + + SetVal(g_conf(), "bluestore_block_db_create", "true"); + SetVal(g_conf(), "bluestore_block_db_size", stringify(1 << 30).c_str()); + + StartDeferred(alloc_size); + SetVal(g_conf(), "bluestore_prefer_deferred_size", + stringify(prefer_deferred_size).c_str()); + g_conf().apply_changes(nullptr); + + int r; + coll_t cid; + + ghobject_t hoid(hobject_t("test", "", CEPH_NOSNAP, 0, -1, "")); + hoid.hobj.pool = -1; + ghobject_t hoid2(hobject_t(sobject_t("Object 2", CEPH_NOSNAP))); + hoid2.hobj.pool = -1; + C_SaferCond c1, c2; + + ObjectStore::CollectionHandle ch = store->create_new_collection(cid); + { + ObjectStore::Transaction t; + t.create_collection(cid, 0); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + { + ObjectStore::Transaction t; + t.touch(cid, hoid); + bufferlist bl; + bl.append(std::string(0x10000, 'h')); + t.write(cid, hoid, 0, bl.length(), bl, + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE); + t.register_on_commit(&c1); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + c1.wait(); + + { + cerr << "Overwrite some and clone range object" << std::endl; + ObjectStore::Transaction t; + bufferlist bl; + bl.append(std::string(0x400, 'z')); + t.write(cid, hoid, 0, bl.length(), bl, + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE); + t.clone_range(cid, hoid, hoid2, 0, 0x10000, 0); + t.register_on_commit(&c2); + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } + c2.wait(); + { + bufferlist bl, expected; + r = store->read(ch, hoid2, 0, 0x1000, bl); + 
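+    // hoid2 was cloned after the deferred 0x400-byte overwrite in the same
+    // transaction, so its first page must mix the new 'z' bytes with the
+    // original 'h' fill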
ASSERT_EQ(r, 0x1000); + expected.append(string(0x400, 'z')); + expected.append(string(0xc00, 'h')); + ASSERT_TRUE(bl_eq(bl, expected)); + } + { + ObjectStore::Transaction t; + t.remove(cid, hoid); + t.remove(cid, hoid2); + t.remove_collection(cid); + cerr << "Cleaning" << std::endl; + r = queue_transaction(store, ch, std::move(t)); + ASSERT_EQ(r, 0); + } +} + TEST_P(StoreTestSpecificAUSize, BlobReuseOnOverwriteReverse) { if (string(GetParam()) != "bluestore") @@ -10657,7 +10819,7 @@ void doManySetAttr(ObjectStore* store, std::function<void(ObjectStore*)> do_check_fn) { MixedGenerator gen(447); - gen_type rng(time(NULL)); + gen_type rng(TEST_RANDOM_SEED); coll_t cid(spg_t(pg_t(0, 447), shard_id_t::NO_SHARD)); SyntheticWorkloadState test_obj(store, &gen, &rng, cid, 0, 0, 0); @@ -11205,6 +11367,7 @@ int main(int argc, char **argv) { g_ceph_context->_conf.set_val_or_die("bluefs_check_volume_selector_on_umount", "true"); g_ceph_context->_conf.set_val_or_die("bdev_debug_aio", "true"); + g_ceph_context->_conf.set_val_or_die("log_max_recent", "10000"); // specify device size g_ceph_context->_conf.set_val_or_die("bluestore_block_size", diff --git a/src/test/objectstore/test_bluestore_types.cc b/src/test/objectstore/test_bluestore_types.cc index db0ee380891..f02da9df5c9 100644 --- a/src/test/objectstore/test_bluestore_types.cc +++ b/src/test/objectstore/test_bluestore_types.cc @@ -1,18 +1,18 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#include "include/types.h" -#include "os/bluestore/bluestore_types.h" -#include "gtest/gtest.h" -#include "include/stringify.h" +#include "common/ceph_argparse.h" #include "common/ceph_time.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "include/stringify.h" +#include "include/types.h" +#include "os/bluestore/AvlAllocator.h" #include "os/bluestore/BlueStore.h" +#include "os/bluestore/bluestore_types.h" #include "os/bluestore/simple_bitmap.h" -#include "os/bluestore/AvlAllocator.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "global/global_context.h" #include "perfglue/heap_profiler.h" +#include "gtest/gtest.h" #include <sstream> @@ -49,14 +49,15 @@ TEST(bluestore, sizeof) { P(range_seg_t); P(sb_info_t); P(SimpleBitmap); - cout << "map<uint64_t,uint64_t>\t" << sizeof(map<uint64_t,uint64_t>) << std::endl; - cout << "map<char,char>\t" << sizeof(map<char,char>) << std::endl; + cout << "map<uint64_t,uint64_t>\t" << sizeof(map<uint64_t, uint64_t>) + << std::endl; + cout << "map<char,char>\t" << sizeof(map<char, char>) << std::endl; } -void dump_mempools() -{ +void dump_mempools() { ostringstream ostr; - auto f = Formatter::create_unique("json-pretty", "json-pretty", "json-pretty"); + auto f = + Formatter::create_unique("json-pretty", "json-pretty", "json-pretty"); ostr << "Mempools: "; f->open_object_section("mempools"); mempool::dump(f.get()); @@ -85,7 +86,7 @@ TEST(sb_info_space_efficient_map_t, basic) { sb_info_space_efficient_map_t sb_info; const size_t num_shared = 1000; for (size_t i = 0; i < num_shared; i += 2) { - auto& sbi = sb_info.add_maybe_stray(i); + auto &sbi = sb_info.add_maybe_stray(i); sbi.pool_id = i; } ASSERT_TRUE(sb_info.find(0) != sb_info.end()); @@ -120,23 +121,22 @@ TEST(sb_info_space_efficient_map_t, size) { sb_info_space_efficient_map_t sb_info; BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard* oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard* bc = 
BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); for (size_t i = 0; i < num_shared; i++) { - auto& sbi = sb_info.add_or_adopt(i); + auto &sbi = sb_info.add_or_adopt(i); // primarily to silent the 'unused' warning ceph_assert(sbi.pool_id == sb_info_t::INVALID_POOL_ID); } dump_mempools(); } -TEST(bluestore_extent_ref_map_t, add) -{ +TEST(bluestore_extent_ref_map_t, add) { bluestore_extent_ref_map_t m; m.get(10, 10); ASSERT_EQ(1u, m.ref_map.size()); @@ -160,8 +160,7 @@ TEST(bluestore_extent_ref_map_t, add) ASSERT_EQ(1u, m.ref_map.size()); } -TEST(bluestore_extent_ref_map_t, get) -{ +TEST(bluestore_extent_ref_map_t, get) { bluestore_extent_ref_map_t m; m.get(00, 30); cout << m << std::endl; @@ -207,8 +206,7 @@ TEST(bluestore_extent_ref_map_t, get) ASSERT_EQ(1u, m.ref_map[28].refs); } -TEST(bluestore_extent_ref_map_t, put) -{ +TEST(bluestore_extent_ref_map_t, put) { bluestore_extent_ref_map_t m; PExtentVector r; bool maybe_unshared = false; @@ -273,8 +271,7 @@ TEST(bluestore_extent_ref_map_t, put) ASSERT_TRUE(maybe_unshared); } -TEST(bluestore_extent_ref_map_t, contains) -{ +TEST(bluestore_extent_ref_map_t, contains) { bluestore_extent_ref_map_t m; m.get(10, 30); ASSERT_TRUE(m.contains(10, 30)); @@ -302,8 +299,7 @@ TEST(bluestore_extent_ref_map_t, contains) ASSERT_FALSE(m.contains(4000, 30)); } -TEST(bluestore_extent_ref_map_t, intersects) -{ +TEST(bluestore_extent_ref_map_t, intersects) { bluestore_extent_ref_map_t m; m.get(10, 30); ASSERT_TRUE(m.intersects(10, 30)); @@ -329,8 +325,7 @@ TEST(bluestore_extent_ref_map_t, intersects) ASSERT_FALSE(m.intersects(55, 1)); } -TEST(bluestore_blob_t, calc_csum) -{ +TEST(bluestore_blob_t, calc_csum) { bufferlist bl; bl.append("asdfghjkqwertyuizxcvbnm,"); bufferlist bl2; @@ -345,10 +340,9 @@ TEST(bluestore_blob_t, calc_csum) n.append("12345678"); for (unsigned csum_type = Checksummer::CSUM_NONE + 1; - csum_type < Checksummer::CSUM_MAX; - ++csum_type) { + csum_type < Checksummer::CSUM_MAX; ++csum_type) { cout << "csum_type " << Checksummer::get_csum_type_string(csum_type) - << std::endl; + << std::endl; bluestore_blob_t b; int bad_off; @@ -397,46 +391,42 @@ TEST(bluestore_blob_t, calc_csum) } } -TEST(bluestore_blob_t, csum_bench) -{ +TEST(bluestore_blob_t, csum_bench) { bufferlist bl; bufferptr bp(10485760); for (char *a = bp.c_str(); a < bp.c_str() + bp.length(); ++a) *a = (unsigned long)a & 0xff; bl.append(bp); int count = 256; - for (unsigned csum_type = 1; - csum_type < Checksummer::CSUM_MAX; - ++csum_type) { + for (unsigned csum_type = 1; csum_type < Checksummer::CSUM_MAX; ++csum_type) { bluestore_blob_t b; b.init_csum(csum_type, 12, bl.length()); ceph::mono_clock::time_point start = ceph::mono_clock::now(); - for (int i = 0; i<count; ++i) { + for (int i = 0; i < count; ++i) { b.calc_csum(0, bl); } ceph::mono_clock::time_point end = ceph::mono_clock::now(); auto dur = std::chrono::duration_cast<ceph::timespan>(end - start); - double mbsec = (double)count * (double)bl.length() / 1000000.0 / (double)dur.count() * 1000000000.0; - cout << "csum_type " << Checksummer::get_csum_type_string(csum_type) - << ", " << dur << " seconds, " - << mbsec << " MB/sec" << std::endl; + double mbsec = (double)count * (double)bl.length() / 1000000.0 / + (double)dur.count() * 
1000000000.0; + cout << "csum_type " << Checksummer::get_csum_type_string(csum_type) << ", " + << dur << " seconds, " << mbsec << " MB/sec" << std::endl; } } -TEST(Blob, put_ref) -{ +TEST(Blob, put_ref) { { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Blob b(coll.get()); b.dirty_blob().allocated_test(bluestore_pextent_t(0x40715000, 0x2000)); b.dirty_blob().allocated_test( - bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x8000)); + bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x8000)); b.dirty_blob().allocated_test(bluestore_pextent_t(0x4071f000, 0x5000)); b.get_ref(coll.get(), 0, 0x1200); b.get_ref(coll.get(), 0xae00, 0x4200); @@ -458,78 +448,78 @@ TEST(Blob, put_ref) unsigned mas = 4096; BlueStore store(g_ceph_context, "", 8192); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(0, mas * 2)); - B.get_ref(coll.get(), 0, mas*2); + B.get_ref(coll.get(), 0, mas * 2); ASSERT_EQ(mas * 2, B.get_referenced_bytes()); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_TRUE(B.put_ref(coll.get(), 0, mas*2, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_TRUE(B.put_ref(coll.get(), 0, mas * 2, &r)); ASSERT_EQ(0u, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); - ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_EQ(mas * 2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas * 2)); ASSERT_FALSE(b.is_allocated(0, mas)); ASSERT_FALSE(b.is_allocated(mas, 0)); ASSERT_FALSE(b.get_extents()[0].is_valid()); - ASSERT_EQ(mas*2, b.get_extents()[0].length); + ASSERT_EQ(mas * 2, b.get_extents()[0].length); } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(123, mas * 2)); - B.get_ref(coll.get(), 0, mas*2); + B.get_ref(coll.get(), 0, mas * 2); ASSERT_EQ(mas * 2, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), 0, mas, &r)); ASSERT_EQ(mas, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*2)); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); ASSERT_TRUE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(0u, B.get_referenced_bytes()); ASSERT_EQ(0u, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(123u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); - 
ASSERT_FALSE(b.is_allocated(0, mas*2)); + ASSERT_EQ(mas * 2, r[0].length); + ASSERT_FALSE(b.is_allocated(0, mas * 2)); ASSERT_FALSE(b.get_extents()[0].is_valid()); - ASSERT_EQ(mas*2, b.get_extents()[0].length); + ASSERT_EQ(mas * 2, b.get_extents()[0].length); } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas)); b.allocated_test(bluestore_pextent_t(2, mas)); b.allocated_test(bluestore_pextent_t(3, mas)); b.allocated_test(bluestore_pextent_t(4, mas)); - B.get_ref(coll.get(), 0, mas*4); + B.get_ref(coll.get(), 0, mas * 4); ASSERT_EQ(mas * 4, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*4)); + ASSERT_TRUE(b.is_allocated(0, mas * 4)); ASSERT_TRUE(b.is_allocated(mas, mas)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas, &r)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas, &r)); ASSERT_EQ(mas * 2, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(mas*2, mas)); - ASSERT_TRUE(b.is_allocated(0, mas*4)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*3, mas, &r)); + ASSERT_TRUE(b.is_allocated(mas * 2, mas)); + ASSERT_TRUE(b.is_allocated(0, mas * 4)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 3, mas, &r)); ASSERT_EQ(mas, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(2u, r.size()); @@ -537,8 +527,8 @@ TEST(Blob, put_ref) ASSERT_EQ(mas, r[0].length); ASSERT_EQ(4u, r[1].offset); ASSERT_EQ(mas, r[1].length); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 2)); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_TRUE(b.get_extents()[1].is_valid()); ASSERT_FALSE(b.get_extents()[2].is_valid()); @@ -546,7 +536,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas)); b.allocated_test(bluestore_pextent_t(2, mas)); @@ -554,19 +544,19 @@ TEST(Blob, put_ref) b.allocated_test(bluestore_pextent_t(4, mas)); b.allocated_test(bluestore_pextent_t(5, mas)); b.allocated_test(bluestore_pextent_t(6, mas)); - B.get_ref(coll.get(), 0, mas*6); + B.get_ref(coll.get(), 0, mas * 6); ASSERT_EQ(mas * 6, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 5, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*6)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 6)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas, &r)); ASSERT_EQ(mas * 4, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*6)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*3, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 6)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 3, mas, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(2u, r.size()); @@ -574,9 +564,9 @@ TEST(Blob, put_ref) ASSERT_EQ(mas, r[0].length); ASSERT_EQ(4u, r[1].offset); ASSERT_EQ(mas, r[1].length); - 
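
// Space is freed in min_alloc_size units (2 x mas for this store): this last
// put_ref completes the unreferenced chunk [mas*2, mas*4), so the pextents at
// offsets 3 and 4 are returned together.
-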
ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); - ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 2)); + ASSERT_TRUE(b.is_allocated(mas * 4, mas * 2)); ASSERT_EQ(5u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_TRUE(b.get_extents()[1].is_valid()); @@ -586,30 +576,30 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 6)); - B.get_ref(coll.get(), 0, mas*6); + B.get_ref(coll.get(), 0, mas * 6); ASSERT_EQ(mas * 6, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 5, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*6)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 6)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas, &r)); ASSERT_EQ(mas * 4, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*6)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*3, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 6)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 3, mas, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x2001u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*2)); - ASSERT_TRUE(b.is_allocated(mas*4, mas*2)); + ASSERT_EQ(mas * 2, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 2)); + ASSERT_TRUE(b.is_allocated(mas * 4, mas * 2)); ASSERT_EQ(3u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); @@ -617,36 +607,36 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); b.allocated_test(bluestore_pextent_t(2, mas * 4)); b.allocated_test(bluestore_pextent_t(3, mas * 4)); - B.get_ref(coll.get(), 0, mas*12); + B.get_ref(coll.get(), 0, mas * 12); ASSERT_EQ(mas * 12, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 11, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*9, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 9, mas, &r)); ASSERT_EQ(mas * 10, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas*7, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas * 7, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(3u, r.size()); ASSERT_EQ(0x2001u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(0x2u, r[1].offset); - ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(mas * 4, r[1].length); ASSERT_EQ(0x3u, r[2].offset); - ASSERT_EQ(mas*2, r[2].length); - 
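
// 0x2001 = pextent base 0x1 + 0x2000 (2 AUs): the freed 2-AU chunk sits in
// the middle of the blob's single 6-AU physical extent.
-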
ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); - ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(mas * 2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 8)); + ASSERT_TRUE(b.is_allocated(mas * 10, mas * 2)); ASSERT_EQ(3u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); @@ -654,36 +644,36 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); b.allocated_test(bluestore_pextent_t(2, mas * 4)); b.allocated_test(bluestore_pextent_t(3, mas * 4)); - B.get_ref(coll.get(), 0, mas*12); + B.get_ref(coll.get(), 0, mas * 12); ASSERT_EQ(mas * 12, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 11, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*9, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 9, mas, &r)); ASSERT_EQ(mas * 10, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas*7, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas * 7, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(3u, r.size()); ASSERT_EQ(0x2001u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(0x2u, r[1].offset); - ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(mas * 4, r[1].length); ASSERT_EQ(0x3u, r[2].offset); - ASSERT_EQ(mas*2, r[2].length); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); - ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(mas * 2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 8)); + ASSERT_TRUE(b.is_allocated(mas * 10, mas * 2)); ASSERT_EQ(3u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); @@ -693,61 +683,61 @@ TEST(Blob, put_ref) cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x1u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(2u, b.get_extents().size()); ASSERT_FALSE(b.get_extents()[0].is_valid()); ASSERT_TRUE(b.get_extents()[1].is_valid()); - ASSERT_TRUE(B.put_ref(coll.get(), mas*10, mas*2, &r)); + ASSERT_TRUE(B.put_ref(coll.get(), mas * 10, mas * 2, &r)); ASSERT_EQ(mas * 0, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x2003u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(1u, b.get_extents().size()); ASSERT_FALSE(b.get_extents()[0].is_valid()); } { BlueStore::Blob B(coll); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 4)); b.allocated_test(bluestore_pextent_t(2, mas * 4)); b.allocated_test(bluestore_pextent_t(3, mas * 4)); - B.get_ref(coll.get(), 0, mas*12); + B.get_ref(coll.get(), 0, mas * 12); ASSERT_EQ(mas * 12, B.get_referenced_bytes()); 
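
// The freed chunk [mas*2, mas*10) spans three physical extents: the tail of
// pextent 0x1, all of pextent 0x2, and the head of pextent 0x3.
-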
ASSERT_FALSE(B.put_ref(coll.get(), mas, mas, &r)); ASSERT_EQ(mas * 11, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*9, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 9, mas, &r)); ASSERT_EQ(mas * 10, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*12)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas*7, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 12)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas * 7, &r)); ASSERT_EQ(mas * 3, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(3u, r.size()); ASSERT_EQ(0x2001u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(0x2u, r[1].offset); - ASSERT_EQ(mas*4, r[1].length); + ASSERT_EQ(mas * 4, r[1].length); ASSERT_EQ(0x3u, r[2].offset); - ASSERT_EQ(mas*2, r[2].length); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*8)); - ASSERT_TRUE(b.is_allocated(mas*10, mas*2)); + ASSERT_EQ(mas * 2, r[2].length); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 8)); + ASSERT_TRUE(b.is_allocated(mas * 10, mas * 2)); ASSERT_EQ(3u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); ASSERT_TRUE(b.get_extents()[2].is_valid()); - ASSERT_FALSE(B.put_ref(coll.get(), mas*10, mas*2, &r)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 10, mas * 2, &r)); ASSERT_EQ(mas * 1, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x2003u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(2u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); @@ -756,39 +746,39 @@ TEST(Blob, put_ref) cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x1u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(1u, b.get_extents().size()); ASSERT_FALSE(b.get_extents()[0].is_valid()); } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; b.allocated_test(bluestore_pextent_t(1, mas * 8)); - B.get_ref(coll.get(), 0, mas*8); + B.get_ref(coll.get(), 0, mas * 8); ASSERT_EQ(mas * 8, B.get_referenced_bytes()); ASSERT_FALSE(B.put_ref(coll.get(), 0, mas, &r)); ASSERT_EQ(mas * 7, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*8)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*7, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 8)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 7, mas, &r)); ASSERT_EQ(mas * 6, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*8)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*2, mas, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 8)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 2, mas, &r)); ASSERT_EQ(mas * 5, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); ASSERT_TRUE(b.is_allocated(0, 8)); - ASSERT_FALSE(B.put_ref(coll.get(), mas*3, mas*4, &r)); + ASSERT_FALSE(B.put_ref(coll.get(), mas * 3, mas * 4, &r)); 
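ASSERT_FALSE(B.put_ref(coll.get(), mas * 3, mas * 4, &r));
// Only AU 1 keeps a reference after this; the unreferenced AUs 2..7 coalesce
// into a single 6-AU release from the lone 8-AU pextent, checked below.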
ASSERT_EQ(mas * 1, B.get_referenced_bytes()); ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x2001u, r[0].offset); - ASSERT_EQ(mas*6, r[0].length); - ASSERT_TRUE(b.is_allocated(0, mas*2)); - ASSERT_FALSE(b.is_allocated(mas*2, mas*6)); + ASSERT_EQ(mas * 6, r[0].length); + ASSERT_TRUE(b.is_allocated(0, mas * 2)); + ASSERT_FALSE(b.is_allocated(mas * 2, mas * 6)); ASSERT_EQ(2u, b.get_extents().size()); ASSERT_TRUE(b.get_extents()[0].is_valid()); ASSERT_FALSE(b.get_extents()[1].is_valid()); @@ -797,34 +787,34 @@ TEST(Blob, put_ref) cout << "r " << r << " " << b << std::endl; ASSERT_EQ(1u, r.size()); ASSERT_EQ(0x1u, r[0].offset); - ASSERT_EQ(mas*2, r[0].length); + ASSERT_EQ(mas * 2, r[0].length); ASSERT_EQ(1u, b.get_extents().size()); ASSERT_FALSE(b.get_extents()[0].is_valid()); } // verify csum chunk size if factored in properly { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); PExtentVector r; - b.allocated_test(bluestore_pextent_t(0, mas*4)); + b.allocated_test(bluestore_pextent_t(0, mas * 4)); b.init_csum(Checksummer::CSUM_CRC32C, 14, mas * 4); - B.get_ref(coll.get(), 0, mas*4); + B.get_ref(coll.get(), 0, mas * 4); ASSERT_EQ(mas * 4, B.get_referenced_bytes()); - ASSERT_TRUE(b.is_allocated(0, mas*4)); - ASSERT_FALSE(B.put_ref(coll.get(), 0, mas*3, &r)); + ASSERT_TRUE(b.is_allocated(0, mas * 4)); + ASSERT_FALSE(B.put_ref(coll.get(), 0, mas * 3, &r)); ASSERT_EQ(mas * 1, B.get_referenced_bytes()); cout << "r " << r << " " << b << std::endl; ASSERT_EQ(0u, r.size()); - ASSERT_TRUE(b.is_allocated(0, mas*4)); + ASSERT_TRUE(b.is_allocated(0, mas * 4)); ASSERT_TRUE(b.get_extents()[0].is_valid()); - ASSERT_EQ(mas*4, b.get_extents()[0].length); + ASSERT_EQ(mas * 4, b.get_extents()[0].length); } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(0x40101000, 0x4000)); - b.allocated_test(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, - 0x13000)); + b.allocated_test( + bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x13000)); b.allocated_test(bluestore_pextent_t(0x40118000, 0x7000)); B.get_ref(coll.get(), 0x0, 0x3800); @@ -842,7 +832,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x5000)); b.allocated_test(bluestore_pextent_t(2, 0x5000)); B.get_ref(coll.get(), 0x0, 0xa000); @@ -859,7 +849,7 @@ TEST(Blob, put_ref) } { BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x7000)); b.allocated_test(bluestore_pextent_t(2, 0x7000)); B.get_ref(coll.get(), 0x0, 0xe000); @@ -875,19 +865,20 @@ TEST(Blob, put_ref) ASSERT_EQ(1u, r[0].offset); ASSERT_EQ(0x7000u, r[0].length); ASSERT_EQ(2u, r[1].offset); - ASSERT_EQ(0x3000u, r[1].length); // we have 0x1000 bytes less due to - // alignment caused by min_alloc_size = 0x2000 + ASSERT_EQ(0x3000u, + r[1].length); // we have 0x1000 bytes less due to + // alignment caused by min_alloc_size = 0x2000 } { BlueStore store(g_ceph_context, "", 0x4000); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard 
*bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Blob B(coll.get()); - bluestore_blob_t& b = B.dirty_blob(); + bluestore_blob_t &b = B.dirty_blob(); b.allocated_test(bluestore_pextent_t(1, 0x5000)); b.allocated_test(bluestore_pextent_t(2, 0x7000)); B.get_ref(coll.get(), 0x0, 0xc000); @@ -908,8 +899,7 @@ TEST(Blob, put_ref) } } -TEST(bluestore_blob_t, can_split) -{ +TEST(bluestore_blob_t, can_split) { bluestore_blob_t a; ASSERT_TRUE(a.can_split()); a.flags = bluestore_blob_t::FLAG_SHARED; @@ -920,8 +910,7 @@ TEST(bluestore_blob_t, can_split) ASSERT_FALSE(a.can_split()); } -TEST(bluestore_blob_t, can_split_at) -{ +TEST(bluestore_blob_t, can_split_at) { bluestore_blob_t a; a.allocated_test(bluestore_pextent_t(0x10000, 0x2000)); a.allocated_test(bluestore_pextent_t(0x20000, 0x2000)); @@ -934,14 +923,13 @@ TEST(bluestore_blob_t, can_split_at) ASSERT_FALSE(a.can_split_at(0x2800)); } -TEST(bluestore_blob_t, prune_tail) -{ +TEST(bluestore_blob_t, prune_tail) { bluestore_blob_t a; a.allocated_test(bluestore_pextent_t(0x10000, 0x2000)); a.allocated_test(bluestore_pextent_t(0x20000, 0x2000)); ASSERT_FALSE(a.can_prune_tail()); a.allocated_test( - bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); + bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); ASSERT_TRUE(a.can_prune_tail()); a.prune_tail(); ASSERT_FALSE(a.can_prune_tail()); @@ -949,7 +937,7 @@ TEST(bluestore_blob_t, prune_tail) ASSERT_EQ(0x4000u, a.get_logical_length()); a.allocated_test( - bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); + bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); a.init_csum(Checksummer::CSUM_CRC32C_8, 12, 0x6000); ASSERT_EQ(6u, a.csum_data.length()); ASSERT_TRUE(a.can_prune_tail()); @@ -961,17 +949,16 @@ TEST(bluestore_blob_t, prune_tail) bluestore_blob_t b; b.allocated_test( - bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); + bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, 0x2000)); ASSERT_FALSE(a.can_prune_tail()); } -TEST(Blob, split) -{ +TEST(Blob, split) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); { BlueStore::Blob L(coll.get()); @@ -1017,13 +1004,12 @@ TEST(Blob, split) } } -TEST(Blob, legacy_decode) -{ +TEST(Blob, legacy_decode) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); bufferlist bl, bl2; { @@ -1040,34 +1026,26 @@ TEST(Blob, legacy_decode) size_t bound = 0, bound2 = 0; - B.bound_encode( - bound, - 1, /*struct_v*/ - 0, /*sbid*/ - false); + B.bound_encode(bound, 1, 
/*struct_v*/ + 0, /*sbid*/ + false); fake_ref_map.bound_encode(bound); - B.bound_encode( - bound2, - 2, /*struct_v*/ - 0, /*sbid*/ - true); + B.bound_encode(bound2, 2, /*struct_v*/ + 0, /*sbid*/ + true); { auto app = bl.get_contiguous_appender(bound); auto app2 = bl2.get_contiguous_appender(bound2); - B.encode( - app, - 1, /*struct_v*/ - 0, /*sbid*/ - false); + B.encode(app, 1, /*struct_v*/ + 0, /*sbid*/ + false); fake_ref_map.encode(app); - B.encode( - app2, - 2, /*struct_v*/ - 0, /*sbid*/ - true); + B.encode(app2, 2, /*struct_v*/ + 0, /*sbid*/ + true); } auto p = bl.front().begin_deep(); @@ -1076,37 +1054,30 @@ TEST(Blob, legacy_decode) BlueStore::Blob Bres2(coll.get()); uint64_t sbid, sbid2; - Bres.decode( - p, - 1, /*struct_v*/ - &sbid, - true, - coll.get()); - Bres2.decode( - p2, - 2, /*struct_v*/ - &sbid2, - true, - coll.get()); + Bres.decode(p, 1, /*struct_v*/ + &sbid, true, coll.get()); + Bres2.decode(p2, 2, /*struct_v*/ + &sbid2, true, coll.get()); ASSERT_EQ(0xff0u + 1u, Bres.get_blob_use_tracker().get_referenced_bytes()); ASSERT_EQ(0xff0u + 1u, Bres2.get_blob_use_tracker().get_referenced_bytes()); - ASSERT_TRUE(Bres.get_blob_use_tracker().equal(Bres2.get_blob_use_tracker())); + ASSERT_TRUE( + Bres.get_blob_use_tracker().equal(Bres2.get_blob_use_tracker())); } } -TEST(ExtentMap, seek_lextent) -{ +TEST(ExtentMap, seek_lextent) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::BlobRef br(coll->new_blob()); ASSERT_EQ(em.extent_map.end(), em.seek_lextent(0)); @@ -1148,17 +1119,17 @@ TEST(ExtentMap, seek_lextent) ASSERT_EQ(em.extent_map.end(), em.seek_lextent(500)); } -TEST(ExtentMap, has_any_lextents) -{ +TEST(ExtentMap, has_any_lextents) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::BlobRef b(coll->new_blob()); ASSERT_FALSE(em.has_any_lextents(0, 0)); @@ -1198,26 +1169,25 @@ TEST(ExtentMap, has_any_lextents) ASSERT_FALSE(em.has_any_lextents(500, 1000)); } -void erase_and_delete(BlueStore::ExtentMap& em, size_t v) -{ +void erase_and_delete(BlueStore::ExtentMap &em, size_t v) { auto 
d = em.find(v); ASSERT_NE(d, em.extent_map.end()); em.extent_map.erase(d); delete &*d; } -TEST(ExtentMap, compress_extent_map) -{ +TEST(ExtentMap, compress_extent_map) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); - + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); + auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::BlobRef b1(coll->new_blob()); BlueStore::BlobRef b2(coll->new_blob()); BlueStore::BlobRef b3(coll->new_blob()); @@ -1273,7 +1243,7 @@ public: static constexpr uint32_t au_size = 4096; uint32_t blob_size = 65536; - size_t csum_order = 12; //1^12 = 4096 bytes + size_t csum_order = 12; // 2^12 = 4096 bytes struct au { uint32_t chksum; @@ -1284,35 +1254,30 @@ public: // test onode that glues some simplifications in representation // with actual BlueStore's onode struct t_onode { - BlueStore::OnodeRef onode; //actual BS onode - std::vector<uint32_t> data; //map to AUs + BlueStore::OnodeRef onode; // actual BS onode + std::vector<uint32_t> data; // map to AUs static constexpr uint32_t empty = std::numeric_limits<uint32_t>::max(); }; - void print(std::ostream& out, t_onode& onode) - { + void print(std::ostream &out, t_onode &onode) { for (size_t i = 0; i < onode.data.size(); ++i) { - if (i != 0) out << " "; + if (i != 0) + out << " "; if (onode.data[i] == t_onode::empty) { - out << "-"; + out << "-"; } else { - out << std::hex << onode.data[i] - << "/" << disk[onode.data[i]].chksum - << ":" << std::dec << disk[onode.data[i]].refs; + out << std::hex << onode.data[i] << "/" << disk[onode.data[i]].chksum + << ":" << std::dec << disk[onode.data[i]].refs; } } } - explicit ExtentMapFixture() - : store(g_ceph_context, "", au_size) - { + explicit ExtentMapFixture() : store(g_ceph_context, "", au_size) { oc = BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); - bc = BlueStore::BufferCacheShard::create(g_ceph_context, "lru", NULL); + bc = BlueStore::BufferCacheShard::create(&store, "lru", NULL); coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); } - void SetUp() override { - } - void TearDown() override { - } + void SetUp() override {} + void TearDown() override {} // takes new space from disk, initializes csums // returns index of first au @@ -1326,15 +1291,13 @@ public: } return pos; } - void release(uint32_t& au_idx) { + void release(uint32_t &au_idx) { if (au_idx != t_onode::empty) { disk_unref(au_idx); } au_idx = t_onode::empty; } - void disk_ref(uint32_t au_idx) { - ++disk[au_idx].refs; - } + void disk_ref(uint32_t au_idx) { ++disk[au_idx].refs; } void disk_unref(uint32_t au_idx) { ceph_assert(disk[au_idx].refs > 0); --disk[au_idx].refs; @@ -1346,16 +1309,16 @@ public: return res; } - void fillup(t_onode& onode, uint32_t end) { + void fillup(t_onode &onode, uint32_t end) { if (end > onode.data.size()) { size_t e = onode.data.size(); onode.data.resize(end); for (; e < end; ++e) { - onode.data[e] =
t_onode::empty; + onode.data[e] = t_onode::empty; } } } - void punch_hole(t_onode& onode, uint32_t off, uint32_t len) { + void punch_hole(t_onode &onode, uint32_t off, uint32_t len) { ceph_assert((off % au_size) == 0); ceph_assert((len % au_size) == 0); uint32_t i = off / au_size; @@ -1363,14 +1326,14 @@ public: fillup(onode, end); while (i < end && i < onode.data.size()) { if (onode.data[i] != t_onode::empty) - release(onode.data[i]); + release(onode.data[i]); onode.data[i] = t_onode::empty; i++; } store.debug_punch_hole(coll, onode.onode, off, len); } - void write(t_onode& onode, uint32_t off, uint32_t len) { + void write(t_onode &onode, uint32_t off, uint32_t len) { ceph_assert((off % au_size) == 0); ceph_assert((len % au_size) == 0); punch_hole(onode, off, len); @@ -1389,30 +1352,30 @@ public: } // below simulation of write performed by BlueStore::do_write() - auto helper_blob_write = [&]( - uint32_t log_off, // logical offset of blob to put to onode - uint32_t empty_aus, // amount of unreferenced aus in the beginning - uint32_t first_au, // first au that will be referenced - uint32_t num_aus // number of aus, first, first+1.. first+num_au-1 - ) { - uint32_t blob_length = (empty_aus + num_aus) * au_size; - BlueStore::BlobRef b(coll->new_blob()); - bluestore_blob_t& bb = b->dirty_blob(); - bb.init_csum(Checksummer::CSUM_CRC32C, csum_order, blob_length); - for(size_t i = 0; i < num_aus; ++i) { - bb.set_csum_item(empty_aus + i, disk[first_au + i].chksum); - } - - PExtentVector pextents; - pextents.emplace_back(first_au * au_size, num_aus * au_size); - bb.allocated(empty_aus * au_size, num_aus * au_size, pextents); - - auto *ext = new BlueStore::Extent(log_off, empty_aus * au_size, - num_aus * au_size, b); - onode.onode->extent_map.extent_map.insert(*ext); - b->get_ref(coll.get(), empty_aus * au_size, num_aus * au_size); - bb.mark_used(empty_aus * au_size, num_aus * au_size); - }; + auto helper_blob_write = + [&](uint32_t log_off, // logical offset of blob to put to onode + uint32_t empty_aus, // amount of unreferenced aus in the beginning + uint32_t first_au, // first au that will be referenced + uint32_t num_aus // number of aus, first, first+1.. 
first+num_au-1 + ) { + uint32_t blob_length = (empty_aus + num_aus) * au_size; + BlueStore::BlobRef b(coll->new_blob()); + bluestore_blob_t &bb = b->dirty_blob(); + bb.init_csum(Checksummer::CSUM_CRC32C, csum_order, blob_length); + for (size_t i = 0; i < num_aus; ++i) { + bb.set_csum_item(empty_aus + i, disk[first_au + i].chksum); + } + + PExtentVector pextents; + pextents.emplace_back(first_au * au_size, num_aus * au_size); + bb.allocated(empty_aus * au_size, num_aus * au_size, pextents); + + auto *ext = new BlueStore::Extent(log_off, empty_aus * au_size, + num_aus * au_size, b); + onode.onode->extent_map.extent_map.insert(*ext); + b->get_ref(coll.get(), empty_aus * au_size, num_aus * au_size); + bb.mark_used(empty_aus * au_size, num_aus * au_size); + }; size_t off_blob_aligned = p2align(off, blob_size); size_t off_blob_roundup = p2align(off + blob_size, blob_size); @@ -1431,7 +1394,7 @@ public: }; } - void dup(t_onode& ofrom, t_onode& oto, uint64_t off, uint64_t len) { + void dup(t_onode &ofrom, t_onode &oto, uint64_t off, uint64_t len) { ceph_assert((off % au_size) == 0); ceph_assert((len % au_size) == 0); punch_hole(oto, off, len); @@ -1443,84 +1406,87 @@ public: while (i < end) { oto.data[i] = ofrom.data[i]; if (oto.data[i] != t_onode::empty) { - disk_ref(oto.data[i]); + disk_ref(oto.data[i]); } ++i; } BlueStore::TransContext txc(store.cct, coll.get(), nullptr, nullptr); - ofrom.onode->extent_map.dup_esb(&store, &txc, coll, ofrom.onode, oto.onode, off, len, off); + ofrom.onode->extent_map.dup_esb(&store, &txc, coll, ofrom.onode, oto.onode, + off, len, off); } - int32_t compare(t_onode& onode) { + int32_t compare(t_onode &onode) { BlueStore::ExtentMap::debug_au_vector_t debug = - onode.onode->extent_map.debug_list_disk_layout(); + onode.onode->extent_map.debug_list_disk_layout(); size_t pos = 0; for (size_t i = 0; i < debug.size(); ++i) { if (debug[i].disk_offset == -1ULL) { - size_t len = debug[i].disk_length; - size_t l = len / au_size; - if (pos + l > onode.data.size()) { - return pos + l; - } - while (l > 0) { - if (onode.data[pos] != t_onode::empty) { - return pos; - } - --l; - ++pos; - }; + size_t len = debug[i].disk_length; + size_t l = len / au_size; + if (pos + l > onode.data.size()) { + return pos + l; + } + while (l > 0) { + if (onode.data[pos] != t_onode::empty) { + return pos; + } + --l; + ++pos; + }; } else { - ceph_assert(pos < onode.data.size()); - uint32_t au = onode.data[pos]; - if (debug[i].disk_offset != au * au_size || - debug[i].disk_length != au_size || - debug[i].chksum != disk[au].chksum) { - return pos; - } - if ((int32_t)debug[i].ref_cnts == -1) { - if (disk[au].refs != 1) { - return pos; - } - } else { - if (disk[au].refs != debug[i].ref_cnts) { - return pos; - } - } - ++pos; + ceph_assert(pos < onode.data.size()); + uint32_t au = onode.data[pos]; + if (debug[i].disk_offset != au * au_size || + debug[i].disk_length != au_size || + debug[i].chksum != disk[au].chksum) { + return pos; + } + if ((int32_t)debug[i].ref_cnts == -1) { + if (disk[au].refs != 1) { + return pos; + } + } else { + if (disk[au].refs != debug[i].ref_cnts) { + return pos; + } + } + ++pos; } } // remaining aus must be empty while (pos < onode.data.size()) { if (onode.data[pos] != t_onode::empty) { - return pos; + return pos; } ++pos; } return -1; } - bool check(t_onode& onode) { + bool check(t_onode &onode) { int32_t res = compare(onode); if (res != -1) { - cout << "Discrepancy at 0x" << std::hex << res * au_size << std::dec << std::endl; + cout << "Discrepancy at 0x" << std::hex << res * 
au_size << std::dec + << std::endl; cout << "Simulated: "; print(cout, onode); cout << std::endl; - cout << "Onode: " << onode.onode->extent_map.debug_list_disk_layout() << std::endl; + cout << "Onode: " << onode.onode->extent_map.debug_list_disk_layout() + << std::endl; return false; } return true; } - void print(t_onode& onode) { + void print(t_onode &onode) { cout << "Simulated: "; print(cout, onode); cout << std::endl; - cout << "Onode: " << onode.onode->extent_map.debug_list_disk_layout() << std::endl; + cout << "Onode: " << onode.onode->extent_map.debug_list_disk_layout() + << std::endl; } }; -TEST_F(ExtentMapFixture, walk) -{ +TEST_F(ExtentMapFixture, walk) { std::vector<t_onode> X; for (size_t i = 0; i < 100; i++) { X.push_back(create()); @@ -1528,15 +1494,14 @@ TEST_F(ExtentMapFixture, walk) for (size_t i = 0; i < 100 - 1; i++) { write(X[i], (i + 2) * au_size, 4 * au_size); - dup(X[i], X[i+1], (i + 1) * au_size, 8 * au_size); + dup(X[i], X[i + 1], (i + 1) * au_size, 8 * au_size); } for (size_t i = 0; i < 100; i++) { ASSERT_EQ(check(X[i]), true); } } -TEST_F(ExtentMapFixture, pyramid) -{ +TEST_F(ExtentMapFixture, pyramid) { constexpr size_t H = 100; std::vector<t_onode> X; for (size_t i = 0; i < H; i++) { @@ -1552,8 +1517,7 @@ } } -TEST_F(ExtentMapFixture, rain) -{ +TEST_F(ExtentMapFixture, rain) { constexpr size_t H = 100; constexpr size_t W = 100; std::vector<t_onode> X; @@ -1569,8 +1533,7 @@ } } -TEST_F(ExtentMapFixture, pollock) -{ +TEST_F(ExtentMapFixture, pollock) { constexpr size_t H = 100; constexpr size_t W = 100; std::vector<t_onode> X; @@ -1590,8 +1553,7 @@ } } -TEST_F(ExtentMapFixture, carousel) -{ +TEST_F(ExtentMapFixture, carousel) { constexpr size_t R = 10; constexpr size_t CNT = 300; constexpr size_t W = 100; @@ -1612,8 +1574,7 @@ } } -TEST_F(ExtentMapFixture, petri) -{ +TEST_F(ExtentMapFixture, petri) { constexpr size_t R = 10; constexpr size_t CNT = 300; constexpr size_t W = 100; @@ -1637,25 +1598,25 @@ } } -TEST(ExtentMap, dup_extent_map) -{ +TEST(ExtentMap, dup_extent_map) { BlueStore store(g_ceph_context, "", 4096); - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); - size_t csum_order = 12; //1^12 = 4096 bytes + size_t csum_order = 12; // 2^12 = 4096 bytes auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); std::unique_ptr<ceph::Formatter> formatter(Formatter::create("json")); /////////////////////////// - //constructing onode1 - BlueStore::OnodeRef onode1(new BlueStore::Onode(coll.get(), ghobject_t(), "")); - - //BlueStore::ExtentMap em1(&onode1, - // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); - BlueStore::ExtentMap& em1 = onode1->extent_map; + // constructing onode1 + BlueStore::OnodeRef onode1( + new BlueStore::Onode(coll.get(), ghobject_t(), "")); + + // BlueStore::ExtentMap em1(&onode1, + // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap &em1 = onode1->extent_map; /////////////////////////// // constructing extent/Blob: 0x0~2000 at
<0x100000~2000> size_t ext1_offs = 0x0; @@ -1664,7 +1625,7 @@ TEST(ExtentMap, dup_extent_map) BlueStore::BlobRef b1 = coll->new_blob(); auto &_b1 = b1->dirty_blob(); _b1.init_csum(Checksummer::CSUM_CRC32C, csum_order, ext1_len); - for(size_t i = 0; i < _b1.get_csum_count(); i++) { + for (size_t i = 0; i < _b1.get_csum_count(); i++) { *(_b1.get_csum_item_ptr(i)) = i + 1; } PExtentVector pextents; @@ -1677,16 +1638,18 @@ TEST(ExtentMap, dup_extent_map) _b1.mark_used(ext1->blob_offset, ext1->length); /////////////////////////// - //constructing onode2 which is a full clone from onode1 - BlueStore::OnodeRef onode2(new BlueStore::Onode(coll.get(), ghobject_t(), "")); - //BlueStore::ExtentMap em2(&onode2, - // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); - BlueStore::ExtentMap& em2 = onode2->extent_map; + // constructing onode2 which is a full clone from onode1 + BlueStore::OnodeRef onode2( + new BlueStore::Onode(coll.get(), ghobject_t(), "")); + // BlueStore::ExtentMap em2(&onode2, + // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap &em2 = onode2->extent_map; { BlueStore::TransContext txc(store.cct, coll.get(), nullptr, nullptr); - //em1.dup(&store, &txc, coll, em2, ext1_offs, ext1_len, ext1_offs); - onode1->extent_map.dup_esb(&store, &txc, coll, onode1, onode2, ext1_offs, ext1_len, ext1_offs); + // em1.dup(&store, &txc, coll, em2, ext1_offs, ext1_len, ext1_offs); + onode1->extent_map.dup_esb(&store, &txc, coll, onode1, onode2, ext1_offs, + ext1_len, ext1_offs); em1.dump(formatter.get()); // see the log if any formatter->flush(std::cout); @@ -1704,17 +1667,18 @@ TEST(ExtentMap, dup_extent_map) ASSERT_EQ(b1->get_shared_blob(), b2->get_shared_blob()); auto &_b2 = b2->get_blob(); ASSERT_EQ(_b1.get_csum_count(), _b2.get_csum_count()); - for(size_t i = 0; i < _b2.get_csum_count(); i++) { + for (size_t i = 0; i < _b2.get_csum_count(); i++) { ASSERT_EQ(*(_b1.get_csum_item_ptr(i)), *(_b2.get_csum_item_ptr(i))); } } /////////////////////////// - //constructing onode3 which is partial clone (tail part) from onode2 - BlueStore::OnodeRef onode3(new BlueStore::Onode(coll.get(), ghobject_t(), "")); - //BlueStore::ExtentMap em3(&onode3, - // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); - BlueStore::ExtentMap& em3 = onode3->extent_map; + // constructing onode3 which is partial clone (tail part) from onode2 + BlueStore::OnodeRef onode3( + new BlueStore::Onode(coll.get(), ghobject_t(), "")); + // BlueStore::ExtentMap em3(&onode3, + // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap &em3 = onode3->extent_map; { size_t clone_shift = 0x1000; ceph_assert(ext1_len > clone_shift); @@ -1722,7 +1686,8 @@ TEST(ExtentMap, dup_extent_map) size_t clone_len = ext1_len - clone_shift; BlueStore::TransContext txc(store.cct, coll.get(), nullptr, nullptr); - onode1->extent_map.dup_esb(&store, &txc, coll, onode1, onode3, clone_offs, clone_len, clone_offs); + onode1->extent_map.dup_esb(&store, &txc, coll, onode1, onode3, clone_offs, + clone_len, clone_offs); em1.dump(formatter.get()); // see the log if any formatter->flush(std::cout); std::cout << std::endl; @@ -1746,17 +1711,18 @@ TEST(ExtentMap, dup_extent_map) ASSERT_EQ(ll, ext1_len); auto &_b3 = b3->get_blob(); ASSERT_EQ(_b1.get_csum_count(), _b3.get_csum_count()); - for(size_t i = 0; i < _b3.get_csum_count(); i++) { + for (size_t i = 0; i < _b3.get_csum_count(); i++) { ASSERT_EQ(*(_b1.get_csum_item_ptr(i)), 
*(_b3.get_csum_item_ptr(i))); } } /////////////////////////// - //constructing onode4 which is partial clone (head part) from onode2 - BlueStore::OnodeRef onode4(new BlueStore::Onode(coll.get(), ghobject_t(), "")); - //BlueStore::ExtentMap em4(&onode4, - // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); - BlueStore::ExtentMap& em4 = onode4->extent_map; + // constructing onode4 which is partial clone (head part) from onode2 + BlueStore::OnodeRef onode4( + new BlueStore::Onode(coll.get(), ghobject_t(), "")); + // BlueStore::ExtentMap em4(&onode4, + // g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap &em4 = onode4->extent_map; { size_t clone_shift = 0; @@ -1765,7 +1731,8 @@ TEST(ExtentMap, dup_extent_map) size_t clone_offs = ext1_offs + clone_shift; BlueStore::TransContext txc(store.cct, coll.get(), nullptr, nullptr); - onode2->extent_map.dup_esb(&store, &txc, coll, onode2, onode4, clone_offs, clone_len, clone_offs); + onode2->extent_map.dup_esb(&store, &txc, coll, onode2, onode4, clone_offs, + clone_len, clone_offs); em2.dump(formatter.get()); // see the log if any formatter->flush(std::cout); std::cout << std::endl; @@ -1795,15 +1762,13 @@ TEST(ExtentMap, dup_extent_map) ASSERT_EQ(csum_entries, _b4.get_csum_count()); ASSERT_GT(_b2.get_csum_count(), csum_entries); - for(size_t i = 0; i < csum_entries; i++) { + for (size_t i = 0; i < csum_entries; i++) { ASSERT_EQ(*(_b2.get_csum_item_ptr(i)), *(_b4.get_csum_item_ptr(i))); } } } - -void clear_and_dispose(BlueStore::old_extent_map_t& old_em) -{ +void clear_and_dispose(BlueStore::old_extent_map_t &old_em) { auto oep = old_em.begin(); while (oep != old_em.end()) { auto &lo = *oep; @@ -1812,43 +1777,42 @@ void clear_and_dispose(BlueStore::old_extent_map_t& old_em) } } -TEST(GarbageCollector, BasicTest) -{ - BlueStore::OnodeCacheShard *oc = BlueStore::OnodeCacheShard::create( - g_ceph_context, "lru", NULL); - BlueStore::BufferCacheShard *bc = BlueStore::BufferCacheShard::create( - g_ceph_context, "lru", NULL); - +TEST(GarbageCollector, BasicTest) { BlueStore store(g_ceph_context, "", 4096); + BlueStore::OnodeCacheShard *oc = + BlueStore::OnodeCacheShard::create(g_ceph_context, "lru", NULL); + BlueStore::BufferCacheShard *bc = + BlueStore::BufferCacheShard::create(&store, "lru", NULL); + auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::old_extent_map_t old_extents; - - /* - min_alloc_size = 4096 - original disposition - extent1 <loffs = 100, boffs = 100, len = 10> - -> blob1<compressed, len_on_disk=4096, logical_len=8192> - extent2 <loffs = 200, boffs = 200, len = 10> - -> blob2<raw, len_on_disk=4096, llen=4096> - extent3 <loffs = 300, boffs = 300, len = 10> - -> blob1<compressed, len_on_disk=4096, llen=8192> - extent4 <loffs = 4096, boffs = 0, len = 10> - -> blob3<raw, len_on_disk=4096, llen=4096> - on write(300~100) resulted in - extent1 <loffs = 100, boffs = 100, len = 10> - -> blob1<compressed, len_on_disk=4096, logical_len=8192> - extent2 <loffs = 200, boffs = 200, len = 10> - -> blob2<raw, len_on_disk=4096, llen=4096> - extent3 <loffs = 300, boffs = 300, len = 100> - -> blob4<raw, len_on_disk=4096, llen=4096> - extent4 <loffs = 4096, boffs = 0, len = 10> - -> 
blob3<raw, len_on_disk=4096, llen=4096> - */ + /* + min_alloc_size = 4096 + original disposition + extent1 <loffs = 100, boffs = 100, len = 10> + -> blob1<compressed, len_on_disk=4096, logical_len=8192> + extent2 <loffs = 200, boffs = 200, len = 10> + -> blob2<raw, len_on_disk=4096, llen=4096> + extent3 <loffs = 300, boffs = 300, len = 10> + -> blob1<compressed, len_on_disk=4096, llen=8192> + extent4 <loffs = 4096, boffs = 0, len = 10> + -> blob3<raw, len_on_disk=4096, llen=4096> + on write(300~100) resulted in + extent1 <loffs = 100, boffs = 100, len = 10> + -> blob1<compressed, len_on_disk=4096, logical_len=8192> + extent2 <loffs = 200, boffs = 200, len = 10> + -> blob2<raw, len_on_disk=4096, llen=4096> + extent3 <loffs = 300, boffs = 300, len = 100> + -> blob4<raw, len_on_disk=4096, llen=4096> + extent4 <loffs = 4096, boffs = 0, len = 10> + -> blob3<raw, len_on_disk=4096, llen=4096> + */ { BlueStore::GarbageCollector gc(g_ceph_context); int64_t saving; @@ -1870,11 +1834,11 @@ TEST(GarbageCollector, BasicTest) em.extent_map.insert(*new BlueStore::Extent(4096, 0, 10, b3)); b3->get_ref(coll.get(), 0, 10); - old_extents.push_back(*new BlueStore::OldExtent(300, 300, 10, b1)); + old_extents.push_back(*new BlueStore::OldExtent(300, 300, 10, b1)); saving = gc.estimate(300, 100, em, old_extents, 4096); ASSERT_EQ(saving, 1); - auto& to_collect = gc.get_extents_to_collect(); + auto &to_collect = gc.get_extents_to_collect(); ASSERT_EQ(to_collect.num_intervals(), 1u); { auto it = to_collect.begin(); @@ -1885,31 +1849,32 @@ TEST(GarbageCollector, BasicTest) em.clear(); clear_and_dispose(old_extents); } - /* - original disposition - min_alloc_size = 0x10000 - extent1 <loffs = 0, boffs = 0, len = 0x40000> - -> blob1<compressed, len_on_disk=0x20000, logical_len=0x40000> - Write 0x8000~37000 resulted in the following extent map prior to GC - for the last write_small(0x30000~0xf000): - - extent1 <loffs = 0, boffs = 0, len = 0x8000> - -> blob1<compressed, len_on_disk=0x20000, logical_len=0x40000> - extent2 <loffs = 0x8000, boffs = 0x8000, len = 0x8000> - -> blob2<raw, len_on_disk=0x10000, llen=0x10000> - extent3 <loffs = 0x10000, boffs = 0, len = 0x20000> - -> blob3<raw, len_on_disk=0x20000, llen=0x20000> - extent4 <loffs = 0x30000, boffs = 0, len = 0xf000> - -> blob4<raw, len_on_disk=0x10000, llen=0x10000> - extent5 <loffs = 0x3f000, boffs = 0x3f000, len = 0x1000> - -> blob1<compressed, len_on_disk=0x20000, llen=0x40000> - */ + /* + original disposition + min_alloc_size = 0x10000 + extent1 <loffs = 0, boffs = 0, len = 0x40000> + -> blob1<compressed, len_on_disk=0x20000, logical_len=0x40000> + Write 0x8000~37000 resulted in the following extent map prior to GC + for the last write_small(0x30000~0xf000): + + extent1 <loffs = 0, boffs = 0, len = 0x8000> + -> blob1<compressed, len_on_disk=0x20000, logical_len=0x40000> + extent2 <loffs = 0x8000, boffs = 0x8000, len = 0x8000> + -> blob2<raw, len_on_disk=0x10000, llen=0x10000> + extent3 <loffs = 0x10000, boffs = 0, len = 0x20000> + -> blob3<raw, len_on_disk=0x20000, llen=0x20000> + extent4 <loffs = 0x30000, boffs = 0, len = 0xf000> + -> blob4<raw, len_on_disk=0x10000, llen=0x10000> + extent5 <loffs = 0x3f000, boffs = 0x3f000, len = 0x1000> + -> blob1<compressed, len_on_disk=0x20000, llen=0x40000> + */ { BlueStore store(g_ceph_context, "", 0x10000); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - 
g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::old_extent_map_t old_extents; BlueStore::GarbageCollector gc(g_ceph_context); @@ -1927,32 +1892,34 @@ TEST(GarbageCollector, BasicTest) em.extent_map.insert(*new BlueStore::Extent(0, 0, 0x8000, b1)); b1->get_ref(coll.get(), 0, 0x8000); em.extent_map.insert( - *new BlueStore::Extent(0x8000, 0x8000, 0x8000, b2)); // new extent + *new BlueStore::Extent(0x8000, 0x8000, 0x8000, b2)); // new extent b2->get_ref(coll.get(), 0x8000, 0x8000); em.extent_map.insert( - *new BlueStore::Extent(0x10000, 0, 0x20000, b3)); // new extent + *new BlueStore::Extent(0x10000, 0, 0x20000, b3)); // new extent b3->get_ref(coll.get(), 0, 0x20000); em.extent_map.insert( - *new BlueStore::Extent(0x30000, 0, 0xf000, b4)); // new extent + *new BlueStore::Extent(0x30000, 0, 0xf000, b4)); // new extent b4->get_ref(coll.get(), 0, 0xf000); em.extent_map.insert(*new BlueStore::Extent(0x3f000, 0x3f000, 0x1000, b1)); b1->get_ref(coll.get(), 0x3f000, 0x1000); - old_extents.push_back(*new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b1)); old_extents.push_back( - *new BlueStore::OldExtent(0x10000, 0x10000, 0x20000, b1)); - old_extents.push_back(*new BlueStore::OldExtent(0x30000, 0x30000, 0xf000, b1)); + *new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b1)); + old_extents.push_back( + *new BlueStore::OldExtent(0x10000, 0x10000, 0x20000, b1)); + old_extents.push_back( + *new BlueStore::OldExtent(0x30000, 0x30000, 0xf000, b1)); saving = gc.estimate(0x30000, 0xf000, em, old_extents, 0x10000); ASSERT_EQ(saving, 2); - auto& to_collect = gc.get_extents_to_collect(); + auto &to_collect = gc.get_extents_to_collect(); ASSERT_EQ(to_collect.num_intervals(), 2u); { auto it1 = to_collect.begin(); auto it2 = ++to_collect.begin(); using p = decltype(*it1); { - auto v1 = p{0x0ul ,0x8000ul}; + auto v1 = p{0x0ul, 0x8000ul}; auto v2 = p{0x0ul, 0x8000ul}; ASSERT_TRUE(*it1 == v1 || *it2 == v2); } @@ -1966,20 +1933,20 @@ TEST(GarbageCollector, BasicTest) em.clear(); clear_and_dispose(old_extents); } - /* - original disposition - min_alloc_size = 0x1000 - extent1 <loffs = 0, boffs = 0, len = 0x4000> - -> blob1<compressed, len_on_disk=0x2000, logical_len=0x4000> - write 0x3000~4000 resulted in the following extent map - (future feature - suppose we can compress incoming write prior to - GC invocation) - - extent1 <loffs = 0, boffs = 0, len = 0x4000> - -> blob1<compressed, len_on_disk=0x2000, logical_len=0x4000> - extent2 <loffs = 0x3000, boffs = 0, len = 0x4000> - -> blob2<compressed, len_on_disk=0x2000, llen=0x4000> - */ + /* + original disposition + min_alloc_size = 0x1000 + extent1 <loffs = 0, boffs = 0, len = 0x4000> + -> blob1<compressed, len_on_disk=0x2000, logical_len=0x4000> + write 0x3000~4000 resulted in the following extent map + (future feature - suppose we can compress incoming write prior to + GC invocation) + + extent1 <loffs = 0, boffs = 0, len = 0x4000> + -> blob1<compressed, len_on_disk=0x2000, logical_len=0x4000> + extent2 <loffs = 0x3000, boffs = 0, len = 0x4000> + -> blob2<compressed, len_on_disk=0x2000, llen=0x4000> + */ { BlueStore::GarbageCollector gc(g_ceph_context); int64_t saving; @@ -1993,45 +1960,47 @@ TEST(GarbageCollector, BasicTest) em.extent_map.insert(*new BlueStore::Extent(0, 0, 0x3000, b1)); b1->get_ref(coll.get(), 0, 0x3000); em.extent_map.insert( - *new BlueStore::Extent(0x3000, 0, 0x4000, b2)); // new extent + *new 
BlueStore::Extent(0x3000, 0, 0x4000, b2)); // new extent b2->get_ref(coll.get(), 0, 0x4000); - old_extents.push_back(*new BlueStore::OldExtent(0x3000, 0x3000, 0x1000, b1)); + old_extents.push_back( + *new BlueStore::OldExtent(0x3000, 0x3000, 0x1000, b1)); saving = gc.estimate(0x3000, 0x4000, em, old_extents, 0x1000); ASSERT_EQ(saving, 0); - auto& to_collect = gc.get_extents_to_collect(); + auto &to_collect = gc.get_extents_to_collect(); ASSERT_EQ(to_collect.num_intervals(), 0u); em.clear(); clear_and_dispose(old_extents); } - /* - original disposition - min_alloc_size = 0x10000 - extent0 <loffs = 0, boffs = 0, len = 0x20000> - -> blob0<compressed, len_on_disk=0x10000, logical_len=0x20000> - extent1 <loffs = 0x20000, boffs = 0, len = 0x20000> - -> blob1<compressed, len_on_disk=0x10000, logical_len=0x20000> - write 0x8000~37000 resulted in the following extent map prior - to GC for the last write_small(0x30000~0xf000) - - extent0 <loffs = 0, boffs = 0, len = 0x8000> - -> blob0<compressed, len_on_disk=0x10000, logical_len=0x20000> - extent2 <loffs = 0x8000, boffs = 0x8000, len = 0x8000> - -> blob2<raw, len_on_disk=0x10000, llen=0x10000> - extent3 <loffs = 0x10000, boffs = 0, len = 0x20000> - -> blob3<raw, len_on_disk=0x20000, llen=0x20000> - extent4 <loffs = 0x30000, boffs = 0, len = 0xf000> - -> blob4<raw, len_on_disk=0x1000, llen=0x1000> - extent5 <loffs = 0x3f000, boffs = 0x1f000, len = 0x1000> - -> blob1<compressed, len_on_disk=0x10000, llen=0x20000> - */ + /* + original disposition + min_alloc_size = 0x10000 + extent0 <loffs = 0, boffs = 0, len = 0x20000> + -> blob0<compressed, len_on_disk=0x10000, logical_len=0x20000> + extent1 <loffs = 0x20000, boffs = 0, len = 0x20000> + -> blob1<compressed, len_on_disk=0x10000, logical_len=0x20000> + write 0x8000~37000 resulted in the following extent map prior + to GC for the last write_small(0x30000~0xf000) + + extent0 <loffs = 0, boffs = 0, len = 0x8000> + -> blob0<compressed, len_on_disk=0x10000, logical_len=0x20000> + extent2 <loffs = 0x8000, boffs = 0x8000, len = 0x8000> + -> blob2<raw, len_on_disk=0x10000, llen=0x10000> + extent3 <loffs = 0x10000, boffs = 0, len = 0x20000> + -> blob3<raw, len_on_disk=0x20000, llen=0x20000> + extent4 <loffs = 0x30000, boffs = 0, len = 0xf000> + -> blob4<raw, len_on_disk=0x1000, llen=0x1000> + extent5 <loffs = 0x3f000, boffs = 0x1f000, len = 0x1000> + -> blob1<compressed, len_on_disk=0x10000, llen=0x20000> + */ { BlueStore store(g_ceph_context, "", 0x10000); auto coll = ceph::make_ref<BlueStore::Collection>(&store, oc, bc, coll_t()); BlueStore::Onode onode(coll.get(), ghobject_t(), ""); - BlueStore::ExtentMap em(&onode, - g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); + BlueStore::ExtentMap em( + &onode, + g_ceph_context->_conf->bluestore_extent_map_inline_shard_prealloc_size); BlueStore::old_extent_map_t old_extents; BlueStore::GarbageCollector gc(g_ceph_context); @@ -2052,26 +2021,27 @@ TEST(GarbageCollector, BasicTest) em.extent_map.insert(*new BlueStore::Extent(0, 0, 0x8000, b0)); b0->get_ref(coll.get(), 0, 0x8000); em.extent_map.insert( - *new BlueStore::Extent(0x8000, 0x8000, 0x8000, b2)); // new extent + *new BlueStore::Extent(0x8000, 0x8000, 0x8000, b2)); // new extent b2->get_ref(coll.get(), 0x8000, 0x8000); em.extent_map.insert( - *new BlueStore::Extent(0x10000, 0, 0x20000, b3)); // new extent + *new BlueStore::Extent(0x10000, 0, 0x20000, b3)); // new extent b3->get_ref(coll.get(), 0, 0x20000); em.extent_map.insert( - *new BlueStore::Extent(0x30000, 0, 0xf000, b4)); // new 
extent + *new BlueStore::Extent(0x30000, 0, 0xf000, b4)); // new extent b4->get_ref(coll.get(), 0, 0xf000); em.extent_map.insert(*new BlueStore::Extent(0x3f000, 0x1f000, 0x1000, b1)); b1->get_ref(coll.get(), 0x1f000, 0x1000); - old_extents.push_back(*new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b0)); old_extents.push_back( - *new BlueStore::OldExtent(0x10000, 0x10000, 0x10000, b0)); + *new BlueStore::OldExtent(0x8000, 0x8000, 0x8000, b0)); + old_extents.push_back( + *new BlueStore::OldExtent(0x10000, 0x10000, 0x10000, b0)); old_extents.push_back( - *new BlueStore::OldExtent(0x20000, 0x00000, 0x1f000, b1)); + *new BlueStore::OldExtent(0x20000, 0x00000, 0x1f000, b1)); saving = gc.estimate(0x30000, 0xf000, em, old_extents, 0x10000); ASSERT_EQ(saving, 2); - auto& to_collect = gc.get_extents_to_collect(); + auto &to_collect = gc.get_extents_to_collect(); ASSERT_EQ(to_collect.num_intervals(), 2u); { auto it1 = to_collect.begin(); @@ -2080,7 +2050,7 @@ TEST(GarbageCollector, BasicTest) { auto v1 = p{0x0ul, 0x8000ul}; auto v2 = p{0x0ul, 0x8000ul}; - ASSERT_TRUE(*it1 == v1 || *it2 == v2); + ASSERT_TRUE(*it1 == v1 || *it2 == v2); } { auto v1 = p{0x3f000ul, 0x1000ul}; @@ -2094,8 +2064,7 @@ TEST(GarbageCollector, BasicTest) } } -TEST(BlueStoreRepairer, StoreSpaceTracker) -{ +TEST(BlueStoreRepairer, StoreSpaceTracker) { BlueStoreRepairer::StoreSpaceTracker bmap0; bmap0.init((uint64_t)4096 * 1024 * 1024 * 1024, 0x1000); ASSERT_EQ(bmap0.granularity, 2 * 1024 * 1024U); @@ -2171,16 +2140,16 @@ TEST(BlueStoreRepairer, StoreSpaceTracker) ASSERT_TRUE(bmap.is_used(hoid, 0xc1000)); interval_set<uint64_t> extents; - extents.insert(0,0x500); - extents.insert(0x800,0x100); - extents.insert(0x1000,0x1000); - extents.insert(0xa001,1); - extents.insert(0xa0000,0xff8); + extents.insert(0, 0x500); + extents.insert(0x800, 0x100); + extents.insert(0x1000, 0x1000); + extents.insert(0xa001, 1); + extents.insert(0xa0000, 0xff8); ASSERT_EQ(3u, bmap.filter_out(extents)); ASSERT_TRUE(bmap.is_used(cid)); ASSERT_TRUE(bmap.is_used(hoid)); - + BlueStoreRepairer::StoreSpaceTracker bmap2; bmap2.init((uint64_t)0x3223b1d1000, 0x10000); ASSERT_EQ(0x1a0000u, bmap2.granularity); @@ -2193,8 +2162,7 @@ TEST(BlueStoreRepairer, StoreSpaceTracker) ASSERT_TRUE(bmap2.is_used(hoid, 0x3223b19ffff)); } -TEST(bluestore_blob_t, unused) -{ +TEST(bluestore_blob_t, unused) { { bluestore_blob_t b; uint64_t min_alloc_size = 64 << 10; // 64 kB @@ -2205,7 +2173,8 @@ TEST(bluestore_blob_t, unused) uint64_t suggested_boff = 0; PExtentVector extents; extents.emplace_back(0x1a560000, min_alloc_size); - b.allocated(p2align(suggested_boff, min_alloc_size), 0 /*no matter*/, extents); + b.allocated(p2align(suggested_boff, min_alloc_size), 0 /*no matter*/, + extents); b.mark_used(offset, length); ASSERT_FALSE(b.is_unused(offset, length)); @@ -2235,7 +2204,8 @@ TEST(bluestore_blob_t, unused) uint64_t suggested_boff = 0x11000; PExtentVector extents; extents.emplace_back(0x1a560000, min_alloc_size); - b.allocated(p2align(suggested_boff, min_alloc_size), 0 /*no matter*/, extents); + b.allocated(p2align(suggested_boff, min_alloc_size), 0 /*no matter*/, + extents); b.add_unused(0, offset); b.add_unused(offset + length, min_alloc_size * 2 - offset - length); b.mark_used(offset, length); @@ -2278,7 +2248,8 @@ TEST(bluestore_blob_t, unused) ASSERT_FALSE(b.is_unused(offset, length)); ASSERT_FALSE(b.is_unused(offset, unused_granularity)); - ASSERT_TRUE(b.is_unused(0, offset / unused_granularity * unused_granularity)); + ASSERT_TRUE( + b.is_unused(0, offset / 
unused_granularity * unused_granularity)); ASSERT_TRUE(b.is_unused(offset + length, offset0 - offset - length)); auto end0_aligned = round_up_to(offset0 + length, unused_granularity); ASSERT_TRUE(b.is_unused(end0_aligned, min_alloc_size * 3 - end0_aligned)); @@ -2289,18 +2260,15 @@ TEST(bluestore_blob_t, unused) // https://tracker.ceph.com/issues/51682 // Basic map_any functionality is tested as well though. // -TEST(bluestore_blob_t, wrong_map_bl_in_51682) -{ +TEST(bluestore_blob_t, wrong_map_bl_in_51682) { { bluestore_blob_t b; uint64_t min_alloc_size = 4 << 10; // 64 kB b.allocated_test(bluestore_pextent_t(0x17ba000, 4 * min_alloc_size)); b.allocated_test(bluestore_pextent_t(0x17bf000, 4 * min_alloc_size)); - b.allocated_test( - bluestore_pextent_t( - bluestore_pextent_t::INVALID_OFFSET, - 1 * min_alloc_size)); + b.allocated_test(bluestore_pextent_t(bluestore_pextent_t::INVALID_OFFSET, + 1 * min_alloc_size)); b.allocated_test(bluestore_pextent_t(0x153c44d000, 7 * min_alloc_size)); b.mark_used(0, 0x8000); @@ -2310,34 +2278,31 @@ TEST(bluestore_blob_t, wrong_map_bl_in_51682) bufferlist bl; bl.append(s); const size_t num_expected_entries = 5; - uint64_t expected[num_expected_entries][2] = { - {0x17ba000, 0x4000}, - {0x17bf000, 0x3000}, - {0x17c0000, 0x3000}, - {0xffffffffffffffff, 0x1000}, - {0x153c44d000, 0x3000}}; + uint64_t expected[num_expected_entries][2] = {{0x17ba000, 0x4000}, + {0x17bf000, 0x3000}, + {0x17c0000, 0x3000}, + {0xffffffffffffffff, 0x1000}, + {0x153c44d000, 0x3000}}; size_t expected_pos = 0; - b.map_bl(0, bl, - [&](uint64_t o, bufferlist& bl) { - ASSERT_EQ(o, expected[expected_pos][0]); - ASSERT_EQ(bl.length(), expected[expected_pos][1]); - ++expected_pos; - }); + b.map_bl(0, bl, [&](uint64_t o, bufferlist &bl) { + ASSERT_EQ(o, expected[expected_pos][0]); + ASSERT_EQ(bl.length(), expected[expected_pos][1]); + ++expected_pos; + }); // 0x5000 is an improper offset presumably provided when doing a repair - b.map_bl(0x5000, bl, - [&](uint64_t o, bufferlist& bl) { - ASSERT_EQ(o, expected[expected_pos][0]); - ASSERT_EQ(bl.length(), expected[expected_pos][1]); - ++expected_pos; - }); + b.map_bl(0x5000, bl, [&](uint64_t o, bufferlist &bl) { + ASSERT_EQ(o, expected[expected_pos][0]); + ASSERT_EQ(bl.length(), expected[expected_pos][1]); + ++expected_pos; + }); ASSERT_EQ(expected_pos, num_expected_entries); } } //--------------------------------------------------------------------------------- -static int verify_extent(const extent_t & ext, const extent_t *ext_arr, uint64_t ext_arr_size, uint64_t idx) -{ - const extent_t & ext_ref = ext_arr[idx]; +static int verify_extent(const extent_t &ext, const extent_t *ext_arr, + uint64_t ext_arr_size, uint64_t idx) { + const extent_t &ext_ref = ext_arr[idx]; if (ext.offset == ext_ref.offset && ext.length == ext_ref.length) { return 0; } else { @@ -2346,25 +2311,27 @@ static int verify_extent(const extent_t & ext, const extent_t *ext_arr, uint64_t std::cerr << "Null extent was returned at idx = " << idx << std::endl; } unsigned start = std::max(((int32_t)(idx)-3), 0); - unsigned end = std::min(idx+3, ext_arr_size); + unsigned end = std::min(idx + 3, ext_arr_size); for (unsigned j = start; j < end; j++) { - const extent_t & ext_ref = ext_arr[j]; - std::cerr << j << ") ref_ext = [" << ext_ref.offset << ", " << ext_ref.length << "]" << std::endl; + const extent_t &ext_ref = ext_arr[j]; + std::cerr << j << ") ref_ext = [" << ext_ref.offset << ", " + << ext_ref.length << "]" << std::endl; } - std::cerr << idx << ") ext = [" << ext.offset << ", " 
<< ext.length << "]" << std::endl; + std::cerr << idx << ") ext = [" << ext.offset << ", " << ext.length + << "]" << std::endl; return -1; } } //--------------------------------------------------------------------------------- -static int test_extents(uint64_t index, extent_t *ext_arr, uint64_t ext_arr_size, SimpleBitmap& sbmap, bool set) -{ - const uint64_t MAX_JUMP_BIG = 1523; - const uint64_t MAX_JUMP_SMALL = 19; - const uint64_t MAX_LEN_BIG = 523; - const uint64_t MAX_LEN_SMALL = 23; - - uint64_t n = sbmap.get_size(); +static int test_extents(uint64_t index, extent_t *ext_arr, + uint64_t ext_arr_size, SimpleBitmap &sbmap, bool set) { + const uint64_t MAX_JUMP_BIG = 1523; + const uint64_t MAX_JUMP_SMALL = 19; + const uint64_t MAX_LEN_BIG = 523; + const uint64_t MAX_LEN_SMALL = 23; + + uint64_t n = sbmap.get_size(); uint64_t offset = 0; unsigned length, jump, i; for (i = 0; i < ext_arr_size; i++) { @@ -2392,28 +2359,30 @@ static int test_extents(uint64_t index, extent_t *ext_arr, uint64_t ext_arr_size success = sbmap.clr(offset, length); } if (!success) { - std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset << ", " << length << ")"<< std::endl; + std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset << ", " + << length << ")" << std::endl; return -1; } // if this is not the first entry and no jump -> merge extents - if ( (i==0) || (jump > 0) ) { + if ((i == 0) || (jump > 0)) { ext_arr[i] = {offset, length}; } else { // merge 2 extents - i --; + i--; ext_arr[i].length += length; } offset += length; } unsigned arr_size = std::min((uint64_t)i, ext_arr_size); std::cout << std::hex << std::right; - std::cout << "[" << index << "] " << (set ? "Set::" : "Clr::") << " extents count = 0x" << arr_size; + std::cout << "[" << index << "] " << (set ? "Set::" : "Clr::") + << " extents count = 0x" << arr_size; std::cout << std::dec << std::endl; offset = 0; extent_t ext; - for(unsigned i = 0; i < arr_size; i++) { + for (unsigned i = 0; i < arr_size; i++) { if (set) { ext = sbmap.get_next_set_extent(offset); } else { @@ -2434,43 +2403,46 @@ static int test_extents(uint64_t index, extent_t *ext_arr, uint64_t ext_arr_size if (ext.length == 0) { return 0; } else { - std::cerr << "sbmap.get_next_" << (set ? "set" : "clr") << "_extent(" << offset << ") return length = " << ext.length << std::endl; + std::cerr << "sbmap.get_next_" << (set ? 
"set" : "clr") << "_extent(" + << offset << ") return length = " << ext.length << std::endl; return -1; } } //--------------------------------------------------------------------------------- -TEST(SimpleBitmap, basic) -{ +TEST(SimpleBitmap, basic) { const uint64_t MAX_EXTENTS_COUNT = 7131177; - std::unique_ptr<extent_t[]> ext_arr = std::make_unique<extent_t[]>(MAX_EXTENTS_COUNT); + std::unique_ptr<extent_t[]> ext_arr = + std::make_unique<extent_t[]>(MAX_EXTENTS_COUNT); ASSERT_TRUE(ext_arr != nullptr); const uint64_t BIT_COUNT = 4ULL << 30; // 4Gb = 512MB SimpleBitmap sbmap(g_ceph_context, BIT_COUNT); // use current time as seed for random generator std::srand(std::time(nullptr)); - for (unsigned i = 0; i < 3; i++ ) { - memset(ext_arr.get(), 0, sizeof(extent_t)*MAX_EXTENTS_COUNT); + for (unsigned i = 0; i < 3; i++) { + memset(ext_arr.get(), 0, sizeof(extent_t) * MAX_EXTENTS_COUNT); sbmap.clear_all(); - ASSERT_TRUE(test_extents(i, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, true) == 0); + ASSERT_TRUE( + test_extents(i, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, true) == 0); - memset(ext_arr.get(), 0, sizeof(extent_t)*MAX_EXTENTS_COUNT); + memset(ext_arr.get(), 0, sizeof(extent_t) * MAX_EXTENTS_COUNT); sbmap.set_all(); - ASSERT_TRUE(test_extents(i, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, false) == 0); + ASSERT_TRUE( + test_extents(i, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, false) == 0); } } //--------------------------------------------------------------------------------- -static int test_intersections(unsigned test_idx, SimpleBitmap &sbmap, uint8_t map[], uint64_t map_size) -{ - const uint64_t MAX_LEN_BIG = 523; - const uint64_t MAX_LEN_SMALL = 23; +static int test_intersections(unsigned test_idx, SimpleBitmap &sbmap, + uint8_t map[], uint64_t map_size) { + const uint64_t MAX_LEN_BIG = 523; + const uint64_t MAX_LEN_SMALL = 23; - bool success; + bool success; uint64_t set_op_count = 0, clr_op_count = 0; unsigned length, i; - for (i = 0; i < map_size / (MAX_LEN_BIG*2); i++) { + for (i = 0; i < map_size / (MAX_LEN_BIG * 2); i++) { uint64_t offset = (std::rand() % (map_size - 1)); if (i & 1) { length = std::rand() % MAX_LEN_BIG; @@ -2486,22 +2458,23 @@ static int test_intersections(unsigned test_idx, SimpleBitmap &sbmap, uint8_t ma bool set = (std::rand() % 3); if (set) { success = sbmap.set(offset, length); - memset(map+offset, 0xFF, length); + memset(map + offset, 0xFF, length); set_op_count++; } else { success = sbmap.clr(offset, length); - memset(map+offset, 0x0, length); + memset(map + offset, 0x0, length); clr_op_count++; } if (!success) { - std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset << ", " << length << ")"<< std::endl; + std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset << ", " + << length << ")" << std::endl; return -1; } } uint64_t set_bit_count = 0; uint64_t clr_bit_count = 0; - for(uint64_t idx = 0; idx < map_size; idx++) { + for (uint64_t idx = 0; idx < map_size; idx++) { if (map[idx]) { set_bit_count++; success = sbmap.bit_is_set(idx); @@ -2510,37 +2483,42 @@ static int test_intersections(unsigned test_idx, SimpleBitmap &sbmap, uint8_t ma success = sbmap.bit_is_clr(idx); } if (!success) { - std::cerr << "expected: sbmap.bit_is_" << (map[idx] ? "set(" : "clr(") << idx << ")"<< std::endl; + std::cerr << "expected: sbmap.bit_is_" << (map[idx] ? 
"set(" : "clr(") + << idx << ")" << std::endl; return -1; } - } - std::cout << std::hex << std::right << __func__ ; - std::cout << " [" << test_idx << "] set_bit_count = 0x" << std::setfill('0') << std::setw(8) << set_bit_count - << ", clr_bit_count = 0x" << std::setfill('0') << std::setw(8) << clr_bit_count - << ", sum = 0x" << set_bit_count + clr_bit_count << std::endl; + std::cout << std::hex << std::right << __func__; + std::cout << " [" << test_idx << "] set_bit_count = 0x" << std::setfill('0') + << std::setw(8) << set_bit_count << ", clr_bit_count = 0x" + << std::setfill('0') << std::setw(8) << clr_bit_count + << ", sum = 0x" << set_bit_count + clr_bit_count << std::endl; std::cout << std::dec; uint64_t offset = 0; - for(uint64_t i = 0; i < (set_op_count + clr_op_count); i++) { + for (uint64_t i = 0; i < (set_op_count + clr_op_count); i++) { extent_t ext = sbmap.get_next_set_extent(offset); - //std::cout << "set_ext:: " << i << ") [" << ext.offset << ", " << ext.length << "]" << std::endl; + // std::cout << "set_ext:: " << i << ") [" << ext.offset << ", " << + // ext.length << "]" << std::endl; for (uint64_t idx = ext.offset; idx < ext.offset + ext.length; idx++) { if (map[idx] != 0xFF) { - std::cerr << "map[" << idx << "] is clear, but extent [" << ext.offset << ", " << ext.length << "] is set" << std::endl; - return -1; + std::cerr << "map[" << idx << "] is clear, but extent [" << ext.offset + << ", " << ext.length << "] is set" << std::endl; + return -1; } } offset = ext.offset + ext.length; } offset = 0; - for(uint64_t i = 0; i < (set_op_count + clr_op_count); i++) { + for (uint64_t i = 0; i < (set_op_count + clr_op_count); i++) { extent_t ext = sbmap.get_next_clr_extent(offset); - //std::cout << "clr_ext:: " << i << ") [" << ext.offset << ", " << ext.length << "]" << std::endl; + // std::cout << "clr_ext:: " << i << ") [" << ext.offset << ", " << + // ext.length << "]" << std::endl; for (uint64_t idx = ext.offset; idx < ext.offset + ext.length; idx++) { - if (map[idx] ) { - std::cerr << "map[" << idx << "] is set, but extent [" << ext.offset << ", " << ext.length << "] is free" << std::endl; - return -1; + if (map[idx]) { + std::cerr << "map[" << idx << "] is set, but extent [" << ext.offset + << ", " << ext.length << "] is free" << std::endl; + return -1; } } offset = ext.offset + ext.length; @@ -2550,18 +2528,17 @@ static int test_intersections(unsigned test_idx, SimpleBitmap &sbmap, uint8_t ma } //--------------------------------------------------------------------------------- -TEST(SimpleBitmap, intersection) -{ - const uint64_t MAP_SIZE = 1ULL << 30; // 1G +TEST(SimpleBitmap, intersection) { + const uint64_t MAP_SIZE = 1ULL << 30; // 1G SimpleBitmap sbmap(g_ceph_context, MAP_SIZE); // use current time as seed for random generator std::srand(std::time(nullptr)); - std::unique_ptr<uint8_t[]> map = std::make_unique<uint8_t[]> (MAP_SIZE); + std::unique_ptr<uint8_t[]> map = std::make_unique<uint8_t[]>(MAP_SIZE); ASSERT_TRUE(map != nullptr); - for (unsigned i = 0; i < 1; i++ ) { + for (unsigned i = 0; i < 1; i++) { sbmap.clear_all(); memset(map.get(), 0, MAP_SIZE); ASSERT_TRUE(test_intersections(i, sbmap, map.get(), MAP_SIZE) == 0); @@ -2572,41 +2549,42 @@ TEST(SimpleBitmap, intersection) } } - //--------------------------------------------------------------------------------- -static int test_extents_boundaries(uint64_t index, extent_t *ext_arr, uint64_t ext_arr_size, SimpleBitmap& sbmap, bool set) -{ - uint64_t n = sbmap.get_size(); +static int test_extents_boundaries(uint64_t 
index, extent_t *ext_arr, + uint64_t ext_arr_size, SimpleBitmap &sbmap, + bool set) { + uint64_t n = sbmap.get_size(); uint64_t offset = 0, k = 0; - for(unsigned i = 0; i < 64; i++) { + for (unsigned i = 0; i < 64; i++) { offset += i; if (offset >= n) { break; } - for(unsigned length = 1; length <= 128; length++) { + for (unsigned length = 1; length <= 128; length++) { if (offset + length >= n) { - break; + break; } if (k >= ext_arr_size) { - break; + break; } bool success; if (set) { - success = sbmap.set(offset, length); + success = sbmap.set(offset, length); } else { - success = sbmap.clr(offset, length); + success = sbmap.clr(offset, length); } if (!success) { - std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset << ", " << length << ")"<< std::endl; - return -1; + std::cerr << "Failed sbmap." << (set ? "set(" : "clr(") << offset + << ", " << length << ")" << std::endl; + return -1; } ext_arr[k++] = {offset, length}; if (length < 64) { - offset += 64; + offset += 64; } else { - offset += 128; + offset += 128; } } if (k >= ext_arr_size) { @@ -2615,13 +2593,14 @@ static int test_extents_boundaries(uint64_t index, extent_t *ext_arr, uint64_t e } unsigned arr_size = std::min((uint64_t)k, ext_arr_size); - std::cout << std::hex << std::right << __func__ ; - std::cout << " [" << index << "] " << (set ? "Set::" : "Clr::") << " extents count = 0x" << arr_size; + std::cout << std::hex << std::right << __func__; + std::cout << " [" << index << "] " << (set ? "Set::" : "Clr::") + << " extents count = 0x" << arr_size; std::cout << std::dec << std::endl; offset = 0; extent_t ext; - for(unsigned i = 0; i < arr_size; i++) { + for (unsigned i = 0; i < arr_size; i++) { if (set) { ext = sbmap.get_next_set_extent(offset); } else { @@ -2642,17 +2621,17 @@ static int test_extents_boundaries(uint64_t index, extent_t *ext_arr, uint64_t e if (ext.length == 0) { return 0; } else { - std::cerr << "sbmap.get_next_" << (set ? "set" : "clr") << "_extent(" << offset << ") return length = " << ext.length << std::endl; + std::cerr << "sbmap.get_next_" << (set ? 
"set" : "clr") << "_extent(" + << offset << ") return length = " << ext.length << std::endl; return -1; } - } //--------------------------------------------------------------------------------- -TEST(SimpleBitmap, boundaries) -{ +TEST(SimpleBitmap, boundaries) { const uint64_t MAX_EXTENTS_COUNT = 64 << 10; - std::unique_ptr<extent_t[]> ext_arr = std::make_unique<extent_t[]>(MAX_EXTENTS_COUNT); + std::unique_ptr<extent_t[]> ext_arr = + std::make_unique<extent_t[]>(MAX_EXTENTS_COUNT); ASSERT_TRUE(ext_arr != nullptr); // use current time as seed for random generator @@ -2661,26 +2640,27 @@ TEST(SimpleBitmap, boundaries) uint64_t bit_count = 32 << 20; // 32Mb = 4MB unsigned count = 0; for (unsigned i = 0; i < 64; i++) { - SimpleBitmap sbmap(g_ceph_context, bit_count+i); - memset(ext_arr.get(), 0, sizeof(extent_t)*MAX_EXTENTS_COUNT); + SimpleBitmap sbmap(g_ceph_context, bit_count + i); + memset(ext_arr.get(), 0, sizeof(extent_t) * MAX_EXTENTS_COUNT); sbmap.clear_all(); - ASSERT_TRUE(test_extents_boundaries(count, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, true) == 0); + ASSERT_TRUE(test_extents_boundaries(count, ext_arr.get(), MAX_EXTENTS_COUNT, + sbmap, true) == 0); - memset(ext_arr.get(), 0, sizeof(extent_t)*MAX_EXTENTS_COUNT); + memset(ext_arr.get(), 0, sizeof(extent_t) * MAX_EXTENTS_COUNT); sbmap.set_all(); - ASSERT_TRUE(test_extents_boundaries(count++, ext_arr.get(), MAX_EXTENTS_COUNT, sbmap, false) == 0); + ASSERT_TRUE(test_extents_boundaries(count++, ext_arr.get(), + MAX_EXTENTS_COUNT, sbmap, false) == 0); } } //--------------------------------------------------------------------------------- -TEST(SimpleBitmap, boundaries2) -{ +TEST(SimpleBitmap, boundaries2) { const uint64_t bit_count_base = 64 << 10; // 64Kb = 8MB - const extent_t null_extent = {0, 0}; + const extent_t null_extent = {0, 0}; for (unsigned i = 0; i < 64; i++) { - uint64_t bit_count = bit_count_base + i; - extent_t full_extent = {0, bit_count}; + uint64_t bit_count = bit_count_base + i; + extent_t full_extent = {0, bit_count}; SimpleBitmap sbmap(g_ceph_context, bit_count); sbmap.set(0, bit_count); @@ -2705,8 +2685,7 @@ TEST(SimpleBitmap, boundaries2) } } -TEST(shared_blob_2hash_tracker_t, basic_test) -{ +TEST(shared_blob_2hash_tracker_t, basic_test) { shared_blob_2hash_tracker_t t1(1024 * 1024, 4096); ASSERT_TRUE(t1.count_non_zero() == 0); @@ -2756,7 +2735,7 @@ TEST(shared_blob_2hash_tracker_t, basic_test) ASSERT_TRUE(t1.count_non_zero() != 0); - ASSERT_TRUE(!t1.test_all_zero(5,0x1000)); + ASSERT_TRUE(!t1.test_all_zero(5, 0x1000)); ASSERT_TRUE(!t1.test_all_zero(5, 0x2000)); ASSERT_TRUE(!t1.test_all_zero(5, 0x3000)); ASSERT_TRUE(t1.test_all_zero(5, 0x4000)); @@ -2771,14 +2750,13 @@ TEST(shared_blob_2hash_tracker_t, basic_test) ASSERT_TRUE(!t1.test_all_zero_range(5, 0, 0x9000)); } -TEST(bluestore_blob_use_tracker_t, mempool_stats_test) -{ - using mempool::bluestore_cache_other::allocated_items; +TEST(bluestore_blob_use_tracker_t, mempool_stats_test) { using mempool::bluestore_cache_other::allocated_bytes; + using mempool::bluestore_cache_other::allocated_items; uint64_t other_items0 = allocated_items(); uint64_t other_bytes0 = allocated_bytes(); { - bluestore_blob_use_tracker_t* t1 = new bluestore_blob_use_tracker_t; + bluestore_blob_use_tracker_t *t1 = new bluestore_blob_use_tracker_t; t1->init(1024 * 1024, 4096); ASSERT_EQ(256, allocated_items() - other_items0); // = 1M / 4K @@ -2789,7 +2767,7 @@ TEST(bluestore_blob_use_tracker_t, mempool_stats_test) ASSERT_EQ(allocated_bytes(), other_bytes0); } { - 
bluestore_blob_use_tracker_t* t1 = new bluestore_blob_use_tracker_t; + bluestore_blob_use_tracker_t *t1 = new bluestore_blob_use_tracker_t; t1->init(1024 * 1024, 4096); t1->add_tail(2048 * 1024, 4096); @@ -2802,7 +2780,7 @@ TEST(bluestore_blob_use_tracker_t, mempool_stats_test) ASSERT_EQ(allocated_bytes(), other_bytes0); } { - bluestore_blob_use_tracker_t* t1 = new bluestore_blob_use_tracker_t; + bluestore_blob_use_tracker_t *t1 = new bluestore_blob_use_tracker_t; t1->init(1024 * 1024, 4096); t1->prune_tail(512 * 1024); @@ -2815,8 +2793,8 @@ TEST(bluestore_blob_use_tracker_t, mempool_stats_test) ASSERT_EQ(allocated_bytes(), other_bytes0); } { - bluestore_blob_use_tracker_t* t1 = new bluestore_blob_use_tracker_t; - bluestore_blob_use_tracker_t* t2 = new bluestore_blob_use_tracker_t; + bluestore_blob_use_tracker_t *t1 = new bluestore_blob_use_tracker_t; + bluestore_blob_use_tracker_t *t2 = new bluestore_blob_use_tracker_t; t1->init(1024 * 1024, 4096); @@ -2839,9 +2817,9 @@ TEST(bluestore_blob_use_tracker_t, mempool_stats_test) int main(int argc, char **argv) { auto args = argv_to_vec(argc, argv); - auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/src/test/osd/CMakeLists.txt b/src/test/osd/CMakeLists.txt index c9216c42d5c..c7492e238e5 100644 --- a/src/test/osd/CMakeLists.txt +++ b/src/test/osd/CMakeLists.txt @@ -55,6 +55,7 @@ target_link_libraries(unittest_osd_types global) # unittest_ecbackend add_executable(unittest_ecbackend TestECBackend.cc + $<TARGET_OBJECTS:unit-main> ) add_ceph_unittest(unittest_ecbackend) target_link_libraries(unittest_ecbackend osd global) diff --git a/src/test/osd/Object.cc b/src/test/osd/Object.cc index 9d914abd794..e9c36efd067 100644 --- a/src/test/osd/Object.cc +++ b/src/test/osd/Object.cc @@ -125,15 +125,18 @@ void ObjectDesc::update(ContentsGenerator *gen, const ContDesc &next) { return; } -bool ObjectDesc::check(bufferlist &to_check) { +bool ObjectDesc::check(bufferlist &to_check, + const std::pair<uint64_t, uint64_t>& offlen) { iterator objiter = begin(); + const auto [offset, size] = offlen; + objiter.seek(offset); + std::cout << "seeking to " << offset << std::endl; uint64_t error_at = 0; if (!objiter.check_bl_advance(to_check, &error_at)) { std::cout << "incorrect buffer at pos " << error_at << std::endl; return false; } - uint64_t size = layers.begin()->first->get_length(layers.begin()->second); if (to_check.length() < size) { std::cout << "only read " << to_check.length() << " out of size " << size << std::endl; @@ -143,11 +146,14 @@ bool ObjectDesc::check(bufferlist &to_check) { } bool ObjectDesc::check_sparse(const std::map<uint64_t, uint64_t>& extents, - bufferlist &to_check) + bufferlist &to_check, + const std::pair<uint64_t, uint64_t>& offlen) { + const auto [offset_to_skip, _] = offlen; + uint64_t pos = offset_to_skip; uint64_t off = 0; - uint64_t pos = 0; auto objiter = begin(); + objiter.seek(pos); for (auto &&extiter : extents) { // verify hole { diff --git a/src/test/osd/Object.h b/src/test/osd/Object.h index 76ce2d2a2fc..05d32a6f031 100644 --- a/src/test/osd/Object.h +++ b/src/test/osd/Object.h @@ -431,6 +431,9 @@ public: } } ceph_assert(pos == _pos); + if (current != layers.end()) { + current->iter.seek(pos); + } } // grab the 
bytes in the range of [pos, pos+s), and advance @c pos @@ -517,9 +520,11 @@ public: // takes ownership of gen void update(ContentsGenerator *gen, const ContDesc &next); - bool check(bufferlist &to_check); + bool check(bufferlist &to_check, + const std::pair<uint64_t, uint64_t>& offlen); bool check_sparse(const std::map<uint64_t, uint64_t>& extends, - bufferlist &to_check); + bufferlist &to_check, + const std::pair<uint64_t, uint64_t>& offlen); const ContDesc &most_recent(); ContentsGenerator *most_recent_gen() { return layers.begin()->first.get(); diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h index 8bb94e7f3a0..49b7d605d36 100644 --- a/src/test/osd/RadosModel.h +++ b/src/test/osd/RadosModel.h @@ -1355,10 +1355,12 @@ public: int snap; bool balance_reads; bool localize_reads; + uint8_t offlen_randomization_ratio; std::shared_ptr<int> in_use; std::vector<bufferlist> results; + std::vector<std::pair<uint64_t, uint64_t>> offlens; std::vector<int> retvals; std::vector<std::map<uint64_t, uint64_t>> extent_results; std::vector<bool> is_sparse_read; @@ -1382,6 +1384,7 @@ public: const std::string &oid, bool balance_reads, bool localize_reads, + uint8_t offlen_randomization_ratio, TestOpStat *stat = 0) : TestOp(n, context, stat), completions(3), @@ -1389,7 +1392,9 @@ public: snap(0), balance_reads(balance_reads), localize_reads(localize_reads), + offlen_randomization_ratio(offlen_randomization_ratio), results(3), + offlens(3), retvals(3), extent_results(3), is_sparse_read(3, false), @@ -1399,24 +1404,45 @@ public: attrretval(0) {} + static std::pair<uint64_t, uint64_t> maybe_randomize_offlen( + uint8_t offlen_randomization_ratio, + uint64_t max_len) + { + if ((rand() % 100) < offlen_randomization_ratio && max_len > 0) { + // the random offset here is de facto "first n bytes to skip in + // a chunk" -- it doesn't care about good distribution across the + // entire object. imperfect but should be good enough for partial + // read testing.
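+ // for example (hypothetical numbers): with offlen_randomization_ratio = 50 + // and max_len = 4096, roughly half of the reads keep {off = 0, len = 4096}, + // i.e. a full-object read; the other half skip a uniformly drawn prefix, + // e.g. rand() % 4096 == 1000 yields {off = 1000, len = 3096}.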
+ const auto off = rand() % max_len; + return {off, max_len - off}; + } else { + return {0, max_len}; + } + } + void _do_read(librados::ObjectReadOperation& read_op, int index) { - uint64_t len = 0; - if (old_value.has_contents()) - len = old_value.most_recent_gen()->get_length(old_value.most_recent()); + uint64_t max_len = 0; + if (old_value.has_contents()) { + max_len = + old_value.most_recent_gen()->get_length(old_value.most_recent()); + } + offlens[index] = + maybe_randomize_offlen(offlen_randomization_ratio, max_len); + const auto [offset, length] = offlens[index]; if (context->no_sparse || rand() % 2) { is_sparse_read[index] = false; - read_op.read(0, - len, + read_op.read(offset, + length, &results[index], &retvals[index]); bufferlist init_value_bl; encode(static_cast<uint32_t>(-1), init_value_bl); - read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, 0, len, + read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, offset, length, 0, &checksums[index], &checksum_retvals[index]); } else { is_sparse_read[index] = true; - read_op.sparse_read(0, - len, + read_op.sparse_read(offset, + length, &extent_results[index], &results[index], &retvals[index]); @@ -1576,12 +1602,12 @@ public: } for (unsigned i = 0; i < results.size(); i++) { if (is_sparse_read[i]) { - if (!old_value.check_sparse(extent_results[i], results[i])) { + if (!old_value.check_sparse(extent_results[i], results[i], offlens[i])) { std::cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; context->errors++; } } else { - if (!old_value.check(results[i])) { + if (!old_value.check(results[i], offlens[i])) { std::cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; context->errors++; } @@ -2176,6 +2202,7 @@ public: {} void _do_read(librados::ObjectReadOperation& read_op, uint32_t offset, uint32_t length, int index) { + std::cout << __func__ << ":" << __LINE__ << std::endl; read_op.read(offset, length, &results[index], diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc index 1c13fb4c95c..d28d428fc06 100644 --- a/src/test/osd/TestECBackend.cc +++ b/src/test/osd/TestECBackend.cc @@ -16,6 +16,7 @@ #include <sstream> #include <errno.h> #include <signal.h> +#include "osd/ECCommon.h" #include "osd/ECBackend.h" #include "gtest/gtest.h" @@ -53,10 +54,179 @@ TEST(ECUtil, stripe_info_t) ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()), 2*s.get_stripe_width()); - ASSERT_EQ(s.aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)), + ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk( + make_pair(swidth+s.get_chunk_size(), 10*swidth)), make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); + ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)), + make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); + + // an offset that falls inside a stripe is rounded down to the stripe boundary (here 0) + ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(s.get_chunk_size(), 10*swidth)), + make_pair<uint64_t>(0, 10*s.get_chunk_size())); + + // a length that extends past a stripe boundary is rounded up to whole stripes + ASSERT_EQ(s.chunk_aligned_offset_len_to_chunk(make_pair(s.get_chunk_size(), + 10*swidth + s.get_chunk_size())), + make_pair<uint64_t>(0, 11*s.get_chunk_size())); + ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)), make_pair((uint64_t)0, 2*swidth)); } +TEST(ECUtil, offset_length_is_same_stripe) +{ + const uint64_t swidth = 4096; + const uint64_t schunk = 1024; + const uint64_t ssize = 4; + + ECUtil::stripe_info_t s(ssize, swidth); + 
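// geometry behind the diagrams below: a stripe is one row of ssize = 4 + // chunks holding schunk = swidth / ssize = 1024 bytes each. under this + // layout an extent [off, off+len) with len > 0 stays within a single + // stripe exactly when off / swidth == (off + len - 1) / swidth, and + // zero-length reads are trivially same-stripe. + 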
ASSERT_EQ(s.get_stripe_width(), swidth); + ASSERT_EQ(s.get_chunk_size(), schunk); + + // read nothing at the very beginning + // +---+---+---+---+ + // | 0| | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(0, 0)); + + // read nothing at the stripe end + // +---+---+---+---+ + // | | | | 0| + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(swidth, 0)); + + // read single byte at the stripe end + // +---+---+---+---+ + // | | | | ~1| + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(swidth - 1, 1)); + + // read single stripe + // +---+---+---+---+ + // | 1k| 1k| 1k| 1k| + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(0, swidth)); + + // read single chunk + // +---+---+---+---+ + // | 1k| | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(0, schunk)); + + // read single stripe except its first chunk + // +---+---+---+---+ + // | | 1k| 1k| 1k| + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + ASSERT_TRUE(s.offset_length_is_same_stripe(schunk, swidth - schunk)); + + // read two stripes + // +---+---+---+---+ + // | 1k| 1k| 1k| 1k| + // +---+---+---+---+ + // | 1k| 1k| 1k| 1k| + // +---+---+---+---+ + ASSERT_FALSE(s.offset_length_is_same_stripe(0, 2*swidth)); + + // multistripe read: 1st stripe without 1st byte + 1st byte of 2nd stripe + // +-----+---+---+---+ + // | 1k-1| 1k| 1k| 1k| + // +-----+---+---+---+ + // | 1| | | | + // +-----+---+---+---+ + ASSERT_FALSE(s.offset_length_is_same_stripe(1, swidth)); +} + + +TEST(ECCommon, get_min_want_to_read_shards) +{ + const uint64_t swidth = 4096; + const uint64_t ssize = 4; + + ECUtil::stripe_info_t s(ssize, swidth); + ASSERT_EQ(s.get_stripe_width(), swidth); + ASSERT_EQ(s.get_chunk_size(), 1024); + + const std::vector<int> chunk_mapping = {}; // no remapping + + // read nothing at the very beginning + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 0, 0, s, chunk_mapping, &want_to_read); + ASSERT_TRUE(want_to_read == std::set<int>{}); + } + + // read nothing at the middle (0-sized partial read) + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 2048, 0, s, chunk_mapping, &want_to_read); + ASSERT_TRUE(want_to_read == std::set<int>{}); + } + + // read not-so-many (< chunk_size) bytes at the middle (partial read) + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 2048, 42, s, chunk_mapping, &want_to_read); + ASSERT_TRUE(want_to_read == std::set<int>{2}); + } + + // read more (> chunk_size) bytes at the middle (partial read) + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 1024, 1024+42, s, chunk_mapping, &want_to_read); + // extra () due to a language / macro limitation + ASSERT_TRUE(want_to_read == (std::set<int>{1, 2})); + } + + // full stripe except last chunk + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 0, 3*1024, s, chunk_mapping, &want_to_read); + // extra () due to a language / macro limitation + ASSERT_TRUE(want_to_read == (std::set<int>{0, 1, 2})); + } + + // full stripe except 1st chunk + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 1024, swidth-1024, s, chunk_mapping, &want_to_read); + // 
extra () due to a language / macro limitation + ASSERT_TRUE(want_to_read == (std::set<int>{1, 2, 3})); + } + + // large, multi-stripe read starting just after 1st chunk + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 1024, swidth*42, s, chunk_mapping, &want_to_read); + // extra () due to a language / macro limitation + ASSERT_TRUE(want_to_read == (std::set<int>{0, 1, 2, 3})); + } + + // large read from the beginning + { + std::set<int> want_to_read; + ECCommon::ReadPipeline::get_min_want_to_read_shards( + 0, swidth*42, s, chunk_mapping, &want_to_read); + // extra () due to a language / macro limitation + ASSERT_TRUE(want_to_read == (std::set<int>{0, 1, 2, 3})); + } +} diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc index 876b5eb8dd7..f0acb24a3de 100644 --- a/src/test/osd/TestRados.cc +++ b/src/test/osd/TestRados.cc @@ -29,6 +29,7 @@ public: bool ec_pool, bool balance_reads, bool localize_reads, + uint8_t offlen_randomization_ratio, bool set_redirect, bool set_chunk, bool enable_dedup) : @@ -38,6 +39,7 @@ public: m_ec_pool(ec_pool), m_balance_reads(balance_reads), m_localize_reads(localize_reads), + m_offlen_randomization_ratio(offlen_randomization_ratio), m_set_redirect(set_redirect), m_set_chunk(set_chunk), m_enable_dedup(enable_dedup) @@ -264,7 +266,7 @@ private: case TEST_OP_READ: oid = *(rand_choose(context.oid_not_in_use)); return new ReadOp(m_op, &context, oid, m_balance_reads, m_localize_reads, - m_stats); + m_offlen_randomization_ratio, m_stats); case TEST_OP_WRITE: oid = *(rand_choose(context.oid_not_in_use)); @@ -452,6 +454,7 @@ private: bool m_ec_pool; bool m_balance_reads; bool m_localize_reads; + uint8_t m_offlen_randomization_ratio; bool m_set_redirect; bool m_set_chunk; bool m_enable_dedup; @@ -518,6 +521,7 @@ int main(int argc, char **argv) bool no_sparse = false; bool balance_reads = false; bool localize_reads = false; + uint8_t offlen_randomization_ratio = 50; bool set_redirect = false; bool set_chunk = false; bool enable_dedup = false; @@ -551,6 +555,8 @@ int main(int argc, char **argv) balance_reads = true; else if (strcmp(argv[i], "--localize-reads") == 0) localize_reads = true; + else if (strcmp(argv[i], "--offlen_randomization_ratio") == 0) + offlen_randomization_ratio = atoi(argv[++i]); else if (strcmp(argv[i], "--pool-snaps") == 0) pool_snaps = true; else if (strcmp(argv[i], "--write-fadvise-dontneed") == 0) @@ -711,6 +717,7 @@ int main(int argc, char **argv) ops, objects, op_weights, &stats, max_seconds, ec_pool, balance_reads, localize_reads, + offlen_randomization_ratio, set_redirect, set_chunk, enable_dedup); int r = context.init(); if (r < 0) { diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py index 0040d1e67e5..f6a48975e22 100644 --- a/src/test/pybind/test_rbd.py +++ b/src/test/pybind/test_rbd.py @@ -13,8 +13,9 @@ import sys from assertions import (assert_equal as eq, assert_raises, assert_not_equal, assert_greater_equal) -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from rados import (Rados, + LIBRADOS_SNAP_HEAD, LIBRADOS_OP_FLAG_FADVISE_DONTNEED, LIBRADOS_OP_FLAG_FADVISE_NOCACHE, LIBRADOS_OP_FLAG_FADVISE_RANDOM) @@ -33,6 +34,8 @@ from rbd import (RBD, Group, Image, ImageNotFound, InvalidArgument, ImageExists, RBD_MIRROR_IMAGE_MODE_JOURNAL, RBD_MIRROR_IMAGE_MODE_SNAPSHOT, RBD_LOCK_MODE_EXCLUSIVE, RBD_OPERATION_FEATURE_GROUP, RBD_OPERATION_FEATURE_CLONE_CHILD, + RBD_SNAP_NAMESPACE_TYPE_USER, + RBD_SNAP_NAMESPACE_TYPE_GROUP, 
RBD_SNAP_NAMESPACE_TYPE_TRASH, RBD_SNAP_NAMESPACE_TYPE_MIRROR, RBD_IMAGE_MIGRATION_STATE_PREPARED, RBD_CONFIG_SOURCE_CONFIG, @@ -589,7 +592,11 @@ class TestImage(object): def setup_method(self, method): self.rbd = RBD() + # {create,access,modify}_timestamp() have second precision, + # allow for rounding + self.time_before_create = datetime.now(timezone.utc) - timedelta(seconds=1) create_image() + self.time_after_create = datetime.now(timezone.utc) + timedelta(seconds=1) self.image = Image(ioctx, image_name) def teardown_method(self, method): @@ -624,28 +631,32 @@ class TestImage(object): @require_new_format() def test_id(self): - assert_not_equal(b'', self.image.id()) + id = self.image.id() + assert isinstance(id, str) + assert len(id) > 0 def test_block_name_prefix(self): - assert_not_equal(b'', self.image.block_name_prefix()) + block_name_prefix = self.image.block_name_prefix() + assert isinstance(block_name_prefix, str) + assert len(block_name_prefix) > 0 def test_data_pool_id(self): assert_greater_equal(self.image.data_pool_id(), 0) def test_create_timestamp(self): - timestamp = self.image.create_timestamp() - assert_not_equal(0, timestamp.year) - assert_not_equal(1970, timestamp.year) + time = self.image.create_timestamp() + assert self.time_before_create < time + assert time < self.time_after_create def test_access_timestamp(self): - timestamp = self.image.access_timestamp() - assert_not_equal(0, timestamp.year) - assert_not_equal(1970, timestamp.year) + time = self.image.access_timestamp() + assert self.time_before_create < time + assert time < self.time_after_create def test_modify_timestamp(self): - timestamp = self.image.modify_timestamp() - assert_not_equal(0, timestamp.year) - assert_not_equal(1970, timestamp.year) + time = self.image.modify_timestamp() + assert self.time_before_create < time + assert time < self.time_after_create def test_invalidate_cache(self): self.image.write(b'abc', 0) @@ -1106,14 +1117,16 @@ class TestImage(object): eq(self.image.snap_exists('snap1'), False) def test_snap_timestamp(self): + # get_snap_timestamp() has second precision, allow for rounding + time_before = datetime.now(timezone.utc) - timedelta(seconds=1) self.image.create_snap('snap1') + time_after = datetime.now(timezone.utc) + timedelta(seconds=1) eq(['snap1'], [snap['name'] for snap in self.image.list_snaps()]) for snap in self.image.list_snaps(): snap_id = snap["id"] time = self.image.get_snap_timestamp(snap_id) - assert_not_equal(b'', time.year) - assert_not_equal(0, time.year) - assert_not_equal(time.year, '1970') + assert time_before < time + assert time < time_after self.image.remove_snap('snap1') def test_limit_snaps(self): @@ -1790,6 +1803,68 @@ class TestClone(object): # unprotect, remove parent snap happen in cleanup, and should succeed + def test_clone_by_snap_id(self): + clone_name2 = get_temp_image_name() + assert_raises(TypeError, self.rbd.clone, ioctx, image_name, + None, ioctx, clone_name2, features) + assert_raises(TypeError, self.rbd.clone, ioctx, image_name, + 1.0, ioctx, clone_name2, features) + assert_raises(InvalidArgument, self.rbd.clone, ioctx, image_name, + LIBRADOS_SNAP_HEAD, ioctx, clone_name2, features) + + self.image.create_snap('snap2') + snap_id = self.image.snap_get_id('snap2') + self.image.remove_snap('snap2') + assert_raises(ImageNotFound, self.image.snap_get_trash_namespace, + snap_id) + assert_raises(ImageNotFound, self.rbd.clone, ioctx, image_name, + snap_id, ioctx, clone_name2, features, clone_format=1) + assert_raises(ImageNotFound, self.rbd.clone, 
ioctx, image_name, + snap_id, ioctx, clone_name2, features, clone_format=2) + + snap_id = self.image.snap_get_id('snap1') + self.rbd.clone(ioctx, image_name, snap_id, ioctx, clone_name2, + features, clone_format=1) + with Image(ioctx, clone_name2) as clone2: + assert clone2.parent_info() == self.clone.parent_info() + assert clone2.op_features() == 0 + self.rbd.remove(ioctx, clone_name2) + self.rbd.clone(ioctx, image_name, snap_id, ioctx, clone_name2, + features, clone_format=2) + with Image(ioctx, clone_name2) as clone2: + assert clone2.parent_info() == self.clone.parent_info() + assert clone2.op_features() == RBD_OPERATION_FEATURE_CLONE_CHILD + self.rbd.remove(ioctx, clone_name2) + + self.image.create_snap('snap2') + snap_id = self.image.snap_get_id('snap2') + assert_raises(InvalidArgument, self.rbd.clone, ioctx, image_name, + snap_id, ioctx, clone_name2, features, clone_format=1) + self.rbd.clone(ioctx, image_name, snap_id, ioctx, clone_name2, + features, clone_format=2) + with Image(ioctx, clone_name2) as clone2: + clone2_parent_info = clone2.parent_info() + clone_parent_info = self.clone.parent_info() + assert clone2_parent_info[0] == clone_parent_info[0] + assert clone2_parent_info[1] == clone_parent_info[1] + assert clone2_parent_info[2] == 'snap2' + assert clone_parent_info[2] == 'snap1' + + self.image.remove_snap('snap2') + trash_snap = self.image.snap_get_trash_namespace(snap_id) + assert trash_snap == { + 'original_namespace_type' : RBD_SNAP_NAMESPACE_TYPE_USER, + 'original_name' : 'snap2' + } + clone_name3 = get_temp_image_name() + assert_raises(InvalidArgument, self.rbd.clone, ioctx, image_name, + snap_id, ioctx, clone_name3, features, clone_format=1) + assert_raises(ImageNotFound, self.rbd.clone, ioctx, image_name, + snap_id, ioctx, clone_name3, features, clone_format=2) + self.rbd.remove(ioctx, clone_name2) + assert_raises(ImageNotFound, self.image.snap_get_trash_namespace, + snap_id) + def test_stat(self): image_info = self.image.stat() clone_info = self.clone.stat() @@ -2075,7 +2150,11 @@ class TestClone(object): snaps = [s for s in self.image.list_snaps() if s['name'] != 'snap1'] eq([RBD_SNAP_NAMESPACE_TYPE_TRASH], [s['namespace'] for s in snaps]) - eq([{'original_name' : 'snap2'}], [s['trash'] for s in snaps]) + trash_snap = { + 'original_namespace_type' : RBD_SNAP_NAMESPACE_TYPE_USER, + 'original_name' : 'snap2' + } + eq([trash_snap], [s['trash'] for s in snaps]) self.rbd.remove(ioctx, clone_name) eq([], [s for s in self.image.list_snaps() if s['name'] != 'snap1']) @@ -2769,6 +2848,11 @@ class TestGroups(object): eq([], list(self.group.list_images())) RBD().trash_restore(ioctx, image_id, image_name) + def test_group_get_id(self): + id = self.group.id() + assert isinstance(id, str) + assert len(id) > 0 + def test_group_image_many_images(self): eq([], list(self.group.list_images())) self.group.add_image(ioctx, image_name) @@ -2805,7 +2889,7 @@ class TestGroups(object): eq([snap_name], [snap['name'] for snap in self.group.list_snaps()]) for snap in self.image.list_snaps(): - eq(rbd.RBD_SNAP_NAMESPACE_TYPE_GROUP, snap['namespace']) + eq(RBD_SNAP_NAMESPACE_TYPE_GROUP, snap['namespace']) info = snap['group'] eq(group_name, info['group_name']) eq(snap_name, info['group_snap_name']) @@ -2870,34 +2954,290 @@ class TestGroups(object): self.group.remove_snap(new_snap_name) eq([], list(self.group.list_snaps())) - def test_group_snap_rollback(self): - eq([], list(self.group.list_images())) - self.group.add_image(ioctx, image_name) + @require_features([RBD_FEATURE_LAYERING]) + def 
test_group_snap_clone(self): + data = rand_data(256) with Image(ioctx, image_name) as image: - image.write(b'\0' * 256, 0) - read = image.read(0, 256) - eq(read, b'\0' * 256) + image.write(data, 0) - global snap_name - eq([], list(self.group.list_snaps())) + self.group.add_image(ioctx, image_name) self.group.create_snap(snap_name) - eq([snap_name], [snap['name'] for snap in self.group.list_snaps()]) + assert [s['name'] for s in self.group.list_snaps()] == [snap_name] + image_snaps = list(self.image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_GROUP] + image_snap_name = image_snaps[0]['name'] + image_snap_id = image_snaps[0]['id'] + assert image_snaps[0]['group'] == { + 'pool' : ioctx.get_pool_id(), + 'name' : group_name, + 'snap_name' : snap_name, + } + + clone_name = get_temp_image_name() + assert_raises(ImageNotFound, self.rbd.clone, ioctx, image_name, + image_snap_name, ioctx, clone_name, features, clone_format=1) + assert_raises(InvalidArgument, self.rbd.clone, ioctx, image_name, + image_snap_id, ioctx, clone_name, features, clone_format=1) + assert_raises(ImageNotFound, self.rbd.clone, ioctx, image_name, + image_snap_name, ioctx, clone_name, features, clone_format=2) + self.rbd.clone(ioctx, image_name, image_snap_id, ioctx, clone_name, + features, clone_format=2) + with Image(ioctx, clone_name) as clone: + parent_spec = clone.get_parent_image_spec() + assert parent_spec['pool_name'] == pool_name + assert parent_spec['image_name'] == image_name + assert parent_spec['snap_namespace_type'] == RBD_SNAP_NAMESPACE_TYPE_GROUP + assert parent_spec['snap_name'] == image_snap_name + assert parent_spec['snap_id'] == image_snap_id + read = clone.read(0, 256) + assert read == data + + self.group.remove_snap(snap_name) + assert list(self.group.list_snaps()) == [] + image_snaps = list(self.image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_TRASH] + trash_image_snap_name = image_snaps[0]['name'] + assert image_snaps[0]['id'] == image_snap_id + assert image_snaps[0]['trash'] == { + 'original_namespace_type' : RBD_SNAP_NAMESPACE_TYPE_GROUP, + 'original_name' : image_snap_name + } + assert trash_image_snap_name != image_snap_name + + with Image(ioctx, clone_name) as clone: + parent_spec = clone.get_parent_image_spec() + assert parent_spec['pool_name'] == pool_name + assert parent_spec['image_name'] == image_name + assert parent_spec['snap_namespace_type'] == RBD_SNAP_NAMESPACE_TYPE_TRASH + assert parent_spec['snap_name'] == trash_image_snap_name + assert parent_spec['snap_id'] == image_snap_id + read = clone.read(0, 256) + assert read == data + + self.rbd.remove(ioctx, clone_name) + assert list(self.image.list_snaps()) == [] + @require_features([RBD_FEATURE_LAYERING]) + def test_group_snap_clone_flatten(self): + data = rand_data(256) with Image(ioctx, image_name) as image: - data = rand_data(256) image.write(data, 0) + + self.group.add_image(ioctx, image_name) + self.group.create_snap(snap_name) + assert [s['name'] for s in self.group.list_snaps()] == [snap_name] + image_snaps = list(self.image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_GROUP] + image_snap_id = image_snaps[0]['id'] + + clone_name = get_temp_image_name() + self.rbd.clone(ioctx, image_name, image_snap_id, ioctx, clone_name, + features, clone_format=2) + self.group.remove_snap(snap_name) + assert list(self.group.list_snaps()) == [] + image_snaps = list(self.image.list_snaps()) + assert [s['namespace'] for s 
in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_TRASH] + assert image_snaps[0]['id'] == image_snap_id + + with Image(ioctx, clone_name) as clone: + parent_spec = clone.get_parent_image_spec() + assert parent_spec['pool_id'] == ioctx.get_pool_id() + assert parent_spec['image_id'] == self.image.id() + assert parent_spec['snap_id'] == image_snap_id + read = clone.read(0, 256) + assert read == data + clone.flatten() + + assert list(self.image.list_snaps()) == [] + with Image(ioctx, clone_name) as clone: + assert_raises(ImageNotFound, clone.get_parent_image_spec) + read = clone.read(0, 256) + assert read == data + + self.rbd.remove(ioctx, clone_name) + + def test_group_snap_rollback(self): + for _ in range(1, 3): + create_image() + self.image_names.append(image_name) + + with Image(ioctx, self.image_names[0]) as image: + image.write(b'1' * 256, 0) + with Image(ioctx, self.image_names[1]) as image: + image.write(b'2' * 256, 0) + with Image(ioctx, self.image_names[2]) as image: + image.write(b'3' * 256, 0) + self.group.add_image(ioctx, self.image_names[0]) + snap_name1 = get_temp_snap_name() + self.group.create_snap(snap_name1) + + with Image(ioctx, self.image_names[0]) as image: + image.write(b'4' * 256, 0) + with Image(ioctx, self.image_names[1]) as image: + image.write(b'5' * 256, 0) + with Image(ioctx, self.image_names[2]) as image: + image.write(b'6' * 256, 0) + self.group.add_image(ioctx, self.image_names[1]) + snap_name2 = get_temp_snap_name() + self.group.create_snap(snap_name2) + + with Image(ioctx, self.image_names[0]) as image: + image.write(b'7' * 256, 0) + with Image(ioctx, self.image_names[1]) as image: + image.write(b'8' * 256, 0) + with Image(ioctx, self.image_names[2]) as image: + image.write(b'9' * 256, 0) + self.group.add_image(ioctx, self.image_names[2]) + snap_name3 = get_temp_snap_name() + self.group.create_snap(snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + image.write(b'a' * 256, 0) + with Image(ioctx, self.image_names[1]) as image: + image.write(b'b' * 256, 0) + with Image(ioctx, self.image_names[2]) as image: + image.write(b'c' * 256, 0) + + for i in range(0, 3): + self.group.remove_image(ioctx, self.image_names[i]) + with Image(ioctx, self.image_names[0]) as image: + image_snaps = list(image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_GROUP, + RBD_SNAP_NAMESPACE_TYPE_GROUP, + RBD_SNAP_NAMESPACE_TYPE_GROUP] + with Image(ioctx, self.image_names[1]) as image: + image_snaps = list(image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_GROUP, + RBD_SNAP_NAMESPACE_TYPE_GROUP] + with Image(ioctx, self.image_names[2]) as image: + image_snaps = list(image.list_snaps()) + assert [s['namespace'] for s in image_snaps] == [RBD_SNAP_NAMESPACE_TYPE_GROUP] + + # group = [] + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + read = image.read(0, 256) + assert read == b'a' * 256 + with Image(ioctx, self.image_names[1]) as image: read = image.read(0, 256) - eq(read, data) + assert read == b'b' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'c' * 256 - self.group.rollback_to_snap(snap_name) - with Image(ioctx, image_name) as image: + # group = [img0] + self.group.add_image(ioctx, self.image_names[0]) + 
self.group.rollback_to_snap(snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + read = image.read(0, 256) + assert read == b'1' * 256 + with Image(ioctx, self.image_names[1]) as image: + read = image.read(0, 256) + assert read == b'b' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'c' * 256 + + # group = [img1] + self.group.remove_image(ioctx, self.image_names[0]) + self.group.add_image(ioctx, self.image_names[1]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + # group = [img2] + self.group.remove_image(ioctx, self.image_names[1]) + self.group.add_image(ioctx, self.image_names[2]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + # group = [img0 img1] + self.group.remove_image(ioctx, self.image_names[2]) + # re-add in reverse order to test that order doesn't matter + self.group.add_image(ioctx, self.image_names[1]) + self.group.add_image(ioctx, self.image_names[0]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + self.group.rollback_to_snap(snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + with Image(ioctx, self.image_names[0]) as image: read = image.read(0, 256) - eq(read, b'\0' * 256) + assert read == b'4' * 256 + with Image(ioctx, self.image_names[1]) as image: + read = image.read(0, 256) + assert read == b'5' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'c' * 256 + + # group = [img0 img2] + self.group.remove_image(ioctx, self.image_names[1]) + self.group.add_image(ioctx, self.image_names[2]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + # group = [img1 img2] + self.group.remove_image(ioctx, self.image_names[0]) + self.group.add_image(ioctx, self.image_names[1]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + # group = [img0 img1 img2] + self.group.add_image(ioctx, self.image_names[0]) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + self.group.rollback_to_snap(snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + read = image.read(0, 256) + assert read == b'7' * 256 + with Image(ioctx, self.image_names[1]) as image: + read = image.read(0, 256) + assert read == b'8' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'9' * 256 - self.group.remove_image(ioctx, image_name) - eq([], list(self.group.list_images())) - self.group.remove_snap(snap_name) - eq([], list(self.group.list_snaps())) + # group = [img0 img1] + self.group.remove_image(ioctx, self.image_names[2]) + 
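
Every membership combination in this test exercises one rule: Group.rollback_to_snap() succeeds only when the group's current member images exactly match the members recorded in the group snapshot, and raises InvalidArgument on any mismatch. A minimal sketch of that contract, assuming the same rbd Python bindings these tests import (the helper name is illustrative):

    import rbd

    def rollback_if_members_match(group, snap_name):
        # The library enforces the membership check itself; callers only
        # need to distinguish "rolled back" from "membership mismatch".
        try:
            group.rollback_to_snap(snap_name)
            return True
        except rbd.InvalidArgument:
            return False
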
assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name1) + self.group.rollback_to_snap(snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + read = image.read(0, 256) + assert read == b'4' * 256 + with Image(ioctx, self.image_names[1]) as image: + read = image.read(0, 256) + assert read == b'5' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'9' * 256 + + # group = [img0] + self.group.remove_image(ioctx, self.image_names[1]) + self.group.rollback_to_snap(snap_name1) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name2) + assert_raises(InvalidArgument, self.group.rollback_to_snap, snap_name3) + + with Image(ioctx, self.image_names[0]) as image: + read = image.read(0, 256) + assert read == b'1' * 256 + with Image(ioctx, self.image_names[1]) as image: + read = image.read(0, 256) + assert read == b'5' * 256 + with Image(ioctx, self.image_names[2]) as image: + read = image.read(0, 256) + assert read == b'9' * 256 class TestMigration(object): diff --git a/src/test/rbd_mirror/test_ImageDeleter.cc b/src/test/rbd_mirror/test_ImageDeleter.cc index 5fa5d6db512..6b5993591fd 100644 --- a/src/test/rbd_mirror/test_ImageDeleter.cc +++ b/src/test/rbd_mirror/test_ImageDeleter.cc @@ -202,7 +202,7 @@ public: librbd::ImageOptions clone_opts; clone_opts.set(RBD_IMAGE_OPTION_FEATURES, ictx->features); EXPECT_EQ(0, librbd::clone(m_local_io_ctx, m_local_image_id.c_str(), - nullptr, "snap1", m_local_io_ctx, + nullptr, CEPH_NOSNAP, "snap1", m_local_io_ctx, clone_id.c_str(), "clone1", clone_opts, GLOBAL_CLONE_IMAGE_ID, m_remote_mirror_uuid)); diff --git a/src/test/rgw/CMakeLists.txt b/src/test/rgw/CMakeLists.txt index c96e9012790..1b3c1363498 100644 --- a/src/test/rgw/CMakeLists.txt +++ b/src/test/rgw/CMakeLists.txt @@ -268,6 +268,14 @@ target_include_directories(unittest_rgw_lc target_link_libraries(unittest_rgw_lc rgw_common ${rgw_libs} ${EXPAT_LIBRARIES}) +# unittest_rgw_cksum +add_executable(unittest_rgw_cksum test_rgw_cksum.cc) +add_ceph_unittest(unittest_rgw_cksum) +target_include_directories(unittest_rgw_cksum + SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/rgw") +target_link_libraries(unittest_rgw_cksum + rgw_common ${rgw_libs}) + # unittest_rgw_arn add_executable(unittest_rgw_arn test_rgw_arn.cc) add_ceph_unittest(unittest_rgw_arn) diff --git a/src/test/rgw/bucket_notification/test_bn.py b/src/test/rgw/bucket_notification/test_bn.py index 61b7374b7ef..54a2a0e98ee 100644 --- a/src/test/rgw/bucket_notification/test_bn.py +++ b/src/test/rgw/bucket_notification/test_bn.py @@ -759,7 +759,7 @@ def test_ps_s3_topic_admin_on_master(): assert_equal(topic_arn2, 'arn:aws:sns:' + zonegroup + ':' + tenant + ':' + topic_name + '_2') endpoint_address = 'http://127.0.0.1:9002' - endpoint_args = 'push-endpoint='+endpoint_address + endpoint_args = 'push-endpoint=' + endpoint_address + '&persistent=true' topic_conf3 = PSTopicS3(conn, topic_name+'_3', zonegroup, endpoint_args=endpoint_args) topic_arn3 = topic_conf3.set_config() assert_equal(topic_arn3, @@ -770,6 +770,24 @@ def test_ps_s3_topic_admin_on_master(): assert_equal(parsed_result['arn'], topic_arn3) matches = [tenant, UID_PREFIX] assert_true( all([x in parsed_result['owner'] for x in matches])) + assert_equal(parsed_result['dest']['persistent_queue'], + tenant + ":" + topic_name + '_3') + + # recall CreateTopic and verify the owner and persistent_queue remain same. 
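
The hunk continues below by re-issuing CreateTopic with the same name; the property under test is idempotency — recreating an existing topic must preserve its owner and its persistent_queue. A sketch of the verification step, assuming the admin() and get_config_cluster() helpers defined in this file (the function name is illustrative):

    import json

    def topic_owner_and_queue(topic_name, tenant):
        # Read the topic back via radosgw-admin and return the two
        # fields that a repeated CreateTopic must leave unchanged.
        result = admin(['topic', 'get', '--topic', topic_name,
                        '--tenant', tenant], get_config_cluster())
        parsed = json.loads(result[0])
        return parsed['owner'], parsed['dest']['persistent_queue']
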
+ topic_conf3 = PSTopicS3(conn, topic_name + '_3', zonegroup, + endpoint_args=endpoint_args) + topic_arn3 = topic_conf3.set_config() + assert_equal(topic_arn3, + 'arn:aws:sns:' + zonegroup + ':' + tenant + ':' + topic_name + '_3') + # get topic 3 via commandline + result = admin( + ['topic', 'get', '--topic', topic_name + '_3', '--tenant', tenant], + get_config_cluster()) + parsed_result = json.loads(result[0]) + assert_equal(parsed_result['arn'], topic_arn3) + assert_true(all([x in parsed_result['owner'] for x in matches])) + assert_equal(parsed_result['dest']['persistent_queue'], + tenant + ":" + topic_name + '_3') # delete topic 3 remove_topic(topic_name + '_3', tenant) @@ -2939,25 +2957,48 @@ def wait_for_queue_to_drain(topic_name, tenant=None, account=None, http_port=Non log.info('waited for %ds for queue %s to drain', time_diff, topic_name) -@attr('basic_test') -def test_ps_s3_persistent_topic_stats(): - """ test persistent topic stats """ - conn = connection() +@attr('kafka_test') +def persistent_topic_stats(conn, endpoint_type): zonegroup = get_config_zonegroup() - # create random port for the http server - host = get_ip() - port = random.randint(10000, 20000) - # create bucket bucket_name = gen_bucket_name() bucket = conn.create_bucket(bucket_name) topic_name = bucket_name + TOPIC_SUFFIX + host = get_ip() + task = None + port = None + if endpoint_type == 'http': + # create random port for the http server + port = random.randint(10000, 20000) + # start an http server in a separate thread + receiver = HTTPServerWithEvents((host, port)) + endpoint_address = 'http://'+host+':'+str(port) + endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'amqp': + # start amqp receiver + exchange = 'ex1' + task, receiver = create_amqp_receiver_thread(exchange, topic_name) + task.start() + endpoint_address = 'amqp://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&amqp-exchange='+exchange+'&amqp-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'kafka': + # start kafka receiver + task, receiver = create_kafka_receiver_thread(topic_name) + task.start() + endpoint_address = 'kafka://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + else: + return SkipTest('Unknown endpoint type: ' + endpoint_type) + # create s3 topic - endpoint_address = 'http://'+host+':'+str(port) - endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ - '&retry_sleep_duration=1' + endpoint_address = 'kafka://' + host + ':1234' # wrong port + endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' topic_conf = PSTopicS3(conn, topic_name, zonegroup, endpoint_args=endpoint_args) topic_arn = topic_conf.set_config() # create s3 notification @@ -3004,8 +3045,12 @@ def test_ps_s3_persistent_topic_stats(): # topic stats get_stats_persistent_topic(topic_name, 2 * number_of_objects) - # start an http server in a separate thread - http_server = HTTPServerWithEvents((host, port)) + # change the endpoint port + endpoint_address = 'kafka://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + topic_conf = PSTopicS3(conn, topic_name, zonegroup, endpoint_args=endpoint_args) + topic_arn = topic_conf.set_config() wait_for_queue_to_drain(topic_name, http_port=port) @@ -3014,7 
+3059,115 @@ def test_ps_s3_persistent_topic_stats():
     topic_conf.del_config()
     # delete the bucket
     conn.delete_bucket(bucket_name)
-    http_server.close()
+    receiver.close(task)
+
+
+@attr('http_test')
+def test_persistent_topic_stats_http():
+    """ test persistent topic stats, http endpoint """
+    conn = connection()
+    persistent_topic_stats(conn, 'http')
+
+
+@attr('kafka_test')
+def test_persistent_topic_stats_kafka():
+    """ test persistent topic stats, kafka endpoint """
+    conn = connection()
+    persistent_topic_stats(conn, 'kafka')
+
+
+@attr('kafka_test')
+def test_persistent_topic_dump():
+    """ test persistent topic dump """
+    conn = connection()
+    zonegroup = get_config_zonegroup()
+
+    # create bucket
+    bucket_name = gen_bucket_name()
+    bucket = conn.create_bucket(bucket_name)
+    topic_name = bucket_name + TOPIC_SUFFIX
+
+    # start kafka receiver
+    host = get_ip()
+    task, receiver = create_kafka_receiver_thread(topic_name)
+    task.start()
+
+    # create s3 topic with an unreachable endpoint
+    endpoint_address = 'kafka://WrongHost'  # wrong host
+    endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \
+        '&retry_sleep_duration=1'
+    topic_conf = PSTopicS3(conn, topic_name, zonegroup, endpoint_args=endpoint_args)
+    topic_arn = topic_conf.set_config()
+    # create s3 notification
+    notification_name = bucket_name + NOTIFICATION_SUFFIX
+    topic_conf_list = [{'Id': notification_name, 'TopicArn': topic_arn,
+                        'Events': []
+                       }]
+
+    s3_notification_conf = PSNotificationS3(conn, bucket_name, topic_conf_list)
+    response, status = s3_notification_conf.set_config()
+    assert_equal(status/100, 2)
+
+    # create objects in the bucket (async)
+    number_of_objects = 20
+    client_threads = []
+    start_time = time.time()
+    for i in range(number_of_objects):
+        key = bucket.new_key('key-'+str(i))
+        content = str(os.urandom(1024*1024))
+        thr = threading.Thread(target = set_contents_from_string, args=(key, content,))
+        thr.start()
+        client_threads.append(thr)
+    [thr.join() for thr in client_threads]
+    time_diff = time.time() - start_time
+    print('average time for creation + async notification is: ' + str(time_diff*1000/number_of_objects) + ' milliseconds')
+
+    # topic dump should show one entry per creation event
+    result = admin(['topic', 'dump', '--topic', topic_name], get_config_cluster())
+    assert_equal(result[1], 0)
+    parsed_result = json.loads(result[0])
+    assert_equal(len(parsed_result), number_of_objects)
+
+    # delete objects from the bucket
+    client_threads = []
+    start_time = time.time()
+    for key in bucket.list():
+        thr = threading.Thread(target = key.delete, args=())
+        thr.start()
+        client_threads.append(thr)
+    [thr.join() for thr in client_threads]
+    time_diff = time.time() - start_time
+    print('average time for deletion + async notification is: ' + str(time_diff*1000/number_of_objects) + ' milliseconds')
+
+    # topic dump should now also show the deletion events
+    result = admin(['topic', 'dump', '--topic', topic_name], get_config_cluster())
+    assert_equal(result[1], 0)
+    print(result[0])
+    parsed_result = json.loads(result[0])
+    assert_equal(len(parsed_result), 2*number_of_objects)
+
+    # point the topic at the correct host
+    endpoint_address = 'kafka://' + host
+    endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \
+        '&retry_sleep_duration=1'
+    topic_conf = PSTopicS3(conn, topic_name, zonegroup, endpoint_args=endpoint_args)
+    topic_arn = topic_conf.set_config()
+
+    wait_for_queue_to_drain(topic_name)
+
+    result = admin(['topic', 'dump', '--topic', topic_name], get_config_cluster())
+    assert_equal(result[1], 0)
+    parsed_result =
json.loads(result[0]) + assert_equal(len(parsed_result), 0) + + # cleanup + s3_notification_conf.del_config() + topic_conf.del_config() + # delete the bucket + conn.delete_bucket(bucket_name) + receiver.close(task) + def ps_s3_persistent_topic_configs(persistency_time, config_dict): conn = connection() @@ -3548,33 +3701,50 @@ def test_ps_s3_persistent_multiple_gateways(): http_server.close() -@attr('http_test') -def test_ps_s3_persistent_multiple_endpoints(): - """ test pushing persistent notification when one of the endpoints has error """ - conn = connection() +def persistent_topic_multiple_endpoints(conn, endpoint_type): zonegroup = get_config_zonegroup() - # create random port for the http server - host = get_ip() - port = random.randint(10000, 20000) - # start an http server in a separate thread - number_of_objects = 10 - http_server = HTTPServerWithEvents((host, port)) - # create bucket bucket_name = gen_bucket_name() bucket = conn.create_bucket(bucket_name) topic_name = bucket_name + TOPIC_SUFFIX + topic_name_1 = topic_name+'_1' + + host = get_ip() + task = None + port = None + if endpoint_type == 'http': + # create random port for the http server + port = random.randint(10000, 20000) + # start an http server in a separate thread + receiver = HTTPServerWithEvents((host, port)) + endpoint_address = 'http://'+host+':'+str(port) + endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'amqp': + # start amqp receiver + exchange = 'ex1' + task, receiver = create_amqp_receiver_thread(exchange, topic_name_1) + task.start() + endpoint_address = 'amqp://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&amqp-exchange='+exchange+'&amqp-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'kafka': + # start kafka receiver + task, receiver = create_kafka_receiver_thread(topic_name_1) + task.start() + endpoint_address = 'kafka://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + else: + return SkipTest('Unknown endpoint type: ' + endpoint_type) # create two s3 topics - endpoint_address = 'http://'+host+':'+str(port) - endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ - '&retry_sleep_duration=1' - topic_conf1 = PSTopicS3(conn, topic_name+'_1', zonegroup, endpoint_args=endpoint_args) + topic_conf1 = PSTopicS3(conn, topic_name_1, zonegroup, endpoint_args=endpoint_args) topic_arn1 = topic_conf1.set_config() endpoint_address = 'http://kaboom:9999' endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ - '&retry_sleep_duration=1' + '&retry_sleep_duration=1' topic_conf2 = PSTopicS3(conn, topic_name+'_2', zonegroup, endpoint_args=endpoint_args) topic_arn2 = topic_conf2.set_config() @@ -3596,6 +3766,7 @@ def test_ps_s3_persistent_multiple_endpoints(): client_threads = [] start_time = time.time() + number_of_objects = 10 for i in range(number_of_objects): key = bucket.new_key(str(i)) content = str(os.urandom(1024*1024)) @@ -3606,9 +3777,8 @@ def test_ps_s3_persistent_multiple_endpoints(): keys = list(bucket.list()) - wait_for_queue_to_drain(topic_name+'_1') - - http_server.verify_s3_events(keys, exact_match=True, deletions=False) + wait_for_queue_to_drain(topic_name_1, http_port=port) + receiver.verify_s3_events(keys, exact_match=True, deletions=False) # delete objects from the bucket client_threads = [] @@ -3619,9 +3789,8 @@ def 
test_ps_s3_persistent_multiple_endpoints(): client_threads.append(thr) [thr.join() for thr in client_threads] - wait_for_queue_to_drain(topic_name+'_1') - - http_server.verify_s3_events(keys, exact_match=True, deletions=True) + wait_for_queue_to_drain(topic_name_1, http_port=port) + receiver.verify_s3_events(keys, exact_match=True, deletions=True) # cleanup s3_notification_conf1.del_config() @@ -3629,7 +3798,22 @@ def test_ps_s3_persistent_multiple_endpoints(): s3_notification_conf2.del_config() topic_conf2.del_config() conn.delete_bucket(bucket_name) - http_server.close() + receiver.close(task) + + +@attr('http_test') +def test_persistent_multiple_endpoints_http(): + """ test pushing persistent notification when one of the endpoints has error, http endpoint """ + conn = connection() + persistent_topic_multiple_endpoints(conn, 'http') + + +@attr('kafka_test') +def test_persistent_multiple_endpoints_kafka(): + """ test pushing persistent notification when one of the endpoints has error, kafka endpoint """ + conn = connection() + persistent_topic_multiple_endpoints(conn, 'kafka') + def persistent_notification(endpoint_type, conn, account=None): """ test pushing persistent notification """ @@ -4563,18 +4747,12 @@ def test_persistent_ps_s3_reload(): http_server.close() -@attr('data_path_v2_test') -def test_persistent_ps_s3_data_path_v2_migration(): +def persistent_data_path_v2_migration(conn, endpoint_type): """ test data path v2 persistent migration """ if get_config_cluster() == 'noname': return SkipTest('realm is needed for migration test') - conn = connection() zonegroup = get_config_zonegroup() - # create random port for the http server - host = get_ip() - http_port = random.randint(10000, 20000) - # disable v2 notification zonegroup_modify_feature(enable=False, feature_name=zonegroup_feature_notification_v2) @@ -4583,10 +4761,35 @@ def test_persistent_ps_s3_data_path_v2_migration(): bucket = conn.create_bucket(bucket_name) topic_name = bucket_name + TOPIC_SUFFIX - # create s3 topic - endpoint_address = 'http://'+host+':'+str(http_port) - endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ - '&retry_sleep_duration=1' + host = get_ip() + task = None + port = None + if endpoint_type == 'http': + # create random port for the http server + port = random.randint(10000, 20000) + # start an http server in a separate thread + receiver = HTTPServerWithEvents((host, port)) + endpoint_address = 'http://'+host+':'+str(port) + endpoint_args = 'push-endpoint='+endpoint_address+'&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'amqp': + # start amqp receiver + exchange = 'ex1' + task, receiver = create_amqp_receiver_thread(exchange, topic_name) + task.start() + endpoint_address = 'amqp://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&amqp-exchange='+exchange+'&amqp-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + elif endpoint_type == 'kafka': + # start kafka receiver + task, receiver = create_kafka_receiver_thread(topic_name) + task.start() + endpoint_address = 'kafka://' + host + endpoint_args = 'push-endpoint='+endpoint_address+'&kafka-ack-level=broker&persistent=true'+ \ + '&retry_sleep_duration=1' + else: + return SkipTest('Unknown endpoint type: ' + endpoint_type) + topic_conf = PSTopicS3(conn, topic_name, zonegroup, endpoint_args=endpoint_args) topic_arn = topic_conf.set_config() # create s3 notification @@ -4653,14 +4856,11 @@ def test_persistent_ps_s3_data_path_v2_migration(): # topic stats 
 get_stats_persistent_topic(topic_name, 2 * number_of_objects)
-        # start an http server in a separate thread
-        http_server = HTTPServerWithEvents((host, http_port))
-
-        wait_for_queue_to_drain(topic_name, http_port=http_port)
+        wait_for_queue_to_drain(topic_name)
         # verify events
         keys = list(bucket.list())
         # exact match is false because the notifications are persistent.
-        http_server.verify_s3_events(keys, exact_match=False)
+        receiver.verify_s3_events(keys, exact_match=False)
     except Exception as e:
         assert False, str(e)
@@ -4677,8 +4877,21 @@ def test_persistent_ps_s3_data_path_v2_migration():
     [thr.join() for thr in client_threads]
     # delete the bucket
     conn.delete_bucket(bucket_name)
-    if http_server:
-        http_server.close()
+    receiver.close(task)
+
+
+@attr('data_path_v2_test')
+def test_persistent_data_path_v2_migration_http():
+    """ test data path v2 persistent migration, http endpoint """
+    conn = connection()
+    persistent_data_path_v2_migration(conn, 'http')
+
+
+@attr('data_path_v2_kafka_test')
+def test_persistent_data_path_v2_migration_kafka():
+    """ test data path v2 persistent migration, kafka endpoint """
+    conn = connection()
+    persistent_data_path_v2_migration(conn, 'kafka')
 
 @attr('data_path_v2_test')
@@ -5008,3 +5221,188 @@ def test_ps_s3_data_path_v2_mixed_migration():
     for conn, bucket in zip(connections_list, buckets_list):
         conn.delete_bucket(bucket.name)
+
+@attr('kafka_test')
+def test_notification_caching():
+    """ test notification caching """
+    conn = connection()
+    # create bucket
+    bucket_name = gen_bucket_name()
+    bucket = conn.create_bucket(bucket_name)
+    topic_name = bucket_name + TOPIC_SUFFIX
+
+    # start kafka receiver
+    task, receiver = create_kafka_receiver_thread(topic_name)
+    task.start()
+    incorrect_port = 8080
+    endpoint_address = 'kafka://' + kafka_server + ':' + str(incorrect_port)
+    endpoint_args = 'push-endpoint=' + endpoint_address + '&kafka-ack-level=broker' + '&persistent=true'
+
+    # create s3 topic
+    zonegroup = get_config_zonegroup()
+    topic_conf = PSTopicS3(conn, topic_name, zonegroup,
+                           endpoint_args=endpoint_args)
+    topic_arn = topic_conf.set_config()
+    # create s3 notification
+    notification_name = bucket_name + NOTIFICATION_SUFFIX
+    topic_conf_list = [{'Id': notification_name, 'TopicArn': topic_arn,
+                        'Events': []
+                       }]
+
+    s3_notification_conf = PSNotificationS3(conn, bucket_name, topic_conf_list)
+    response, status = s3_notification_conf.set_config()
+    assert_equal(status / 100, 2)
+
+    # create objects in the bucket (async)
+    number_of_objects = 10
+    client_threads = []
+    start_time = time.time()
+    for i in range(number_of_objects):
+        key = bucket.new_key(str(i))
+        content = str(os.urandom(1024 * 1024))
+        thr = threading.Thread(target=set_contents_from_string,
+                               args=(key, content,))
+        thr.start()
+        client_threads.append(thr)
+    [thr.join() for thr in client_threads]
+
+    time_diff = time.time() - start_time
+    print('average time for creation + async notification is: ' + str(
+        time_diff * 1000 / number_of_objects) + ' milliseconds')
+
+    # delete objects from the bucket
+    client_threads = []
+    start_time = time.time()
+    for key in bucket.list():
+        thr = threading.Thread(target=key.delete, args=())
+        thr.start()
+        client_threads.append(thr)
+    [thr.join() for thr in client_threads]
+
+    time_diff = time.time() - start_time
+    print('average time for deletion + async notification is: ' + str(
+        time_diff * 1000 / number_of_objects) + ' milliseconds')
+
+    time.sleep(30)
+    # topic stats
+    result = admin(['topic', 'stats', '--topic', topic_name],
get_config_cluster()) + assert_equal(result[1], 0) + parsed_result = json.loads(result[0]) + assert_equal(parsed_result['Topic Stats']['Entries'], 2 * number_of_objects) + + # remove the port and update the topic, so its pointing to correct endpoint. + endpoint_address = 'kafka://' + kafka_server + # update s3 topic + topic_conf.set_attributes(attribute_name="push-endpoint", + attribute_val=endpoint_address) + keys = list(bucket.list()) + wait_for_queue_to_drain(topic_name) + receiver.verify_s3_events(keys, deletions=True) + + # cleanup + s3_notification_conf.del_config() + topic_conf.del_config() + # delete the bucket + conn.delete_bucket(bucket_name) + receiver.close(task) + + +@attr('kafka_test') +def test_connection_caching(): + """ test connection caching """ + conn = connection() + # create bucket + bucket_name = gen_bucket_name() + bucket = conn.create_bucket(bucket_name) + topic_name_1 = bucket_name + TOPIC_SUFFIX + "-without-ssl" + topic_name_2 = bucket_name + TOPIC_SUFFIX + "-with-ssl" + + # start kafka receiver + task_1, receiver_1 = create_kafka_receiver_thread(topic_name_1) + task_1.start() + task_2, receiver_2 = create_kafka_receiver_thread(topic_name_2) + task_2.start() + endpoint_address = 'kafka://' + kafka_server + endpoint_args = 'push-endpoint=' + endpoint_address + '&kafka-ack-level=broker&use-ssl=true' + '&persistent=true' + + # initially create both s3 topics with `use-ssl=true` + zonegroup = get_config_zonegroup() + topic_conf_1 = PSTopicS3(conn, topic_name_1, zonegroup, + endpoint_args=endpoint_args) + topic_arn_1 = topic_conf_1.set_config() + topic_conf_2 = PSTopicS3(conn, topic_name_2, zonegroup, + endpoint_args=endpoint_args) + topic_arn_2 = topic_conf_2.set_config() + # create s3 notification + notification_name = bucket_name + NOTIFICATION_SUFFIX + topic_conf_list = [{'Id': notification_name + '_1', 'TopicArn': topic_arn_1, + 'Events': [] + }, + {'Id': notification_name + '_2', 'TopicArn': topic_arn_2, + 'Events': []}] + + s3_notification_conf = PSNotificationS3(conn, bucket_name, topic_conf_list) + response, status = s3_notification_conf.set_config() + assert_equal(status / 100, 2) + + # create objects in the bucket (async) + number_of_objects = 10 + client_threads = [] + start_time = time.time() + for i in range(number_of_objects): + key = bucket.new_key(str(i)) + content = str(os.urandom(1024)) + thr = threading.Thread(target=set_contents_from_string, + args=(key, content,)) + thr.start() + client_threads.append(thr) + [thr.join() for thr in client_threads] + + time_diff = time.time() - start_time + print('average time for creation + async notification is: ' + str( + time_diff * 1000 / number_of_objects) + ' milliseconds') + + # delete objects from the bucket + client_threads = [] + start_time = time.time() + for key in bucket.list(): + thr = threading.Thread(target=key.delete, args=()) + thr.start() + client_threads.append(thr) + [thr.join() for thr in client_threads] + + time_diff = time.time() - start_time + print('average time for deletion + async notification is: ' + str( + time_diff * 1000 / number_of_objects) + ' milliseconds') + + time.sleep(30) + # topic stats + result = admin(['topic', 'stats', '--topic', topic_name_1], + get_config_cluster()) + assert_equal(result[1], 0) + parsed_result = json.loads(result[0]) + assert_equal(parsed_result['Topic Stats']['Entries'], 2 * number_of_objects) + + # remove the ssl from topic1 and update the topic. 
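
The hunk resumes below with exactly this attribute update. The recovery pattern used throughout these kafka tests is: fix a single topic attribute via SetTopicAttributes, then wait for the persistent queue to drain against the now-reachable endpoint. A hedged sketch, assuming the PSTopicS3.set_attributes() and wait_for_queue_to_drain() helpers from this file (the wrapper name is illustrative):

    def repair_topic_attribute(topic_conf, topic_name, name, value):
        # e.g. name='use-ssl', value='false', or name='push-endpoint',
        # value='kafka://<reachable-broker>'
        topic_conf.set_attributes(attribute_name=name, attribute_val=value)
        wait_for_queue_to_drain(topic_name)
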
+ endpoint_address = 'kafka://' + kafka_server + topic_conf_1.set_attributes(attribute_name="use-ssl", + attribute_val="false") + keys = list(bucket.list()) + wait_for_queue_to_drain(topic_name_1) + receiver_1.verify_s3_events(keys, deletions=True) + # topic stats for 2nd topic will still reflect entries + result = admin(['topic', 'stats', '--topic', topic_name_2], + get_config_cluster()) + assert_equal(result[1], 0) + parsed_result = json.loads(result[0]) + assert_equal(parsed_result['Topic Stats']['Entries'], 2 * number_of_objects) + + # cleanup + s3_notification_conf.del_config() + topic_conf_1.del_config() + topic_conf_2.del_config() + # delete the bucket + conn.delete_bucket(bucket_name) + receiver_1.close(task_1) + receiver_2.close(task_2) diff --git a/src/test/rgw/rgw_multi/tests.py b/src/test/rgw/rgw_multi/tests.py index f0b36865ed1..dd16cb45f9f 100644 --- a/src/test/rgw/rgw_multi/tests.py +++ b/src/test/rgw/rgw_multi/tests.py @@ -919,6 +919,40 @@ def test_versioned_object_incremental_sync(): for _, bucket in zone_bucket: zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) +def test_null_version_id_delete(): + zonegroup = realm.master_zonegroup() + zonegroup_conns = ZonegroupConns(zonegroup) + + zone = zonegroup_conns.rw_zones[0] + + # create a non-versioned bucket + bucket = zone.create_bucket(gen_bucket_name()) + log.debug('created bucket=%s', bucket.name) + zonegroup_meta_checkpoint(zonegroup) + obj = 'obj' + + # upload an initial object + key1 = new_key(zone, bucket, obj) + key1.set_contents_from_string('') + log.debug('created initial version id=%s', key1.version_id) + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + # enable versioning + bucket.configure_versioning(True) + zonegroup_meta_checkpoint(zonegroup) + + # re-upload the object as a new version + key2 = new_key(zone, bucket, obj) + key2.set_contents_from_string('') + log.debug('created new version id=%s', key2.version_id) + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + bucket.delete_key(obj, version_id='null') + + bucket.delete_key(obj, version_id=key2.version_id) + + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + def test_concurrent_versioned_object_incremental_sync(): zonegroup = realm.master_zonegroup() zonegroup_conns = ZonegroupConns(zonegroup) @@ -1536,6 +1570,10 @@ def test_bucket_index_log_trim(): cold_bilog = bilog_list(zone.zone, cold_bucket.name) assert(len(cold_bilog) == 0) +# TODO: disable failing tests temporarily +# until they are fixed + +@attr('fails_with_rgw') def test_bucket_reshard_index_log_trim(): zonegroup = realm.master_zonegroup() zonegroup_conns = ZonegroupConns(zonegroup) @@ -1766,6 +1804,8 @@ def bucket_keys_eq(zone1, zone2, bucket_name): zone2.name) assert False + +@attr('fails_with_rgw') @attr('bucket_reshard') def test_bucket_sync_run_basic_incremental(): """ @@ -1831,6 +1871,7 @@ def trash_bucket(zone, bucket_name): cmd += ['--bucket', bucket_name] zone.cluster.admin(cmd) +@attr('fails_with_rgw') @attr('bucket_reshard') def test_zap_init_bucket_sync_run(): """ @@ -1922,6 +1963,67 @@ def test_role_delete_sync(): log.info(f'success, zone: {zone.name} does not have role: {role_name}') +def test_replication_status(): + zonegroup = realm.master_zonegroup() + zonegroup_conns = ZonegroupConns(zonegroup) + zone = zonegroup_conns.rw_zones[0] + + bucket = zone.conn.create_bucket(gen_bucket_name()) + obj_name = "a" + k = new_key(zone, bucket.name, obj_name) + k.set_contents_from_string('foo') + zonegroup_meta_checkpoint(zonegroup) + 
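
The assertions that follow read the S3 ReplicationStatus header via the head_object() helper added to RadosZone later in this diff. Where a sync checkpoint is not available, an equivalent poll-based wait might look like this sketch (the helper name and timeout are assumptions):

    import time

    def wait_replication_completed(zone, bucket_name, obj_name, timeout=60):
        # Poll HeadObject until multisite sync flips ReplicationStatus
        # from PENDING to COMPLETED, or give up after `timeout` seconds.
        deadline = time.time() + timeout
        while time.time() < deadline:
            res = zone.head_object(bucket_name, obj_name)
            if res.get('ReplicationStatus') == 'COMPLETED':
                return True
            time.sleep(1)
        return False
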
zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + head_res = zone.head_object(bucket.name, obj_name) + log.info("checking if object has PENDING ReplicationStatus") + assert(head_res["ReplicationStatus"] == "PENDING") + + bilog_autotrim(zone.zone) + zonegroup_data_checkpoint(zonegroup_conns) + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + head_res = zone.head_object(bucket.name, obj_name) + log.info("checking if object has COMPLETED ReplicationStatus") + assert(head_res["ReplicationStatus"] == "COMPLETED") + + log.info("checking that ReplicationStatus update did not write a bilog") + bilog = bilog_list(zone.zone, bucket.name) + assert(len(bilog) == 0) + +def test_object_acl(): + zonegroup = realm.master_zonegroup() + zonegroup_conns = ZonegroupConns(zonegroup) + primary = zonegroup_conns.rw_zones[0] + secondary = zonegroup_conns.rw_zones[1] + + bucket = primary.create_bucket(gen_bucket_name()) + log.debug('created bucket=%s', bucket.name) + + # upload a dummy object and wait for sync. + k = new_key(primary, bucket, 'dummy') + k.set_contents_from_string('foo') + zonegroup_meta_checkpoint(zonegroup) + zonegroup_data_checkpoint(zonegroup_conns) + + #check object on secondary before setacl + bucket2 = get_bucket(secondary, bucket.name) + before_set_acl = bucket2.get_acl(k) + assert(len(before_set_acl.acl.grants) == 1) + + #set object acl on primary and wait for sync. + bucket.set_canned_acl('public-read', key_name=k) + log.debug('set acl=%s', bucket.name) + zonegroup_data_checkpoint(zonegroup_conns) + zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) + + #check object secondary after setacl + bucket2 = get_bucket(secondary, bucket.name) + after_set_acl = bucket2.get_acl(k) + assert(len(after_set_acl.acl.grants) == 2) # read grant added on AllUsers + + +@attr('fails_with_rgw') @attr('data_sync_init') def test_bucket_full_sync_after_data_sync_init(): zonegroup = realm.master_zonegroup() @@ -1953,6 +2055,7 @@ def test_bucket_full_sync_after_data_sync_init(): zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) zonegroup_data_checkpoint(zonegroup_conns) +@attr('fails_with_rgw') @attr('data_sync_init') @attr('bucket_reshard') def test_resharded_bucket_full_sync_after_data_sync_init(): @@ -1993,6 +2096,7 @@ def test_resharded_bucket_full_sync_after_data_sync_init(): zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) zonegroup_data_checkpoint(zonegroup_conns) +@attr('fails_with_rgw') @attr('data_sync_init') def test_bucket_incremental_sync_after_data_sync_init(): zonegroup = realm.master_zonegroup() @@ -2030,6 +2134,7 @@ def test_bucket_incremental_sync_after_data_sync_init(): zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) zonegroup_data_checkpoint(zonegroup_conns) +@attr('fails_with_rgw') @attr('data_sync_init') @attr('bucket_reshard') def test_resharded_bucket_incremental_sync_latest_after_data_sync_init(): @@ -2078,6 +2183,7 @@ def test_resharded_bucket_incremental_sync_latest_after_data_sync_init(): zonegroup_bucket_checkpoint(zonegroup_conns, bucket.name) zonegroup_data_checkpoint(zonegroup_conns) +@attr('fails_with_rgw') @attr('data_sync_init') @attr('bucket_reshard') def test_resharded_bucket_incremental_sync_oldest_after_data_sync_init(): @@ -2271,6 +2377,7 @@ def check_objects_not_exist(bucket, obj_arr): for objname in obj_arr: check_object_not_exists(bucket, objname) +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_policy_config_zonegroup(): """ @@ -2342,6 +2449,7 @@ def test_sync_policy_config_zonegroup(): return 
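
The long run of @attr('fails_with_rgw') decorators added below tags known-unreliable multisite tests so they can be excluded with `nosetests -a '!fails_with_rgw'` (documented in the test_multi.md hunk later in this diff). The mechanism is nose's attrib plugin; a minimal sketch with an illustrative test name:

    from nose.plugins.attrib import attr

    # run only tagged tests:  nosetests -a fails_with_rgw
    # skip tagged tests:      nosetests -a '!fails_with_rgw'
    @attr('fails_with_rgw')
    def test_known_flaky_example():
        pass
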
+@attr('fails_with_rgw') @attr('sync_policy') def test_sync_flow_symmetrical_zonegroup_all(): """ @@ -2399,6 +2507,7 @@ def test_sync_flow_symmetrical_zonegroup_all(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_flow_symmetrical_zonegroup_select(): """ @@ -2467,6 +2576,7 @@ def test_sync_flow_symmetrical_zonegroup_select(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_flow_directional_zonegroup_select(): """ @@ -2584,6 +2694,7 @@ def test_sync_flow_directional_zonegroup_select(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_single_bucket(): """ @@ -2696,6 +2807,7 @@ def test_sync_single_bucket(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_different_buckets(): """ @@ -2845,6 +2957,7 @@ def test_sync_different_buckets(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_multiple_buckets_to_single(): """ @@ -2966,6 +3079,7 @@ def test_sync_multiple_buckets_to_single(): remove_sync_policy_group(c1, "sync-group") return +@attr('fails_with_rgw') @attr('sync_policy') def test_sync_single_bucket_to_multiple(): """ @@ -3094,6 +3208,7 @@ def start_2nd_rgw(zonegroup): z.gateways[1].start() log.info('gateway started zone=%s gateway=%s', z.name, z.gateways[1].endpoint()) +@attr('fails_with_rgw') @attr('rgw_down') def test_bucket_create_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3111,6 +3226,7 @@ def test_bucket_create_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_bucket_remove_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3136,6 +3252,7 @@ def test_bucket_remove_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_object_sync_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3147,6 +3264,7 @@ def test_object_sync_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_object_delete_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3158,6 +3276,7 @@ def test_object_delete_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_concurrent_versioned_object_incremental_sync_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3169,6 +3288,7 @@ def test_concurrent_versioned_object_incremental_sync_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_suspended_delete_marker_full_sync_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3180,6 +3300,7 @@ def test_suspended_delete_marker_full_sync_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_bucket_acl_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3191,6 +3312,7 @@ def test_bucket_acl_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_bucket_sync_enable_right_after_disable_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3202,6 +3324,7 @@ def test_bucket_sync_enable_right_after_disable_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_multipart_object_sync_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3213,6 +3336,7 @@ def test_multipart_object_sync_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') 
def test_bucket_sync_run_basic_incremental_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3224,6 +3348,7 @@ def test_bucket_sync_run_basic_incremental_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_role_sync_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3235,6 +3360,7 @@ def test_role_sync_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_bucket_full_sync_after_data_sync_init_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3246,6 +3372,7 @@ def test_bucket_full_sync_after_data_sync_init_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_sync_policy_config_zonegroup_rgw_down(): zonegroup = realm.master_zonegroup() @@ -3257,6 +3384,7 @@ def test_sync_policy_config_zonegroup_rgw_down(): finally: start_2nd_rgw(zonegroup) +@attr('fails_with_rgw') @attr('rgw_down') def test_sync_flow_symmetrical_zonegroup_all_rgw_down(): zonegroup = realm.master_zonegroup() diff --git a/src/test/rgw/rgw_multi/zone_rados.py b/src/test/rgw/rgw_multi/zone_rados.py index 3761676a3d7..ce0530543e0 100644 --- a/src/test/rgw/rgw_multi/zone_rados.py +++ b/src/test/rgw/rgw_multi/zone_rados.py @@ -170,6 +170,9 @@ class RadosZone(Zone): return out['TopicConfigurations'] return [] + def head_object(self, bucket_name, obj_name): + return self.s3_client.head_object(Bucket=bucket_name, Key=obj_name) + def get_conn(self, credentials): return self.Conn(self, credentials) diff --git a/src/test/rgw/test_d4n_directory.cc b/src/test/rgw/test_d4n_directory.cc index 05ad8399141..fbebcc8e4ab 100644 --- a/src/test/rgw/test_d4n_directory.cc +++ b/src/test/rgw/test_d4n_directory.cc @@ -200,6 +200,7 @@ TEST_F(ObjectDirectoryFixture, GetYield) io.run(); } +/* Does not currently pass on Ubuntu due to incompatible Redis version. TEST_F(ObjectDirectoryFixture, CopyYield) { boost::asio::spawn(io, [this] (boost::asio::yield_context yield) { @@ -230,6 +231,7 @@ TEST_F(ObjectDirectoryFixture, CopyYield) io.run(); } +*/ TEST_F(ObjectDirectoryFixture, DelYield) { @@ -354,6 +356,7 @@ TEST_F(BlockDirectoryFixture, GetYield) io.run(); } +/* Does not currently pass on Ubuntu due to incompatible Redis version. 
TEST_F(BlockDirectoryFixture, CopyYield) { boost::asio::spawn(io, [this] (boost::asio::yield_context yield) { @@ -384,6 +387,7 @@ TEST_F(BlockDirectoryFixture, CopyYield) io.run(); } +*/ TEST_F(BlockDirectoryFixture, DelYield) { diff --git a/src/test/rgw/test_http_manager.cc b/src/test/rgw/test_http_manager.cc index f2daeddca79..400ac32f411 100644 --- a/src/test/rgw/test_http_manager.cc +++ b/src/test/rgw/test_http_manager.cc @@ -76,7 +76,8 @@ TEST(HTTPManager, ReadTruncated) const auto url = std::string{"http://127.0.0.1:"} + std::to_string(acceptor.local_endpoint().port()); RGWHTTPClient client{g_ceph_context, "GET", url}; - EXPECT_EQ(-EAGAIN, RGWHTTP::process(&client, null_yield)); + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; + EXPECT_EQ(-EAGAIN, RGWHTTP::process(&dpp, &client, null_yield)); server.join(); } @@ -100,7 +101,8 @@ TEST(HTTPManager, Head) const auto url = std::string{"http://127.0.0.1:"} + std::to_string(acceptor.local_endpoint().port()); RGWHTTPClient client{g_ceph_context, "HEAD", url}; - EXPECT_EQ(0, RGWHTTP::process(&client, null_yield)); + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; + EXPECT_EQ(0, RGWHTTP::process(&dpp, &client, null_yield)); server.join(); } diff --git a/src/test/rgw/test_multi.md b/src/test/rgw/test_multi.md index 46bf8e0bfd7..81add706c2c 100644 --- a/src/test/rgw/test_multi.md +++ b/src/test/rgw/test_multi.md @@ -9,7 +9,15 @@ $ cd /path/to/ceph/src/test/rgw/ $ nosetests test_multi.py ``` This will assume a configuration file called `/path/to/ceph/src/test/rgw/test_multi.conf` exists. -To use a different configuration file, set the `RGW_MULTI_TEST_CONF` environment variable to point to that file. +To use a different configuration file, set the `RGW_MULTI_TEST_CONF` environment variable to point to that file. Here is an example of configuration file: +``` +[DEFAULT] +num_zonegroup=1 +num_zones=3 +gateway_per_zone=1 +no_bootstrap=false +log_level=5 +``` Since we use the same entry point file for all tests, running specific tests is possible using the following format: ``` $ nosetests test_multi.py:<specific_test_name> @@ -19,6 +27,10 @@ To run multiple tests based on wildcard string, use the following format: $ nosetests test_multi.py -m "<wildcard string>" ``` Note that the test to run, does not have to be inside the `test_multi.py` file. +Some tests have attributes set based on their current reliability. You can filter tests based on their attributes: +``` +$ nosetests test_multi.py -a "!fails_with_rgw" +``` Note that different options for running specific and multiple tests exists in the [nose documentation](https://nose.readthedocs.io/en/latest/usage.html#options), as well as other options to control the execution of the tests. ## Configuration ### Environment Variables diff --git a/src/test/rgw/test_rgw_cksum.cc b/src/test/rgw/test_rgw_cksum.cc new file mode 100644 index 00000000000..3572f5994fa --- /dev/null +++ b/src/test/rgw/test_rgw_cksum.cc @@ -0,0 +1,372 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <errno.h> +#include <iostream> +#include <fstream> +#include <string> + +#include "gtest/gtest.h" + +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "common/debug.h" +#include "rgw/rgw_cksum.h" +#include "rgw/rgw_cksum_pipe.h" +#include <openssl/sha.h> +#include "rgw/rgw_hex.h" + +#define dout_subsys ceph_subsys_rgw + +namespace { + + using namespace rgw; + using namespace rgw::cksum; + + bool verbose = false; + + cksum::Type t1 = cksum::Type::blake3; + cksum::Type t2 = cksum::Type::sha1; + cksum::Type t3 = cksum::Type::sha256; + cksum::Type t4 = cksum::Type::sha512; + cksum::Type t5 = cksum::Type::crc32; + cksum::Type t6 = cksum::Type::crc32c; + cksum::Type t7 = cksum::Type::xxh3; + + std::string lorem = + "Lorem ipsum dolor sit amet"; + + std::string dolor = + R"(Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.)"; + +TEST(RGWCksum, Ctor) +{ + cksum::Cksum ck1; + cksum::Cksum ck2(cksum::Type::none); + + auto ck3 = rgw::putobj::GetHeaderCksumResult(ck1, ""); + + ASSERT_EQ(ck1.to_armor(), ck2.to_armor()); + ASSERT_EQ(ck2.to_armor(), ck3.first.to_armor()); +} + +TEST(RGWCksum, DigestCRC32) +{ + auto t = cksum::Type::crc32; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + /* compare w/known value https://crccalc.com/ */ + ASSERT_EQ(cksum.hex(), "98b2c5bd"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "OThiMmM1YmQ="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "mLLFvQ=="); +} + +TEST(RGWCksum, DigestCRC32c) +{ + auto t = cksum::Type::crc32c; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + /* compare w/known value https://crccalc.com/ */ + ASSERT_EQ(cksum.hex(), "95dc2e4b"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "OTVkYzJlNGI="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "ldwuSw=="); +} + +TEST(RGWCksum, DigestXXH3) +{ + auto t = cksum::Type::xxh3; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + /* compare w/known value xxhsum -H3 */ + ASSERT_EQ(cksum.hex(), "5a164e0145351d01"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), "NWExNjRlMDE0NTM1MWQwMQ=="); +} + +TEST(RGWCksum, DigestSha1) +{ + auto t = cksum::Type::sha1; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + 
input_str->length()); + + /* try by hand */ + unsigned char sha1_hash[SHA_DIGEST_LENGTH]; // == 20 + ::SHA1((unsigned char *)input_str->c_str(), input_str->length(), sha1_hash); + // do some stuff with the hash + + char buf[20 * 2 + 1]; + memset(buf, 0, sizeof(buf)); + buf_to_hex(sha1_hash, SHA_DIGEST_LENGTH, buf); + if (verbose) { + std::cout << "byhand sha1 " << buf << std::endl; + } + + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "computed sha1: " << cksum.hex() << std::endl; + } + + /* check match with direct OpenSSL mech */ + ASSERT_TRUE(memcmp(buf, cksum.hex().c_str(), + cksum.hex().length()) == 0); + + if (input_str == &lorem) { + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "38f00f8738e241daea6f37f6f55ae8414d7b0219"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MzhmMDBmODczOGUyNDFkYWVhNmYzN2Y2ZjU1YWU4NDE0ZDdiMDIxOQ=="); + } else { // &dolor + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "cd36b370758a259b34845084a6cc38473cb95e27"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "Y2QzNmIzNzA3NThhMjU5YjM0ODQ1MDg0YTZjYzM4NDczY2I5NWUyNw=="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "zTazcHWKJZs0hFCEpsw4Rzy5Xic="); + } + } +} + +TEST(RGWCksum, DigestSha256) +{ + auto t = cksum::Type::sha256; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "computed sha256: " << cksum.hex() << std::endl; + } + + if (input_str == &lorem) { + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "16aba5393ad72c0041f5600ad3c2c52ec437a2f0c7fc08fadfc3c0fe9641d7a3"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MTZhYmE1MzkzYWQ3MmMwMDQxZjU2MDBhZDNjMmM1MmVjNDM3YTJmMGM3ZmMwOGZhZGZjM2MwZmU5NjQxZDdhMw=="); + } else { // &dolor + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "2d8c2f6d978ca21712b5f6de36c9d31fa8e96a4fa5d8ff8b0188dfb9e7c171bb"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "MmQ4YzJmNmQ5NzhjYTIxNzEyYjVmNmRlMzZjOWQzMWZhOGU5NmE0ZmE1ZDhmZjhiMDE4OGRmYjllN2MxNzFiYg=="); + /* compare with aws-sdk-cpp encoded value */ + ASSERT_EQ(cksum.to_armor(), "LYwvbZeMohcStfbeNsnTH6jpak+l2P+LAYjfuefBcbs="); + } + } +} + +TEST(RGWCksum, DigestSha512) +{ + auto t = cksum::Type::sha512; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + if (input_str == &lorem) { + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "b1f4aaa6b51c19ffbe4b1b6fa107be09c8acafd7c768106a3faf475b1e27a940d3c075fda671eadf46c68f93d7eabcf604bcbf7055da0dc4eae6743607a2fc3f"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "YjFmNGFhYTZiNTFjMTlmZmJlNGIxYjZmYTEwN2JlMDljOGFjYWZkN2M3NjgxMDZhM2ZhZjQ3NWIxZTI3YTk0MGQzYzA3NWZkYTY3MWVhZGY0NmM2OGY5M2Q3ZWFiY2Y2MDRiY2JmNzA1NWRhMGRjNGVhZTY3NDM2MDdhMmZjM2Y="); + } else 
{ // &dolor + /* compare w/known value, openssl sha1 */ + ASSERT_EQ(cksum.hex(), "8ba760cac29cb2b2ce66858ead169174057aa1298ccd581514e6db6dee3285280ee6e3a54c9319071dc8165ff061d77783100d449c937ff1fb4cd1bb516a69b9"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "OGJhNzYwY2FjMjljYjJiMmNlNjY4NThlYWQxNjkxNzQwNTdhYTEyOThjY2Q1ODE1MTRlNmRiNmRlZTMyODUyODBlZTZlM2E1NGM5MzE5MDcxZGM4MTY1ZmYwNjFkNzc3ODMxMDBkNDQ5YzkzN2ZmMWZiNGNkMWJiNTE2YTY5Yjk="); + } + } +} + +TEST(RGWCksum, DigestBlake3) +{ + auto t = cksum::Type::blake3; + for (const auto input_str : {&lorem, &dolor}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)input_str->c_str(), + input_str->length()); + + auto cksum = rgw::cksum::finalize_digest(digest, t); + + if (input_str == &lorem) { + /* compare w/known value, b3sum */ + ASSERT_EQ(cksum.hex(), "f1da5f4e2bd5669307bcdb2e223dad05af7425207cbee59e73526235f50f76ad"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "ZjFkYTVmNGUyYmQ1NjY5MzA3YmNkYjJlMjIzZGFkMDVhZjc0MjUyMDdjYmVlNTllNzM1MjYyMzVmNTBmNzZhZA=="); + } else { // &dolor + /* compare w/known value, b3sum */ + ASSERT_EQ(cksum.hex(), "71fe44583a6268b56139599c293aeb854e5c5a9908eca00105d81ad5e22b7bb6"); + /* compare w/known value https://www.base64encode.org/ */ + ASSERT_EQ(cksum.to_base64(), + "NzFmZTQ0NTgzYTYyNjhiNTYxMzk1OTljMjkzYWViODU0ZTVjNWE5OTA4ZWNhMDAxMDVkODFhZDVlMjJiN2JiNg=="); + } + } +} /* blake3 */ + +TEST(RGWCksum, DigestSTR) +{ + for (auto t : {t1, t2, t3, t4, t5, t6, t7}) { + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest* digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *)dolor.c_str(), dolor.length()); + auto cksum = rgw::cksum::finalize_digest(digest, t); + if (verbose) { + std::cout << "type: " << to_string(t) + << " digest: " << cksum.to_string() + << std::endl; + } + } +} + +TEST(RGWCksum, DigestBL) +{ + std::string lacrimae = dolor + dolor; + + ceph::buffer::list dolor_bl; + for ([[maybe_unused]] const auto& ix : {1, 2}) { + dolor_bl.push_back( + buffer::create_static(dolor.length(), + const_cast<char*>(dolor.data()))); + } + + for (auto t : {t1, t2, t3, t4, t5, t6, t7}) { + DigestVariant dv1 = rgw::cksum::digest_factory(t); + Digest* digest1 = get_digest(dv1); + ASSERT_NE(digest1, nullptr); + + DigestVariant dv2 = rgw::cksum::digest_factory(t); + Digest* digest2 = get_digest(dv2); + ASSERT_NE(digest2, nullptr); + + digest1->Update((const unsigned char *)lacrimae.c_str(), + lacrimae.length()); + digest2->Update(dolor_bl); + + auto cksum1 = rgw::cksum::finalize_digest(digest1, t); + auto cksum2 = rgw::cksum::finalize_digest(digest2, t); + + ASSERT_EQ(cksum1.to_string(), cksum2.to_string()); + + /* serialization */ + buffer::list bl_out; + encode(cksum1, bl_out); + + /* unserialization */ + buffer::list bl_in; + bl_in.append(bl_out.c_str(), bl_out.length()); + + rgw::cksum::Cksum cksum3; + auto iter = bl_in.cbegin(); + decode(cksum3, iter); + + /* all that way for a Strohs */ + ASSERT_EQ(cksum1.to_string(), cksum3.to_string()); + } /* for t1, ... 
*/ +} + + + + + //foop +TEST(RGWCksum, CtorUnarmor) +{ + auto t = cksum::Type::sha256; + DigestVariant dv = rgw::cksum::digest_factory(t); + Digest *digest = get_digest(dv); + + ASSERT_NE(digest, nullptr); + + digest->Update((const unsigned char *) lorem.c_str(), + lorem.length()); + + auto cksum1 = rgw::cksum::finalize_digest(digest, t); + auto armored_text1 = cksum1.to_armor(); + auto cksum2 = rgw::cksum::Cksum(cksum1.type, armored_text1.c_str()); + + ASSERT_EQ(armored_text1, cksum2.to_armor()); +} + +} /* namespace */ + +int main(int argc, char *argv[]) +{ + auto args = argv_to_vec(argc, argv); + env_to_vec(args); + + std::string val; + for (auto arg_iter = args.begin(); arg_iter != args.end();) { + if (ceph_argparse_flag(args, arg_iter, "--verbose", + (char*) nullptr)) { + verbose = true; + } else { + ++arg_iter; + } + } + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/test/rgw/test_rgw_reshard_wait.cc b/src/test/rgw/test_rgw_reshard_wait.cc index 058828b956c..91fc4324129 100644 --- a/src/test/rgw/test_rgw_reshard_wait.cc +++ b/src/test/rgw/test_rgw_reshard_wait.cc @@ -23,10 +23,11 @@ using Clock = RGWReshardWait::Clock; TEST(ReshardWait, wait_block) { constexpr ceph::timespan wait_duration = 10ms; + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; RGWReshardWait waiter(wait_duration); const auto start = Clock::now(); - EXPECT_EQ(0, waiter.wait(null_yield)); + EXPECT_EQ(0, waiter.wait(&dpp, null_yield)); const ceph::timespan elapsed = Clock::now() - start; EXPECT_LE(wait_duration, elapsed); // waited at least 10ms @@ -37,16 +38,17 @@ TEST(ReshardWait, stop_block) { constexpr ceph::timespan short_duration = 10ms; constexpr ceph::timespan long_duration = 10s; + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; RGWReshardWait long_waiter(long_duration); RGWReshardWait short_waiter(short_duration); const auto start = Clock::now(); - std::thread thread([&long_waiter] { - EXPECT_EQ(-ECANCELED, long_waiter.wait(null_yield)); + std::thread thread([&dpp, &long_waiter] { + EXPECT_EQ(-ECANCELED, long_waiter.wait(&dpp, null_yield)); }); - EXPECT_EQ(0, short_waiter.wait(null_yield)); + EXPECT_EQ(0, short_waiter.wait(&dpp, null_yield)); long_waiter.stop(); // cancel long waiter @@ -65,11 +67,12 @@ void rethrow(std::exception_ptr eptr) { TEST(ReshardWait, wait_yield) { constexpr ceph::timespan wait_duration = 50ms; + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; RGWReshardWait waiter(wait_duration); boost::asio::io_context context; boost::asio::spawn(context, [&] (boost::asio::yield_context yield) { - EXPECT_EQ(0, waiter.wait(yield)); + EXPECT_EQ(0, waiter.wait(&dpp, yield)); }, rethrow); const auto start = Clock::now(); @@ -88,6 +91,7 @@ TEST(ReshardWait, stop_yield) { constexpr ceph::timespan short_duration = 50ms; constexpr ceph::timespan long_duration = 10s; + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; RGWReshardWait long_waiter(long_duration); RGWReshardWait short_waiter(short_duration); @@ -95,14 +99,14 @@ TEST(ReshardWait, stop_yield) boost::asio::io_context context; boost::asio::spawn(context, [&] (boost::asio::yield_context yield) { - EXPECT_EQ(-ECANCELED, long_waiter.wait(yield)); + EXPECT_EQ(-ECANCELED, long_waiter.wait(&dpp, yield)); }, rethrow); const auto start = Clock::now(); EXPECT_EQ(1u, context.poll()); // spawn EXPECT_FALSE(context.stopped()); - EXPECT_EQ(0, short_waiter.wait(null_yield)); + EXPECT_EQ(0, short_waiter.wait(&dpp, null_yield)); long_waiter.stop(); // cancel 
long waiter @@ -119,6 +123,7 @@ TEST(ReshardWait, stop_multiple) { constexpr ceph::timespan short_duration = 50ms; constexpr ceph::timespan long_duration = 10s; + const auto dpp = NoDoutPrefix{g_ceph_context, ceph_subsys_rgw}; RGWReshardWait long_waiter(long_duration); RGWReshardWait short_waiter(short_duration); @@ -126,8 +131,8 @@ TEST(ReshardWait, stop_multiple) // spawn 4 threads std::vector<std::thread> threads; { - auto sync_waiter([&long_waiter] { - EXPECT_EQ(-ECANCELED, long_waiter.wait(null_yield)); + auto sync_waiter([&dpp, &long_waiter] { + EXPECT_EQ(-ECANCELED, long_waiter.wait(&dpp, null_yield)); }); threads.emplace_back(sync_waiter); threads.emplace_back(sync_waiter); @@ -138,7 +143,7 @@ TEST(ReshardWait, stop_multiple) boost::asio::io_context context; { auto async_waiter = [&] (boost::asio::yield_context yield) { - EXPECT_EQ(-ECANCELED, long_waiter.wait(yield)); + EXPECT_EQ(-ECANCELED, long_waiter.wait(&dpp, yield)); }; boost::asio::spawn(context, async_waiter, rethrow); boost::asio::spawn(context, async_waiter, rethrow); @@ -150,7 +155,7 @@ TEST(ReshardWait, stop_multiple) EXPECT_EQ(4u, context.poll()); // spawn EXPECT_FALSE(context.stopped()); - EXPECT_EQ(0, short_waiter.wait(null_yield)); + EXPECT_EQ(0, short_waiter.wait(&dpp, null_yield)); long_waiter.stop(); // cancel long waiter diff --git a/src/test/test_rgw_admin_log.cc b/src/test/test_rgw_admin_log.cc index 7dd7604db1a..0759b0c80ce 100644 --- a/src/test/test_rgw_admin_log.cc +++ b/src/test/test_rgw_admin_log.cc @@ -30,6 +30,7 @@ extern "C"{ #include "common/ceph_json.h" #include "common/code_environment.h" #include "common/ceph_argparse.h" +#include "common/armor.h" #include "common/Finisher.h" #include "global/global_init.h" #include "rgw_common.h" @@ -55,8 +56,6 @@ using namespace std; static string uid = "ceph"; static string display_name = "CEPH"; -extern "C" int ceph_armor(char *dst, const char *dst_end, - const char *src, const char *end); static void print_usage(char *exec){ cout << "Usage: " << exec << " <Options>\n"; cout << "Options:\n" diff --git a/src/test/test_rgw_admin_meta.cc b/src/test/test_rgw_admin_meta.cc index 00c43d10b54..962c08f369c 100644 --- a/src/test/test_rgw_admin_meta.cc +++ b/src/test/test_rgw_admin_meta.cc @@ -29,6 +29,7 @@ extern "C"{ #include "common/ceph_json.h" #include "common/code_environment.h" #include "common/ceph_argparse.h" +#include "common/armor.h" #include "common/Finisher.h" #include "global/global_init.h" #include "rgw_common.h" @@ -47,8 +48,6 @@ static string uid = CEPH_UID; static string display_name = "CEPH"; static string meta_caps = "metadata"; -extern "C" int ceph_armor(char *dst, const char *dst_end, - const char *src, const char *end); static void print_usage(char *exec){ cout << "Usage: " << exec << " <Options>\n"; cout << "Options:\n" diff --git a/src/tools/cephfs/JournalTool.cc b/src/tools/cephfs/JournalTool.cc index 021ad7fdea4..b204cbef1b0 100644 --- a/src/tools/cephfs/JournalTool.cc +++ b/src/tools/cephfs/JournalTool.cc @@ -888,14 +888,25 @@ int JournalTool::recover_dentries( } if ((other_pool || write_dentry) && !dry_run) { - dout(4) << "writing I dentry " << key << " into frag " + dout(4) << "writing i dentry " << key << " into frag " << frag_oid.name << dendl; + dout(20) << " dnfirst = " << fb.dnfirst << dendl; + if (!fb.alternate_name.empty()) { + bufferlist bl, b64; + bl.append(fb.alternate_name); + bl.encode_base64(b64); + auto encoded = std::string_view(b64.c_str(), b64.length()); + dout(20) << " alternate_name = b64:" << encoded << dendl; + } - // 
Compose: Dentry format is dnfirst, [I|L], InodeStore(bare=true) + // Compose: Dentry format is dnfirst, [i|l], InodeStore bufferlist dentry_bl; encode(fb.dnfirst, dentry_bl); - encode('I', dentry_bl); - encode_fullbit_as_inode(fb, true, &dentry_bl); + encode('i', dentry_bl); + ENCODE_START(2, 1, dentry_bl); + encode(fb.alternate_name, dentry_bl); + encode_fullbit_as_inode(fb, &dentry_bl); + ENCODE_FINISH(dentry_bl); // Record for writing to RADOS write_vals[key] = dentry_bl; @@ -950,12 +961,15 @@ int JournalTool::recover_dentries( dout(4) << "writing L dentry " << key << " into frag " << frag_oid.name << dendl; - // Compose: Dentry format is dnfirst, [I|L], InodeStore(bare=true) + // Compose: Dentry format is dnfirst, [i|l], ino, d_type, alternate_name bufferlist dentry_bl; encode(rb.dnfirst, dentry_bl); - encode('L', dentry_bl); + encode('l', dentry_bl); + ENCODE_START(2, 1, dentry_bl); encode(rb.ino, dentry_bl); encode(rb.d_type, dentry_bl); + encode(rb.alternate_name, dentry_bl); + ENCODE_FINISH(dentry_bl); // Record for writing to RADOS write_vals[key] = dentry_bl; @@ -1034,7 +1048,7 @@ int JournalTool::recover_dentries( */ for (const auto& fb : metablob.roots) { inodeno_t ino = fb.inode->ino; - dout(4) << "updating root 0x" << std::hex << ino << std::dec << dendl; + dout(4) << "updating root " << ino << dendl; object_t root_oid = InodeStore::get_object_name(ino, frag_t(), ".inode"); dout(4) << "object id " << root_oid.name << dendl; @@ -1074,10 +1088,10 @@ int JournalTool::recover_dentries( dout(4) << "writing root ino " << root_oid.name << " version " << fb.inode->version << dendl; - // Compose: root ino format is magic,InodeStore(bare=false) + // Compose: root ino format is magic,InodeStore bufferlist new_root_ino_bl; encode(std::string(CEPH_FS_ONDISK_MAGIC), new_root_ino_bl); - encode_fullbit_as_inode(fb, false, &new_root_ino_bl); + encode_fullbit_as_inode(fb, &new_root_ino_bl); // Write to RADOS r = output.write_full(root_oid.name, new_root_ino_bl); @@ -1187,7 +1201,6 @@ int JournalTool::erase_region(JournalScanner const &js, uint64_t const pos, uint */ void JournalTool::encode_fullbit_as_inode( const EMetaBlob::fullbit &fb, - const bool bare, bufferlist *out_bl) { ceph_assert(out_bl != NULL); @@ -1202,11 +1215,7 @@ void JournalTool::encode_fullbit_as_inode( new_inode.old_inodes = fb.old_inodes; // Serialize InodeStore - if (bare) { - new_inode.encode_bare(*out_bl, CEPH_FEATURES_SUPPORTED_DEFAULT); - } else { - new_inode.encode(*out_bl, CEPH_FEATURES_SUPPORTED_DEFAULT); - } + new_inode.encode(*out_bl, CEPH_FEATURES_SUPPORTED_DEFAULT); } /** @@ -1265,7 +1274,7 @@ int JournalTool::consume_inos(const std::set<inodeno_t> &inos) { const inodeno_t ino = *i; if (ino_table.force_consume(ino)) { - dout(4) << "Used ino 0x" << std::hex << ino << std::dec + dout(4) << "Used ino " << ino << " requires inotable update" << dendl; inotable_modified = true; } diff --git a/src/tools/cephfs/JournalTool.h b/src/tools/cephfs/JournalTool.h index 8d610a8665f..ac4258b89e4 100644 --- a/src/tools/cephfs/JournalTool.h +++ b/src/tools/cephfs/JournalTool.h @@ -78,7 +78,6 @@ class JournalTool : public MDSUtility // Backing store helpers void encode_fullbit_as_inode( const EMetaBlob::fullbit &fb, - const bool bare, bufferlist *out_bl); int consume_inos(const std::set<inodeno_t> &inos); diff --git a/src/tools/cephfs/first-damage.py b/src/tools/cephfs/first-damage.py index 6a341924119..6207b5c1793 100644 --- a/src/tools/cephfs/first-damage.py +++ b/src/tools/cephfs/first-damage.py @@ -59,7 +59,7 @@ NEXT_SNAP = 
None CONF = os.environ.get('CEPH_CONF') REPAIR_NOSNAP = None -CEPH_NOSNAP = 0xfffffffe # int32 -2 +CEPH_NOSNAP = 0xfffffffffffffffe # int64 -2 ROOT_INODE = "1.00000000" LOST_FOUND_INODE = "4.00000000" @@ -96,7 +96,7 @@ def traverse(MEMO, ioctx): log.warning(f"repairing first==CEPH_NOSNAP damage, setting to NEXT_SNAP (0x{NEXT_SNAP:x})") first = NEXT_SNAP nval = bytearray(val) - struct.pack_into("<I", nval, 0, NEXT_SNAP) + struct.pack_into("<Q", nval, 0, NEXT_SNAP) with rados.WriteOpCtx() as wctx: ioctx.set_omap(wctx, (dnk,), (bytes(nval),)) ioctx.operate_write_op(wctx, o.key) diff --git a/src/tools/cephfs_mirror/FSMirror.cc b/src/tools/cephfs_mirror/FSMirror.cc index 3d5bf2d1c72..ea1857b1eba 100644 --- a/src/tools/cephfs_mirror/FSMirror.cc +++ b/src/tools/cephfs_mirror/FSMirror.cc @@ -114,6 +114,7 @@ FSMirror::FSMirror(CephContext *cct, const Filesystem &filesystem, uint64_t pool m_args(args), m_work_queue(work_queue), m_snap_listener(this), + m_ts_listener(this), m_asok_hook(new MirrorAdminSocketHook(cct, filesystem, this)) { m_service_daemon->add_or_update_fs_attribute(m_filesystem.fscid, SERVICE_DAEMON_DIR_COUNT_KEY, (uint64_t)0); @@ -270,7 +271,7 @@ void FSMirror::init_instance_watcher(Context *on_finish) { Context *ctx = new C_CallbackAdapter< FSMirror, &FSMirror::handle_init_instance_watcher>(this); - m_instance_watcher = InstanceWatcher::create(m_ioctx, m_snap_listener, m_work_queue); + m_instance_watcher = InstanceWatcher::create(m_ioctx, m_snap_listener, m_ts_listener, m_work_queue); m_instance_watcher->init(ctx); } @@ -299,7 +300,7 @@ void FSMirror::init_mirror_watcher() { std::scoped_lock locker(m_lock); Context *ctx = new C_CallbackAdapter< FSMirror, &FSMirror::handle_init_mirror_watcher>(this); - m_mirror_watcher = MirrorWatcher::create(m_ioctx, this, m_work_queue); + m_mirror_watcher = MirrorWatcher::create(m_ioctx, this, m_ts_listener, m_work_queue); m_mirror_watcher->init(ctx); } diff --git a/src/tools/cephfs_mirror/FSMirror.h b/src/tools/cephfs_mirror/FSMirror.h index b106fdff8b6..70ebbd0f4b6 100644 --- a/src/tools/cephfs_mirror/FSMirror.h +++ b/src/tools/cephfs_mirror/FSMirror.h @@ -59,14 +59,12 @@ public: monotime get_failed_ts() { std::scoped_lock locker(m_lock); - if (m_instance_watcher) { - return m_instance_watcher->get_failed_ts(); - } - if (m_mirror_watcher) { - return m_mirror_watcher->get_failed_ts(); - } + return m_failed_ts; + } - return clock::now(); + void set_failed_ts() { + std::scoped_lock locker(m_lock); + m_failed_ts = clock::now(); } bool is_blocklisted() { @@ -76,14 +74,12 @@ public: monotime get_blocklisted_ts() { std::scoped_lock locker(m_lock); - if (m_instance_watcher) { - return m_instance_watcher->get_blocklisted_ts(); - } - if (m_mirror_watcher) { - return m_mirror_watcher->get_blocklisted_ts(); - } + return m_blocklisted_ts; + } - return clock::now(); + void set_blocklisted_ts() { + std::scoped_lock locker(m_lock); + m_blocklisted_ts = clock::now(); } Peers get_peers() { @@ -128,8 +124,24 @@ private: void release_directory(std::string_view dir_path) override { fs_mirror->handle_release_directory(dir_path); } + + }; + + struct TimestampListener: public Watcher::ErrorListener { + FSMirror *fs_mirror; + TimestampListener(FSMirror *fs_mirror) + : fs_mirror(fs_mirror) { + } + void set_blocklisted_ts() { + fs_mirror->set_blocklisted_ts(); + } + void set_failed_ts() { + fs_mirror->set_failed_ts(); + } }; + monotime m_blocklisted_ts; + monotime m_failed_ts; CephContext *m_cct; Filesystem m_filesystem; uint64_t m_pool_id; @@ -139,6 +151,7 @@ private: 
ceph::mutex m_lock = ceph::make_mutex("cephfs::mirror::fs_mirror"); SnapListener m_snap_listener; + TimestampListener m_ts_listener; std::set<std::string, std::less<>> m_directories; Peers m_all_peers; std::map<Peer, std::unique_ptr<PeerReplayer>> m_peer_replayers; diff --git a/src/tools/cephfs_mirror/InstanceWatcher.cc b/src/tools/cephfs_mirror/InstanceWatcher.cc index fece936a94b..5b19d017287 100644 --- a/src/tools/cephfs_mirror/InstanceWatcher.cc +++ b/src/tools/cephfs_mirror/InstanceWatcher.cc @@ -31,10 +31,11 @@ std::string instance_oid(const std::string &instance_id) { } // anonymous namespace InstanceWatcher::InstanceWatcher(librados::IoCtx &ioctx, - Listener &listener, ContextWQ *work_queue) + Listener &listener, ErrorListener &elistener, ContextWQ *work_queue) : Watcher(ioctx, instance_oid(stringify(ioctx.get_instance_id())), work_queue), m_ioctx(ioctx), m_listener(listener), + m_elistener(elistener), m_work_queue(work_queue), m_lock(ceph::make_mutex("cephfs::mirror::instance_watcher")) { } @@ -116,15 +117,15 @@ void InstanceWatcher::handle_rewatch_complete(int r) { dout(0) << ": client blocklisted" <<dendl; std::scoped_lock locker(m_lock); m_blocklisted = true; - m_blocklisted_ts = clock::now(); + m_elistener.set_blocklisted_ts(); } else if (r == -ENOENT) { derr << ": mirroring object deleted" << dendl; m_failed = true; - m_failed_ts = clock::now(); + m_elistener.set_failed_ts(); } else if (r < 0) { derr << ": rewatch error: " << cpp_strerror(r) << dendl; m_failed = true; - m_failed_ts = clock::now(); + m_elistener.set_failed_ts(); } } diff --git a/src/tools/cephfs_mirror/InstanceWatcher.h b/src/tools/cephfs_mirror/InstanceWatcher.h index 55353f9aa62..d8a873adc17 100644 --- a/src/tools/cephfs_mirror/InstanceWatcher.h +++ b/src/tools/cephfs_mirror/InstanceWatcher.h @@ -31,11 +31,11 @@ public: }; static InstanceWatcher *create(librados::IoCtx &ioctx, - Listener &listener, ContextWQ *work_queue) { - return new InstanceWatcher(ioctx, listener, work_queue); + Listener &listener, ErrorListener &elistener, ContextWQ *work_queue) { + return new InstanceWatcher(ioctx, listener, elistener, work_queue); } - InstanceWatcher(librados::IoCtx &ioctx, Listener &listener, ContextWQ *work_queue); + InstanceWatcher(librados::IoCtx &ioctx, Listener &listener, ErrorListener &elistener, ContextWQ *work_queue); ~InstanceWatcher(); void init(Context *on_finish); @@ -50,24 +50,15 @@ public: return m_blocklisted; } - monotime get_blocklisted_ts() { - std::scoped_lock locker(m_lock); - return m_blocklisted_ts; - } - bool is_failed() { std::scoped_lock locker(m_lock); return m_failed; } - monotime get_failed_ts() { - std::scoped_lock locker(m_lock); - return m_failed_ts; - } - private: librados::IoCtx &m_ioctx; Listener &m_listener; + ErrorListener &m_elistener; ContextWQ *m_work_queue; ceph::mutex m_lock; @@ -77,9 +68,6 @@ private: bool m_blocklisted = false; bool m_failed = false; - monotime m_blocklisted_ts; - monotime m_failed_ts; - void create_instance(); void handle_create_instance(int r); diff --git a/src/tools/cephfs_mirror/Mirror.cc b/src/tools/cephfs_mirror/Mirror.cc index ecf1702e599..397adb4f8c0 100644 --- a/src/tools/cephfs_mirror/Mirror.cc +++ b/src/tools/cephfs_mirror/Mirror.cc @@ -558,9 +558,9 @@ void Mirror::update_fs_mirrors() { std::scoped_lock locker(m_lock); for (auto &[filesystem, mirror_action] : m_mirror_actions) { auto failed_restart = mirror_action.fs_mirror && mirror_action.fs_mirror->is_failed() && - (failed_interval.count() > 0 && 
duration_cast<seconds>(mirror_action.fs_mirror->get_failed_ts() - clock::now()) > failed_interval); + (failed_interval.count() > 0 && duration_cast<seconds>(clock::now() - mirror_action.fs_mirror->get_failed_ts()).count() > failed_interval.count()); auto blocklisted_restart = mirror_action.fs_mirror && mirror_action.fs_mirror->is_blocklisted() && - (blocklist_interval.count() > 0 && duration_cast<seconds>(mirror_action.fs_mirror->get_blocklisted_ts() - clock::now()) > blocklist_interval); + (blocklist_interval.count() > 0 && duration_cast<seconds>(clock::now() - mirror_action.fs_mirror->get_blocklisted_ts()).count() > blocklist_interval.count()); if (!mirror_action.action_in_progress && !_is_restarting(filesystem)) { if (failed_restart || blocklisted_restart) { diff --git a/src/tools/cephfs_mirror/MirrorWatcher.cc b/src/tools/cephfs_mirror/MirrorWatcher.cc index 55e106512d3..e84ef90375a 100644 --- a/src/tools/cephfs_mirror/MirrorWatcher.cc +++ b/src/tools/cephfs_mirror/MirrorWatcher.cc @@ -21,10 +21,11 @@ namespace cephfs { namespace mirror { MirrorWatcher::MirrorWatcher(librados::IoCtx &ioctx, FSMirror *fs_mirror, - ContextWQ *work_queue) + ErrorListener &elistener, ContextWQ *work_queue) : Watcher(ioctx, CEPHFS_MIRROR_OBJECT, work_queue), m_ioctx(ioctx), m_fs_mirror(fs_mirror), + m_elistener(elistener), m_work_queue(work_queue), m_lock(ceph::make_mutex("cephfs::mirror::mirror_watcher")), m_instance_id(stringify(m_ioctx.get_instance_id())) { @@ -92,15 +93,15 @@ void MirrorWatcher::handle_rewatch_complete(int r) { dout(0) << ": client blocklisted" <<dendl; std::scoped_lock locker(m_lock); m_blocklisted = true; - m_blocklisted_ts = clock::now(); + m_elistener.set_blocklisted_ts(); } else if (r == -ENOENT) { derr << ": mirroring object deleted" << dendl; m_failed = true; - m_failed_ts = clock::now(); + m_elistener.set_failed_ts(); } else if (r < 0) { derr << ": rewatch error: " << cpp_strerror(r) << dendl; m_failed = true; - m_failed_ts = clock::now(); + m_elistener.set_failed_ts(); } } diff --git a/src/tools/cephfs_mirror/MirrorWatcher.h b/src/tools/cephfs_mirror/MirrorWatcher.h index 37fe55ef0c5..610db51b1c0 100644 --- a/src/tools/cephfs_mirror/MirrorWatcher.h +++ b/src/tools/cephfs_mirror/MirrorWatcher.h @@ -28,11 +28,11 @@ class FSMirror; class MirrorWatcher : public Watcher { public: static MirrorWatcher *create(librados::IoCtx &ioctx, FSMirror *fs_mirror, - ContextWQ *work_queue) { - return new MirrorWatcher(ioctx, fs_mirror, work_queue); + ErrorListener &elistener, ContextWQ *work_queue) { + return new MirrorWatcher(ioctx, fs_mirror, elistener, work_queue); } - MirrorWatcher(librados::IoCtx &ioctx, FSMirror *fs_mirror, + MirrorWatcher(librados::IoCtx &ioctx, FSMirror *fs_mirror, ErrorListener &elistener, ContextWQ *work_queue); ~MirrorWatcher(); @@ -48,24 +48,15 @@ public: return m_blocklisted; } - monotime get_blocklisted_ts() { - std::scoped_lock locker(m_lock); - return m_blocklisted_ts; - } - bool is_failed() { std::scoped_lock locker(m_lock); return m_failed; } - monotime get_failed_ts() { - std::scoped_lock locker(m_lock); - return m_failed_ts; - } - private: librados::IoCtx &m_ioctx; FSMirror *m_fs_mirror; + ErrorListener &m_elistener; ContextWQ *m_work_queue; ceph::mutex m_lock; @@ -77,9 +68,6 @@ private: bool m_blocklisted = false; bool m_failed = false; - monotime m_blocklisted_ts; - monotime m_failed_ts; - void register_watcher(); void handle_register_watcher(int r); diff --git a/src/tools/cephfs_mirror/Watcher.h b/src/tools/cephfs_mirror/Watcher.h index 
9e7c54eebbb..a0c51401170 100644 --- a/src/tools/cephfs_mirror/Watcher.h +++ b/src/tools/cephfs_mirror/Watcher.h @@ -28,6 +28,13 @@ public: void register_watch(Context *on_finish); void unregister_watch(Context *on_finish); + struct ErrorListener { + virtual ~ErrorListener() { + } + virtual void set_blocklisted_ts() = 0; + virtual void set_failed_ts() = 0; + }; + protected: std::string m_oid; diff --git a/src/tools/erasure-code/ceph-erasure-code-tool.cc b/src/tools/erasure-code/ceph-erasure-code-tool.cc index 39f16a8cbbc..51343f7d615 100644 --- a/src/tools/erasure-code/ceph-erasure-code-tool.cc +++ b/src/tools/erasure-code/ceph-erasure-code-tool.cc @@ -260,6 +260,8 @@ int do_decode(const std::vector<const char*> &args) { ceph::bufferlist decoded_data; std::string fname = args[3]; + std::set<int> want_to_read; + const auto chunk_mapping = ec_impl->get_chunk_mapping(); for (auto &[shard, bl] : encoded_data) { std::string name = fname + "." + stringify(shard); std::string error; @@ -268,9 +270,12 @@ int do_decode(const std::vector<const char*> &args) { std::cerr << "failed to read " << name << ": " << error << std::endl; return 1; } + auto chunk = static_cast<ssize_t>(chunk_mapping.size()) > shard ? + chunk_mapping[shard] : shard; + want_to_read.insert(chunk); } - r = ECUtil::decode(*sinfo, ec_impl, encoded_data, &decoded_data); + r = ECUtil::decode(*sinfo, ec_impl, want_to_read, encoded_data, &decoded_data); if (r < 0) { std::cerr << "failed to decode: " << cpp_strerror(r) << std::endl; return 1; diff --git a/src/tools/rbd/ArgumentTypes.cc b/src/tools/rbd/ArgumentTypes.cc index 17a06c805f7..b479f961588 100644 --- a/src/tools/rbd/ArgumentTypes.cc +++ b/src/tools/rbd/ArgumentTypes.cc @@ -163,9 +163,21 @@ void add_snap_option(po::options_description *opt, (name.c_str(), po::value<std::string>(), description.c_str()); } -void add_snap_id_option(po::options_description *opt) { +void add_snap_id_option(po::options_description *opt, + ArgumentModifier modifier) { + std::string name = SNAPSHOT_ID; + std::string description = "snapshot id"; + switch (modifier) { + case ARGUMENT_MODIFIER_NONE: + case ARGUMENT_MODIFIER_DEST: + break; + case ARGUMENT_MODIFIER_SOURCE: + description = "source " + description; + break; + } + opt->add_options() - (SNAPSHOT_ID.c_str(), po::value<uint64_t>(), "snapshot id"); + (name.c_str(), po::value<uint64_t>(), description.c_str()); } void add_pool_options(boost::program_options::options_description *pos, diff --git a/src/tools/rbd/ArgumentTypes.h b/src/tools/rbd/ArgumentTypes.h index db16b4b3cf0..cc7c4813636 100644 --- a/src/tools/rbd/ArgumentTypes.h +++ b/src/tools/rbd/ArgumentTypes.h @@ -158,7 +158,8 @@ void add_image_id_option(boost::program_options::options_description *opt, void add_snap_option(boost::program_options::options_description *opt, ArgumentModifier modifier); -void add_snap_id_option(boost::program_options::options_description *opt); +void add_snap_id_option(boost::program_options::options_description *opt, + ArgumentModifier modifier); void add_pool_options(boost::program_options::options_description *pos, boost::program_options::options_description *opt, diff --git a/src/tools/rbd/action/Children.cc b/src/tools/rbd/action/Children.cc index 6881989abb2..93d4c539c28 100644 --- a/src/tools/rbd/action/Children.cc +++ b/src/tools/rbd/action/Children.cc @@ -85,7 +85,7 @@ void get_arguments(po::options_description *positional, at::add_image_or_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); at::add_image_id_option(options); - 
at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); options->add_options() ("all,a", po::bool_switch(), "list all children (include trash)"); options->add_options() diff --git a/src/tools/rbd/action/Clone.cc b/src/tools/rbd/action/Clone.cc index 6406c957e49..874024f482b 100644 --- a/src/tools/rbd/action/Clone.cc +++ b/src/tools/rbd/action/Clone.cc @@ -4,6 +4,7 @@ #include "tools/rbd/ArgumentTypes.h" #include "tools/rbd/Shell.h" #include "tools/rbd/Utils.h" +#include "include/types.h" #include "common/errno.h" #include <iostream> #include <boost/program_options.hpp> @@ -15,16 +16,10 @@ namespace clone { namespace at = argument_types; namespace po = boost::program_options; -int do_clone(librbd::RBD &rbd, librados::IoCtx &p_ioctx, - const char *p_name, const char *p_snapname, - librados::IoCtx &c_ioctx, const char *c_name, - librbd::ImageOptions& opts) { - return rbd.clone3(p_ioctx, p_name, p_snapname, c_ioctx, c_name, opts); -} - void get_arguments(po::options_description *positional, po::options_description *options) { at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_SOURCE); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_SOURCE); at::add_image_spec_options(positional, options, at::ARGUMENT_MODIFIER_DEST); at::add_create_image_options(options, false); } @@ -36,14 +31,28 @@ int execute(const po::variables_map &vm, std::string namespace_name; std::string image_name; std::string snap_name; + uint64_t snap_id = CEPH_NOSNAP; + + if (vm.count(at::SNAPSHOT_ID)) { + snap_id = vm[at::SNAPSHOT_ID].as<uint64_t>(); + } + int r = utils::get_pool_image_snapshot_names( vm, at::ARGUMENT_MODIFIER_SOURCE, &arg_index, &pool_name, &namespace_name, - &image_name, &snap_name, true, utils::SNAPSHOT_PRESENCE_REQUIRED, + &image_name, &snap_name, true, + (snap_id == CEPH_NOSNAP ? utils::SNAPSHOT_PRESENCE_REQUIRED : + utils::SNAPSHOT_PRESENCE_PERMITTED), utils::SPEC_VALIDATION_NONE); if (r < 0) { return r; } + if (!snap_name.empty() && snap_id != CEPH_NOSNAP) { + std::cerr << "rbd: trying to access snapshot using both name and id." + << std::endl; + return -EINVAL; + } + std::string dst_pool_name; std::string dst_namespace_name; std::string dst_image_name; @@ -77,8 +86,13 @@ int execute(const po::variables_map &vm, } librbd::RBD rbd; - r = do_clone(rbd, io_ctx, image_name.c_str(), snap_name.c_str(), dst_io_ctx, - dst_image_name.c_str(), opts); + if (!snap_name.empty()) { + r = rbd.clone3(io_ctx, image_name.c_str(), snap_name.c_str(), dst_io_ctx, + dst_image_name.c_str(), opts); + } else { + r = rbd.clone4(io_ctx, image_name.c_str(), snap_id, dst_io_ctx, + dst_image_name.c_str(), opts); + } if (r == -EXDEV) { std::cerr << "rbd: clone v2 required for cross-namespace clones." 
<< std::endl; diff --git a/src/tools/rbd/action/Device.cc b/src/tools/rbd/action/Device.cc index d306e2dacf8..ca0caa0b265 100644 --- a/src/tools/rbd/action/Device.cc +++ b/src/tools/rbd/action/Device.cc @@ -195,7 +195,7 @@ void get_map_arguments(po::options_description *positional, ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions") ("quiesce", po::bool_switch(), "use quiesce hooks") ("quiesce-hook", po::value<std::string>(), "quiesce hook path"); - at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); add_device_specific_options(options); } @@ -215,7 +215,7 @@ void get_unmap_arguments(po::options_description *positional, at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); - at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); add_device_specific_options(options); } @@ -238,7 +238,7 @@ void get_attach_arguments(po::options_description *positional, ("exclusive", po::bool_switch(), "disable automatic exclusive lock transitions") ("quiesce", po::bool_switch(), "use quiesce hooks") ("quiesce-hook", po::value<std::string>(), "quiesce hook path"); - at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); add_device_specific_options(options); } @@ -258,7 +258,7 @@ void get_detach_arguments(po::options_description *positional, at::add_namespace_option(options, at::ARGUMENT_MODIFIER_NONE); at::add_image_option(options, at::ARGUMENT_MODIFIER_NONE); at::add_snap_option(options, at::ARGUMENT_MODIFIER_NONE); - at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); add_device_specific_options(options); } diff --git a/src/tools/rbd/action/Group.cc b/src/tools/rbd/action/Group.cc index 5c2232a6fc6..e8cc66ca679 100644 --- a/src/tools/rbd/action/Group.cc +++ b/src/tools/rbd/action/Group.cc @@ -261,6 +261,58 @@ int execute_rename(const po::variables_map &vm, return 0; } +int execute_info(const po::variables_map &vm, + const std::vector<std::string> &ceph_global_init_args) { + size_t arg_index = 0; + + std::string pool_name; + std::string namespace_name; + std::string group_name; + + int r = utils::get_pool_generic_snapshot_names( + vm, at::ARGUMENT_MODIFIER_NONE, &arg_index, at::POOL_NAME, &pool_name, + &namespace_name, GROUP_NAME, "group", &group_name, nullptr, true, + utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); + if (r < 0) { + return r; + } + + at::Format::Formatter formatter; + r = utils::get_formatter(vm, &formatter); + if (r < 0) { + return r; + } + Formatter *f = formatter.get(); + + librados::Rados rados; + librados::IoCtx io_ctx; + + r = utils::init(pool_name, namespace_name, &rados, &io_ctx); + if (r < 0) { + return r; + } + + librbd::RBD rbd; + std::string group_id; + r = rbd.group_get_id(io_ctx, group_name.c_str(), &group_id); + if (r < 0) { + return r; + } + + if (f) { + f->open_object_section("group"); + f->dump_string("group_name", group_name); + f->dump_string("group_id", group_id); + f->close_section(); + f->flush(std::cout); + } else { + std::cout << "rbd group '" << group_name << "':\n" + << "\t" << "id: " << group_id << std::endl; + } + + return 0; +} + int execute_add(const po::variables_map &vm, const std::vector<std::string> &ceph_global_init_args) { size_t arg_index = 0; @@ -771,6 +823,13 @@ void get_rename_arguments(po::options_description 
*positional, false); } +void get_info_arguments(po::options_description *positional, + po::options_description *options) { + add_group_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE, + false); + at::add_format_options(options); +} + void get_add_arguments(po::options_description *positional, po::options_description *options) { positional->add_options() @@ -877,6 +936,9 @@ Shell::Action action_list( Shell::Action action_rename( {"group", "rename"}, {}, "Rename a group within pool.", "", &get_rename_arguments, &execute_rename); +Shell::Action action_info( + {"group", "info"}, {}, "Show information about a group.", + "", &get_info_arguments, &execute_info); Shell::Action action_add( {"group", "image", "add"}, {}, "Add an image to a group.", "", &get_add_arguments, &execute_add); diff --git a/src/tools/rbd/action/Snap.cc b/src/tools/rbd/action/Snap.cc index cb87735f905..5f85f5d127d 100644 --- a/src/tools/rbd/action/Snap.cc +++ b/src/tools/rbd/action/Snap.cc @@ -24,6 +24,22 @@ static const std::string ALL_NAME("all"); namespace at = argument_types; namespace po = boost::program_options; +std::string get_snap_namespace_name(librbd::snap_namespace_type_t type) +{ + switch (type) { + case RBD_SNAP_NAMESPACE_TYPE_USER: + return "user"; + case RBD_SNAP_NAMESPACE_TYPE_GROUP: + return "group"; + case RBD_SNAP_NAMESPACE_TYPE_TRASH: + return "trash"; + case RBD_SNAP_NAMESPACE_TYPE_MIRROR: + return "mirror"; + default: + return "unknown (" + stringify(type) + ")"; + } +} + int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados::Rados& rados) { std::vector<librbd::snap_info_t> snaps; @@ -86,24 +102,8 @@ int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados:: return r; } - std::string snap_namespace_name = "Unknown"; - switch (snap_namespace) { - case RBD_SNAP_NAMESPACE_TYPE_USER: - snap_namespace_name = "user"; - break; - case RBD_SNAP_NAMESPACE_TYPE_GROUP: - snap_namespace_name = "group"; - break; - case RBD_SNAP_NAMESPACE_TYPE_TRASH: - snap_namespace_name = "trash"; - break; - case RBD_SNAP_NAMESPACE_TYPE_MIRROR: - snap_namespace_name = "mirror"; - break; - } - int get_trash_res = -ENOENT; - std::string trash_original_name; + librbd::snap_trash_namespace_t trash_snap; int get_group_res = -ENOENT; librbd::snap_group_namespace_t group_snap; int get_mirror_res = -ENOENT; @@ -113,8 +113,8 @@ int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados:: get_group_res = image.snap_get_group_namespace(s->id, &group_snap, sizeof(group_snap)); } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_TRASH) { - get_trash_res = image.snap_get_trash_namespace( - s->id, &trash_original_name); + get_trash_res = image.snap_get_trash_namespace2( + s->id, &trash_snap, sizeof(trash_snap)); } else if (snap_namespace == RBD_SNAP_NAMESPACE_TYPE_MIRROR) { get_mirror_res = image.snap_get_mirror_namespace( s->id, &mirror_snap, sizeof(mirror_snap)); @@ -152,14 +152,17 @@ int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados:: f->dump_string("timestamp", tt_str); if (all_snaps) { f->open_object_section("namespace"); - f->dump_string("type", snap_namespace_name); + f->dump_string("type", get_snap_namespace_name(snap_namespace)); if (get_group_res == 0) { std::string pool_name = pool_map[group_snap.group_pool]; f->dump_string("pool", pool_name); f->dump_string("group", group_snap.group_name); f->dump_string("group snap", group_snap.group_snap_name); } else if (get_trash_res == 0) { - f->dump_string("original_name", 
trash_original_name); + f->dump_string("original_namespace_type", + get_snap_namespace_name( + trash_snap.original_namespace_type)); + f->dump_string("original_name", trash_snap.original_name); } else if (get_mirror_res == 0) { f->dump_string("state", mirror_snap_state); f->open_array_section("mirror_peer_uuids"); @@ -187,7 +190,7 @@ int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados:: if (all_snaps) { std::ostringstream oss; - oss << snap_namespace_name; + oss << get_snap_namespace_name(snap_namespace); if (get_group_res == 0) { std::string pool_name = pool_map[group_snap.group_pool]; @@ -195,7 +198,9 @@ int do_list_snaps(librbd::Image& image, Formatter *f, bool all_snaps, librados:: << group_snap.group_name << "@" << group_snap.group_snap_name << ")"; } else if (get_trash_res == 0) { - oss << " (" << trash_original_name << ")"; + oss << " (" + << get_snap_namespace_name(trash_snap.original_namespace_type) + << " " << trash_snap.original_name << ")"; } else if (get_mirror_res == 0) { oss << " (" << mirror_snap_state << " " << "peer_uuids:[" << mirror_snap.mirror_peer_uuids << "]"; @@ -473,7 +478,7 @@ void get_remove_arguments(po::options_description *positional, po::options_description *options) { at::add_snap_spec_options(positional, options, at::ARGUMENT_MODIFIER_NONE); at::add_image_id_option(options); - at::add_snap_id_option(options); + at::add_snap_id_option(options, at::ARGUMENT_MODIFIER_NONE); at::add_no_progress_option(options); options->add_options() diff --git a/src/tracing/librbd.tp b/src/tracing/librbd.tp index b2624d5b184..791171e27f5 100644 --- a/src/tracing/librbd.tp +++ b/src/tracing/librbd.tp @@ -1386,20 +1386,20 @@ TRACEPOINT_EVENT(librbd, aio_compare_and_write_exit, TRACEPOINT_EVENT(librbd, clone_enter, TP_ARGS( const char*, parent_pool_name, - uint64_t, parent_id, + uint64_t, parent_pool_id, const char*, parent_name, const char*, parent_snap_name, const char*, child_pool_name, - uint64_t, child_id, + uint64_t, child_pool_id, const char*, child_name, uint64_t, features), TP_FIELDS( ctf_string(parent_pool_name, parent_pool_name) - ctf_integer(uint64_t, parent_id, parent_id) + ctf_integer(uint64_t, parent_pool_id, parent_pool_id) ctf_string(parent_name, parent_name) ctf_string(parent_snap_name, parent_snap_name) ctf_string(child_pool_name, child_pool_name) - ctf_integer(uint64_t, child_id, child_id) + ctf_integer(uint64_t, child_pool_id, child_pool_id) ctf_string(child_name, child_name) ctf_integer(uint64_t, features, features) ) @@ -1418,22 +1418,22 @@ TRACEPOINT_EVENT(librbd, clone_exit, TRACEPOINT_EVENT(librbd, clone2_enter, TP_ARGS( const char*, parent_pool_name, - uint64_t, parent_id, + uint64_t, parent_pool_id, const char*, parent_name, const char*, parent_snap_name, const char*, child_pool_name, - uint64_t, child_id, + uint64_t, child_pool_id, const char*, child_name, uint64_t, features, uint64_t, stripe_unit, int, stripe_count), TP_FIELDS( ctf_string(parent_pool_name, parent_pool_name) - ctf_integer(uint64_t, parent_id, parent_id) + ctf_integer(uint64_t, parent_pool_id, parent_pool_id) ctf_string(parent_name, parent_name) ctf_string(parent_snap_name, parent_snap_name) ctf_string(child_pool_name, child_pool_name) - ctf_integer(uint64_t, child_id, child_id) + ctf_integer(uint64_t, child_pool_id, child_pool_id) ctf_string(child_name, child_name) ctf_integer(uint64_t, features, features) ctf_integer(uint64_t, stripe_unit, stripe_unit) @@ -1454,20 +1454,20 @@ TRACEPOINT_EVENT(librbd, clone2_exit, TRACEPOINT_EVENT(librbd, clone3_enter, 
TP_ARGS( const char*, parent_pool_name, - uint64_t, parent_id, + uint64_t, parent_pool_id, const char*, parent_name, const char*, parent_snap_name, const char*, child_pool_name, - uint64_t, child_id, + uint64_t, child_pool_id, const char*, child_name, void*, opts), TP_FIELDS( ctf_string(parent_pool_name, parent_pool_name) - ctf_integer(uint64_t, parent_id, parent_id) + ctf_integer(uint64_t, parent_pool_id, parent_pool_id) ctf_string(parent_name, parent_name) ctf_string(parent_snap_name, parent_snap_name) ctf_string(child_pool_name, child_pool_name) - ctf_integer(uint64_t, child_id, child_id) + ctf_integer(uint64_t, child_pool_id, child_pool_id) ctf_string(child_name, child_name) ctf_integer_hex(void*, opts, opts) ) @@ -1481,6 +1481,36 @@ TRACEPOINT_EVENT(librbd, clone3_exit, ) ) +TRACEPOINT_EVENT(librbd, clone4_enter, + TP_ARGS( + const char*, parent_pool_name, + uint64_t, parent_pool_id, + const char*, parent_name, + uint64_t, parent_snap_id, + const char*, child_pool_name, + uint64_t, child_pool_id, + const char*, child_name, + void*, opts), + TP_FIELDS( + ctf_string(parent_pool_name, parent_pool_name) + ctf_integer(uint64_t, parent_pool_id, parent_pool_id) + ctf_string(parent_name, parent_name) + ctf_integer(uint64_t, parent_snap_id, parent_snap_id) + ctf_string(child_pool_name, child_pool_name) + ctf_integer(uint64_t, child_pool_id, child_pool_id) + ctf_string(child_name, child_name) + ctf_integer_hex(void*, opts, opts) + ) +) + +TRACEPOINT_EVENT(librbd, clone4_exit, + TP_ARGS( + int, retval), + TP_FIELDS( + ctf_integer(int, retval, retval) + ) +) + TRACEPOINT_EVENT(librbd, flatten_enter, TP_ARGS( void*, imagectx, diff --git a/src/vstart.sh b/src/vstart.sh index 59a3798744d..a462a9ae60a 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -273,6 +273,8 @@ options: --seastore-secondary-devs: comma-separated list of secondary blockdevs to use for seastore --seastore-secondary-devs-type: device type of all secondary blockdevs. 
HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD --crimson-smp: number of cores to use for crimson + --crimson-alien-num-threads: number of alien-tp threads + --crimson-alien-num-cores: number of cores to use for alien-tp --osds-per-host: populate crush_location as each host holds the specified number of osds if set --require-osd-and-client-version: if supplied, do set-require-min-compat-client and require-osd-release to specified value --use-crush-tunables: if supplied, set tunables to specified value @@ -344,7 +346,11 @@ parse_secondary_devs() { done } +# Default values for the crimson options crimson_smp=1 +crimson_alien_num_threads=0 +crimson_alien_num_cores=0 + while [ $# -ge 1 ]; do case $1 in -d | --debug) @@ -575,6 +581,14 @@ case $1 in crimson_smp=$2 shift ;; + --crimson-alien-num-threads) + crimson_alien_num_threads=$2 + shift + ;; + --crimson-alien-num-cores) + crimson_alien_num_cores=$2 + shift + ;; --bluestore-spdk) [ -z "$2" ] && usage_exit IFS=',' read -r -a bluestore_spdk_dev <<< "$2" @@ -1698,8 +1712,25 @@ if [ "$ceph_osd" == "crimson-osd" ]; then extra_seastar_args=" --trace" fi if [ "$(expr $(nproc) - 1)" -gt "$(($CEPH_NUM_OSD * crimson_smp))" ]; then - echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)" - $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)" + if [ $crimson_alien_num_cores -gt 0 ]; then + alien_bottom_cpu=$(($CEPH_NUM_OSD * crimson_smp)) + alien_top_cpu=$(( alien_bottom_cpu + crimson_alien_num_cores - 1 )) + # Ensure the top value stays within range: + if [ "$(($alien_top_cpu))" -gt "$(expr $(nproc) - 1)" ]; then + alien_top_cpu=$(expr $(nproc) - 1) + fi + echo "crimson_alien_thread_cpu_cores: $alien_bottom_cpu-$alien_top_cpu" + # This is a (logical) processor id range; it could be refined to encompass only physical processor ids + # (equivalently, ignore hyperthreading sibling processor ids) + $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores "$alien_bottom_cpu-$alien_top_cpu" + else + echo "crimson_alien_thread_cpu_cores:" $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)" + $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_thread_cpu_cores $(($CEPH_NUM_OSD * crimson_smp))-"$(expr $(nproc) - 1)" + fi + if [ $crimson_alien_num_threads -gt 0 ]; then + echo "$CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads $crimson_alien_num_threads" + $CEPH_BIN/ceph -c $conf_fn config set osd crimson_alien_op_num_threads "$crimson_alien_num_threads" + fi else echo "No alien thread cpu core isolation" fi diff --git a/src/xxHash b/src/xxHash -Subproject 1f40c6511fa8dd9d2e337ca8c9bc18b3e87663c +Subproject bbb27a5efb85b92a0486cf361a8635715a53f6b
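
A note on the first-damage.py hunk above: CEPH_NOSNAP is a 64-bit sentinel (the uint64 view of int64 -2), so the dentry's first field must be repacked with struct format "<Q"; the old "<I" only overwrote the low four bytes of the on-disk value. A minimal standalone sketch of the truncation hazard (illustrative only; every name here is local to the sketch, not Ceph code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t CEPH_NOSNAP = 0xfffffffffffffffe; // uint64 view of int64 -2
      // Writing the sentinel through a 32-bit slot keeps only the low bytes,
      // producing a value that no longer matches the 64-bit sentinel the MDS
      // compares against; this is what the "<I" to "<Q" change avoids.
      const uint32_t truncated = static_cast<uint32_t>(CEPH_NOSNAP);
      assert(truncated == 0xfffffffeu);                       // int32 -2, wrong width
      assert(static_cast<uint64_t>(truncated) != CEPH_NOSNAP);
      return 0;
    }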
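The cephfs_mirror hunks move the failed/blocklisted timestamps out of InstanceWatcher and MirrorWatcher and into FSMirror, with the watchers reporting events through the new Watcher::ErrorListener interface. A minimal sketch of that ownership pattern, using standard-library stand-ins for Ceph's clock and mutex types; only the listener method names and m_elistener come from the diff, and the indirection is flattened here (in the patch, FSMirror forwards through a nested TimestampListener rather than inheriting directly):

    #include <chrono>
    #include <mutex>

    using monotime = std::chrono::steady_clock::time_point;

    struct ErrorListener {                  // mirrors Watcher::ErrorListener
      virtual ~ErrorListener() {}
      virtual void set_blocklisted_ts() = 0;
      virtual void set_failed_ts() = 0;
    };

    class MirrorStub : public ErrorListener {  // stand-in for FSMirror
    public:
      void set_failed_ts() override {
        std::scoped_lock locker(m_lock);
        m_failed_ts = std::chrono::steady_clock::now();
      }
      void set_blocklisted_ts() override {
        std::scoped_lock locker(m_lock);
        m_blocklisted_ts = std::chrono::steady_clock::now();
      }
      monotime get_failed_ts() {
        std::scoped_lock locker(m_lock);
        return m_failed_ts;
      }
    private:
      std::mutex m_lock;
      monotime m_failed_ts{};
      monotime m_blocklisted_ts{};
    };

    // A watcher only reports the event; it no longer stores its own timestamp,
    // so the value survives the watcher being torn down and recreated on restart.
    struct WatcherStub {
      explicit WatcherStub(ErrorListener &elistener) : m_elistener(elistener) {}
      void handle_rewatch_complete(int r) {
        if (r < 0) m_elistener.set_failed_ts();
      }
      ErrorListener &m_elistener;
    };

This is also why the getters stop falling back to clock::now() when no watcher exists: FSMirror now always owns a stable timestamp of its own.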
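Relatedly, the Mirror.cc restart checks flip the operands of the time subtraction. With a monotonic clock, get_failed_ts() - clock::now() for a past timestamp is a negative duration, so the old comparison could never exceed a positive configured interval and failed or blocklisted mirrors were never restarted. A self-contained illustration of the before/after logic, with std::chrono standing in for Ceph's clock:

    #include <cassert>
    #include <chrono>

    int main() {
      using namespace std::chrono;
      const auto failed_ts = steady_clock::now() - seconds(30); // failure 30s ago
      const auto failed_interval = seconds(10);

      // old form: (past - now) is negative, so the restart never triggers
      const bool old_form =
          duration_cast<seconds>(failed_ts - steady_clock::now()) > failed_interval;

      // fixed form, as in the diff: compare elapsed seconds against the interval
      const bool new_form =
          duration_cast<seconds>(steady_clock::now() - failed_ts).count() >
          failed_interval.count();

      assert(!old_form && new_form);
      return 0;
    }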