from tasks.ceph_test_case import CephTestCase import json import logging log = logging.getLogger(__name__) class TestStretchClusterNew(CephTestCase): CLUSTER = "ceph" MONS = { "a": { "rank": 0, }, "b": { "rank": 1, }, "c": { "rank": 2, } } WRITE_PERIOD = 10 RECOVERY_PERIOD = WRITE_PERIOD * 6 SUCCESS_HOLD_TIME = 10 def setUp(self): """ Set up the cluster for the test. """ super(TestStretchClusterNew, self).setUp() def tearDown(self): """ Clean up the cluter after the test. """ super(TestStretchClusterNew, self).tearDown() def _check_connection_score(self): """ Check the connection score of all the mons. """ for mon, _ in self.MONS.items(): # get the connection score cscore = self.ceph_cluster.mon_manager.raw_cluster_cmd( 'daemon', 'mon.{}'.format(mon), 'connection', 'scores', 'dump') # parse the connection score cscore = json.loads(cscore) # check if the current mon rank is correct if cscore["rank"] != self.MONS[mon]["rank"]: log.error( "Rank mismatch {} != {}".format( cscore["rank"], self.MONS[mon]["rank"] ) ) return False # check if current mon have all the peer reports and ourself if len(cscore['reports']) != len(self.MONS): log.error( "Reports count mismatch {}".format(cscore['reports']) ) return False for report in cscore["reports"]: report_rank = [] for peer in report["peer_scores"]: # check if the peer is alive if not peer["peer_alive"]: log.error("Peer {} is not alive".format(peer)) return False report_rank.append(peer["peer_rank"]) # check if current mon has all the ranks and no duplicates expected_ranks = [ rank for data in self.MONS.values() for rank in data.values() ] if report_rank.sort() != expected_ranks.sort(): log.error("Rank mismatch in report {}".format(report)) return False log.info("Connection score is clean!") return True def test_connection_score(self): # check if all mons are in quorum self.ceph_cluster.mon_manager.wait_for_mon_quorum_size(3) # check if all connection scores reflect this self.wait_until_true_and_hold( lambda: self._check_connection_score(), # Wait for 4 minutes for the connection score to recover timeout=self.RECOVERY_PERIOD * 4, # Hold the clean connection score for 60 seconds success_hold_time=self.SUCCESS_HOLD_TIME * 6 )