Diffstat (limited to 'src/osd/scrubber/scrub_job.h')
-rw-r--r--   src/osd/scrubber/scrub_job.h   326
1 file changed, 232 insertions(+), 94 deletions(-)
diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h
index ef30bcb4fe5..536a0caf161 100644
--- a/src/osd/scrubber/scrub_job.h
+++ b/src/osd/scrubber/scrub_job.h
@@ -2,48 +2,25 @@
// vim: ts=8 sw=2 smarttab
#pragma once
-#include <atomic>
#include <chrono>
#include <compare>
#include <iostream>
#include <memory>
+#include <random>
#include <vector>
#include "common/ceph_atomic.h"
+#include "common/fmt_common.h"
#include "include/utime_fmt.h"
#include "osd/osd_types.h"
#include "osd/osd_types_fmt.h"
#include "osd/scrubber_common.h"
-
-/**
- * The ID used to name a candidate to scrub:
- * - in this version: a PG is identified by its spg_t
- * - in the (near) future: a PG + a scrub type (shallow/deep)
- */
-using ScrubTargetId = spg_t;
-
+#include "scrub_queue_entry.h"
namespace Scrub {
enum class must_scrub_t { not_mandatory, mandatory };
-struct scrub_schedule_t {
- utime_t scheduled_at{};
- utime_t deadline{0, 0};
- utime_t not_before{utime_t::max()};
- // when compared - the 'not_before' is ignored, assuming
- // we never compare jobs with different eligibility status.
- std::partial_ordering operator<=>(const scrub_schedule_t& rhs) const
- {
- auto cmp1 = scheduled_at <=> rhs.scheduled_at;
- if (cmp1 != 0) {
- return cmp1;
- }
- return deadline <=> rhs.deadline;
- };
- bool operator==(const scrub_schedule_t& rhs) const = default;
-};
-
struct sched_params_t {
utime_t proposed_time{};
must_scrub_t is_must{must_scrub_t::not_mandatory};
@@ -89,8 +66,9 @@ struct sched_conf_t {
/**
* a randomization factor aimed at preventing 'thundering herd' problems
- * upon deep-scrubs common intervals. If polling a random number smaller
- * than that percentage, the next shallow scrub is upgraded to deep.
+ * when many PGs share the same deep-scrub interval. The actual deep-scrub
+ * interval is drawn from a normal distribution centered on the configured
+ * interval, with a standard deviation of <deep_randomize_ratio> * <interval>.
*/
double deep_randomize_ratio{0.0};
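As a rough illustration of the randomization scheme described above - a minimal sketch (the helper and its name are hypothetical, not part of this patch); note that <random> is already included by this header:

  // hypothetical helper -- a sketch of the randomization described above:
  // mean = the configured deep-scrub interval,
  // stddev = deep_randomize_ratio * that interval
  double randomized_deep_interval(
      std::mt19937& gen,           // e.g. the ScrubJob's 'random_gen' member
      double configured_interval,  // the configured deep interval (seconds)
      double deep_randomize_ratio)
  {
    std::normal_distribution<double> dist(
        configured_interval, deep_randomize_ratio * configured_interval);
    return dist(gen);
  }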
@@ -102,58 +80,145 @@ struct sched_conf_t {
};
-class ScrubJob {
- public:
+/**
+ * a wrapper around a Scrub::SchedEntry, adding some state flags
+ * to be used only by the Scrubber. Note that the SchedEntry itself is known to
+ * multiple objects (and must be kept small in size).
+ */
+struct SchedTarget {
+ constexpr explicit SchedTarget(spg_t pg_id, scrub_level_t scrub_level)
+ : sched_info{pg_id, scrub_level}
+ {}
+
+ /// our ID and scheduling parameters
+ SchedEntry sched_info;
+
+ /**
+ * is a copy of this target (i.e. - this specific combination of
+ * PG and scrub type) currently in the scrub queue?
+ */
+ bool queued{false};
+
+ // some helper functions
+
+ /// resets to the after-construction state
+ void reset();
+
+ /// set the urgency to the max of the current and the provided urgency
+ void up_urgency_to(urgency_t u);
+
+ /// access that part of the SchedTarget that is queued in the scrub queue
+ const SchedEntry& queued_element() const { return sched_info; }
+
+ bool is_deep() const { return sched_info.level == scrub_level_t::deep; }
+
+ bool is_shallow() const { return sched_info.level == scrub_level_t::shallow; }
+
+ scrub_level_t level() const { return sched_info.level; }
+
+ urgency_t urgency() const { return sched_info.urgency; }
+
/**
- * a time scheduled for scrub, and a deadline: The scrub could be delayed
- * if system load is too high (but not if after the deadline),or if trying
- * to scrub out of scrub hours.
+ * a loose definition of 'high priority' scrubs. To be used only for
+ * logs and user messages. Actual scheduling decisions should be based
+ * on the 'urgency' attribute and its fine-grained characteristics.
*/
- scrub_schedule_t schedule;
+ bool is_high_priority() const
+ {
+ return urgency() != urgency_t::periodic_regular;
+ }
+
+ bool was_delayed() const { return sched_info.last_issue != delay_cause_t::none; }
+
+ /// provides r/w access to the scheduling sub-object
+ SchedEntry& sched_info_ref() { return sched_info; }
+};
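The bodies of the two mutators are not part of this header. A minimal sketch, based only on the doc-comments above, assuming SchedEntry keeps the pgid and level it was constructed with, and that urgency_t enumerators are ordered by increasing urgency:

  // sketch -- resets to the after-construction state
  void SchedTarget::reset()
  {
    sched_info = SchedEntry{sched_info.pgid, sched_info.level};
    queued = false;
  }

  // sketch -- never lowers an already-higher urgency
  void SchedTarget::up_urgency_to(urgency_t u)
  {
    sched_info.urgency = std::max(sched_info.urgency, u);  // <algorithm>
  }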
+
+
+class ScrubJob {
+ public:
/// pg to be scrubbed
spg_t pgid;
/// the OSD id (for the log)
int whoami;
+ /*
+ * the schedule for the next scrub at the specific level. Also - the
+ * urgency and characteristics of the scrub (e.g. - high priority,
+ * must-repair, ...)
+ */
+ SchedTarget shallow_target;
+ SchedTarget deep_target;
+
/**
* Set whenever the PG scrubs are managed by the OSD (i.e. - from becoming
* an active Primary till the end of the interval).
*/
bool registered{false};
- /**
- * there is a scrub target for this PG in the queue.
- * \attn: temporary. Will be replaced with a pair of flags in the
- * two level-specific scheduling targets.
- */
- bool target_queued{false};
-
/// how the last attempt to scrub this PG ended
delay_cause_t last_issue{delay_cause_t::none};
/**
- * the scrubber is waiting for locked objects to be unlocked.
- * Set after a grace period has passed.
- */
+ * the scrubber is waiting for locked objects to be unlocked.
+ * Set after a grace period has passed.
+ */
bool blocked{false};
utime_t blocked_since{};
CephContext* cct;
- bool high_priority{false};
+ /// random generator used when randomizing the scrub times
+ /// \todo consider using one common generator in the OSD service
+ std::random_device random_dev;
+ std::mt19937 random_gen;
ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
- utime_t get_sched_time() const { return schedule.not_before; }
+ /**
+ * returns a possible reference to the earliest target that is eligible. If
+ * both the shallow and the deep targets have their n.b. in the future,
+ * nullopt is returned.
+ */
+ std::optional<std::reference_wrapper<SchedTarget>> earliest_eligible(
+ utime_t scrub_clock_now);
+ std::optional<std::reference_wrapper<const SchedTarget>> earliest_eligible(
+ utime_t scrub_clock_now) const;
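A sketch of the non-const overload, assuming SchedEntry carries its times in a 'schedule' sub-object with a 'not_before' member (mirroring the scrub_schedule_t that this patch moves out of this header):

  // sketch -- return the eligible target with the earlier 'not-before'
  std::optional<std::reference_wrapper<SchedTarget>> ScrubJob::earliest_eligible(
      utime_t scrub_clock_now)
  {
    const auto sh_nb = shallow_target.sched_info.schedule.not_before;
    const auto dp_nb = deep_target.sched_info.schedule.not_before;
    const bool sh_ok = sh_nb <= scrub_clock_now;
    const bool dp_ok = dp_nb <= scrub_clock_now;
    if (sh_ok && dp_ok) {
      return (sh_nb <= dp_nb) ? std::ref(shallow_target) : std::ref(deep_target);
    }
    if (sh_ok) {
      return std::ref(shallow_target);
    }
    if (dp_ok) {
      return std::ref(deep_target);
    }
    return std::nullopt;  // both 'not-before' times are still in the future
  }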
+
+ /**
+ * the target with the earliest 'not-before' time (correct assuming
+ * both targets' not-before times are in the future).
+ * \attn: may return the wrong answer if both targets are already
+ * eligible. If the need arises, add a version that accepts the
+ * current time as a parameter, allowing a correct determination
+ * in all cases.
+ */
+ const SchedTarget& earliest_target() const;
+ SchedTarget& earliest_target();
+
+ /**
+ * the target that will be scrubbed first. Basically - uses
+ * cmp_entries() to determine the order of the two targets.
+ * Which means: if only one of the targets is eligible, it is returned;
+ * if both are - the one with the higher priority -> level -> target time;
+ * otherwise - the one with the earliest not-before.
+ */
+ const SchedTarget& earliest_target(utime_t scrub_clock_now) const;
+ SchedTarget& earliest_target(utime_t scrub_clock_now);
+
+ /// the not-before of our earliest target (either shallow or deep)
+ utime_t get_sched_time() const;
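The clock-aware overload can be expressed in terms of cmp_entries(), matching its use in operator<=> further below. A sketch, assuming cmp_entries() orders 'less' the entry to be scrubbed first, and the same 'schedule.not_before' layout as above:

  // sketch -- pick the target that cmp_entries() orders first
  SchedTarget& ScrubJob::earliest_target(utime_t scrub_clock_now)
  {
    const auto ord = cmp_entries(
        scrub_clock_now, shallow_target.queued_element(),
        deep_target.queued_element());
    return (ord == std::partial_ordering::less) ? shallow_target : deep_target;
  }

  // sketch -- the 'not-before' of whichever target comes first
  utime_t ScrubJob::get_sched_time() const
  {
    return earliest_target().queued_element().schedule.not_before;
  }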
std::string_view state_desc() const
{
- return registered ? (target_queued ? "queued" : "registered")
+ return registered ? (is_queued() ? "queued" : "registered")
: "not-registered";
}
+ SchedTarget& get_target(scrub_level_t s_or_d);
+
/**
* Given a proposed time for the next scrub, and the relevant
* configuration, adjust_schedule() determines the actual target time,
@@ -167,34 +232,29 @@ class ScrubJob {
* on the configuration; the deadline is set further out (if configured)
* and the n.b. is reset to the target.
*/
- void adjust_schedule(
- const Scrub::sched_params_t& suggested,
- const Scrub::sched_conf_t& aconf,
+ void adjust_shallow_schedule(
+ utime_t last_scrub,
+ const Scrub::sched_conf_t& app_conf,
utime_t scrub_clock_now,
- Scrub::delay_ready_t modify_ready_targets);
+ delay_ready_t modify_ready_targets);
- /**
- * push the 'not_before' time out by 'delay' seconds, so that this scrub target
- * would not be retried before 'delay' seconds have passed.
- */
- void delay_on_failure(
- std::chrono::seconds delay,
- delay_cause_t delay_cause,
- utime_t scrub_clock_now);
+ void adjust_deep_schedule(
+ utime_t last_deep,
+ const Scrub::sched_conf_t& app_conf,
+ utime_t scrub_clock_now,
+ delay_ready_t modify_ready_targets);
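A sketch of the shallow flavor, following the flow described in the comment above: compute the target time from the last scrub, then reset the not-before. The sched_conf_t field name 'shallow_interval' and the delay_ready_t enumerator are assumptions:

  // sketch only -- deadline handling omitted
  void ScrubJob::adjust_shallow_schedule(
      utime_t last_scrub,
      const Scrub::sched_conf_t& app_conf,
      utime_t scrub_clock_now,
      delay_ready_t modify_ready_targets)
  {
    auto& sched = shallow_target.sched_info_ref().schedule;
    // base target: one (assumed) shallow interval past the last scrub
    sched.scheduled_at =
        last_scrub + utime_t(static_cast<time_t>(app_conf.shallow_interval), 0);
    // unless the target is already eligible and 'modify_ready_targets'
    // says to leave such targets alone - reset the n.b. to the target
    if (sched.not_before > scrub_clock_now ||
        modify_ready_targets == delay_ready_t::delay_ready) {
      sched.not_before = sched.scheduled_at;
    }
  }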
/**
- * Recalculating any possible updates to the scrub schedule, following an
- * aborted scrub attempt.
- * Usually - we can use the same schedule that triggered the aborted scrub.
- * But we must take into account scenarios where "something" caused the
- * parameters prepared for the *next* scrub to show higher urgency or
- * priority. "Something" - as in an operator command requiring immediate
- * scrubbing, or a change in the pool/cluster configuration.
+ * For the level specified, set the 'not-before' time to 'now+delay',
+ * so that this scrub target would not be retried before the required
+ * delay seconds have passed.
+ * The delay duration is determined by the 'delay_cause' parameter.
+ * The 'last_issue' is updated to the cause of the delay.
+ * \returns a reference to the target that was modified.
*/
- void merge_and_delay(
- const scrub_schedule_t& aborted_schedule,
- Scrub::delay_cause_t issue,
- requested_scrub_t updated_flags,
+ [[maybe_unused]] SchedTarget& delay_on_failure(
+ scrub_level_t level,
+ delay_cause_t delay_cause,
utime_t scrub_clock_now);
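A sketch of the documented behavior; the cause-to-duration mapping (delay_for()) is a hypothetical helper:

  SchedTarget& ScrubJob::delay_on_failure(
      scrub_level_t level,
      delay_cause_t delay_cause,
      utime_t scrub_clock_now)
  {
    auto& trg = get_target(level);
    auto& entry = trg.sched_info_ref();
    // the delay duration is derived from the cause (hypothetical helper)
    const std::chrono::seconds delay = delay_for(delay_cause);
    entry.schedule.not_before = scrub_clock_now + utime_t(delay.count(), 0);
    entry.last_issue = delay_cause;
    return trg;
  }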
/**
@@ -211,21 +271,29 @@ class ScrubJob {
const Scrub::sched_conf_t& aconf,
utime_t scrub_clock_now) {}
+ /**
+ * the operator requested a scrub (shallow, deep or repair).
+ * Set the selected target to the requested urgency, adjusting scheduling
+ * parameters.
+ */
+ void operator_forced(scrub_level_t s_or_d, scrub_type_t scrub_type);
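A sketch of what 'adjusting scheduling parameters' could amount to; the urgency_t enumerator names are assumptions patterned on the urgency levels in the table further below:

  void ScrubJob::operator_forced(scrub_level_t s_or_d, scrub_type_t scrub_type)
  {
    auto& trg = get_target(s_or_d);
    // repair requests imply the highest urgency (assumed enumerators)
    trg.up_urgency_to(
        (scrub_type == scrub_type_t::do_repair) ? urgency_t::must_repair
                                                : urgency_t::operator_requested);
    // make the target eligible right away
    trg.sched_info_ref().schedule.not_before = ceph_clock_now();
  }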
+
void dump(ceph::Formatter* f) const;
bool is_registered() const { return registered; }
- /**
- * is this a high priority scrub job?
- * High priority - (usually) a scrub that was initiated by the operator
- */
- bool is_high_priority() const { return high_priority; }
+ /// is either of our two SchedTargets queued in the scrub queue?
+ bool is_queued() const;
+
+ /// mark both targets as queued / not queued
+ void clear_both_targets_queued();
+ void set_both_targets_queued();
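These three reduce to simple manipulations of the two 'queued' flags; sketched here directly from the doc-comments:

  bool ScrubJob::is_queued() const
  {
    return shallow_target.queued || deep_target.queued;
  }

  void ScrubJob::set_both_targets_queued()
  {
    shallow_target.queued = deep_target.queued = true;
  }

  void ScrubJob::clear_both_targets_queued()
  {
    shallow_target.queued = deep_target.queued = false;
  }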
/**
* a text description of the "scheduling intentions" of this PG:
* are we already scheduled for a scrub/deep scrub? when?
*/
- std::string scheduling_state(utime_t now_is, bool is_deep_expected) const;
+ std::string scheduling_state(utime_t now_is) const;
std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const;
std::string log_msg_prefix;
@@ -236,12 +304,83 @@ class ScrubJob {
// SchedTarget(s).
std::partial_ordering operator<=>(const ScrubJob& rhs) const
{
- return schedule <=> rhs.schedule;
+ return cmp_entries(
+ ceph_clock_now(), shallow_target.queued_element(),
+ deep_target.queued_element());
};
-};
-using ScrubQContainer = std::vector<std::unique_ptr<ScrubJob>>;
+ /*
+ * Restrictions and limitations that apply to each urgency level:
+ * -------------------------------------------------------------
+ * Some types of scrubs are exempt from some or all of the preconditions and
+ * limitations that apply to regular scrubs. The following table
+ * details the specific set of exemptions per 'urgency' level:
+ * (note: regular scrubs that are overdue are also allowed a specific
+ * set of exemptions. Those will be covered elsewhere).
+ *
+ * The relevant limitations are:
+ * - reservation: the scrub must reserve replicas;
+ * - dow/time: the scrub must adhere to the allowed days-of-week/hours;
+ * - ext-sleep: if initiated during allowed hours, the scrub is penalized
+ * if continued into the forbidden times, by having a longer sleep time;
+ * (note that this is only applicable to the wq scheduler).
+ * - load: the scrub must not be initiated if the OSD is under heavy CPU load;
+ * - noscrub: the scrub is aborted if the 'noscrub' flag (or the
+ * 'nodeep-scrub' flag for deep scrubs) is set;
+ * - randomization: the scrub's target time is extended by a random
+ * duration. This only applies to periodic scrubs.
+ * - configuration changes: the target time may be modified following
+ * a change in the configuration. This only applies to periodic scrubs.
+ * - max-scrubs: the scrub must not be initiated if the OSD is already
+ * scrubbing too many PGs (the 'osd_max_scrubs' limit).
+ * - backoff: the scrub must not be initiated this tick if a dice roll
+ * failed.
+ * - recovery: the scrub must not be initiated if the OSD is currently
+ * recovering PGs.
+ *
+ * The following table summarizes the limitations in effect per urgency level:
+ *
+ * +------------+---------+--------------+---------+----------+-------------+
+ * | limitation | must-   | after-repair |repairing| operator | must-repair |
+ * |            | scrub   |(aft recovery)|(errors) | request  |             |
+ * +------------+---------+--------------+---------+----------+-------------+
+ * | reservation| yes!    | no           | no?     | no       | no          |
+ * | dow/time   | yes     | yes          | no      | no       | no          |
+ * | ext-sleep  | no      | no           | no      | no       | no          |
+ * | load       | yes     | no           | no      | no       | no          |
+ * | noscrub    | yes     | no?          | yes     | no       | no          |
+ * | max-scrubs | yes     | yes          | yes     | no       | no          |
+ * | backoff    | yes     | no           | no      | no       | no          |
+ * | recovery   | yes     | yes          | yes     | no       | no          |
+ * +------------+---------+--------------+---------+----------+-------------+
+ */
+
+ // a set of helper functions for determining, for each urgency level, what
+ // restrictions and limitations apply to that level.
+
+ static bool observes_noscrub_flags(urgency_t urgency);
+
+ static bool observes_allowed_hours(urgency_t urgency);
+
+ static bool observes_load_limit(urgency_t urgency);
+
+ static bool requires_reservation(urgency_t urgency);
+
+ static bool requires_randomization(urgency_t urgency);
+
+ static bool observes_max_concurrency(urgency_t urgency);
+
+ static bool observes_random_backoff(urgency_t urgency);
+
+ static bool observes_recovery(urgency_t urgency);
+
+ // translating the 'urgency' into scrub behavior traits
+
+ static bool has_high_queue_priority(urgency_t urgency);
+
+ static bool is_repair_implied(urgency_t urgency);
+};
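As an illustration of how such a predicate reads off the table above - a sketch of the 'dow/time' row (the enumerator names, other than periodic_regular, are assumptions patterned on the table columns):

  bool ScrubJob::observes_allowed_hours(urgency_t urgency)
  {
    switch (urgency) {
      // per the table: these urgency levels ignore the allowed
      // days-of-week/hours (assumed enumerator names)
      case urgency_t::repairing:
      case urgency_t::operator_requested:
      case urgency_t::must_repair:
        return false;
      default:
        // periodic, must-scrub and after-repair scrubs observe them
        return true;
    }
  }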
} // namespace Scrub
namespace std {
@@ -263,6 +402,18 @@ struct formatter<Scrub::sched_params_t> {
};
template <>
+struct formatter<Scrub::SchedTarget> {
+ constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+ template <typename FormatContext>
+ auto format(const Scrub::SchedTarget& st, FormatContext& ctx) const
+ {
+ return fmt::format_to(
+ ctx.out(), "{},q:{:c},issue:{}", st.sched_info,
+ st.queued ? '+' : '-', st.sched_info.last_issue);
+ }
+};
+
+template <>
struct formatter<Scrub::ScrubJob> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
@@ -270,9 +421,8 @@ struct formatter<Scrub::ScrubJob> {
auto format(const Scrub::ScrubJob& sjob, FormatContext& ctx) const
{
return fmt::format_to(
- ctx.out(), "pg[{}]:nb:{:s} / trg:{:s} / dl:{:s} <{}>",
- sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
- sjob.schedule.deadline, sjob.state_desc());
+ ctx.out(), "pg[{}]:sh:{}/dp:{}<{}>",
+ sjob.pgid, sjob.shallow_target, sjob.deep_target, sjob.state_desc());
}
};
@@ -290,16 +440,4 @@ struct formatter<Scrub::sched_conf_t> {
cf.mandatory_on_invalid);
}
};
-
-template <>
-struct formatter<Scrub::scrub_schedule_t> {
- constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
- template <typename FormatContext>
- auto format(const Scrub::scrub_schedule_t& sc, FormatContext& ctx) const
- {
- return fmt::format_to(
- ctx.out(), "nb:{:s}(at:{:s},dl:{:s})", sc.not_before,
- sc.scheduled_at, sc.deadline);
- }
-};
} // namespace fmt