diff options
Diffstat (limited to 'src/osd/scrubber_common.h')
-rw-r--r-- | src/osd/scrubber_common.h | 283 |
1 files changed, 119 insertions, 164 deletions
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 832811fbad7..2ab5570a715 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -24,6 +24,7 @@ namespace Scrub { class ReplicaReservations; struct ReplicaActive; class ScrubJob; + struct SchedEntry; } /// reservation-related data sent by the primary to the replicas, @@ -83,11 +84,23 @@ using act_token_t = uint32_t; /// (note: struct size should be kept small, as it is copied around) struct OSDRestrictions { /// high local OSD concurrency. Thus - only high priority scrubs are allowed - bool high_priority_only{false}; - bool allow_requested_repair_only{false}; - bool only_deadlined{false}; - bool load_is_low:1{true}; - bool time_permit:1{true}; + bool max_concurrency_reached{false}; + + /// rolled a dice, and decided not to scrub in this tick + bool random_backoff_active{false}; + + /// the OSD is performing recovery & osd_repair_during_recovery is 'true' + bool allow_requested_repair_only:1{false}; + + /// the CPU load is high. No regular scrubs are allowed. + bool cpu_overloaded:1{false}; + + /// outside of allowed scrubbing hours/days + bool restricted_time:1{false}; + + /// the OSD is performing a recovery, osd_scrub_during_recovery is 'false', + /// and so is osd_repair_during_recovery + bool recovery_in_progress:1{false}; }; static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t)); @@ -96,7 +109,6 @@ static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t)); struct ScrubPGPreconds { bool allow_shallow{true}; bool allow_deep{true}; - bool has_deep_errors{false}; bool can_autorepair{false}; }; static_assert(sizeof(Scrub::ScrubPGPreconds) <= sizeof(uint32_t)); @@ -108,6 +120,52 @@ enum class schedule_result_t { osd_wide_failure // failed to scrub any target }; +/// a collection of the basic scheduling information of a scrub target: +/// target time to scrub, the 'not before' time, and a deadline. 
+struct scrub_schedule_t { + /** + * the time at which we are allowed to start the scrub. Never + * decreasing after 'scheduled_at' is set. + */ + utime_t not_before{utime_t::max()}; + + /** + * the 'deadline' is the time by which we expect the periodic scrub to + * complete. It is determined by the SCRUB_MAX_INTERVAL pool configuration + * and by osd_scrub_max_interval; + * Once passed, the scrub will be allowed to run even if the OSD is + * overloaded. It would also have higher priority than other + * auto-scheduled scrubs. + */ + utime_t deadline{utime_t::max()}; + + /** + * the 'scheduled_at' is the time at which we intended the scrub to be scheduled. + * For periodic (regular) scrubs, it is set to the time of the last scrub + * plus the scrub interval (plus some randomization). Priority scrubs + * have their own specific rules for the target time. E.g.: + * - for operator-initiated scrubs: 'target' is set to 'scrub_must_stamp'; + * - same for re-scrubbing (deep scrub after a shallow scrub that ended with + * errors); + * - when requesting a scrub after a repair (the highest priority scrub): + * the target is set to '0' (beginning of time); + */ + utime_t scheduled_at{utime_t::max()}; + + std::partial_ordering operator<=>(const scrub_schedule_t& rhs) const + { + // when compared - the 'not_before' is ignored, assuming + // we never compare jobs with different eligibility status. + auto cmp1 = scheduled_at <=> rhs.scheduled_at; + if (cmp1 != 0) { + return cmp1; + } + return deadline <=> rhs.deadline; + }; + bool operator==(const scrub_schedule_t& rhs) const = default; +}; + + /// rescheduling param: should we delay jobs already ready to execute? 
enum class delay_ready_t : bool { delay_ready = true, no_delay = false }; @@ -122,9 +180,8 @@ struct formatter<Scrub::ScrubPGPreconds> { auto format(const Scrub::ScrubPGPreconds& conds, FormatContext& ctx) const { return fmt::format_to( - ctx.out(), "allowed(shallow/deep):{:1}/{:1},deep-err:{:1},can-autorepair:{:1}", - conds.allow_shallow, conds.allow_deep, conds.has_deep_errors, - conds.can_autorepair); + ctx.out(), "allowed(shallow/deep):{:1}/{:1},can-autorepair:{:1}", + conds.allow_shallow, conds.allow_deep, conds.can_autorepair); } }; @@ -136,15 +193,28 @@ struct formatter<Scrub::OSDRestrictions> { auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) const { return fmt::format_to( - ctx.out(), - "priority-only:{},overdue-only:{},load:{},time:{},repair-only:{}", - conds.high_priority_only, - conds.only_deadlined, - conds.load_is_low ? "ok" : "high", - conds.time_permit ? "ok" : "no", - conds.allow_requested_repair_only); + ctx.out(), "<{}.{}.{}.{}.{}.{}>", + conds.max_concurrency_reached ? "max-scrubs" : "", + conds.random_backoff_active ? "backoff" : "", + conds.cpu_overloaded ? "high-load" : "", + conds.restricted_time ? "time-restrict" : "", + conds.recovery_in_progress ? "recovery" : "", + conds.allow_requested_repair_only ? "repair-only" : ""); } }; + +template <> +struct formatter<Scrub::scrub_schedule_t> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + template <typename FormatContext> + auto format(const Scrub::scrub_schedule_t& sc, FormatContext& ctx) const + { + return fmt::format_to( + ctx.out(), "nb:{:s}(at:{:s},dl:{:s})", sc.not_before, + sc.scheduled_at, sc.deadline); + } +}; + } // namespace fmt namespace Scrub { @@ -157,7 +227,8 @@ enum class delay_cause_t { none, ///< scrub attempt was successful replicas, ///< failed to reserve replicas flags, ///< noscrub or nodeep-scrub - pg_state, ///< e.g. 
snap-trimming + pg_state, ///< not active+clean + snap_trimming, ///< snap-trimming is in progress restricted_time, ///< time restrictions or busy CPU local_resources, ///< too many scrubbing PGs aborted, ///< scrub was aborted w/ unspecified reason @@ -171,7 +242,7 @@ namespace fmt { template <> struct formatter<Scrub::delay_cause_t> : ::fmt::formatter<std::string_view> { template <typename FormatContext> - auto format(Scrub::delay_cause_t cause, FormatContext& ctx) + auto format(Scrub::delay_cause_t cause, FormatContext& ctx) const { using enum Scrub::delay_cause_t; std::string_view desc; @@ -180,6 +251,7 @@ struct formatter<Scrub::delay_cause_t> : ::fmt::formatter<std::string_view> { case replicas: desc = "replicas"; break; case flags: desc = "noscrub"; break; case pg_state: desc = "pg-state"; break; + case snap_trimming: desc = "snap-trim"; break; case restricted_time: desc = "time/load"; break; case local_resources: desc = "local-cnt"; break; case aborted: desc = "aborted"; break; @@ -219,136 +291,22 @@ struct PgScrubBeListener { /** - * Flags affecting the scheduling and behaviour of the *next* scrub. - * - * we hold two of these flag collections: one - * for the next scrub, and one frozen at initiation (i.e. in pg::queue_scrub()) - */ -struct requested_scrub_t { - - // flags to indicate explicitly requested scrubs (by admin): - // bool must_scrub, must_deep_scrub, must_repair, need_auto; - - /** - * 'must_scrub' is set by an admin command (or by need_auto). - * Affects the priority of the scrubbing, and the sleep periods - * during the scrub. - */ - bool must_scrub{false}; - - /** - * scrub must not be aborted. - * Set for explicitly requested scrubs, and for scrubs originated by the - * pairing process with the 'repair' flag set (in the RequestScrub event). - * - * Will be copied into the 'required' scrub flag upon scrub start. 
- */ - bool req_scrub{false}; - - /** - * Set from: - * - scrub_requested() with need_auto param set, which only happens in - * - scrub_finish() - if deep_scrub_on_error is set, and we have errors - * - * If set, will prevent the OSD from casually postponing our scrub. When - * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair to - * be set. - */ - bool need_auto{false}; - - /** - * Set for scrub-after-recovery just before we initiate the recovery deep - * scrub, or if scrub_requested() was called with either need_auto ot repair. - * Affects PG_STATE_DEEP_SCRUB. - */ - bool must_deep_scrub{false}; - - /** - * (An intermediary flag used by pg::sched_scrub() on the first time - * a planned scrub has all its resources). Determines whether the next - * repair/scrub will be 'deep'. - * - * Note: 'dumped' by PgScrubber::dump() and such. In reality, being a - * temporary that is set and reset by the same operation, will never - * appear externally to be set - */ - bool time_for_deep{false}; - - bool deep_scrub_on_error{false}; - - /** - * If set, we should see must_deep_scrub & must_scrub, too - * - * - 'must_repair' is checked by the OSD when scheduling the scrubs. - * - also checked & cleared at pg::queue_scrub() - */ - bool must_repair{false}; - - /* - * the value of auto_repair is determined in sched_scrub() (once per scrub. - * previous value is not remembered). Set if - * - allowed by configuration and backend, and - * - for periodic scrubs: time_for_deep was just set - */ - bool auto_repair{false}; - - /** - * indicating that we are scrubbing post repair to verify everything is fixed. - * Otherwise - PG_STATE_FAILED_REPAIR will be asserted. - */ - bool check_repair{false}; - - /** - * Used to indicate, both in client-facing listings and internally, that - * the planned scrub will be a deep one. 
- */ - bool calculated_to_deep{false}; -}; - -std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf); - -template <> -struct fmt::formatter<requested_scrub_t> { - constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } - - template <typename FormatContext> - auto format(const requested_scrub_t& rs, FormatContext& ctx) - { - return fmt::format_to(ctx.out(), - "(plnd:{}{}{}{}{}{}{}{}{}{})", - rs.must_repair ? " must_repair" : "", - rs.auto_repair ? " auto_repair" : "", - rs.check_repair ? " check_repair" : "", - rs.deep_scrub_on_error ? " deep_scrub_on_error" : "", - rs.must_deep_scrub ? " must_deep_scrub" : "", - rs.must_scrub ? " must_scrub" : "", - rs.time_for_deep ? " time_for_deep" : "", - rs.need_auto ? " need_auto" : "", - rs.req_scrub ? " req_scrub" : "", - rs.calculated_to_deep ? " deep" : ""); - } -}; - -/** * The interface used by the PG when requesting scrub-related info or services */ struct ScrubPgIF { virtual ~ScrubPgIF() = default; - friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) - { - return s.show(out); + friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) { + return s.show_concise(out); } - virtual std::ostream& show(std::ostream& out) const = 0; + virtual std::ostream& show_concise(std::ostream& out) const = 0; // --------------- triggering state-machine events: virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0; - virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0; - virtual void send_scrub_resched(epoch_t epoch_queued) = 0; virtual void active_pushes_notification(epoch_t epoch_queued) = 0; @@ -426,11 +384,8 @@ struct ScrubPgIF { /** * attempt to initiate a scrub session. - * @param candidate the scrub job to start. Later on - this will be the - * specific queue entry (that carries the information about the level, - * priority, etc. of the scrub that should be initiated on this PG). 
- * This parameter is saved by the scrubber for the whole duration of - * the scrub session (to be used if the scrub is aborted). + * @param s_or_d the scrub level to start. This identifies the specific + * target to be scrubbed. * @param osd_restrictions limitations on the types of scrubs that can * be initiated on this OSD at this time. * @param preconds the PG state re scrubbing at the time of the request, @@ -442,12 +397,11 @@ struct ScrubPgIF { * external reasons. */ virtual Scrub::schedule_result_t start_scrub_session( - std::unique_ptr<Scrub::ScrubJob> candidate, + scrub_level_t s_or_d, Scrub::OSDRestrictions osd_restrictions, - Scrub::ScrubPGPreconds pg_cond, - const requested_scrub_t& requested_flags) = 0; + Scrub::ScrubPGPreconds pg_cond) = 0; - virtual void set_op_parameters(const requested_scrub_t&) = 0; + virtual void set_op_parameters(Scrub::ScrubPGPreconds pg_cond) = 0; /// stop any active scrubbing (on interval end) and unregister from /// the OSD scrub queue @@ -475,11 +429,9 @@ struct ScrubPgIF { /// ... by requesting an "operator initiated" scrub virtual void on_operator_forced_scrub( ceph::Formatter* f, - scrub_level_t scrub_level, - requested_scrub_t& request_flags) = 0; + scrub_level_t scrub_level) = 0; - virtual void dump_scrubber(ceph::Formatter* f, - const requested_scrub_t& request_flags) const = 0; + virtual void dump_scrubber(ceph::Formatter* f) const = 0; /** * Return true if soid is currently being scrubbed and pending IOs should @@ -535,23 +487,31 @@ struct ScrubPgIF { virtual void update_scrub_stats( ceph::coarse_real_clock::time_point now_is) = 0; - // --------------- reservations ----------------------------------- - /** - * Reserve local scrub resources (managed by the OSD) + * Recalculate scrub (both deep & shallow) schedules * - * Fails if OSD's local-scrubs budget was exhausted - * \returns were local resources reserved? + * Dequeues the scrub job, and re-queues it with the new schedule. 
*/ - virtual bool reserve_local() = 0; + virtual void update_scrub_job(Scrub::delay_ready_t delay_ready) = 0; + + virtual scrub_level_t scrub_requested( + scrub_level_t scrub_level, + scrub_type_t scrub_type) = 0; /** - * Recalculate the required scrub time. - * - * This function assumes that the queue registration status is up-to-date, - * i.e. the OSD "knows our name" if-f we are the Primary. + * let the scrubber know that a recovery operation has completed. + * This might trigger an 'after repair' scrub. */ - virtual void update_scrub_job(Scrub::delay_ready_t delay_ready) = 0; + virtual void recovery_completed() = 0; + + /** + * m_after_repair_scrub_required is set, and recovery_completed() is + * expected to trigger a deep scrub + */ + virtual bool is_after_repair_required() const = 0; + + + // --------------- reservations ----------------------------------- /** * route incoming replica-reservations requests/responses to the @@ -561,11 +521,6 @@ struct ScrubPgIF { */ virtual void handle_scrub_reserve_msgs(OpRequestRef op) = 0; - virtual scrub_level_t scrub_requested( - scrub_level_t scrub_level, - scrub_type_t scrub_type, - requested_scrub_t& req_flags) = 0; - // --------------- debugging via the asok ------------------------------ virtual int asok_debug(std::string_view cmd, |