Diffstat (limited to 'src/osd/scrubber/pg_scrubber.h')
-rw-r--r-- | src/osd/scrubber/pg_scrubber.h | 196
1 file changed, 116 insertions, 80 deletions
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index e970bd7219d..0d9e8c1e9f6 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -75,6 +75,8 @@ Main Scrubber interfaces:
 #include <string_view>
 #include <vector>
 
+#include "common/config_proxy.h"
+#include "common/config_cacher.h"
 #include "osd/PG.h"
 #include "osd/scrubber_common.h"
 
@@ -143,29 +145,48 @@ struct scrub_flags_t {
 
   unsigned int priority{0};
 
   /**
-   * set by queue_scrub() if either planned_scrub.auto_repair or
-   * need_auto were set.
-   * Tested at scrub end.
+   * set by set_op_parameters() for deep scrubs, if the hardware
+   * supports auto repairing and osd_scrub_auto_repair is enabled.
    */
   bool auto_repair{false};
 
   /// this flag indicates that we are scrubbing post repair to verify everything
-  /// is fixed
+  /// is fixed (otherwise - PG_STATE_FAILED_REPAIR will be asserted.)
+  /// Update (July 2024): now reflects an 'after-repair' urgency.
   bool check_repair{false};
 
   /// checked at the end of the scrub, to possibly initiate a deep-scrub
   bool deep_scrub_on_error{false};
-
-  /**
-   * scrub must not be aborted.
-   * Set for explicitly requested scrubs, and for scrubs originated by the
-   * pairing process with the 'repair' flag set (in the RequestScrub event).
-   */
-  bool required{false};
 };
 
 ostream& operator<<(ostream& out, const scrub_flags_t& sf);
 
+namespace fmt {
+template <>
+struct formatter<scrub_flags_t> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+  template <typename FormatContext>
+  auto format(const scrub_flags_t& sf, FormatContext& ctx) const
+  {
+    std::string txt;
+    bool sep{false};
+    if (sf.auto_repair) {
+      txt = "auto-repair";
+      sep = true;
+    }
+    if (sf.check_repair) {
+      txt += sep ? ",check-repair" : "check-repair";
+      sep = true;
+    }
+    if (sf.deep_scrub_on_error) {
+      txt += sep ? ",deep-scrub-on-error" : "deep-scrub-on-error";
+      sep = true;
+    }
+    return fmt::format_to(ctx.out(), "{}", txt);
+  }
+};
+}  // namespace fmt
+
 
 /**
  * The part of PG-scrubbing code that isn't state-machine wiring.
@@ -188,15 +209,12 @@ class PgScrubber : public ScrubPgIF,
   [[nodiscard]] bool is_reserving() const final;
 
   Scrub::schedule_result_t start_scrub_session(
-      std::unique_ptr<Scrub::ScrubJob> candidate,
+      scrub_level_t s_or_d,
       Scrub::OSDRestrictions osd_restrictions,
-      Scrub::ScrubPGPreconds pg_cond,
-      const requested_scrub_t& requested_flags) final;
+      Scrub::ScrubPGPreconds pg_cond) final;
 
   void initiate_regular_scrub(epoch_t epoch_queued) final;
 
-  void initiate_scrub_after_repair(epoch_t epoch_queued) final;
-
   void send_scrub_resched(epoch_t epoch_queued) final;
 
   void active_pushes_notification(epoch_t epoch_queued) final;
@@ -264,8 +282,15 @@ class PgScrubber : public ScrubPgIF,
   scrub_level_t scrub_requested(
       scrub_level_t scrub_level,
-      scrub_type_t scrub_type,
-      requested_scrub_t& req_flags) final;
+      scrub_type_t scrub_type) final;
+
+  /**
+   * let the scrubber know that a recovery operation has completed.
+   * This might trigger an 'after repair' scrub.
+   */
+  void recovery_completed() final;
+
+  bool is_after_repair_required() const final;
 
   /**
    *  Reserve local scrub resources (managed by the OSD)
   *
@@ -273,7 +298,7 @@
   * Fails if OSD's local-scrubs budget was exhausted
   * \returns were local resources reserved?
   */
-  bool reserve_local() final;
+  bool reserve_local(const Scrub::SchedTarget& trgt);
 
   void handle_query_state(ceph::Formatter* f) final;
 
@@ -286,11 +311,9 @@ class PgScrubber : public ScrubPgIF,
 
   void on_operator_forced_scrub(
       ceph::Formatter* f,
-      scrub_level_t scrub_level,
-      requested_scrub_t& request_flags) final;
+      scrub_level_t scrub_level) final;
 
-  void dump_scrubber(ceph::Formatter* f,
-      const requested_scrub_t& request_flags) const final;
+  void dump_scrubber(ceph::Formatter* f) const final;
 
   // used if we are a replica
 
@@ -339,12 +362,8 @@ class PgScrubber : public ScrubPgIF,
 
   /**
    * finalize the parameters of the initiated scrubbing session:
-   *
-   * The "current scrub" flags (m_flags) are set from the 'planned_scrub'
-   * flag-set; PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB &
-   * PG_STATE_REPAIR are set.
    */
-  void set_op_parameters(const requested_scrub_t& request) final;
+  void set_op_parameters(Scrub::ScrubPGPreconds pg_cond) final;
 
   void cleanup_store(ObjectStore::Transaction* t) final;
 
@@ -398,8 +417,8 @@ class PgScrubber : public ScrubPgIF,
     return m_pg->recovery_state.is_primary();
   }
 
-  /// is this scrub more than just regular periodic scrub?
-  [[nodiscard]] bool is_high_priority() const final;
+  /// is this scrub's urgency high enough, or must it reserve its replicas?
+  [[nodiscard]] bool is_reservation_required() const final;
 
   void set_state_name(const char* name) final
   {
@@ -506,7 +525,12 @@ class PgScrubber : public ScrubPgIF,
   /// control the order of construction/destruction.
   std::optional<Scrub::ScrubJob> m_scrub_job;
 
-  ostream& show(ostream& out) const override;
+
+  /// the scrubber has initiated a recovery, and is waiting for the recovery
+  /// to complete (in order to perform an 'after-repair' scrub)
+  bool m_after_repair_scrub_required{false};
+
+  ostream& show_concise(ostream& out) const override;
 
  public:
   // ------------------ the I/F used by the ScrubBackend (ScrubBeListener)
@@ -565,14 +589,27 @@ class PgScrubber : public ScrubPgIF,
   void run_callbacks();
 
   // 'query' command data for an active scrub
-  void dump_active_scrubber(ceph::Formatter* f, bool is_deep) const;
+  void dump_active_scrubber(ceph::Formatter* f) const;
+
+  /**
+   * Used as a parameter of requeue_penalized() to indicate whether the
+   * both targets of this PG should be delayed (and not just the named one).
+   */
+  enum class delay_both_targets_t { no, yes };
 
   /**
    * move the 'not before' to a later time (with a delay amount that is
   * based on the delay cause). Also saves the cause.
-   * Pushes the updated scrub-job into the OSD's queue.
+   * Pushes the updated scheduling entry into the OSD's queue.
+   * @param s_or_d - the specific target (shallow or deep) to delay;
+   * @param delay_both - should both targets be delayed? note - the
+   *   'other' target will not be delayed if it has higher priority.
    */
-  void requeue_penalized(Scrub::delay_cause_t cause);
+  void requeue_penalized(
+      scrub_level_t s_or_d,
+      delay_both_targets_t delay_both,
+      Scrub::delay_cause_t cause,
+      utime_t scrub_clock_now);
 
   // ----- methods used to verify the relevance of incoming events:
 
@@ -706,12 +743,14 @@ class PgScrubber : public ScrubPgIF,
   bool m_publish_sessions{false};  //< will the counter be part of 'query'
                                    //output?
 
+  /**
+   * the scrub operation flags.
+   * Set at scrub start. Checked in multiple locations - mostly
+   * at finish.
+   * Note: replicas only use the 'priority' field.
+   */
   scrub_flags_t m_flags;
 
-  /// a reference to the details of the next scrub (as requested and managed by
-  /// the PG)
-  requested_scrub_t& m_planned_scrub;
-
   bool m_active{false};
 
   /**
@@ -732,19 +771,24 @@ class PgScrubber : public ScrubPgIF,
    */
   bool m_queued_or_active{false};
 
-  /**
-   * A copy of the specific scheduling target (either shallow_target or
-   * deep_target in the scrub_job) that was selected for this active scrub
-   * session.
-   * \ATTN: in this initial step - a copy of the whole scrub-job is passed
-   * around. Later on this would be just a part of a Scrub::SchedTarget
-   */
-  std::unique_ptr<Scrub::ScrubJob> m_active_target;
+  /// A copy of the specific scheduling target (either shallow_target or
+  /// deep_target in the scrub_job) that was selected for this active scrub
+  std::optional<Scrub::SchedTarget> m_active_target;
 
   eversion_t m_subset_last_update{};
 
   std::unique_ptr<Scrub::Store> m_store;
 
+  /**
+   * the ScrubStore sub-object caches and manages the database of known
+   * scrub errors. reinit_scrub_store() clears the database and re-initializes
+   * the ScrubStore object.
+   *
+   * in the next iteration - reinit_..() potentially deletes only the
+   * shallow errors part of the database.
+   */
+  void reinit_scrub_store();
+
   int num_digest_updates_pending{0};
 
   hobject_t m_start, m_end;  ///< note: half-closed: [start,end)
@@ -800,7 +844,7 @@ class PgScrubber : public ScrubPgIF,
   /**
    * initiate a deep-scrub after the current scrub ended with errors.
   */
-  void request_rescrubbing(requested_scrub_t& req_flags);
+  void request_rescrubbing();
 
   /**
   * combine cluster & pool configuration options into a single struct
@@ -809,40 +853,11 @@ class PgScrubber : public ScrubPgIF,
   Scrub::sched_conf_t populate_config_params() const;
 
   /**
-   * determine the time when the next scrub should be scheduled
-   *
-   * based on the planned scrub's flags, time of last scrub, and
-   * the pool's scrub configuration. This is only an initial "proposal",
-   * and will be further adjusted based on the scheduling parameters.
-   */
-  Scrub::sched_params_t determine_initial_schedule(
-      const Scrub::sched_conf_t& app_conf,
-      utime_t scrub_clock_now) const;
-
-  /// should we perform deep scrub?
-  bool is_time_for_deep(
-      Scrub::ScrubPGPreconds pg_cond,
-      const requested_scrub_t& planned) const;
-
-  /**
-   * Validate the various 'next scrub' flags against configuration
-   * and scrub-related timestamps.
-   *
-   * @returns an updated copy of the m_planned_flags (or nothing if no scrubbing)
+   * recompute the two ScrubJob targets, taking into account not
+   * only the up-to-date 'last' stamps, but also the 'urgency'
+   * attributes of both targets.
    */
-  std::optional<requested_scrub_t> validate_scrub_mode(
-      Scrub::OSDRestrictions osd_restrictions,
-      Scrub::ScrubPGPreconds pg_cond) const;
-
-  std::optional<requested_scrub_t> validate_periodic_mode(
-      Scrub::ScrubPGPreconds pg_cond,
-      bool time_for_deep,
-      const requested_scrub_t& planned) const;
-
-  std::optional<requested_scrub_t> validate_initiated_scrub(
-      Scrub::ScrubPGPreconds pg_cond,
-      bool time_for_deep,
-      const requested_scrub_t& planned) const;
+  void update_targets(utime_t scrub_clock_now);
 
   /*
    * Select a range of objects to scrub.
@@ -882,6 +897,24 @@ class PgScrubber : public ScrubPgIF,
   // scrub state.
   ceph::coarse_real_clock::time_point m_last_stat_upd{};
 
+  // ------------------ cached (frequently used) configuration values
+
+  /// initial (& max) number of objects to scrub in one pass - deep scrub
+  md_config_cacher_t<int64_t> osd_scrub_chunk_max;
+  /// initial (& max) number of objects to scrub in one pass - shallow
+  md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_max;
+
+  /// chunk size won't be reduced (when preempted) below this
+  /// value (deep scrub)
+  md_config_cacher_t<int64_t> osd_scrub_chunk_min;
+  /// chunk size won't be reduced below this value (shallow scrub)
+  md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_min;
+
+  /// stats update (publish_stats_to_osd()) interval while scrubbing
+  md_config_cacher_t<int64_t> osd_stats_update_period_scrubbing;
+  /// stats update interval while not scrubbing
+  md_config_cacher_t<int64_t> osd_stats_update_period_not_scrubbing;
+
   // ------------ members used if we are a replica
 
   epoch_t m_replica_min_epoch;  ///< the min epoch needed to handle this message
@@ -978,6 +1011,9 @@ class PgScrubber : public ScrubPgIF,
   mutable ceph::mutex m_preemption_lock = ceph::make_mutex("preemption_lock");
   bool m_preemptable{false};
   bool m_preempted{false};
+
+  /// the number of preemptions allowed before we start blocking
+  md_config_cacher_t<uint64_t> osd_scrub_max_preemptions;
   int m_left;
   size_t m_size_divisor{1};
   bool are_preemptions_left() const { return m_left > 0; }
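
Note: the hunk at +145 above adds a fmt::formatter specialization for scrub_flags_t. The following is a standalone sketch, not Ceph code: it trims scrub_flags_t down to its three booleans and assumes only that libfmt is available, just to show what the "{}" rendering produced by that specialization looks like.

// Standalone sketch (not part of the Ceph tree): a trimmed-down scrub_flags_t
// plus the formatter from the patch, printing the comma-separated flag names.
// Build assumption: g++ -std=c++17 demo.cc -lfmt
#include <string>
#include <fmt/format.h>

struct scrub_flags_t {
  bool auto_repair{false};
  bool check_repair{false};
  bool deep_scrub_on_error{false};
};

namespace fmt {
template <>
struct formatter<scrub_flags_t> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
  template <typename FormatContext>
  auto format(const scrub_flags_t& sf, FormatContext& ctx) const
  {
    std::string txt;
    bool sep{false};
    if (sf.auto_repair) {
      txt = "auto-repair";
      sep = true;
    }
    if (sf.check_repair) {
      txt += sep ? ",check-repair" : "check-repair";
      sep = true;
    }
    if (sf.deep_scrub_on_error) {
      txt += sep ? ",deep-scrub-on-error" : "deep-scrub-on-error";
      sep = true;
    }
    return fmt::format_to(ctx.out(), "{}", txt);
  }
};
}  // namespace fmt

int main()
{
  scrub_flags_t sf;
  sf.auto_repair = true;
  sf.deep_scrub_on_error = true;
  // prints "auto-repair,deep-scrub-on-error"
  fmt::print("{}\n", sf);
}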
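
Note: the patch also starts caching frequently-read option values (osd_scrub_chunk_max and friends) in md_config_cacher_t<> members instead of looking them up on every use. The sketch below is not Ceph's md_config_cacher_t; it only illustrates the underlying idea - an observer keeps an atomic copy of one option so the hot path does a single relaxed load. All names here (toy_config, cached_option) are invented for the illustration.

// Illustration only - not Ceph's implementation. Register as an observer for
// a single option, keep the latest value in an atomic, and let hot paths read
// the cached copy instead of querying the configuration store.
#include <atomic>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

// a stand-in for the cluster configuration store (hypothetical)
class toy_config {
 public:
  using observer_t = std::function<void(int64_t)>;
  void set(const std::string& k, int64_t v) {
    values[k] = v;
    if (auto it = observers.find(k); it != observers.end()) it->second(v);
  }
  int64_t get(const std::string& k) const { return values.at(k); }
  void watch(const std::string& k, observer_t cb) { observers[k] = std::move(cb); }
 private:
  std::unordered_map<std::string, int64_t> values;
  std::unordered_map<std::string, observer_t> observers;
};

// the cached-option wrapper: reads are a single relaxed atomic load
class cached_option {
 public:
  cached_option(toy_config& conf, const std::string& name)
      : value{conf.get(name)} {
    conf.watch(name, [this](int64_t v) { value.store(v, std::memory_order_relaxed); });
  }
  operator int64_t() const { return value.load(std::memory_order_relaxed); }
 private:
  std::atomic<int64_t> value;
};

int main() {
  toy_config conf;
  conf.set("osd_scrub_chunk_max", 25);
  cached_option osd_scrub_chunk_max{conf, "osd_scrub_chunk_max"};
  int64_t chunk = osd_scrub_chunk_max;  // cheap read on the scrub path
  conf.set("osd_scrub_chunk_max", 15);  // observer refreshes the cache
  chunk = osd_scrub_chunk_max;          // now sees 15
  return chunk == 15 ? 0 : 1;
}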
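
Note: requeue_penalized() now takes the two-value enum class delay_both_targets_t rather than a bare bool. A hypothetical call-site comparison (signature shape only, not the real scrubber code) shows the readability gain:

// Hypothetical sketch (not Ceph code): a two-value enum class keeps the
// caller's intent visible and cannot be swapped with other boolean-ish
// arguments by accident.
#include <cstdio>

enum class delay_both_targets_t { no, yes };
enum class scrub_level_t { shallow, deep };

// stand-in for the scrubber method (shape of the signature only)
void requeue_penalized(scrub_level_t s_or_d, delay_both_targets_t delay_both) {
  std::printf("delaying %s target(s), starting from the %s target\n",
              delay_both == delay_both_targets_t::yes ? "both" : "one",
              s_or_d == scrub_level_t::deep ? "deep" : "shallow");
}

int main() {
  // reads as a sentence at the call site; a bare 'true' would not
  requeue_penalized(scrub_level_t::deep, delay_both_targets_t::yes);
}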