summary refs log tree commit diff stats
path: root/src/osd/scrubber/pg_scrubber.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/osd/scrubber/pg_scrubber.h')
-rw-r--r-- src/osd/scrubber/pg_scrubber.h 196
1 files changed, 116 insertions, 80 deletions
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index e970bd7219d..0d9e8c1e9f6 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -75,6 +75,8 @@ Main Scrubber interfaces:
#include <string_view>
#include <vector>
+#include "common/config_proxy.h"
+#include "common/config_cacher.h"
#include "osd/PG.h"
#include "osd/scrubber_common.h"
@@ -143,29 +145,48 @@ struct scrub_flags_t {
unsigned int priority{0};
/**
- * set by queue_scrub() if either planned_scrub.auto_repair or
- * need_auto were set.
- * Tested at scrub end.
+ * set by set_op_parameters() for deep scrubs, if the hardware
+ * supports auto repairing and osd_scrub_auto_repair is enabled.
*/
bool auto_repair{false};
/// this flag indicates that we are scrubbing post repair to verify everything
- /// is fixed
+ /// is fixed (otherwise - PG_STATE_FAILED_REPAIR will be asserted.)
+ /// Update (July 2024): now reflects an 'after-repair' urgency.
bool check_repair{false};
/// checked at the end of the scrub, to possibly initiate a deep-scrub
bool deep_scrub_on_error{false};
-
- /**
- * scrub must not be aborted.
- * Set for explicitly requested scrubs, and for scrubs originated by the
- * pairing process with the 'repair' flag set (in the RequestScrub event).
- */
- bool required{false};
};
ostream& operator<<(ostream& out, const scrub_flags_t& sf);
+namespace fmt {  // {fmt} formatter specialization for scrub_flags_t
+template <>
+struct formatter<scrub_flags_t> {  // renders the boolean flags that are set as a comma-separated list of names
+ constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }  // the format-spec is not inspected
+ template <typename FormatContext>
+ auto format(const scrub_flags_t& sf, FormatContext& ctx) const  // writes the flag names to ctx.out() and returns the resulting iterator
+ {
+ std::string txt;  // accumulates the names of the flags that are set
+ bool sep{false};  // true once 'txt' holds a name, i.e. the next name needs a leading ','
+ if (sf.auto_repair) {
+ txt = "auto-repair";
+ sep = true;
+ }
+ if (sf.check_repair) {
+ txt += sep ? ",check-repair" : "check-repair";
+ sep = true;
+ }
+ if (sf.deep_scrub_on_error) {
+ txt += sep ? ",deep-scrub-on-error" : "deep-scrub-on-error";
+ sep = true;  // not read again after this point
+ }
+ return fmt::format_to(ctx.out(), "{}", txt);  // 'txt' is empty when none of the three flags is set; sf.priority is not printed
+ }
+};
+} // namespace fmt
+
/**
* The part of PG-scrubbing code that isn't state-machine wiring.
@@ -188,15 +209,12 @@ class PgScrubber : public ScrubPgIF,
[[nodiscard]] bool is_reserving() const final;
Scrub::schedule_result_t start_scrub_session(
- std::unique_ptr<Scrub::ScrubJob> candidate,
+ scrub_level_t s_or_d,
Scrub::OSDRestrictions osd_restrictions,
- Scrub::ScrubPGPreconds pg_cond,
- const requested_scrub_t& requested_flags) final;
+ Scrub::ScrubPGPreconds pg_cond) final;
void initiate_regular_scrub(epoch_t epoch_queued) final;
- void initiate_scrub_after_repair(epoch_t epoch_queued) final;
-
void send_scrub_resched(epoch_t epoch_queued) final;
void active_pushes_notification(epoch_t epoch_queued) final;
@@ -264,8 +282,15 @@ class PgScrubber : public ScrubPgIF,
scrub_level_t scrub_requested(
scrub_level_t scrub_level,
- scrub_type_t scrub_type,
- requested_scrub_t& req_flags) final;
+ scrub_type_t scrub_type) final;
+
+ /**
+ * let the scrubber know that a recovery operation has completed.
+ * This might trigger an 'after repair' scrub.
+ */
+ void recovery_completed() final;
+
+ bool is_after_repair_required() const final;
/**
* Reserve local scrub resources (managed by the OSD)
@@ -273,7 +298,7 @@ class PgScrubber : public ScrubPgIF,
* Fails if OSD's local-scrubs budget was exhausted
* \returns were local resources reserved?
*/
- bool reserve_local() final;
+ bool reserve_local(const Scrub::SchedTarget& trgt);
void handle_query_state(ceph::Formatter* f) final;
@@ -286,11 +311,9 @@ class PgScrubber : public ScrubPgIF,
void on_operator_forced_scrub(
ceph::Formatter* f,
- scrub_level_t scrub_level,
- requested_scrub_t& request_flags) final;
+ scrub_level_t scrub_level) final;
- void dump_scrubber(ceph::Formatter* f,
- const requested_scrub_t& request_flags) const final;
+ void dump_scrubber(ceph::Formatter* f) const final;
// used if we are a replica
@@ -339,12 +362,8 @@ class PgScrubber : public ScrubPgIF,
/**
* finalize the parameters of the initiated scrubbing session:
- *
- * The "current scrub" flags (m_flags) are set from the 'planned_scrub'
- * flag-set; PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB &
- * PG_STATE_REPAIR are set.
*/
- void set_op_parameters(const requested_scrub_t& request) final;
+ void set_op_parameters(Scrub::ScrubPGPreconds pg_cond) final;
void cleanup_store(ObjectStore::Transaction* t) final;
@@ -398,8 +417,8 @@ class PgScrubber : public ScrubPgIF,
return m_pg->recovery_state.is_primary();
}
- /// is this scrub more than just regular periodic scrub?
- [[nodiscard]] bool is_high_priority() const final;
+ /// is this scrub's urgency high enough, or must it reserve its replicas?
+ [[nodiscard]] bool is_reservation_required() const final;
void set_state_name(const char* name) final
{
@@ -506,7 +525,12 @@ class PgScrubber : public ScrubPgIF,
/// control the order of construction/destruction.
std::optional<Scrub::ScrubJob> m_scrub_job;
- ostream& show(ostream& out) const override;
+
+ /// the scrubber has initiated a recovery, and is waiting for the recovery
+ /// to complete (in order to perform an 'after-repair' scrub)
+ bool m_after_repair_scrub_required{false};
+
+ ostream& show_concise(ostream& out) const override;
public:
// ------------------ the I/F used by the ScrubBackend (ScrubBeListener)
@@ -565,14 +589,27 @@ class PgScrubber : public ScrubPgIF,
void run_callbacks();
// 'query' command data for an active scrub
- void dump_active_scrubber(ceph::Formatter* f, bool is_deep) const;
+ void dump_active_scrubber(ceph::Formatter* f) const;
+
+ /**
+ * Used as a parameter of requeue_penalized() to indicate whether
+ * both targets of this PG should be delayed (and not just the named one).
+ */
+ enum class delay_both_targets_t { no, yes };
/**
* move the 'not before' to a later time (with a delay amount that is
* based on the delay cause). Also saves the cause.
- * Pushes the updated scrub-job into the OSD's queue.
+ * Pushes the updated scheduling entry into the OSD's queue.
+ * @param s_or_d - the specific target (shallow or deep) to delay;
+ * @param delay_both - should both targets be delayed? note - the
+ * 'other' target will not be delayed if it has higher priority.
*/
- void requeue_penalized(Scrub::delay_cause_t cause);
+ void requeue_penalized(
+ scrub_level_t s_or_d,
+ delay_both_targets_t delay_both,
+ Scrub::delay_cause_t cause,
+ utime_t scrub_clock_now);
// ----- methods used to verify the relevance of incoming events:
@@ -706,12 +743,14 @@ class PgScrubber : public ScrubPgIF,
bool m_publish_sessions{false}; //< will the counter be part of 'query'
//output?
+ /**
+ * the scrub operation flags.
+ * Set at scrub start. Checked in multiple locations - mostly
+ * at finish.
+ * Note: replicas only use the 'priority' field.
+ */
scrub_flags_t m_flags;
- /// a reference to the details of the next scrub (as requested and managed by
- /// the PG)
- requested_scrub_t& m_planned_scrub;
-
bool m_active{false};
/**
@@ -732,19 +771,24 @@ class PgScrubber : public ScrubPgIF,
*/
bool m_queued_or_active{false};
- /**
- * A copy of the specific scheduling target (either shallow_target or
- * deep_target in the scrub_job) that was selected for this active scrub
- * session.
- * \ATTN: in this initial step - a copy of the whole scrub-job is passed
- * around. Later on this would be just a part of a Scrub::SchedTarget
- */
- std::unique_ptr<Scrub::ScrubJob> m_active_target;
+ /// A copy of the specific scheduling target (either shallow_target or
+ /// deep_target in the scrub_job) that was selected for this active scrub
+ std::optional<Scrub::SchedTarget> m_active_target;
eversion_t m_subset_last_update{};
std::unique_ptr<Scrub::Store> m_store;
+ /**
+ * the ScrubStore sub-object caches and manages the database of known
+ * scrub errors. reinit_scrub_store() clears the database and re-initializes
+ * the ScrubStore object.
+ *
+ * in the next iteration - reinit_..() potentially deletes only the
+ * shallow errors part of the database.
+ */
+ void reinit_scrub_store();
+
int num_digest_updates_pending{0};
hobject_t m_start, m_end; ///< note: half-closed: [start,end)
@@ -800,7 +844,7 @@ class PgScrubber : public ScrubPgIF,
/**
* initiate a deep-scrub after the current scrub ended with errors.
*/
- void request_rescrubbing(requested_scrub_t& req_flags);
+ void request_rescrubbing();
/**
* combine cluster & pool configuration options into a single struct
@@ -809,40 +853,11 @@ class PgScrubber : public ScrubPgIF,
Scrub::sched_conf_t populate_config_params() const;
/**
- * determine the time when the next scrub should be scheduled
- *
- * based on the planned scrub's flags, time of last scrub, and
- * the pool's scrub configuration. This is only an initial "proposal",
- * and will be further adjusted based on the scheduling parameters.
- */
- Scrub::sched_params_t determine_initial_schedule(
- const Scrub::sched_conf_t& app_conf,
- utime_t scrub_clock_now) const;
-
- /// should we perform deep scrub?
- bool is_time_for_deep(
- Scrub::ScrubPGPreconds pg_cond,
- const requested_scrub_t& planned) const;
-
- /**
- * Validate the various 'next scrub' flags against configuration
- * and scrub-related timestamps.
- *
- * @returns an updated copy of the m_planned_flags (or nothing if no scrubbing)
+ * recompute the two ScrubJob targets, taking into account not
+ * only the up-to-date 'last' stamps, but also the 'urgency'
+ * attributes of both targets.
*/
- std::optional<requested_scrub_t> validate_scrub_mode(
- Scrub::OSDRestrictions osd_restrictions,
- Scrub::ScrubPGPreconds pg_cond) const;
-
- std::optional<requested_scrub_t> validate_periodic_mode(
- Scrub::ScrubPGPreconds pg_cond,
- bool time_for_deep,
- const requested_scrub_t& planned) const;
-
- std::optional<requested_scrub_t> validate_initiated_scrub(
- Scrub::ScrubPGPreconds pg_cond,
- bool time_for_deep,
- const requested_scrub_t& planned) const;
+ void update_targets(utime_t scrub_clock_now);
/*
* Select a range of objects to scrub.
@@ -882,6 +897,24 @@ class PgScrubber : public ScrubPgIF,
// scrub state.
ceph::coarse_real_clock::time_point m_last_stat_upd{};
+ // ------------------ cached (frequently used) configuration values
+
+ /// initial (& max) number of objects to scrub in one pass - deep scrub
+ md_config_cacher_t<int64_t> osd_scrub_chunk_max;
+ /// initial (& max) number of objects to scrub in one pass - shallow
+ md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_max;
+
+ /// chunk size won't be reduced (when preempted) below this
+ /// value (deep scrub)
+ md_config_cacher_t<int64_t> osd_scrub_chunk_min;
+ /// chunk size won't be reduced below this value (shallow scrub)
+ md_config_cacher_t<int64_t> osd_shallow_scrub_chunk_min;
+
+ /// stats update (publish_stats_to_osd()) interval while scrubbing
+ md_config_cacher_t<int64_t> osd_stats_update_period_scrubbing;
+ /// stats update interval while not scrubbing
+ md_config_cacher_t<int64_t> osd_stats_update_period_not_scrubbing;
+
// ------------ members used if we are a replica
epoch_t m_replica_min_epoch; ///< the min epoch needed to handle this message
@@ -978,6 +1011,9 @@ class PgScrubber : public ScrubPgIF,
mutable ceph::mutex m_preemption_lock = ceph::make_mutex("preemption_lock");
bool m_preemptable{false};
bool m_preempted{false};
+
+ /// the number of preemptions allowed before we start blocking
+ md_config_cacher_t<uint64_t> osd_scrub_max_preemptions;
int m_left;
size_t m_size_divisor{1};
bool are_preemptions_left() const { return m_left > 0; }  ///< true while the m_left counter is above zero