src/osd/scrubber/ScrubStore.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once

#include "common/map_cacher.hpp"
#include "osd/osd_types_fmt.h"
#include "osd/SnapMapper.h"  // for OSDriver

// Forward declarations. Within this header these types are only used as
// reference parameters or reference members, so their full definitions are
// not required here.
namespace librados {
struct object_id_t;
}

struct inconsistent_obj_wrapper;
struct inconsistent_snapset_wrapper;
class PgScrubber;

namespace Scrub {

/**
 * Storing errors detected during scrubbing.
 *
 * From both functional and internal perspectives, the store is a pair of key-value
 * databases: one maps objects to the shallow errors detected during their
 * scrubbing, and the other stores the deep errors.
 * Note that the first store is updated in both shallow and deep scrubs; the
 * second is updated only while deep scrubbing.
 *
 * The DBs can be consulted by the operator, when trying to list 'errors known
 * at this point in time'. Whenever a scrub starts - the relevant entries in the
 * DBs are removed. Specifically - the shallow errors DB is recreated each scrub,
 * while the deep errors DB is recreated only when a deep scrub starts.
 *
 * When queried - the data from both DBs is merged for each named object, and
 * returned to the operator.
 *
 * Implementation:
 * Each of the two DBs is implemented as OMAP entries of a single, uniquely named,
 * object. Both DBs are cached using the general KV Cache mechanism.
 */

class Store {
 public:
  /// The destructor is defined out of line (not visible in this header).
  ~Store();

  /**
   * @param scrubber the scrubber this store reports to (the class keeps a
   *   PgScrubber& - see m_scrubber)
   * @param osd_store the OSD's storage backend (the class keeps an
   *   ObjectStore& - see object_store)
   * @param t NOTE(review): presumably the transaction used for creating the
   *   errors objects - confirm against the implementation
   * @param pgid NOTE(review): presumably the PG whose scrub errors are
   *   recorded here - confirm against the implementation
   * @param coll the collection (i.e. the PG store) in which the errors are
   *   stored (see the 'coll' member)
   */
  Store(
      PgScrubber& scrubber,
      ObjectStore& osd_store,
      ObjectStore::Transaction* t,
      const spg_t& pgid,
      const coll_t& coll);


  /// mark down detected errors, either shallow or deep
  void add_object_error(int64_t pool, const inconsistent_obj_wrapper& e);

  /// the snapset-errors counterpart of add_object_error()
  void add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e);

  // and a variant-friendly interface: a single overloaded name that accepts
  // either of the two error wrapper types.
  // NOTE(review): presumably these forward to add_object_error() /
  // add_snap_error() - confirm against the implementation.
  void add_error(int64_t pool, const inconsistent_obj_wrapper& e);
  void add_error(int64_t pool, const inconsistent_snapset_wrapper& e);

  /// NOTE(review): presumably 'true' if no errors are pending in the
  /// per-level 'results' maps - confirm against the implementation.
  /// Marked [[nodiscard]]: the returned value is the only purpose of the call.
  [[nodiscard]] bool is_empty() const;

  /// NOTE(review): presumably moves the accumulated errors into the DBs
  /// using the provided transaction - confirm against the implementation.
  void flush(ObjectStore::Transaction*);

  /// remove both shallow and deep errors DBs. Called on interval.
  void cleanup(ObjectStore::Transaction*);

  /**
   * prepare the Store object for a new scrub session.
   * This involves clearing one or both of the errors DBs, and resetting
   * the cache.
   *
   * @param t the transaction in which the clearing is performed
   * @param level: the scrub level to prepare for. Whenever a deep scrub
   * is requested, both the shallow and deep errors DBs are cleared.
   * If, on the other hand, a shallow scrub is requested, only the shallow
   * errors DB is cleared.
   */
  void reinit(ObjectStore::Transaction* t, scrub_level_t level);

  /**
   * fetch the stored snapset errors, as a vector of buffers (one
   * ceph::buffer::list per returned entry).
   *
   * NOTE(review): presumably the listing is limited to 'pool', begins at
   * 'start', and returns at most 'max_return' entries - confirm against
   * the implementation.
   */
  std::vector<ceph::buffer::list> get_snap_errors(
    int64_t pool,
    const librados::object_id_t& start,
    uint64_t max_return) const;

  /**
   * fetch the stored object errors, as a vector of buffers (one
   * ceph::buffer::list per returned entry).
   * Same parameters as get_snap_errors().
   */
  std::vector<ceph::buffer::list> get_object_errors(
    int64_t pool,
    const librados::object_id_t& start,
    uint64_t max_return) const;

  /// write a log-line prefix into 'out', and return 'out'.
  /// NOTE(review): 'fn' is presumably the name of the calling function.
  std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const;

 private:
  /**
   * at_level_t
   *
   * The machinery for caching and storing errors at a specific scrub level.
   */
  struct at_level_t {
    /// Note: 'pgid' is not used by this constructor.
    /// 'backend' is constructed with the address of 'driver'. As members
    /// are initialized in declaration order, 'driver' must remain declared
    /// before 'backend'.
    at_level_t(const spg_t& pgid, const ghobject_t& err_obj, OSDriver&& drvr)
	: errors_hoid{err_obj}
	, driver{std::move(drvr)}
	, backend{&driver}
    {}

    /// the object in the PG store, where the errors are stored
    ghobject_t errors_hoid;

    /// abstracted key fetching
    OSDriver driver;

    /// a K,V cache for the errors that are detected during the scrub
    /// session. The errors marked for a specific object are stored as
    /// an OMap entry with the object's name as the key.
    /// NOTE(review): 'backend' is handed a pointer to the sibling 'driver'
    /// member. If MapCacher retains that pointer, a copied or moved
    /// at_level_t would still refer to the original's driver - confirm
    /// that instances are only ever constructed in place.
    MapCacher::MapCacher<std::string, ceph::buffer::list> backend;

    /// a temp object mapping seq-id to inconsistencies
    std::map<std::string, ceph::buffer::list> results;
  };

  /// the MapCacher's "position and data" type, for string keys and
  /// bufferlist values
  using CacherPosData =
      MapCacher::MapCacher<std::string, ceph::buffer::list>::PosAndData;
  /// either a CacherPosData or an 'int'.
  /// NOTE(review): the 'int' is presumably an error code - confirm.
  using ExpCacherPosData = tl::expected<CacherPosData, int>;

  /// access to the owning Scrubber object, for logging mostly
  PgScrubber& m_scrubber;

  /// the OSD's storage backend
  ObjectStore& object_store;

  /// the collection (i.e. - the PG store) in which the errors are stored
  const coll_t coll;

  /// NOTE(review): presumably the level of the scrub session for which
  /// errors are currently being recorded - confirm. There is no default
  /// member initializer here, so the constructor must set it.
  scrub_level_t current_level;

  /**
   * the machinery (backend details, cache, etc.) for storing both levels
   * of errors (note: 'optional' to allow delayed creation w/o dynamic
   * allocations; and 'mutable', as the caching mechanism is used in const
   * methods)
   */
  mutable std::optional<at_level_t> shallow_db;
  mutable std::optional<at_level_t> deep_db;

  /// NOTE(review): presumably the common implementation behind
  /// get_snap_errors() and get_object_errors(), returning the entries with
  /// keys between 'start' and 'end', limited to 'max_return' - confirm.
  std::vector<ceph::buffer::list> get_errors(
      const std::string& start,
      const std::string& end,
      uint64_t max_return) const;

  /// NOTE(review): presumably appends to 'errors' the entries of one DB
  /// ('backend'), starting from 'latest' and stopping at 'end_key' or once
  /// 'max_return' is reached - confirm against the implementation.
  /// 'latest' and 'errors' are non-const references, i.e. in-out parameters.
  void collect_specific_store(
      MapCacher::MapCacher<std::string, ceph::buffer::list>& backend,
      ExpCacherPosData& latest,
      std::vector<bufferlist>& errors,
      std::string_view end_key,
      uint64_t max_return) const;

  /**
   * Clear the DB of errors at a specific scrub level by performing an
   * omap_clear() on the DB object, and resetting the MapCacher.
   *
   * @param t the transaction to which the clearing is added
   * @param db the per-level machinery to clear
   * @param db_name NOTE(review): presumably only used for logging - confirm
   */
  void clear_level_db(
      ObjectStore::Transaction* t,
      at_level_t& db,
      std::string_view db_name);

  /**
   * merge the two error wrappers - fetched from both DBs for the same object.
   * Specifically, the object errors are or'ed, and so are the per-shard
   * entries.
   *
   * @param obj the object the two entries refer to (note: passed by value)
   * @param latest_sh the entry fetched from the shallow errors DB
   * @param latest_dp the entry fetched from the deep errors DB
   * @return the merged entry, as a bufferlist
   */
  bufferlist merge_encoded_error_wrappers(
      hobject_t obj,
      ExpCacherPosData& latest_sh,
      ExpCacherPosData& latest_dp) const;
};
}  // namespace Scrub