// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2018 Red Hat, Inc.
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#ifndef RGW_DMCLOCK_SCHEDULER_H
#define RGW_DMCLOCK_SCHEDULER_H
#include <boost/asio.hpp>
#include "common/ceph_time.h"
#include "common/async/completion.h"
#include "common/ceph_context.h"
#include "common/config.h"
#include "common/perf_counters.h"
#include "rgw_dmclock.h"
#include "rgw_yield_context.h"
namespace rgw::dmclock {
namespace queue_counters {
enum {
l_first = 427150,
l_qlen,
l_cost,
l_res,
l_res_cost,
l_prio,
l_prio_cost,
l_limit,
l_limit_cost,
l_cancel,
l_cancel_cost,
l_res_latency,
l_prio_latency,
l_last,
};
PerfCountersRef build(CephContext *cct, const std::string& name);
} // namespace queue_counters
/// function to provide client counters
using GetClientCounters = std::function<PerfCounters*(client_id)>;
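// A minimal sketch of a conforming counters function (hypothetical; the
// frontends would normally use the ClientCounters class declared below).
// Returning nullptr is valid and simply disables counter updates for that
// client:
//
//   GetClientCounters no_counters = [] (client_id) -> PerfCounters* {
//     return nullptr;
//   };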
namespace async = ceph::async;
struct Request {
client_id client;
Time started;
Cost cost;
};
enum class ReqState {
Wait,
Ready,
Cancelled
};
// For a blocking SyncRequest we hold references to the caller's mutex,
// condition variable and request state; the caller must ensure they outlive
// the request.
struct SyncRequest : public Request {
std::mutex& req_mtx;
std::condition_variable& req_cv;
ReqState& req_state;
GetClientCounters& counters;
  explicit SyncRequest(client_id _id, Time started, Cost cost,
                       std::mutex& mtx, std::condition_variable& _cv,
                       ReqState& _state, GetClientCounters& counters)
    : Request{_id, started, cost}, req_mtx(mtx), req_cv(_cv),
      req_state(_state), counters(counters) {}
};
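// The blocking handshake built on these references (implemented by
// SyncScheduler::add_request() below) is roughly a standard condition-variable
// wait; a sketch, not the actual implementation:
//
//   std::mutex mtx;
//   std::condition_variable cv;
//   ReqState state = ReqState::Wait;
//   // ... queue a SyncRequest referencing mtx/cv/state, then block:
//   std::unique_lock lock{mtx};
//   cv.wait(lock, [&state] { return state != ReqState::Wait; });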
class SyncScheduler {
public:
template <typename ...Args>
SyncScheduler(CephContext *cct, GetClientCounters&& counters,
Args&& ...args);
~SyncScheduler();
  // Submit a blocking request for dmclock scheduling; this function waits
  // until the request is ready.
int add_request(const client_id& client, const ReqParams& params,
const Time& time, Cost cost);
auto schedule_request(const client_id& client, const ReqParams& params,
const Time& time, Cost cost,
optional_yield_context _y [[maybe_unused]])
{
return add_request(client, params, time, cost);
}
void cancel();
void cancel(const client_id& client);
static void handle_request_cb(const client_id& c, std::unique_ptr<SyncRequest> req,
PhaseType phase, Cost cost);
private:
static constexpr bool IsDelayed = false;
using Queue = crimson::dmclock::PushPriorityQueue<client_id, SyncRequest, IsDelayed>;
using RequestRef = typename Queue::RequestRef;
using Clock = ceph::coarse_real_clock;
Queue queue;
CephContext const *cct;
GetClientCounters counters; //< provides per-client perf counters
};
template <typename ...Args>
SyncScheduler::SyncScheduler(CephContext *cct, GetClientCounters&& counters,
Args&& ...args):
queue(std::forward<Args>(args)...), cct(cct), counters(std::move(counters))
{}
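// Example use of the blocking interface (a sketch: `counters` and the chosen
// client_id are illustrative, and the constructor arguments after the counters
// function, which are forwarded to the PushPriorityQueue ctor, are elided):
//
//   SyncScheduler sched(cct, std::ref(counters) /*, queue ctor args */);
//   int r = sched.add_request(client_id::auth, ReqParams{},
//                             crimson::dmclock::get_time(), 1u);
//   if (r < 0) {
//     // the request was rejected or cancelled
//   }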
/*
* A dmclock request scheduling service for use with boost::asio.
*
* An asynchronous dmclock priority queue, where scheduled requests complete
* on a boost::asio executor.
*/
class AsyncScheduler : public md_config_obs_t {
public:
template <typename ...Args> // args forwarded to PullPriorityQueue ctor
AsyncScheduler(CephContext *cct, boost::asio::io_context& context,
GetClientCounters&& counters, md_config_obs_t *observer,
Args&& ...args);
~AsyncScheduler();
using executor_type = boost::asio::io_context::executor_type;
/// return the default executor for async_request() callbacks
executor_type get_executor() noexcept {
return timer.get_executor();
}
/// submit an async request for dmclock scheduling. the given completion
/// handler will be invoked with (error_code, PhaseType) when the request
/// is ready or canceled. on success, this grants a throttle unit that must
/// be returned with a call to request_complete()
template <typename CompletionToken>
auto async_request(const client_id& client, const ReqParams& params,
const Time& time, Cost cost, CompletionToken&& token);
int schedule_request(const client_id& client, const ReqParams& params,
const Time& time, Cost cost, optional_yield_context yield_ctx);
/// returns a throttle unit granted by async_request()
void request_complete();
/// cancel all queued requests, invoking their completion handlers with an
/// operation_aborted error and default-constructed result
void cancel();
/// cancel all queued requests for a given client, invoking their completion
/// handler with an operation_aborted error and default-constructed result
void cancel(const client_id& client);
const char** get_tracked_conf_keys() const override;
void handle_conf_change(const ConfigProxy& conf,
const std::set<std::string>& changed) override;
private:
static constexpr bool IsDelayed = false;
using Queue = crimson::dmclock::PullPriorityQueue<client_id, Request, IsDelayed>;
using RequestRef = typename Queue::RequestRef;
Queue queue; //< dmclock priority queue
using Signature = void(boost::system::error_code, PhaseType);
using Completion = async::Completion<Signature, async::AsBase<Request>>;
using Clock = ceph::coarse_real_clock;
using Timer = boost::asio::basic_waitable_timer<Clock>;
Timer timer; //< timer for the next scheduled request
CephContext *const cct;
md_config_obs_t *const observer; //< observer to update ClientInfoFunc
GetClientCounters counters; //< provides per-client perf counters
/// max request throttle
std::atomic<int64_t> max_requests;
std::atomic<int64_t> outstanding_requests = 0;
/// set a timer to process the next request
void schedule(const Time& time);
/// process ready requests, then schedule the next pending request
void process(const Time& now);
};
template <typename ...Args>
AsyncScheduler::AsyncScheduler(CephContext *cct, boost::asio::io_context& context,
GetClientCounters&& counters,
md_config_obs_t *observer, Args&& ...args)
: queue(std::forward<Args>(args)...),
timer(context), cct(cct), observer(observer),
counters(std::move(counters)),
max_requests(cct->_conf.get_val<int64_t>("rgw_max_concurrent_requests"))
{
if (max_requests <= 0) {
max_requests = std::numeric_limits<int64_t>::max();
}
if (observer) {
cct->_conf.add_observer(this);
}
}
template <typename CompletionToken>
auto AsyncScheduler::async_request(const client_id& client,
const ReqParams& params,
const Time& time, Cost cost,
CompletionToken&& token)
{
boost::asio::async_completion<CompletionToken, Signature> init(token);
auto ex1 = get_executor();
auto& handler = init.completion_handler;
// allocate the Request and add it to the queue
auto completion = Completion::create(ex1, std::move(handler),
Request{client, time, cost});
// cast to unique_ptr<Request>
auto req = RequestRef{std::move(completion)};
int r = queue.add_request(std::move(req), client, params, time, cost);
if (r == 0) {
// schedule an immediate call to process() on the executor
schedule(crimson::dmclock::TimeZero);
if (auto c = counters(client)) {
c->inc(queue_counters::l_qlen);
c->inc(queue_counters::l_cost, cost);
}
} else {
// post the error code
boost::system::error_code ec(r, boost::system::system_category());
// cast back to Completion
auto completion = static_cast<Completion*>(req.release());
async::post(std::unique_ptr<Completion>{completion},
ec, PhaseType::priority);
if (auto c = counters(client)) {
c->inc(queue_counters::l_limit);
c->inc(queue_counters::l_limit_cost, cost);
}
}
return init.result.get();
}
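// Example use of the async interface (a sketch; `scheduler` and the chosen
// client_id are illustrative). On success the callback owns a throttle unit,
// which must be returned with request_complete():
//
//   scheduler.async_request(client_id::auth, ReqParams{},
//                           crimson::dmclock::get_time(), 1u,
//       [&scheduler] (boost::system::error_code ec, PhaseType phase) {
//         if (!ec) {
//           // ... process the request, then return the throttle unit
//           scheduler.request_complete();
//         }
//       });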
/// array of per-client counters to serve as GetClientCounters
class ClientCounters {
std::array<PerfCountersRef, static_cast<size_t>(client_id::count)> clients;
public:
ClientCounters(CephContext *cct);
PerfCounters* operator()(client_id client) const {
return clients[static_cast<size_t>(client)].get();
}
};
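// Note: PerfCountersRef is an owning smart pointer, so a ClientCounters
// instance is not copyable and cannot be stored in a GetClientCounters
// (std::function) by value; callers typically pass it by reference, e.g.:
//
//   ClientCounters counters(cct);
//   GetClientCounters get_counters = std::ref(counters);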
/// A simple wrapper to hold the client config and counters objects needed to
/// construct a scheduler instance. Its primary purpose is to construct the
/// scheduler only when it is enabled in the frontend configuration.
class optional_scheduler_ctx {
std::optional<ClientConfig> clients;
std::optional<ClientCounters> counters;
public:
  optional_scheduler_ctx(CephContext *cct) {
    if (cct->_conf.get_val<bool>("rgw_dmclock_enabled")) {
      clients.emplace(cct);
      counters.emplace(cct);
    }
  }
}
operator bool() const noexcept { return counters && clients; }
  // both getters below throw std::bad_optional_access when the scheduler
  // is not enabled
ClientCounters& get_counters() { return counters.value(); }
ClientConfig& get_clients() { return clients.value(); }
};
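// Typical frontend wiring (a sketch; names are illustrative, and it assumes
// ClientConfig from rgw_dmclock.h is both a config observer and callable as
// the queue's client info function, per the observer member above):
//
//   optional_scheduler_ctx sched_ctx(cct);
//   std::optional<AsyncScheduler> scheduler;
//   if (sched_ctx) {
//     auto& clients = sched_ctx.get_clients();
//     scheduler.emplace(cct, io_context,
//                       std::ref(sched_ctx.get_counters()),
//                       &clients,            // config observer
//                       std::ref(clients));  // ClientInfoFunc for the queue
//   }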
} // namespace rgw::dmclock
#endif // RGW_DMCLOCK_SCHEDULER_H