1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 smarttab
#include "crimson/osd/main_config_bootstrap_helpers.h"
#include <seastar/core/print.hh>
#include <seastar/core/prometheus.hh>
#include <seastar/core/thread.hh>
#include <seastar/http/httpd.hh>
#include <seastar/net/inet_address.hh>
#include <seastar/util/closeable.hh>
#include <seastar/util/defer.hh>
#include <seastar/util/std-compat.hh>
#include "common/ceph_argparse.h"
#include "common/config_tracker.h"
#include "crimson/common/buffer_io.h"
#include "crimson/common/config_proxy.h"
#include "crimson/common/fatal_signal.h"
#include "crimson/common/perf_counters_collection.h"
#include "crimson/mon/MonClient.h"
#include "crimson/net/Messenger.h"
#include "crimson/osd/main_config_bootstrap_helpers.h"
#include <sys/wait.h> // for waitpid()
using namespace std::literals;
using crimson::common::local_conf;
using crimson::common::sharded_conf;
using crimson::common::sharded_perf_coll;
// Accessor for the seastar logger obtained from
// crimson::get_logger(ceph_subsys_osd); used by the logging calls below.
static seastar::logger& logger()
{
  seastar::logger& osd_logger = crimson::get_logger(ceph_subsys_osd);
  return osd_logger;
}
namespace crimson::osd {
// Write the crimson-osd synopsis line for `prog` to stdout, then delegate
// to generic_server_usage() for the remaining help text.
void usage(const char* prog)
{
  std::cout << "crimson osd usage: " << prog;
  std::cout << " -i <ID> [flags...]" << std::endl;
  generic_server_usage();
}
/// Fetch the cluster configuration from the monitors using a short-lived
/// mon client.
///
/// The work runs inside seastar::async(): a temporary client messenger and a
/// crimson::mon::Client are started, the "config" subscription is requested
/// and renewed, and the body waits for wait_for_config() to resolve. The
/// fsid reported by the mon client is then stored in the local config under
/// "fsid". The mon client and the messenger are stopped by the deferred
/// actions declared below (mon client first, as it was declared last).
///
/// @return the future produced by seastar::async() for the body above. If
///         monc.start() fails, the exception is printed to stderr and
///         re-raised.
seastar::future<> populate_config_from_mon()
{
  logger().info("populating config from monitor");
  // i don't have any client before joining the cluster, so no need to have
  // a proper auth handler
  class DummyAuthHandler : public crimson::common::AuthHandler {
  public:
    void handle_authentication(const EntityName& name,
                               const AuthCapsInfo& caps)
    {}
  };
  return seastar::async([] {
    auto auth_handler = std::make_unique<DummyAuthHandler>();
    auto msgr = crimson::net::Messenger::create(entity_name_t::CLIENT(),
                                                "temp_mon_client",
                                                get_nonce(),
                                                true);
    crimson::mon::Client monc{*msgr, *auth_handler};
    msgr->set_auth_client(&monc);
    msgr->start({&monc}).get();
    // Declared before stop_monc so the messenger is torn down after the
    // mon client.
    auto stop_msgr = seastar::defer([&] {
      msgr->stop();
      msgr->shutdown().get();
    });
    monc.start().handle_exception([] (auto ep) {
      fmt::print(std::cerr, "FATAL: unable to connect to cluster: {}\n", ep);
      return seastar::make_exception_future<>(ep);
    }).get();
    auto stop_monc = seastar::defer([&] {
      monc.stop().get();
    });
    monc.sub_want("config", 0, 0);
    monc.renew_subs().get();
    // wait for monmap and config
    monc.wait_for_config().get();
    auto fsid = monc.get_fsid().to_string();
    local_conf().set_val("fsid", fsid).get();
    // __func__ would expand to "operator()" inside this lambda, so the
    // function name is spelled out explicitly.
    logger().debug("{}: got config from monitor, fsid {}",
                   "populate_config_from_mon", fsid);
  });
}
/// Compute the early (pre-reactor) configuration inside a throw-away
/// seastar application started with "--smp 1".
///
/// Steps, in order:
///  - ceph_argparse_early_args() processes a copy of the command line and
///    fills init_params, cluster_name and conf_file_list;
///  - inside the bootstrap app the config is populated from the
///    environment, the remaining arguments and the config files, and,
///    unless no_mon_config is set, from the monitors;
///  - ret.ceph_args / ret.early_args are derived from the argument lists;
///  - the seastar placement options are appended to ret.early_args:
///    "--cpuset <cores> --thread-affinity 1" when crimson_seastar_cpu_cores
///    is non-empty, otherwise "--smp <n> --thread-affinity 0" from
///    crimson_seastar_num_threads. Passing --cpuset on the command line, or
///    having neither option set, logs an error and calls ceph_abort().
///
/// @param argc  number of entries in argv
/// @param argv  the original command line
/// @return the populated early_config_t, or a negative value when the
///         bootstrap application did not finish with exit code 0.
static tl::expected<early_config_t, int>
_get_early_config(int argc, const char *argv[])
{
  early_config_t ret;
  // Hand a copy of the whole command line to ceph_argparse_early_args().
  // NOTE(review): presumably it removes the options it consumes from
  // early_args; the set_difference below depends on that -- confirm.
  std::vector<const char *> early_args;
  early_args.insert(
    std::end(early_args),
    argv, argv + argc);
  ret.init_params = ceph_argparse_early_args(
    early_args,
    CEPH_ENTITY_TYPE_OSD,
    &ret.cluster_name,
    &ret.conf_file_list);
  seastar::app_template::config app_cfg;
  app_cfg.name = "Crimson-startup";
  app_cfg.auto_handle_sigint_sigterm = false;
  seastar::app_template app(std::move(app_cfg));
  // The bootstrap reactor only sees the program name and "--smp 1".
  const char *bootstrap_args[] = { argv[0], "--smp", "1" };
  int r = app.run(
    sizeof(bootstrap_args) / sizeof(bootstrap_args[0]),
    const_cast<char**>(bootstrap_args),
    [argc, argv, &ret, &early_args] {
      return seastar::async([argc, argv, &ret, &early_args] {
        seastar::global_logger_registry().set_all_loggers_level(
          seastar::log_level::debug);
        sharded_conf().start(
          ret.init_params.name, ret.cluster_name).get();
        local_conf().start().get();
        auto stop_conf = seastar::deferred_stop(sharded_conf());
        sharded_perf_coll().start().get();
        auto stop_perf_coll = seastar::deferred_stop(sharded_perf_coll());
        local_conf().parse_env().get();
        local_conf().parse_argv(early_args).get();
        local_conf().parse_config_files(ret.conf_file_list).get();
        if (local_conf()->no_mon_config) {
          logger().info("bypassing the config fetch due to --no-mon-config");
        } else {
          populate_config_from_mon().get();
        }
        // get ceph configs: everything in argv that is no longer present
        // in early_args.
        // NOTE(review): std::set_difference requires both ranges to be
        // sorted; the elements here are raw argv pointers compared by
        // address -- confirm that ordering assumption holds.
        std::set_difference(
          argv, argv + argc,
          std::begin(early_args),
          std::end(early_args),
          std::back_inserter(ret.ceph_args));
        ret.early_args.insert(
          std::end(ret.early_args),
          std::begin(early_args),
          std::end(early_args));
        if (auto found = std::find_if(
              std::begin(early_args),
              std::end(early_args),
              [](auto* arg) { return "--cpuset"sv == arg; });
            found == std::end(early_args)) {
          auto cpu_cores = crimson::common::get_conf<std::string>("crimson_seastar_cpu_cores");
          if (!cpu_cores.empty()) {
            // Set --cpuset based on crimson_seastar_cpu_cores config option
            // --smp default is one per CPU
            ret.early_args.emplace_back("--cpuset");
            ret.early_args.emplace_back(cpu_cores);
            ret.early_args.emplace_back("--thread-affinity");
            ret.early_args.emplace_back("1");
            logger().info("get_early_config: set --thread-affinity 1 --cpuset {}",
                          cpu_cores);
          } else {
            auto reactor_num = crimson::common::get_conf<uint64_t>("crimson_seastar_num_threads");
            if (!reactor_num) {
              logger().error("get_early_config: crimson_seastar_cpu_cores"
                             " or crimson_seastar_num_threads"
                             " must be set");
              ceph_abort();
            }
            std::string smp = fmt::format("{}", reactor_num);
            ret.early_args.emplace_back("--smp");
            ret.early_args.emplace_back(smp);
            ret.early_args.emplace_back("--thread-affinity");
            ret.early_args.emplace_back("0");
            logger().info("get_early_config: set --thread-affinity 0 --smp {}",
                          smp);
          }
        } else {
          logger().error("get_early_config: --cpuset can be "
                         "set only using crimson_seastar_cpu_cores");
          ceph_abort();
        }
        return 0;
      });
    });
  // The callback above only ever returns 0, so any other exit code means
  // the bootstrap did not complete and `ret` must not be trusted.
  // NOTE(review): seastar appears to report option-parsing errors and
  // uncaught exceptions as positive exit codes -- confirm against the
  // seastar version in use. The value is normalised to be negative, which
  // is what the caller of this helper expects.
  if (r != 0) {
    return tl::unexpected(r > 0 ? -r : r);
  }
  return ret;
}
/* get_early_config handles obtaining config parameters required prior
 * to reactor startup. Most deployment mechanisms (cephadm for one)
 * rely on pulling configs from the monitor rather than shipping around
 * config files, so this process needs to support pulling config options
 * from the monitors.
 *
 * Of particular interest are config params related to the seastar
 * reactor itself which can't be modified after the reactor has been
 * started -- like the number of cores to use (smp::count). Contacting
 * the monitors, however, requires a MonClient, which in turn needs a
 * running reactor.
 *
 * Unfortunately, seastar doesn't clean up thread local state
 * associated with seastar::smp task queues etc, so we can't
 * start a reactor, stop it, and restart it in the same thread
 * without an impractical amount of cleanup in seastar.
 *
 * More unfortunately, starting a reactor in a separate thread
 * and then joining the thread still doesn't avoid all global state.
 * I observed tasks from the previous reactor incarnation nevertheless
 * continuing to run in the new one, resulting in a crash as they access
 * freed memory.
 *
 * The approach taken here, therefore, is to actually fork, start a
 * reactor in the child process, encode the resulting early_config_t,
 * and send it back to the parent process.
 *
 * Returns the decoded early_config_t on success, or a negative error
 * value when creating the pipe, forking, reading from the pipe or
 * decoding fails, or when the child terminates without sending a config.
 * The function calls exit() directly when no arguments were given, when
 * usage was requested, and when the child exited with status 0 without
 * sending any data.
 */
tl::expected<early_config_t, int>
get_early_config(int argc, const char *argv[])
{
  auto args = argv_to_vec(argc, argv);
  if (args.empty()) {
    std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
    exit(1);
  }
  if (ceph_argparse_need_usage(args)) {
    usage(argv[0]);
    exit(0);
  }
  int pipes[2];
  int r = pipe2(pipes, 0);
  if (r < 0) {
    // Capture errno right away: the stream output below may overwrite it.
    const int err = errno;
    std::cerr << "get_early_config: failed to create pipes: "
              << -err << std::endl;
    return tl::unexpected(-err);
  }
  pid_t worker = fork();
  if (worker < 0) {
    // Capture errno before close() and the stream output can clobber it.
    const int err = errno;
    close(pipes[0]);
    close(pipes[1]);
    std::cerr << "get_early_config: failed to fork: "
              << -err << std::endl;
    return tl::unexpected(-err);
  } else if (worker == 0) { // child
    close(pipes[0]);
    auto ret = _get_early_config(argc, argv);
    if (ret.has_value()) {
      bufferlist bl;
      ::encode(ret.value(), bl);
      r = bl.write_fd(pipes[1]);
      close(pipes[1]);
      if (r < 0) {
        std::cerr << "get_early_config: child failed to write_fd: "
                  << r << std::endl;
        exit(-r);
      } else {
        exit(0);
      }
    } else {
      std::cerr << "get_early_config: child failed: "
                << -ret.error() << std::endl;
      exit(-ret.error());
    }
    // not reached: every branch above calls exit()
    return tl::unexpected(-1);
  } else { // parent
    close(pipes[1]);
    bufferlist bl;
    early_config_t ret;
    bool have_data = false;
    // Drain the pipe until read_fd() stops returning a positive count.
    while ((r = bl.read_fd(pipes[0], 1024)) > 0) {
      have_data = true;
    }
    close(pipes[0]);
    // Reap the child, retrying if the wait is interrupted by a signal.
    int status = 0;
    pid_t reaped;
    do {
      reaped = waitpid(worker, &status, 0);
    } while (reaped < 0 && errno == EINTR);
    // `status` is only meaningful when waitpid() actually returned the child.
    const bool child_reaped = (reaped == worker);
    // One of the parameters was tagged as exit(0) in the child process
    // so we need to check if we should exit here
    if (!have_data && child_reaped &&
        WIFEXITED(status) && WEXITSTATUS(status) == 0) {
      exit(0);
    }
    if (r < 0) {
      std::cerr << "get_early_config: parent failed to read from pipe: "
                << r << std::endl;
      return tl::unexpected(r);
    }
    if (!have_data) {
      // The child terminated without sending a config. It exits with the
      // negated error value on failure, so negate its exit status again;
      // otherwise fall back to -EINVAL, which is what decoding the empty
      // buffer used to yield.
      const int err = (child_reaped && WIFEXITED(status))
        ? -WEXITSTATUS(status)
        : -EINVAL;
      std::cerr << "get_early_config: child exited without providing a config: "
                << err << std::endl;
      return tl::unexpected(err);
    }
    try {
      auto bliter = bl.cbegin();
      ::decode(ret, bliter);
      return ret;
    } catch (...) {
      std::cerr << "get_early_config: parent failed to decode" << std::endl;
      return tl::unexpected(-EINVAL);
    }
  }
}
}
|