1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
import logging
from typing import TYPE_CHECKING, Iterator
from ceph.deployment.service_spec import PlacementSpec, ServiceSpec, HostPlacementSpec
from cephadm.schedule import HostAssignment
from orchestrator import OrchestratorError
if TYPE_CHECKING:
from .module import CephadmOrchestrator
LAST_MIGRATION = 2
logger = logging.getLogger(__name__)
class Migrations:
    """Run one-shot data-structure migrations for cephadm module upgrades.

    Each migration step converts on-disk/module-option state from version N
    to N+1; ``migration_current`` (a single global counter, see the comment
    in ``__init__``) records how far we have gotten.
    """

    def __init__(self, mgr: "CephadmOrchestrator"):
        self.mgr = mgr

        # Why having a global counter, instead of spec versions?
        #
        # for the first migration:
        # The specs don't change in (this) migration. but the scheduler here.
        # Adding the version to the specs at this time just felt wrong to me.
        #
        # And the specs are only another part of cephadm which needs potential upgrades.
        # We have the cache, the inventory, the config store, the upgrade (imagine changing the
        # upgrade code, while an old upgrade is still in progress), naming of daemons,
        # fs-layout of the daemons, etc.
        if self.mgr.migration_current is None:
            # Fresh deployment (or pre-migration module): start at version 0.
            self.set(0)

        # for some migrations, we don't need to do anything except for
        # setting migration_current = 1.
        # let's try to shortcut things here.
        self.migrate()

    def set(self, val: int) -> None:
        """Persist *val* as the current migration version (option + in-memory)."""
        self.mgr.set_module_option('migration_current', val)
        self.mgr.migration_current = val

    def is_migration_ongoing(self) -> bool:
        """Return True while we have not yet reached ``LAST_MIGRATION``."""
        return self.mgr.migration_current != LAST_MIGRATION

    def verify_no_migration(self) -> None:
        """Raise if a migration is still in progress.

        :raises OrchestratorError: while ``is_migration_ongoing()`` is True.
        """
        if self.is_migration_ongoing():
            # this is raised in module.serve()
            raise OrchestratorError(
                "cephadm migration still ongoing. Please wait, until the migration is complete.")

    def migrate(self) -> None:
        """Run all outstanding migration steps in order.

        Each step only advances the counter when it reports success, so an
        incomplete step (e.g. daemon cache not yet filled) is retried on the
        next call.
        """
        if self.mgr.migration_current == 0:
            if self.migrate_0_1():
                self.set(1)

        if self.mgr.migration_current == 1:
            if self.migrate_1_2():
                self.set(2)

    def migrate_0_1(self) -> bool:
        """
        Migration 0 -> 1
        New scheduler that takes PlacementSpec as the bound and not as recommendation.
        I.e. the new scheduler won't suggest any new placements outside of the hosts
        specified by label etc.
        Which means, we have to make sure, we're not removing any daemons directly after
        upgrading to the new scheduler.
        There is a potential race here:
        1. user updates his spec to remove daemons
        2. mgr gets upgraded to new scheduler, before the old scheduler removed the daemon
        3. now, we're converting the spec to explicit placement, thus reverting (1.)
        I think this is ok.
        """

        def interesting_specs() -> Iterator[ServiceSpec]:
            # Only managed specs with a count AND some host constraint
            # (explicit hosts, pattern or label) can lose daemons under the
            # new bounded scheduler.
            for s in self.mgr.spec_store.specs.values():
                if s.unmanaged:
                    continue
                p = s.placement
                if p is None:
                    continue
                if p.count is None:
                    continue
                if not p.hosts and not p.host_pattern and not p.label:
                    continue
                yield s

        def convert_to_explicit(spec: ServiceSpec) -> None:
            placements = HostAssignment(
                spec=spec,
                hosts=self.mgr.inventory.all_specs(),
                get_daemons_func=self.mgr.cache.get_daemons_by_service
            ).place()

            existing_daemons = self.mgr.cache.get_daemons_by_service(spec.service_name())

            # We have to migrate, only if the new scheduler would remove daemons
            if len(placements) >= len(existing_daemons):
                return

            # Keep the user's original HostPlacementSpec (network/name) where
            # one exists; otherwise synthesize a bare one for the daemon's host.
            old_hosts = {h.hostname: h for h in spec.placement.hosts}
            new_hosts = [
                old_hosts[d.hostname] if d.hostname in old_hosts else HostPlacementSpec(
                    hostname=d.hostname, network='', name='')
                for d in existing_daemons
            ]

            new_placement = PlacementSpec(
                hosts=new_hosts,
                count=spec.placement.count
            )

            # Round-trip through JSON to get an independent copy of the spec.
            new_spec = ServiceSpec.from_json(spec.to_json())
            new_spec.placement = new_placement

            logger.info(f"Migrating {spec.one_line_str()} to explicit placement")
            self.mgr.spec_store.save(new_spec)

        specs = list(interesting_specs())
        if not specs:
            return True  # nothing to do. shortcut

        if not self.mgr.cache.daemon_cache_filled():
            # Without a complete daemon inventory we cannot tell which
            # daemons the new scheduler would remove; retry later.
            logger.info("Unable to migrate yet. Daemon Cache still incomplete.")
            return False

        for spec in specs:
            convert_to_explicit(spec)

        return True

    def migrate_1_2(self) -> bool:
        """
        After 15.2.4, we unified some service IDs: MONs, MGRs etc no longer have a service id.
        Which means, the service names changed:
        mon.foo -> mon
        mgr.foo -> mgr
        This fixes the data structure consistency
        """
        bad_specs = {}
        for name, spec in self.mgr.spec_store.specs.items():
            # Stored under a stale key (e.g. 'mon.foo') that no longer matches
            # the spec's canonical service name (e.g. 'mon').
            if name != spec.service_name():
                bad_specs[name] = (spec.service_name(), spec)

        for old, (new, old_spec) in bad_specs.items():
            if new not in self.mgr.spec_store.specs:
                spec = old_spec
            else:
                # A spec already exists under the canonical name; keep it.
                spec = self.mgr.spec_store.specs[new]
            # NOTE(review): source indentation was mangled; reconstructed with
            # unmanaged=True applied to every migrated spec — confirm against
            # upstream history.
            spec.unmanaged = True
            self.mgr.spec_store.save(spec)
            self.mgr.spec_store.rm(old)

        return True
|