diff options
author | Vladimír Čunát <vladimir.cunat@nic.cz> | 2024-12-03 12:01:16 +0100 |
---|---|---|
committer | Vladimír Čunát <vladimir.cunat@nic.cz> | 2024-12-03 12:01:16 +0100 |
commit | 84fa76b71e2ea15084e481cc3f8d8454ae8bdd9f (patch) | |
tree | 2ed19e473c478d5772910227dccd1cff19a3dc5c /python | |
parent | Merge !1626: manager: TLS certificate files auto-reload (diff) | |
parent | doc: debugging with kresctl moved to dev (diff) | |
download | knot-resolver-84fa76b71e2ea15084e481cc3f8d8454ae8bdd9f.tar.xz knot-resolver-84fa76b71e2ea15084e481cc3f8d8454ae8bdd9f.zip |
Merge !1450: manager: subprocess debugging via GDB
Diffstat (limited to 'python')
-rw-r--r-- | python/knot_resolver/client/commands/debug.py | 144 | ||||
-rw-r--r-- | python/knot_resolver/client/commands/pids.py | 63 | ||||
-rw-r--r-- | python/knot_resolver/client/main.py | 17 | ||||
-rw-r--r-- | python/knot_resolver/controller/interface.py | 5 | ||||
-rw-r--r-- | python/knot_resolver/controller/supervisord/__init__.py | 8 | ||||
-rw-r--r-- | python/knot_resolver/manager/manager.py | 25 | ||||
-rw-r--r-- | python/knot_resolver/manager/server.py | 42 |
7 files changed, 292 insertions, 12 deletions
diff --git a/python/knot_resolver/client/commands/debug.py b/python/knot_resolver/client/commands/debug.py new file mode 100644 index 00000000..5d9a81df --- /dev/null +++ b/python/knot_resolver/client/commands/debug.py @@ -0,0 +1,144 @@ +import argparse +import json +import os +import sys +from pathlib import Path +from typing import List, Optional, Tuple, Type + +from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command +from knot_resolver.utils import which +from knot_resolver.utils.requests import request + +PROCS_TYPE = List + + +@register_command +class DebugCommand(Command): + def __init__(self, namespace: argparse.Namespace) -> None: + self.proc_type: Optional[str] = namespace.proc_type + self.sudo: bool = namespace.sudo + self.gdb: str = namespace.gdb + self.print_only: bool = namespace.print_only + self.gdb_args: List[str] = namespace.extra + super().__init__(namespace) + + @staticmethod + def register_args_subparser( + subparser: "argparse._SubParsersAction[argparse.ArgumentParser]", + ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: + debug = subparser.add_parser( + "debug", + help="Run GDB on the manager's subprocesses", + ) + debug.add_argument( + "proc_type", + help="Optional, the type of process to debug. May be 'kresd' (default), 'gc', or 'all'.", + type=str, + nargs="?", + default="kresd", + ) + debug.add_argument( + "--sudo", + dest="sudo", + help="Run GDB with sudo", + action="store_true", + default=False, + ) + debug.add_argument( + "--gdb", + help="Custom GDB executable (may be a command on PATH, or an absolute path)", + type=str, + default=None, + ) + debug.add_argument( + "--print-only", + help="Prints the GDB command line into stderr as a Python array, does not execute GDB", + action="store_true", + default=False, + ) + return debug, DebugCommand + + @staticmethod + def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: + return {} + + def run(self, args: CommandArgs) -> None: # noqa: PLR0912, PLR0915 + if self.gdb is None: + try: + gdb_cmd = str(which.which("gdb")) + except RuntimeError: + print("Could not find 'gdb' in $PATH. Is GDB installed?", file=sys.stderr) + sys.exit(1) + elif "/" not in self.gdb: + try: + gdb_cmd = str(which.which(self.gdb)) + except RuntimeError: + print(f"Could not find '{self.gdb}' in $PATH.", file=sys.stderr) + sys.exit(1) + else: + gdb_cmd_path = Path(self.gdb).absolute() + if not gdb_cmd_path.exists(): + print(f"Could not find '{self.gdb}'.", file=sys.stderr) + sys.exit(1) + gdb_cmd = str(gdb_cmd_path) + + response = request(args.socket, "GET", f"processes/{self.proc_type}") + if response.status != 200: + print(response, file=sys.stderr) + sys.exit(1) + + procs = json.loads(response.body) + if not isinstance(procs, PROCS_TYPE): + print( + f"Unexpected response type '{type(procs).__name__}' from manager. Expected '{PROCS_TYPE.__name__}'", + file=sys.stderr, + ) + sys.exit(1) + if len(procs) == 0: + print( + f"There are no processes of type '{self.proc_type}' available to debug", + file=sys.stderr, + ) + + exec_args = [] + + # Put `sudo --` at the beginning of the command. + if self.sudo: + try: + sudo_cmd = str(which.which("sudo")) + except RuntimeError: + print("Could not find 'sudo' in $PATH. Is sudo installed?", file=sys.stderr) + sys.exit(1) + exec_args.extend([sudo_cmd, "--"]) + + # Attach GDB to processes - the processes are attached using the `add-inferior` and `attach` GDB + # commands. This way, we can debug multiple processes. + exec_args.extend([gdb_cmd, "--"]) + exec_args.extend(["-init-eval-command", "set detach-on-fork off"]) + exec_args.extend(["-init-eval-command", "set schedule-multiple on"]) + exec_args.extend(["-init-eval-command", f'attach {procs[0]["pid"]}']) + inferior = 2 + for proc in procs[1:]: + exec_args.extend(["-init-eval-command", "add-inferior"]) + exec_args.extend(["-init-eval-command", f"inferior {inferior}"]) + exec_args.extend(["-init-eval-command", f'attach {proc["pid"]}']) + inferior += 1 + + num_inferiors = inferior - 1 + if num_inferiors > 1: + # Now we switch back to the first process and add additional provided GDB arguments. + exec_args.extend(["-init-eval-command", "inferior 1"]) + exec_args.extend( + [ + "-init-eval-command", + "echo \\n\\nYou are now debugging multiple Knot Resolver processes. To switch between " + "them, use the 'inferior <n>' command, where <n> is an integer from 1 to " + f"{num_inferiors}.\\n\\n", + ] + ) + exec_args.extend(self.gdb_args) + + if self.print_only: + print(f"{exec_args}") + else: + os.execl(*exec_args) diff --git a/python/knot_resolver/client/commands/pids.py b/python/knot_resolver/client/commands/pids.py new file mode 100644 index 00000000..a1ab5f8c --- /dev/null +++ b/python/knot_resolver/client/commands/pids.py @@ -0,0 +1,63 @@ +import argparse +import json +import sys +from typing import Iterable, List, Optional, Tuple, Type + +from knot_resolver.client.command import Command, CommandArgs, CompWords, register_command +from knot_resolver.utils.requests import request + +PROCESSES_TYPE = Iterable + + +@register_command +class PidsCommand(Command): + def __init__(self, namespace: argparse.Namespace) -> None: + self.proc_type: Optional[str] = namespace.proc_type + self.json: int = namespace.json + + super().__init__(namespace) + + @staticmethod + def register_args_subparser( + subparser: "argparse._SubParsersAction[argparse.ArgumentParser]", + ) -> Tuple[argparse.ArgumentParser, "Type[Command]"]: + pids = subparser.add_parser("pids", help="List the PIDs of the Manager's subprocesses") + pids.add_argument( + "proc_type", + help="Optional, the type of process to query. May be 'kresd', 'gc', or 'all' (default).", + nargs="?", + default="all", + ) + pids.add_argument( + "--json", + help="Optional, makes the output more verbose, in JSON.", + action="store_true", + default=False, + ) + return pids, PidsCommand + + @staticmethod + def completion(args: List[str], parser: argparse.ArgumentParser) -> CompWords: + return {} + + def run(self, args: CommandArgs) -> None: + response = request(args.socket, "GET", f"processes/{self.proc_type}") + + if response.status == 200: + processes = json.loads(response.body) + if isinstance(processes, PROCESSES_TYPE): + if self.json: + print(json.dumps(processes, indent=2)) + else: + for p in processes: + print(p["pid"]) + + else: + print( + f"Unexpected response type '{type(processes).__name__}' from manager. Expected '{PROCESSES_TYPE.__name__}'", + file=sys.stderr, + ) + sys.exit(1) + else: + print(response, file=sys.stderr) + sys.exit(1) diff --git a/python/knot_resolver/client/main.py b/python/knot_resolver/client/main.py index 75cd6a77..461b7fc4 100644 --- a/python/knot_resolver/client/main.py +++ b/python/knot_resolver/client/main.py @@ -1,6 +1,7 @@ import argparse import importlib import os +import sys from knot_resolver.constants import VERSION @@ -68,7 +69,21 @@ def main() -> None: parser = create_main_argument_parser() install_commands_parsers(parser) - namespace = parser.parse_args() + # TODO: This is broken with unpatched versions of poethepoet, because they drop the `--` pseudo-argument. + # Patch submitted at <https://github.com/nat-n/poethepoet/pull/163>. + try: + pa_index = sys.argv.index("--", 1) + argv_to_parse = sys.argv[1:pa_index] + argv_extra = sys.argv[(pa_index + 1) :] + except ValueError: + argv_to_parse = sys.argv[1:] + argv_extra = [] + + namespace = parser.parse_args(argv_to_parse) + if hasattr(namespace, "extra"): + raise TypeError("'extra' is already an attribute - this is disallowed for commands") + namespace.extra = argv_extra + client = KresClient(namespace, parser) client.execute() diff --git a/python/knot_resolver/controller/interface.py b/python/knot_resolver/controller/interface.py index 43c24257..49808d01 100644 --- a/python/knot_resolver/controller/interface.py +++ b/python/knot_resolver/controller/interface.py @@ -109,6 +109,7 @@ class Subprocess(ABC): self._id = kresid self._config = config self._registered_worker: bool = False + self._pid: Optional[int] = None self._config_file: Optional[Path] = None if self.type is SubprocessType.KRESD: @@ -190,6 +191,10 @@ class Subprocess(ABC): pass @abstractmethod + async def get_pid(self) -> int: + pass + + @abstractmethod def status(self) -> SubprocessStatus: pass diff --git a/python/knot_resolver/controller/supervisord/__init__.py b/python/knot_resolver/controller/supervisord/__init__.py index 347ac1e7..ddb9b29b 100644 --- a/python/knot_resolver/controller/supervisord/__init__.py +++ b/python/knot_resolver/controller/supervisord/__init__.py @@ -223,6 +223,14 @@ class SupervisordSubprocess(Subprocess): fast = _create_fast_proxy(self._config) fast.startProcess(self.name) + @async_in_a_thread + def get_pid(self) -> int: + if self._pid is None: + supervisord = _create_supervisord_proxy(self._config) + info = supervisord.getProcessInfo(self.name) + self._pid = info["pid"] + return self._pid + def get_used_config(self) -> KresConfig: return self._config diff --git a/python/knot_resolver/manager/manager.py b/python/knot_resolver/manager/manager.py index f9c68708..952c8b7d 100644 --- a/python/knot_resolver/manager/manager.py +++ b/python/knot_resolver/manager/manager.py @@ -55,6 +55,14 @@ async def _deny_max_worker_changes(config_old: KresConfig, config_new: KresConfi return Result.ok(None) +async def _subprocess_desc(subprocess: Subprocess) -> object: + return { + "type": subprocess.type.name, + "pid": await subprocess.get_pid(), + "status": subprocess.status().name, + } + + class KresManager: # pylint: disable=too-many-instance-attributes """ Core of the whole operation. Orchestrates individual instances under some @@ -63,7 +71,7 @@ class KresManager: # pylint: disable=too-many-instance-attributes Instantiate with `KresManager.create()`, not with the usual constructor! """ - def __init__(self, shutdown_trigger: Callable[[int], None], _i_know_what_i_am_doing: bool = False): + def __init__(self, _i_know_what_i_am_doing: bool = False): if not _i_know_what_i_am_doing: logger.error( "Trying to create an instance of KresManager using normal constructor. Please use " @@ -80,19 +88,18 @@ class KresManager: # pylint: disable=too-many-instance-attributes self._watchdog_task: Optional["asyncio.Task[None]"] = None self._fix_counter: _FixCounter = _FixCounter() self._config_store: ConfigStore - self._shutdown_trigger: Callable[[int], None] = shutdown_trigger + self._shutdown_triggers: List[Callable[[int], None]] = [] @staticmethod async def create( subprocess_controller: SubprocessController, config_store: ConfigStore, - shutdown_trigger: Callable[[int], None], ) -> "KresManager": """ Creates new instance of KresManager. """ - inst = KresManager(shutdown_trigger, _i_know_what_i_am_doing=True) + inst = KresManager(_i_know_what_i_am_doing=True) await inst._async_init(subprocess_controller, config_store) # noqa: SLF001 return inst @@ -211,6 +218,9 @@ class KresManager: # pylint: disable=too-many-instance-attributes await self._gc.stop() self._gc = None + def add_shutdown_trigger(self, trigger: Callable[[int], None]) -> None: + self._shutdown_triggers.append(trigger) + async def validate_config(self, _old: KresConfig, new: KresConfig) -> Result[NoneType, str]: async with self._manager_lock: if _old.rate_limiting != new.rate_limiting: @@ -233,6 +243,10 @@ class KresManager: # pylint: disable=too-many-instance-attributes logger.debug("Canary process test passed.") return Result.ok(None) + async def get_processes(self, proc_type: Optional[SubprocessType]) -> List[object]: + processes = await self._controller.get_all_running_instances() + return [await _subprocess_desc(pr) for pr in processes if proc_type is None or pr.type == proc_type] + async def _reload_system_state(self) -> None: async with self._manager_lock: self._workers = [] @@ -338,7 +352,8 @@ class KresManager: # pylint: disable=too-many-instance-attributes logger.warning("Collecting all remaining workers...") await self._reload_system_state() logger.warning("Terminating...") - self._shutdown_trigger(1) + for trigger in self._shutdown_triggers: + trigger(1) async def _instability_handler(self) -> None: if self._fix_counter.is_too_high(): diff --git a/python/knot_resolver/manager/server.py b/python/knot_resolver/manager/server.py index b09ff7b9..06fab0cf 100644 --- a/python/knot_resolver/manager/server.py +++ b/python/knot_resolver/manager/server.py @@ -21,6 +21,7 @@ from aiohttp.web_runner import AppRunner, TCPSite, UnixSite from knot_resolver.constants import CONFIG_FILE, USER from knot_resolver.controller import get_best_controller_implementation from knot_resolver.controller.exceptions import SubprocessControllerExecError +from knot_resolver.controller.interface import SubprocessType from knot_resolver.controller.registered_workers import command_single_registered_worker from knot_resolver.datamodel import kres_config_json_schema from knot_resolver.datamodel.cache_schema import CacheClearRPCSchema @@ -87,7 +88,7 @@ class Server: # This is top-level class containing pretty much everything. Instead of global # variables, we use instance attributes. That's why there are so many and it's # ok. - def __init__(self, store: ConfigStore, config_path: Optional[Path]): + def __init__(self, store: ConfigStore, config_path: Optional[Path], manager: KresManager): # config store & server dynamic reconfiguration self.config_store = store @@ -100,6 +101,7 @@ class Server: self._config_path: Optional[Path] = config_path self._exit_code: int = 0 self._shutdown_event = asyncio.Event() + self._manager = manager async def _reconfigure(self, config: KresConfig) -> None: await self._reconfigure_listen_address(config) @@ -323,6 +325,30 @@ class Server: await self._reload_config() return web.Response(text="Reloading...") + async def _handler_processes(self, request: web.Request) -> web.Response: + """ + Route handler for listing PIDs of subprocesses + """ + + proc_type: Optional[SubprocessType] = None + + if "path" in request.match_info and len(request.match_info["path"]) > 0: + ptstr = request.match_info["path"] + if ptstr == "/kresd": + proc_type = SubprocessType.KRESD + elif ptstr == "/gc": + proc_type = SubprocessType.GC + elif ptstr == "/all": + proc_type = None + else: + return web.Response(text=f"Invalid process type '{ptstr}'", status=400) + + return web.json_response( + await self._manager.get_processes(proc_type), + headers={"Access-Control-Allow-Origin": "*"}, + dumps=partial(json.dumps, indent=4), + ) + def _setup_routes(self) -> None: self.app.add_routes( [ @@ -339,6 +365,7 @@ class Server: web.get("/metrics/json", self._handler_metrics_json), web.get("/metrics/prometheus", self._handler_metrics_prometheus), web.post("/cache/clear", self._handler_cache_clear), + web.get("/processes{path:.*}", self._handler_processes), ] ) @@ -410,7 +437,7 @@ async def _init_config_store(config: Dict[str, Any]) -> ConfigStore: return ConfigStore(config_validated) -async def _init_manager(config_store: ConfigStore, server: Server) -> KresManager: +async def _init_manager(config_store: ConfigStore) -> KresManager: """ Called asynchronously when the application initializes. """ @@ -420,7 +447,7 @@ async def _init_manager(config_store: ConfigStore, server: Server) -> KresManage # Create KresManager. This will perform autodetection of available service managers and # select the most appropriate to use (or use the one configured directly) - manager = await KresManager.create(controller, config_store, server.trigger_shutdown) + manager = await KresManager.create(controller, config_store) logger.info("Initial configuration applied. Process manager initialized...") return manager @@ -559,11 +586,14 @@ async def start_server(config: Path = CONFIG_FILE) -> int: # noqa: PLR0915 await files.init_files_watchdog(config_store) + # After we have loaded the configuration, we can start worrying about subprocess management. + manager = await _init_manager(config_store) + # prepare instance of the server (no side effects) - server = Server(config_store, config) + server = Server(config_store, config, manager) - # After we have loaded the configuration, we can start worring about subprocess management. - manager = await _init_manager(config_store, server) + # add Server's shutdown trigger to the manager + manager.add_shutdown_trigger(server.trigger_shutdown) except SubprocessControllerExecError as e: # if we caught this exception, some component wants to perform a reexec during startup. Most likely, it would |