diff --git a/doc/guide/bind10-messages.xml b/doc/guide/bind10-messages.xml index fecefd0783..b0cbb264e1 100644 --- a/doc/guide/bind10-messages.xml +++ b/doc/guide/bind10-messages.xml @@ -598,7 +598,7 @@ needs a dedicated message bus. -BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status +BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3 The process terminated, but the bind10 boss didn't expect it to, which means it must have failed. diff --git a/src/bin/bind10/bind10_messages.mes b/src/bin/bind10/bind10_messages.mes index 79635fdf1d..3dd938feb3 100644 --- a/src/bin/bind10/bind10_messages.mes +++ b/src/bin/bind10/bind10_messages.mes @@ -24,7 +24,7 @@ needs a dedicated message bus. An error was encountered when the boss module specified statistics data which is invalid for the boss specification file. -% BIND10_COMPONENT_FAILED component %1 (pid %2) failed with %3 exit status +% BIND10_COMPONENT_FAILED component %1 (pid %2) failed: %3 The process terminated, but the bind10 boss didn't expect it to, which means it must have failed. diff --git a/src/lib/python/isc/bind10/component.py b/src/lib/python/isc/bind10/component.py index 091bfee626..c767b7da20 100644 --- a/src/lib/python/isc/bind10/component.py +++ b/src/lib/python/isc/bind10/component.py @@ -30,6 +30,8 @@ configuration). This is yet to be designed. import isc.log from isc.log_messages.bind10_messages import * import time +import os +import signal logger = isc.log.Logger("boss") DBG_TRACE_DATA = 20 @@ -45,6 +47,14 @@ STATE_DEAD = 'dead' STATE_STOPPED = 'stopped' STATE_RUNNING = 'running' +def get_signame(signal_number): + """Return the symbolic name for a signal.""" + for sig in dir(signal): + if sig.startswith("SIG") and sig[3].isalnum(): + if getattr(signal, sig) == signal_number: + return sig + return "unknown signal" + class BaseComponent: """ This represents a single component. This one is an abstract base class. @@ -206,8 +216,30 @@ class BaseComponent: it is considered a core or needed component, or because the component is to be restarted later. """ + + if exit_code is not None: + if os.WIFEXITED(exit_code): + exit_str = "process exited normally with exit status %d" % (exit_code) + elif os.WIFCONTINUED(exit_code): + exit_str = "process continued with exit status %d" % (exit_code) + elif os.WIFSTOPPED(exit_code): + sig = os.WSTOPSIG(exit_code) + signame = get_signame(sig) + exit_str = "process stopped with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame) + elif os.WIFSIGNALED(exit_code): + if os.WCOREDUMP(exit_code): + exit_str = "process dumped core with exit status %d" % (exit_code) + else: + sig = os.WTERMSIG(exit_code) + signame = get_signame(sig) + exit_str = "process terminated with exit status %d (killed by signal %d: %s)" % (exit_code, sig, signame) + else: + exit_str = "unknown condition with exit status %d" % (exit_code) + else: + exit_str = "unknown condition" + logger.error(BIND10_COMPONENT_FAILED, self.name(), self.pid(), - exit_code if exit_code is not None else "unknown") + exit_str) if not self.running(): raise ValueError("Can't fail component that isn't running") self.__state = STATE_STOPPED