mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-29 13:38:26 +00:00
Fix startup detection after restart in start.pl
The bin/tests/system/start.pl script waits until a "running" message is logged by a given name server instance before attempting to send a version.bind/CH/TXT query to it. The idea behind this was to make the script wait until named has loaded all the zones it is configured to serve before telling the system test framework that a given server is ready to use; this removes the need to add boilerplate code, to every test that expects a specific zone to be loaded, that waits for that zone.

The problem is that, when it looks for "running" messages, the bin/tests/system/start.pl script assumes that the existence of any such message in the named.run file indicates that a given named instance has already finished loading all zones. Meanwhile, some system tests restart all the named instances they use during their lifetime (some even do that a few times), for example to run Python-based tests. The bin/tests/system/start.pl script handles such a scenario incorrectly: as soon as it finds any "running" message in the named.run file it inspects and gets a response to a version.bind/CH/TXT query, it tells the system test framework that a given server is ready to use. That might not be true - it is possible that only the "version.bind" zone is loaded at that point and that the "running" message found was logged by a previously shut down named instance. This triggers intermittent failures for Python-based tests.

Fix by improving the logic that the bin/tests/system/start.pl script uses to detect server startup: count the "running" lines present in a given named.run file before attempting to start a named instance, and only proceed with version.bind/CH/TXT queries once the number of "running" lines found in that named.run file has increased after the server is started.
This commit is contained in:
parent
9146b956ae
commit
18e20f95f6
@ -127,9 +127,10 @@ if ($server_arg) {
|
||||
# Start the servers we found.
|
||||
|
||||
foreach my $name(@ns) {
|
||||
my $instances_so_far = count_running_lines($name);
|
||||
&check_ns_port($name);
|
||||
&start_ns_server($name, $options_arg);
|
||||
&verify_ns_server($name);
|
||||
&verify_ns_server($name, $instances_so_far);
|
||||
}
|
||||
|
||||
foreach my $name(@ans) {
|
||||
@ -373,24 +374,28 @@ sub start_ans_server {
|
||||
start_server($server, $command, $pid_file);
|
||||
}
|
||||
|
||||
sub verify_ns_server {
|
||||
sub count_running_lines {
|
||||
my ( $server ) = @_;
|
||||
|
||||
my $tries = 0;
|
||||
|
||||
my $runfile = "$testdir/$server/named.run";
|
||||
|
||||
while (1) {
|
||||
# the shell *ought* to have created the file immediately, but this
|
||||
# logic allows the creation to be delayed without issues
|
||||
if (open(my $fh, "<", $runfile)) {
|
||||
# the two non-whitespace blobs should be the date and time
|
||||
# but we don't care about them really, only that they are there
|
||||
if (grep /^\S+ \S+ running\R/, <$fh>) {
|
||||
last;
|
||||
}
|
||||
}
|
||||
# the shell *ought* to have created the file immediately, but this
|
||||
# logic allows the creation to be delayed without issues
|
||||
if (open(my $fh, "<", $runfile)) {
|
||||
# the two non-whitespace blobs should be the date and time
|
||||
# but we don't care about them really, only that they are there
|
||||
return scalar(grep /^\S+ \S+ running\R/, <$fh>);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
sub verify_ns_server {
|
||||
my ( $server, $instances_so_far ) = @_;
|
||||
|
||||
my $tries = 0;
|
||||
|
||||
while (count_running_lines($server) < $instances_so_far + 1) {
|
||||
$tries++;
|
||||
|
||||
if ($tries >= 30) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user