From 4a8d404876f88e6f9f030c4b80db77cadfb783a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 28 Apr 2021 07:56:47 +0200 Subject: [PATCH 1/3] Limit logging for verbose system tests The system test framework starts all named instances with the "-d 99" command line option (unless it is overridden by a named.args file in a given instance's working directory). This causes a lot of log messages to be written to named.run files - currently over 5 million lines for a single test suite run. While debugging information preserved in the log files is essential for troubleshooting intermittent test failures, some system tests involve sending hundreds or even thousands of queries, which causes the relevant log files to explode in size. When multiple tests (or even multiple test suites) are run in parallel, excessive logging contributes considerably to the I/O load on the test host, increasing the odds of intermittent test failures getting triggered. Decrease the debug level for the seven most verbose named instances: - use "-d 3" for ns2 in the "cacheclean" system test (it is the lowest logging level at which the test still passes without the need to apply any changes to tests.sh), - use "-d 1" for the other six named instances. This roughly halves the number of lines logged by each test suite run while still leaving enough information in the logs to allow at least basic troubleshooting in case of test failures. This approach was chosen as it results in a greater decrease in the number of lines logged than running all named instances with "-d 3", without causing any test failures. --- bin/tests/system/cacheclean/ns1/named.args | 1 + bin/tests/system/cacheclean/ns2/named.args | 1 + bin/tests/system/fetchlimit/ns3/named.args | 1 + bin/tests/system/timeouts/ns1/named.args | 1 + bin/tests/system/zero/ns2/named.args | 1 + bin/tests/system/zero/ns3/named.args | 1 + bin/tests/system/zero/ns4/named.args | 1 + util/copyrights | 7 +++++++ 8 files changed, 14 insertions(+) create mode 100644 bin/tests/system/cacheclean/ns1/named.args create mode 100644 bin/tests/system/cacheclean/ns2/named.args create mode 100644 bin/tests/system/fetchlimit/ns3/named.args create mode 100644 bin/tests/system/timeouts/ns1/named.args create mode 100644 bin/tests/system/zero/ns2/named.args create mode 100644 bin/tests/system/zero/ns3/named.args create mode 100644 bin/tests/system/zero/ns4/named.args diff --git a/bin/tests/system/cacheclean/ns1/named.args b/bin/tests/system/cacheclean/ns1/named.args new file mode 100644 index 0000000000..2ba9a1403d --- /dev/null +++ b/bin/tests/system/cacheclean/ns1/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D cacheclean-ns1 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/cacheclean/ns2/named.args b/bin/tests/system/cacheclean/ns2/named.args new file mode 100644 index 0000000000..1bcc5eab28 --- /dev/null +++ b/bin/tests/system/cacheclean/ns2/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 3 -D cacheclean-ns2 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/fetchlimit/ns3/named.args b/bin/tests/system/fetchlimit/ns3/named.args new file mode 100644 index 0000000000..6bd3e6cf64 --- /dev/null +++ b/bin/tests/system/fetchlimit/ns3/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D fetchlimit-ns3 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/timeouts/ns1/named.args b/bin/tests/system/timeouts/ns1/named.args new file mode 100644 index 0000000000..2df2be2737 --- /dev/null +++ b/bin/tests/system/timeouts/ns1/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D timeouts-ns1 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/zero/ns2/named.args b/bin/tests/system/zero/ns2/named.args new file mode 100644 index 0000000000..b20594ea45 --- /dev/null +++ b/bin/tests/system/zero/ns2/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D zero-ns2 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/zero/ns3/named.args b/bin/tests/system/zero/ns3/named.args new file mode 100644 index 0000000000..9d89bd697c --- /dev/null +++ b/bin/tests/system/zero/ns3/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D zero-ns3 -X named.lock -g -T maxcachesize=2097152 diff --git a/bin/tests/system/zero/ns4/named.args b/bin/tests/system/zero/ns4/named.args new file mode 100644 index 0000000000..09d1fe01fc --- /dev/null +++ b/bin/tests/system/zero/ns4/named.args @@ -0,0 +1 @@ +-m record,size,mctx -c named.conf -d 1 -D zero-ns4 -X named.lock -g -T maxcachesize=2097152 diff --git a/util/copyrights b/util/copyrights index 13f643718c..e716e78985 100644 --- a/util/copyrights +++ b/util/copyrights @@ -246,6 +246,8 @@ ./bin/tests/system/cacheclean/clean.sh SH 2001,2004,2007,2011,2012,2014,2015,2016,2018,2019,2020,2021 ./bin/tests/system/cacheclean/dig.batch X 2001,2018,2019,2020,2021 ./bin/tests/system/cacheclean/knowngood.dig.out X 2001,2018,2019,2020,2021 +./bin/tests/system/cacheclean/ns1/named.args X 2021 +./bin/tests/system/cacheclean/ns2/named.args X 2021 ./bin/tests/system/cacheclean/setup.sh SH 2018,2019,2020,2021 ./bin/tests/system/cacheclean/tests.sh SH 2001,2004,2007,2011,2012,2013,2014,2016,2018,2019,2020,2021 ./bin/tests/system/case/clean.sh SH 2013,2014,2015,2016,2018,2019,2020,2021 @@ -447,6 +449,7 @@ ./bin/tests/system/feature-test.c C 2016,2017,2018,2019,2020,2021 ./bin/tests/system/fetchlimit/ans4/ans.pl PERL 2019,2020,2021 ./bin/tests/system/fetchlimit/clean.sh SH 2015,2016,2018,2019,2020,2021 +./bin/tests/system/fetchlimit/ns3/named.args X 2021 ./bin/tests/system/fetchlimit/setup.sh SH 2015,2016,2018,2019,2020,2021 ./bin/tests/system/fetchlimit/tests.sh SH 2015,2016,2018,2019,2020,2021 ./bin/tests/system/filter-aaaa/clean.sh SH 2010,2012,2014,2016,2018,2019,2020,2021 @@ -904,6 +907,7 @@ ./bin/tests/system/testsummary.sh SH 2018,2019,2020,2021 ./bin/tests/system/timeouts/clean.sh SH 2021 ./bin/tests/system/timeouts/conftest.py PYTHON 2021 +./bin/tests/system/timeouts/ns1/named.args X 2021 ./bin/tests/system/timeouts/prereq.sh SH 2021 ./bin/tests/system/timeouts/setup.sh SH 2021 ./bin/tests/system/timeouts/tests-tcp.py PYTHON-BIN 2021 @@ -1010,6 +1014,9 @@ ./bin/tests/system/xferquota/tests.sh SH 2000,2001,2004,2007,2012,2016,2018,2019,2020,2021 ./bin/tests/system/zero/ans5/ans.pl PERL 2016,2018,2019,2020,2021 ./bin/tests/system/zero/clean.sh SH 2013,2014,2015,2016,2018,2019,2020,2021 +./bin/tests/system/zero/ns2/named.args X 2021 +./bin/tests/system/zero/ns3/named.args X 2021 +./bin/tests/system/zero/ns4/named.args X 2021 ./bin/tests/system/zero/setup.sh SH 2013,2014,2016,2018,2019,2020,2021 ./bin/tests/system/zero/tests.sh SH 2013,2016,2017,2018,2019,2020,2021 ./bin/tests/system/zonechecks/clean.sh SH 2004,2007,2012,2014,2015,2016,2018,2019,2020,2021 From 17e5c2a50e693b7f01eddad4f8bdd4828d1050ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 28 Apr 2021 07:56:47 +0200 Subject: [PATCH 2/3] Prevent useless logging in the "tcp" system test The regression test for CVE-2020-8620 causes a lot of useless messages to be logged. However, globally decreasing the log level for the affected named instance would be a step too far as debugging information may be useful for troubleshooting other checks in the "tcp" system test. Starting a separate named instance for a single check should be avoided when possible and thus is also not a good solution. As a compromise, run "rndc trace 1" for the affected named instance before starting the regression test for CVE-2020-8620. --- bin/tests/system/tcp/tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index 6074427c6f..4faadaa1cb 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -183,9 +183,15 @@ grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) +#################################################### +# NOTE: The next test resets the debug level to 1. # +#################################################### + n=$((n + 1)) echo_i "checking that BIND 9 doesn't crash on long TCP messages ($n)" ret=0 +# Avoid logging useless information. +rndccmd 10.53.0.1 trace 1 || ret=1 { $PERL ../packet.pl -a "10.53.0.1" -p "${PORT}" -t tcp -r 300000 1996-alloc_dnsbuf-crash-test.pkt || ret=1 ; } | cat_i dig_with_opts +tcp @10.53.0.1 txt.example > dig.out.test$n || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi From 241e85ef0c2d9ce71aecb13357369b5e92250aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 28 Apr 2021 07:56:47 +0200 Subject: [PATCH 3/3] Warn when log files grow too big in system tests Exerting excessive I/O load on the host running system tests should be avoided in order to limit the number of false positives reported by the system test suite. In some cases, running named with "-d 99" (which is the default for system tests) results in a massive amount of logs being generated, most of which are useless. Implement a log file size check to draw developers' attention to overly verbose named instances used in system tests. The warning threshold of 200,000 lines was chosen arbitrarily. --- bin/tests/system/run.sh.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bin/tests/system/run.sh.in b/bin/tests/system/run.sh.in index 67185a02aa..3460ede612 100644 --- a/bin/tests/system/run.sh.in +++ b/bin/tests/system/run.sh.in @@ -321,6 +321,11 @@ else fi fi +NAMED_RUN_LINES_THRESHOLD=200000 +find "${systest}" -type f -name "named.run" -exec wc -l {} \; | awk "\$1 > ${NAMED_RUN_LINES_THRESHOLD} { print \$2 }" | sort | while read -r LOG_FILE; do + echowarn "I:${systest}:${LOG_FILE} contains more than ${NAMED_RUN_LINES_THRESHOLD} lines, consider tweaking the test to limit disk I/O" +done + echoend "E:$systest:$(date_with_args)" exit $status