From 9756292a5f78438343357873809b726cb0a1a2b2 Mon Sep 17 00:00:00 2001 From: Michal Nowak Date: Mon, 19 Feb 2024 15:55:00 +0100 Subject: [PATCH] Add DoH and DoT stress tests, generate test configurations Add DoH and DoT stress test jobs. The DoH scenario on FreeBSD is omitted because all of Flamethrower's DoH queries time out on this platform. Since the response rate of DoT queries is lower than that of DoH and TCP, the expected TCP response rate for DoT jobs is 80% (90% for the other protocols). Due to the large number of similar stress test configurations, the "util/generate-stress-test-configs.py" script now generates them as part of a downstream pipeline. The script is expected to be run exclusively within the CI environment, which provides all the required environment variables and files. This refactoring brought the following changes: - To start a stress test immediately and not wait for artifacts of the autoreconf job, run the "autoreconf -fi" command as part of every job. - Drop the BIND_STRESS_TEST_* variables as they were rarely used and conflicted with mode and platform selection in the configuration generator. - Most pipelines now include a few short, randomly selected stress test jobs. To schedule all stress tests, set the ALL_BIND_STRESS_TESTS environment variable, push a tag to CI, or run a scheduled pipeline. - Set the BIND_STRESS_TESTS_RUN_TIME environment variable to pick a stress test runtime of your choosing; set the BIND_STRESS_TESTS_RATE environment variable to use a query rate different from the default. - Job timeout is set to 30 minutes plus stress test runtime in minutes. 
--- .gitlab-ci.yml | 292 ++------------------------- util/generate-stress-test-configs.py | 139 +++++++++++++ 2 files changed, 158 insertions(+), 273 deletions(-) create mode 100755 util/generate-stress-test-configs.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dbfb9dffed..8ab95af295 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,10 +51,6 @@ variables: # cross-testrun files as there is no need to use that feature in CI. PYTEST_ADDOPTS: "-p no:cacheprovider" - # Default platforms to run "stress" tests on - BIND_STRESS_TEST_OS: linux - BIND_STRESS_TEST_ARCH: amd64 - HYPOTHESIS_PROFILE: "ci" default: @@ -1699,279 +1695,29 @@ shotgun:doh-get: .stress-test: &stress_test stage: performance + +generate-stress-test-configs: + <<: *base_image + <<: *default_triggering_rules + stage: precheck script: - - *configure - - *setup_interfaces - - make -j${BUILD_PARALLEL_JOBS:-1} -k all V=1 - - make DESTDIR="${INSTALL_PATH}" install - - git clone --depth 1 https://gitlab.isc.org/isc-projects/bind9-qa.git - - cd bind9-qa/stress - - LD_LIBRARY_PATH="${INSTALL_PATH}/usr/local/lib" BIND_INSTALL_PATH="${INSTALL_PATH}/usr/local" WORKSPACE="${CI_PROJECT_DIR}" bash stress.sh + - util/generate-stress-test-configs.py > stress-test-configs.yml + artifacts: + paths: + - stress-test-configs.yml + needs: [] + +stress-test-child-pipeline: + <<: *default_triggering_rules + stage: performance + trigger: + include: + - artifact: stress-test-configs.yml + job: generate-stress-test-configs needs: - - job: autoreconf + - job: generate-stress-test-configs artifacts: true -.stress-test-long: &stress_test_long_job - <<: *stress_test - artifacts: - untracked: true - exclude: - - "output/ns4/*.dtq*" - - "output/ns4/large-delta-rpz*.local" - - "output/rpz_*" - expire_in: "1 week" - when: always - timeout: 2h - -.stress-test-short: &stress_test_short_job - <<: *stress_test - only: - - merge_requests - artifacts: - untracked: true - exclude: - - "output/ns4/*.dtq*" - - 
"output/ns4/large-delta-rpz*.local" - - "output/rpz_*" - when: always - -stress:short:authoritative:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 15 - -stress:short:recursive:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: recursive - RATE: 10000 - RUN_TIME: 15 - -stress:short:rpz:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 15 - -stress:short:authoritative:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 15 - -stress:short:recursive:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: recursive - RATE: 10000 - RUN_TIME: 15 - -stress:short:rpz:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_short_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 15 - -stress:short:authoritative:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_short_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 15 - -stress:short:recursive:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_short_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: recursive - RATE: 
10000 - RUN_TIME: 15 - -stress:short:rpz:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_short_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 15 - -stress:authoritative:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - -stress:recursive:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: recursive - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - -stress:rpz:fedora:41:amd64: - <<: *fedora_41_amd64_image - <<: *linux_amd64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - -stress:authoritative:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i) - -stress:recursive:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: 
"${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: recursive - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i) - -stress:rpz:fedora:41:arm64: - <<: *fedora_41_arm64_image - <<: *linux_arm64 - <<: *stress_test_long_job - variables: - CC: gcc - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /linux/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /arm64/i) - -stress:authoritative:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_long_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: authoritative - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /authoritative/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - -stress:recursive:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_long_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: recursive - RATE: 10000 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /recursive/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - -stress:rpz:freebsd13:amd64: - <<: *freebsd_stress_amd64 - <<: *stress_test_long_job - variables: - CC: clang - CFLAGS: "${CFLAGS_COMMON} -Og" - FLAME: /usr/local/bin/flame - MODE: rpz - RATE: 1500 - RUN_TIME: 60 - only: - variables: - - $CI_COMMIT_TAG || ($BIND_STRESS_TEST_OS =~ /freebsd/i && $BIND_STRESS_TEST_MODE =~ /rpz/i && $BIND_STRESS_TEST_ARCH =~ /amd64/i) - # git fsck operates over the whole repository and is sufficient to schedule it # only in one branch, preferably "main". 
GitLab's clone strategy prevents us
 # from using the "bind9" repo clone; we need to clone it ourselves.
diff --git a/util/generate-stress-test-configs.py b/util/generate-stress-test-configs.py
new file mode 100755
index 0000000000..55b27c9711
--- /dev/null
+++ b/util/generate-stress-test-configs.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+
+# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, you can obtain one at https://mozilla.org/MPL/2.0/.
+#
+# See the COPYRIGHT file distributed with this work for additional
+# information regarding copyright ownership.
+
+"""
+Generate GitLab CI job definitions for the BIND 9 stress tests.
+
+The script reads ".gitlab-ci.yml" from the current working directory, builds
+one job definition per selected (mode, protocol, platform) combination, and
+prints the resulting YAML document to standard output.  It reads its settings
+from environment variables and is meant to be run within the CI environment.
+"""
+
+import itertools
+import os
+import random
+
+import yaml
+
+# The CI configuration consists of plain mappings, sequences, and scalars, so
+# the safe loader is sufficient; unlike yaml.Loader, it never constructs
+# arbitrary Python objects.
+with open(".gitlab-ci.yml", encoding="utf-8") as gitlab_ci_yml:
+    anchors = yaml.safe_load(gitlab_ci_yml)
+
+# Mandatory environment variables; a missing one raises KeyError.
+ci_pipeline_source = os.environ["CI_PIPELINE_SOURCE"]
+install_path = os.environ["INSTALL_PATH"]
+project_directory = os.environ["CI_PROJECT_DIR"]
+
+# Optional environment variables
+all_bind_stress_tests = os.getenv("ALL_BIND_STRESS_TESTS")
+build_parallel_jobs = os.getenv("BUILD_PARALLEL_JOBS", "1")
+cflags_common = os.getenv("CFLAGS_COMMON", "")
+ci_commit_tag = os.getenv("CI_COMMIT_TAG")
+
+# Optional overrides for default test parameters
+env_traffic_rate = os.getenv("BIND_STRESS_TESTS_RATE")
+env_run_time = os.getenv("BIND_STRESS_TESTS_RUN_TIME")
+
+# Tags and scheduled pipelines produce longer jobs and always run the full
+# test matrix.
+if ci_commit_tag or ci_pipeline_source == "schedule":
+    all_bind_stress_tests = True
+    scenario = "long"
+    default_runtime = 60
+    expire_in = "1 week"
+else:
+    scenario = "short"
+    default_runtime = 15
+    expire_in = "1 day"
+
+# The run time (in minutes) is the same for every generated job, so it is
+# computed once here rather than for each job.
+runtime = int(env_run_time or default_runtime)
+
+ALL_MODES = "recursive", "authoritative", "rpz"
+ALL_PROTOCOLS = "tcp", "doh", "dot"
+ALL_PLATFORMS = ".fedora-41-amd64", ".fedora-41-arm64", ".freebsd-stress-amd64"
+
+# If ALL_BIND_STRESS_TESTS and CI_COMMIT_TAG environment variables are unset,
+# pick only two of three items from "modes", "protocols", and "platforms" to
+# make the "modes x protocols x platforms" matrix smaller.
+if all_bind_stress_tests is None and ci_commit_tag is None:
+    modes = random.sample(ALL_MODES, k=2)
+    protocols = random.sample(ALL_PROTOCOLS, k=2)
+    platforms = random.sample(ALL_PLATFORMS, k=2)
+else:
+    modes = ALL_MODES
+    protocols = ALL_PROTOCOLS
+    platforms = ALL_PLATFORMS
+
+jobs = {}
+
+for mode, protocol, platform in itertools.product(modes, protocols, platforms):
+    if "freebsd" in platform:
+        # Flamethrower-produced DoH queries on FreeBSD always time out. Skip
+        # DoH-on-FreeBSD jobs.
+        if protocol == "doh":
+            continue
+        job_platform = "freebsd:amd64"
+        compiler_binary = "clang"
+        flame_binary = "/usr/local/bin/flame"
+    else:
+        if "amd64" in platform:
+            job_platform = "linux:amd64"
+        else:
+            job_platform = "linux:arm64"
+        compiler_binary = "gcc"
+        flame_binary = "/usr/bin/flame"
+
+    # RPZ jobs use a lower default query rate than the other modes.
+    if mode == "rpz":
+        default_traffic_rate = 1500
+    else:
+        default_traffic_rate = 10000
+
+    traffic_rate = int(env_traffic_rate or default_traffic_rate)
+
+    # DoT queries have a lower response rate than DoH and TCP queries.
+    expected_tcp_response_rate = 80 if protocol == "dot" else 90
+
+    job_definition = {
+        "stage": "test",
+        "variables": {
+            "CC": compiler_binary,
+            "CFLAGS": f"{cflags_common} -Og",
+            "EXPECTED_TCP_RESPONSE_RATE": expected_tcp_response_rate,
+            "FLAME": flame_binary,
+            "MODE": mode,
+            "PROTOCOL": f"{protocol} udp",
+            "RATE": traffic_rate,
+            "RUN_TIME": runtime,
+        },
+        "script": [
+            "autoreconf -fi",
+            *anchors[".configure"],
+            *anchors[".setup_interfaces"],
+            f"make -j{build_parallel_jobs} -k all V=1",
+            f'make DESTDIR="{install_path}" install',
+            "git clone --depth 1 https://gitlab.isc.org/isc-projects/bind9-qa.git",
+            "cd bind9-qa/stress",
+            f'export LD_LIBRARY_PATH="{install_path}/usr/local/lib"',
+            f'export BIND_INSTALL_PATH="{install_path}/usr/local"',
+            f'export WORKSPACE="{project_directory}"',
+            "bash stress.sh",
+        ],
+        "rules": [{"if": '$CI_PIPELINE_SOURCE == "parent_pipeline"'}],
+        "timeout": f"{runtime + 30} minutes",
+        "artifacts": {
+            "untracked": True,
+            "when": "always",
+            "expire_in": expire_in,
+            "exclude": [
+                "output/ns4/*.dtq*",
+                "output/ns4/large-delta-rpz*.local",
+                "output/rpz_*",
+            ],
+        },
+    }
+
+    # Platform-specific keys are merged last, so on a key clash the value
+    # from ".gitlab-ci.yml" replaces the generated one.
+    job_definition |= anchors[platform]
+
+    job_name = f"stress:{scenario}:{mode}:{protocol}+udp:{job_platform}"
+    jobs[job_name] = job_definition
+
+
+# Jobs contain only plain YAML types; the safe dumper refuses anything else
+# instead of emitting Python-specific tags.
+print(yaml.safe_dump(jobs))