From d55f34ed78f009213c0e3b137029b7a74fd78204 Mon Sep 17 00:00:00 2001 From: Zeyad Yasser Date: Wed, 30 Jun 2021 18:59:54 +0200 Subject: [PATCH] test/ci: sync netns_lock test and its --post-start hook The --post-start hook creates a netns which the test should enter at the beginning of the test. The test randomly failed in CI; this is most likely caused by a race condition. I suspect the following flow is the root cause: 1. --post-start hook starts just after the test (in parallel) 2. --post-start hook calls ip netns add to create the test netns 3. ip creates the netns file 4. netns_lock test opens that file and uses it in setns 5. ip mounts the netns to the file The test fails at step 4 because the netns is not yet mounted to the file. I made the test wait for SYNCFILE to be created by the --post-start hook before it tries to open the netns file and call setns. Signed-off-by: Zeyad Yasser --- test/zdtm/static/netns_lock.c | 5 +++-- test/zdtm/static/netns_lock.hook | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/netns_lock.c b/test/zdtm/static/netns_lock.c index 300349c03..f58ba83da 100644 --- a/test/zdtm/static/netns_lock.c +++ b/test/zdtm/static/netns_lock.c @@ -8,6 +8,7 @@ const char *test_author = "Zeyad Yasser "; #include #define NS_PATH "/var/run/netns/criu-net-lock-test" +#define SYNCFILE_PATH "net_lock.sync" #define MAX_RETRY 3 int main(int argc, char **argv) @@ -41,8 +42,7 @@ int main(int argc, char **argv) */ for (i = 0; i < MAX_RETRY; i++) { - ns_fd = open(NS_PATH, O_RDONLY); - if (ns_fd < 0) { + if (access(SYNCFILE_PATH, F_OK )) { /* Netns not created yet by post-start hook */ sleep(1); continue; @@ -50,6 +50,7 @@ int main(int argc, char **argv) break; } + ns_fd = open(NS_PATH, O_RDONLY); if (ns_fd < 0) { pr_perror("can't open network ns"); return 1; diff --git a/test/zdtm/static/netns_lock.hook b/test/zdtm/static/netns_lock.hook index 666fa4d5f..b5508a7cb 100755 --- a/test/zdtm/static/netns_lock.hook +++ 
b/test/zdtm/static/netns_lock.hook @@ -15,17 +15,25 @@ CLONE_NEWNET = 0x40000000 PORT = 8880 NETNS = "criu-net-lock-test" TIMEOUT = 0.1 +SYNCFILE = "zdtm/static/net_lock.sync" def nsenter(): with open("/var/run/netns/{}".format(NETNS)) as f: libc.setns(f.fileno(), CLONE_NEWNET) +def create_sync_file(): + open(SYNCFILE, "wb").close() + if sys.argv[1] == "--post-start": # Add test netns subprocess.Popen(["ip", "netns", "add", NETNS]).wait() nsenter() # Enter test netns subprocess.Popen(["ip", "link", "set", "up", "dev", "lo"]).wait() + # Let the test know that the netns was initialized successfully; + # the test waits until it can access SYNCFILE + create_sync_file() + # TCP server daemon pid = os.fork() if(pid == 0): @@ -144,3 +152,5 @@ if sys.argv[1] == "--post-restore": if sys.argv[1] == "--clean": # Delete test netns subprocess.Popen(["ip", "netns", "delete", NETNS]).wait() + # Delete sync file + os.remove(SYNCFILE)