2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-28 12:57:57 +00:00

test/ci: sync netns_lock test and its --post-start hook

The --post-start hook creates a netns which the test should enter
at the beginning of the test.

The test failed randomly in CI; this is most likely caused by
a race condition.

I suspect this flow is the root cause:
	1. --post-start hook starts just after the test (in parallel)
	2. --post-start hook calls ip netns add to create the test netns
	3. ip creates the netns file
	4. netns_lock test opens that file and uses it in setns
	5. ip mounts the netns to the file

Of course the test fails at step 4 because the netns is not yet mounted
on the file.

I made the test wait for SYNCFILE to be created by the --post-start
hook before it tries to open the netns file and call setns.

Signed-off-by: Zeyad Yasser <zeyady98@gmail.com>
This commit is contained in:
Zeyad Yasser 2021-06-30 18:59:54 +02:00 committed by Andrei Vagin
parent b290df9a65
commit d55f34ed78
2 changed files with 13 additions and 2 deletions

View File

@ -8,6 +8,7 @@ const char *test_author = "Zeyad Yasser <zeyady98@gmail.com>";
#include <sched.h>
#define NS_PATH "/var/run/netns/criu-net-lock-test"
#define SYNCFILE_PATH "net_lock.sync"
#define MAX_RETRY 3
int main(int argc, char **argv)
@ -41,8 +42,7 @@ int main(int argc, char **argv)
*/
for (i = 0; i < MAX_RETRY; i++) {
ns_fd = open(NS_PATH, O_RDONLY);
if (ns_fd < 0) {
if (access(SYNCFILE_PATH, F_OK )) {
/* Netns not created yet by post-start hook */
sleep(1);
continue;
@ -50,6 +50,7 @@ int main(int argc, char **argv)
break;
}
ns_fd = open(NS_PATH, O_RDONLY);
if (ns_fd < 0) {
pr_perror("can't open network ns");
return 1;

View File

@ -15,17 +15,25 @@ CLONE_NEWNET = 0x40000000
# NOTE(review): presumably the port used by the TCP server daemon started in
# the --post-start hook; its use is not visible in this hunk -- confirm.
PORT = 8880
# Name of the network namespace created with `ip netns add` and entered via
# /var/run/netns/<NETNS>.
NETNS = "criu-net-lock-test"
# NOTE(review): unit and use are not visible in this hunk -- confirm.
TIMEOUT = 0.1
# Marker file created by create_sync_file() once the netns is set up and
# removed again by the --clean hook.
SYNCFILE = "zdtm/static/net_lock.sync"
def nsenter():
    """Move the calling process into the test network namespace.

    Opens the namespace file for NETNS under /var/run/netns and hands its
    descriptor to setns(2) with CLONE_NEWNET.

    Raises:
        IOError/OSError: if the namespace file cannot be opened.
        OSError: if setns() reports failure.
    """
    ns_path = "/var/run/netns/{}".format(NETNS)
    with open(ns_path) as ns_file:
        # setns(2) returns 0 on success and -1 on error. Ignoring the result
        # would let the hook continue in the wrong network namespace.
        # NOTE(review): assumes `libc` is a ctypes handle with the default
        # int return type -- confirm against its definition.
        if libc.setns(ns_file.fileno(), CLONE_NEWNET) != 0:
            raise OSError("setns() failed for {}".format(ns_path))
def create_sync_file():
    """Create SYNCFILE as an empty file, truncating it if it already exists."""
    # Nothing is written: the file is opened for binary writing and closed
    # again straight away when the `with` block exits.
    with open(SYNCFILE, "wb"):
        pass
if sys.argv[1] == "--post-start":
# Add test netns
subprocess.Popen(["ip", "netns", "add", NETNS]).wait()
nsenter() # Enter test netns
subprocess.Popen(["ip", "link", "set", "up", "dev", "lo"]).wait()
# Let the test know that the netns has been initialized successfully;
# the test checks for this by testing access to SYNCFILE
create_sync_file()
# TCP server daemon
pid = os.fork()
if(pid == 0):
@ -144,3 +152,5 @@ if sys.argv[1] == "--post-restore":
if sys.argv[1] == "--clean":
    # Delete test netns
    subprocess.Popen(["ip", "netns", "delete", NETNS]).wait()
    # Delete sync file. It may be absent (e.g. if the --post-start hook never
    # reached create_sync_file()), and cleanup must not fail in that case.
    if os.path.exists(SYNCFILE):
        os.remove(SYNCFILE)