
restorer: Add a lock around cgroupd communication.

Threads are put into cgroups through the cgroupd thread, which
communicates with other threads using a socketpair.

Previously, each thread received a dup'd copy of the socket and did the
following:

    sendmsg(socket_dup_fd, my_cgroup_set);

    // Wait for an ack addressed to this thread.
    while (1) {
        recvmsg(socket_dup_fd, &h, MSG_PEEK);
        if (h.pid != my_pid)
            continue;
        recvmsg(socket_dup_fd, &h, 0);  // take our ack off the queue
        break;
    }
    close(socket_dup_fd);

When restoring a process with many threads, all of them would spin in
the above loop, each waiting for its own PID to appear.
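
The ack is addressed to a specific thread through the sender's
credentials: an SCM_CREDENTIALS control message whose pid is compared
against the receiver's TID (see the cred->pid check in the diff below).
As a rough illustration of that part of the socket API, using plain
libc calls and a made-up function name (the real restorer code uses
CRIU's sys_* wrappers):

    #define _GNU_SOURCE
    #include <sys/types.h>
    #include <sys/socket.h>

    /*
     * Peek at the next message on the cgroupd socket and return the
     * pid recorded in its SCM_CREDENTIALS ancillary data.  MSG_PEEK
     * leaves the message queued, so an ack meant for another thread
     * stays available to that thread.  SO_PASSCRED must be enabled on
     * the receiving socket for the kernel to attach the credentials.
     */
    static pid_t peek_ack_target(int sk)
    {
        char cmsg_buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
        struct msghdr h = { 0 };
        struct cmsghdr *ch;
        struct ucred *cred;

        h.msg_control = cmsg_buf;
        h.msg_controllen = sizeof(cmsg_buf);

        if (recvmsg(sk, &h, MSG_PEEK) < 0)
            return -1;

        ch = CMSG_FIRSTHDR(&h);
        if (!ch || ch->cmsg_level != SOL_SOCKET || ch->cmsg_type != SCM_CREDENTIALS)
            return -1;

        cred = (struct ucred *)CMSG_DATA(ch);
        return cred->pid;
    }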

In my test case, restoring a process with an 11.5G heap and 491 threads
could take anywhere between 10 and 60 seconds to complete.

To avoid the spinning, we drop the loop and MSG_PEEK, and add a lock
around the above code. This does not decrease parallelism, as the
cgroupd daemon uses a single thread anyway.
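
With the lock, the per-thread sequence becomes roughly the following
(same shorthand as above; cgroupd_sync_lock is the mutex added by this
patch, error handling omitted):

    mutex_lock(&cgroupd_sync_lock);
    sendmsg(socket_dup_fd, my_cgroup_set);
    recvmsg(socket_dup_fd, &h, 0);  // the only queued ack is ours
    mutex_unlock(&cgroupd_sync_lock);
    close(socket_dup_fd);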

With the lock in place, the same restore consistently takes around 10
seconds on my machine (Thinkpad P14s, AMD Ryzen 8840HS).

There is a similar "daemon" thread for user namespaces; it is already
protected by an analogous lock, userns_sync_lock, in __userns_call().

Fixes #2614

Signed-off-by: Han-Wen Nienhuys <hanwen@engflow.com>
Han-Wen Nienhuys 2025-03-13 08:46:16 +01:00 committed by Andrei Vagin
parent 5b4c819d54
commit 8d5cef546a
3 changed files with 31 additions and 32 deletions


@@ -2329,6 +2329,7 @@ int prepare_task_entries(void)
 	task_entries->nr_helpers = 0;
 	futex_set(&task_entries->start, CR_STATE_FAIL);
 	mutex_init(&task_entries->userns_sync_lock);
+	mutex_init(&task_entries->cgroupd_sync_lock);
 	mutex_init(&task_entries->last_pid_mutex);
 
 	return 0;


@@ -14,6 +14,7 @@ struct task_entries {
 	futex_t start;
 	atomic_t cr_err;
 	mutex_t userns_sync_lock;
+	mutex_t cgroupd_sync_lock;
 	mutex_t last_pid_mutex;
 };


@@ -704,9 +704,8 @@ static int send_cg_set(int sk, int cg_set)
 }
 
 /*
- * As this socket is shared among threads, recvmsg(MSG_PEEK)
- * from the socket until getting its own thread id as an
- * acknowledge of successful threaded cgroup fixup
+ * As the cgroupd socket is shared among threads and processes, this
+ * should be called with task_entries->cgroupd_sync_lock held.
  */
 static int recv_cg_set_restore_ack(int sk)
 {
@@ -719,33 +718,22 @@ static int recv_cg_set_restore_ack(int sk)
 	h.msg_control = cmsg;
 	h.msg_controllen = sizeof(cmsg);
 
-	while (1) {
-		ret = sys_recvmsg(sk, &h, MSG_PEEK);
-		if (ret < 0) {
-			pr_err("Unable to peek from cgroupd %d\n", ret);
-			return -1;
-		}
-
-		if (h.msg_controllen != sizeof(cmsg)) {
-			pr_err("The message from cgroupd is truncated\n");
-			return -1;
-		}
-
-		ch = CMSG_FIRSTHDR(&h);
-		cred = (struct ucred *)CMSG_DATA(ch);
-		if (cred->pid != sys_gettid())
-			continue;
-
-		/*
-		 * Actual remove message from recv queue of socket
-		 */
-		ret = sys_recvmsg(sk, &h, 0);
-		if (ret < 0) {
-			pr_err("Unable to receive from cgroupd %d\n", ret);
-			return -1;
-		}
-
-		break;
+	ret = sys_recvmsg(sk, &h, 0);
+	if (ret < 0) {
+		pr_err("Unable to receive from cgroupd %d\n", ret);
+		return -1;
+	}
+
+	if (h.msg_controllen != sizeof(cmsg)) {
+		pr_err("The message from cgroupd is truncated\n");
+		return -1;
+	}
+
+	ch = CMSG_FIRSTHDR(&h);
+	cred = (struct ucred *)CMSG_DATA(ch);
+	if (cred->pid != sys_gettid()) {
+		pr_err("cred pid %d != gettid\n", cred->pid);
+		return -1;
 	}
 	return 0;
 }
@@ -782,12 +770,21 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
 	rt_sigframe = (void *)&args->mz->rt_sigframe;
 
 	if (args->cg_set != -1) {
+		int err = 0;
+
+		mutex_lock(&task_entries_local->cgroupd_sync_lock);
 		pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set);
-		if (send_cg_set(args->cgroupd_sk, args->cg_set))
-			goto core_restore_end;
-		if (recv_cg_set_restore_ack(args->cgroupd_sk))
-			goto core_restore_end;
+		err = send_cg_set(args->cgroupd_sk, args->cg_set);
+		if (!err)
+			err = recv_cg_set_restore_ack(args->cgroupd_sk);
+		mutex_unlock(&task_entries_local->cgroupd_sync_lock);
 		sys_close(args->cgroupd_sk);
+		if (err)
+			goto core_restore_end;
 	}
 
 	if (restore_thread_common(args))