Mirror of https://github.com/checkpoint-restore/criu, synced 2025-08-22 01:51:51 +00:00
restorer: Add a lock around cgroupd communication.
Threads are put into cgroups through the cgroupd thread, which
communicates with the other threads over a socketpair. Previously, each
thread received a dup'd copy of the socket and did the following:

    sendmsg(socket_dup_fd, my_cgroup_set);

    // Wait for the ack.
    while (1) {
            recvmsg(socket_dup_fd, &h, MSG_PEEK);
            if (h.pid != my_pid)
                    continue;
            recvmsg(socket_dup_fd, &h, 0);
            break;
    }
    close(socket_dup_fd);

When restoring many threads, many of them would be spinning in this
loop waiting for their PID to appear. In my test case, restoring a
process with an 11.5G heap and 491 threads took anywhere between 10 and
60 seconds to complete.

To avoid the spinning, we drop the loop and MSG_PEEK, and take a lock
around the send/receive pair instead. This does not decrease
parallelism, as the cgroupd daemon is single-threaded anyway.

With the lock in place, the same restore consistently takes around 10
seconds on my machine (Thinkpad P14s, AMD Ryzen 8840HS).

There is a similar "daemon" thread for user namespaces; it is already
protected by the analogous userns_sync_lock in __userns_call().

Fixes #2614

Signed-off-by: Han-Wen Nienhuys <hanwen@engflow.com>
This commit is contained in:
parent 5b4c819d54
commit 8d5cef546a
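For orientation before reading the diff: once this patch is applied,
each restored thread's exchange with cgroupd reduces to the sketch
below (assembled from the new lines in the last hunk; the
error-handling goto is elided):

    mutex_lock(&task_entries_local->cgroupd_sync_lock);

    err = send_cg_set(args->cgroupd_sk, args->cg_set);
    if (!err)
            err = recv_cg_set_restore_ack(args->cgroupd_sk);

    mutex_unlock(&task_entries_local->cgroupd_sync_lock);
    sys_close(args->cgroupd_sk);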
@@ -2329,6 +2329,7 @@ int prepare_task_entries(void)
 	task_entries->nr_helpers = 0;
 	futex_set(&task_entries->start, CR_STATE_FAIL);
 	mutex_init(&task_entries->userns_sync_lock);
+	mutex_init(&task_entries->cgroupd_sync_lock);
 	mutex_init(&task_entries->last_pid_mutex);
 
 	return 0;
@@ -14,6 +14,7 @@ struct task_entries {
 	futex_t start;
 	atomic_t cr_err;
 	mutex_t userns_sync_lock;
+	mutex_t cgroupd_sync_lock;
 	mutex_t last_pid_mutex;
 };
 
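A note on why a mutex_t field inside task_entries can serialize threads
that belong to different restored processes: task_entries lives in a
mapping shared by all restored tasks, and CRIU's mutex_t is a small
futex-based lock usable from any task that maps it. As a standalone
analogy only (not CRIU code; build with cc -pthread), a process-shared
pthread mutex placed in an anonymous shared mapping behaves the same
way:

    /* Analogy sketch, not CRIU code: a lock living in shared memory is
     * usable by every process that maps it. */
    #include <pthread.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            /* Anonymous shared mapping, inherited across fork(). */
            pthread_mutex_t *mu = mmap(NULL, sizeof(*mu),
                                       PROT_READ | PROT_WRITE,
                                       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
            pthread_mutexattr_t attr;

            pthread_mutexattr_init(&attr);
            pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
            pthread_mutex_init(mu, &attr);

            if (fork() == 0) {
                    pthread_mutex_lock(mu);
                    printf("child %d holds the shared lock\n", getpid());
                    pthread_mutex_unlock(mu);
                    _exit(0);
            }
            pthread_mutex_lock(mu);
            printf("parent %d holds the shared lock\n", getpid());
            pthread_mutex_unlock(mu);
            wait(NULL);
            return 0;
    }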
@@ -704,9 +704,8 @@ static int send_cg_set(int sk, int cg_set)
 }
 
 /*
- * As this socket is shared among threads, recvmsg(MSG_PEEK)
- * from the socket until getting its own thread id as an
- * acknowledge of successful threaded cgroup fixup
+ * As the cgroupd socket is shared among threads and processes, this
+ * should be called with task_entries->cgroupd_sync_lock held.
  */
 static int recv_cg_set_restore_ack(int sk)
 {
@@ -719,33 +718,22 @@ static int recv_cg_set_restore_ack(int sk)
 	h.msg_control = cmsg;
 	h.msg_controllen = sizeof(cmsg);
 
-	while (1) {
-		ret = sys_recvmsg(sk, &h, MSG_PEEK);
-		if (ret < 0) {
-			pr_err("Unable to peek from cgroupd %d\n", ret);
-			return -1;
-		}
-
-		if (h.msg_controllen != sizeof(cmsg)) {
-			pr_err("The message from cgroupd is truncated\n");
-			return -1;
-		}
-
-		ch = CMSG_FIRSTHDR(&h);
-		cred = (struct ucred *)CMSG_DATA(ch);
-		if (cred->pid != sys_gettid())
-			continue;
-
-		/*
-		 * Actual remove message from recv queue of socket
-		 */
-		ret = sys_recvmsg(sk, &h, 0);
-		if (ret < 0) {
-			pr_err("Unable to receive from cgroupd %d\n", ret);
-			return -1;
-		}
-
-		break;
+	ret = sys_recvmsg(sk, &h, 0);
+	if (ret < 0) {
+		pr_err("Unable to receive from cgroupd %d\n", ret);
+		return -1;
+	}
+
+	if (h.msg_controllen != sizeof(cmsg)) {
+		pr_err("The message from cgroupd is truncated\n");
+		return -1;
+	}
+
+	ch = CMSG_FIRSTHDR(&h);
+	cred = (struct ucred *)CMSG_DATA(ch);
+	if (cred->pid != sys_gettid()) {
+		pr_err("cred pid %d != gettid\n", cred->pid);
+		return -1;
 	}
 	return 0;
 }
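The ack is identified by sender credentials: cgroupd attaches an
SCM_CREDENTIALS control message whose pid names the thread being
acknowledged, and the receiving thread compares it against
sys_gettid(). Below is a minimal standalone demonstration of that cmsg
pattern (plain libc, not CRIU code; a process may attach its own pid
without any privilege):

    #define _GNU_SOURCE /* struct ucred, SCM_CREDENTIALS */
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
            int sk[2], on = 1;
            char data = 'x', cbuf[CMSG_SPACE(sizeof(struct ucred))];
            struct iovec iov = { .iov_base = &data, .iov_len = 1 };
            struct msghdr mh = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *ch;
            struct ucred uc, *cred;

            if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sk))
                    return 1;
            /* The receiving end must opt in to credential passing. */
            setsockopt(sk[1], SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));

            /* "Daemon" side: attach our own pid/uid/gid as credentials. */
            ch = CMSG_FIRSTHDR(&mh);
            ch->cmsg_level = SOL_SOCKET;
            ch->cmsg_type = SCM_CREDENTIALS;
            ch->cmsg_len = CMSG_LEN(sizeof(struct ucred));
            uc.pid = getpid();
            uc.uid = getuid();
            uc.gid = getgid();
            memcpy(CMSG_DATA(ch), &uc, sizeof(uc));
            if (sendmsg(sk[0], &mh, 0) < 0)
                    return 1;

            /* "Thread" side: one plain recvmsg(); the pid in the cmsg
             * says whose ack this is. */
            mh.msg_controllen = sizeof(cbuf);
            if (recvmsg(sk[1], &mh, 0) < 0)
                    return 1;
            cred = (struct ucred *)CMSG_DATA(CMSG_FIRSTHDR(&mh));
            printf("ack for pid %d\n", cred->pid);
            return 0;
    }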
@@ -782,12 +770,21 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
 	rt_sigframe = (void *)&args->mz->rt_sigframe;
 
 	if (args->cg_set != -1) {
+		int err = 0;
+
+		mutex_lock(&task_entries_local->cgroupd_sync_lock);
+
 		pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set);
-		if (send_cg_set(args->cgroupd_sk, args->cg_set))
-			goto core_restore_end;
-		if (recv_cg_set_restore_ack(args->cgroupd_sk))
-			goto core_restore_end;
+
+		err = send_cg_set(args->cgroupd_sk, args->cg_set);
+		if (!err)
+			err = recv_cg_set_restore_ack(args->cgroupd_sk);
+
+		mutex_unlock(&task_entries_local->cgroupd_sync_lock);
 		sys_close(args->cgroupd_sk);
+
+		if (err)
+			goto core_restore_end;
 	}
 
 	if (restore_thread_common(args))