Mirror of https://github.com/checkpoint-restore/criu, synced 2025-08-22 09:58:09 +00:00
restorer: Add a lock around cgroupd communication.
Threads are put into cgroups through the cgroupd thread, which communicates with the other threads over a socketpair. Previously, each thread received a dup'd copy of the socket and did the following:

    sendmsg(socket_dup_fd, my_cgroup_set);
    // wait for ack.
    while (1) {
            recvmsg(socket_dup_fd, &h, MSG_PEEK);
            if (h.pid != my_pid)
                    continue;
            recvmsg(socket_dup_fd, &h, 0);
            break;
    }
    close(socket_dup_fd);

When restoring many threads, many of them would be spinning in the above loop waiting for their own PID to appear. In my test case, restoring a process with an 11.5G heap and 491 threads could take anywhere between 10 and 60 seconds to complete.

To avoid the spinning, we drop the loop and the MSG_PEEK, and add a lock around the above code. This does not decrease parallelism, as the cgroupd daemon uses a single thread anyway. With the lock in place, the same restore consistently takes around 10 seconds on my machine (Thinkpad P14s, AMD Ryzen 8840HS).

There is a similar "daemon" thread for user namespaces; it is already protected by a similar userns_sync_lock in __userns_call().

Fixes #2614

Signed-off-by: Han-Wen Nienhuys <hanwen@engflow.com>
This commit is contained in:
parent 5b4c819d54
commit 8d5cef546a
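The shape of the fix, independent of the restorer internals: hold one lock across the whole request/ack exchange on the shared socket, so a thread only ever reads the reply to its own request. Below is a minimal standalone sketch of that pattern using plain pthreads and a socketpair; the names (cgroupd_stub, worker) and the integer request/ack payloads are invented for illustration and are not CRIU code.

/*
 * Illustrative sketch only: serialize request/ack exchanges on a socket
 * shared by many threads, instead of having every thread MSG_PEEK for its
 * own reply. cgroupd_stub and worker are made-up names; CRIU's restorer
 * uses its own mutex_t and sys_* wrappers instead of pthreads and libc.
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#define NR_WORKERS 8

static int sk[2]; /* sk[0]: daemon end, sk[1]: end shared by all workers */
static pthread_mutex_t ack_lock = PTHREAD_MUTEX_INITIALIZER;

/* Daemon thread: service one request at a time, as cgroupd does. */
static void *cgroupd_stub(void *arg)
{
        int req;

        (void)arg;
        for (int i = 0; i < NR_WORKERS; i++) {
                if (read(sk[0], &req, sizeof(req)) != (ssize_t)sizeof(req))
                        break;
                /* ...apply the request here, then acknowledge it... */
                if (write(sk[0], &req, sizeof(req)) != (ssize_t)sizeof(req))
                        break;
        }
        return NULL;
}

/* Worker thread: the lock guarantees the ack it reads is its own. */
static void *worker(void *arg)
{
        int req = (int)(long)arg, ack = -1;

        pthread_mutex_lock(&ack_lock);
        if (write(sk[1], &req, sizeof(req)) == (ssize_t)sizeof(req))
                read(sk[1], &ack, sizeof(ack));
        pthread_mutex_unlock(&ack_lock);

        printf("worker %d got ack %d\n", req, ack);
        return NULL;
}

int main(void)
{
        pthread_t d, w[NR_WORKERS];

        if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sk))
                return 1;

        pthread_create(&d, NULL, cgroupd_stub, NULL);
        for (long i = 0; i < NR_WORKERS; i++)
                pthread_create(&w[i], NULL, worker, (void *)i);

        for (int i = 0; i < NR_WORKERS; i++)
                pthread_join(w[i], NULL);
        pthread_join(d, NULL);
        return 0;
}

Since the daemon answers one request at a time anyway, serializing the clients costs no parallelism, which is the same argument the commit message makes.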
@@ -2329,6 +2329,7 @@ int prepare_task_entries(void)
         task_entries->nr_helpers = 0;
         futex_set(&task_entries->start, CR_STATE_FAIL);
         mutex_init(&task_entries->userns_sync_lock);
+        mutex_init(&task_entries->cgroupd_sync_lock);
         mutex_init(&task_entries->last_pid_mutex);
 
         return 0;
@@ -14,6 +14,7 @@ struct task_entries {
         futex_t start;
         atomic_t cr_err;
         mutex_t userns_sync_lock;
+        mutex_t cgroupd_sync_lock;
         mutex_t last_pid_mutex;
 };
 
@@ -704,9 +704,8 @@ static int send_cg_set(int sk, int cg_set)
 }
 
 /*
- * As this socket is shared among threads, recvmsg(MSG_PEEK)
- * from the socket until getting its own thread id as an
- * acknowledge of successful threaded cgroup fixup
+ * As the cgroupd socket is shared among threads and processes, this
+ * should be called with task_entries->cgroupd_sync_lock held.
  */
 static int recv_cg_set_restore_ack(int sk)
 {
@@ -719,33 +718,22 @@ static int recv_cg_set_restore_ack(int sk)
         h.msg_control = cmsg;
         h.msg_controllen = sizeof(cmsg);
 
-        while (1) {
-                ret = sys_recvmsg(sk, &h, MSG_PEEK);
-                if (ret < 0) {
-                        pr_err("Unable to peek from cgroupd %d\n", ret);
-                        return -1;
-                }
-
-                if (h.msg_controllen != sizeof(cmsg)) {
-                        pr_err("The message from cgroupd is truncated\n");
-                        return -1;
-                }
-
-                ch = CMSG_FIRSTHDR(&h);
-                cred = (struct ucred *)CMSG_DATA(ch);
-                if (cred->pid != sys_gettid())
-                        continue;
-
-                /*
-                 * Actual remove message from recv queue of socket
-                 */
-                ret = sys_recvmsg(sk, &h, 0);
-                if (ret < 0) {
-                        pr_err("Unable to receive from cgroupd %d\n", ret);
-                        return -1;
-                }
-
-                break;
+        ret = sys_recvmsg(sk, &h, 0);
+        if (ret < 0) {
+                pr_err("Unable to receive from cgroupd %d\n", ret);
+                return -1;
+        }
+
+        if (h.msg_controllen != sizeof(cmsg)) {
+                pr_err("The message from cgroupd is truncated\n");
+                return -1;
+        }
+
+        ch = CMSG_FIRSTHDR(&h);
+        cred = (struct ucred *)CMSG_DATA(ch);
+        if (cred->pid != sys_gettid()) {
+                pr_err("cred pid %d != gettid\n", cred->pid);
+                return -1;
         }
         return 0;
 }
@@ -782,12 +770,21 @@ __visible long __export_restore_thread(struct thread_restore_args *args)
         rt_sigframe = (void *)&args->mz->rt_sigframe;
 
         if (args->cg_set != -1) {
+                int err = 0;
+
+                mutex_lock(&task_entries_local->cgroupd_sync_lock);
+
                 pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set);
-                if (send_cg_set(args->cgroupd_sk, args->cg_set))
-                        goto core_restore_end;
-                if (recv_cg_set_restore_ack(args->cgroupd_sk))
-                        goto core_restore_end;
+
+                err = send_cg_set(args->cgroupd_sk, args->cg_set);
+                if (!err)
+                        err = recv_cg_set_restore_ack(args->cgroupd_sk);
+
+                mutex_unlock(&task_entries_local->cgroupd_sync_lock);
                 sys_close(args->cgroupd_sk);
+
+                if (err)
+                        goto core_restore_end;
         }
 
         if (restore_thread_common(args))
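For context on the recv_cg_set_restore_ack() hunk above: the ack carries the sender's identity as SCM_CREDENTIALS ancillary data, and with the lock held a mismatched pid is now a hard error rather than a reason to keep peeking. A rough equivalent using the ordinary libc API (the restorer itself goes through sys_recvmsg()/pr_err() instead; recv_ack_checked is a made-up name for this sketch):

/*
 * Illustrative sketch only: receive an ack that carries SCM_CREDENTIALS and
 * fail if the credentials are not for the expected thread, mirroring what
 * recv_cg_set_restore_ack() does after this change.
 */
#define _GNU_SOURCE /* struct ucred, SCM_CREDENTIALS */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>

int recv_ack_checked(int sk, pid_t expected_tid)
{
        char cmsg[CMSG_SPACE(sizeof(struct ucred))] = {};
        struct msghdr h = {
                .msg_control = cmsg,
                .msg_controllen = sizeof(cmsg),
        };
        struct cmsghdr *ch;
        struct ucred *cred;

        /* The receiving socket must have SO_PASSCRED enabled for the
         * kernel to attach the sender's credentials to the message. */
        if (recvmsg(sk, &h, 0) < 0) {
                perror("recvmsg");
                return -1;
        }

        ch = CMSG_FIRSTHDR(&h);
        if (!ch || ch->cmsg_level != SOL_SOCKET || ch->cmsg_type != SCM_CREDENTIALS) {
                fprintf(stderr, "no credentials attached to ack\n");
                return -1;
        }

        cred = (struct ucred *)CMSG_DATA(ch);
        if (cred->pid != expected_tid) {
                fprintf(stderr, "ack was for pid %d, expected %d\n", cred->pid, expected_tid);
                return -1;
        }
        return 0;
}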