mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-31 06:15:24 +00:00
net: Pre-create nl diag sk
The setns() syscall (called by switch_ns()) can be extremely slow. If we call it two or more times from the same task, the kernel will synchronously go into a very slow routine called synchronize_rcu() while trying to put a reference on the old namespaces. To avoid doing this more than once, I propose to create all per-ns sockets in one place with a single setns() call. In this patch, the nl diag socket that is used to collect other sockets is created this way. Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
This commit is contained in:
16
cr-check.c
16
cr-check.c
@@ -1,4 +1,5 @@
|
||||
#include <unistd.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/eventfd.h>
|
||||
@@ -7,6 +8,7 @@
|
||||
#include <sys/signalfd.h>
|
||||
#include <sys/ptrace.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/socket.h>
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <linux/if.h>
|
||||
@@ -91,11 +93,23 @@ static int check_map_files(void)
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifndef NETLINK_SOCK_DIAG
|
||||
#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
|
||||
#endif
|
||||
|
||||
static int check_sock_diag(void)
|
||||
{
|
||||
int ret;
|
||||
struct ns_id ns;
|
||||
|
||||
ret = collect_sockets(0);
|
||||
ns.pid = 0;
|
||||
ns.net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
||||
if (ns.net.nlsk < 0) {
|
||||
pr_perror("Can't make diag socket for check");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = collect_sockets(&ns);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
|
@@ -21,6 +21,11 @@ struct ns_id {
|
||||
struct mount_info *mntinfo_list;
|
||||
struct mount_info *mntinfo_tree;
|
||||
} mnt;
|
||||
|
||||
struct {
|
||||
int nlsk; /* for sockets collection */
|
||||
int seqsk; /* to talk to parasite daemons */
|
||||
} net;
|
||||
};
|
||||
};
|
||||
extern struct ns_id *ns_ids;
|
||||
|
@@ -32,7 +32,8 @@ extern int restore_prepare_socket(int sk);
|
||||
extern bool socket_test_collect_bit(unsigned int family, unsigned int proto);
|
||||
|
||||
extern int sk_collect_one(int ino, int family, struct socket_desc *d);
|
||||
extern int collect_sockets(int pid);
|
||||
struct ns_id;
|
||||
extern int collect_sockets(struct ns_id *);
|
||||
extern int collect_inet_sockets(void);
|
||||
extern struct collect_image_info unix_sk_cinfo;
|
||||
extern int collect_unix_sockets(void);
|
||||
|
50
net.c
50
net.c
@@ -626,10 +626,58 @@ int veth_pair_add(char *in, char *out)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The setns() syscall (called by switch_ns()) can be extremely
|
||||
* slow. If we call it two or more times from the same task the
|
||||
* kernel will synchronously go on a very slow routine called
|
||||
* synchronize_rcu() trying to put a reference on old namespaces.
|
||||
*
|
||||
* To avoid doing this more than once we pre-create all the
|
||||
* needed other-ns sockets in advance.
|
||||
*/
|
||||
|
||||
static int prep_ns_sockets(struct ns_id *ns)
|
||||
{
|
||||
int nsret = -1, ret;
|
||||
|
||||
if (ns->pid != getpid()) {
|
||||
pr_info("Switching to %d's net for collecting sockets\n", ns->pid);
|
||||
if (switch_ns(ns->pid, &net_ns_desc, &nsret))
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = ns->net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
||||
if (ret < 0) {
|
||||
pr_perror("Can't create sock diag socket");
|
||||
goto err_nl;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (nsret >= 0 && restore_ns(nsret, &net_ns_desc) < 0) {
|
||||
nsret = -1;
|
||||
if (ret == 0)
|
||||
goto err_ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
err_ret:
|
||||
close(ns->net.nlsk);
|
||||
err_nl:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int collect_net_ns(struct ns_id *ns)
|
||||
{
|
||||
int ret;
|
||||
|
||||
pr_info("Collecting netns %d/%d\n", ns->id, ns->pid);
|
||||
return collect_sockets(ns->pid);
|
||||
ret = prep_ns_sockets(ns);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return collect_sockets(ns);
|
||||
}
|
||||
|
||||
int collect_net_namespaces(void)
|
||||
|
32
sockets.c
32
sockets.c
@@ -515,27 +515,12 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int collect_sockets(int pid)
|
||||
int collect_sockets(struct ns_id *ns)
|
||||
{
|
||||
int err = 0, tmp;
|
||||
int rst = -1;
|
||||
int nl;
|
||||
int nl = ns->net.nlsk;
|
||||
struct sock_diag_req req;
|
||||
|
||||
if (root_ns_mask & CLONE_NEWNET) {
|
||||
pr_info("Switching to %d's net for collecting sockets\n", pid);
|
||||
|
||||
if (switch_ns(pid, &net_ns_desc, &rst))
|
||||
return -1;
|
||||
}
|
||||
|
||||
nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
||||
if (nl < 0) {
|
||||
pr_perror("Can't create sock diag socket");
|
||||
err = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
req.hdr.nlmsg_len = sizeof(req);
|
||||
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
|
||||
@@ -615,7 +600,7 @@ int collect_sockets(int pid)
|
||||
tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
|
||||
if (tmp) {
|
||||
pr_warn("The current kernel doesn't support packet_diag\n");
|
||||
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||
err = tmp;
|
||||
}
|
||||
|
||||
@@ -625,16 +610,15 @@ int collect_sockets(int pid)
|
||||
tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
|
||||
if (tmp) {
|
||||
pr_warn("The current kernel doesn't support netlink_diag\n");
|
||||
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||
err = tmp;
|
||||
}
|
||||
|
||||
/* don't need anymore */
|
||||
close(nl);
|
||||
out:
|
||||
if (rst >= 0) {
|
||||
if (restore_ns(rst, &net_ns_desc) < 0)
|
||||
err = -1;
|
||||
} else if (pid != 0) {
|
||||
ns->net.nlsk = -1;
|
||||
|
||||
if (ns->pid == getpid()) {
|
||||
/*
|
||||
* If netns isn't dumped, criu will fail only
|
||||
* if an unsupported socket will be really dumped.
|
||||
|
Reference in New Issue
Block a user