diff --git a/cr-check.c b/cr-check.c index 7647cacb9..cb3747b33 100644 --- a/cr-check.c +++ b/cr-check.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -91,11 +93,23 @@ static int check_map_files(void) return -1; } +#ifndef NETLINK_SOCK_DIAG +#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG +#endif + static int check_sock_diag(void) { int ret; + struct ns_id ns; - ret = collect_sockets(0); + ns.pid = 0; + ns.net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (ns.net.nlsk < 0) { + pr_perror("Can't make diag socket for check"); + return -1; + } + + ret = collect_sockets(&ns); if (!ret) return 0; diff --git a/include/namespaces.h b/include/namespaces.h index 83fba9a3a..09d631c38 100644 --- a/include/namespaces.h +++ b/include/namespaces.h @@ -21,6 +21,11 @@ struct ns_id { struct mount_info *mntinfo_list; struct mount_info *mntinfo_tree; } mnt; + + struct { + int nlsk; /* for sockets collection */ + int seqsk; /* to talk to parasite daemons */ + } net; }; }; extern struct ns_id *ns_ids; diff --git a/include/sockets.h b/include/sockets.h index 3a2fe81ea..105cb10bf 100644 --- a/include/sockets.h +++ b/include/sockets.h @@ -32,7 +32,8 @@ extern int restore_prepare_socket(int sk); extern bool socket_test_collect_bit(unsigned int family, unsigned int proto); extern int sk_collect_one(int ino, int family, struct socket_desc *d); -extern int collect_sockets(int pid); +struct ns_id; +extern int collect_sockets(struct ns_id *); extern int collect_inet_sockets(void); extern struct collect_image_info unix_sk_cinfo; extern int collect_unix_sockets(void); diff --git a/net.c b/net.c index 52327fe97..b7df37de1 100644 --- a/net.c +++ b/net.c @@ -626,10 +626,61 @@ int veth_pair_add(char *in, char *out) return 0; } +/* + * The setns() syscall (called by switch_ns()) can be extremely + * slow. 
If we call it two or more times from the same task the + * kernel will synchronously go on a very slow routine called + * synchronize_rcu() trying to put a reference on old namespaces. + * + * To avoid doing this more than once we pre-create all the + * needed other-ns sockets in advance. + */ + +static int prep_ns_sockets(struct ns_id *ns) +{ + int nsret = -1, ret; + + if (ns->pid != getpid()) { + pr_info("Switching to %d's net for collecting sockets\n", ns->pid); + if (switch_ns(ns->pid, &net_ns_desc, &nsret)) + return -1; + } + + ret = ns->net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (ret < 0) { + pr_perror("Can't create sock diag socket"); + goto err_nl; + } + + ret = 0; +out: + if (nsret >= 0 && restore_ns(nsret, &net_ns_desc) < 0) { + nsret = -1; + if (ret == 0) { + /* restore_ns() failed: report it instead of returning 0 with a closed socket */ + ret = -1; + goto err_ret; + } + } + + return ret; + +err_ret: + close(ns->net.nlsk); +err_nl: + goto out; +} + static int collect_net_ns(struct ns_id *ns) { + int ret; + pr_info("Collecting netns %d/%d\n", ns->id, ns->pid); - return collect_sockets(ns->pid); + ret = prep_ns_sockets(ns); + if (ret) + return ret; + + return collect_sockets(ns); } int collect_net_namespaces(void) diff --git a/sockets.c b/sockets.c index 103774f57..b36caef27 100644 --- a/sockets.c +++ b/sockets.c @@ -515,27 +515,12 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size, return tmp; } -int collect_sockets(int pid) +int collect_sockets(struct ns_id *ns) { int err = 0, tmp; - int rst = -1; - int nl; + int nl = ns->net.nlsk; struct sock_diag_req req; - if (root_ns_mask & CLONE_NEWNET) { - pr_info("Switching to %d's net for collecting sockets\n", pid); - - if (switch_ns(pid, &net_ns_desc, &rst)) - return -1; - } - - nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); - if (nl < 0) { - pr_perror("Can't create sock diag socket"); - err = -1; - goto out; - } - memset(&req, 0, sizeof(req)); req.hdr.nlmsg_len = 
sizeof(req); req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY; @@ -615,7 +600,7 @@ int 
collect_sockets(int pid) tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL); if (tmp) { pr_warn("The current kernel doesn't support packet_diag\n"); - if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */ + if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */ err = tmp; } @@ -625,16 +610,15 @@ int collect_sockets(int pid) tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL); if (tmp) { pr_warn("The current kernel doesn't support netlink_diag\n"); - if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */ + if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */ err = tmp; } + /* don't need anymore */ close(nl); -out: - if (rst >= 0) { - if (restore_ns(rst, &net_ns_desc) < 0) - err = -1; - } else if (pid != 0) { + ns->net.nlsk = -1; + + if (ns->pid == getpid()) { /* * If netns isn't dumped, criu will fail only * if an unsupported socket will be really dumped.