mirror of
https://github.com/checkpoint-restore/criu
synced 2025-09-01 06:45:35 +00:00
net: Pre-create nl diag sk
The setns() syscall (called by switch_ns()) can be extremely slow. If we call it two or more times from the same task, the kernel will synchronously go into a very slow routine called synchronize_rcu() while trying to put a reference on the old namespaces. To avoid doing this more than once, I propose to create all per-ns sockets in one place with one setns call. In this patch the nl diag socket, which is used to collect other sockets, is created this way. Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
This commit is contained in:
16
cr-check.c
16
cr-check.c
@@ -1,4 +1,5 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <linux/netlink.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/eventfd.h>
|
#include <sys/eventfd.h>
|
||||||
@@ -7,6 +8,7 @@
|
|||||||
#include <sys/signalfd.h>
|
#include <sys/signalfd.h>
|
||||||
#include <sys/ptrace.h>
|
#include <sys/ptrace.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <linux/if.h>
|
#include <linux/if.h>
|
||||||
@@ -91,11 +93,23 @@ static int check_map_files(void)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NETLINK_SOCK_DIAG
|
||||||
|
#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
|
||||||
|
#endif
|
||||||
|
|
||||||
static int check_sock_diag(void)
|
static int check_sock_diag(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
struct ns_id ns;
|
||||||
|
|
||||||
ret = collect_sockets(0);
|
ns.pid = 0;
|
||||||
|
ns.net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
||||||
|
if (ns.net.nlsk < 0) {
|
||||||
|
pr_perror("Can't make diag socket for check");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = collect_sockets(&ns);
|
||||||
if (!ret)
|
if (!ret)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@@ -21,6 +21,11 @@ struct ns_id {
|
|||||||
struct mount_info *mntinfo_list;
|
struct mount_info *mntinfo_list;
|
||||||
struct mount_info *mntinfo_tree;
|
struct mount_info *mntinfo_tree;
|
||||||
} mnt;
|
} mnt;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
int nlsk; /* for sockets collection */
|
||||||
|
int seqsk; /* to talk to parasite daemons */
|
||||||
|
} net;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
extern struct ns_id *ns_ids;
|
extern struct ns_id *ns_ids;
|
||||||
|
@@ -32,7 +32,8 @@ extern int restore_prepare_socket(int sk);
|
|||||||
extern bool socket_test_collect_bit(unsigned int family, unsigned int proto);
|
extern bool socket_test_collect_bit(unsigned int family, unsigned int proto);
|
||||||
|
|
||||||
extern int sk_collect_one(int ino, int family, struct socket_desc *d);
|
extern int sk_collect_one(int ino, int family, struct socket_desc *d);
|
||||||
extern int collect_sockets(int pid);
|
struct ns_id;
|
||||||
|
extern int collect_sockets(struct ns_id *);
|
||||||
extern int collect_inet_sockets(void);
|
extern int collect_inet_sockets(void);
|
||||||
extern struct collect_image_info unix_sk_cinfo;
|
extern struct collect_image_info unix_sk_cinfo;
|
||||||
extern int collect_unix_sockets(void);
|
extern int collect_unix_sockets(void);
|
||||||
|
50
net.c
50
net.c
@@ -626,10 +626,58 @@ int veth_pair_add(char *in, char *out)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The setns() syscall (called by switch_ns()) can be extremely
|
||||||
|
* slow. If we call it two or more times from the same task the
|
||||||
|
* kernel will synchronously go on a very slow routine called
|
||||||
|
* synchronize_rcu() trying to put a reference on old namespaces.
|
||||||
|
*
|
||||||
|
* To avoid doing this more than once we pre-create all the
|
||||||
|
* needed other-ns sockets in advance.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int prep_ns_sockets(struct ns_id *ns)
|
||||||
|
{
|
||||||
|
int nsret = -1, ret;
|
||||||
|
|
||||||
|
if (ns->pid != getpid()) {
|
||||||
|
pr_info("Switching to %d's net for collecting sockets\n", ns->pid);
|
||||||
|
if (switch_ns(ns->pid, &net_ns_desc, &nsret))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = ns->net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
||||||
|
if (ret < 0) {
|
||||||
|
pr_perror("Can't create sock diag socket");
|
||||||
|
goto err_nl;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
out:
|
||||||
|
if (nsret >= 0 && restore_ns(nsret, &net_ns_desc) < 0) {
|
||||||
|
nsret = -1;
|
||||||
|
if (ret == 0)
|
||||||
|
goto err_ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
err_ret:
|
||||||
|
close(ns->net.nlsk);
|
||||||
|
err_nl:
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
static int collect_net_ns(struct ns_id *ns)
|
static int collect_net_ns(struct ns_id *ns)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
pr_info("Collecting netns %d/%d\n", ns->id, ns->pid);
|
pr_info("Collecting netns %d/%d\n", ns->id, ns->pid);
|
||||||
return collect_sockets(ns->pid);
|
ret = prep_ns_sockets(ns);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return collect_sockets(ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
int collect_net_namespaces(void)
|
int collect_net_namespaces(void)
|
||||||
|
32
sockets.c
32
sockets.c
@@ -515,27 +515,12 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
|
|||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
int collect_sockets(int pid)
|
int collect_sockets(struct ns_id *ns)
|
||||||
{
|
{
|
||||||
int err = 0, tmp;
|
int err = 0, tmp;
|
||||||
int rst = -1;
|
int nl = ns->net.nlsk;
|
||||||
int nl;
|
|
||||||
struct sock_diag_req req;
|
struct sock_diag_req req;
|
||||||
|
|
||||||
if (root_ns_mask & CLONE_NEWNET) {
|
|
||||||
pr_info("Switching to %d's net for collecting sockets\n", pid);
|
|
||||||
|
|
||||||
if (switch_ns(pid, &net_ns_desc, &rst))
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
|
||||||
if (nl < 0) {
|
|
||||||
pr_perror("Can't create sock diag socket");
|
|
||||||
err = -1;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&req, 0, sizeof(req));
|
memset(&req, 0, sizeof(req));
|
||||||
req.hdr.nlmsg_len = sizeof(req);
|
req.hdr.nlmsg_len = sizeof(req);
|
||||||
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
|
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
|
||||||
@@ -615,7 +600,7 @@ int collect_sockets(int pid)
|
|||||||
tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
|
tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
|
||||||
if (tmp) {
|
if (tmp) {
|
||||||
pr_warn("The current kernel doesn't support packet_diag\n");
|
pr_warn("The current kernel doesn't support packet_diag\n");
|
||||||
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||||
err = tmp;
|
err = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -625,16 +610,15 @@ int collect_sockets(int pid)
|
|||||||
tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
|
tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
|
||||||
if (tmp) {
|
if (tmp) {
|
||||||
pr_warn("The current kernel doesn't support netlink_diag\n");
|
pr_warn("The current kernel doesn't support netlink_diag\n");
|
||||||
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
||||||
err = tmp;
|
err = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* don't need anymore */
|
||||||
close(nl);
|
close(nl);
|
||||||
out:
|
ns->net.nlsk = -1;
|
||||||
if (rst >= 0) {
|
|
||||||
if (restore_ns(rst, &net_ns_desc) < 0)
|
if (ns->pid == getpid()) {
|
||||||
err = -1;
|
|
||||||
} else if (pid != 0) {
|
|
||||||
/*
|
/*
|
||||||
* If netns isn't dumped, criu will fail only
|
* If netns isn't dumped, criu will fail only
|
||||||
* if an unsupported socket will be really dumped.
|
* if an unsupported socket will be really dumped.
|
||||||
|
Reference in New Issue
Block a user