mirror of
https://github.com/checkpoint-restore/criu
synced 2025-08-22 09:58:09 +00:00
This values will be doubled in kernel to account for "struct sk_buff" etc. overhead. Currently criu restores snd and rcv buffer limits incorrectly, they become bigger on each iteration. $ ../crit show dump/zdtm/live/static/socket-tcp/6299/1/inetsk.img | grep buf "so_sndbuf": 2626560, "so_rcvbuf": 1060720, "so_sndbuf": 16384, "so_rcvbuf": 87380, $ ../crit show dump/zdtm/live/static/socket-tcp/6299/2/inetsk.img | grep buf "so_sndbuf": 5253120, "so_rcvbuf": 2121440, "so_sndbuf": 32768, "so_rcvbuf": 174760, $ ../crit show dump/zdtm/live/static/socket-tcp/6299/3/inetsk.img | grep buf "so_sndbuf": 10506240, "so_rcvbuf": 4242880, "so_sndbuf": 65536, "so_rcvbuf": 349520, With-help-of: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: Andrew Vagin <avagin@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
731 lines
17 KiB
C
731 lines
17 KiB
C
#include <unistd.h>
|
|
#include <sys/socket.h>
|
|
#include <linux/netlink.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <netinet/tcp.h>
|
|
#include <errno.h>
|
|
#include <linux/if.h>
|
|
#include <linux/filter.h>
|
|
#include <string.h>
|
|
|
|
#include "libnetlink.h"
|
|
#include "sockets.h"
|
|
#include "unix_diag.h"
|
|
#include "inet_diag.h"
|
|
#include "packet_diag.h"
|
|
#include "netlink_diag.h"
|
|
#include "files.h"
|
|
#include "util-pie.h"
|
|
#include "sk-packet.h"
|
|
#include "namespaces.h"
|
|
#include "net.h"
|
|
#include "fs-magic.h"
|
|
|
|
#ifndef SOCK_DIAG_BY_FAMILY
|
|
#define SOCK_DIAG_BY_FAMILY 20
|
|
#endif
|
|
|
|
#define SK_HASH_SIZE 32
|
|
|
|
#ifndef SO_GET_FILTER
|
|
#define SO_GET_FILTER SO_ATTACH_FILTER
|
|
#endif
|
|
|
|
struct sock_diag_greq {
|
|
u8 family;
|
|
u8 protocol;
|
|
};
|
|
|
|
struct sock_diag_req {
|
|
struct nlmsghdr hdr;
|
|
union {
|
|
struct unix_diag_req u;
|
|
struct inet_diag_req_v2 i;
|
|
struct packet_diag_req p;
|
|
struct netlink_diag_req n;
|
|
struct sock_diag_greq g;
|
|
} r;
|
|
};
|
|
|
|
enum socket_cl_bits
|
|
{
|
|
NETLINK_CL_BIT,
|
|
INET_TCP_CL_BIT,
|
|
INET_UDP_CL_BIT,
|
|
INET_UDPLITE_CL_BIT,
|
|
INET6_TCP_CL_BIT,
|
|
INET6_UDP_CL_BIT,
|
|
INET6_UDPLITE_CL_BIT,
|
|
UNIX_CL_BIT,
|
|
PACKET_CL_BIT,
|
|
_MAX_CL_BIT,
|
|
};
|
|
|
|
#define MAX_CL_BIT (_MAX_CL_BIT - 1)
|
|
|
|
static DECLARE_BITMAP(socket_cl_bits, MAX_CL_BIT);
|
|
|
|
static inline
|
|
enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
|
|
{
|
|
if (family == AF_NETLINK)
|
|
return NETLINK_CL_BIT;
|
|
if (family == AF_UNIX)
|
|
return UNIX_CL_BIT;
|
|
if (family == AF_PACKET)
|
|
return PACKET_CL_BIT;
|
|
if (family == AF_INET) {
|
|
if (proto == IPPROTO_TCP)
|
|
return INET_TCP_CL_BIT;
|
|
if (proto == IPPROTO_UDP)
|
|
return INET_UDP_CL_BIT;
|
|
if (proto == IPPROTO_UDPLITE)
|
|
return INET_UDPLITE_CL_BIT;
|
|
}
|
|
if (family == AF_INET6) {
|
|
if (proto == IPPROTO_TCP)
|
|
return INET6_TCP_CL_BIT;
|
|
if (proto == IPPROTO_UDP)
|
|
return INET6_UDP_CL_BIT;
|
|
if (proto == IPPROTO_UDPLITE)
|
|
return INET6_UDPLITE_CL_BIT;
|
|
}
|
|
|
|
pr_err("Unknown pair family %d proto %d\n", family, proto);
|
|
BUG();
|
|
return -1;
|
|
}
|
|
|
|
static void set_collect_bit(unsigned int family, unsigned int proto)
|
|
{
|
|
enum socket_cl_bits nr;
|
|
|
|
nr = get_collect_bit_nr(family, proto);
|
|
set_bit(nr, socket_cl_bits);
|
|
}
|
|
|
|
bool socket_test_collect_bit(unsigned int family, unsigned int proto)
|
|
{
|
|
enum socket_cl_bits nr;
|
|
|
|
nr = get_collect_bit_nr(family, proto);
|
|
return test_bit(nr, socket_cl_bits) != 0;
|
|
}
|
|
|
|
static int probe_recv_one(struct nlmsghdr *h, void *arg)
|
|
{
|
|
pr_err("PROBE RECEIVED\n");
|
|
return -1;
|
|
}
|
|
|
|
static int probe_err(int err, void *arg)
|
|
{
|
|
int expected_err = *(int *)arg;
|
|
|
|
if (err == expected_err)
|
|
return 0;
|
|
|
|
pr_err("Diag module missing (%d)\n", err);
|
|
return err;
|
|
}
|
|
|
|
static inline void probe_diag(int nl, struct sock_diag_req *req, int expected_err)
|
|
{
|
|
do_rtnl_req(nl, req, req->hdr.nlmsg_len, probe_recv_one, probe_err, &expected_err);
|
|
}
|
|
|
|
void preload_socket_modules()
|
|
{
|
|
int nl;
|
|
struct sock_diag_req req;
|
|
|
|
/*
|
|
* If the task to dump (e.g. an LXC container) has any netlink
|
|
* KOBJECT_UEVENT socket open and the _diag modules aren't
|
|
* loaded is dumped, criu will freeze the task and then the
|
|
* kernel will send it messages on the socket, and then we will
|
|
* fail to dump because the socket has pending data. The Real
|
|
* Solution is to dump this pending data, but we just make sure
|
|
* modules are there beforehand for now so that the first dump
|
|
* doesn't fail.
|
|
*/
|
|
|
|
nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
|
|
if (nl < 0)
|
|
return;
|
|
|
|
pr_info("Probing sock diag modules\n");
|
|
|
|
memset(&req, 0, sizeof(req));
|
|
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
|
|
req.hdr.nlmsg_seq = CR_NLMSG_SEQ;
|
|
|
|
/*
|
|
* Probe UNIX, netlink and packet diag-s by feeding
|
|
* to the kernel request that is shorter than they
|
|
* expect, byt still containing the family to make
|
|
* sure the family handler is there. The family-level
|
|
* diag module would report EINVAL in this case.
|
|
*/
|
|
|
|
req.hdr.nlmsg_len = sizeof(req.hdr) + sizeof(req.r.g);
|
|
req.hdr.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
|
|
|
|
req.r.g.family = AF_UNIX;
|
|
probe_diag(nl, &req, -EINVAL);
|
|
|
|
req.r.g.family = AF_PACKET;
|
|
probe_diag(nl, &req, -EINVAL);
|
|
|
|
req.r.g.family = AF_NETLINK;
|
|
probe_diag(nl, &req, -EINVAL);
|
|
|
|
/*
|
|
* TCP and UDP(LITE) diags do not support such trick, only
|
|
* inet_diag module can be probed like that. For the protocol
|
|
* level ones it's OK to request for exact non-existing socket
|
|
* and check for ENOENT being reported back as error.
|
|
*/
|
|
|
|
req.hdr.nlmsg_len = sizeof(req.hdr) + sizeof(req.r.i);
|
|
req.hdr.nlmsg_flags = NLM_F_REQUEST;
|
|
req.r.i.sdiag_family = AF_INET;
|
|
|
|
req.r.i.sdiag_protocol = IPPROTO_TCP;
|
|
probe_diag(nl, &req, -ENOENT);
|
|
|
|
req.r.i.sdiag_protocol = IPPROTO_UDP; /* UDLITE is merged with UDP */
|
|
probe_diag(nl, &req, -ENOENT);
|
|
|
|
close(nl);
|
|
pr_info("Done probing\n");
|
|
}
|
|
|
|
static int dump_bound_dev(int sk, SkOptsEntry *soe)
|
|
{
|
|
int ret;
|
|
char dev[IFNAMSIZ];
|
|
socklen_t len = sizeof(dev);
|
|
|
|
ret = getsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, &dev, &len);
|
|
if (ret) {
|
|
pr_perror("Can't get bound dev");
|
|
return ret;
|
|
}
|
|
|
|
if (len == 0)
|
|
return 0;
|
|
|
|
pr_debug("\tDumping %s bound dev for sk\n", dev);
|
|
soe->so_bound_dev = xmalloc(len);
|
|
if (soe->so_bound_dev == NULL)
|
|
return -1;
|
|
strcpy(soe->so_bound_dev, dev);
|
|
return 0;
|
|
}
|
|
|
|
static int restore_bound_dev(int sk, SkOptsEntry *soe)
|
|
{
|
|
char *n = soe->so_bound_dev;
|
|
|
|
if (!n)
|
|
return 0;
|
|
|
|
pr_debug("\tBinding socket to %s dev\n", n);
|
|
return do_restore_opt(sk, SOL_SOCKET, SO_BINDTODEVICE, n, strlen(n));
|
|
}
|
|
|
|
/*
|
|
* Protobuf handles le/be himself, but the sock_filter is not just u64,
|
|
* it's a structure and we have to preserve the fields order to be able
|
|
* to move socket image across architectures.
|
|
*/
|
|
|
|
static void encode_filter(struct sock_filter *f, u64 *img, int n)
|
|
{
|
|
int i;
|
|
|
|
BUILD_BUG_ON(sizeof(*f) != sizeof(*img));
|
|
|
|
for (i = 0; i < n; i++)
|
|
img[i] = ((u64)f[i].code << 48) |
|
|
((u64)f[i].jt << 40) |
|
|
((u64)f[i].jf << 32) |
|
|
((u64)f[i].k << 0);
|
|
}
|
|
|
|
static void decode_filter(u64 *img, struct sock_filter *f, int n)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < n; i++) {
|
|
f[i].code = img[i] >> 48;
|
|
f[i].jt = img[i] >> 40;
|
|
f[i].jf = img[i] >> 32;
|
|
f[i].k = img[i] >> 0;
|
|
}
|
|
}
|
|
|
|
static int dump_socket_filter(int sk, SkOptsEntry *soe)
|
|
{
|
|
socklen_t len = 0;
|
|
int ret;
|
|
struct sock_filter *flt;
|
|
|
|
ret = getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, NULL, &len);
|
|
if (ret) {
|
|
pr_perror("Can't get socket filter len");
|
|
return ret;
|
|
}
|
|
|
|
if (!len) {
|
|
pr_info("No filter for socket\n");
|
|
return 0;
|
|
}
|
|
|
|
flt = xmalloc(len * sizeof(*flt));
|
|
if (!flt)
|
|
return -1;
|
|
|
|
ret = getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, flt, &len);
|
|
if (ret) {
|
|
pr_perror("Can't get socket filter");
|
|
xfree(flt);
|
|
return ret;
|
|
}
|
|
|
|
soe->so_filter = xmalloc(len * sizeof(*soe->so_filter));
|
|
if (!soe->so_filter) {
|
|
xfree(flt);
|
|
return -1;
|
|
}
|
|
|
|
encode_filter(flt, soe->so_filter, len);
|
|
soe->n_so_filter = len;
|
|
xfree(flt);
|
|
return 0;
|
|
}
|
|
|
|
static int restore_socket_filter(int sk, SkOptsEntry *soe)
|
|
{
|
|
int ret;
|
|
struct sock_fprog sfp;
|
|
|
|
if (!soe->n_so_filter)
|
|
return 0;
|
|
|
|
pr_info("Restoring socket filter\n");
|
|
sfp.len = soe->n_so_filter;
|
|
sfp.filter = xmalloc(soe->n_so_filter * sfp.len);
|
|
if (!sfp.filter)
|
|
return -1;
|
|
|
|
decode_filter(soe->so_filter, sfp.filter, sfp.len);
|
|
ret = restore_opt(sk, SOL_SOCKET, SO_ATTACH_FILTER, &sfp);
|
|
xfree(sfp.filter);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static struct socket_desc *sockets[SK_HASH_SIZE];
|
|
|
|
struct socket_desc *lookup_socket(int ino, int family, int proto)
|
|
{
|
|
struct socket_desc *sd;
|
|
|
|
if (!socket_test_collect_bit(family, proto)) {
|
|
pr_err("Sockets (family %d, proto %d) are not collected\n",
|
|
family, proto);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
pr_debug("\tSearching for socket %x (family %d)\n", ino, family);
|
|
for (sd = sockets[ino % SK_HASH_SIZE]; sd; sd = sd->next)
|
|
if (sd->ino == ino) {
|
|
BUG_ON(sd->family != family);
|
|
return sd;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int sk_collect_one(int ino, int family, struct socket_desc *d)
|
|
{
|
|
struct socket_desc **chain;
|
|
|
|
d->ino = ino;
|
|
d->family = family;
|
|
d->already_dumped = 0;
|
|
|
|
chain = &sockets[ino % SK_HASH_SIZE];
|
|
d->next = *chain;
|
|
*chain = d;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int do_restore_opt(int sk, int level, int name, void *val, int len)
|
|
{
|
|
if (setsockopt(sk, level, name, val, len) < 0) {
|
|
pr_perror("Can't set %d:%d (len %d)", level, name, len);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int sk_setbufs(void *arg, int fd)
|
|
{
|
|
u32 *buf = (u32 *)arg;
|
|
|
|
if (restore_opt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0]))
|
|
return -1;
|
|
if (restore_opt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1]))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Set sizes of buffers to maximum and prevent blocking
|
|
* Caller of this fn should call other socket restoring
|
|
* routines to drop the non-blocking and set proper send
|
|
* and receive buffers.
|
|
*/
|
|
int restore_prepare_socket(int sk)
|
|
{
|
|
int flags;
|
|
/* In kernel a bufsize has type int and a value is doubled. */
|
|
u32 maxbuf[2] = { INT_MAX / 2, INT_MAX / 2 };
|
|
|
|
if (userns_call(sk_setbufs, 0, maxbuf, sizeof(maxbuf), sk))
|
|
return -1;
|
|
|
|
/* Prevent blocking on restore */
|
|
flags = fcntl(sk, F_GETFL, 0);
|
|
if (flags == -1) {
|
|
pr_perror("Unable to get flags for %d", sk);
|
|
return -1;
|
|
}
|
|
if (fcntl(sk, F_SETFL, flags | O_NONBLOCK) ) {
|
|
pr_perror("Unable to set O_NONBLOCK for %d", sk);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int restore_socket_opts(int sk, SkOptsEntry *soe)
|
|
{
|
|
int ret = 0, val;
|
|
struct timeval tv;
|
|
/* In kernel a bufsize value is doubled. */
|
|
u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2};
|
|
|
|
pr_info("%d restore sndbuf %d rcv buf %d\n", sk, soe->so_sndbuf, soe->so_rcvbuf);
|
|
|
|
/* setsockopt() multiplies the input values by 2 */
|
|
ret |= userns_call(sk_setbufs, UNS_ASYNC, bufs, sizeof(bufs), sk);
|
|
|
|
if (soe->has_so_priority) {
|
|
pr_debug("\trestore priority %d for socket\n", soe->so_priority);
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority);
|
|
}
|
|
if (soe->has_so_rcvlowat) {
|
|
pr_debug("\trestore rcvlowat %d for socket\n", soe->so_rcvlowat);
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_RCVLOWAT, &soe->so_rcvlowat);
|
|
}
|
|
if (soe->has_so_mark) {
|
|
pr_debug("\trestore mark %d for socket\n", soe->so_mark);
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark);
|
|
}
|
|
if (soe->has_so_passcred && soe->so_passcred) {
|
|
val = 1;
|
|
pr_debug("\tset passcred for socket\n");
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_PASSCRED, &val);
|
|
}
|
|
if (soe->has_so_passsec && soe->so_passsec) {
|
|
val = 1;
|
|
pr_debug("\tset passsec for socket\n");
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_PASSSEC, &val);
|
|
}
|
|
if (soe->has_so_dontroute && soe->so_dontroute) {
|
|
val = 1;
|
|
pr_debug("\tset dontroute for socket\n");
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val);
|
|
}
|
|
if (soe->has_so_no_check && soe->so_no_check) {
|
|
val = 1;
|
|
pr_debug("\tset no_check for socket\n");
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val);
|
|
}
|
|
|
|
tv.tv_sec = soe->so_snd_tmo_sec;
|
|
tv.tv_usec = soe->so_snd_tmo_usec;
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_SNDTIMEO, &tv);
|
|
|
|
tv.tv_sec = soe->so_rcv_tmo_sec;
|
|
tv.tv_usec = soe->so_rcv_tmo_usec;
|
|
ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv);
|
|
|
|
ret |= restore_bound_dev(sk, soe);
|
|
ret |= restore_socket_filter(sk, soe);
|
|
|
|
/* The restore of SO_REUSEADDR depends on type of socket */
|
|
|
|
return ret;
|
|
}
|
|
|
|
int do_dump_opt(int sk, int level, int name, void *val, int len)
|
|
{
|
|
socklen_t aux = len;
|
|
|
|
if (getsockopt(sk, level, name, val, &aux) < 0) {
|
|
pr_perror("Can't get %d:%d opt", level, name);
|
|
return -1;
|
|
}
|
|
|
|
if (aux != len) {
|
|
pr_err("Len mismatch on %d:%d : %d, want %d\n",
|
|
level, name, aux, len);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int dump_socket_opts(int sk, SkOptsEntry *soe)
|
|
{
|
|
int ret = 0, val;
|
|
struct timeval tv;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_SNDBUF, &soe->so_sndbuf);
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_RCVBUF, &soe->so_rcvbuf);
|
|
soe->has_so_priority = true;
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority);
|
|
soe->has_so_rcvlowat = true;
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_RCVLOWAT, &soe->so_rcvlowat);
|
|
soe->has_so_mark = true;
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark);
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_SNDTIMEO, &tv);
|
|
soe->so_snd_tmo_sec = tv.tv_sec;
|
|
soe->so_snd_tmo_usec = tv.tv_usec;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv);
|
|
soe->so_rcv_tmo_sec = tv.tv_sec;
|
|
soe->so_rcv_tmo_usec = tv.tv_usec;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val);
|
|
soe->reuseaddr = val ? true : false;
|
|
soe->has_reuseaddr = true;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, &val);
|
|
soe->has_so_passcred = true;
|
|
soe->so_passcred = val ? true : false;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_PASSSEC, &val);
|
|
soe->has_so_passsec = true;
|
|
soe->so_passsec = val ? true : false;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val);
|
|
soe->has_so_dontroute = true;
|
|
soe->so_dontroute = val ? true : false;
|
|
|
|
ret |= dump_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val);
|
|
soe->has_so_no_check = true;
|
|
soe->so_no_check = val ? true : false;
|
|
|
|
ret |= dump_bound_dev(sk, soe);
|
|
ret |= dump_socket_filter(sk, soe);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void release_skopts(SkOptsEntry *soe)
|
|
{
|
|
xfree(soe->so_filter);
|
|
xfree(soe->so_bound_dev);
|
|
}
|
|
|
|
int dump_socket(struct fd_parms *p, int lfd, struct cr_img *img)
|
|
{
|
|
int family;
|
|
const struct fdtype_ops *ops;
|
|
|
|
if (dump_opt(lfd, SOL_SOCKET, SO_DOMAIN, &family))
|
|
return -1;
|
|
|
|
switch (family) {
|
|
case AF_UNIX:
|
|
ops = &unix_dump_ops;
|
|
break;
|
|
case AF_INET:
|
|
ops = &inet_dump_ops;
|
|
break;
|
|
case AF_INET6:
|
|
ops = &inet6_dump_ops;
|
|
break;
|
|
case AF_PACKET:
|
|
ops = &packet_dump_ops;
|
|
break;
|
|
case AF_NETLINK:
|
|
ops = &netlink_dump_ops;
|
|
break;
|
|
default:
|
|
pr_err("BUG! Unknown socket collected (family %d)\n", family);
|
|
return -1;
|
|
}
|
|
|
|
return do_dump_gen_file(p, lfd, ops, img);
|
|
}
|
|
|
|
static int inet_receive_one(struct nlmsghdr *h, void *arg)
|
|
{
|
|
struct inet_diag_req_v2 *i = arg;
|
|
int type;
|
|
|
|
switch (i->sdiag_protocol) {
|
|
case IPPROTO_TCP:
|
|
type = SOCK_STREAM;
|
|
break;
|
|
case IPPROTO_UDP:
|
|
case IPPROTO_UDPLITE:
|
|
type = SOCK_DGRAM;
|
|
break;
|
|
default:
|
|
BUG_ON(1);
|
|
return -1;
|
|
}
|
|
|
|
return inet_collect_one(h, i->sdiag_family, type);
|
|
}
|
|
|
|
static int do_collect_req(int nl, struct sock_diag_req *req, int size,
|
|
int (*receive_callback)(struct nlmsghdr *h, void *), void *arg)
|
|
{
|
|
int tmp;
|
|
|
|
tmp = do_rtnl_req(nl, req, size, receive_callback, NULL, arg);
|
|
|
|
if (tmp == 0)
|
|
set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
|
|
|
|
return tmp;
|
|
}
|
|
|
|
int collect_sockets(struct ns_id *ns)
|
|
{
|
|
int err = 0, tmp;
|
|
int nl = ns->net.nlsk;
|
|
struct sock_diag_req req;
|
|
|
|
memset(&req, 0, sizeof(req));
|
|
req.hdr.nlmsg_len = sizeof(req);
|
|
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
|
|
req.hdr.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
|
|
req.hdr.nlmsg_seq = CR_NLMSG_SEQ;
|
|
|
|
/* Collect UNIX sockets */
|
|
req.r.u.sdiag_family = AF_UNIX;
|
|
req.r.u.udiag_states = -1; /* All */
|
|
req.r.u.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_VFS |
|
|
UDIAG_SHOW_PEER | UDIAG_SHOW_ICONS |
|
|
UDIAG_SHOW_RQLEN;
|
|
tmp = do_collect_req(nl, &req, sizeof(req), unix_receive_one, NULL);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv4 TCP sockets */
|
|
req.r.i.sdiag_family = AF_INET;
|
|
req.r.i.sdiag_protocol = IPPROTO_TCP;
|
|
req.r.i.idiag_ext = 0;
|
|
/* Only listening and established sockets supported yet */
|
|
req.r.i.idiag_states = (1 << TCP_LISTEN) | (1 << TCP_ESTABLISHED);
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv4 UDP sockets */
|
|
req.r.i.sdiag_family = AF_INET;
|
|
req.r.i.sdiag_protocol = IPPROTO_UDP;
|
|
req.r.i.idiag_ext = 0;
|
|
req.r.i.idiag_states = -1; /* All */
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv4 UDP-lite sockets */
|
|
req.r.i.sdiag_family = AF_INET;
|
|
req.r.i.sdiag_protocol = IPPROTO_UDPLITE;
|
|
req.r.i.idiag_ext = 0;
|
|
req.r.i.idiag_states = -1; /* All */
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv6 TCP sockets */
|
|
req.r.i.sdiag_family = AF_INET6;
|
|
req.r.i.sdiag_protocol = IPPROTO_TCP;
|
|
req.r.i.idiag_ext = 0;
|
|
/* Only listening sockets supported yet */
|
|
req.r.i.idiag_states = (1 << TCP_LISTEN) | (1 << TCP_ESTABLISHED);
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv6 UDP sockets */
|
|
req.r.i.sdiag_family = AF_INET6;
|
|
req.r.i.sdiag_protocol = IPPROTO_UDP;
|
|
req.r.i.idiag_ext = 0;
|
|
req.r.i.idiag_states = -1; /* All */
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
/* Collect IPv6 UDP-lite sockets */
|
|
req.r.i.sdiag_family = AF_INET6;
|
|
req.r.i.sdiag_protocol = IPPROTO_UDPLITE;
|
|
req.r.i.idiag_ext = 0;
|
|
req.r.i.idiag_states = -1; /* All */
|
|
tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
|
|
if (tmp)
|
|
err = tmp;
|
|
|
|
req.r.p.sdiag_family = AF_PACKET;
|
|
req.r.p.sdiag_protocol = 0;
|
|
req.r.p.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
|
|
PACKET_SHOW_FANOUT | PACKET_SHOW_RING_CFG;
|
|
tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
|
|
if (tmp) {
|
|
pr_warn("The current kernel doesn't support packet_diag\n");
|
|
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
|
err = tmp;
|
|
}
|
|
|
|
req.r.n.sdiag_family = AF_NETLINK;
|
|
req.r.n.sdiag_protocol = NDIAG_PROTO_ALL;
|
|
req.r.n.ndiag_show = NDIAG_SHOW_GROUPS;
|
|
tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
|
|
if (tmp) {
|
|
pr_warn("The current kernel doesn't support netlink_diag\n");
|
|
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
|
|
err = tmp;
|
|
}
|
|
|
|
/* don't need anymore */
|
|
close(nl);
|
|
ns->net.nlsk = -1;
|
|
|
|
if (err && (ns->pid == getpid())) {
|
|
/*
|
|
* If netns isn't dumped, criu will fail only
|
|
* if an unsupported socket will be really dumped.
|
|
*/
|
|
pr_info("Uncollected sockets! Will probably fail later.\n");
|
|
err = 0;
|
|
}
|
|
|
|
return err;
|
|
}
|