2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-22 18:07:57 +00:00
criu/sk-unix.c

1259 lines
28 KiB
C
Raw Normal View History

#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <unistd.h>
#include <netinet/tcp.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/un.h>
#include <stdlib.h>
#include <dlfcn.h>
#include "asm/types.h"
#include "libnetlink.h"
#include "cr_options.h"
#include "imgset.h"
#include "unix_diag.h"
#include "files.h"
#include "file-ids.h"
#include "image.h"
#include "log.h"
#include "util.h"
#include "util-pie.h"
#include "sockets.h"
#include "sk-queue.h"
#include "mount.h"
#include "cr-service.h"
#include "plugin.h"
#include "namespaces.h"
#include "pstree.h"
#include "protobuf.h"
#include "protobuf/sk-unix.pb-c.h"
/*
 * History note (commit of 2015-07-28 17:02:36 +03:00):
 *
 * sk-unix: Add trivial name resolver for sockets with relative names
 *
 * Unix sockets may be created with a non-absolute (relative) path (when
 * the kernel creates one it always uses AT_FDCWD for name resolving), so
 * when we collect sockets we see them as having names without a leading
 * slash.
 *
 * In common cases the application doesn't change its own working
 * directory after creating such a socket, but this is not always true.
 * So we need to invent some name resolver. A good candidate is the IRMAP
 * cache, but after a number of tests I found that it might slow down
 * performance very dramatically. Thus we need some more intelligent way
 * here.
 *
 * For a while, for common applications such as postfix, fetching the
 * dumpee's working directory and root is enough. So here is what we do:
 *
 *  - when a socket gets collected from the diag interface we remember
 *    its relative name parameters (device and inode) but postpone name
 *    resolving so as not to bring a perf penalty until really needed
 *
 *  - when we meet a socket to dump with a relative name assigned we try
 *    $cwd/name and $root/name for this socket to check whether it has
 *    been created in those directories. On success we simply remember
 *    the directory in the image and, when restoring such a socket, call
 *    the chdir helper to change the working dir and generate the
 *    relative name
 *
 * v2:
 *  - Use new unlink_stale to remove sockets we're to restore
 *  - Use *at() helpers once we've changed working dir in bind_unix_sk
 *  - Add more debug output
 *
 * Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
 * Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
 *
 * Conflicts: sk-unix.c
 */
/* Replace any previously defined LOG_PREFIX with the one used by this file. */
#undef LOG_PREFIX
#define LOG_PREFIX "sk unix: "
/*
 * Bookkeeping for a unix socket bound to a relative (non '/'-prefixed)
 * path: the device/inode pair reported in UNIX_DIAG_VFS at collect time
 * and the directory the name turned out to be relative to.
 */
typedef struct {
	/* Directory the name is relative to; filled in by resolve_rel_name() */
	char		*dir;
	/* Device of the socket file as reported by the diag interface */
	unsigned int	udiag_vfs_dev;
	/* Inode of the socket file as reported by the diag interface */
	unsigned int	udiag_vfs_ino;
} rel_name_desc_t;
struct unix_sk_desc {
struct socket_desc sd;
unsigned int type;
unsigned int state;
unsigned int peer_ino;
unsigned int rqlen;
unsigned int wqlen;
unsigned int namelen;
char *name;
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
rel_name_desc_t *rel_name;
unsigned int nr_icons;
unsigned int *icons;
unsigned char shutdown;
mode_t mode;
uid_t uid;
gid_t gid;
struct list_head list;
int fd;
struct list_head peer_list;
struct list_head peer_node;
UnixSkEntry *ue;
};
/*
 * List head shared by the code in this file: on dump it links
 * unix_sk_desc peers that were referenced but not yet dumped (see
 * dump_one_unix_fd() and fix_external_unix_sockets()); it is also
 * walked as a list of unix_sk_info in find_unix_sk_by_ino().
 */
static LIST_HEAD(unix_sockets);
/*
 * One in-flight connection ("icon") record: maps the inode of a socket
 * reported in some socket's UNIX_DIAG_ICONS list back to the descriptor
 * that reported it. Records are chained per hash bucket via @next.
 */
struct unix_sk_listen_icon {
	/* Inode of the in-flight connection */
	unsigned int			peer_ino;
	/* Descriptor whose icons list contained @peer_ino */
	struct unix_sk_desc		*sk_desc;
	/* Next record in the same hash bucket */
	struct unix_sk_listen_icon	*next;
};
/* Number of buckets in the in-flight connection hash below. */
#define SK_HASH_SIZE 32
/* In-flight connection records, bucketed by inode modulo SK_HASH_SIZE. */
static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
/*
 * Find the in-flight connection record for the socket with inode
 * @peer_ino.
 *
 * Returns the record, or NULL if no socket reported @peer_ino among its
 * in-flight connections.
 *
 * Inode numbers are unsigned 32-bit values, so the bucket index is
 * computed on the unsigned value: with a signed operand the % operator
 * yields a negative result for inodes at or above 2^31 and would index
 * outside of unix_listen_icons[]. Whoever inserts into the table has to
 * compute the bucket the same way.
 */
static struct unix_sk_listen_icon *lookup_unix_listen_icons(int peer_ino)
{
	unsigned int ino = (unsigned int)peer_ino;
	struct unix_sk_listen_icon *ic;

	for (ic = unix_listen_icons[ino % SK_HASH_SIZE]; ic; ic = ic->next)
		if (ic->peer_ino == ino)
			return ic;

	return NULL;
}
/*
 * Log (at debug level) a one-line summary of a collected unix socket,
 * followed by one line per in-flight connection inode.
 *
 * @act: short label describing why the socket is being shown
 * @sk:  socket descriptor to print
 *
 * @sk->name may legitimately be NULL (unbound sockets, or a dropped
 * path), so it is never handed to "%s" directly.
 */
static void show_one_unix(const char *act, const struct unix_sk_desc *sk)
{
	unsigned int i;

	pr_debug("\t%s: ino %#x peer_ino %#x family %4d type %4d state %2d name %s\n",
		 act, sk->sd.ino, sk->peer_ino, sk->sd.family, sk->type, sk->state,
		 sk->name ? sk->name : "(null)");

	for (i = 0; i < sk->nr_icons; i++)
		pr_debug("\t\ticon: %4d\n", sk->icons[i]);
}
/*
 * Log (at info level) a one-line summary of a unix socket image entry.
 *
 * @act: short label describing the action performed on the entry
 * @e:   image entry to print; only the length of its name is shown
 */
static void show_one_unix_img(const char *act, const UnixSkEntry *e)
{
	int name_bytes = (int)e->name.len;

	pr_info("\t%s: id %#x ino %#x peer %#x type %d state %d name %d bytes\n",
		act, e->id, e->ino, e->peer, e->type, e->state, name_bytes);
}
/*
 * Check whether a unix socket is of a kind we know how to dump.
 *
 * Returns 1 if the socket can be dumped, 0 (after logging an error)
 * otherwise.
 */
static int can_dump_unix_sk(const struct unix_sk_desc *sk)
{
	/*
	 * Seqpacket sockets are accepted as well: such a socket may be
	 * the one connected to cr_service, which the dump code handles
	 * separately.
	 */
	switch (sk->type) {
	case SOCK_STREAM:
	case SOCK_DGRAM:
	case SOCK_SEQPACKET:
		break;
	default:
		pr_err("Unsupported type (%d) on socket %x.\n"
		       "Only stream/dgram/seqpacket are supported.\n",
		       sk->type, sk->sd.ino);
		return 0;
	}

	if (sk->state == TCP_LISTEN ||
	    sk->state == TCP_ESTABLISHED ||
	    sk->state == TCP_CLOSE)
		return 1;

	pr_err("Unknown state %d for unix socket %x\n",
	       sk->state, sk->sd.ino);
	return 0;
}
static int write_unix_entry(struct unix_sk_desc *sk)
{
int ret;
ret = pb_write_one(img_from_set(glob_imgset, CR_FD_UNIXSK), sk->ue, PB_UNIX_SK);
show_one_unix_img("Dumped", sk->ue);
release_skopts(sk->ue->opts);
xfree(sk->ue);
sk->ue = NULL;
return ret;
}
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
/*
 * Work out which directory a relatively named unix socket was bound in.
 *
 * The working directory and the root of the task owning the descriptor
 * are tried in turn: "<dir>/<name>" is stat()-ed relative to the root of
 * the task's mount namespace and compared against the device/inode pair
 * remembered at collect time. On a match the directory is stored in
 * sk->rel_name->dir and the file's mode/uid/gid are copied into @sk.
 *
 * @sk: socket with sk->rel_name set
 * @p:  parameters of the descriptor being dumped (p->pid is used)
 *
 * Returns 0 on success, -1 if a /proc link can't be read, -ENOMEM on
 * allocation failure and -ENOENT in all other failure cases.
 */
static int resolve_rel_name(struct unix_sk_desc *sk, const struct fd_parms *p)
{
	const char *dirs[] = { "cwd", "root" };
	rel_name_desc_t *rel_name = sk->rel_name;
	struct pstree_item *task;
	struct ns_id *ns;
	int mntns_root, idx;

	/* Locate the task the descriptor belongs to */
	for_each_pstree_item(task) {
		if (task->pid.real == p->pid)
			break;
	}
	if (!task)
		return -ENOENT;

	ns = lookup_ns_by_id(task->ids->mnt_ns_id, &mnt_ns_desc);
	if (!ns)
		return -ENOENT;

	mntns_root = mntns_get_root_fd(ns);
	if (mntns_root < 0)
		return -ENOENT;

	pr_debug("Resolving relative name %s for socket %x\n",
		 sk->name, sk->sd.ino);

	for (idx = 0; idx < ARRAY_SIZE(dirs); idx++) {
		char dir[PATH_MAX], path[PATH_MAX];
		struct stat st;
		int len;

		/* Read where /proc/<pid>/{cwd,root} points to */
		snprintf(path, sizeof(path), "/proc/%d/%s", p->pid, dirs[idx]);
		len = readlink(path, dir, sizeof(dir));
		if (len < 0 || (size_t)len == sizeof(dir)) {
			pr_err("Can't readlink for %s\n", dirs[idx]);
			return -1;
		}
		dir[len] = 0;

		/* The leading dot makes the path relative to mntns_root */
		snprintf(path, sizeof(path), ".%s/%s", dir, sk->name);
		if (fstatat(mntns_root, path, &st, 0)) {
			if (errno == ENOENT)
				continue;
			goto err;
		}

		/* Is it the very file the socket is bound to? */
		if (st.st_ino != rel_name->udiag_vfs_ino)
			continue;
		if (!phys_stat_dev_match(st.st_dev, rel_name->udiag_vfs_dev, ns, path))
			continue;

		rel_name->dir = xstrdup(dir);
		if (!rel_name->dir)
			return -ENOMEM;

		pr_debug("Resolved relative socket name to dir %s\n", rel_name->dir);

		sk->mode = st.st_mode;
		sk->uid = st.st_uid;
		sk->gid = st.st_gid;
		return 0;
	}

err:
	pr_err("Can't resolve name for socket %#x\n", rel_name->udiag_vfs_ino);
	return -ENOENT;
}
/*
 * Dump one unix socket descriptor.
 *
 * @lfd: descriptor of the socket as seen by the dumper; used to read
 *       socket options and the receive queue, and dup()-ed when the
 *       entry has to wait for its peer
 * @id:  file id stored in the image entry
 * @p:   descriptor parameters (inode, flags, owner info, fd number)
 *
 * The image entry and its sub-messages (socket options, file perms,
 * fown) are carved out of a single allocation, so freeing the entry
 * frees all of them.
 *
 * If the socket has a peer that is not dumped yet, the entry is kept in
 * sk->ue and written later: either when the peer itself gets dumped
 * (loop at the end of this function) or from
 * fix_external_unix_sockets().
 *
 * Returns 0 on success, -1 on error.
 */
static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
{
	struct unix_sk_desc *sk, *peer;
	UnixSkEntry *ue;
	SkOptsEntry *skopts;
	FilePermsEntry *perms;
	FownEntry *fown;

	ue = xmalloc(sizeof(UnixSkEntry) +
		     sizeof(SkOptsEntry) +
		     sizeof(FilePermsEntry) +
		     sizeof(FownEntry));
	if (ue == NULL)
		return -1;

	skopts = (void *) ue + sizeof(UnixSkEntry);
	perms = (void *) skopts + sizeof(SkOptsEntry);
	fown = (void *) perms + sizeof(FilePermsEntry);

	unix_sk_entry__init(ue);
	sk_opts_entry__init(skopts);
	file_perms_entry__init(perms);

	*fown = p->fown;

	sk = (struct unix_sk_desc *)lookup_socket(p->stat.st_ino, PF_UNIX, 0);
	if (IS_ERR_OR_NULL(sk)) {
		pr_err("Unix socket %#x not found\n", (int)p->stat.st_ino);
		goto err;
	}

	if (!can_dump_unix_sk(sk))
		goto err;

	BUG_ON(sk->sd.already_dumped);

	ue->name.len	= (size_t)sk->namelen;
	ue->name.data	= (void *)sk->name;

	ue->id		= id;
	ue->ino		= sk->sd.ino;
	ue->type	= sk->type;
	ue->state	= sk->state;
	ue->flags	= p->flags;
	ue->backlog	= sk->wqlen;
	ue->peer	= sk->peer_ino;
	ue->fown	= fown;
	ue->opts	= skopts;
	ue->uflags	= 0;

	/* Relative names are resolved lazily, on the first dump attempt */
	if (sk->rel_name) {
		if (resolve_rel_name(sk, p))
			goto err;
		ue->name_dir = sk->rel_name->dir;
	}

	/*
	 * Check if this socket is connected to criu service.
	 * Dump it like closed one and mark it for restore.
	 */
	if (unlikely(ue->peer == service_sk_ino)) {
		ue->state = TCP_CLOSE;
		ue->peer = 0;
		ue->uflags |= USK_SERVICE;
	}

	/* Permissions only make sense for sockets bound to a file */
	if (sk->namelen && *sk->name) {
		ue->file_perms = perms;

		perms->mode	= sk->mode;
		perms->uid	= userns_uid(sk->uid);
		perms->gid	= userns_gid(sk->gid);
	}

	sk_encode_shutdown(ue, sk->shutdown);

	if (ue->peer) {
		peer = (struct unix_sk_desc *)lookup_socket(ue->peer, PF_UNIX, 0);
		if (IS_ERR_OR_NULL(peer)) {
			pr_err("Unix socket %#x without peer %#x\n",
			       ue->ino, ue->peer);
			goto err;
		}

		/*
		 * Peer should have us as peer or have a name by which
		 * we can access one.
		 */
		if (peer->peer_ino != ue->ino) {
			if (!peer->name) {
				pr_err("Unix socket %#x with unreachable peer %#x (%#x/%s)\n",
				       ue->ino, ue->peer, peer->peer_ino, peer->name);
				goto err;
			}
		}

		/*
		 * It can be external socket, so we defer dumping
		 * until all sockets the program owns are processed.
		 */
		if (!peer->sd.already_dumped) {
			if (list_empty(&peer->list)) {
				show_one_unix("Add a peer", peer);
				list_add_tail(&peer->list, &unix_sockets);
			}

			list_add(&sk->peer_node, &peer->peer_list);
			sk->fd = dup(lfd);
			if (sk->fd < 0) {
				pr_perror("Unable to dup(%d)", lfd);
				goto err;
			}
		}

		if ((ue->type != SOCK_DGRAM) && (
				((ue->shutdown == SK_SHUTDOWN__READ)  &&
				 (peer->shutdown != SK_SHUTDOWN__WRITE)) ||
				((ue->shutdown == SK_SHUTDOWN__WRITE) &&
				 (peer->shutdown != SK_SHUTDOWN__READ))  ||
				((ue->shutdown == SK_SHUTDOWN__BOTH)  &&
				 (peer->shutdown != SK_SHUTDOWN__BOTH)) )) {
			/*
			 * On restore we assume, that stream pairs must
			 * be shut down from one end only
			 */
			pr_err("Shutdown mismatch %u:%d -> %u:%d\n",
			       ue->ino, ue->shutdown, peer->sd.ino, peer->shutdown);
			goto err;
		}
	} else if (ue->state == TCP_ESTABLISHED) {
		const struct unix_sk_listen_icon *e;

		e = lookup_unix_listen_icons(ue->ino);
		if (!e) {
			/*
			 * ESTABLISHED socket without peer and without
			 * anyone waiting for it should be semi-closed
			 * connection.
			 */
			if (ue->shutdown == SK_SHUTDOWN__BOTH) {
				pr_info("Dumping semi-closed connection\n");
				goto dump;
			}

			pr_err("Dangling connection %#x\n", ue->ino);
			goto err;
		}

		/*
		 * If this is in-flight connection we need to figure
		 * out where to connect it on restore. Thus, tune up peer
		 * id by searching an existing listening socket.
		 *
		 * Note the socket name will be found at restore stage,
		 * not now, just to reduce size of dump files.
		 */

		/* e->sk_desc is _never_ NULL */
		if (e->sk_desc->state != TCP_LISTEN) {
			pr_err("In-flight connection on "
			       "non-listening socket %d\n", ue->ino);
			goto err;
		}

		ue->peer = e->sk_desc->sd.ino;

		pr_debug("\t\tFixed inflight socket %#x peer %#x)\n",
			 ue->ino, ue->peer);
	}
dump:
	if (dump_socket_opts(lfd, skopts))
		goto err;

	/*
	 * If a stream listening socket has non-zero rqueue, this
	 * means there are in-flight connections waiting to get
	 * accept()-ed. We handle them separately with the "icons"
	 * (i stands for in-flight, cons -- for connections) things.
	 */
	if (sk->rqlen != 0 && !(sk->type == SOCK_STREAM &&
				sk->state == TCP_LISTEN))
		if (dump_sk_queue(lfd, id))
			goto err;

	pr_info("Dumping unix socket at %d\n", p->fd);
	show_one_unix("Dumping", sk);

	sk->ue = ue;
	/*
	 * Postpone writing the entry if a peer isn't found yet.
	 * It's required, because we may need to modify the entry.
	 * For example, if a socket is external and is dumped by
	 * a callback, the USK_CALLBACK flag must be set.
	 */
	if (list_empty(&sk->peer_node) && write_unix_entry(sk))
		return -1;

	list_del_init(&sk->list);
	sk->sd.already_dumped = 1;

	/* Flush the entries of sockets that were waiting for this one */
	while (!list_empty(&sk->peer_list)) {
		struct unix_sk_desc *psk;

		psk = list_first_entry(&sk->peer_list, struct unix_sk_desc, peer_node);
		close_safe(&psk->fd);
		list_del_init(&psk->peer_node);

		if (write_unix_entry(psk))
			return -1;
	}

	return 0;

err:
	release_skopts(skopts);
	xfree(ue);
	return -1;
}
/*
 * Dump operations for descriptors of type FD_TYPES__UNIXSK:
 * dump_one_unix_fd() is the dump callback.
 */
const struct fdtype_ops unix_dump_ops = {
.type = FD_TYPES__UNIXSK,
.dump = dump_one_unix_fd,
};
/*
* Returns: < 0 on error, 0 if OK, 1 to skip the socket
*/
static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg *m, struct rtattr **tb)
{
char *name;
int len;
len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
name = xmalloc(len + 1);
if (!name)
return -ENOMEM;
memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
name[len] = '\0';
if (name[0] != '\0') {
struct unix_diag_vfs *uv;
bool drop_path = false;
char rpath[PATH_MAX];
struct ns_id *ns;
struct stat st;
int mntns_root;
if (!tb[UNIX_DIAG_VFS]) {
pr_err("Bound socket w/o inode %#x\n", m->udiag_ino);
goto skip;
}
ns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc);
if (!ns)
return -ENOENT;
mntns_root = mntns_get_root_fd(ns);
if (mntns_root < 0)
return -ENOENT;
uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
if (name[0] != '/') {
/*
* Relative names are be resolved later at first
* dump attempt.
*/
rel_name_desc_t *rel_name = xzalloc(sizeof(*rel_name));
if (!rel_name)
return -ENOMEM;
rel_name->udiag_vfs_dev = uv->udiag_vfs_dev;
rel_name->udiag_vfs_ino = uv->udiag_vfs_ino;
d->rel_name = rel_name;
goto postprone;
}
snprintf(rpath, sizeof(rpath), ".%s", name);
if (fstatat(mntns_root, rpath, &st, 0)) {
if (errno != ENOENT) {
pr_warn("Can't stat socket %#x(%s), skipping: %m (err %d)\n",
m->udiag_ino, rpath, errno);
goto skip;
}
pr_info("unix: Dropping path %s for unlinked sk %#x\n",
name, m->udiag_ino);
drop_path = true;
} else if ((st.st_ino != uv->udiag_vfs_ino) ||
!phys_stat_dev_match(st.st_dev, uv->udiag_vfs_dev, ns, name)) {
pr_info("unix: Dropping path %s for unlinked bound "
"sk %#x.%#x real %#x.%#x\n",
name, (int)st.st_dev, (int)st.st_ino,
(int)uv->udiag_vfs_dev, (int)uv->udiag_vfs_ino);
drop_path = true;
}
if (drop_path) {
/*
* When a socket is bound to unlinked file, we
* just drop his name, since no one will access
* it via one.
*/
xfree(name);
len = 0;
name = NULL;
}
d->mode = st.st_mode;
d->uid = st.st_uid;
d->gid = st.st_gid;
}
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
postprone:
d->namelen = len;
d->name = name;
return 0;
skip:
xfree(name);
return 1;
}
/*
 * Build a unix_sk_desc out of one unix_diag message and register it.
 *
 * @m:  the diag message header (inode, type, state)
 * @tb: attribute table parsed from the message, indexed by UNIX_DIAG_*
 *
 * Returns 0 on success (including the case when the socket is
 * skipped), -1 on error.
 */
static int unix_collect_one(const struct unix_diag_msg *m,
		struct rtattr **tb)
{
	struct unix_sk_desc *d;
	int ret = 0;

	d = xzalloc(sizeof(*d));
	if (!d)
		return -1;

	d->type	 = m->udiag_type;
	d->state = m->udiag_state;
	INIT_LIST_HEAD(&d->list);
	INIT_LIST_HEAD(&d->peer_list);
	INIT_LIST_HEAD(&d->peer_node);
	d->fd = -1;

	if (tb[UNIX_DIAG_SHUTDOWN])
		d->shutdown = *(u8 *)RTA_DATA(tb[UNIX_DIAG_SHUTDOWN]);
	else
		pr_err_once("No socket shutdown info\n");

	if (tb[UNIX_DIAG_PEER])
		d->peer_ino = *(int *)RTA_DATA(tb[UNIX_DIAG_PEER]);

	if (tb[UNIX_DIAG_NAME]) {
		ret = unix_process_name(d, m, tb);
		if (ret < 0)
			goto err;
		else if (ret == 1)
			goto skip;
		BUG_ON(ret != 0);
	}

	if (tb[UNIX_DIAG_ICONS]) {
		int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);
		unsigned int i;

		d->icons = xmalloc(len);
		if (!d->icons)
			goto err;

		memcpy(d->icons, RTA_DATA(tb[UNIX_DIAG_ICONS]), len);
		d->nr_icons = len / sizeof(u32);

		/*
		 * Remember these sockets, we will need them
		 * to fix up in-flight sockets peers.
		 */
		for (i = 0; i < d->nr_icons; i++) {
			struct unix_sk_listen_icon *e, **chain;
			unsigned int n;

			e = xzalloc(sizeof(*e));
			if (!e)
				goto err;

			/*
			 * Hash on the unsigned inode value: with a
			 * signed operand % is negative for inodes at or
			 * above 2^31 and would index (and write) outside
			 * of unix_listen_icons[].
			 */
			n = d->icons[i];
			chain = &unix_listen_icons[n % SK_HASH_SIZE];
			e->next = *chain;
			*chain = e;

			pr_debug("\t\tCollected icon %d\n", d->icons[i]);

			e->peer_ino	= n;
			e->sk_desc	= d;
		}
	}

	if (tb[UNIX_DIAG_RQLEN]) {
		struct unix_diag_rqlen *rq;

		rq = (struct unix_diag_rqlen *)RTA_DATA(tb[UNIX_DIAG_RQLEN]);
		d->rqlen = rq->udiag_rqueue;
		d->wqlen = rq->udiag_wqueue;
	}

	sk_collect_one(m->udiag_ino, AF_UNIX, &d->sd);
	show_one_unix("Collected", d);

	return 0;

err:
	ret = -1;
skip:
	/*
	 * NOTE(review): icon records already linked into the hash above
	 * keep pointing at @d after it is freed here; this looks harmless
	 * only if collection is abandoned on error -- confirm with callers.
	 */
	xfree(d->icons);
	xfree(d->name);
	/* Allocated by unix_process_name() for relative names */
	xfree(d->rel_name);
	xfree(d);
	return ret;
}
/*
 * Netlink receive callback for one unix_diag message: parse the
 * attributes that follow the unix_diag_msg header and collect the
 * socket they describe.
 *
 * @h:   netlink message header
 * @arg: not used
 *
 * Returns the result of unix_collect_one().
 */
int unix_receive_one(struct nlmsghdr *h, void *arg)
{
	struct rtattr *tb[UNIX_DIAG_MAX+1];
	struct unix_diag_msg *m = NLMSG_DATA(h);
	/* Attributes start right after the fixed-size diag header */
	struct rtattr *attrs = (struct rtattr *)(m + 1);
	int attrs_len = h->nlmsg_len - NLMSG_LENGTH(sizeof(*m));

	parse_rtattr(tb, UNIX_DIAG_MAX, attrs, attrs_len);

	return unix_collect_one(m, tb);
}
static int dump_external_sockets(struct unix_sk_desc *peer)
{
struct unix_sk_desc *sk;
int ret;
while (!list_empty(&peer->peer_list)) {
sk = list_first_entry(&peer->peer_list, struct unix_sk_desc, peer_node);
plugin: Rework plugins API, v2 Here we define new api to be used in plugins. - Plugin should provide a descriptor with help of CR_PLUGIN_REGISTER macro, or in case if plugin require no init/exit functions -- with CR_PLUGIN_REGISTER_DUMMY. - Plugin should define a plugin hook with help of CR_PLUGIN_REGISTER_HOOK macro. - Now init/exit functions of plugins takes @stage argument which tells plugin which stage of criu it's been called on dump/restore. For exit it also takes @ret which allows plugin to know if something went wrong and it needs to cleanup own resources. The idea behind is to not limit plugins authors with names of functions they might need to use for particular hook. Such new API deprecates olds plugins structure but to keep backward compatibility we will provide a tiny layer of additional code to support old plugins for at least a couple of release cycles. For example a trivial plugin might look like | #include <sys/types.h> | #include <sys/stat.h> | #include <fcntl.h> | #include <libgen.h> | #include <errno.h> | | #include <sys/socket.h> | #include <linux/un.h> | | #include <stdio.h> | #include <stdlib.h> | #include <string.h> | #include <unistd.h> | | #include "criu-plugin.h" | #include "criu-log.h" | | static int dump_ext_file(int fd, int id) | { | pr_info("dump_ext_file: fd %d id %d\n", fd, id); | return 0; | } | | CR_PLUGIN_REGISTER_DUMMY("trivial") | CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_EXT_FILE, dump_ext_file) Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrew Vagin <avagin@parallels.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2014-02-27 20:58:23 +04:00
ret = run_plugins(DUMP_UNIX_SK, sk->fd, sk->sd.ino);
if (ret == -ENOTSUP) {
if (!opts.ext_unix_sk) {
show_one_unix("Runaway socket", peer);
pr_err("External socket is used. "
"Consider using --" USK_EXT_PARAM " option.\n");
return -1;
}
if (peer->type != SOCK_DGRAM) {
show_one_unix("Ext stream not supported", peer);
pr_err("Can't dump half of stream unix connection.\n");
return -1;
}
if (!peer->name) {
show_one_unix("Ext dgram w/o name", peer);
pr_err("Can't dump name-less external socket.\n");
return -1;
}
} else if (ret < 0)
return -1;
else
sk->ue->uflags |= USK_CALLBACK;
if (write_unix_entry(sk))
return -1;
close_safe(&sk->fd);
list_del_init(&sk->peer_node);
}
return 0;
}
/*
 * Dump the sockets left on the unix_sockets list, i.e. peers that were
 * referenced by dumped sockets but never dumped themselves.
 *
 * For each of them a stub image entry flagged USK_EXTERN is written
 * (as a listening datagram socket with the collected name), followed
 * by the postponed entries of the sockets connected to it.
 *
 * Returns 0 on success, -1 on error.
 */
int fix_external_unix_sockets(void)
{
	struct unix_sk_desc *sk;

	pr_debug("Dumping external sockets\n");

	list_for_each_entry(sk, &unix_sockets, list) {
		FownEntry fown = FOWN_ENTRY__INIT;
		SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
		UnixSkEntry e = UNIX_SK_ENTRY__INIT;

		show_one_unix("Dumping extern", sk);

		BUG_ON(sk->sd.already_dumped);

		fd_id_generate_special(NULL, &e.id);
		e.ino		= sk->sd.ino;
		e.type		= SOCK_DGRAM;
		e.state		= TCP_LISTEN;
		e.name.data	= (void *)sk->name;
		e.name.len	= (size_t)sk->namelen;
		e.uflags	= USK_EXTERN;
		e.peer		= 0;
		e.fown		= &fown;
		e.opts		= &skopts;

		if (pb_write_one(img_from_set(glob_imgset, CR_FD_UNIXSK), &e, PB_UNIX_SK))
			return -1;

		show_one_unix_img("Dumped extern", &e);

		if (dump_external_sockets(sk))
			return -1;
	}

	return 0;
}
struct unix_sk_info {
UnixSkEntry *ue;
struct list_head list;
char *name;
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
char *name_dir;
unsigned flags;
struct unix_sk_info *peer;
struct file_desc d;
/*
* Futex to signal when the socket is prepared. In particular, we
* signal after bind()ing the socket if it is not in TCP_LISTEN, or
* after listen() if the socket is in TCP_LISTEN.
*/
futex_t prepared;
/*
* For DGRAM sockets with queues, we should only restore the queue
* once although it may be open by more than one tid. This is the peer
* that should do the queueing.
*/
u32 queuer;
};
/*
 * Bits for unix_sk_info.flags. post_open_unix_sk() returns early for
 * sockets carrying either bit.
 * NOTE(review): presumably they mark the two ends of a socketpair()
 * restored together -- confirm where the flags are set.
 */
#define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2
/*
 * Look up a socket on the unix_sockets list by the inode number stored
 * in its image entry.
 *
 * Returns the matching unix_sk_info or NULL if there is none.
 */
static struct unix_sk_info *find_unix_sk_by_ino(int ino)
{
	struct unix_sk_info *found = NULL;
	struct unix_sk_info *ui;

	list_for_each_entry(ui, &unix_sockets, list) {
		if (ui->ue->ino != ino)
			continue;

		found = ui;
		break;
	}

	return found;
}
/*
 * Re-apply the shutdown state recorded in the image to socket @sk.
 *
 * Nothing is done when the entry carries no shutdown field or the
 * field says SK_SHUTDOWN__NONE.
 *
 * Returns 0 on success, -1 if shutdown() failed.
 */
static int shutdown_unix_sk(int sk, struct unix_sk_info *ui)
{
	UnixSkEntry *ue = ui->ue;
	int how;

	if (ue->has_shutdown && ue->shutdown != SK_SHUTDOWN__NONE) {
		how = sk_decode_shutdown(ue->shutdown);
		if (shutdown(sk, how)) {
			pr_perror("Can't shutdown unix socket");
			return -1;
		}

		pr_debug("Socket %#x is shut down %d\n", ue->ino, how);
	}

	return 0;
}
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
/*
 * Change the working directory to the one recorded for a socket with a
 * relative name, so that a following bind/connect/unlink on that name is
 * resolved against it.
 *
 * Sockets without a name_dir are left alone. This helper does not restore
 * the previous working directory.
 *
 * Returns 0 on success (or when there is nothing to do), -1 if chdir()
 * fails.
 */
static int prep_unix_sk_cwd(struct unix_sk_info *ui)
{
	if (!ui->name_dir)
		return 0;

	if (chdir(ui->name_dir)) {
		/*
		 * No trailing "\n" in the format: this matches every other
		 * pr_perror() call in this file.
		 */
		pr_perror("Can't change working dir %s", ui->name_dir);
		return -1;
	}

	pr_debug("Change working dir to %s\n", ui->name_dir);
	return 0;
}
/*
 * post_open callback: connect a restored unix socket to its peer by name.
 *
 * Nothing is done (0 is returned) for sockets that belong to a
 * master/slave pair, have no resolved peer, or carry the USK_CALLBACK flag.
 *
 * Otherwise the function
 *  - waits for the peer to become "prepared" when the peer has fd entries
 *    attached to it,
 *  - switches to the peer's name directory and connect()s to the peer name,
 *  - restores the peer's queue if this socket is the peer's queuer,
 *  - restores file parameters, socket options and the shutdown state.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int post_open_unix_sk(struct file_desc *d, int fd)
{
	struct unix_sk_info *ui = container_of(d, struct unix_sk_info, d);
	struct unix_sk_info *peer;
	struct sockaddr_un addr;
	size_t name_len;

	if (ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE))
		return 0;

	peer = ui->peer;
	if (peer == NULL)
		return 0;

	if (ui->ue->uflags & USK_CALLBACK)
		return 0;

	pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);

	/* Skip external sockets: only wait for peers that have fd entries */
	if (!list_empty(&peer->d.fd_info_head))
		futex_wait_while(&peer->prepared, 0);

	name_len = peer->ue->name.len;

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/*
	 * An unnamed peer has name == NULL and a zero length; memcpy()
	 * must not be handed a NULL source even for zero bytes.
	 */
	if (name_len)
		memcpy(addr.sun_path, peer->name, name_len);

	if (prep_unix_sk_cwd(peer))
		return -1;

	if (connect(fd, (struct sockaddr *)&addr,
		    sizeof(addr.sun_family) + name_len) < 0) {
		pr_perror("Can't connect %#x socket", ui->ue->ino);
		return -1;
	}

	if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id))
		return -1;

	if (rst_file_params(fd, ui->ue->fown, ui->ue->flags))
		return -1;

	if (restore_socket_opts(fd, ui->ue->opts))
		return -1;

	if (shutdown_unix_sk(fd, ui))
		return -1;

	return 0;
}
static int bind_unix_sk(int sk, struct unix_sk_info *ui)
{
struct sockaddr_un addr;
if ((ui->ue->type == SOCK_STREAM) && (ui->ue->state == TCP_ESTABLISHED))
/*
* FIXME this can be done, but for doing this properly we
* need to bind socket to its name, then rename one to
* some temporary unique one and after all the sockets are
* restored we should walk those temp names and rename
* some of them back to real ones.
*/
goto done;
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
if (prep_unix_sk_cwd(ui))
return -1;
if (bind(sk, (struct sockaddr *)&addr,
sizeof(addr.sun_family) + ui->ue->name.len)) {
pr_perror("Can't bind socket");
return -1;
}
if (ui->ue->name.len && *ui->name && ui->ue->file_perms) {
FilePermsEntry *perms = ui->ue->file_perms;
char fname[PATH_MAX];
if (ui->ue->name.len >= sizeof(fname)) {
pr_err("The file name is too long\n");
return -1;
}
memcpy(fname, ui->name, ui->ue->name.len);
fname[ui->ue->name.len] = '\0';
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
if (fchownat(AT_FDCWD, fname, perms->uid, perms->gid, 0) == -1) {
pr_perror("Unable to change file owner and group");
return -1;
}
sk-unix: Add trivial name resolver for sockets with relative names Unix sockets may be created with non-absolute (relative) path (when kernel creates one it always use AT_FDCWD for name resolving), So when we collect sockets we see them as having names without leading slash. In common cases for such sockets application doesn't change own working directory after that but this is not always the true. So we need to invent some name resolver. The good candidate is IRMAP cache but after a number of testings I found that it might slow down performance very dramatically. Thus we need some more intelligent way here. For a while, for common applications such as postfix, fetching dumpee working directory and root is enough. So here what we do - when socket get collected from diag interface we remember its relative name parameters (device and inode) but postprone name resolving to not bring perf penalty until really needed - when we meet a socket to dump with relative name assigned we try to use $cwd/name and $root/name for this socket to check if it has been created in those directories. On success we simply remember the directory in image and when restore such socket call for chdir helper to change working dir and generate relative name v2: - Use new unlink_stale to remove sockets we're to restore - Use *at() helpers once we're changed working dir in bind_unix_sk - Add more debug ouput Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Conflicts: sk-unix.c
2015-07-28 17:02:36 +03:00
if (fchmodat(AT_FDCWD, fname, perms->mode, 0) == -1) {
pr_perror("Unable to change file mode bits");
return -1;
}
}
if (ui->ue->state != TCP_LISTEN)
futex_set_and_wake(&ui->prepared, 1);
done:
return 0;
}
/*
 * want_transport callback: report whether the socket behind @d is the
 * slave end of a pair (USK_PAIR_SLAVE set in its flags). @fe is unused.
 *
 * Returns non-zero for a pair slave, 0 otherwise.
 */
static int unixsk_should_open_transport(FdinfoEntry *fe,
					struct file_desc *d)
{
	struct unix_sk_info *info = container_of(d, struct unix_sk_info, d);

	return info->flags & USK_PAIR_SLAVE;
}
/*
 * Restore the master side of a pair of interconnected unix sockets.
 *
 * A socketpair is created; queues of both ends are restored; the first
 * end is bound and gets its file parameters, socket options and shutdown
 * state; the second end is sent to the peer's master fd entry over a
 * temporary datagram socket.
 *
 * Returns the descriptor of the master end on success. On failure returns
 * -1 after closing every descriptor this function created.
 */
static int open_unixsk_pair_master(struct unix_sk_info *ui)
{
	int sk[2], tsk = -1;
	struct unix_sk_info *peer = ui->peer;
	struct fdinfo_list_entry *fle;

	pr_info("Opening pair master (id %#x ino %#x peer %#x)\n",
		ui->ue->id, ui->ue->ino, ui->ue->peer);

	if (socketpair(PF_UNIX, ui->ue->type, 0, sk) < 0) {
		pr_perror("Can't make socketpair");
		return -1;
	}

	if (restore_sk_queue(sk[0], peer->ue->id))
		goto err;
	if (restore_sk_queue(sk[1], ui->ue->id))
		goto err;

	if (bind_unix_sk(sk[0], ui))
		goto err;

	if (rst_file_params(sk[0], ui->ue->fown, ui->ue->flags))
		goto err;

	if (restore_socket_opts(sk[0], ui->ue->opts))
		goto err;

	if (shutdown_unix_sk(sk[0], ui))
		goto err;

	/* Temporary socket used only to pass the slave end to the peer */
	tsk = socket(PF_UNIX, SOCK_DGRAM, 0);
	if (tsk < 0) {
		pr_perror("Can't make transport socket");
		goto err;
	}

	fle = file_master(&peer->d);
	if (send_fd_to_peer(sk[1], fle, tsk)) {
		pr_err("Can't send pair slave\n");
		goto err;
	}

	close(tsk);
	close(sk[1]);

	return sk[0];

err:
	/* Don't leak the descriptors created above on failure */
	if (tsk >= 0)
		close(tsk);
	close(sk[0]);
	close(sk[1]);
	return -1;
}
/*
 * Restore the slave side of a pair of interconnected unix sockets.
 *
 * The socket itself is received over the descriptor of this file's master
 * fd entry, which is closed afterwards. The received socket is then bound
 * and gets its file parameters and socket options restored. The shutdown
 * state is applied here for datagram sockets only.
 *
 * Returns the received descriptor on success. On failure returns -1; a
 * descriptor that was already received is closed first.
 */
static int open_unixsk_pair_slave(struct unix_sk_info *ui)
{
	struct fdinfo_list_entry *fle;
	int sk;

	fle = file_master(&ui->d);

	pr_info("Opening pair slave (id %#x ino %#x peer %#x) on %d\n",
		ui->ue->id, ui->ue->ino, ui->ue->peer, fle->fe->fd);

	sk = recv_fd(fle->fe->fd);
	if (sk < 0) {
		pr_err("Can't recv pair slave\n");
		return -1;
	}
	close(fle->fe->fd);

	if (bind_unix_sk(sk, ui))
		goto err;

	if (rst_file_params(sk, ui->ue->fown, ui->ue->flags))
		goto err;

	if (restore_socket_opts(sk, ui->ue->opts))
		goto err;

	if (ui->ue->type == SOCK_DGRAM) {
		/*
		 * Stream socket's "slave" end will be shut down
		 * together with master
		 */
		if (shutdown_unix_sk(sk, ui))
			goto err;
	}

	return sk;

err:
	/* Don't leak the received descriptor on failure */
	close(sk);
	return -1;
}
static int open_unixsk_standalone(struct unix_sk_info *ui)
{
int sk;
pr_info("Opening standalone socket (id %#x ino %#x peer %#x)\n",
ui->ue->id, ui->ue->ino, ui->ue->peer);
/*
* Check if this socket was connected to criu service.
* If so, put response, that dumping and restoring
* was successful.
*/
if (ui->ue->uflags & USK_SERVICE) {
int sks[2];
if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) {
pr_perror("Can't create socketpair");
return -1;
}
if (send_criu_dump_resp(sks[1], true, true) == -1)
return -1;
close(sks[1]);
sk = sks[0];
} else if ((ui->ue->state == TCP_ESTABLISHED) && !ui->ue->peer) {
int ret, sks[2];
if (ui->ue->type != SOCK_STREAM) {
pr_err("Non-stream socket %x in established state\n",
ui->ue->ino);
return -1;
}
if (ui->ue->shutdown != SK_SHUTDOWN__BOTH) {
pr_err("Wrong shutdown/peer state for %x\n",
ui->ue->ino);
return -1;
}
ret = socketpair(PF_UNIX, ui->ue->type, 0, sks);
if (ret < 0) {
pr_perror("Can't create socketpair");
return -1;
}
/*
* Restore queue at the one end,
* before closing the second one.
*/
if (restore_sk_queue(sks[1], ui->ue->id)) {
pr_perror("Can't restore socket queue");
return -1;
}
close(sks[1]);
sk = sks[0];
} else {
if (ui->ue->uflags & USK_CALLBACK) {
plugin: Rework plugins API, v2 Here we define new api to be used in plugins. - Plugin should provide a descriptor with help of CR_PLUGIN_REGISTER macro, or in case if plugin require no init/exit functions -- with CR_PLUGIN_REGISTER_DUMMY. - Plugin should define a plugin hook with help of CR_PLUGIN_REGISTER_HOOK macro. - Now init/exit functions of plugins takes @stage argument which tells plugin which stage of criu it's been called on dump/restore. For exit it also takes @ret which allows plugin to know if something went wrong and it needs to cleanup own resources. The idea behind is to not limit plugins authors with names of functions they might need to use for particular hook. Such new API deprecates olds plugins structure but to keep backward compatibility we will provide a tiny layer of additional code to support old plugins for at least a couple of release cycles. For example a trivial plugin might look like | #include <sys/types.h> | #include <sys/stat.h> | #include <fcntl.h> | #include <libgen.h> | #include <errno.h> | | #include <sys/socket.h> | #include <linux/un.h> | | #include <stdio.h> | #include <stdlib.h> | #include <string.h> | #include <unistd.h> | | #include "criu-plugin.h" | #include "criu-log.h" | | static int dump_ext_file(int fd, int id) | { | pr_info("dump_ext_file: fd %d id %d\n", fd, id); | return 0; | } | | CR_PLUGIN_REGISTER_DUMMY("trivial") | CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_EXT_FILE, dump_ext_file) Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: Andrew Vagin <avagin@parallels.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2014-02-27 20:58:23 +04:00
sk = run_plugins(RESTORE_UNIX_SK, ui->ue->ino);
if (sk >= 0)
goto out;
}
/*
* Connect to external sockets requires
* special option to be passed.
*/
if (ui->peer && (ui->peer->ue->uflags & USK_EXTERN) &&
!(opts.ext_unix_sk)) {
pr_err("External socket found in image. "
"Consider using the --" USK_EXT_PARAM
"option to allow restoring it.\n");
return -1;
}
sk = socket(PF_UNIX, ui->ue->type, 0);
if (sk < 0) {
pr_perror("Can't make unix socket");
return -1;
}
}
if (bind_unix_sk(sk, ui))
return -1;
if (ui->ue->state == TCP_LISTEN) {
pr_info("\tPutting %#x into listen state\n", ui->ue->ino);
if (listen(sk, ui->ue->backlog) < 0) {
pr_perror("Can't make usk listen");
return -1;
}
futex_set_and_wake(&ui->prepared, 1);
}
out:
if (rst_file_params(sk, ui->ue->fown, ui->ue->flags))
return -1;
if (restore_socket_opts(sk, ui->ue->opts))
return -1;
return sk;
}
/*
 * open callback for unix sockets: pick the restore routine according to
 * the role of the socket (pair master, pair slave or standalone) and
 * return whatever that routine returns.
 */
static int open_unix_sk(struct file_desc *d)
{
	struct unix_sk_info *info = container_of(d, struct unix_sk_info, d);

	if (info->flags & USK_PAIR_MASTER)
		return open_unixsk_pair_master(info);

	if (info->flags & USK_PAIR_SLAVE)
		return open_unixsk_pair_slave(info);

	return open_unixsk_standalone(info);
}
/*
 * File descriptor operations for unix sockets; registered for every
 * collected socket by collect_one_unixsk() via file_desc_add().
 */
static struct file_desc_ops unix_desc_ops = {
.type = FD_TYPES__UNIXSK,
/* Creates the socket (pair master, pair slave or standalone) */
.open = open_unix_sk,
/* Connects a standalone socket to its peer */
.post_open = post_open_unix_sk,
/* Non-zero for pair slaves (USK_PAIR_SLAVE) */
.want_transport = unixsk_should_open_transport,
};
/*
 * sk-unix: Add trivial name resolver for sockets with relative names
 *
 * Unix sockets may be created with a non-absolute (relative) path (when the
 * kernel creates one it always uses AT_FDCWD for name resolving), so when we
 * collect sockets we see them as having names without a leading slash.
 *
 * In common cases the application doesn't change its own working directory
 * after creating such a socket, but this is not always true. So we need to
 * invent some name resolver.
 *
 * A good candidate is the IRMAP cache, but after a number of tests I found
 * that it might slow down performance very dramatically. Thus we need some
 * more intelligent way here.
 *
 * For a while, for common applications such as postfix, fetching the dumpee
 * working directory and root is enough. So here is what we do:
 *
 *  - when a socket gets collected from the diag interface we remember its
 *    relative name parameters (device and inode) but postpone name resolving
 *    so as not to bring a perf penalty until really needed
 *
 *  - when we meet a socket to dump with a relative name assigned we try to
 *    use $cwd/name and $root/name for this socket to check if it has been
 *    created in those directories. On success we simply remember the
 *    directory in the image and when restoring such a socket call the chdir
 *    helper to change the working dir and generate the relative name
 *
 * v2:
 *  - Use new unlink_stale to remove sockets we're to restore
 *  - Use *at() helpers once we've changed working dir in bind_unix_sk
 *  - Add more debug output
 *
 * Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
 * Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
 *
 * Conflicts: sk-unix.c
 *
 * 2015-07-28 17:02:36 +03:00
 */
/*
* Make FS clean from sockets we're about to
* restore. See for how we bind them for details
*/
static int unlink_stale(struct unix_sk_info *ui)
{
if (ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN))
return 0;
if (prep_unix_sk_cwd(ui))
return -1;
return unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0;
}
/*
 * Collect callback for one unix socket image entry.
 *
 * Initializes the unix_sk_info in @o from the UnixSkEntry in @base:
 * remembers the name and its directory, tries to unlink a stale socket
 * file for named sockets (a failure is only warned about), resets the
 * peer/queuer/flags state, appends the socket to the unix_sockets list
 * and registers its file descriptor.
 *
 * Returns the result of file_desc_add(), or -1 when the name recorded in
 * the image is UNIX_PATH_MAX bytes or longer.
 */
static int collect_one_unixsk(void *o, ProtobufCMessage *base)
{
	struct unix_sk_info *ui = o;
	/* What to print as the socket name, and how many bytes of it */
	const char *pname = "-";
	int pname_len = 1;

	ui->ue = pb_msg(base, UnixSkEntry);
	ui->name_dir = (void *)ui->ue->name_dir;

	if (ui->ue->name.len) {
		if (ui->ue->name.len >= UNIX_PATH_MAX) {
			pr_err("Bad unix name len %d\n", (int)ui->ue->name.len);
			return -1;
		}

		ui->name = (void *)ui->ue->name.data;

		/*
		 * The name is a length-delimited byte array, so bound the
		 * printing by its length. For names with a leading NUL byte
		 * print what follows that byte.
		 */
		if (ui->name[0]) {
			pname = ui->name;
			pname_len = (int)ui->ue->name.len;
		} else {
			pname = &ui->name[1];
			pname_len = (int)ui->ue->name.len - 1;
		}

		if (unlink_stale(ui)) {
			pr_warn("Can't unlink stale socket %#x peer %#x (name %.*s dir %s)\n",
				ui->ue->ino, ui->ue->peer,
				pname_len, pname,
				ui->name_dir ? ui->name_dir : "-");
		}
	} else
		ui->name = NULL;

	futex_init(&ui->prepared);
	ui->queuer = 0;
	ui->peer = NULL;
	ui->flags = 0;

	pr_info(" `- Got %#x peer %#x (name %.*s dir %s)\n",
		ui->ue->ino, ui->ue->peer,
		pname_len, pname,
		ui->name_dir ? ui->name_dir : "-");

	list_add_tail(&ui->list, &unix_sockets);
	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
}
/*
 * Describes how unix socket entries are collected from the image:
 * each entry gets a struct unix_sk_info as private data and is handed
 * to collect_one_unixsk().
 */
struct collect_image_info unix_sk_cinfo = {
.fd_type = CR_FD_UNIXSK,
.pb_type = PB_UNIX_SK,
/* Size of the per-entry private area passed to .collect */
.priv_size = sizeof(struct unix_sk_info),
.collect = collect_one_unixsk,
.flags = COLLECT_SHARED,
};
/*
 * Entry point for collecting unix socket data: delegates to
 * read_sk_queues() and passes its return value through unchanged.
 */
int collect_unix_sockets(void)
{
	int ret;

	ret = read_sk_queues();

	return ret;
}
/*
 * Link every collected unix socket with its peer and decide which side
 * of each interconnected pair restores it.
 *
 * For each socket with a peer inode recorded in the image the peer is
 * looked up by inode. The first socket that resolves to a given peer
 * becomes that peer's queuer. When two distinct sockets point at each
 * other, one of them is flagged USK_PAIR_MASTER and the other
 * USK_PAIR_SLAVE. Finally the resulting table is logged.
 *
 * Returns 0 on success, -1 if some peer cannot be found.
 */
int resolve_unix_peers(void)
{
	struct unix_sk_info *sk, *other;
	struct fdinfo_list_entry *fle;

	list_for_each_entry(sk, &unix_sockets, list) {
		struct fdinfo_list_entry *sk_fle, *other_fle;
		struct unix_sk_info *master, *slave;

		/* Already linked from the other side, or not connected at all */
		if (sk->peer || !sk->ue->peer)
			continue;

		other = find_unix_sk_by_ino(sk->ue->peer);
		if (other == NULL) {
			pr_err("FATAL: Peer %#x unresolved for %#x\n",
			       sk->ue->peer, sk->ue->ino);
			return -1;
		}

		sk->peer = other;
		if (other->queuer == 0)
			other->queuer = sk->ue->ino;

		/*
		 * Nothing more to do for a socket connected to self %)
		 * or for a one-way connection.
		 */
		if (sk == other || other->ue->peer != sk->ue->ino)
			continue;

		/* socketpair or interconnected sockets */
		other->peer = sk;

		/*
		 * Select who will restore the pair. Check is identical to
		 * the one in pipes.c and makes sure tasks wait for each other
		 * in pids sorting order (ascending).
		 */
		sk_fle = file_master(&sk->d);
		other_fle = file_master(&other->d);

		if (fdinfo_rst_prio(sk_fle, other_fle)) {
			master = sk;
			slave = other;
		} else {
			master = other;
			slave = sk;
		}

		master->flags |= USK_PAIR_MASTER;
		slave->flags |= USK_PAIR_SLAVE;
	}

	pr_info("Unix sockets:\n");
	list_for_each_entry(sk, &unix_sockets, list) {
		pr_info("\t%#x -> %#x (%#x) flags %#x\n", sk->ue->ino, sk->ue->peer,
			sk->peer ? sk->peer->ue->ino : 0, sk->flags);

		list_for_each_entry(fle, &sk->d.fd_info_head, desc_list)
			pr_info("\t\tfd %d in pid %d\n",
				fle->fe->fd, fle->pid);
	}

	return 0;
}