mirror of
https://github.com/checkpoint-restore/criu
synced 2025-09-02 15:25:21 +00:00
SCM: Dump and restore SCM_RIGHTs
Most of the pieces have already been described in the previous patches :) so here's the summary. * Dump: When receiving a message, also receive any SCM-s (already there) and when an SCM_RIGHTS one is met -- go ahead and just dump the received descriptors using regular code, but taking current as the victim task. A few words about file path resolution -- since we dump path-ed files by receiving them from the victim's parasite, such files sent via sockets should still work OK, as we still receive them, just from another socket. Several problems here: 1. Unix sockets sent via unix sockets form knots. Not supported. 2. Eventpolls sent via unix might themselves poll unix sockets. Knots again. Not supported either. * Restore: On restore we need to make the unix socket wait for the soon-to-be-scm-sent descriptors to get restored, so we need to find them, then put a dependency. After that, the fake fdinfo entry is attached to the respective file descs; once sent, the respective descriptors are closed. https://github.com/xemul/criu/issues/251 v2: Addressed comments from Kirill * Moved prepare_scms before adding fake fles (with comment) * Add scm-only fles as fake, thus removing close_scm_fds * Try hard finding any suitable fle to use as the scm one when queuing them for the unix socket scm list, only allocate a new one if really needed Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
This commit is contained in:
@@ -366,6 +366,14 @@ static int root_prepare_shared(void)
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This should be called with all packets collected AND all
|
||||||
|
* fdescs and fles prepared BUT post-prep-s not run.
|
||||||
|
*/
|
||||||
|
ret = prepare_scms();
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
|
||||||
ret = run_post_prepare();
|
ret = run_post_prepare();
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
@@ -37,6 +37,8 @@ extern int collect_sockets(struct ns_id *);
|
|||||||
extern struct collect_image_info inet_sk_cinfo;
|
extern struct collect_image_info inet_sk_cinfo;
|
||||||
extern struct collect_image_info unix_sk_cinfo;
|
extern struct collect_image_info unix_sk_cinfo;
|
||||||
extern int fix_external_unix_sockets(void);
|
extern int fix_external_unix_sockets(void);
|
||||||
|
extern int prepare_scms(void);
|
||||||
|
extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
|
||||||
|
|
||||||
extern struct collect_image_info netlink_sk_cinfo;
|
extern struct collect_image_info netlink_sk_cinfo;
|
||||||
|
|
||||||
|
140
criu/sk-queue.c
140
criu/sk-queue.c
@@ -18,9 +18,9 @@
|
|||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "util-pie.h"
|
#include "util-pie.h"
|
||||||
#include "sockets.h"
|
#include "sockets.h"
|
||||||
|
#include "xmalloc.h"
|
||||||
#include "sk-queue.h"
|
#include "sk-queue.h"
|
||||||
|
#include "files.h"
|
||||||
#include "protobuf.h"
|
#include "protobuf.h"
|
||||||
#include "images/sk-packet.pb-c.h"
|
#include "images/sk-packet.pb-c.h"
|
||||||
|
|
||||||
@@ -28,6 +28,8 @@ struct sk_packet {
|
|||||||
struct list_head list;
|
struct list_head list;
|
||||||
SkPacketEntry *entry;
|
SkPacketEntry *entry;
|
||||||
char *data;
|
char *data;
|
||||||
|
unsigned scm_len;
|
||||||
|
int *scm;
|
||||||
};
|
};
|
||||||
|
|
||||||
static LIST_HEAD(packets_list);
|
static LIST_HEAD(packets_list);
|
||||||
@@ -37,11 +39,21 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
|
|||||||
struct sk_packet *pkt = obj;
|
struct sk_packet *pkt = obj;
|
||||||
|
|
||||||
pkt->entry = pb_msg(msg, SkPacketEntry);
|
pkt->entry = pb_msg(msg, SkPacketEntry);
|
||||||
|
pkt->scm = NULL;
|
||||||
pkt->data = xmalloc(pkt->entry->length);
|
pkt->data = xmalloc(pkt->entry->length);
|
||||||
if (pkt->data ==NULL)
|
if (pkt->data ==NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
|
||||||
|
* only 1 of that kind is possible, thus not more than 1 SCMs
|
||||||
|
* on a packet.
|
||||||
|
*/
|
||||||
|
if (pkt->entry->n_scm > 1) {
|
||||||
|
pr_err("More than 1 SCM is not possible\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NOTE: packet must be added to the tail. Otherwise sequence
|
* NOTE: packet must be added to the tail. Otherwise sequence
|
||||||
* will be broken.
|
* will be broken.
|
||||||
@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
|
|||||||
.collect = collect_one_packet,
|
.collect = collect_one_packet,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Dump the descriptors carried by one SCM_RIGHTS control message and
 * append the resulting ScmEntry to the packet entry.
 *
 * @ch: received control message header (expected to be SCM_RIGHTS)
 * @pe: packet entry being built; gains one more element in pe->scm
 *      on success
 *
 * Each descriptor is dumped with dump_my_file(), which fills in the id
 * stored in the entry's rights array and reports the file type. Unix
 * sockets and eventpolls are rejected.
 *
 * Returns 0 on success and -1 on failure. On failure @pe is left
 * untouched and the entry allocated here is released.
 */
static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
{
	int nr_fds, *fds, i;
	void *buf;
	ScmEntry *scme;

	nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
	fds = (int *)CMSG_DATA(ch);

	/* A single allocation holds the entry followed by its rights array */
	buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
	if (!buf)
		return -1;

	scme = xptr_pull(&buf, ScmEntry);
	scm_entry__init(scme);
	scme->type = SCM_RIGHTS;
	scme->n_rights = nr_fds;
	scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));

	for (i = 0; i < nr_fds; i++) {
		int ftyp;

		if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
			goto err;

		/*
		 * Unix sent over Unix or Epoll with some other sh*t
		 * sent over unix (maybe with this very unix polled)
		 * are tricky and not supported for now. (XXX -- todo)
		 */
		if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
			pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
			goto err;
		}
	}

	/*
	 * Grow the array before touching n_scm, so that a failed
	 * reallocation leaves the count consistent with the array.
	 */
	if (xrealloc_safe(&pe->scm, (pe->n_scm + 1) * sizeof(ScmEntry *)))
		goto err;

	pe->scm[pe->n_scm] = scme;
	pe->n_scm++;
	return 0;

err:
	/*
	 * scme is the pointer release_cmsg() hands to xfree() for
	 * entries that were queued, so it is the one to free here too.
	 */
	xfree(scme);
	return -1;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum size of the control messages. XXX -- is there any
|
* Maximum size of the control messages. XXX -- is there any
|
||||||
* way to get this value out of the kernel?
|
* way to get this value out of the kernel?
|
||||||
@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
|
|||||||
static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
|
static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
|
||||||
{
|
{
|
||||||
struct cmsghdr *ch;
|
struct cmsghdr *ch;
|
||||||
|
int n_rights = 0;
|
||||||
|
|
||||||
for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
|
for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
|
||||||
|
if (ch->cmsg_type == SCM_RIGHTS) {
|
||||||
|
if (n_rights) {
|
||||||
|
/*
|
||||||
|
* Even if user is sending more than one cmsg with
|
||||||
|
* rights, kernel merges them alltogether on recv.
|
||||||
|
*/
|
||||||
|
pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_scm_rights(ch, pe))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
n_rights++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
pr_err("Control messages in queue, not supported\n");
|
pr_err("Control messages in queue, not supported\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Free every ScmEntry attached to the packet entry together with the
 * pointer array itself, then reset the entry to "no SCMs".
 */
static void release_cmsg(SkPacketEntry *pe)
{
	int idx = 0;

	while (idx < pe->n_scm) {
		xfree(pe->scm[idx]);
		idx++;
	}

	xfree(pe->scm);

	/* Leave the entry reusable for the next packet */
	pe->scm = NULL;
	pe->n_scm = 0;
}
|
||||||
|
|
||||||
int dump_sk_queue(int sock_fd, int sock_id)
|
int dump_sk_queue(int sock_fd, int sock_id)
|
||||||
{
|
{
|
||||||
SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
|
SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
|
||||||
@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
|
|||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto err_set_sock;
|
goto err_set_sock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pe.scm)
|
||||||
|
release_cmsg(&pe);
|
||||||
}
|
}
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
@@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
|
|||||||
iov.iov_base = pkt->data;
|
iov.iov_base = pkt->data;
|
||||||
iov.iov_len = entry->length;
|
iov.iov_len = entry->length;
|
||||||
|
|
||||||
|
if (pkt->scm != NULL) {
|
||||||
|
mh.msg_controllen = pkt->scm_len;
|
||||||
|
mh.msg_control = pkt->scm;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't try to use sendfile here, because it use sendpage() and
|
* Don't try to use sendfile here, because it use sendpage() and
|
||||||
* all data are split on pages and a new skb is allocated for
|
* all data are split on pages and a new skb is allocated for
|
||||||
@@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
|
|||||||
out:
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int prepare_scms(void)
|
||||||
|
{
|
||||||
|
struct sk_packet *pkt;
|
||||||
|
|
||||||
|
pr_info("Preparing SCMs\n");
|
||||||
|
list_for_each_entry(pkt, &packets_list, list) {
|
||||||
|
SkPacketEntry *pe = pkt->entry;
|
||||||
|
ScmEntry *se;
|
||||||
|
struct cmsghdr *ch;
|
||||||
|
|
||||||
|
if (!pe->n_scm)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
se = pe->scm[0]; /* Only 1 SCM is possible */
|
||||||
|
|
||||||
|
if (se->type == SCM_RIGHTS) {
|
||||||
|
pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int));
|
||||||
|
pkt->scm = xmalloc(pkt->scm_len);
|
||||||
|
if (!pkt->scm)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */
|
||||||
|
ch->cmsg_level = SOL_SOCKET;
|
||||||
|
ch->cmsg_type = SCM_RIGHTS;
|
||||||
|
ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int));
|
||||||
|
|
||||||
|
if (unix_note_scm_rights(pe->id_for, se->rights,
|
||||||
|
(int *)CMSG_DATA(ch), se->n_rights))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_err("Unsupported scm %d in image\n", se->type);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
152
criu/sk-unix.c
152
criu/sk-unix.c
@@ -795,6 +795,7 @@ struct unix_sk_info {
|
|||||||
struct file_desc d;
|
struct file_desc d;
|
||||||
struct list_head connected; /* List of sockets, connected to me */
|
struct list_head connected; /* List of sockets, connected to me */
|
||||||
struct list_head node; /* To link in peer's connected list */
|
struct list_head node; /* To link in peer's connected list */
|
||||||
|
struct list_head scm_fles;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For DGRAM sockets with queues, we should only restore the queue
|
* For DGRAM sockets with queues, we should only restore the queue
|
||||||
@@ -806,6 +807,11 @@ struct unix_sk_info {
|
|||||||
u8 listen:1;
|
u8 listen:1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct scm_fle {
|
||||||
|
struct list_head l;
|
||||||
|
struct fdinfo_list_entry *fle;
|
||||||
|
};
|
||||||
|
|
||||||
#define USK_PAIR_MASTER 0x1
|
#define USK_PAIR_MASTER 0x1
|
||||||
#define USK_PAIR_SLAVE 0x2
|
#define USK_PAIR_SLAVE 0x2
|
||||||
|
|
||||||
@@ -821,6 +827,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct unix_sk_info *find_queuer_for(int id)
|
||||||
|
{
|
||||||
|
struct unix_sk_info *ui;
|
||||||
|
|
||||||
|
list_for_each_entry(ui, &unix_sockets, list) {
|
||||||
|
if (ui->queuer == id)
|
||||||
|
return ui;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt,
|
||||||
|
struct pstree_item *owner)
|
||||||
|
{
|
||||||
|
struct fdinfo_list_entry *fle;
|
||||||
|
FdinfoEntry *e = NULL;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
list_for_each_entry(fle, &tgt->fd_info_head, desc_list) {
|
||||||
|
if (fle->task == owner)
|
||||||
|
/*
|
||||||
|
* Owner already has this file in its fdtable.
|
||||||
|
* Just use one.
|
||||||
|
*/
|
||||||
|
return fle;
|
||||||
|
|
||||||
|
e = fle->fe; /* keep any for further reference */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some other task restores this file. Pretend that
|
||||||
|
* we're another user of it.
|
||||||
|
*/
|
||||||
|
fd = find_unused_fd(owner, -1);
|
||||||
|
pr_info("`- will add SCM-only %d fd\n", fd);
|
||||||
|
|
||||||
|
if (e != NULL) {
|
||||||
|
e = dup_fdinfo(e, fd, 0);
|
||||||
|
if (!e) {
|
||||||
|
pr_err("Can't duplicate fdinfo for scm\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* This can happen if the file in question is
|
||||||
|
* sent over the socket and closed. In this case
|
||||||
|
* we need to ... invent a new one!
|
||||||
|
*/
|
||||||
|
|
||||||
|
e = xmalloc(sizeof(*e));
|
||||||
|
if (!e)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
fdinfo_entry__init(e);
|
||||||
|
e->id = tgt->id;
|
||||||
|
e->type = tgt->ops->type;
|
||||||
|
e->fd = fd;
|
||||||
|
e->flags = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make this fle fake, so that files collecting engine
|
||||||
|
* closes them at the end.
|
||||||
|
*/
|
||||||
|
return collect_fd_to(vpid(owner), e, rsti(owner), tgt, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
|
||||||
|
{
|
||||||
|
struct unix_sk_info *ui;
|
||||||
|
struct pstree_item *owner;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ui = find_queuer_for(id_for);
|
||||||
|
if (!ui) {
|
||||||
|
pr_err("Can't find sender for %d\n", id_for);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
|
||||||
|
/*
|
||||||
|
* This is the task that will restore this socket
|
||||||
|
*/
|
||||||
|
owner = file_master(&ui->d)->task;
|
||||||
|
|
||||||
|
pr_info("-> will set up deps\n");
|
||||||
|
/*
|
||||||
|
* The ui will send data to the rights receiver. Add a fake fle
|
||||||
|
* for the file and a dependency.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < n_ids; i++) {
|
||||||
|
struct file_desc *tgt;
|
||||||
|
struct scm_fle *sfle;
|
||||||
|
|
||||||
|
tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
|
||||||
|
if (!tgt) {
|
||||||
|
pr_err("Can't find fdesc to send\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner));
|
||||||
|
sfle = xmalloc(sizeof(*sfle));
|
||||||
|
if (!sfle)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
sfle->fle = get_fle_for_scm(tgt, owner);
|
||||||
|
if (!sfle->fle) {
|
||||||
|
pr_err("Can't request new fle for scm\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_add_tail(&sfle->l, &ui->scm_fles);
|
||||||
|
fds[i] = sfle->fle->fe->fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int chk_restored_scms(struct unix_sk_info *ui)
|
||||||
|
{
|
||||||
|
struct scm_fle *sf, *n;
|
||||||
|
|
||||||
|
list_for_each_entry_safe(sf, n, &ui->scm_fles, l) {
|
||||||
|
if (sf->fle->stage < FLE_OPEN)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* Optimization for the next pass */
|
||||||
|
list_del(&sf->l);
|
||||||
|
xfree(sf);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int wake_connected_sockets(struct unix_sk_info *ui)
|
static int wake_connected_sockets(struct unix_sk_info *ui)
|
||||||
{
|
{
|
||||||
struct fdinfo_list_entry *fle;
|
struct fdinfo_list_entry *fle;
|
||||||
@@ -1306,12 +1447,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
|
|||||||
struct unix_sk_info *ui;
|
struct unix_sk_info *ui;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
ui = container_of(d, struct unix_sk_info, d);
|
||||||
|
|
||||||
|
/* FIXME -- only queue restore may be postponed */
|
||||||
|
if (chk_restored_scms(ui)) {
|
||||||
|
pr_info("scm: Wait for tgt to restore\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
fle = file_master(d);
|
fle = file_master(d);
|
||||||
if (fle->stage >= FLE_OPEN)
|
if (fle->stage >= FLE_OPEN)
|
||||||
return post_open_unix_sk(d, fle->fe->fd);
|
return post_open_unix_sk(d, fle->fe->fd);
|
||||||
|
|
||||||
ui = container_of(d, struct unix_sk_info, d);
|
|
||||||
|
|
||||||
if (inherited_fd(d, new_fd)) {
|
if (inherited_fd(d, new_fd)) {
|
||||||
ui->ue->uflags |= USK_INHERIT;
|
ui->ue->uflags |= USK_INHERIT;
|
||||||
ret = *new_fd >= 0 ? 0 : -1;
|
ret = *new_fd >= 0 ? 0 : -1;
|
||||||
@@ -1410,6 +1557,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
|
|||||||
ui->listen = 0;
|
ui->listen = 0;
|
||||||
INIT_LIST_HEAD(&ui->connected);
|
INIT_LIST_HEAD(&ui->connected);
|
||||||
INIT_LIST_HEAD(&ui->node);
|
INIT_LIST_HEAD(&ui->node);
|
||||||
|
INIT_LIST_HEAD(&ui->scm_fles);
|
||||||
ui->flags = 0;
|
ui->flags = 0;
|
||||||
|
|
||||||
uname = ui->name;
|
uname = ui->name;
|
||||||
|
@@ -1,6 +1,12 @@
|
|||||||
syntax = "proto2";
|
syntax = "proto2";
|
||||||
|
|
||||||
|
// One control message (SCM) that was queued together with a packet.
message scm_entry {
	// Control message type; the dump code writes SCM_RIGHTS here.
	required uint32	type	= 1;
	// For SCM_RIGHTS: ids of the dumped files, one per sent descriptor.
	repeated uint32	rights	= 2;
}
|
||||||
|
|
||||||
// One packet sitting in a socket queue.
message sk_packet_entry {
	// Id of the queue the packet belongs to.
	required uint32		id_for	= 1;
	// Length of the packet payload in bytes.
	required uint32		length	= 2;
	// Control messages attached to the packet; restore accepts at most one.
	repeated scm_entry	scm	= 4;
}
|
||||||
|
Reference in New Issue
Block a user