From 5087faa0b852ab19f7f4dcc0a03afadce12b8a0c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 13 Jul 2017 14:24:29 +0300 Subject: [PATCH] SCM: Dump and restore SCM_RIGHTs Most of the pieces have already been described in the previous patches :) so here's the summary. * Dump: When receiving a message, also receive any SCM-s (already there) and when an SCM_RIGHTs one is met -- go ahead and just dump received descriptors using regular code, but taking current as the victim task. A few words about file paths resolution -- since we do dump path-ed files by receiving them from victim's parasite, such files sent via sockets should still work OK, as we still receive them, just from another socket. Several problems here: 1. Unix sockets sent via unix sockets form knots. Not supported. 2. Eventpolls sent via unix might themselves poll unix sockets. Knots again. Not supported either. * Restore: On restore we need to make unix socket wait for the soon-to-be-scm-sent descriptors to get restored, so we need to find them, then put a dependency. After that, the fake fdinfo entry is attached to the respective file descs, and once sent, the respective descriptors are closed. 
https://github.com/xemul/criu/issues/251 v2: Addressed comments from Kirill * Moved prepare_scms before adding fake fles (with comment) * Add scm-only fles as fake, thus removing close_scm_fds * Try hard finding any suitable fle to use as scm one when queuing them for unix socket scm list, only allocate a new one if really needed Reviewed-by: Kirill Tkhai Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 8 +++ criu/include/sockets.h | 2 + criu/sk-queue.c | 140 ++++++++++++++++++++++++++++++++++++- criu/sk-unix.c | 152 ++++++++++++++++++++++++++++++++++++++++- images/sk-packet.proto | 6 ++ 5 files changed, 303 insertions(+), 5 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 442844ee9..7086c7415 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -366,6 +366,14 @@ static int root_prepare_shared(void) if (ret) goto err; + /* + * This should be called with all packets collected AND all + * fdescs and fles prepared BUT post-prep-s not run. 
+ */ + ret = prepare_scms(); + if (ret) + goto err; + ret = run_post_prepare(); if (ret) goto err; diff --git a/criu/include/sockets.h b/criu/include/sockets.h index c93177f7f..c0f302474 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -37,6 +37,8 @@ extern int collect_sockets(struct ns_id *); extern struct collect_image_info inet_sk_cinfo; extern struct collect_image_info unix_sk_cinfo; extern int fix_external_unix_sockets(void); +extern int prepare_scms(void); +extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); extern struct collect_image_info netlink_sk_cinfo; diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 77e203e38..f3ebd6c64 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -18,9 +18,9 @@ #include "util.h" #include "util-pie.h" #include "sockets.h" - +#include "xmalloc.h" #include "sk-queue.h" - +#include "files.h" #include "protobuf.h" #include "images/sk-packet.pb-c.h" @@ -28,6 +28,8 @@ struct sk_packet { struct list_head list; SkPacketEntry *entry; char *data; + unsigned scm_len; + int *scm; }; static LIST_HEAD(packets_list); @@ -37,11 +39,21 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i struct sk_packet *pkt = obj; pkt->entry = pb_msg(msg, SkPacketEntry); - + pkt->scm = NULL; pkt->data = xmalloc(pkt->entry->length); if (pkt->data ==NULL) return -1; + /* + * See dump_packet_cmsg() -- only SCM_RIGHTS are supported and + * only 1 of that kind is possible, thus not more than 1 SCMs + * on a packet. + */ + if (pkt->entry->n_scm > 1) { + pr_err("More than 1 SCM is not possible\n"); + return -1; + } + /* * NOTE: packet must be added to the tail. Otherwise sequence * will be broken. 
@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = { .collect = collect_one_packet, }; +static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe) +{ + int nr_fds, *fds, i; + void *buf; + ScmEntry *scme; + + nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int); + fds = (int *)CMSG_DATA(ch); + + buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t)); + if (!buf) + return -1; + + scme = xptr_pull(&buf, ScmEntry); + scm_entry__init(scme); + scme->type = SCM_RIGHTS; + scme->n_rights = nr_fds; + scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t)); + + for (i = 0; i < nr_fds; i++) { + int ftyp; + + if (dump_my_file(fds[i], &scme->rights[i], &ftyp)) + return -1; + + /* + * Unix sent over Unix or Epoll with some other sh*t + * sent over unix (maybe with this very unix polled) + * are tricky and not supported for now. (XXX -- todo) + */ + if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) { + pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp); + return -1; + } + } + + i = pe->n_scm++; + if (xrealloc_safe(&pe->scm, pe->n_scm * sizeof(ScmEntry*))) + return -1; + + pe->scm[i] = scme; + return 0; +} + /* * Maximum size of the control messages. XXX -- is there any * way to get this value out of the kernel? @@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = { static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) { struct cmsghdr *ch; + int n_rights = 0; for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) { + if (ch->cmsg_type == SCM_RIGHTS) { + if (n_rights) { + /* + * Even if user is sending more than one cmsg with + * rights, kernel merges them alltogether on recv. 
+ */ + pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n"); + return -1; + } + + if (dump_scm_rights(ch, pe)) + return -1; + + n_rights++; + continue; + } + pr_err("Control messages in queue, not supported\n"); return -1; } @@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) return 0; } +static void release_cmsg(SkPacketEntry *pe) +{ + int i; + + for (i = 0; i < pe->n_scm; i++) + xfree(pe->scm[i]); + xfree(pe->scm); + + pe->n_scm = 0; + pe->scm = NULL; +} + int dump_sk_queue(int sock_fd, int sock_id) { SkPacketEntry pe = SK_PACKET_ENTRY__INIT; @@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id) ret = -EIO; goto err_set_sock; } + + if (pe.scm) + release_cmsg(&pe); } ret = 0; @@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt) iov.iov_base = pkt->data; iov.iov_len = entry->length; + if (pkt->scm != NULL) { + mh.msg_controllen = pkt->scm_len; + mh.msg_control = pkt->scm; + } + /* * Don't try to use sendfile here, because it use sendpage() and * all data are split on pages and a new skb is allocated for @@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id) out: return ret; } + +int prepare_scms(void) +{ + struct sk_packet *pkt; + + pr_info("Preparing SCMs\n"); + list_for_each_entry(pkt, &packets_list, list) { + SkPacketEntry *pe = pkt->entry; + ScmEntry *se; + struct cmsghdr *ch; + + if (!pe->n_scm) + continue; + + se = pe->scm[0]; /* Only 1 SCM is possible */ + + if (se->type == SCM_RIGHTS) { + pkt->scm_len = CMSG_SPACE(se->n_rights * sizeof(int)); + pkt->scm = xmalloc(pkt->scm_len); + if (!pkt->scm) + return -1; + + ch = (struct cmsghdr *)pkt->scm; /* FIXME -- via msghdr */ + ch->cmsg_level = SOL_SOCKET; + ch->cmsg_type = SCM_RIGHTS; + ch->cmsg_len = CMSG_LEN(se->n_rights * sizeof(int)); + + if (unix_note_scm_rights(pe->id_for, se->rights, + (int *)CMSG_DATA(ch), se->n_rights)) + return -1; + + continue; + } + + pr_err("Unsupported scm %d in image\n", se->type); + return -1; + } 
+ + return 0; +} diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 0dac2f845..16b4bf3ec 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -795,6 +795,7 @@ struct unix_sk_info { struct file_desc d; struct list_head connected; /* List of sockets, connected to me */ struct list_head node; /* To link in peer's connected list */ + struct list_head scm_fles; /* * For DGRAM sockets with queues, we should only restore the queue @@ -806,6 +807,11 @@ struct unix_sk_info { u8 listen:1; }; +struct scm_fle { + struct list_head l; + struct fdinfo_list_entry *fle; +}; + #define USK_PAIR_MASTER 0x1 #define USK_PAIR_SLAVE 0x2 @@ -821,6 +827,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino) return NULL; } +static struct unix_sk_info *find_queuer_for(int id) +{ + struct unix_sk_info *ui; + + list_for_each_entry(ui, &unix_sockets, list) { + if (ui->queuer == id) + return ui; + } + + return NULL; +} + +static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt, + struct pstree_item *owner) +{ + struct fdinfo_list_entry *fle; + FdinfoEntry *e = NULL; + int fd; + + list_for_each_entry(fle, &tgt->fd_info_head, desc_list) { + if (fle->task == owner) + /* + * Owner already has this file in its fdtable. + * Just use one. + */ + return fle; + + e = fle->fe; /* keep any for further reference */ + } + + /* + * Some other task restores this file. Pretend that + * we're another user of it. + */ + fd = find_unused_fd(owner, -1); + pr_info("`- will add SCM-only %d fd\n", fd); + + if (e != NULL) { + e = dup_fdinfo(e, fd, 0); + if (!e) { + pr_err("Can't duplicate fdinfo for scm\n"); + return NULL; + } + } else { + /* + * This can happen if the file in question is + * sent over the socket and closed. In this case + * we need to ... invent a new one! 
+ */ + + e = xmalloc(sizeof(*e)); + if (!e) + return NULL; + + fdinfo_entry__init(e); + e->id = tgt->id; + e->type = tgt->ops->type; + e->fd = fd; + e->flags = 0; + } + + /* + * Make this fle fake, so that files collecting engine + * closes them at the end. + */ + return collect_fd_to(vpid(owner), e, rsti(owner), tgt, true); +} + +int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids) +{ + struct unix_sk_info *ui; + struct pstree_item *owner; + int i; + + ui = find_queuer_for(id_for); + if (!ui) { + pr_err("Can't find sender for %d\n", id_for); + return -1; + } + + pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id); + /* + * This is the task that will restore this socket + */ + owner = file_master(&ui->d)->task; + + pr_info("-> will set up deps\n"); + /* + * The ui will send data to the rights receiver. Add a fake fle + * for the file and a dependency. + */ + for (i = 0; i < n_ids; i++) { + struct file_desc *tgt; + struct scm_fle *sfle; + + tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]); + if (!tgt) { + pr_err("Can't find fdesc to send\n"); + return -1; + } + + pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner)); + sfle = xmalloc(sizeof(*sfle)); + if (!sfle) + return -1; + + sfle->fle = get_fle_for_scm(tgt, owner); + if (!sfle->fle) { + pr_err("Can't request new fle for scm\n"); + return -1; + } + + list_add_tail(&sfle->l, &ui->scm_fles); + fds[i] = sfle->fle->fe->fd; + } + + return 0; +} + +static int chk_restored_scms(struct unix_sk_info *ui) +{ + struct scm_fle *sf, *n; + + list_for_each_entry_safe(sf, n, &ui->scm_fles, l) { + if (sf->fle->stage < FLE_OPEN) + return 1; + + /* Optimization for the next pass */ + list_del(&sf->l); + xfree(sf); + } + + return 0; +} + static int wake_connected_sockets(struct unix_sk_info *ui) { struct fdinfo_list_entry *fle; @@ -1306,12 +1447,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd) struct unix_sk_info *ui; int ret; + ui = container_of(d, struct unix_sk_info, d); + + 
/* FIXME -- only queue restore may be postponed */ + if (chk_restored_scms(ui)) { + pr_info("scm: Wait for tgt to restore\n"); + return 1; + } + fle = file_master(d); if (fle->stage >= FLE_OPEN) return post_open_unix_sk(d, fle->fe->fd); - ui = container_of(d, struct unix_sk_info, d); - if (inherited_fd(d, new_fd)) { ui->ue->uflags |= USK_INHERIT; ret = *new_fd >= 0 ? 0 : -1; @@ -1410,6 +1557,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) ui->listen = 0; INIT_LIST_HEAD(&ui->connected); INIT_LIST_HEAD(&ui->node); + INIT_LIST_HEAD(&ui->scm_fles); ui->flags = 0; uname = ui->name; diff --git a/images/sk-packet.proto b/images/sk-packet.proto index 5f61c7376..dcda280f6 100644 --- a/images/sk-packet.proto +++ b/images/sk-packet.proto @@ -1,6 +1,12 @@ syntax = "proto2"; +message scm_entry { + required uint32 type = 1; + repeated uint32 rights = 2; +} + message sk_packet_entry { required uint32 id_for = 1; required uint32 length = 2; + repeated scm_entry scm = 4; }