diff --git a/Makefile.crtools b/Makefile.crtools
index 4e81afe49..6033b2ce0 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -57,6 +57,7 @@ obj-y	+= pagemap-cache.o
 obj-y	+= kerndat.o
 obj-y	+= stats.o
 obj-y	+= cgroup.o
+obj-y	+= timerfd.o
 obj-y	+= string.o
 obj-y	+= sigframe.o
 ifeq ($(VDSO),y)
diff --git a/cr-restore.c b/cr-restore.c
index 9e2f61f6b..7569afa1d 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -69,6 +69,7 @@
 #include "rst-malloc.h"
 #include "plugin.h"
 #include "cgroup.h"
+#include "timerfd.h"
 
 #include "parasite-syscall.h"
 
@@ -153,6 +154,7 @@ static struct collect_image_info *cinfos[] = {
 	&tty_cinfo,
 	&tunfile_cinfo,
 	&ext_file_cinfo,
+	&timerfd_cinfo,
 };
 
 static int root_prepare_shared(void)
@@ -2358,6 +2360,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	void *tcp_socks_mem;
 	unsigned long tcp_socks;
 
+	void *timerfd_mem;
+	unsigned long timerfd_mem_cpos;
+
 #ifdef CONFIG_VDSO
 	unsigned long vdso_rt_size = 0;
 	unsigned long vdso_rt_delta = 0;
@@ -2410,6 +2415,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 
 	memcpy(tcp_socks_mem, rst_tcp_socks, rst_tcp_socks_len());
 
+	/*
+	 * Copy timerfd params for restorer args, the timers have to
+	 * be armed at the very last stage of restore.
+	 */
+	timerfd_mem_cpos = rst_mem_cpos(RM_PRIVATE);
+	timerfd_mem = rst_mem_alloc(rst_timerfd_len(), RM_PRIVATE);
+	if (!timerfd_mem)
+		goto err_nv;
+	memcpy(timerfd_mem, rst_timerfd, rst_timerfd_len());
+
 	/*
 	 * We're about to search for free VM area and inject the restorer blob
 	 * into it. No irrelevent mmaps/mremaps beyond this point, otherwise
@@ -2533,6 +2548,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	task_args->timer_n = posix_timers_nr;
 	task_args->posix_timers = rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE);
 
+	task_args->timerfd_n = rst_timerfd_nr;
+	task_args->timerfd = rst_mem_remap_ptr(timerfd_mem_cpos, RM_PRIVATE);
+
 	task_args->siginfo_nr = siginfo_nr;
 	task_args->siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
 
diff --git a/files.c b/files.c
index 309bb3480..68c06c749 100644
--- a/files.c
+++ b/files.c
@@ -32,6 +32,7 @@
 #include "signalfd.h"
 #include "namespaces.h"
 #include "tun.h"
+#include "timerfd.h"
 #include "fdset.h"
 #include "fs-magic.h"
 #include "proc_parse.h"
@@ -325,6 +326,8 @@ static int dump_one_file(struct parasite_ctl *ctl, int fd, int lfd, struct fd_op
 			ops = &fanotify_dump_ops;
 		else if (is_signalfd_link(link))
 			ops = &signalfd_dump_ops;
+		else if (is_timerfd_link(link))
+			ops = &timerfd_dump_ops;
 		else
 			return dump_unsupp_fd(&p, lfd, fdinfo, "anon", link);
 
diff --git a/include/proc_parse.h b/include/proc_parse.h
index 99cf75c10..35d429224 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -11,6 +11,7 @@
 #include "protobuf/eventpoll.pb-c.h"
 #include "protobuf/signalfd.pb-c.h"
 #include "protobuf/fsnotify.pb-c.h"
+#include "protobuf/timerfd.pb-c.h"
 
 #define PROC_TASK_COMM_LEN	32
 #define PROC_TASK_COMM_LEN_FMT	"(%31s"
@@ -171,6 +172,7 @@ union fdinfo_entries {
 	SignalfdEntry sfd;
 	InotifyWdEntry ify;
 	FanotifyMarkEntry ffy;
+	TimerfdEntry tfy;
 };
 
 struct fdinfo_common {
diff --git a/include/restorer.h b/include/restorer.h
index 842cc32fe..0f1034d76 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -16,6 +16,7 @@
 
 #include "config.h"
 #include "posix-timer.h"
+#include "timerfd.h"
 #include "shmem.h"
 #include "sigframe.h"
 #include "vdso.h"
@@ -126,6 +127,9 @@ struct task_restore_args {
 	int				timer_n;
 	struct restore_posix_timer	*posix_timers;
 
+	int				timerfd_n;
+	struct restore_timerfd		*timerfd;
+
 	CredsEntry			creds;
 	u32				cap_inh[CR_CAP_SIZE];
 	u32				cap_prm[CR_CAP_SIZE];
diff --git a/include/timerfd.h b/include/timerfd.h
new file mode 100644
index 000000000..8e88f9f3d
--- /dev/null
+++ b/include/timerfd.h
@@ -0,0 +1,47 @@
+#ifndef __CR_TIMERFD_H__
+#define __CR_TIMERFD_H__
+
+#include <time.h>
+
+#include "files.h"
+
+struct pstree_item;
+
+/*
+ * Settings of one timerfd handed over to the restorer, which arms
+ * the timer on descriptor @fd at the end of restore.
+ *
+ * @ticks is read by the kernel as an 8-byte integer through
+ * TFD_IOC_SET_TICKS, so it must be 64 bits wide on every arch.
+ */
+struct restore_timerfd {
+	int id;
+	int fd;
+	int clockid;
+	int settime_flags;
+	unsigned long long ticks;
+	struct itimerspec val;
+};
+
+extern const struct fdtype_ops timerfd_dump_ops;
+extern struct collect_image_info timerfd_cinfo;
+extern struct restore_timerfd *rst_timerfd;
+extern unsigned int rst_timerfd_nr;
+
+/* Size in bytes of the rst_timerfd array */
+static inline unsigned long rst_timerfd_len(void)
+{
+	return sizeof(*rst_timerfd) * rst_timerfd_nr;
+}
+
+extern int is_timerfd_link(char *link);
+
+#ifndef TFD_TIMER_ABSTIME
+# define TFD_TIMER_ABSTIME	(1 << 0)
+#endif
+
+#ifndef TFD_IOC_SET_TICKS
+# define TFD_IOC_SET_TICKS	0x40085400
+#endif
+
+#endif /* __CR_TIMERFD_H__ */
diff --git a/pie/restorer.c b/pie/restorer.c
index 1a33031f6..6388fd56c 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -534,6 +534,68 @@ static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
 	return 0;
 }
 
+/*
+ * Arm all the timerfds passed in @args: program the saved timer
+ * settings and, when nonzero, re-inject the saved ticks counter.
+ * Called right before the task is reported as restored.
+ *
+ * Returns 0 on success, the failing call's result (or -1) otherwise.
+ */
+static int timerfd_arm(struct task_restore_args *args)
+{
+	int i;
+
+	for (i = 0; i < args->timerfd_n; i++) {
+		struct restore_timerfd *t = &args->timerfd[i];
+		int ret;
+
+		pr_debug("timerfd: arm for fd %d (%d)\n", t->fd, i);
+
+		if (t->settime_flags & TFD_TIMER_ABSTIME) {
+			struct timespec ts = { };
+
+			/*
+			 * We might need to adjust value because the checkpoint
+			 * and restore procedure takes some time itself. Note
+			 * we don't adjust nanoseconds, since the result may
+			 * overflow the limit NSEC_PER_SEC FIXME
+			 */
+			if (sys_clock_gettime(t->clockid, &ts)) {
+				pr_err("Can't get current time\n");
+				return -1;
+			}
+
+			t->val.it_value.tv_sec += (time_t)ts.tv_sec;
+
+			pr_debug("Ajust id %#x it_value(%llu, %llu) -> it_value(%llu, %llu)\n",
+				 t->id, (unsigned long long)ts.tv_sec,
+				 (unsigned long long)ts.tv_nsec,
+				 (unsigned long long)t->val.it_value.tv_sec,
+				 (unsigned long long)t->val.it_value.tv_nsec);
+		}
+
+		ret = sys_timerfd_settime(t->fd, t->settime_flags, &t->val, NULL);
+		if (ret) {
+			pr_err("Can't restore time for timerfd - %d\n", i);
+			return ret;
+		}
+
+		/*
+		 * TFD_IOC_SET_TICKS takes a nonzero counter only, the
+		 * kernel fails a zero one with EINVAL, so skip it then.
+		 */
+		if (!t->ticks)
+			continue;
+
+		ret = sys_ioctl(t->fd, TFD_IOC_SET_TICKS, (unsigned long)&t->ticks);
+		if (ret) {
+			pr_err("Can't restore ticks for timerfd - %d\n", i);
+			return ret;
+		}
+	}
+	return 0;
+}
+
 static int create_posix_timers(struct task_restore_args *args)
 {
 	int ret, i;
@@ -962,6 +1024,12 @@ long __export_restore_task(struct task_restore_args *args)
 		goto core_restore_end;
 	}
 
+	ret = timerfd_arm(args);
+	if (ret < 0) {
+		pr_err("Can't restore timerfd %ld\n", ret);
+		goto core_restore_end;
+	}
+
 	pr_info("%ld: Restored\n", sys_getpid());
 
 	futex_set(&zombies_inprogress, args->nr_zombies);
diff --git a/proc_parse.c b/proc_parse.c
index 1c6678258..3da54454e 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1043,6 +1043,57 @@ static void parse_fhandle_encoded(char *tok, FhEntry *fh)
 	}
 }
 
+/*
+ * Parse the timerfd part of fdinfo into @tfy. @buf holds the already
+ * read "clockid" line, the remaining lines are fetched from @f.
+ *
+ * Returns 0 on success, -1 on malformed or truncated input.
+ */
+static int parse_timerfd(FILE *f, char *buf, size_t size, TimerfdEntry *tfy)
+{
+	/*
+	 * Format is
+	 * clockid: 0
+	 * ticks: 0
+	 * settime flags: 01
+	 * it_value: (0, 49406829)
+	 * it_interval: (1, 0)
+	 */
+	if (sscanf(buf, "clockid: %d", &tfy->clockid) != 1)
+		goto parse_err;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "ticks: %llu", (unsigned long long *)&tfy->ticks) != 1)
+		goto parse_err;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "settime flags: 0%o", &tfy->settime_flags) != 1)
+		goto parse_err;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_value: (%llu, %llu)",
+		   (unsigned long long *)&tfy->vsec,
+		   (unsigned long long *)&tfy->vnsec) != 2)
+		goto parse_err;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_interval: (%llu, %llu)",
+		   (unsigned long long *)&tfy->isec,
+		   (unsigned long long *)&tfy->insec) != 2)
+		goto parse_err;
+	return 0;
+
+parse_err:
+	return -1;
+nodata:
+	pr_err("No data left in proc file while parsing timerfd\n");
+	goto parse_err;
+}
+
 #define fdinfo_field(str, field)	!strncmp(str, field":", sizeof(field))
 
 static int parse_fdinfo_pid_s(char *pid, int fd, int type,
@@ -1105,6 +1156,21 @@ static int parse_fdinfo_pid_s(char *pid, int fd, int type,
 			entry_met = true;
 			continue;
 		}
+		if (fdinfo_field(str, "clockid")) {
+			timerfd_entry__init(&entry.tfy);
+
+			if (type != FD_TYPES__TIMERFD)
+				goto parse_err;
+			ret = parse_timerfd(f, str, sizeof(str), &entry.tfy);
+			if (ret)
+				goto parse_err;
+			ret = cb(&entry, arg);
+			if (ret)
+				goto out;
+
+			entry_met = true;
+			continue;
+		}
 		if (fdinfo_field(str, "tfd")) {
 			eventpoll_tfd_entry__init(&entry.epl);
 
diff --git a/timerfd.c b/timerfd.c
new file mode 100644
index 000000000..9075e9761
--- /dev/null
+++ b/timerfd.c
@@ -0,0 +1,193 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/timerfd.h>
+#include <sys/ioctl.h>
+
+#include "protobuf.h"
+#include "protobuf/timerfd.pb-c.h"
+
+#include "proc_parse.h"
+#include "rst-malloc.h"
+#include "restorer.h"
+#include "timerfd.h"
+#include "pstree.h"
+#include "files.h"
+#include "fdset.h"
+#include "util.h"
+#include "log.h"
+#include "bug.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "timerfd: "
+
+/* Argument passed to dump_timerfd_entry() via parse_fdinfo() */
+struct timerfd_dump_arg {
+	u32 id;
+	const struct fd_parms *p;
+};
+
+/* Collected image entry together with its file descriptor object */
+struct timerfd_info {
+	TimerfdEntry *tfe;
+	struct file_desc d;
+};
+
+/* Timers recorded by timerfd_post_open(), armed later in the restorer */
+struct restore_timerfd *rst_timerfd;
+unsigned int rst_timerfd_nr;
+
+/* Check @link against the "[timerfd]" anon link type */
+int is_timerfd_link(char *link)
+{
+	return is_anon_link_type(link, "[timerfd]");
+}
+
+/*
+ * parse_fdinfo() callback: fill in id, flags and owner of the
+ * parsed entry and write it to the timerfd image.
+ */
+static int dump_timerfd_entry(union fdinfo_entries *e, void *arg)
+{
+	struct timerfd_dump_arg *da = arg;
+	TimerfdEntry *tfy = &e->tfy;
+
+	tfy->id = da->id;
+	tfy->flags = da->p->flags;
+	tfy->fown = (FownEntry *)&da->p->fown;
+
+	pr_info("Dumping id %#x clockid %d it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		tfy->id, tfy->clockid, (unsigned long long)tfy->vsec, (unsigned long long)tfy->vnsec,
+		(unsigned long long)tfy->isec, (unsigned long long)tfy->insec);
+
+	return pb_write_one(fdset_fd(glob_fdset, CR_FD_TIMERFD), &e->tfy, PB_TIMERFD);
+}
+
+/* Dump timerfd @lfd by feeding its fdinfo to dump_timerfd_entry() */
+static int dump_one_timerfd(int lfd, u32 id, const struct fd_parms *p)
+{
+	struct timerfd_dump_arg da = { .id = id, .p = p, };
+	return parse_fdinfo(lfd, FD_TYPES__TIMERFD, dump_timerfd_entry, &da);
+}
+
+const struct fdtype_ops timerfd_dump_ops = {
+	.type = FD_TYPES__TIMERFD,
+	.dump = dump_one_timerfd,
+};
+
+/*
+ * We need to restore timers at a very late stage in the restorer
+ * to eliminate the case when a timer has expired but we have not
+ * yet finished the restore procedure and signal handlers are not
+ * set up properly. The timer settings have to be copied into the
+ * restorer area, that is what post-open is used for.
+ *
+ * Returns 0 on success, -ENOMEM if the array can't be extended.
+ */
+static int timerfd_post_open(struct file_desc *d, int fd)
+{
+	struct timerfd_info *info = container_of(d, struct timerfd_info, d);
+	TimerfdEntry *tfe = info->tfe;
+	struct restore_timerfd *t;
+
+	/*
+	 * Grow via a temporary so a failed allocation neither leaks
+	 * the old array nor leaves rst_timerfd_nr out of sync.
+	 */
+	t = xrealloc(rst_timerfd, sizeof(*t) * (rst_timerfd_nr + 1));
+	if (!t)
+		return -ENOMEM;
+	rst_timerfd = t;
+
+	t = &rst_timerfd[rst_timerfd_nr++];
+	t->id = tfe->id;
+	t->fd = fd;
+	t->clockid = tfe->clockid;
+	t->ticks = tfe->ticks;
+	t->settime_flags = tfe->settime_flags;
+	t->val.it_interval.tv_sec = (time_t)tfe->isec;
+	t->val.it_interval.tv_nsec = (long)tfe->insec;
+	t->val.it_value.tv_sec = (time_t)tfe->vsec;
+	t->val.it_value.tv_nsec = (long)tfe->vnsec;
+
+	return 0;
+}
+
+/*
+ * Create a fresh timerfd for the collected entry and restore its
+ * owner and flags. The timer itself is not armed here.
+ *
+ * Returns the new descriptor or -1 on error.
+ */
+static int timerfd_open(struct file_desc *d)
+{
+	struct timerfd_info *info;
+	TimerfdEntry *tfe;
+	int tmp = -1;
+
+	info = container_of(d, struct timerfd_info, d);
+	tfe = info->tfe;
+	pr_info("Creating timerfd id %#x clockid %d settime_flags %x ticks %llu "
+		"it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		tfe->id, tfe->clockid, tfe->settime_flags, (unsigned long long)tfe->ticks,
+		(unsigned long long)tfe->vsec, (unsigned long long)tfe->vnsec,
+		(unsigned long long)tfe->isec, (unsigned long long)tfe->insec);
+
+	tmp = timerfd_create(tfe->clockid, 0);
+	if (tmp < 0) {
+		pr_perror("Can't create for %#x", tfe->id);
+		return -1;
+	}
+
+	if (rst_file_params(tmp, tfe->fown, tfe->flags)) {
+		pr_perror("Can't restore params for %#x", tfe->id);
+		goto err_close;
+	}
+
+	return tmp;
+
+err_close:
+	close_safe(&tmp);
+	return -1;
+}
+
+static struct file_desc_ops timerfd_desc_ops = {
+	.type = FD_TYPES__TIMERFD,
+	.open = timerfd_open,
+	.post_open = timerfd_post_open,
+};
+
+/* Only CLOCK_REALTIME and CLOCK_MONOTONIC timers are accepted */
+static int verify_timerfd(TimerfdEntry *tfe)
+{
+	if (tfe->clockid != CLOCK_REALTIME &&
+	    tfe->clockid != CLOCK_MONOTONIC) {
+		pr_err("Unknown clock type %d for %#x\n", tfe->clockid, tfe->id);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Image collect hook: validate the entry and register its descriptor */
+static int collect_one_timerfd(void *o, ProtobufCMessage *msg)
+{
+	struct timerfd_info *info = o;
+
+	info->tfe = pb_msg(msg, TimerfdEntry);
+	if (verify_timerfd(info->tfe)) {
+		pr_err("Verification failed for %#x\n", info->tfe->id);
+		return -1;
+	}
+
+	return file_desc_add(&info->d, info->tfe->id, &timerfd_desc_ops);
+}
+
+struct collect_image_info timerfd_cinfo = {
+	.fd_type = CR_FD_TIMERFD,
+	.pb_type = PB_TIMERFD,
+	.priv_size = sizeof(struct timerfd_info),
+	.collect = collect_one_timerfd,
+	.flags = COLLECT_OPTIONAL,
+};