2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-31 22:35:33 +00:00

inotify: cleanup auxiliary events from queue

I've mentioned the problem that after c/r each inotify receives one or
more unexpected events.

This happens because our algorithm mixes setting up an inotify watch on
the file with opening and closing it.

We mix inotify creation and watched file open/close because we need to
create the inotify watch on the file from another mntns (generally). And
we do a trick opening the file so that it can be referenced in current
mntns by /proc/<pid>/fd/<id> path.

Moreover, if we have several inotifies on the same file, then the queue
gets even more events than the single one which appears in the simple case.

note: For now we don't have a way to c/r events in queue but we need to
at least leave the queue clean from events generated by our own.

Still, it looks harder to rewrite wd creation without this proc-fd
trick than to remove the unexpected events from the queues.

So just clean up these events for each fdt-restorer process, for each of
its inotify fds, _after_ the restore stage (at CR_STATE_RESTORE_SIGCHLD).
This is the closest place where, for an _alive_ process, we know that all
prepare_fds() calls are done by all processes. This means we need to do
the cleanup in PIE code, so we need to add sys_ppoll definitions for PIE
and divide the process into two phases: first collect and transfer the
fds, then do the real cleanup.

note: We still do prepare_fds() for zombies. But zombies have no fds in
/proc/pid/fd, so we will collect none in collect_fds() and therefore we
have none in prepare_fds(); thus there is no need to clean up inotifies
for zombies.

v2: adapt to multiple unexpected events
v3: do not cleanup from fdt-receivers, done from fdt-restorer
v4: do without additional fds restore stage
v5: replace sys_poll with sys_ppoll and fix minor nits

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>

use ppoll always and remove poll
This commit is contained in:
Pavel Tikhomirov
2019-06-26 11:55:19 +03:00
committed by Andrei Vagin
parent f12a0f0a8f
commit a3cdf94869
9 changed files with 117 additions and 0 deletions

View File

@@ -111,3 +111,4 @@ preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long
userfaultfd 282 388 (int flags)
fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len)
cacheflush ! 983042 (void *start, void *end, int flags)
ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)

View File

@@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un
__NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
__NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
__NR_userfaultfd 364 sys_userfaultfd (int flags)
__NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)

View File

@@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un
__NR_userfaultfd 355 sys_userfaultfd (int flags)
__NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
__NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
__NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)

View File

@@ -95,3 +95,4 @@ __NR_kcmp 349 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1,
__NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
__NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags)
__NR_userfaultfd 374 sys_userfaultfd (int flags)
__NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)

View File

@@ -106,3 +106,4 @@ __NR_setns 308 sys_setns (int fd, int nstype)
__NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2)
__NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags)
__NR_userfaultfd 323 sys_userfaultfd (int flags)
__NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize)

View File

@@ -38,6 +38,7 @@ struct siginfo;
struct msghdr;
struct rusage;
struct iocb;
struct pollfd;
typedef unsigned long aio_context_t;

View File

@@ -727,6 +727,40 @@ static int collect_zombie_pids(struct task_restore_args *ta)
return collect_child_pids(TASK_DEAD, &ta->zombies_n);
}
static int collect_inotify_fds(struct task_restore_args *ta)
{
struct list_head *list = &rsti(current)->fds;
struct fdt *fdt = rsti(current)->fdt;
struct fdinfo_list_entry *fle;
/* Check we are an fdt-restorer */
if (fdt && fdt->pid != vpid(current))
return 0;
ta->inotify_fds = (int *)rst_mem_align_cpos(RM_PRIVATE);
list_for_each_entry(fle, list, ps_list) {
struct file_desc *d = fle->desc;
int *inotify_fd;
if (d->ops->type != FD_TYPES__INOTIFY)
continue;
if (fle != file_master(d))
continue;
inotify_fd = rst_mem_alloc(sizeof(*inotify_fd), RM_PRIVATE);
if (!inotify_fd)
return -1;
ta->inotify_fds_n++;
*inotify_fd = fle->fe->fd;
pr_debug("Collect inotify fd %d to cleanup later\n", *inotify_fd);
}
return 0;
}
static int open_core(int pid, CoreEntry **pcore)
{
int ret;
@@ -881,6 +915,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
if (collect_zombie_pids(ta) < 0)
return -1;
if (collect_inotify_fds(ta) < 0)
return -1;
if (prepare_proc_misc(pid, core->tc, ta))
return -1;
@@ -3417,6 +3454,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
RST_MEM_FIXUP_PPTR(task_args->helpers);
RST_MEM_FIXUP_PPTR(task_args->zombies);
RST_MEM_FIXUP_PPTR(task_args->vma_ios);
RST_MEM_FIXUP_PPTR(task_args->inotify_fds);
task_args->compatible_mode = core_is_compat(core);
/*

View File

@@ -177,6 +177,9 @@ struct task_restore_args {
pid_t *zombies;
unsigned int zombies_n;
int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */
unsigned int inotify_fds_n;
/* * * * * * * * * * * * * * * * * * * * */
unsigned long task_size;

View File

@@ -16,6 +16,7 @@
#include <sched.h>
#include <sys/resource.h>
#include <signal.h>
#include <sys/inotify.h>
#include "linux/userfaultfd.h"
@@ -1307,6 +1308,72 @@ static int map_vdso(struct task_restore_args *args, bool compatible)
return 0;
}
/*
 * Probe a descriptor for readable data without blocking.
 *
 * Issues a single ppoll() for POLLIN with a zero timeout and no signal
 * mask change, and hands back the raw sys_ppoll() result to the caller.
 */
static int fd_poll(int inotify_fd)
{
	struct timespec no_wait = { .tv_sec = 0, .tv_nsec = 0 };
	struct pollfd probe = {
		.fd = inotify_fd,
		.events = POLLIN,
		.revents = 0,
	};

	return sys_ppoll(&probe, 1, &no_wait, NULL, sizeof(sigset_t));
}
/*
* note: Actually the kernel may want even more space for one event (see
* round_event_name_len), so using a buffer of exactly EVENT_BUFF_SIZE bytes
* may fail. To be on the safe side, take a bigger buffer; this also allows
* reading more events in one syscall.
*/
#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX))
/*
 * Read and discard all events currently queued on an inotify descriptor.
 *
 * The queue is polled with a zero timeout before every read, so the
 * function never blocks: it stops as soon as poll reports nothing left.
 *
 * @inotify_fd: inotify descriptor whose queue must be emptied.
 *
 * Returns 0 once the queue is empty, -1 if polling or reading failed.
 */
static int cleanup_inotify_events(int inotify_fd)
{
	/*
	 * Scratch space for discarded events. A few EVENT_BUFF_SIZE slots are
	 * enough to hold any single event; the loop below takes care of
	 * draining whatever does not fit into one read.
	 *
	 * NOTE(review): this runs in restorer (PIE) context where the stack
	 * is presumably small; the previous 8-slot buffer was ~32 KiB of
	 * automatic storage. Keep this modest -- confirm against the
	 * restorer stack size.
	 */
	char buf[EVENT_BUFF_SIZE * 3];
	int ret;

	while (1) {
		ret = fd_poll(inotify_fd);
		if (ret < 0) {
			pr_err("Failed to poll from inotify fd: %d\n", ret);
			return -1;
		} else if (ret == 0) {
			/* Nothing readable any more: the queue is clean */
			break;
		}

		/* Events are only thrown away, the buffer content is unused */
		ret = sys_read(inotify_fd, buf, sizeof(buf));
		if (ret < 0) {
			pr_err("Failed to read inotify events: %d\n", ret);
			return -1;
		}
	}

	return 0;
}
/*
 * When we restore inotifies we may open and close the files we create a
 * watch for, so we need to clean up the auxiliary events this generates.
 *
 * note: For now we don't have a way to c/r events in queue but we need to
 * at least leave the queue clean from events generated by our own.
 *
 * @task_args: restorer arguments; ->inotify_fds holds ->inotify_fds_n
 *             descriptors whose queues are drained one by one.
 *
 * Returns 0 on success, -1 as soon as draining one descriptor fails.
 */
int cleanup_current_inotify_events(struct task_restore_args *task_args)
{
	/* Unsigned to match the type of ->inotify_fds_n it is compared with */
	unsigned int i;

	for (i = 0; i < task_args->inotify_fds_n; i++) {
		int inotify_fd = task_args->inotify_fds[i];

		pr_debug("Cleaning inotify events from %d\n", inotify_fd);

		if (cleanup_inotify_events(inotify_fd))
			return -1;
	}

	return 0;
}
/*
* The main routine to restore task via sigreturn.
* This one is very special, we never return there
@@ -1767,6 +1834,9 @@ long __export_restore_task(struct task_restore_args *args)
restore_finish_stage(task_entries_local, CR_STATE_RESTORE);
if (cleanup_current_inotify_events(args))
goto core_restore_end;
if (wait_helpers(args) < 0)
goto core_restore_end;
if (wait_zombies(args) < 0)