diff --git a/files.c b/files.c
index c82920dd0..3848acf89 100644
--- a/files.c
+++ b/files.c
@@ -30,6 +30,7 @@
 #include "eventfd.h"
 #include "eventpoll.h"
 #include "fsnotify.h"
+#include "mount.h"
 #include "signalfd.h"
 #include "namespaces.h"
 #include "tun.h"
@@ -156,6 +157,71 @@ void show_saved_files(void)
 	}
 }
 
+/*
+ * Workaround for the OverlayFS bug present before Kernel 4.2
+ *
+ * This is here only to support the Linux Kernel between versions
+ * 3.18 and 4.2. After that, this workaround is not needed anymore,
+ * but it will work properly on both a kernel with and without the bug.
+ *
+ * When a process has a file open in an OverlayFS directory,
+ * the information in /proc/<pid>/fd/<fd> and /proc/<pid>/fdinfo/<fd>
+ * is wrong. We can't even rely on stat()-ing /proc/<pid>/fd/<fd> since
+ * this will show us the wrong filesystem type.
+ *
+ * So we grab that information from the mountinfo table instead. This is done
+ * every time fill_fdlink is called. See lookup_overlayfs for more details.
+ *
+ * Returns 0 when nothing had to be fixed or when p->mnt_id and link->name
+ * were fixed up successfully, -1 on error.
+ */
+static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link)
+{
+	struct mount_info *m;
+
+	if (!link)
+		return 0;
+
+	m = lookup_overlayfs(link->name, p->stat.st_dev, p->stat.st_ino, p->mnt_id);
+	if (IS_ERR(m))
+		return -1;
+
+	if (!m)
+		return 0;
+
+	p->mnt_id = m->mnt_id;
+
+	/*
+	 * If the bug is present, the file path from /proc/<pid>/fd
+	 * does not include the mountpoint, so we prepend it ourselves.
+	 */
+	if (strcmp("./", m->mountpoint) != 0) {
+		char buf[PATH_MAX];
+		int n;
+
+		/*
+		 * strncpy() does not NUL-terminate the destination when the
+		 * source does not fit, so terminate buf explicitly.
+		 */
+		strncpy(buf, link->name, PATH_MAX - 1);
+		buf[PATH_MAX - 1] = '\0';
+
+		/* buf + 2 presumably skips the leading "./" of the link name */
+		n = snprintf(link->name, PATH_MAX, "%s/%s", m->mountpoint, buf + 2);
+		if (n < 0 || n >= PATH_MAX) {
+			pr_err("Not enough space to replace %s\n", buf);
+			return -1;
+		}
+
+		/*
+		 * The name was rewritten, so refresh its length as well.
+		 * NOTE(review): assumes link->len is the string length of
+		 * link->name -- confirm against struct fd_link.
+		 */
+		link->len = n;
+	}
+	return 0;
+}
+
 /*
  * The gen_id thing is used to optimize the comparison of shared files.
  * If two files have different gen_ids, then they are different for sure.
@@ -206,6 +257,10 @@ int fill_fdlink(int lfd, const struct fd_parms *p, struct fd_link *link)
 	}
 
 	link->len = len + 1;
+
+	if (opts.overlayfs)
+		if (fixup_overlayfs((struct fd_parms *)p, link) < 0)
+			return -1;
 	return 0;
 }
 
diff --git a/include/cr_options.h b/include/cr_options.h
index 19c2f7702..011349c3e 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -79,6 +79,7 @@ struct cr_options {
 	bool			enable_external_sharing;
 	bool			enable_external_masters;
 	bool			aufs;		/* auto-deteced, not via cli */
+	bool			overlayfs;	/* auto-detected, see overlayfs_parse() */
 };
 
 extern struct cr_options opts;
diff --git a/include/mount.h b/include/mount.h
index 0d5fc7f3d..01da3f55f 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -22,6 +22,9 @@ extern int prepare_mnt_ns(void);
 extern int pivot_root(const char *new_root, const char *put_old);
 
 struct mount_info;
+/* See lookup_overlayfs() in mount.c; may return NULL or an ERR_PTR() value */
+extern struct mount_info *lookup_overlayfs(char *rpath, unsigned int st_dev,
+					   unsigned int st_ino, unsigned int mnt_id);
 extern struct mount_info *lookup_mnt_id(unsigned int id);
 extern struct mount_info *lookup_mnt_sdev(unsigned int s_dev);
 
diff --git a/include/proc_parse.h b/include/proc_parse.h
index d084e7602..42f889316 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -242,6 +242,9 @@ int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
 /* callback for AUFS support */
 extern int aufs_parse(struct mount_info *mi);
 
+/* callback for OverlayFS support */
+extern int overlayfs_parse(struct mount_info *mi);
+
 int parse_children(pid_t pid, pid_t **_c, int *_n);
 
 #endif /* __CR_PROC_PARSE_H__ */
diff --git a/mount.c b/mount.c
index acd74f0c3..36ffbc3fe 100644
--- a/mount.c
+++ b/mount.c
@@ -127,6 +127,106 @@ static inline int fsroot_mounted(struct mount_info *mi)
 	return is_root(mi->root);
 }
 
+/*
+ * Goes through all entries in the mountinfo table looking for an
+ * OverlayFS mount point that contains the file specified in rpath.
+ * Uses the device number st_dev and the inode number st_ino to make
+ * sure the file is correct.
+ *
+ * Returns the matching mount (the last one in the list if several
+ * match), NULL if there is none, or ERR_PTR(-1) on error.
+ */
+static struct mount_info *__lookup_overlayfs(struct mount_info *list, char *rpath,
+						unsigned int st_dev, unsigned int st_ino,
+						unsigned int mnt_id)
+{
+	struct mount_info *mi_ret = NULL;
+	struct mount_info *m;
+	int mntns_root = -1;
+
+	for (m = list; m != NULL; m = m->next) {
+		struct stat f_stat;
+		int ret_stat;
+
+		if (m->fstype->code != FSTYPE__OVERLAYFS)
+			continue;
+
+		/*
+		 * We need the mntns root fd of the process to be dumped,
+		 * to make sure we stat the correct file.
+		 *
+		 * NOTE(review): the fd is not closed here; presumably it is
+		 * owned by __mntns_get_root_fd() -- confirm.
+		 */
+		if (mntns_root == -1) {
+			mntns_root = __mntns_get_root_fd(root_item->pid.real);
+
+			if (mntns_root < 0) {
+				pr_err("Unable to get the root file descriptor of pid %d\n", root_item->pid.real);
+				return ERR_PTR(-1);
+			}
+		}
+
+		/* Concatenates m->mountpoint with rpath and attempts to stat the resulting path */
+		if (strcmp("./", m->mountpoint) == 0) {
+			ret_stat = fstatat(mntns_root, rpath, &f_stat, 0);
+		} else {
+			char full_path[PATH_MAX];
+			int n = snprintf(full_path, PATH_MAX, "%s/%s", m->mountpoint, rpath);
+
+			if (n < 0 || n >= PATH_MAX) {
+				pr_err("Not enough space to concatenate %s and %s\n", m->mountpoint, rpath);
+				return ERR_PTR(-1);
+			}
+			ret_stat = fstatat(mntns_root, full_path, &f_stat, 0);
+		}
+
+		/*
+		 * st_dev and st_ino arrive as unsigned int, while the struct
+		 * stat fields may be wider. Narrow the stat values the same
+		 * way so that a file with a large inode number still matches.
+		 */
+		if (ret_stat == 0 &&
+		    st_dev == (unsigned int)f_stat.st_dev &&
+		    st_ino == (unsigned int)f_stat.st_ino)
+			mi_ret = m;
+	}
+
+	return mi_ret;
+}
+
+/*
+ * Looks up the mnt_id and path of a file in an overlayFS directory.
+ *
+ * This is useful in order to fix the OverlayFS bug present in the
+ * Linux Kernel before version 4.2. See fixup_overlayfs for details.
+ *
+ * We first check to see if the mnt_id and st_dev numbers currently match
+ * some entry in the mountinfo table. If so, we already have the correct mnt_id
+ * and no fixup is needed.
+ *
+ * Then we proceed to see if there are any overlayFS mounted directories
+ * in the mountinfo table. If so, we concatenate the mountpoint with the
+ * name of the file, and stat the resulting path to check if we found the
+ * correct device id and inode number. If that is the case, we return that
+ * mount so the caller can update the mount id and link name.
+ *
+ * Returns NULL if no fixup is needed, ERR_PTR(-1) on error.
+ */
+struct mount_info *lookup_overlayfs(char *rpath, unsigned int st_dev,
+					unsigned int st_ino, unsigned int mnt_id)
+{
+	struct mount_info *m;
+
+	/* If the mnt_id and device number match for some entry, no fixup is needed */
+	for (m = mntinfo; m != NULL; m = m->next) {
+		if (st_dev == m->s_dev && mnt_id == m->mnt_id)
+			return NULL;
+	}
+
+	return __lookup_overlayfs(mntinfo, rpath, st_dev, st_ino, mnt_id);
+}
+
 static struct mount_info *__lookup_mnt_id(struct mount_info *list, int id)
 {
 	struct mount_info *m;
@@ -1365,6 +1465,10 @@ static struct fstype fstypes[32] = {
 		.code = FSTYPE__FUSE,
 		.dump = always_fail,
 		.restore = always_fail,
+	}, {
+		.name = "overlay",
+		.code = FSTYPE__OVERLAYFS,
+		.parse = overlayfs_parse,
 	},
 };
 
diff --git a/proc_parse.c b/proc_parse.c
index 2b25dea08..8fe0cad22 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -2057,6 +2057,20 @@ out:
 	return exit_code;
 }
 
+/*
+ * If an OverlayFS mountpoint is found in the mountinfo table,
+ * we enable opts.overlayfs, which is a workaround for the
+ * OverlayFS Kernel bug. The mount entry itself is not inspected.
+ *
+ * See fixup_overlayfs for details. Always returns 0.
+ */
+int overlayfs_parse(struct mount_info *new)
+{
+	opts.overlayfs = true;
+	return 0;
+}
+
 /*
  * AUFS callback function to "fix up" the root pathname.
  * See sysfs_parse.c for details.
diff --git a/protobuf/mnt.proto b/protobuf/mnt.proto
index 6f8e7d1e6..6e58e1d6e 100644
--- a/protobuf/mnt.proto
+++ b/protobuf/mnt.proto
@@ -18,6 +18,7 @@ enum fstype {
 	MQUEUE			= 14;
 	FUSE			= 15;
 	AUTO			= 16;
+	OVERLAYFS		= 17;
 };
 
 message mnt_entry {