From dbaab31f31173d9f83cbf2a391d0dbea8037d5c4 Mon Sep 17 00:00:00 2001 From: Gabriel Guimaraes Date: Fri, 24 Jul 2015 21:15:28 +0000 Subject: [PATCH] Workaround for the OverlayFS bug present before Kernel 4.2 This is here only to support the Linux Kernel between versions 3.18 and 4.2. After that, this workaround is not needed anymore, but it will work properly on both a kernel with and without the bug. The bug is that when a process has a file open in an OverlayFS directory, the information in /proc/<pid>/fd/<fd> and /proc/<pid>/fdinfo/<fd> is wrong, so we grab that information from the mountinfo table instead. This is done every time fill_fdlink is called. We first check to see if the mnt_id and st_dev numbers currently match some entry in the mountinfo table. If so, we already have the correct mnt_id and no fixup is needed. Then we proceed to see if there are any overlayFS mounted directories in the mountinfo table. If so, we concatenate the mountpoint with the name of the file, and stat the resulting path to check if we found the correct device id and node number. If that is the case, we update the mount id and link variables with the correct values. Signed-off-by: Gabriel Guimaraes Signed-off-by: Pavel Emelyanov --- files.c | 55 ++++++++++++++++++++++++++++ include/cr_options.h | 1 + include/mount.h | 2 + include/proc_parse.h | 3 ++ mount.c | 87 ++++++++++++++++++++++++++++++++++++++++++++ proc_parse.c | 13 +++++++ protobuf/mnt.proto | 1 + 7 files changed, 162 insertions(+) diff --git a/files.c b/files.c index c82920dd0..3848acf89 100644 --- a/files.c +++ b/files.c @@ -30,6 +30,7 @@ #include "eventfd.h" #include "eventpoll.h" #include "fsnotify.h" +#include "mount.h" #include "signalfd.h" #include "namespaces.h" #include "tun.h" @@ -156,6 +157,56 @@ void show_saved_files(void) } } +/* + * Workaround for the OverlayFS bug present before Kernel 4.2 + * + * This is here only to support the Linux Kernel between versions + * 3.18 and 4.2. 
After that, this workaround is not needed anymore, + * but it will work properly on both a kernel with and without the bug. + * + * When a process has a file open in an OverlayFS directory, + * the information in /proc/<pid>/fd/<fd> and /proc/<pid>/fdinfo/<fd> + * is wrong. We can't even rely on stat()-ing /proc/<pid>/fd/<fd> since + * this will show us the wrong filesystem type. + * + * So we grab that information from the mountinfo table instead. This is done + * every time fill_fdlink is called. See lookup_overlayfs for more details. + * + */ +static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link) +{ + struct mount_info *m; + + if (!link) + return 0; + + m = lookup_overlayfs(link->name, p->stat.st_dev, p->stat.st_ino, p->mnt_id); + if (IS_ERR(m)) + return -1; + + if (!m) + return 0; + + p->mnt_id = m->mnt_id; + + /* + * If the bug is present, the file path from /proc/<pid>/fd + * does not include the mountpoint, so we prepend it ourselves. + */ + if (strcmp("./", m->mountpoint) != 0) { + char buf[PATH_MAX]; + int n; + + strncpy(buf, link->name, PATH_MAX); + n = snprintf(link->name, PATH_MAX, "%s/%s", m->mountpoint, buf + 2); + if (n >= PATH_MAX) { + pr_err("Not enough space to replace %s\n", buf); + return -1; + } + } + return 0; +} + /* * The gen_id thing is used to optimize the comparison of shared files. * If two files have different gen_ids, then they are different for sure. 
@@ -206,6 +257,10 @@ int fill_fdlink(int lfd, const struct fd_parms *p, struct fd_link *link) } link->len = len + 1; + + if (opts.overlayfs) + if (fixup_overlayfs((struct fd_parms *)p, link) < 0) + return -1; return 0; } diff --git a/include/cr_options.h b/include/cr_options.h index 19c2f7702..011349c3e 100644 --- a/include/cr_options.h +++ b/include/cr_options.h @@ -79,6 +79,7 @@ struct cr_options { bool enable_external_sharing; bool enable_external_masters; bool aufs; /* auto-deteced, not via cli */ + bool overlayfs; }; extern struct cr_options opts; diff --git a/include/mount.h b/include/mount.h index 0d5fc7f3d..01da3f55f 100644 --- a/include/mount.h +++ b/include/mount.h @@ -22,6 +22,8 @@ extern int prepare_mnt_ns(void); extern int pivot_root(const char *new_root, const char *put_old); struct mount_info; +struct mount_info *lookup_overlayfs(char *rpath, unsigned int s_dev, + unsigned int st_ino, unsigned int mnt_id); extern struct mount_info *lookup_mnt_id(unsigned int id); extern struct mount_info *lookup_mnt_sdev(unsigned int s_dev); diff --git a/include/proc_parse.h b/include/proc_parse.h index d084e7602..42f889316 100644 --- a/include/proc_parse.h +++ b/include/proc_parse.h @@ -242,6 +242,9 @@ int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups); /* callback for AUFS support */ extern int aufs_parse(struct mount_info *mi); +/* callback for OverlayFS support */ +extern int overlayfs_parse(struct mount_info *mi); + int parse_children(pid_t pid, pid_t **_c, int *_n); #endif /* __CR_PROC_PARSE_H__ */ diff --git a/mount.c b/mount.c index acd74f0c3..36ffbc3fe 100644 --- a/mount.c +++ b/mount.c @@ -127,6 +127,89 @@ static inline int fsroot_mounted(struct mount_info *mi) return is_root(mi->root); } +static struct mount_info *__lookup_overlayfs(struct mount_info *list, char *rpath, + unsigned int st_dev, unsigned int st_ino, + unsigned int mnt_id) +{ + /* + * Goes through all entries in the mountinfo table + * looking for a mount point that contains 
the file specified + * in rpath. Uses the device number st_dev and the inode number st_ino + * to make sure the file is correct. + */ + struct mount_info *mi_ret = NULL; + struct mount_info *m; + int mntns_root = -1; + + for (m = list; m != NULL; m = m->next) { + if (m->fstype->code == FSTYPE__OVERLAYFS) { + struct stat f_stat; + int ret_stat; + + /* + * We need the mntns root fd of the process to be dumped, + * to make sure we stat the correct file + */ + if (mntns_root == -1) { + mntns_root = __mntns_get_root_fd(root_item->pid.real); + + if (mntns_root < 0) { + pr_err("Unable to get the root file descriptor of pid %d\n", root_item->pid.real); + return ERR_PTR(-1); + } + } + + /* Concatenates m->mountpoint with rpath and attempts to stat the resulting path */ + if (strcmp("./", m->mountpoint) == 0) + ret_stat = fstatat(mntns_root, rpath, &f_stat, 0); + else { + char _full_path[PATH_MAX]; + int n = snprintf(_full_path, PATH_MAX, "%s/%s", m->mountpoint, rpath); + + if (n >= PATH_MAX) { + pr_err("Not enough space to concatenate %s and %s\n", m->mountpoint, rpath); + return ERR_PTR(-1); + } + ret_stat = fstatat(mntns_root, _full_path, &f_stat, 0); + } + + if (ret_stat == 0 && st_dev == f_stat.st_dev && st_ino == f_stat.st_ino) + mi_ret = m; + } + } + + return mi_ret; +} + +/* + * Looks up the mnt_id and path of a file in an overlayFS directory. + * + * This is useful in order to fix the OverlayFS bug present in the + * Linux Kernel before version 4.2. See fixup_overlayfs for details. + * + * We first check to see if the mnt_id and st_dev numbers currently match + * some entry in the mountinfo table. If so, we already have the correct mnt_id + * and no fixup is needed. + * + * Then we proceed to see if there are any overlayFS mounted directories + * in the mountinfo table. If so, we concatenate the mountpoint with the + * name of the file, and stat the resulting path to check if we found the + * correct device id and node number. 
If that is the case, we update the + * mount id and link variables with the correct values. + */ +struct mount_info *lookup_overlayfs(char *rpath, unsigned int st_dev, + unsigned int st_ino, unsigned int mnt_id) +{ + struct mount_info *m; + + /* If the mnt_id and device number match for some entry, no fixup is needed */ + for (m = mntinfo; m != NULL; m = m->next) + if (st_dev == m->s_dev && mnt_id == m->mnt_id) + return NULL; + + return __lookup_overlayfs(mntinfo, rpath, st_dev, st_ino, mnt_id); +} + static struct mount_info *__lookup_mnt_id(struct mount_info *list, int id) { struct mount_info *m; @@ -1365,6 +1448,10 @@ static struct fstype fstypes[32] = { .code = FSTYPE__FUSE, .dump = always_fail, .restore = always_fail, + }, { + .name = "overlay", + .code = FSTYPE__OVERLAYFS, + .parse = overlayfs_parse, }, }; diff --git a/proc_parse.c b/proc_parse.c index 2b25dea08..8fe0cad22 100644 --- a/proc_parse.c +++ b/proc_parse.c @@ -2057,6 +2057,19 @@ out: return exit_code; } +/* + * If an OverlayFS mountpoint is found in the mountinfo table, + * we enable opts.overlayfs, which is a workaround for the + * OverlayFS Kernel bug. + * + * See fixup_overlayfs for details. + */ +int overlayfs_parse(struct mount_info *new) +{ + opts.overlayfs = true; + return 0; +} + /* * AUFS callback function to "fix up" the root pathname. * See sysfs_parse.c for details. diff --git a/protobuf/mnt.proto b/protobuf/mnt.proto index 6f8e7d1e6..6e58e1d6e 100644 --- a/protobuf/mnt.proto +++ b/protobuf/mnt.proto @@ -18,6 +18,7 @@ enum fstype { MQUEUE = 14; FUSE = 15; AUTO = 16; + OVERLAYFS = 17; }; message mnt_entry {