2
0
mirror of https://github.com/checkpoint-restore/criu synced 2025-08-30 13:58:34 +00:00

mount: add new mounts-v2 engine

Design of mounts-v2:

  As a preparation step we classify mounts in groups by (shared_id,
  master_id) in new resolve_shared_mounts_v2 (just after reading images).

  New function prepare_mnt_ns_v2 is our main entry point, where the switch
  from the old mount engine to the new one actually happens.

  First we pre-create each mount namespace nearly empty, only with root
  yard in place (pre_create_mount_namespaces).

  We walk the mount tree and mount each mount similarly to the old mount
  engine, though not in the mount tree but as a sub-directory of the root
  yard (plain mountpoint) in the service (criu) mount namespace. We also
  bind this mount from the service mntns to the real mntns just after
  creation. (do_mount_in_right_mntns)

  Note: this way we initially have the final mount which would be
  visible to restored container user with right mnt_id for the sake of
  e.g. creating unix sockets on it (for unix socket bindmounts), and
  we also have a copy of the mount in service mntns so that old code which
  accesses files on mounts through service mntns can still access them.

  New can_mount_now_v2 is now free from heuristics we had for restoring
  shared groups, we will restore them later via MOVE_MOUNT_SET_GROUP,
  for now everything is private.

  Now when all plain mounts are created in real mount namespaces, we can
  move them to the tree for each namespace. Also we open fds on the
  mountpoint: one mp_fd_id before moving and another mnt_fd_id after,
  so that we can access each file later from final mntns via those fds.
  (assemble_mount_namespaces)

  New restore_mount_sharing_options walks each root sharing group and
  their descendants with dfs tree walk. It creates sharing for the first
  mount in the sharing group and then sets the same sharing on all other
  mounts in this group.

  Sharing creation for the first mount is a two-step process:

  a) If mount has master_id we either copy shared_id from parent sharing
  group or from external source and then make mount slave thus
  converting it to right master_id.
  b) Next if mount has shared_id we just make us shared, creating right
  shared_id.

Cherry-picked from Virtuozzo criu:
https://src.openvz.org/projects/OVZ/repos/criu/commits/596651d02

Changes:
- Split all "exporting" to separate preparatory patches
- Rework cr_time
- Switch to MOVE_MOUNT_SET_GROUP
- Use resolve_mountpoint for external mounts (for MOVE_MOUNT_SET_GROUP)
- Mounting plain mounts both in service and in restored-final mntns
- Call MOVE_MOUNT_SET_GROUP from usernsd
- Rework can_mount_now_v2 to handle bind of both root and external.
- Use sys_move_mount for mount assembling.

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
This commit is contained in:
Pavel Tikhomirov 2020-05-25 18:12:49 +03:00 committed by Andrei Vagin
parent c29675c9a5
commit b35c842d0f
4 changed files with 1307 additions and 0 deletions

View File

@ -4,6 +4,8 @@
#include "linux/mount.h"
#include "linux/openat2.h"
#include "common/list.h"
#include <compel/plugins/std/syscall-codes.h>
#ifndef MOVE_MOUNT_SET_GROUP
@ -57,4 +59,37 @@ static inline long sys_openat2(int dirfd, const char *pathname, struct open_how
extern int check_mount_v2(void);
/*
 * A sharing group: the set of mounts that have the same
 * (shared_id, master_id) pair.
 */
struct sharing_group {
/* This pair identifies the group */
int shared_id;
int master_id;
/* List of shared groups */
struct list_head list;
/*
 * List of mounts in this group
 * NOTE(review): presumably linked through mount_info.mnt_sharing - confirm
 */
struct list_head mnt_list;
/*
* List of dependent shared groups:
* - all siblings have equal master_id
* - the parent has shared_id equal to children's master_id
*
* This is a bit tricky: parent pointer indicates if there is one
* parent sharing_group in list or only siblings.
* So for traversal if parent pointer is set we can do:
* list_for_each_entry(t, &sg->parent->children, siblings)
* and otherwise we can do:
* list_for_each_entry(t, &sg->siblings, siblings)
*/
struct list_head children;
struct list_head siblings;
struct sharing_group *parent;
/*
 * NOTE(review): presumably describes an external source the sharing is
 * copied from when there is no parent group - confirm against mount-v2.c
 */
char *source;
};
extern int resolve_shared_mounts_v2(void);
extern int prepare_mnt_ns_v2(void);
#endif /* __CR_MOUNT_V2_H__ */

View File

@ -62,7 +62,15 @@ struct mount_info {
*/
char *mountpoint;
char *ns_mountpoint;
/* Mount-v2 specific */
char *plain_mountpoint;
int is_dir;
int mp_fd_id;
int mnt_fd_id;
struct sharing_group *sg;
struct list_head mnt_sharing;
int fd;
unsigned flags;
unsigned sb_flags;
@ -79,6 +87,8 @@ struct mount_info {
bool need_plugin;
bool is_ns_root;
bool deleted;
int deleted_level;
struct list_head deleted_list;
struct mount_info *next;
struct ns_id *nsid;

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@
#include "plugin.h"
#include "filesystems.h"
#include "mount.h"
#include "mount-v2.h"
#include "pstree.h"
#include "image.h"
#include "namespaces.h"
@ -1680,6 +1681,7 @@ struct mount_info __maybe_unused *add_cr_time_mount(struct mount_info *root, cha
goto err;
}
mi->mnt_id = HELPER_MNT_ID;
mi->is_dir = true;
mi->flags = mi->sb_flags = 0;
mi->root = xstrdup("/");
mi->fsname = xstrdup(fsname);
@ -2987,6 +2989,9 @@ struct mount_info *mnt_entry_alloc(bool rst)
}
memset(new->rmi, 0, sizeof(struct rst_mount_info));
}
new->mp_fd_id = -1;
new->mnt_fd_id = -1;
new->is_dir = -1;
new->fd = -1;
new->is_overmounted = -1;
INIT_LIST_HEAD(&new->children);
@ -2999,6 +3004,7 @@ struct mount_info *mnt_entry_alloc(bool rst)
INIT_LIST_HEAD(&new->mnt_notprop);
INIT_LIST_HEAD(&new->mnt_unbindable);
INIT_LIST_HEAD(&new->postpone);
INIT_LIST_HEAD(&new->deleted_list);
}
return new;
}
@ -3314,6 +3320,7 @@ static int merge_mount_trees(void)
root_yard_mp->plain_mountpoint = xstrdup(mnt_roots);
if (!root_yard_mp->plain_mountpoint)
return -1;
root_yard_mp->is_dir = true;
root_yard_mp->mounted = true;
root_yard_mp->mnt_bind_is_populated = true;
root_yard_mp->is_overmounted = false;
@ -3359,6 +3366,9 @@ int read_mnt_ns_img(void)
if (!nsid->mnt.mntinfo_tree)
return -1;
/* mntns root mounts are always directories */
nsid->mnt.mntinfo_tree->is_dir = true;
tail->next = pms;
pms = head;
}
@ -3368,6 +3378,9 @@ int read_mnt_ns_img(void)
search_bindmounts();
prepare_is_overmounted();
if (!opts.mntns_compat_mode && resolve_shared_mounts_v2())
return -1;
if (merge_mount_trees())
return -1;
@ -3691,6 +3704,9 @@ int prepare_mnt_ns(void)
free_mntinfo(old);
}
if (!opts.mntns_compat_mode)
return prepare_mnt_ns_v2();
ret = populate_mnt_ns();
if (ret)
return -1;