diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 624eeb0a2..50a2fa9c5 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -5,6 +5,7 @@ ldflags-y += -r
 obj-y += action-scripts.o
 obj-y += external.o
 obj-y += aio.o
+obj-y += apparmor.o
 obj-y += bfd.o
 obj-y += bitmap.o
 obj-y += cgroup.o
diff --git a/criu/apparmor.c b/criu/apparmor.c
new file mode 100644
index 000000000..d5c8b7638
--- /dev/null
+++ b/criu/apparmor.c
@@ -0,0 +1,651 @@
+/*
+ * NOTE(review): the system header names were lost when this patch was
+ * extracted; the list below is reconstructed from the libc calls made in
+ * this file -- confirm it against the original patch.
+ */
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common/config.h"
+#include "imgset.h"
+#include "pstree.h"
+#include "util.h"
+#include "string.h"
+#include "lsm.h"
+#include "cr_options.h"
+#include "kerndat.h"
+
+#include "protobuf.h"
+#include "images/inventory.pb-c.h"
+#include "images/apparmor.pb-c.h"
+
+/*
+ * Apparmor stacked profile checkpoint restore. Previously, we just saved the
+ * profile that was in use by the task, and we expected it to be present on the
+ * target host. Now with stacking, containers are able to load their own
+ * profiles, so we can't rely on this.
+ *
+ * The basic idea here is that there is some (collection) of (potentially
+ * nested) namespaces that a container uses. We don't collect everything on the
+ * host level, but we *do* collect everything inside the namespace; a container
+ * could have loaded a profile but not yet used it when we start to checkpoint.
+ *
+ * Thus, the old code that saves and restores AA profiles is still relevant, we
+ * just need to add the new code in this file to walk the namespace and dump
+ * any blobs in that AA namespace, and then restore these blobs on restore so
+ * that the profiles the old code tries to use are actually present.
+ */
+
+/* Root AA namespaces collected so far; nested ones hang off their parent. */
+static AaNamespace **namespaces = NULL;
+static int n_namespaces = 0;
+
+/*
+ * Allocate a namespace called @name and link it into the tree.
+ *
+ * A namespace with a @parent is appended to the parent's child array only; one
+ * without is appended to the global list of roots. Every object has to live in
+ * exactly one array because the tree is released with the recursive
+ * *__free_unpacked() helpers, which would otherwise free nested namespaces
+ * twice (and dump them twice, once as a child and once as a root).
+ *
+ * Returns the new namespace, or NULL on allocation failure, in which case
+ * nothing has been linked.
+ */
+static AaNamespace *new_namespace(char *name, AaNamespace *parent)
+{
+	void *m;
+	AaNamespace *ret;
+
+	ret = xmalloc(sizeof(*ret));
+	if (!ret)
+		return NULL;
+	aa_namespace__init(ret);
+
+	ret->name = xstrdup(name);
+	if (!ret->name)
+		goto err;
+
+	if (parent) {
+		m = xrealloc(parent->namespaces, sizeof(*parent->namespaces) * (parent->n_namespaces + 1));
+		if (!m)
+			goto err;
+
+		parent->namespaces = m;
+		parent->namespaces[parent->n_namespaces++] = ret;
+	} else {
+		m = xrealloc(namespaces, sizeof(*namespaces) * (n_namespaces + 1));
+		if (!m)
+			goto err;
+
+		namespaces = m;
+		namespaces[n_namespaces++] = ret;
+	}
+
+	return ret;
+
+err:
+	xfree(ret->name);
+	xfree(ret);
+	return NULL;
+}
+
+/*
+ * Read one loaded profile from securityfs and append it to @ns.
+ *
+ * @path is a PATH_MAX sized buffer whose first @offset bytes hold the profiles
+ * directory of the namespace, including the trailing '/'; @dir is the name of
+ * the profile's directory below it. The "name" and "raw_data" files found
+ * there become the name and blob of a new AaPolicy.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int collect_profile(char *path, int offset, char *dir, AaNamespace *ns)
+{
+	AaPolicy *cur;
+	int fd, my_offset, ret;
+	struct stat sb;
+	ssize_t n;
+	void *m;
+	FILE *f;
+
+	my_offset = snprintf(path + offset, PATH_MAX - offset, "%s/", dir);
+	if (my_offset < 0 || my_offset >= PATH_MAX - offset) {
+		pr_err("snprintf failed\n");
+		return -1;
+	}
+	my_offset += offset;
+
+	pr_info("dumping profile %s\n", path);
+
+	cur = xmalloc(sizeof(*cur));
+	if (!cur)
+		return -1;
+	aa_policy__init(cur);
+
+	strlcat(path + my_offset, "name", PATH_MAX - my_offset);
+	f = fopen(path, "r");
+	if (!f) {
+		pr_perror("failed to open %s", path);
+		xfree(cur);
+		return -1;
+	}
+
+	ret = fscanf(f, "%ms", &cur->name);
+	fclose(f);
+	if (ret != 1) {
+		pr_perror("couldn't scanf %s", path);
+		xfree(cur);
+		return -1;
+	}
+
+	strlcpy(path + my_offset, "raw_data", PATH_MAX - my_offset);
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		pr_perror("failed to open aa policy %s", path);
+		goto err;
+	}
+
+	if (fstat(fd, &sb) < 0) {
+		pr_perror("failed to stat %s", path);
+		goto close;
+	}
+
+	cur->blob.len = sb.st_size;
+	cur->blob.data = xmalloc(sb.st_size);
+	if (!cur->blob.data)
+		goto close;
+
+	n = read(fd, cur->blob.data, sb.st_size);
+	if (n < 0) {
+		pr_perror("failed to read %s", path);
+		goto close;
+	}
+
+	if (n != sb.st_size) {
+		pr_err("didn't read all of %s\n", path);
+		goto close;
+	}
+
+	close(fd);
+
+	m = xrealloc(ns->policies, sizeof(*ns->policies) * (ns->n_policies + 1));
+	if (!m)
+		goto err;
+	ns->policies = m;
+	ns->policies[ns->n_policies++] = cur;
+
+	return 0;
+
+close:
+	close(fd);
+
+err:
+	/* aa_policy__init() is expected to leave blob.data NULL until it is set */
+	xfree(cur->blob.data);
+	xfree(cur->name);
+	xfree(cur);
+	return -1;
+}
+
+/*
+ * Context for by_time(): scandir() gives its comparator no user argument, so
+ * the directory being sorted and any stat() error travel through these.
+ */
+static char *ns_path;
+static int sort_err;
+
+/* scandir() filter: skip the entries that dir_dots() recognises. */
+static int no_dirdots(const struct dirent *de)
+{
+	return !dir_dots(de);
+}
+
+/*
+ * scandir() comparator: order entries of ns_path by modification time, oldest
+ * first. If an entry can't be stat()ed the errno is stored in sort_err and the
+ * pair compares as equal; the caller checks sort_err once the sort is done.
+ */
+static int by_time(const struct dirent **de1, const struct dirent **de2)
+{
+	char path[PATH_MAX];
+	struct stat sb1, sb2;
+
+	snprintf(path, sizeof(path), "%s/%s", ns_path, (*de1)->d_name);
+	if (stat(path, &sb1) < 0) {
+		sort_err = errno;
+		pr_perror("couldn't stat %s", path);
+		return 0;
+	}
+
+	snprintf(path, sizeof(path), "%s/%s", ns_path, (*de2)->d_name);
+	if (stat(path, &sb2) < 0) {
+		sort_err = errno;
+		pr_perror("couldn't stat %s", path);
+		return 0;
+	}
+
+	if (sb1.st_mtim.tv_sec != sb2.st_mtim.tv_sec)
+		return sb1.st_mtim.tv_sec < sb2.st_mtim.tv_sec ? -1 : 1;
+
+	if (sb1.st_mtim.tv_nsec != sb2.st_mtim.tv_nsec)
+		return sb1.st_mtim.tv_nsec < sb2.st_mtim.tv_nsec ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Recursively collect the AA namespace whose securityfs directory is @path.
+ *
+ * @path is a PATH_MAX sized buffer and @offset the length of the string in it;
+ * the space behind @offset is used as scratch for the names of everything
+ * below that directory. Child namespaces are collected first, then the
+ * profiles of @ns itself in the order by_time() sorts them.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int walk_namespace(char *path, size_t offset, AaNamespace *ns)
+{
+	DIR *dir = NULL;
+	struct dirent *de, **namelist = NULL;
+	int ret = -1, n_names = 0, i, len;
+	size_t my_offset;
+
+	/* collect all the child namespaces */
+	len = snprintf(path + offset, PATH_MAX - offset, "/namespaces/");
+	if (len < 0 || (size_t)len >= PATH_MAX - offset) {
+		pr_err("AA namespace path too long\n");
+		goto out;
+	}
+	my_offset = offset + len;
+
+	dir = opendir(path);
+	if (!dir) {
+		pr_perror("couldn't open %s", path);
+		goto out;
+	}
+
+	while ((de = readdir(dir))) {
+		AaNamespace *cur;
+
+		if (dir_dots(de))
+			continue;
+
+		len = snprintf(path + my_offset, PATH_MAX - my_offset, "%s", de->d_name);
+		if (len < 0 || (size_t)len >= PATH_MAX - my_offset) {
+			pr_err("AA namespace path too long\n");
+			goto out;
+		}
+
+		cur = new_namespace(de->d_name, ns);
+		if (!cur)
+			goto out;
+
+		if (walk_namespace(path, my_offset + len, cur) < 0) {
+			/* cur is the last child of ns: unlink it, then free it */
+			ns->n_namespaces--;
+			aa_namespace__free_unpacked(cur, NULL);
+			goto out;
+		}
+	}
+
+	closedir(dir);
+	dir = NULL;
+
+	/* now collect the profiles for this namespace */
+	len = snprintf(path + offset, PATH_MAX - offset, "/profiles/");
+	if (len < 0 || (size_t)len >= PATH_MAX - offset) {
+		pr_err("AA namespace path too long\n");
+		goto out;
+	}
+	my_offset = offset + len;
+
+	sort_err = 0;
+	ns_path = path;
+	n_names = scandir(path, &namelist, no_dirdots, by_time);
+	if (n_names < 0 || sort_err != 0) {
+		/* a failure inside by_time() leaves its errno in sort_err */
+		if (sort_err != 0)
+			errno = sort_err;
+		pr_perror("scandir failed");
+		goto out;
+	}
+
+	for (i = 0; i < n_names; i++) {
+		de = namelist[i];
+
+		path[my_offset] = 0;
+		if (collect_profile(path, my_offset, de->d_name, ns) < 0)
+			goto out;
+	}
+
+	ret = 0;
+out:
+	if (dir)
+		closedir(dir);
+
+	if (namelist) {
+		for (i = 0; i < n_names; i++)
+			xfree(namelist[i]);
+		xfree(namelist);
+	}
+
+	return ret;
+}
+
+/*
+ * Collect the AA namespace referenced by the label @profile, if any.
+ *
+ * The namespace name is whatever sits between the first two ':' of @profile;
+ * a label without ':' has no namespace and is skipped. Each namespace is
+ * collected only once, and nothing is collected while
+ * kdat.apparmor_ns_dumping_enabled is unset. @profile is modified while
+ * parsing but is restored before returning.
+ *
+ * Returns 0 on success or when there is nothing to do, -1 on error.
+ */
+int collect_aa_namespace(char *profile)
+{
+	char path[PATH_MAX], *namespace, *end;
+	int ret, i;
+	AaNamespace *ns;
+
+	if (!profile)
+		return 0;
+
+	namespace = strchr(profile, ':');
+	if (!namespace)
+		return 0; /* no namespace to dump */
+	namespace++;
+
+	if (!kdat.apparmor_ns_dumping_enabled) {
+		pr_warn("Apparmor namespace present but dumping not enabled\n");
+		return 0;
+	}
+
+	/* XXX: this is not strictly correct; if something is using namespace
+	 * views, extra //s can indicate a namespace separation. However, I
+	 * think only the apparmor developers use this feature :)
+	 */
+	end = strchr(namespace, ':');
+	if (!end) {
+		pr_err("couldn't find AA namespace end in: %s\n", namespace);
+		return -1;
+	}
+
+	*end = '\0';
+
+	for (i = 0; i < n_namespaces; i++) {
+		/* did we already dump this namespace? */
+		if (!strcmp(namespaces[i]->name, namespace)) {
+			*end = ':';
+			return 0;
+		}
+	}
+
+	pr_info("dumping AA namespace %s\n", namespace);
+
+	ns = new_namespace(namespace, NULL);
+	*end = ':';
+	if (!ns)
+		return -1;
+
+	ret = snprintf(path, sizeof(path), AA_SECURITYFS_PATH "/policy/namespaces/%s", ns->name);
+	if (ret < 0 || (size_t)ret >= sizeof(path)) {
+		pr_err("snprintf failed?\n");
+		goto err;
+	}
+
+	if (walk_namespace(path, ret, ns) < 0) {
+		pr_err("walking AA namespace %s failed\n", ns->name);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	/* ns is the most recently added root: unlink it, then free the subtree */
+	n_namespaces--;
+	aa_namespace__free_unpacked(ns, NULL);
+	return -1;
+}
+
+/*
+ * Write the collected namespaces to the CR_FD_APPARMOR image and release the
+ * in-memory tree.
+ *
+ * Returns 0 when nothing was collected, -1 on allocation failure, otherwise
+ * whatever pb_write_one() returned.
+ */
+int dump_aa_namespaces(void)
+{
+	ApparmorEntry *ae = NULL;
+	int ret;
+
+	if (n_namespaces == 0)
+		return 0;
+
+	ae = xmalloc(sizeof(*ae));
+	if (!ae)
+		return -1;
+	apparmor_entry__init(ae);
+
+	ae->n_namespaces = n_namespaces;
+	ae->namespaces = namespaces;
+
+	ret = pb_write_one(img_from_set(glob_imgset, CR_FD_APPARMOR), ae, PB_APPARMOR);
+
+	/*
+	 * ae took over the array and everything in it. Reset the list to empty
+	 * (not -1) so that a later collect or dump can't index before the array.
+	 */
+	apparmor_entry__free_unpacked(ae, NULL);
+	n_namespaces = 0;
+	namespaces = NULL;
+
+	return ret;
+}
+
+/*
+ * Probe securityfs for AA namespace (stacking) support.
+ *
+ * Returns true when features/domain/stack reads "yes" and
+ * features/domain/version is at least 1.2, false otherwise.
+ */
+bool check_aa_ns_dumping(void)
+{
+	char contents[48];
+	int major, minor, ret;
+	FILE *f;
+
+	f = fopen(AA_SECURITYFS_PATH "/features/domain/stack", "r");
+	if (!f)
+		return false;
+
+	/* the field width does not count the NUL, so keep one byte for it */
+	ret = fscanf(f, "%47s", contents);
+	fclose(f);
+	if (ret != 1) {
+		pr_err("scanning aa stack feature failed\n");
+		return false;
+	}
+
+	if (strcmp("yes", contents)) {
+		pr_warn("aa stack feature disabled: %s\n", contents);
+		return false;
+	}
+
+	f = fopen(AA_SECURITYFS_PATH "/features/domain/version", "r");
+	if (!f)
+		return false;
+
+	ret = fscanf(f, "%d.%d", &major, &minor);
+	fclose(f);
+	if (ret != 2) {
+		pr_err("scanning aa stack version failed\n");
+		return false;
+	}
+
+	/* anything from 1.2 on, including a newer major with a small minor */
+	return major > 1 || (major == 1 && minor >= 2);
+}
+
+/*
+ * Recreate @ns with its child namespaces and policies.
+ *
+ * @path is a PATH_MAX sized buffer whose first @offset bytes name the
+ * securityfs policy directory the namespace is created under. The work is
+ * done in a forked child, which writes a changeprofile request for the new
+ * namespace to its own attr/current before restoring children and policies.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int restore_aa_namespace(AaNamespace *ns, char *path, int offset)
+{
+	pid_t pid;
+	int status;
+
+	pid = fork();
+	if (pid < 0) {
+		pr_perror("fork failed");
+		return -1;
+	}
+
+	if (!pid) {
+		int i, my_offset, ret, fd;
+		char buf[1024];
+		ssize_t n;
+
+		ret = snprintf(buf, sizeof(buf), "changeprofile :%s:", ns->name);
+		if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+			pr_err("profile %s too big\n", ns->name);
+			exit(1);
+		}
+
+		my_offset = snprintf(path + offset, PATH_MAX - offset, "/namespaces/%s", ns->name);
+		if (my_offset < 0 || my_offset >= PATH_MAX - offset) {
+			pr_err("snprintf'd too many characters\n");
+			exit(1);
+		}
+
+		if (mkdir(path, 0755) < 0) {
+			if (errno == EEXIST) {
+				pr_warn("apparmor namespace %s already exists, restoring into it\n", path);
+			} else {
+				pr_perror("failed to create namespace %s", path);
+				exit(1);
+			}
+		}
+
+		fd = open_proc_rw(PROC_SELF, "attr/current");
+		if (fd < 0) {
+			pr_perror("couldn't open attr/current");
+			goto fail;
+		}
+
+		errno = 0;
+		n = write(fd, buf, ret);
+		close(fd);
+		if (n != ret) {
+			pr_perror("failed to change aa namespace %s", buf);
+			goto fail;
+		}
+
+		for (i = 0; i < ns->n_namespaces; i++) {
+			/* recurse into each child, not into ns itself again */
+			if (restore_aa_namespace(ns->namespaces[i], path, offset + my_offset) < 0)
+				goto fail;
+		}
+
+		for (i = 0; i < ns->n_policies; i++) {
+			AaPolicy *p = ns->policies[i];
+
+			fd = open(AA_SECURITYFS_PATH "/.replace", O_WRONLY);
+			if (fd < 0) {
+				pr_perror("couldn't open apparmor load file");
+				goto fail;
+			}
+
+			n = write(fd, p->blob.data, p->blob.len);
+			close(fd);
+			if (n < 0 || (size_t)n != p->blob.len) {
+				pr_perror("write AA policy failed");
+				goto fail;
+			}
+		}
+
+		exit(0);
+	fail:
+		rmdir(path);
+		exit(1);
+	}
+
+	if (waitpid(pid, &status, 0) < 0) {
+		pr_perror("waitpid failed");
+		return -1;
+	}
+
+	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
+		return 0;
+
+	pr_err("failed to restore aa namespace, worker exited: %d\n", status);
+	return -1;
+}
+
+/*
+ * Restore every AA namespace stored in the CR_FD_APPARMOR image.
+ *
+ * Returns 0 on success or when the image holds no entry, -1 on error.
+ */
+int prepare_apparmor_namespaces(void)
+{
+	struct cr_img *img;
+	int ret, i;
+	ApparmorEntry *ae;
+
+	img = open_image(CR_FD_APPARMOR, O_RSTR);
+	if (!img)
+		return -1;
+
+	ret = pb_read_one_eof(img, &ae, PB_APPARMOR);
+	close_image(img);
+	/*
+	 * NOTE(review): assumes pb_read_one_eof() returns 0 at EOF and a negative
+	 * value on a read error -- confirm. A failed read must not pass for "no
+	 * AA namespace entry".
+	 */
+	if (ret < 0)
+		return -1;
+	if (ret == 0)
+		return 0; /* there was no AA namespace entry */
+
+	if (!ae) {
+		pr_err("missing aa namespace entry\n");
+		return -1;
+	}
+
+	/* no real reason we couldn't do this in parallel, but usually we
+	 * expect one namespace so there's probably not a lot to be gained.
+	 */
+	for (i = 0; i < ae->n_namespaces; i++) {
+		char path[PATH_MAX] = AA_SECURITYFS_PATH "/policy";
+
+		if (restore_aa_namespace(ae->namespaces[i], path, strlen(path)) < 0) {
+			ret = -1;
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	apparmor_entry__free_unpacked(ae, NULL);
+	return ret;
+}
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 521701e3f..1782d8bf6 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -83,6 +83,8 @@
 #include "timens.h"
 #include "img-streamer.h"
 #include "pidfd-store.h"
+#include "apparmor.h"
+#include "asm/dump.h"

 /*
  * Architectures can overwrite this function to restore register sets that
@@ -1936,6 +1938,9 @@ int cr_dump_tasks(pid_t pid)
 		goto err;
 	}

+	if (dump_aa_namespaces() < 0)
+		goto err;
+
 	ret = dump_cgroups();
 	if (ret)
 		goto err;
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 82dbaa0dc..dec4e6097 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -78,6 +78,7 @@
 #include "memfd.h"
 #include "timens.h"
 #include "bpfmap.h"
+#include "apparmor.h"

 #include "parasite-syscall.h"
 #include "files-reg.h"
@@ -250,6 +251,9 @@ static int crtools_prepare_shared(void)
 	if (tty_prep_fds())
 		return -1;

+	if (prepare_apparmor_namespaces())
+		return -1;
+
 	return 0;
 }

diff --git a/criu/image-desc.c b/criu/image-desc.c
index 03842934f..d65d9c098 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -106,6 +106,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY(PIDNS, "pidns-%u"),
 	FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF),
 	FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF),
+	FD_ENTRY(APPARMOR, "apparmor"),

 	[CR_FD_STATS] = {
 		.fmt = "stats-%s",
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
index 384d6d07b..5045baee8 100644
--- a/criu/include/image-desc.h
+++ b/criu/include/image-desc.h
@@ -67,6 +67,7 @@ enum {
 	CR_FD_CGROUP,
 	CR_FD_FILE_LOCKS,
 	CR_FD_SECCOMP,
+	CR_FD_APPARMOR,
 	CR_FD_MEMFD_INODE,
 	CR_FD_BPFMAP_FILE,
 	CR_FD_BPFMAP_DATA,
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 3fb3b2492..b2f5d4305 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -50,6 +50,7 @@ struct kerndat_s {
 	bool has_tcp_half_closed;
 	bool stack_guard_gap_hidden;
 	int lsm;
+	bool apparmor_ns_dumping_enabled;
 	bool has_uffd;
 	unsigned long uffd_features;
 	bool has_thp_disable;
diff --git a/criu/include/magic.h b/criu/include/magic.h
index 50e3b6b35..22d7218e4 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -99,6 +99,7 @@
 #define PIDNS_MAGIC 0x61157326 /* Surgut */
 #define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */
 #define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */
+#define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */

 #define IFADDR_MAGIC RAW_IMAGE_MAGIC
 #define ROUTE_MAGIC RAW_IMAGE_MAGIC
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index 87f20d23a..3824de101 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -69,6 +69,7 @@ enum {
 	PB_PIDNS,
 	PB_BPFMAP_FILE,
 	PB_BPFMAP_DATA,
+	PB_APPARMOR,

 	/* PB_AUTOGEN_STOP */

diff --git a/criu/lsm.c b/criu/lsm.c
index 85966a191..6f7e6d8d2 100644
--- a/criu/lsm.c
+++ b/criu/lsm.c
@@ -13,6 +13,7 @@
 #include "cr_options.h"
 #include "lsm.h"
 #include "fdstore.h"
+#include "apparmor.h"

 #include "protobuf.h"
 #include "images/inventory.pb-c.h"
@@ -58,6 +59,13 @@ static int apparmor_get_label(pid_t pid, char **profile_name)
 		*profile_name = NULL;
 	}

+	if (*profile_name && collect_aa_namespace(*profile_name) < 0) {
+		free(*profile_name);
+		*profile_name = NULL;
+		pr_err("failed to collect AA namespace\n");
+		return -1;
+	}
+
 	return 0;
 }

@@ -215,6 +223,7 @@ void kerndat_lsm(void)
 {
 	if (access(AA_SECURITYFS_PATH, F_OK) == 0) {
 		kdat.lsm = LSMTYPE__APPARMOR;
+		kdat.apparmor_ns_dumping_enabled = check_aa_ns_dumping();
 		return;
 	}

diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c
index d1a11b295..ff16b9f5b 100644
--- a/criu/protobuf-desc.c
+++ b/criu/protobuf-desc.c
@@ -67,6 +67,7 @@
 #include "images/img-streamer.pb-c.h"
 #include "images/bpfmap-file.pb-c.h"
 #include "images/bpfmap-data.pb-c.h"
+#include "images/apparmor.pb-c.h"

 struct cr_pb_message_desc cr_pb_descs[PB_MAX];

diff --git a/images/Makefile b/images/Makefile
index 34bc36792..2eaeb7cad 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -70,6 +70,7 @@ proto-obj-y += timens.o
 proto-obj-y += img-streamer.o
 proto-obj-y += bpfmap-file.o
 proto-obj-y += bpfmap-data.o
+proto-obj-y += apparmor.o

 CFLAGS += -iquote $(obj)/

diff --git a/images/apparmor.proto b/images/apparmor.proto
new file mode 100644
index 000000000..0c84f80a6
--- /dev/null
+++ b/images/apparmor.proto
@@ -0,0 +1,16 @@
+syntax = "proto2";
+
+message aa_policy {
+	required string name = 1;
+	required bytes blob = 2;
+}
+
+message aa_namespace {
+	required string name = 1;
+	repeated aa_policy policies = 2;
+	repeated aa_namespace namespaces = 3;
+}
+
+message apparmor_entry {
+	repeated aa_namespace namespaces = 1;
+}
diff --git a/images/creds.proto b/images/creds.proto
index 0007fb46d..6228f7fcb 100644
--- a/images/creds.proto
+++ b/images/creds.proto
@@ -23,4 +23,5 @@ message creds_entry {

 	optional string lsm_profile = 15;
 	optional string lsm_sockcreate = 16;
+	optional bytes apparmor_data = 17;
 }
diff --git a/include/apparmor.h b/include/apparmor.h
new file mode 100644
index 000000000..bccf928d5
--- /dev/null
+++ b/include/apparmor.h
@@ -0,0 +1,11 @@
+#ifndef __CR_APPARMOR_H__
+#define __CR_APPARMOR_H__
+
+int collect_aa_namespace(char *profile);
+int dump_aa_namespaces(void);
+
+bool check_aa_ns_dumping(void);
+
+int prepare_apparmor_namespaces(void);
+
+#endif /* __CR_APPARMOR_H__ */
diff --git a/lib/py/images/images.py b/lib/py/images/images.py
index 3ab52d56a..300b1cc69 100644
--- a/lib/py/images/images.py
+++ b/lib/py/images/images.py
@@ -561,6 +561,7 @@ handlers = {
     'BPFMAP_FILE': entry_handler(pb.bpfmap_file_entry),
     'BPFMAP_DATA': entry_handler(pb.bpfmap_data_entry,
                                  bpfmap_data_extra_handler()),
+    'APPARMOR': entry_handler(pb.apparmor_entry),
 }

