diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in index 16529bbf0..11ec5c45b 100644 --- a/config/apparmor/abstractions/container-base.in +++ b/config/apparmor/abstractions/container-base.in @@ -85,7 +85,6 @@ mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, deny /sys/firmware/efi/efivars/** rwklx, deny /sys/kernel/security/** rwklx, - mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/, # deny reads from debugfs diff --git a/config/apparmor/abstractions/start-container b/config/apparmor/abstractions/start-container index 414d058ba..3df9883e3 100644 --- a/config/apparmor/abstractions/start-container +++ b/config/apparmor/abstractions/start-container @@ -40,5 +40,6 @@ pivot_root /usr/lib*/*/lxc/**, change_profile -> lxc-*, + change_profile -> lxc-**, change_profile -> unconfined, change_profile -> :lxc-*:unconfined, diff --git a/config/apparmor/profiles/lxc-default-cgns b/config/apparmor/profiles/lxc-default-cgns index ff599ef81..f69eb994b 100644 --- a/config/apparmor/profiles/lxc-default-cgns +++ b/config/apparmor/profiles/lxc-default-cgns @@ -9,4 +9,5 @@ profile lxc-container-default-cgns flags=(attach_disconnected,mediate_deleted) { # the newinstance option (but, right now, we don't). 
deny mount fstype=devpts, mount fstype=cgroup -> /sys/fs/cgroup/**, + mount fstype=cgroup2 -> /sys/fs/cgroup/**, } diff --git a/config/apparmor/profiles/lxc-default-with-nesting b/config/apparmor/profiles/lxc-default-with-nesting index 6e5745f97..cd198beb8 100644 --- a/config/apparmor/profiles/lxc-default-with-nesting +++ b/config/apparmor/profiles/lxc-default-with-nesting @@ -11,4 +11,5 @@ profile lxc-container-default-with-nesting flags=(attach_disconnected,mediate_de mount fstype=sysfs -> /var/cache/lxc/**, mount options=(rw,bind), mount fstype=cgroup -> /sys/fs/cgroup/**, + mount fstype=cgroup2 -> /sys/fs/cgroup/**, } diff --git a/configure.ac b/configure.ac index c24f8f3ee..f1811205f 100644 --- a/configure.ac +++ b/configure.ac @@ -469,6 +469,13 @@ AC_ARG_WITH([cgroup-pattern], [pattern for container cgroups] )], [], [with_cgroup_pattern=['lxc/%n']]) +# The path for the apparmor_parser's cache for generated apparmor profiles +AC_ARG_WITH([apparmor-cache-dir], + [AC_HELP_STRING( + [--with-apparmor-cache-dir=dir], + [path for apparmor_parser cache] + )], [], [with_apparmor_cache_dir=['${localstatedir}/cache/lxc/apparmor']]) + # Container log path. By default, use $lxcpath. AC_MSG_CHECKING([Whether to place logfiles in container config path]) AC_ARG_ENABLE([configpath-log], @@ -515,6 +522,7 @@ AS_AC_EXPAND(LXCBINHOOKDIR, "$libexecdir/lxc/hooks") AS_AC_EXPAND(LXCINITDIR, "$libexecdir") AS_AC_EXPAND(LOGPATH, "$with_log_path") AS_AC_EXPAND(RUNTIME_PATH, "$with_runtime_path") +AS_AC_EXPAND(APPARMOR_CACHE_DIR, "$with_apparmor_cache_dir") AC_SUBST(DEFAULT_CGROUP_PATTERN, ["$with_cgroup_pattern"]) # We need the install path so criu knows where to reference the hook scripts. 
diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index c5e46ac28..1359eb3e4 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -174,6 +174,7 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ -DDEFAULT_CGROUP_PATTERN=\"$(DEFAULT_CGROUP_PATTERN)\" \ -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" \ -DSBINDIR=\"$(SBINDIR)\" \ + -DAPPARMOR_CACHE_DIR=\"$(APPARMOR_CACHE_DIR)\" \ -I $(top_srcdir)/src \ -I $(top_srcdir)/src/lxc \ -I $(top_srcdir)/src/lxc/storage \ diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 4d17c277d..f5b94b091 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -2360,7 +2360,23 @@ static int setup_mount(const struct lxc_conf *conf, return ret; } -FILE *make_anonymous_mount_file(struct lxc_list *mount) +/* + * In order for nested containers to be able to mount /proc and /sys they need + * to see "pure" proc and sysfs mount points with nothing mounted on top + * (like lxcfs). + * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an + * apparmor rule to deny access to them. This is mostly for convenience: The + * container's root user can mount them anyway and thus has access to the two + * file systems. But a non-root user in the container should not be allowed to + * access them as a side effect without explicitly allowing it. 
+ */ +static const char nesting_helpers[] = +"proc dev/.lxc/proc proc create=dir,optional\n" +"sys dev/.lxc/sys sysfs create=dir,optional\n" +; + +FILE *make_anonymous_mount_file(struct lxc_list *mount, + bool include_nesting_helpers) { int ret; char *mount_entry; @@ -2402,6 +2418,13 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount) goto on_error; } + if (include_nesting_helpers) { + ret = lxc_write_nointr(fd, nesting_helpers, + sizeof(nesting_helpers) - 1); + if (ret != sizeof(nesting_helpers) - 1) + goto on_error; + } + ret = lseek(fd, 0, SEEK_SET); if (ret < 0) goto on_error; @@ -2422,7 +2445,7 @@ static int setup_mount_entries(const struct lxc_conf *conf, int ret; FILE *f; - f = make_anonymous_mount_file(mount); + f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); if (!f) return -1; @@ -2738,6 +2761,7 @@ struct lxc_conf *lxc_conf_init(void) lxc_list_init(&new->groups); lxc_list_init(&new->state_clients); new->lsm_aa_profile = NULL; + lxc_list_init(&new->lsm_aa_raw); new->lsm_se_context = NULL; new->tmp_umount_proc = false; new->tmp_umount_proc = 0; @@ -4025,6 +4049,19 @@ void lxc_clear_includes(struct lxc_conf *conf) } } +int lxc_clear_apparmor_raw(struct lxc_conf *c) +{ + struct lxc_list *it, *next; + + lxc_list_for_each_safe (it, &c->lsm_aa_raw, next) { + lxc_list_del(it); + free(it->elem); + free(it); + } + + return 0; +} + void lxc_conf_free(struct lxc_conf *conf) { if (!conf) @@ -4052,6 +4089,7 @@ void lxc_conf_free(struct lxc_conf *conf) free(conf->syslog); lxc_free_networks(&conf->network); free(conf->lsm_aa_profile); + free(conf->lsm_aa_profile_computed); free(conf->lsm_se_context); lxc_seccomp_free(conf); lxc_clear_config_caps(conf); @@ -4068,6 +4106,7 @@ void lxc_conf_free(struct lxc_conf *conf) lxc_clear_limits(conf, "lxc.prlimit"); lxc_clear_sysctls(conf, "lxc.sysctl"); lxc_clear_procs(conf, "lxc.proc"); + lxc_clear_apparmor_raw(conf); free(conf->cgroup_meta.dir); free(conf->cgroup_meta.controllers); 
free(conf->shmount.path_host); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 8d7ded80e..6b299cab9 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -275,7 +275,11 @@ struct lxc_conf { }; char *lsm_aa_profile; + char *lsm_aa_profile_computed; + bool lsm_aa_profile_created; + unsigned int lsm_aa_allow_nesting; unsigned int lsm_aa_allow_incomplete; + struct lxc_list lsm_aa_raw; char *lsm_se_context; bool tmp_umount_proc; char *seccomp; /* filename with the seccomp rules */ @@ -427,7 +431,8 @@ extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); extern void remount_all_slave(void); extern void suggest_default_idmap(void); -extern FILE *make_anonymous_mount_file(struct lxc_list *mount); +extern FILE *make_anonymous_mount_file(struct lxc_list *mount, + bool include_nesting_helpers); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); @@ -441,5 +446,6 @@ extern int setup_sysctl_parameters(struct lxc_list *sysctls); extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key); extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid); extern int lxc_clear_procs(struct lxc_conf *c, const char *key); +extern int lxc_clear_apparmor_raw(struct lxc_conf *c); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 091dc67d9..456cd4c2e 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -84,7 +84,9 @@ lxc_log_define(confile, lxc); lxc_config_define(autodev); lxc_config_define(apparmor_allow_incomplete); +lxc_config_define(apparmor_allow_nesting); lxc_config_define(apparmor_profile); +lxc_config_define(apparmor_raw); lxc_config_define(cap_drop); lxc_config_define(cap_keep); lxc_config_define(cgroup_controller); @@ -158,6 +160,8 @@ static struct lxc_config_t config[] = { { "lxc.arch", set_config_personality, 
get_config_personality, clr_config_personality, }, { "lxc.apparmor.profile", set_config_apparmor_profile, get_config_apparmor_profile, clr_config_apparmor_profile, }, { "lxc.apparmor.allow_incomplete", set_config_apparmor_allow_incomplete, get_config_apparmor_allow_incomplete, clr_config_apparmor_allow_incomplete, }, + { "lxc.apparmor.allow_nesting", set_config_apparmor_allow_nesting, get_config_apparmor_allow_nesting, clr_config_apparmor_allow_nesting, }, + { "lxc.apparmor.raw", set_config_apparmor_raw, get_config_apparmor_raw, clr_config_apparmor_raw, }, { "lxc.autodev", set_config_autodev, get_config_autodev, clr_config_autodev, }, { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, }, { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, }, @@ -1132,6 +1136,52 @@ static int set_config_apparmor_allow_incomplete(const char *key, return 0; } +static int set_config_apparmor_allow_nesting(const char *key, + const char *value, + struct lxc_conf *lxc_conf, + void *data) +{ + if (lxc_config_value_empty(value)) + return clr_config_apparmor_allow_nesting(key, lxc_conf, NULL); + + if (lxc_safe_uint(value, &lxc_conf->lsm_aa_allow_nesting) < 0) + return -1; + + if (lxc_conf->lsm_aa_allow_nesting > 1) + return -1; + + return 0; +} + +static int set_config_apparmor_raw(const char *key, + const char *value, + struct lxc_conf *lxc_conf, + void *data) +{ + char *elem; + struct lxc_list *list; + + if (lxc_config_value_empty(value)) + return lxc_clear_apparmor_raw(lxc_conf); + + list = malloc(sizeof(*list)); + if (!list) { + errno = ENOMEM; + return -1; + } + + elem = strdup(value); + if (!elem) { + free(list); + return -1; + } + list->elem = elem; + + lxc_list_add_tail(&lxc_conf->lsm_aa_raw, list); + + return 0; +} + static int set_config_selinux_context(const char *key, const char *value, struct lxc_conf *lxc_conf, void *data) { @@ -3004,6 +3054,34 @@ static int get_config_apparmor_allow_incomplete(const char *key, char 
*retv, c->lsm_aa_allow_incomplete); } +static int get_config_apparmor_allow_nesting(const char *key, char *retv, + int inlen, struct lxc_conf *c, + void *data) +{ + return lxc_get_conf_int(c, retv, inlen, + c->lsm_aa_allow_nesting); +} + +static int get_config_apparmor_raw(const char *key, char *retv, + int inlen, struct lxc_conf *c, + void *data) +{ + int len; + struct lxc_list *it; + int fulllen = 0; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + lxc_list_for_each(it, &c->lsm_aa_raw) { + strprint(retv, inlen, "%s\n", (char *)it->elem); + } + + return fulllen; +} + static int get_config_selinux_context(const char *key, char *retv, int inlen, struct lxc_conf *c, void *data) { @@ -3794,6 +3872,21 @@ static inline int clr_config_apparmor_allow_incomplete(const char *key, return 0; } +static inline int clr_config_apparmor_allow_nesting(const char *key, + struct lxc_conf *c, + void *data) +{ + c->lsm_aa_allow_nesting = 0; + return 0; +} + +static inline int clr_config_apparmor_raw(const char *key, + struct lxc_conf *c, + void *data) +{ + return lxc_clear_apparmor_raw(c); +} + static inline int clr_config_selinux_context(const char *key, struct lxc_conf *c, void *data) { @@ -4986,7 +5079,9 @@ int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv, if (!strcmp(key, "lxc.apparmor")) { strprint(retv, inlen, "allow_incomplete\n"); + strprint(retv, inlen, "allow_nesting\n"); strprint(retv, inlen, "profile\n"); + strprint(retv, inlen, "raw\n"); } else if (!strcmp(key, "lxc.cgroup")) { strprint(retv, inlen, "dir\n"); } else if (!strcmp(key, "lxc.selinux")) { diff --git a/src/lxc/criu.c b/src/lxc/criu.c index c36421627..64ea4f024 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -378,7 +378,8 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) DECLARE_ARG(opts->user->action_script); } - mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list); + mnts = 
make_anonymous_mount_file(&opts->c->lxc_conf->mount_list, + opts->c->lxc_conf->lsm_aa_allow_nesting); if (!mnts) goto err; diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c index 1507917c8..ec3f805de 100644 --- a/src/lxc/lsm/apparmor.c +++ b/src/lxc/lsm/apparmor.c @@ -32,11 +32,19 @@ #include "lsm.h" #include "conf.h" #include "utils.h" +#include "initutils.h" +#include "caps.h" +#include "parse.h" lxc_log_define(apparmor, lsm); /* set by lsm_apparmor_drv_init if true */ static int aa_enabled = 0; +static bool aa_parser_available = false; +static bool aa_supports_unix = false; +static bool aa_can_stack = false; +static bool aa_is_stacked = false; +static bool aa_admin = false; static int mount_features_enabled = 0; @@ -45,6 +53,332 @@ static int mount_features_enabled = 0; #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask" #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled" #define AA_UNCHANGED "unchanged" +#define AA_GENERATED "generated" + +#define AA_CMD_LOAD 'r' +#define AA_CMD_UNLOAD 'R' +#define AA_CMD_PARSE 'Q' + +static const char AA_PROFILE_BASE[] = +" ### Base profile\n" +" capability,\n" +" dbus,\n" +" file,\n" +" network,\n" +" umount,\n" +"\n" +" # Allow us to receive signals from anywhere.\n" +" signal (receive),\n" +"\n" +" # Allow us to send signals to ourselves\n" +" signal peer=@{profile_name},\n" +"\n" +" # Allow other processes to read our /proc entries, futexes, perf tracing and\n" +" # kcmp for now (they will need 'read' in the first place). Administrators can\n" +" # override with:\n" +" # deny ptrace (readby) ...\n" +" ptrace (readby),\n" +"\n" +" # Allow other processes to trace us by default (they will need 'trace' in\n" +" # the first place). 
Administrators can override with:\n" +" # deny ptrace (tracedby) ...\n" +" ptrace (tracedby),\n" +"\n" +" # Allow us to ptrace ourselves\n" +" ptrace peer=@{profile_name},\n" +"\n" +" # ignore DENIED message on / remount\n" +" deny mount options=(ro, remount) -> /,\n" +" deny mount options=(ro, remount, silent) -> /,\n" +"\n" +" # allow tmpfs mounts everywhere\n" +" mount fstype=tmpfs,\n" +"\n" +" # allow hugetlbfs mounts everywhere\n" +" mount fstype=hugetlbfs,\n" +"\n" +" # allow mqueue mounts everywhere\n" +" mount fstype=mqueue,\n" +"\n" +" # allow fuse mounts everywhere\n" +" mount fstype=fuse,\n" +" mount fstype=fuse.*,\n" +"\n" +" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n" +" deny @{PROC}/bus/** wklx,\n" +"\n" +" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n" +" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n" +" deny @{PROC}/sys/fs/** wklx,\n" +"\n" +" # allow efivars to be mounted, writing to it will be blocked though\n" +" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n" +"\n" +" # block some other dangerous paths\n" +" deny @{PROC}/kcore rwklx,\n" +" deny @{PROC}/sysrq-trigger rwklx,\n" +"\n" +" # deny writes in /sys except for /sys/fs/cgroup, also allow\n" +" # fusectl, securityfs and debugfs to be mounted there (read-only)\n" +" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n" +" mount fstype=securityfs -> /sys/kernel/security/,\n" +" mount fstype=debugfs -> /sys/kernel/debug/,\n" +" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n" +" mount fstype=proc -> /proc/,\n" +" mount fstype=sysfs -> /sys/,\n" +" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n" +" deny /sys/firmware/efi/efivars/** rwklx,\n" +" # note, /sys/kernel/security/** handled below\n" +" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n" +"\n" +" # deny reads from debugfs\n" +" deny /sys/kernel/debug/{,**} rwklx,\n" +"\n" +" # allow paths to be 
made slave, shared, private or unbindable\n" +" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" +"# mount options=(rw,make-slave) -> **,\n" +"# mount options=(rw,make-rslave) -> **,\n" +"# mount options=(rw,make-shared) -> **,\n" +"# mount options=(rw,make-rshared) -> **,\n" +"# mount options=(rw,make-private) -> **,\n" +"# mount options=(rw,make-rprivate) -> **,\n" +"# mount options=(rw,make-unbindable) -> **,\n" +"# mount options=(rw,make-runbindable) -> **,\n" +"\n" +" # allow bind-mounts of anything except /proc, /sys and /dev\n" +" mount options=(rw,bind) /[^spd]*{,/**},\n" +" mount options=(rw,bind) /d[^e]*{,/**},\n" +" mount options=(rw,bind) /de[^v]*{,/**},\n" +" mount options=(rw,bind) /dev/.[^l]*{,/**},\n" +" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n" +" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n" +" mount options=(rw,bind) /dev/.lxc?*{,/**},\n" +" mount options=(rw,bind) /dev/[^.]*{,/**},\n" +" mount options=(rw,bind) /dev?*{,/**},\n" +" mount options=(rw,bind) /p[^r]*{,/**},\n" +" mount options=(rw,bind) /pr[^o]*{,/**},\n" +" mount options=(rw,bind) /pro[^c]*{,/**},\n" +" mount options=(rw,bind) /proc?*{,/**},\n" +" mount options=(rw,bind) /s[^y]*{,/**},\n" +" mount options=(rw,bind) /sy[^s]*{,/**},\n" +" mount options=(rw,bind) /sys?*{,/**},\n" +"\n" +" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n" +" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n" +" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n" +" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n" +" mount options=(ro,remount,bind) -> /dev?*{,/**},\n" +" mount options=(ro,remount,bind) -> 
/p[^r]*{,/**},\n" +" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n" +" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n" +" mount options=(ro,remount,bind) -> /proc?*{,/**},\n" +" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n" +" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n" +" mount options=(ro,remount,bind) -> /sys?*{,/**},\n" +"\n" +" # allow moving mounts except for /proc, /sys and /dev\n" +" mount options=(rw,move) /[^spd]*{,/**},\n" +" mount options=(rw,move) /d[^e]*{,/**},\n" +" mount options=(rw,move) /de[^v]*{,/**},\n" +" mount options=(rw,move) /dev/.[^l]*{,/**},\n" +" mount options=(rw,move) /dev/.l[^x]*{,/**},\n" +" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n" +" mount options=(rw,move) /dev/.lxc?*{,/**},\n" +" mount options=(rw,move) /dev/[^.]*{,/**},\n" +" mount options=(rw,move) /dev?*{,/**},\n" +" mount options=(rw,move) /p[^r]*{,/**},\n" +" mount options=(rw,move) /pr[^o]*{,/**},\n" +" mount options=(rw,move) /pro[^c]*{,/**},\n" +" mount options=(rw,move) /proc?*{,/**},\n" +" mount options=(rw,move) /s[^y]*{,/**},\n" +" mount options=(rw,move) /sy[^s]*{,/**},\n" +" mount options=(rw,move) /sys?*{,/**},\n" +"\n" +" # generated by: lxc-generate-aa-rules.py container-rules.base\n" +" deny /proc/sys/[^kn]*{,/**} wklx,\n" +" deny /proc/sys/k[^e]*{,/**} wklx,\n" +" deny /proc/sys/ke[^r]*{,/**} wklx,\n" +" deny /proc/sys/ker[^n]*{,/**} wklx,\n" +" deny /proc/sys/kern[^e]*{,/**} wklx,\n" +" deny /proc/sys/kerne[^l]*{,/**} wklx,\n" +" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n" +" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n" +" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n" +" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n" +" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n" +" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n" +" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n" +" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n" +" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n" +" deny /proc/sys/kernel/domainnam[^e]*{,/**} 
wklx,\n" +" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n" +" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n" +" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n" +" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n" +" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n" +" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n" +" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n" +" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n" +" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n" +" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n" +" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n" +" deny /proc/sys/kernel/msg*/** wklx,\n" +" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n" +" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n" +" deny /proc/sys/kernel/sem*/** wklx,\n" +" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n" +" deny /proc/sys/kernel/shm*/** wklx,\n" +" deny /proc/sys/kernel?*{,/**} wklx,\n" +" deny /proc/sys/n[^e]*{,/**} wklx,\n" +" deny /proc/sys/ne[^t]*{,/**} wklx,\n" +" deny /proc/sys/net?*{,/**} wklx,\n" +" deny /sys/[^fdck]*{,/**} wklx,\n" +" deny /sys/c[^l]*{,/**} wklx,\n" +" deny /sys/cl[^a]*{,/**} wklx,\n" +" deny /sys/cla[^s]*{,/**} wklx,\n" +" deny /sys/clas[^s]*{,/**} wklx,\n" +" deny /sys/class/[^n]*{,/**} wklx,\n" +" deny /sys/class/n[^e]*{,/**} wklx,\n" +" deny /sys/class/ne[^t]*{,/**} wklx,\n" +" deny /sys/class/net?*{,/**} wklx,\n" +" deny /sys/class?*{,/**} wklx,\n" +" deny /sys/d[^e]*{,/**} wklx,\n" +" deny /sys/de[^v]*{,/**} wklx,\n" +" deny /sys/dev[^i]*{,/**} wklx,\n" +" deny /sys/devi[^c]*{,/**} wklx,\n" +" deny /sys/devic[^e]*{,/**} wklx,\n" +" deny /sys/device[^s]*{,/**} wklx,\n" +" deny /sys/devices/[^v]*{,/**} wklx,\n" +" deny /sys/devices/v[^i]*{,/**} wklx,\n" +" deny /sys/devices/vi[^r]*{,/**} wklx,\n" +" deny /sys/devices/vir[^t]*{,/**} wklx,\n" +" deny /sys/devices/virt[^u]*{,/**} wklx,\n" +" deny /sys/devices/virtu[^a]*{,/**} wklx,\n" +" deny /sys/devices/virtua[^l]*{,/**} wklx,\n" +" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n" +" deny 
/sys/devices/virtual/n[^e]*{,/**} wklx,\n" +" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n" +" deny /sys/devices/virtual/net?*{,/**} wklx,\n" +" deny /sys/devices/virtual?*{,/**} wklx,\n" +" deny /sys/devices?*{,/**} wklx,\n" +" deny /sys/f[^s]*{,/**} wklx,\n" +" deny /sys/fs/[^c]*{,/**} wklx,\n" +" deny /sys/fs/c[^g]*{,/**} wklx,\n" +" deny /sys/fs/cg[^r]*{,/**} wklx,\n" +" deny /sys/fs/cgr[^o]*{,/**} wklx,\n" +" deny /sys/fs/cgro[^u]*{,/**} wklx,\n" +" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n" +" deny /sys/fs/cgroup?*{,/**} wklx,\n" +" deny /sys/fs?*{,/**} wklx,\n" +; + +static const char AA_PROFILE_UNIX_SOCKETS[] = +"\n" +" ### Feature: unix\n" +" # Allow receive via unix sockets from anywhere\n" +" unix (receive),\n" +"\n" +" # Allow all unix sockets in the container\n" +" unix peer=(label=@{profile_name}),\n" +; + +static const char AA_PROFILE_CGROUP_NAMESPACES[] = +"\n" +" ### Feature: cgroup namespace\n" +" mount fstype=cgroup -> /sys/fs/cgroup/**,\n" +" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n" +; + +/* '_BASE' because we still need to append generated change_profile rules */ +static const char AA_PROFILE_STACKING_BASE[] = +"\n" +" ### Feature: apparmor stacking\n" +" ### Configuration: apparmor profile loading (in namespace)\n" +" deny /sys/k[^e]*{,/**} wklx,\n" +" deny /sys/ke[^r]*{,/**} wklx,\n" +" deny /sys/ker[^n]*{,/**} wklx,\n" +" deny /sys/kern[^e]*{,/**} wklx,\n" +" deny /sys/kerne[^l]*{,/**} wklx,\n" +" deny /sys/kernel/[^s]*{,/**} wklx,\n" +" deny /sys/kernel/s[^e]*{,/**} wklx,\n" +" deny /sys/kernel/se[^c]*{,/**} wklx,\n" +" deny /sys/kernel/sec[^u]*{,/**} wklx,\n" +" deny /sys/kernel/secu[^r]*{,/**} wklx,\n" +" deny /sys/kernel/secur[^i]*{,/**} wklx,\n" +" deny /sys/kernel/securi[^t]*{,/**} wklx,\n" +" deny /sys/kernel/securit[^y]*{,/**} wklx,\n" +" deny /sys/kernel/security/[^a]*{,/**} wklx,\n" +" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n" +" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n" +" deny /sys/kernel/security/app[^a]*{,/**} 
wklx,\n" +" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n" +" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n" +" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n" +" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n" +" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n" +" deny /sys/kernel/security?*{,/**} wklx,\n" +" deny /sys/kernel?*{,/**} wklx,\n" +; + +static const char AA_PROFILE_NO_STACKING[] = +"\n" +" ### Feature: apparmor stacking (not present)\n" +" deny /sys/k*{,/**} rwklx,\n" +; + +/* '_BASE' because we need to append change_profile for stacking */ +static const char AA_PROFILE_NESTING_BASE[] = +"\n" +" ### Configuration: nesting\n" +" pivot_root,\n" +" ptrace,\n" +" signal,\n" +"\n" + /* NOTE: See conf.c's "nesting_helpers" for details. */ +" deny /dev/.lxc/proc/** rw,\n" +" deny /dev/.lxc/sys/** rw,\n" +"\n" +" mount fstype=proc -> /usr/lib/*/lxc/**,\n" +" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n" +" mount options=(rw,bind),\n" +" mount options=(rw,rbind),\n" +" mount options=(rw,make-rshared),\n" +"\n" + /* FIXME: What's the state here on apparmor's side? 
*/ +" # there doesn't seem to be a way to ask for:\n" +" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n" +" # as we always get mount to $cdir/proc/sys with those flags denied\n" +" # So allow all mounts until that is straightened out:\n" +" mount,\n" +; + +static const char AA_PROFILE_UNPRIVILEGED[] = +"\n" +" ### Configuration: unprivileged container\n" +" pivot_root,\n" +"\n" +" # Allow modifying mount propagation\n" +" mount options=(rw,make-slave) -> **,\n" +" mount options=(rw,make-rslave) -> **,\n" +" mount options=(rw,make-shared) -> **,\n" +" mount options=(rw,make-rshared) -> **,\n" +" mount options=(rw,make-private) -> **,\n" +" mount options=(rw,make-rprivate) -> **,\n" +" mount options=(rw,make-unbindable) -> **,\n" +" mount options=(rw,make-runbindable) -> **,\n" +"\n" +" # Allow all bind-mounts\n" +" mount options=(rw,bind),\n" +" mount options=(rw,rbind),\n" +"\n" +" # Allow remounting things read-only\n" +" mount options=(ro,remount),\n" +; static bool check_mount_feature_enabled(void) { @@ -68,7 +402,7 @@ static int apparmor_enabled(void) char e; int ret; - fin = fopen(AA_ENABLED_FILE, "r"); + fin = fopen_cloexec(AA_ENABLED_FILE, "r"); if (!fin) return 0; ret = fscanf(fin, "%c", &e); @@ -95,7 +429,7 @@ static char *apparmor_process_label_get(pid_t pid) return NULL; } again: - f = fopen(path, "r"); + f = fopen_cloexec(path, "r"); if (!f) { SYSERROR("opening %s", path); free(buf); @@ -143,11 +477,6 @@ static bool apparmor_am_unconfined(void) return ret; } -/* aa stacking is not yet supported */ -static bool aa_stacking_supported(void) { - return false; -} - static bool aa_needs_transition(char *curlabel) { if (!curlabel) @@ -159,6 +488,571 @@ static bool aa_needs_transition(char *curlabel) return true; } +static inline void uint64hex(char *buf, uint64_t num) +{ + size_t i; + + buf[16] = 0; + for (i = 16; i--;) { + char c = (char)(num & 0xf); + buf[i] = c + (c < 0xa ? 
'0' : 'a' - 0xa); + num >>= 4; + } +} + +static inline char *shorten_apparmor_name(char *name) +{ + size_t len = strlen(name); + if (len + 7 > 253) { + uint64_t hash; + hash = fnv_64a_buf(name, len, FNV1A_64_INIT); + name = must_realloc(name, 16 + 1); + uint64hex(name, hash); + } + + return name; +} + +/* Replace slashes with hyphens */ +static inline void sanitize_path(char *path) +{ + size_t i; + + for (i = 0; path[i]; i++) + if (path[i] == '/') + path[i] = '-'; +} + +static inline char *apparmor_dir(const char *ctname, const char *lxcpath) +{ + return must_make_path(lxcpath, ctname, "apparmor", NULL); +} + + +static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath) +{ + return shorten_apparmor_name(must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL)); +} + +/* Like apparmor_profile_full() but with slashes replaced by hyphens */ +static inline char *apparmor_namespace(const char *ctname, const char *lxcpath) +{ + char *full; + + full = apparmor_profile_full(ctname, lxcpath); + sanitize_path(full); + + return full; +} + +/* FIXME: This is currently run only in the context of a constructor (via the + * initial lsm_init() called due to its __attribute__((constructor))), so we + * do not have ERROR/... macros available, so there are some fprintf(stderr)s + * in there. + */ +static bool check_apparmor_parser_version() +{ + struct lxc_popen_FILE *parserpipe; + int rc; + int major = 0, minor = 0, micro = 0; + + parserpipe = lxc_popen("apparmor_parser --version"); + if (!parserpipe) { + fprintf(stderr, "Failed to run check for apparmor_parser\n"); + return false; + } + + rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro); + if (rc < 1) { + lxc_pclose(parserpipe); + /* We stay silent for now as this most likely means the shell + * lxc_popen executed failed to find the apparmor_parser binary. + * See the FIXME comment above for details. 
+ */ + return false; + } + + rc = lxc_pclose(parserpipe); + if (rc < 0) { + fprintf(stderr, "Error waiting for child process\n"); + return false; + } + if (rc != 0) { + fprintf(stderr, "'apparmor_parser --version' executed with an error status\n"); + return false; + } + + aa_supports_unix = (major > 2) || + (major == 2 && minor > 10) || + (major == 2 && minor == 10 && micro >= 95); + + return true; +} + +static bool file_is_yes(const char *path) +{ + ssize_t rd; + int fd; + char buf[8]; /* we actually just expect "yes" or "no" */ + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) + return false; + + rd = read(fd, buf, sizeof(buf)); + close(fd); + + return rd >= 4 && strncmp(buf, "yes\n", 4) == 0; +} + +static bool apparmor_can_stack() +{ + int major, minor, scanned; + FILE *f; + + if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack")) + return false; + + f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r"); + if (!f) + return false; + + scanned = fscanf(f, "%d.%d", &major, &minor); + fclose(f); + if (scanned != 2) + return false; + + return major > 1 || (major == 1 && minor >= 2); +} + +static void must_append_sized_full(char **buf, size_t *bufsz, const char *data, + size_t size, bool append_newline) +{ + size_t newsize = *bufsz + size; + + if (append_newline) + ++newsize; + + *buf = must_realloc(*buf, newsize); + memcpy(*buf + *bufsz, data, size); + + if (append_newline) + (*buf)[newsize - 1] = '\n'; + + *bufsz = newsize; +} + +static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size) +{ + return must_append_sized_full(buf, bufsz, data, size, false); +} + +static bool is_privileged(struct lxc_conf *conf) +{ + return lxc_list_empty(&conf->id_map); +} + +static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath) +{ + char *profile, *profile_name_full; + size_t size; + struct lxc_list *it; + + profile_name_full = apparmor_profile_full(conf->name, lxcpath); + + 
profile = must_concat( +"#include <tunables/global>\n" +"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n", + NULL); + size = strlen(profile); + + must_append_sized(&profile, &size, AA_PROFILE_BASE, + sizeof(AA_PROFILE_BASE) - 1); + + if (aa_supports_unix) + must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS, + sizeof(AA_PROFILE_UNIX_SOCKETS) - 1); + + if (file_exists("/proc/self/ns/cgroup")) + must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES, + sizeof(AA_PROFILE_CGROUP_NAMESPACES) - 1); + + if (aa_can_stack && !aa_is_stacked) { + char *namespace, *temp; + + must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE, + sizeof(AA_PROFILE_STACKING_BASE) - 1); + + namespace = apparmor_namespace(conf->name, lxcpath); + temp = must_concat(" change_profile -> \":", namespace, ":*\",\n" + " change_profile -> \":", namespace, "://*\",\n", + NULL); + free(namespace); + + must_append_sized(&profile, &size, temp, strlen(temp)); + free(temp); + } else { + must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING, + sizeof(AA_PROFILE_NO_STACKING) - 1); + } + + if (conf->lsm_aa_allow_nesting) { + must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE, + sizeof(AA_PROFILE_NESTING_BASE) - 1); + + if (!aa_can_stack || aa_is_stacked) { + char *temp; + + temp = must_concat(" change_profile -> \"", + profile_name_full, "\",\n", NULL); + must_append_sized(&profile, &size, temp, strlen(temp)); + free(temp); + } + } + + if (!is_privileged(conf) || am_host_unpriv()) + must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED, + sizeof(AA_PROFILE_UNPRIVILEGED) - 1); + + lxc_list_for_each(it, &conf->lsm_aa_raw) { + const char *line = it->elem; + + must_append_sized_full(&profile, &size, line, strlen(line), true); + } + + /* include terminating \0 byte */ + must_append_sized(&profile, &size, "}\n", 3); + + free(profile_name_full); + + return profile; +} + +/* + * apparmor_parser creates a cache file using the parsed file's name as a name. 
+ * This means there may be multiple containers with the same name but different + * lxcpaths. Therefore we need a sanitized version of the complete profile name + * as profile file-name. + * We already get this exactly from apparmor_namespace(). + */ +static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath) +{ + char *ret, *filename; + + filename = apparmor_namespace(ctname, lxcpath); + ret = must_make_path(lxcpath, ctname, "apparmor", filename, NULL); + free(filename); + + return ret; +} + +static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath) +{ + char *ret, *namespace; + + namespace = apparmor_namespace(ctname, lxcpath); + ret = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL); + free(namespace); + + return ret; +} + +static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) +{ + char *path; + + if (!aa_can_stack || aa_is_stacked) + return true; + + path = make_apparmor_namespace_path(conf->name, lxcpath); + errno = 0; + if (mkdir(path, 0755) < 0 && errno != EEXIST) { + SYSERROR("Error creating AppArmor namespace: %s", path); + free(path); + return false; + } + free(path); + + return true; +} + +static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) +{ + char *path; + + path = make_apparmor_namespace_path(conf->name, lxcpath); + if (rmdir(path) != 0) + SYSERROR("Error removing AppArmor namespace"); + free(path); +} + +struct apparmor_parser_args { + char cmd; + char *file; +}; + +static int apparmor_parser_exec(void *data) +{ + struct apparmor_parser_args *args = data; + char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 }; + + execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL); + + return -1; +} + +static int run_apparmor_parser(char command, + struct lxc_conf *conf, + const char *lxcpath) +{ + char output[MAXPATHLEN]; + int ret; + struct apparmor_parser_args args = { + .cmd = command, + 
.file = make_apparmor_profile_path(conf->name, lxcpath), + }; + + ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args); + if (ret < 0) { + ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output); + ret = -1; + } + + + free(args.file); + return ret; +} + +static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) +{ + char *path; + + /* It's ok if these deletes fail: if the container was never started, + * we'll have never written a profile or cached it. + */ + + path = make_apparmor_profile_path(conf->name, lxcpath); + (void)unlink(path); + free(path); + + /* Also remove the apparmor/ subdirectory */ + path = apparmor_dir(conf->name, lxcpath); + (void)rmdir(path); + free(path); +} + +static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) +{ + struct stat profile_sb; + size_t content_len; + int ret = -1; + size_t old_len = 0; + char *profile_path = NULL, *old_content = NULL, *new_content = NULL; + int profile_fd = -1; + + if (!make_apparmor_namespace(conf, lxcpath)) + return -1; + + /* In order to avoid forcing a profile parse (potentially slow) on + * every container start, let's use apparmor's binary policy cache, + * which checks mtime of the files to figure out if the policy needs to + * be regenerated. + * + * Since it uses mtimes, we shouldn't just always write out our local + * apparmor template; instead we should check to see whether the + * template is the same as ours. If it isn't we should write our + * version out so that the new changes are reflected and we definitely + * force a recompile. 
+ */ + + profile_path = make_apparmor_profile_path(conf->name, lxcpath); + profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC); + if (profile_fd >= 0) { + if (fstat(profile_fd, &profile_sb) < 0) { + SYSERROR("Error accessing old profile from %s", + profile_path); + goto out; + } + old_len = profile_sb.st_size; + old_content = lxc_strmmap(NULL, old_len, PROT_READ, + MAP_PRIVATE, profile_fd, 0); + if (!old_content) { + SYSERROR("Failed to mmap old profile from %s", + profile_path); + goto out; + } + } else if (errno != ENOENT) { + SYSERROR("Error reading old profile from %s", profile_path); + goto out; + } + + new_content = get_apparmor_profile_content(conf, lxcpath); + if (!new_content) + goto out; + + content_len = strlen(new_content); + + if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) { + char *path; + + ret = mkdir_p(APPARMOR_CACHE_DIR, 0755); + if (ret < 0) { + SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR); + goto out; + } + + path = apparmor_dir(conf->name, lxcpath); + ret = mkdir_p(path, 0755); + if (ret < 0) { + SYSERROR("Error creating AppArmor profile directory: %s", path); + free(path); + goto out; + } + free(path); + + ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600); + if (ret < 0) { + SYSERROR("Error writing profile to %s", profile_path); + goto out; + } + } + + ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath); + if (ret != 0) + goto out_remove_profile; + + conf->lsm_aa_profile_created = true; + + goto out_ok; + +out_remove_profile: + remove_apparmor_profile(conf, lxcpath); +out: + remove_apparmor_namespace(conf, lxcpath); +out_ok: + if (profile_fd >= 0) { + if (old_content) + lxc_strmunmap(old_content, old_len); + close(profile_fd); + } + free(profile_path); + free(new_content); + return ret; +} + +/* + * Ensure that the container's policy namespace is unloaded to free kernel + * memory. 
This does not delete the policy from disk or cache. + */ +static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath) +{ + if (!aa_admin) + return; + + if (!conf->lsm_aa_profile_created) + return; + + remove_apparmor_namespace(conf, lxcpath); + (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath); + + remove_apparmor_profile(conf, lxcpath); +} + +static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath) +{ + int ret = -1; + const char *label; + char *curlabel = NULL, *genlabel = NULL; + + if (!aa_enabled) { + ERROR("AppArmor not enabled"); + return -1; + } + + label = conf->lsm_aa_profile; + + /* user may request that we just ignore apparmor */ + if (label && strcmp(label, AA_UNCHANGED) == 0) { + INFO("AppArmor profile unchanged per user request"); + conf->lsm_aa_profile_computed = must_copy_string(label); + return 0; + } + + if (label && strcmp(label, AA_GENERATED) == 0) { + if (!aa_parser_available) { + ERROR("Cannot use generated profile: apparmor_parser not available"); + goto out; + } + + /* auto-generate profile based on available/requested security features */ + if (load_apparmor_profile(conf, lxcpath) != 0) { + ERROR("Failed to load generated AppArmor profile"); + goto out; + } + + genlabel = apparmor_profile_full(conf->name, lxcpath); + if (!genlabel) { + ERROR("Failed to build AppArmor profile name"); + goto out; + } + + if (aa_can_stack && !aa_is_stacked) { + char *namespace = apparmor_namespace(conf->name, lxcpath); + size_t llen = strlen(genlabel); + must_append_sized(&genlabel, &llen, "//&:", sizeof("//&:") - 1); + must_append_sized(&genlabel, &llen, namespace, strlen(namespace)); + must_append_sized(&genlabel, &llen, ":", sizeof(":")); /* with the nul byte */ + free(namespace); + } + + label = genlabel; + } + + curlabel = apparmor_process_label_get(lxc_raw_getpid()); + + if (!aa_can_stack && aa_needs_transition(curlabel)) { + /* we're already confined, and stacking isn't supported */ + + if (!label || strcmp(curlabel, 
label) == 0) { + /* no change requested */ + ret = 0; + goto out; + } + + ERROR("Already AppArmor confined, but new label requested."); + goto out; + } + + if (!label) { + if (cgns_supported()) + label = AA_DEF_PROFILE_CGNS; + else + label = AA_DEF_PROFILE; + } + + if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) { + WARN("Incomplete AppArmor support in your kernel"); + if (!conf->lsm_aa_allow_incomplete) { + ERROR("If you really want to start this container, set"); + ERROR("lxc.apparmor.allow_incomplete = 1"); + ERROR("in your container configuration file"); + goto out; + } + } + + conf->lsm_aa_profile_computed = must_copy_string(label); + ret = 0; + +out: + if (genlabel) { + free(genlabel); + if (ret != 0) + apparmor_cleanup(conf, lxcpath); + } + free(curlabel); + return ret; +} + /* * apparmor_process_label_set: Set AppArmor process profile * @@ -172,80 +1066,48 @@ static bool aa_needs_transition(char *curlabel) * Notes: This relies on /proc being available. */ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf, - bool use_default, bool on_exec) + bool on_exec) { int label_fd, ret; pid_t tid; - const char *label = inlabel ? inlabel : conf->lsm_aa_profile; - char *curlabel; + const char *label; - if (!aa_enabled) - return 0; - - /* user may request that we just ignore apparmor */ - if (label && strcmp(label, AA_UNCHANGED) == 0) { - INFO("apparmor profile unchanged per user request"); - return 0; - } - - curlabel = apparmor_process_label_get(lxc_raw_getpid()); - - if (!aa_stacking_supported() && aa_needs_transition(curlabel)) { - /* we're already confined, and stacking isn't supported */ - - if (!label || strcmp(curlabel, label) == 0) { - /* no change requested */ - free(curlabel); - return 0; - } - - ERROR("already apparmor confined, but new label requested."); - free(curlabel); + if (!aa_enabled) { + ERROR("AppArmor not enabled"); return -1; } - free(curlabel); + label = inlabel ? 
inlabel : conf->lsm_aa_profile_computed; if (!label) { - if (use_default) { - if (cgns_supported()) - label = AA_DEF_PROFILE_CGNS; - else - label = AA_DEF_PROFILE; - } - else - label = "unconfined"; + ERROR("LSM wasn't prepared"); + return -1; } - if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) { - WARN("Incomplete AppArmor support in your kernel"); - if (!conf->lsm_aa_allow_incomplete) { - ERROR("If you really want to start this container, set"); - ERROR("lxc.apparmor.allow_incomplete = 1"); - ERROR("in your container configuration file"); - return -1; - } + /* user may request that we just ignore apparmor */ + if (strcmp(label, AA_UNCHANGED) == 0) { + INFO("AppArmor profile unchanged per user request"); + return 0; } - if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) { - INFO("apparmor profile unchanged"); + INFO("AppArmor profile unchanged"); return 0; } tid = lxc_raw_gettid(); label_fd = lsm_process_label_fd_get(tid, on_exec); if (label_fd < 0) { - SYSERROR("Failed to change apparmor profile to %s", label); + SYSERROR("Failed to change AppArmor profile to %s", label); return -1; } ret = lsm_process_label_set_at(label_fd, label, on_exec); close(label_fd); if (ret < 0) { - SYSERROR("Failed to change apparmor profile to %s", label); + ERROR("Failed to change AppArmor profile to %s", label); return -1; } - INFO("Changed apparmor profile to %s", label); + INFO("Changed AppArmor profile to %s", label); return 0; } @@ -254,12 +1116,39 @@ static struct lsm_drv apparmor_drv = { .enabled = apparmor_enabled, .process_label_get = apparmor_process_label_get, .process_label_set = apparmor_process_label_set, + .prepare = apparmor_prepare, + .cleanup = apparmor_cleanup, }; struct lsm_drv *lsm_apparmor_drv_init(void) { + bool have_mac_admin = false; + if (!apparmor_enabled()) return NULL; + + /* We only support generated profiles when apparmor_parser is usable */ + if (!check_apparmor_parser_version()) + goto out; + + aa_parser_available 
= true; + + aa_can_stack = apparmor_can_stack(); + if (aa_can_stack) + aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked"); + + #if HAVE_LIBCAP + have_mac_admin = lxc_proc_cap_is_set(CAP_MAC_ADMIN, CAP_EFFECTIVE); /* the capability the warning below refers to */ + #endif + + if (!have_mac_admin) + WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing"); + else if (am_host_unpriv() && !aa_is_stacked) + WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking"); + else + aa_admin = true; + +out: aa_enabled = 1; return &apparmor_drv; } diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c index f4500ae20..46e212069 100644 --- a/src/lxc/lsm/lsm.c +++ b/src/lxc/lsm/lsm.c @@ -142,18 +142,20 @@ int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec) if (on_exec) { ERROR("Changing AppArmor profile on exec not supported"); - return -EINVAL; + return -1; } len = strlen(label) + strlen("changeprofile ") + 1; command = malloc(len); if (!command) - return -1; + goto on_error; ret = snprintf(command, len, "changeprofile %s", label); if (ret < 0 || (size_t)ret >= len) { + int saved_errno = errno; free(command); - return -1; + errno = saved_errno; + goto on_error; } ret = lxc_write_nointr(label_fd, command, len - 1); @@ -161,9 +163,11 @@ int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec) } else if (strcmp(name, "SELinux") == 0) { ret = lxc_write_nointr(label_fd, label, strlen(label)); } else { - ret = -EINVAL; + errno = EINVAL; + ret = -1; } if (ret < 0) { +on_error: SYSERROR("Failed to set %s label \"%s\"", name, label); return -1; } @@ -173,11 +177,37 @@ int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec) } int lsm_process_label_set(const char *label, struct lxc_conf *conf, - bool use_default, bool on_exec) + bool on_exec) { if (!drv) { ERROR("LSM driver not inited"); return -1; } - return drv->process_label_set(label, conf, use_default,
on_exec); + return drv->process_label_set(label, conf, on_exec); +} + +int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath) +{ + if (!drv) { + ERROR("LSM driver not inited"); + return 0; + } + + if (!drv->prepare) + return 0; + + return drv->prepare(conf, lxcpath); +} + +void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath) +{ + if (!drv) { + ERROR("LSM driver not inited"); + return; + } + + if (!drv->cleanup) + return; + + drv->cleanup(conf, lxcpath); } diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h index cafb2ac7c..52e656d6f 100644 --- a/src/lxc/lsm/lsm.h +++ b/src/lxc/lsm/lsm.h @@ -38,17 +38,21 @@ struct lsm_drv { int (*enabled)(void); char *(*process_label_get)(pid_t pid); int (*process_label_set)(const char *label, struct lxc_conf *conf, - bool use_default, bool on_exec); + bool on_exec); + int (*prepare)(struct lxc_conf *conf, const char *lxcpath); + void (*cleanup)(struct lxc_conf *conf, const char *lxcpath); }; extern void lsm_init(void); extern int lsm_enabled(void); extern const char *lsm_name(void); extern char *lsm_process_label_get(pid_t pid); +extern int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath); extern int lsm_process_label_set(const char *label, struct lxc_conf *conf, - bool use_default, bool on_exec); + bool on_exec); extern int lsm_process_label_fd_get(pid_t pid, bool on_exec); extern int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec); +extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath); #endif /* __LXC_LSM_H */ diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c index 7bb8121b8..9397f2bfb 100644 --- a/src/lxc/lsm/nop.c +++ b/src/lxc/lsm/nop.c @@ -30,7 +30,7 @@ static char *nop_process_label_get(pid_t pid) } static int nop_process_label_set(const char *label, struct lxc_conf *conf, - bool use_default, bool on_exec) + bool on_exec) { return 0; } diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c index c88c18e3d..9f7b7bc31 100644 --- 
a/src/lxc/lsm/selinux.c +++ b/src/lxc/lsm/selinux.c @@ -75,15 +75,13 @@ static char *selinux_process_label_get(pid_t pid) * Notes: This relies on /proc being available. */ static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, - bool use_default, bool on_exec) + bool on_exec) { int ret; const char *label; label = inlabel ? inlabel : conf->lsm_se_context; if (!label) { - if (!use_default) - return -EINVAL; label = DEFAULT_LABEL; } diff --git a/src/lxc/start.c b/src/lxc/start.c index ec372b4a2..061e47917 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -863,9 +863,19 @@ int lxc_init(const char *name, struct lxc_handler *handler) } TRACE("Initialized cgroup driver"); + ret = lsm_process_prepare(conf, handler->lxcpath); + if (ret < 0) { + ERROR("Failed to initialize LSM"); + goto out_destroy_cgroups; + } + TRACE("Initialized LSM"); + INFO("Container \"%s\" is initialized", name); return 0; +out_destroy_cgroups: + handler->cgroup_ops->destroy(handler->cgroup_ops, handler); + out_delete_terminal: lxc_terminal_delete(&handler->conf->console); @@ -956,6 +966,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler) while (namespace_count--) free(namespaces[namespace_count]); + lsm_process_cleanup(handler->conf, handler->lxcpath); + cgroup_ops->destroy(cgroup_ops, handler); cgroup_exit(cgroup_ops); @@ -1235,7 +1247,7 @@ static int do_start(void *data) } /* Set the label to change to when we exec(2) the container's init. */ - ret = lsm_process_label_set(NULL, handler->conf, 1, 1); + ret = lsm_process_label_set(NULL, handler->conf, true); if (ret < 0) goto out_warn_father; diff --git a/src/lxc/utils.c b/src/lxc/utils.c index bad355265..82c24c941 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -2433,6 +2433,30 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args) return fret; } +char *must_concat(const char *first, ...) 
+{ + va_list args; + char *cur, *dest; + size_t cur_len, it_len; + + dest = must_copy_string(first); + cur_len = it_len = strlen(first); + + va_start(args, first); + while ((cur = va_arg(args, char *)) != NULL) { + it_len = strlen(cur); + + dest = must_realloc(dest, cur_len + it_len + 1); + + (void)memcpy(dest + cur_len, cur, it_len); + cur_len += it_len; + } + va_end(args); + + dest[cur_len] = 0; + return dest; +} + char *must_make_path(const char *first, ...) { va_list args; diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 46ef28504..f09bc9018 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -568,6 +568,7 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), /* Concatenate all passed-in strings into one path. Do not fail. If any piece * is not prefixed with '/', add a '/'. */ +__attribute__((sentinel)) extern char *must_concat(const char *first, ...); __attribute__((sentinel)) extern char *must_make_path(const char *first, ...); __attribute__((sentinel)) extern char *must_append_path(char *first, ...); diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 00d4c0b7a..e1532a102 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -81,6 +81,7 @@ if DISTRO_UBUNTU bin_SCRIPTS += \ lxc-test-lxc-attach \ lxc-test-apparmor-mount \ + lxc-test-apparmor-generated \ lxc-test-checkpoint-restore \ lxc-test-snapdeps \ lxc-test-symlink \ @@ -114,6 +115,7 @@ EXTRA_DIST = \ lxc-test-rootfs \ lxc-test-autostart \ lxc-test-apparmor-mount \ + lxc-test-apparmor-generated \ lxc-test-checkpoint-restore \ lxc-test-cloneconfig \ lxc-test-createconfig \ diff --git a/src/tests/lxc-test-apparmor-generated b/src/tests/lxc-test-apparmor-generated new file mode 100755 index 000000000..be2e32619 --- /dev/null +++ b/src/tests/lxc-test-apparmor-generated @@ -0,0 +1,84 @@ +#!/bin/sh + +# lxc: linux Container library + +# This is a test script for generated apparmor profiles + +# This library is free software; you can redistribute it and/or 
+# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +if ! which apparmor_parser >/dev/null 2>&1; then + echo 'SKIP: test for generated apparmor profiles: apparmor_parser missing' + exit 0 +fi + +DONE=0 +KNOWN_RELEASES="precise trusty xenial yakkety zesty" +LOGFILE="/tmp/lxc-test-$$.log" +cleanup() { + lxc-destroy -n $CONTAINER_NAME >/dev/null 2>&1 || true + + if [ $DONE -eq 0 ]; then + [ -f "$LOGFILE" ] && cat "$LOGFILE" >&2 + rm -f "$LOGFILE" + echo "FAIL" + exit 1 + fi + rm -f "$LOGFILE" + echo "PASS" +} + +ARCH=i386 +if type dpkg >/dev/null 2>&1; then + ARCH=$(dpkg --print-architecture) +fi + +trap cleanup EXIT HUP INT TERM +set -eu + +# Create a container +CONTAINER_NAME=lxc-test-apparmor-generated + +# default release is trusty, or the systems release if recognized +release=trusty +if [ -f /etc/lsb-release ]; then + .
/etc/lsb-release + rels=$(ubuntu-distro-info --supported 2>/dev/null) || + rels="$KNOWN_RELEASES" + for r in $rels; do + [ "$DISTRIB_CODENAME" = "$r" ] && release="$r" + done +fi + +lxc-create -t download -n $CONTAINER_NAME -B dir -- -d ubuntu -r $release -a $ARCH +CONTAINER_PATH=$(dirname $(lxc-info -n $CONTAINER_NAME -c lxc.rootfs.path -H) | sed -e 's/dir://') +cp $CONTAINER_PATH/config $CONTAINER_PATH/config.bak + +# Set the profile to be auto-generated +echo "lxc.apparmor.profile = generated" >> $CONTAINER_PATH/config + +# Start it +lxc-start -n $CONTAINER_NAME -lDEBUG -o "$LOGFILE" +lxc-wait -n $CONTAINER_NAME -t 5 -s RUNNING || (echo "Container didn't start" && exit 1) +pid=`lxc-info -p -H -n $CONTAINER_NAME` +profile=`cat /proc/$pid/attr/current` +expected_profile="lxc-${CONTAINER_NAME}_</var/lib/lxc>//&:lxc-${CONTAINER_NAME}_<-var-lib-lxc>:unconfined (enforce)" +lxc-stop -n $CONTAINER_NAME -k +if [ "x$profile" != "x$expected_profile" ]; then + echo "FAIL: container was in profile $profile" >&2 + echo "expected profile: $expected_profile" >&2 + exit 1 +fi + +DONE=1 diff --git a/src/tests/lxc-test-apparmor-mount b/src/tests/lxc-test-apparmor-mount index ddcee8a76..56d598f4c 100755 --- a/src/tests/lxc-test-apparmor-mount +++ b/src/tests/lxc-test-apparmor-mount @@ -23,6 +23,16 @@ set -e +# Only run on a normally configured ubuntu lxc system +if [ ! -d /sys/class/net/lxcbr0 ]; then + echo "lxcbr0 is not configured." + exit 1 +fi +if [ "$(id -u)" != "0" ]; then + echo "ERROR: Must run as root." 
+ exit 1 +fi + if [ -f /proc/self/ns/cgroup ]; then default_profile="lxc-container-default-cgns (enforce)" else @@ -45,6 +55,7 @@ DONE=0 KNOWN_RELEASES="precise trusty xenial yakkety zesty" MOUNTSR=/sys/kernel/security/apparmor/features/mount dnam=`mktemp -d` +logfile=`mktemp` cname=`basename $dnam` cleanup() { run_cmd lxc-destroy -f -n $cname || true @@ -56,23 +67,24 @@ cleanup() { rm -Rf $HDIR /run/user/$(id -u $TUSER) deluser $TUSER if [ $DONE -eq 0 ]; then + echo 'Failed container log:' >&2 + cat "$logfile" >&2 + echo 'End log' >&2 + rm -f "$logfile" echo "FAIL" exit 1 fi + rm -f "$logfile" echo "PASS" } +clear_log() { + truncate -s0 "$logfile" +} + trap cleanup exit -# Only run on a normally configured ubuntu lxc system -if [ ! -d /sys/class/net/lxcbr0 ]; then - echo "lxcbr0 is not configured." - exit 1 -fi -if [ "$(id -u)" != "0" ]; then - echo "ERROR: Must run as root." - exit 1 -fi +chmod 0666 "$logfile" # This would be much simpler if we could run it as # root. However, in order to not have the bind mount @@ -160,7 +172,7 @@ fi run_cmd lxc-create -t download -n $cname -- -d ubuntu -r $release -a $ARCH echo "test default confined container" -run_cmd lxc-start -n $cname -d +run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" run_cmd lxc-wait -n $cname -s RUNNING pid=`run_cmd lxc-info -p -H -n $cname` profile=`cat /proc/$pid/attr/current` @@ -169,10 +181,11 @@ if [ "x$profile" != "x${default_profile}" ]; then exit 1 fi run_cmd lxc-stop -n $cname -k +clear_log echo "test regular unconfined container" echo "lxc.apparmor.profile = unconfined" >> $HDIR/.local/share/lxc/$cname/config -run_cmd lxc-start -n $cname -d +run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" run_cmd lxc-wait -n $cname -s RUNNING pid=`run_cmd lxc-info -p -H -n $cname` profile=`cat /proc/$pid/attr/current` @@ -181,6 +194,7 @@ if [ "x$profile" != "xunconfined" ]; then exit 1 fi run_cmd lxc-stop -n $cname -k +clear_log echo "masking $MOUNTSR" mount --bind $dnam $MOUNTSR @@ -198,7 +212,7 @@ 
fi echo "test regular unconfined container" echo "lxc.apparmor.profile = unconfined" >> $HDIR/.local/share/lxc/$cname/config -run_cmd lxc-start -n $cname -d +run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" run_cmd lxc-wait -n $cname -s RUNNING pid=`run_cmd lxc-info -p -H -n $cname` if [ "$pid" = "-1" ]; then @@ -211,11 +225,12 @@ if [ "x$profile" != "xunconfined" ]; then exit 1 fi run_cmd lxc-stop -n $cname -k +clear_log echo "testing override" sed -i '/apparmor.profile/d' $HDIR/.local/share/lxc/$cname/config echo "lxc.apparmor.allow_incomplete = 1" >> $HDIR/.local/share/lxc/$cname/config -run_cmd lxc-start -n $cname -d +run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" run_cmd lxc-wait -n $cname -s RUNNING pid=`run_cmd lxc-info -p -H -n $cname` if [ "$pid" = "-1" ]; then @@ -228,5 +243,6 @@ if [ "x$profile" != "x${default_profile}" ]; then exit 1 fi run_cmd lxc-stop -n $cname -k +clear_log DONE=1