mirror of
git://github.com/lxc/lxc
synced 2025-08-29 16:59:49 +00:00
cgroups: add cgroup2 device controller support
Add a bpf-based device controller implementation. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
parent
f177506f59
commit
bf6519892e
@ -368,6 +368,10 @@ AC_CHECK_TYPES([struct seccomp_notif_sizes], [], [], [[#include <seccomp.h>]])
|
||||
AC_CHECK_DECLS([seccomp_syscall_resolve_name_arch], [], [], [[#include <seccomp.h>]])
|
||||
CFLAGS="$OLD_CFLAGS"
|
||||
|
||||
AC_CHECK_HEADERS([linux/bpf.h], [
|
||||
AC_CHECK_TYPES([struct bpf_cgroup_dev_ctx], [], [], [[#include <linux/bpf.h>]])
|
||||
], [], [])
|
||||
|
||||
# Configuration examples
|
||||
AC_ARG_ENABLE([examples],
|
||||
[AS_HELP_STRING([--enable-examples], [install examples [default=yes]])],
|
||||
|
@ -7,6 +7,7 @@ noinst_HEADERS = api_extensions.h \
|
||||
caps.h \
|
||||
cgroups/cgroup.h \
|
||||
cgroups/cgroup_utils.h \
|
||||
cgroups/cgroup2_devices.h \
|
||||
compiler.h \
|
||||
conf.h \
|
||||
confile.h \
|
||||
@ -95,6 +96,7 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \
|
||||
caps.c caps.h \
|
||||
cgroups/cgfsng.c \
|
||||
cgroups/cgroup.c cgroups/cgroup.h \
|
||||
cgroups/cgroup2_devices.c cgroups/cgroup2_devices.h \
|
||||
cgroups/cgroup_utils.c cgroups/cgroup_utils.h \
|
||||
compiler.h \
|
||||
commands.c commands.h \
|
||||
|
@ -54,6 +54,7 @@
|
||||
|
||||
#include "caps.h"
|
||||
#include "cgroup.h"
|
||||
#include "cgroup2_devices.h"
|
||||
#include "cgroup_utils.h"
|
||||
#include "commands.h"
|
||||
#include "conf.h"
|
||||
@ -1105,6 +1106,12 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
|
||||
wrap.hierarchies = ops->hierarchies;
|
||||
wrap.conf = handler->conf;
|
||||
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
|
||||
ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
|
||||
if (ret < 0)
|
||||
WARN("Failed to detach bpf program from cgroup");
|
||||
#endif
|
||||
|
||||
if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
|
||||
ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
|
||||
"cgroup_rmdir_wrapper");
|
||||
@ -2474,8 +2481,146 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some of the parsing logic comes from the original cgroup device v1
|
||||
* implementation in the kernel.
|
||||
*/
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
/*
 * Parse one component (major or minor) of a "major:minor" device
 * specification.
 *
 * @cursor: in/out pointer into the rule string; advanced past the component
 *          on success, left untouched on failure.
 * @number: receives ~0 for the wildcard '*', otherwise the parsed number.
 *
 * Returns 0 on success and -1 if the component is neither '*' nor a decimal
 * number accepted by lxc_safe_uint().
 */
static int bpf_device_parse_number(const char **cursor, int *number)
{
	const char *cur = *cursor;

	if (*cur == '*') {
		*number = ~0;
		cur++;
	} else if (isdigit((unsigned char)*cur)) {
		char digits[50] = {0};
		size_t len = 0;
		unsigned int parsed;

		/* Copy the run of digits; the buffer stays NUL-terminated. */
		while (len < sizeof(digits) - 1 && isdigit((unsigned char)*cur))
			digits[len++] = *cur++;

		if (lxc_safe_uint(digits, &parsed))
			return -1;

		/*
		 * Same bit pattern the rule carried before; a value of ~0U
		 * therefore keeps acting as the wildcard.
		 */
		*number = (int)parsed;
	} else {
		return -1;
	}

	*cursor = cur;
	return 0;
}
#endif

/*
 * Translate one "devices.allow"/"devices.deny" setting into a rule of the
 * container's bpf device program, creating the program on first use.
 *
 * @conf: container configuration; conf->cgroup2_devices holds the program.
 * @key:  setting name; "devices.allow" produces an allow rule, anything else
 *        a deny rule.
 * @val:  rule in cgroup v1 syntax: "a", or "<a|b|c> <major|*>:<minor|*> <rwm>".
 *
 * Returns 0 on success (and always when bpf device support is compiled out),
 * -1 on a malformed rule or when the program cannot be created or extended.
 */
static int bpf_device_cgroup_prepare(struct lxc_conf *conf, const char *key,
				     const char *val)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	struct device_item {
		char type;
		int major;
		int minor;
		char access[100];
		int allow;
	} device_item = {0};
	int count, ret;
	struct bpf_program *device = conf->cgroup2_devices;

	if (!device) {
		device = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
		if (!device) {
			ERROR("Failed to create new ebpf device program");
			return -1;
		}

		if (bpf_program_init(device)) {
			ERROR("Failed to initialize bpf program");
			/* Not stored in conf yet, so nobody else would free it. */
			bpf_program_free(device);
			return -1;
		}

		conf->cgroup2_devices = device;
	}

	if (strcmp("devices.allow", key) == 0)
		device_item.allow = 1;

	/* A bare "a" only selects the default verdict of the program. */
	if (strcmp(val, "a") == 0) {
		device->blacklist = (device_item.allow == 1);
		return 0;
	}

	switch (*val) {
	case 'a':
	case 'b':
	case 'c':
		device_item.type = *val;
		break;
	default:
		return -1;
	}

	val++;
	if (!isspace((unsigned char)*val))
		return -1;
	val++;

	/* read major */
	if (bpf_device_parse_number(&val, &device_item.major))
		return -1;

	if (*val != ':')
		return -1;
	val++;

	/* read minor */
	if (bpf_device_parse_number(&val, &device_item.minor))
		return -1;

	if (!isspace((unsigned char)*val))
		return -1;

	/* read up to three access characters */
	for (val++, count = 0; count < 3; count++, val++) {
		switch (*val) {
		case 'r':
		case 'w':
		case 'm':
			device_item.access[count] = *val;
			break;
		case '\n':
		case '\0':
			/* end of rule: terminate the loop */
			count = 3;
			break;
		default:
			return -1;
		}
	}

	ret = bpf_program_append_device(device, device_item.type, device_item.major,
					device_item.minor, device_item.access,
					device_item.allow);
	if (ret) {
		ERROR("Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
		      device_item.type, device_item.major, device_item.minor,
		      device_item.access, device_item.allow);
		return -1;
	} else {
		TRACE("Added new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d",
		      device_item.type, device_item.major, device_item.minor,
		      device_item.access, device_item.allow);
	}
#endif
	return 0;
}
|
||||
|
||||
static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
|
||||
struct lxc_list *cgroup_settings)
|
||||
struct lxc_list *cgroup_settings,
|
||||
struct lxc_conf *conf)
|
||||
{
|
||||
struct lxc_list *iterator;
|
||||
struct hierarchy *h = ops->unified;
|
||||
@ -2486,17 +2631,24 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
|
||||
if (!h)
|
||||
return false;
|
||||
|
||||
lxc_list_for_each(iterator, cgroup_settings) {
|
||||
lxc_list_for_each (iterator, cgroup_settings) {
|
||||
__do_free char *fullpath = NULL;
|
||||
int ret;
|
||||
struct lxc_cgroup *cg = iterator->elem;
|
||||
|
||||
fullpath = must_make_path(h->container_full_path, cg->subsystem, NULL);
|
||||
ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false, 0666);
|
||||
if (ret < 0) {
|
||||
SYSERROR("Failed to set \"%s\" to \"%s\"",
|
||||
cg->subsystem, cg->value);
|
||||
return false;
|
||||
if (strncmp("devices", cg->subsystem, 7) == 0) {
|
||||
ret = bpf_device_cgroup_prepare(conf, cg->subsystem,
|
||||
cg->value);
|
||||
} else {
|
||||
fullpath = must_make_path(h->container_full_path,
|
||||
cg->subsystem, NULL);
|
||||
ret = lxc_write_to_file(fullpath, cg->value,
|
||||
strlen(cg->value), false, 0666);
|
||||
if (ret < 0) {
|
||||
SYSERROR("Failed to set \"%s\" to \"%s\"",
|
||||
cg->subsystem, cg->value);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
|
||||
}
|
||||
@ -2505,6 +2657,32 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Finalize the container's bpf device program and attach it to the
 * container's cgroup in the unified hierarchy.
 *
 * Returns true when the program was attached or when there is nothing to
 * attach (no device program was prepared, or bpf device support is compiled
 * out). Returns false when a program exists but cannot be finalized or
 * attached, including when no unified hierarchy is available for it.
 */
__cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
					  struct lxc_handler *handler)
{
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
	struct bpf_program *device = handler->conf->cgroup2_devices;
	struct hierarchy *h = ops->unified;

	/*
	 * Check for "nothing to do" first so that a missing unified hierarchy
	 * is not reported as a failure when no device rules were configured.
	 */
	if (!device)
		return true;

	if (!h)
		return false;

	if (bpf_program_finalize(device))
		return false;

	return bpf_program_cgroup_attach(device, BPF_CGROUP_DEVICE,
					 h->container_full_path,
					 BPF_F_ALLOW_MULTI) == 0;
#else
	return true;
#endif
}
|
||||
|
||||
__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
|
||||
struct lxc_conf *conf,
|
||||
bool do_devices)
|
||||
@ -2512,7 +2690,11 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
|
||||
if (!__cg_legacy_setup_limits(ops, &conf->cgroup, do_devices))
|
||||
return false;
|
||||
|
||||
return __cg_unified_setup_limits(ops, &conf->cgroup2);
|
||||
/* for v2 we will have already set up devices */
|
||||
if (do_devices)
|
||||
return true;
|
||||
|
||||
return __cg_unified_setup_limits(ops, &conf->cgroup2, conf);
|
||||
}
|
||||
|
||||
static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
|
||||
@ -2893,6 +3075,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
|
||||
cgfsng_ops->chown = cgfsng_chown;
|
||||
cgfsng_ops->mount = cgfsng_mount;
|
||||
cgfsng_ops->nrtasks = cgfsng_nrtasks;
|
||||
cgfsng_ops->devices_activate = cgfsng_devices_activate;
|
||||
|
||||
return move_ptr(cgfsng_ops);
|
||||
}
|
||||
|
@ -164,6 +164,8 @@ struct cgroup_ops {
|
||||
bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
|
||||
const char *root, int type);
|
||||
int (*nrtasks)(struct cgroup_ops *ops);
|
||||
bool (*devices_activate)(struct cgroup_ops *ops,
|
||||
struct lxc_handler *handler);
|
||||
};
|
||||
|
||||
extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf);
|
||||
|
414
src/lxc/cgroups/cgroup2_devices.c
Normal file
414
src/lxc/cgroups/cgroup2_devices.c
Normal file
@ -0,0 +1,414 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
|
||||
/* Parts of this taken from systemd's implementation. */
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE 1
|
||||
#endif
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/filter.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "cgroup2_devices.h"
|
||||
#include "config.h"
|
||||
#include "log.h"
|
||||
#include "macro.h"
|
||||
#include "memory_utils.h"
|
||||
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
|
||||
#include <linux/bpf.h>
|
||||
|
||||
lxc_log_define(cgroup2_devices, cgroup);
|
||||
|
||||
static int bpf_program_add_instructions(struct bpf_program *prog,
|
||||
const struct bpf_insn *instructions,
|
||||
size_t count)
|
||||
{
|
||||
|
||||
struct bpf_insn *new_insn;
|
||||
|
||||
if (prog->kernel_fd >= 0)
|
||||
return error_log_errno(EBUSY, "Refusing to update bpf cgroup program that's already loaded");
|
||||
|
||||
new_insn = realloc(prog->instructions, sizeof(struct bpf_insn) * (count + prog->n_instructions));
|
||||
if (!new_insn)
|
||||
return error_log_errno(ENOMEM, "Failed to reallocate bpf cgroup program");
|
||||
|
||||
prog->instructions = new_insn;
|
||||
memcpy(prog->instructions + prog->n_instructions, instructions,
|
||||
sizeof(struct bpf_insn) * count);
|
||||
prog->n_instructions += count;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bpf_program_free(struct bpf_program *prog)
|
||||
{
|
||||
(void)bpf_program_cgroup_detach(prog);
|
||||
|
||||
if (prog->kernel_fd >= 0)
|
||||
close(prog->kernel_fd);
|
||||
free(prog->instructions);
|
||||
free(prog->attached_path);
|
||||
free(prog);
|
||||
}
|
||||
|
||||
/*
 * Helpers building a single struct bpf_insn as a compound literal. All
 * arguments are parenthesized so that arbitrary expressions can be passed.
 */

/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                               \
	((struct bpf_insn){.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
			   .dst_reg = (DST),                           \
			   .src_reg = (SRC),                           \
			   .off = (OFF),                               \
			   .imm = 0})

/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
#define BPF_ALU32_IMM(OP, DST, IMM)                              \
	((struct bpf_insn){.code = BPF_ALU | BPF_OP(OP) | BPF_K, \
			   .dst_reg = (DST),                     \
			   .src_reg = 0,                         \
			   .off = 0,                             \
			   .imm = (IMM)})

/* Short form of mov on an immediate, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM)                                 \
	((struct bpf_insn){.code = BPF_ALU64 | BPF_MOV | BPF_K, \
			   .dst_reg = (DST),                    \
			   .src_reg = 0,                        \
			   .off = 0,                            \
			   .imm = (IMM)})

/* Short form of mov between registers, dst_reg = src_reg (32 bit) */
#define BPF_MOV32_REG(DST, SRC)                               \
	((struct bpf_insn){.code = BPF_ALU | BPF_MOV | BPF_X, \
			   .dst_reg = (DST),                  \
			   .src_reg = (SRC),                  \
			   .off = 0,                          \
			   .imm = 0})

/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
#define BPF_JMP_REG(OP, DST, SRC, OFF)                           \
	((struct bpf_insn){.code = BPF_JMP | BPF_OP(OP) | BPF_X, \
			   .dst_reg = (DST),                     \
			   .src_reg = (SRC),                     \
			   .off = (OFF),                         \
			   .imm = 0})

/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF)                           \
	((struct bpf_insn){.code = BPF_JMP | BPF_OP(OP) | BPF_K, \
			   .dst_reg = (DST),                     \
			   .src_reg = 0,                         \
			   .off = (OFF),                         \
			   .imm = (IMM)})

/* Program exit */
#define BPF_EXIT_INSN()                                \
	((struct bpf_insn){.code = BPF_JMP | BPF_EXIT, \
			   .dst_reg = 0,               \
			   .src_reg = 0,               \
			   .off = 0,                   \
			   .imm = 0})
|
||||
|
||||
/*
 * Convert an access string made of 'r', 'w' and 'm' characters into the
 * corresponding BPF_DEVCG_ACC_* bit mask.
 *
 * Returns the mask (0 for NULL or an empty string) or -EINVAL if the string
 * contains any other character.
 */
static int bpf_access_mask(const char *acc)
{
	int bits = 0;

	if (!acc)
		return 0;

	while (*acc != '\0') {
		const char c = *acc++;

		if (c == 'r')
			bits |= BPF_DEVCG_ACC_READ;
		else if (c == 'w')
			bits |= BPF_DEVCG_ACC_WRITE;
		else if (c == 'm')
			bits |= BPF_DEVCG_ACC_MKNOD;
		else
			return -EINVAL;
	}

	return bits;
}
|
||||
|
||||
/*
 * Map a rule's type character to the bpf device type: 0 for 'a' (any),
 * BPF_DEVCG_DEV_BLOCK for 'b', BPF_DEVCG_DEV_CHAR for 'c'.
 *
 * Returns -1 for any other character.
 */
static int bpf_device_type(char type)
{
	if (type == 'a')
		return 0;

	if (type == 'b')
		return BPF_DEVCG_DEV_BLOCK;

	if (type == 'c')
		return BPF_DEVCG_DEV_CHAR;

	return -1;
}
|
||||
|
||||
/* Tell whether @access_mask is exactly read + write + mknod. */
static inline bool bpf_device_all_access(int access_mask)
{
	const int every_access = BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE |
				 BPF_DEVCG_ACC_MKNOD;

	return access_mask == every_access;
}
|
||||
|
||||
struct bpf_program *bpf_program_new(uint32_t prog_type)
|
||||
{
|
||||
__do_free struct bpf_program *prog = NULL;
|
||||
|
||||
prog = calloc(1, sizeof(struct bpf_program));
|
||||
if (!prog)
|
||||
return NULL;
|
||||
|
||||
prog->prog_type = prog_type;
|
||||
prog->kernel_fd = -EBADF;
|
||||
|
||||
return move_ptr(prog);
|
||||
}
|
||||
|
||||
/*
 * Append the program prologue: it unpacks struct bpf_cgroup_dev_ctx (passed
 * in r1) into the registers that the per-device rules compare against.
 *
 * Returns the result of bpf_program_add_instructions().
 */
int bpf_program_init(struct bpf_program *prog)
{
	const struct bpf_insn prologue[] = {
		/* r2 = device type: low 16 bits of access_type */
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)),
		BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),

		/* r3 = access type: access_type shifted right by 16 bits */
		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)),
		BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),

		/* r4 = major number */
		BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, major)),

		/* r5 = minor number */
		BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, minor)),
	};
	int ret;

	ret = bpf_program_add_instructions(prog, prologue, ARRAY_SIZE(prologue));
	return ret;
}
|
||||
|
||||
/*
 * Append one device rule to @prog.
 *
 * The rule is a chain of "jump if not equal" checks on the registers set up
 * by bpf_program_init() (r2 device type, r3 access, r4 major, r5 minor)
 * followed by "r0 = allow; exit". Any failing check jumps to the first
 * instruction behind the rule so that the next rule gets evaluated.
 *
 * @type:   'a', 'b' or 'c'; 'a' emits no type check.
 * @major:  major number, negative for "any".
 * @minor:  minor number, negative for "any".
 * @access: string of 'r', 'w', 'm'; all three together emit no access check.
 * @allow:  verdict returned by the program when the rule matches.
 *
 * Returns 0 on success, -1 with errno set on invalid input or when the
 * instructions cannot be added.
 */
int bpf_program_append_device(struct bpf_program *prog, char type, int major,
			      int minor, const char *access, int allow)
{
	int ret;
	/*
	 * Offset from the first emitted check to the end of the rule. It
	 * starts at 1: the two-instruction verdict minus one, because jump
	 * offsets are relative to the instruction following the jump.
	 */
	int jump_nr = 1;
	struct bpf_insn bpf_access_decision[] = {
		BPF_MOV64_IMM(BPF_REG_0, allow),
		BPF_EXIT_INSN(),
	};
	int access_mask;
	int device_type;

	device_type = bpf_device_type(type);
	if (device_type < 0)
		return error_log_errno(EINVAL, "Invalid bpf cgroup device type %c", type);

	access_mask = bpf_access_mask(access);
	if (access_mask < 0)
		return error_log_errno(EINVAL, "Invalid bpf cgroup device access %s", access);

	if (device_type > 0)
		jump_nr++;

	if (!bpf_device_all_access(access_mask))
		jump_nr += 3;

	if (major >= 0)
		jump_nr++;

	if (minor >= 0)
		jump_nr++;

	if (device_type > 0) {
		struct bpf_insn ins[] = {
			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, device_type, jump_nr--),
		};

		ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
		if (ret)
			return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
	}

	if (!bpf_device_all_access(access_mask)) {
		struct bpf_insn ins[] = {
			BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
			BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access_mask),
			/*
			 * The jump is the last of the three instructions
			 * counted for this check, so the two before it must
			 * not be part of the offset.
			 */
			BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, jump_nr - 2),
		};

		jump_nr -= 3;
		ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
		if (ret)
			return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
	}

	if (major >= 0) {
		struct bpf_insn ins[] = {
			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, jump_nr--),
		};

		ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
		if (ret)
			return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
	}

	if (minor >= 0) {
		struct bpf_insn ins[] = {
			BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, jump_nr--),
		};

		ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
		if (ret)
			return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");
	}

	ret = bpf_program_add_instructions(prog, bpf_access_decision,
					   ARRAY_SIZE(bpf_access_decision));
	if (ret)
		return error_log_errno(errno, "Failed to add instructions to bpf cgroup program");

	return 0;
}
|
||||
|
||||
int bpf_program_finalize(struct bpf_program *prog)
|
||||
{
|
||||
struct bpf_insn ins[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, prog->blacklist ? 1 : 0),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
|
||||
TRACE("Implementing %s bpf device cgroup program",
|
||||
prog->blacklist ? "blacklist" : "whitelist");
|
||||
return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins));
|
||||
}
|
||||
|
||||
static int bpf_program_load_kernel(struct bpf_program *prog, char *log_buf,
|
||||
size_t log_size)
|
||||
{
|
||||
union bpf_attr attr;
|
||||
|
||||
if (prog->kernel_fd >= 0) {
|
||||
memset(log_buf, 0, log_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
attr = (union bpf_attr){
|
||||
.prog_type = prog->prog_type,
|
||||
.insns = PTR_TO_UINT64(prog->instructions),
|
||||
.insn_cnt = prog->n_instructions,
|
||||
.license = PTR_TO_UINT64("GPL"),
|
||||
.log_buf = PTR_TO_UINT64(log_buf),
|
||||
.log_level = !!log_buf,
|
||||
.log_size = log_size,
|
||||
};
|
||||
|
||||
prog->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
|
||||
if (prog->kernel_fd < 0)
|
||||
return error_log_errno(errno, "Failed to load bpf program");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
|
||||
const char *path, uint32_t flags)
|
||||
{
|
||||
__do_free char *copy = NULL;
|
||||
__do_close_prot_errno int fd = -EBADF;
|
||||
union bpf_attr attr;
|
||||
int ret;
|
||||
|
||||
if (flags & ~(BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
|
||||
return error_log_errno(EINVAL, "Invalid flags for bpf program");
|
||||
|
||||
if (prog->attached_path) {
|
||||
if (prog->attached_type != type)
|
||||
return error_log_errno(EBUSY, "Wrong type for bpf program");
|
||||
|
||||
if (prog->attached_flags != flags)
|
||||
return error_log_errno(EBUSY, "Wrong flags for bpf program");
|
||||
|
||||
if (flags != BPF_F_ALLOW_OVERRIDE)
|
||||
return true;
|
||||
}
|
||||
|
||||
ret = bpf_program_load_kernel(prog, NULL, 0);
|
||||
if (ret < 0)
|
||||
return error_log_errno(ret, "Failed to load bpf program");
|
||||
|
||||
copy = strdup(path);
|
||||
if (!copy)
|
||||
return error_log_errno(ENOMEM, "Failed to duplicate cgroup path %s", path);
|
||||
|
||||
fd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return error_log_errno(errno, "Failed to open cgroup path %s", path);
|
||||
|
||||
attr = (union bpf_attr){
|
||||
.attach_type = type,
|
||||
.target_fd = fd,
|
||||
.attach_bpf_fd = prog->kernel_fd,
|
||||
.attach_flags = flags,
|
||||
};
|
||||
|
||||
ret = bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
|
||||
if (ret < 0)
|
||||
return error_log_errno(errno, "Failed to attach bpf program");
|
||||
|
||||
free_and_replace(prog->attached_path, copy);
|
||||
prog->attached_type = type;
|
||||
prog->attached_flags = flags;
|
||||
|
||||
TRACE("Loaded and attached bpf program to cgroup %s", prog->attached_path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_program_cgroup_detach(struct bpf_program *prog)
|
||||
{
|
||||
int ret;
|
||||
__do_close_prot_errno int fd = -EBADF;
|
||||
|
||||
if (!prog)
|
||||
return 0;
|
||||
|
||||
if (!prog->attached_path)
|
||||
return 0;
|
||||
|
||||
fd = open(prog->attached_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
if (errno != ENOENT)
|
||||
return error_log_errno(errno, "Failed to open attach cgroup %s",
|
||||
prog->attached_path);
|
||||
} else {
|
||||
union bpf_attr attr;
|
||||
|
||||
attr = (union bpf_attr){
|
||||
.attach_type = prog->attached_type,
|
||||
.target_fd = fd,
|
||||
.attach_bpf_fd = prog->kernel_fd,
|
||||
};
|
||||
|
||||
ret = bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
|
||||
if (ret < 0)
|
||||
return error_log_errno(errno, "Failed to detach bpf program from cgroup %s",
|
||||
prog->attached_path);
|
||||
}
|
||||
|
||||
free(prog->attached_path);
|
||||
prog->attached_path = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
|
||||
{
|
||||
if (conf->cgroup2_devices) {
|
||||
(void)bpf_program_cgroup_detach(conf->cgroup2_devices);
|
||||
(void)bpf_program_free(conf->cgroup2_devices);
|
||||
}
|
||||
}
|
||||
#endif
|
144
src/lxc/cgroups/cgroup2_devices.h
Normal file
144
src/lxc/cgroups/cgroup2_devices.h
Normal file
@ -0,0 +1,144 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
|
||||
/* Parts of this taken from systemd's implementation. */
|
||||
|
||||
#ifndef __LXC_CGROUP2_DEVICES_H
|
||||
#define __LXC_CGROUP2_DEVICES_H
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "conf.h"
|
||||
#include "config.h"
|
||||
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#endif
|
||||
|
||||
#if !HAVE_BPF
|
||||
#if !(defined __NR_bpf && __NR_bpf > 0)
|
||||
#if defined __NR_bpf
|
||||
#undef __NR_bpf
|
||||
#endif
|
||||
#if defined __i386__
|
||||
#define __NR_bpf 357
|
||||
#elif defined __x86_64__
|
||||
#define __NR_bpf 321
|
||||
#elif defined __aarch64__
|
||||
#define __NR_bpf 280
|
||||
#elif defined __arm__
|
||||
#define __NR_bpf 386
|
||||
#elif defined __sparc__
|
||||
#define __NR_bpf 349
|
||||
#elif defined __s390__
|
||||
#define __NR_bpf 351
|
||||
#elif defined __tilegx__
|
||||
#define __NR_bpf 280
|
||||
#else
|
||||
#warning "__NR_bpf not defined for your architecture"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
union bpf_attr;
|
||||
|
||||
/*
 * Fallback wrapper for the bpf() system call. Issues the raw syscall when
 * __NR_bpf is known; otherwise fails with errno set to ENOSYS.
 */
static inline int missing_bpf(int cmd, union bpf_attr *attr, size_t size)
{
#ifdef __NR_bpf
	long rc = syscall(__NR_bpf, cmd, attr, size);

	return (int)rc;
#else
	errno = ENOSYS;
	return -1;
#endif
}
|
||||
|
||||
#define bpf missing_bpf
|
||||
#endif
|
||||
|
||||
/* In-memory representation of a bpf program and of its cgroup attachment. */
struct bpf_program {
|
||||
/* Selects the verdict appended by bpf_program_finalize(): 1 if set, else 0. */
bool blacklist;
|
||||
/* fd returned by BPF_PROG_LOAD; negative while the program is not loaded. */
int kernel_fd;
|
||||
/* Program type passed to BPF_PROG_LOAD. */
uint32_t prog_type;
|
||||
|
||||
/* Number of entries in the instruction array. */
size_t n_instructions;
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
|
||||
/* Heap-allocated instruction array, grown with realloc(). */
struct bpf_insn *instructions;
|
||||
#endif
|
||||
|
||||
/* Heap-allocated copy of the cgroup path the program is attached to, or NULL. */
char *attached_path;
|
||||
/* Attach type recorded by bpf_program_cgroup_attach(). */
int attached_type;
|
||||
/* Attach flags recorded by bpf_program_cgroup_attach(). */
uint32_t attached_flags;
|
||||
};
|
||||
|
||||
#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
|
||||
struct bpf_program *bpf_program_new(uint32_t prog_type);
|
||||
int bpf_program_init(struct bpf_program *prog);
|
||||
int bpf_program_append_device(struct bpf_program *prog, char type, int major,
|
||||
int minor, const char *access, int allow);
|
||||
int bpf_program_finalize(struct bpf_program *prog);
|
||||
int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
|
||||
const char *path, uint32_t flags);
|
||||
int bpf_program_cgroup_detach(struct bpf_program *prog);
|
||||
void bpf_program_free(struct bpf_program *prog);
|
||||
void lxc_clear_cgroup2_devices(struct lxc_conf *conf);
|
||||
/*
 * Free the program *prog points to, if any, and reset *prog to NULL.
 * The pointer-to-pointer signature matches what a cleanup handler
 * receives - presumably its intended use; confirm against callers.
 */
static inline void __do_bpf_program_free(struct bpf_program **prog)
{
	struct bpf_program *victim = *prog;

	if (!victim)
		return;

	bpf_program_free(victim);
	*prog = NULL;
}
|
||||
#else
|
||||
/*
 * Stubs used when struct bpf_cgroup_dev_ctx is not available at build time:
 * no program can be created and every operation reports -ENOSYS.
 */
static inline struct bpf_program *bpf_program_new(uint32_t prog_type)
{
	(void)prog_type;

	return NULL;
}

static inline int bpf_program_init(struct bpf_program *prog)
{
	(void)prog;

	return -ENOSYS;
}

static inline int bpf_program_append_device(struct bpf_program *prog, char type,
					    int major, int minor,
					    const char *access, int allow)
{
	(void)prog;
	(void)type;
	(void)major;
	(void)minor;
	(void)access;
	(void)allow;

	return -ENOSYS;
}

static inline int bpf_program_finalize(struct bpf_program *prog)
{
	(void)prog;

	return -ENOSYS;
}

static inline int bpf_program_cgroup_attach(struct bpf_program *prog, int type,
					    const char *path, uint32_t flags)
{
	(void)prog;
	(void)type;
	(void)path;
	(void)flags;

	return -ENOSYS;
}

static inline int bpf_program_cgroup_detach(struct bpf_program *prog)
{
	(void)prog;

	return -ENOSYS;
}

/* Nothing can have been allocated, so there is nothing to free. */
static inline void bpf_program_free(struct bpf_program *prog)
{
	(void)prog;
}

static inline void lxc_clear_cgroup2_devices(struct lxc_conf *conf)
{
	(void)conf;
}

static inline void __do_bpf_program_free(struct bpf_program **prog)
{
	(void)prog;
}
|
||||
#endif
|
||||
|
||||
#endif /* __LXC_CGROUP2_DEVICES_H */
|
@ -57,6 +57,7 @@
|
||||
#include "af_unix.h"
|
||||
#include "caps.h"
|
||||
#include "cgroup.h"
|
||||
#include "cgroup2_devices.h"
|
||||
#include "conf.h"
|
||||
#include "config.h"
|
||||
#include "confile.h"
|
||||
@ -4118,6 +4119,7 @@ void lxc_conf_free(struct lxc_conf *conf)
|
||||
lxc_clear_config_keepcaps(conf);
|
||||
lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
|
||||
lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
|
||||
lxc_clear_cgroup2_devices(conf);
|
||||
lxc_clear_hooks(conf, "lxc.hook");
|
||||
lxc_clear_mount_entries(conf);
|
||||
lxc_clear_idmaps(conf);
|
||||
|
@ -241,6 +241,7 @@ struct lxc_conf {
|
||||
struct {
|
||||
struct lxc_list cgroup;
|
||||
struct lxc_list cgroup2;
|
||||
struct bpf_program *cgroup2_devices;
|
||||
};
|
||||
|
||||
struct {
|
||||
|
@ -505,11 +505,11 @@ ATTR_UNUSED static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#define error_log_errno(__errno__, format, ...) \
|
||||
({ \
|
||||
errno = __errno__; \
|
||||
SYSERROR(format, ##__VA_ARGS__); \
|
||||
-1; \
|
||||
#define error_log_errno(__errno__, format, ...) \
|
||||
({ \
|
||||
errno = __errno__; \
|
||||
SYSERROR(format, ##__VA_ARGS__); \
|
||||
-1; \
|
||||
})
|
||||
|
||||
extern int lxc_log_fd;
|
||||
|
@ -429,6 +429,8 @@ enum {
|
||||
#define PTR_TO_INTMAX(p) ((intmax_t)((intptr_t)(p)))
|
||||
#define INTMAX_TO_PTR(u) ((void *)((intptr_t)(u)))
|
||||
|
||||
/*
 * Convert a pointer to a 64 bit integer. Goes through uintptr_t so that the
 * value is zero-extended; a signed intptr_t would sign-extend addresses with
 * the top bit set on 32 bit targets.
 */
#define PTR_TO_UINT64(p) ((uint64_t)((uintptr_t)(p)))
|
||||
|
||||
#define LXC_INVALID_UID ((uid_t)-1)
|
||||
#define LXC_INVALID_GID ((gid_t)-1)
|
||||
|
||||
@ -465,4 +467,80 @@ enum {
|
||||
#define LXC_TIMESTAMP_FNAME "ts"
|
||||
#define LXC_COMMENT_FNAME "comment"
|
||||
|
||||
/* Taken from systemd. */
|
||||
/*
 * Free the pointer a, move the pointer b into it and set b to NULL.
 * The statement expression evaluates to 0. Both arguments are expanded more
 * than once, so they must be lvalues without side effects.
 */
#define free_and_replace(a, b) \
|
||||
({ \
|
||||
free(a); \
|
||||
(a) = (b); \
|
||||
(b) = NULL; \
|
||||
0; \
|
||||
})
|
||||
|
||||
/*
 * MIN(a, b): smaller of two values with each argument evaluated exactly once.
 * Any previous definition of MIN is dropped first. Each argument is copied
 * into a const temporary whose name is made unique by pasting a fresh
 * __COUNTER__ value (UNIQ) onto a fixed prefix (UNIQ_T); the two-level
 * CONCATENATE/XCONCATENATE pair makes sure the counter is expanded before it
 * is pasted.
 */
#define XCONCATENATE(x, y) x##y
|
||||
#define CONCATENATE(x, y) XCONCATENATE(x, y)
|
||||
#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
|
||||
#define UNIQ __COUNTER__
|
||||
#undef MIN
|
||||
#define MIN(a, b) __MIN(UNIQ, (a), UNIQ, (b))
|
||||
#define __MIN(aq, a, bq, b) \
|
||||
({ \
|
||||
const typeof(a) UNIQ_T(A, aq) = (a); \
|
||||
const typeof(b) UNIQ_T(B, bq) = (b); \
|
||||
UNIQ_T(A, aq) < UNIQ_T(B, bq) ? UNIQ_T(A, aq) : UNIQ_T(B, bq); \
|
||||
})
|
||||
|
||||
/* Taken from the kernel. */
|
||||
|
||||
/*
|
||||
* min()/max()/clamp() macros must accomplish three things:
|
||||
*
|
||||
* - avoid multiple evaluations of the arguments (so side-effects like
|
||||
* "x++" happen only once) when non-constant.
|
||||
* - perform strict type-checking (to generate warnings instead of
|
||||
* nasty runtime surprises). See the "unnecessary" pointer comparison
|
||||
* in __typecheck().
|
||||
* - retain result as a constant expressions when called with only
|
||||
* constant expressions (to avoid tripping VLA warnings in stack
|
||||
* allocation usage).
|
||||
*/
|
||||
#define __typecheck(x, y) (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
|
||||
|
||||
/*
|
||||
* This returns a constant expression while determining if an argument is
|
||||
* a constant expression, most importantly without evaluating the argument.
|
||||
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
|
||||
*/
|
||||
#define __is_constexpr(x) \
|
||||
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x)*0l)) : (int *)8)))
|
||||
|
||||
#define __no_side_effects(x, y) (__is_constexpr(x) && __is_constexpr(y))
|
||||
|
||||
#define __safe_cmp(x, y) (__typecheck(x, y) && __no_side_effects(x, y))
|
||||
|
||||
#define __cmp(x, y, op) ((x)op(y) ? (x) : (y))
|
||||
|
||||
#define __cmp_once(x, y, unique_x, unique_y, op) \
|
||||
({ \
|
||||
typeof(x) unique_x = (x); \
|
||||
typeof(y) unique_y = (y); \
|
||||
__cmp(unique_x, unique_y, op); \
|
||||
})
|
||||
|
||||
#define __careful_cmp(x, y, op) \
|
||||
__builtin_choose_expr(__safe_cmp(x, y), __cmp(x, y, op), \
|
||||
__cmp_once(x, y, __UNIQUE_ID(__x), \
|
||||
__UNIQUE_ID(__y), op))
|
||||
|
||||
/**
|
||||
* min - return minimum of two values of the same or compatible types
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define min(x, y) __careful_cmp(x, y, <)
|
||||
|
||||
/*
 * Number of elements of the array x. When the type of x is compatible with
 * the type of &*(x), i.e. x is a pointer rather than an array, the macro
 * expands to ((void)0) instead, so using the result as a value does not
 * compile.
 */
#define ARRAY_SIZE(x) \
|
||||
(__builtin_choose_expr(!__builtin_types_compatible_p(typeof(x), \
|
||||
typeof(&*(x))), \
|
||||
sizeof(x) / sizeof((x)[0]), ((void)0)))
|
||||
|
||||
#endif /* __LXC_MACRO_H */
|
||||
|
@ -1912,6 +1912,12 @@ static int lxc_spawn(struct lxc_handler *handler)
|
||||
}
|
||||
TRACE("Set up legacy device cgroup controller limits");
|
||||
|
||||
if (!cgroup_ops->devices_activate(cgroup_ops, handler)) {
|
||||
ERROR("Failed to setup cgroup2 device controller limits");
|
||||
goto out_delete_net;
|
||||
}
|
||||
TRACE("Set up cgroup2 device controller limits");
|
||||
|
||||
if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
|
||||
/* Now we're ready to preserve the cgroup namespace */
|
||||
ret = lxc_try_preserve_ns(handler->pid, "cgroup");
|
||||
|
Loading…
x
Reference in New Issue
Block a user