From 89d09707b023ced7d3b54b880c312efa9cb6e63c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 00:36:40 +0200 Subject: [PATCH 01/18] conf: non-functional changes Signed-off-by: Christian Brauner --- src/lxc/conf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 3a993c6e1..26cb42ab1 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -2455,7 +2455,10 @@ static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) return -1; } - /* default: let the system to choose one interface name */ + /* Default: let the system to choose one interface name. + * When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + */ if (!netdev->name) netdev->name = netdev->type == LXC_NET_PHYS ? netdev->link : "eth%d"; From 01b2d1f1740f6f0b94dbdb1add54cb1ebde85ff2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 00:48:34 +0200 Subject: [PATCH 02/18] conf: do not deref null pointer Signed-off-by: Christian Brauner --- src/lxc/conf.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 26cb42ab1..6171a017c 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3324,11 +3324,17 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, exit(EXIT_FAILURE); pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev_link, netdev->name); - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname, - pidstr, "veth", netdev_link, netdev->name, NULL); - + INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev_link, + netdev->name ? 
netdev->name : "(null)"); + if (netdev->name) + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + netdev->name, (char *)NULL); + else + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + (char *)NULL); SYSERROR("Failed to exec lxc-user-nic."); exit(EXIT_FAILURE); } From a17f8b3f4658a1022d83575173cd846158f5d0f9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Aug 2017 09:52:14 +0200 Subject: [PATCH 03/18] cgfsng: non-functional changes Signed-off-by: Christian Brauner --- src/lxc/cgroups/cgfsng.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index fc658faf2..e137b33c0 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1199,6 +1199,7 @@ out_free: static int cgroup_rmdir(char *dirname) { + int ret; struct dirent *direntp; DIR *dir; int r = 0; @@ -1208,8 +1209,8 @@ static int cgroup_rmdir(char *dirname) return -1; while ((direntp = readdir(dir))) { - struct stat mystat; char *pathname; + struct stat mystat; if (!direntp) break; @@ -1220,32 +1221,40 @@ static int cgroup_rmdir(char *dirname) pathname = must_make_path(dirname, direntp->d_name, NULL); - if (lstat(pathname, &mystat)) { + ret = lstat(pathname, &mystat); + if (ret < 0) { if (!r) - WARN("failed to stat %s", pathname); + WARN("Failed to stat %s", pathname); r = -1; goto next; } if (!S_ISDIR(mystat.st_mode)) goto next; - if (cgroup_rmdir(pathname) < 0) + + ret = cgroup_rmdir(pathname); + if (ret < 0) r = -1; next: free(pathname); } - if (rmdir(dirname) < 0) { + ret = rmdir(dirname); + if (ret < 0) { if (!r) - WARN("failed to delete %s: %s", dirname, strerror(errno)); + WARN("Failed to delete \"%s\": %s", dirname, + strerror(errno)); r = -1; } - if (closedir(dir) < 0) { + ret = closedir(dir); + if (ret < 0) { if (!r) - WARN("failed to delete %s: %s", dirname, 
strerror(errno)); + WARN("Failed to delete \"%s\": %s", dirname, + strerror(errno)); r = -1; } + return r; } From 7d531e9ba482947eaf9be3b19949f24b5e78bb44 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Aug 2017 11:51:05 +0200 Subject: [PATCH 04/18] cgfsng: add container name to lxc.cgroup.dir value Say we have lxc.uts.name = c1 lxc.cgroup.dir = lxd the actual path should be lxd/c1 Right now it would just be lxd Signed-off-by: Christian Brauner --- src/lxc/cgroups/cgfsng.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index e137b33c0..fe3fd7062 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1345,11 +1345,11 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) */ static inline bool cgfsng_create(void *hdata) { - struct cgfsng_handler_data *d = hdata; - char *tmp, *cgname, *offset; int i; - int idx = 0; size_t len; + char *cgname, *offset, *tmp; + int idx = 0; + struct cgfsng_handler_data *d = hdata; if (!d) return false; @@ -1360,7 +1360,7 @@ static inline bool cgfsng_create(void *hdata) } if (d->cgroup_meta.dir) - tmp = strdup(d->cgroup_meta.dir); + tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false); else tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern); if (!tmp) { From 92c590ae1ea40bc094603ab49c20b785cc88bb1d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 25 Aug 2017 11:53:55 +0200 Subject: [PATCH 05/18] cgfsng: try to delete parent cgroups Say we have lxc.uts.name = c1 lxc.cgroup.dir = lxd/a/b/c the path for the container's cgroup would be lxd/a/b/c/c1 When the container is shut down, we should not just try to delete "c1"; we should also try to delete "c", "b", "a", and "lxd". This is to ensure that we don't leave empty cgroups around thereby increasing the chance that we run into trouble with cgroup limits. 
The algorithm for this isn't too costly since we can simply stop walking upwards at the first rmdir() failure. Signed-off-by: Christian Brauner --- src/lxc/cgroups/cgfsng.c | 78 ++++++++++++++++++++++++++++++++++------ src/lxc/confile.c | 3 -- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index fe3fd7062..390923eca 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1272,35 +1272,91 @@ static int rmdir_wrapper(void *data) return cgroup_rmdir(path); } -void recursive_destroy(char *path, struct lxc_conf *conf) +int recursive_destroy(char *path, struct lxc_conf *conf) { int r; + if (conf && !lxc_list_empty(&conf->id_map)) r = userns_exec_1(conf, rmdir_wrapper, path, "rmdir_wrapper"); else r = cgroup_rmdir(path); - if (r < 0) ERROR("Error destroying %s", path); + + return r; } static void cgfsng_destroy(void *hdata, struct lxc_conf *conf) { + int i; + char *clean_parent, *clean_fullcgpath; + char **fields; + size_t recurse_upwards = 0; struct cgfsng_handler_data *d = hdata; if (!d) return; - if (d->container_cgroup && hierarchies) { - int i; - for (i = 0; hierarchies[i]; i++) { - struct hierarchy *h = hierarchies[i]; - if (h->fullcgpath) { - recursive_destroy(h->fullcgpath, conf); - free(h->fullcgpath); - h->fullcgpath = NULL; - } + if (!d->container_cgroup || !hierarchies) + return; + + if (d->cgroup_meta.dir) + clean_parent = d->cgroup_meta.dir; + else + clean_parent = d->cgroup_pattern; + fields = lxc_normalize_path(clean_parent); + if (fields) { + recurse_upwards = lxc_array_len((void **)fields); + if (recurse_upwards > 0 && clean_parent == d->cgroup_pattern) + recurse_upwards--; + lxc_free_array((void **)fields, free); + } + + for (i = 0; hierarchies[i]; i++) { + int ret; + size_t j; + struct hierarchy *h = hierarchies[i]; + + if (!h->fullcgpath) + continue; + + clean_fullcgpath = lxc_deslashify(h->fullcgpath); + if (!clean_fullcgpath) + clean_fullcgpath = h->fullcgpath; + + 
/* Delete the container's cgroup */ + ret = recursive_destroy(clean_fullcgpath, conf); + if (ret < 0) + goto next; + + if (h->fullcgpath == clean_fullcgpath) + goto next; + + /* Delete parent cgroups as specified in the containers config + * file. This takes care of not having useless empty cgroups + * around. + */ + for (j = 0; j < recurse_upwards; j++) { + char *s = clean_fullcgpath; + + s = strrchr(s, '/'); + if (!s) + break; + *s = '\0'; + + /* If we fail to delete a cgroup we know that any parent + * cgroup also cannot be removed. + */ + ret = recursive_destroy(clean_fullcgpath, conf); + if (ret < 0) + break; } + +next: + if (h->fullcgpath != clean_fullcgpath) + free(clean_fullcgpath); + free(h->fullcgpath); + h->fullcgpath = NULL; } free_handler_data(d); diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 62337289e..e66bae314 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -1431,9 +1431,6 @@ static int set_config_cgroup_dir(const char *key, const char *value, if (lxc_config_value_empty(value)) return clr_config_cgroup_dir(key, lxc_conf, NULL); - if (lxc_conf->cgroup_meta.dir) - clr_config_cgroup_dir(key, lxc_conf, NULL); - return set_config_string_item(&lxc_conf->cgroup_meta.dir, value); } From d04e77c34a445dd9caee2a50a5c948e7af706d55 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Aug 2017 18:53:29 +0200 Subject: [PATCH 06/18] lxc-user-nic: non-functional changes Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 49 ++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index af44a3210..0825e0b11 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -914,31 +914,28 @@ static bool may_access_netns(int pid) int main(int argc, char *argv[]) { - int n, fd; + int fd, n, pid, ret; char *me; - char *nicname; - int pid; - char *cnic = NULL; /* Created nic name in container is returned here. 
*/ - char *vethname = NULL; + char nicname[100]; + char *cnic = NULL, *vethname = NULL; bool gotone = false; struct alloted_s *alloted = NULL; - nicname = alloca(40); - if (!nicname) { - usernic_error("Failed allocate memory: %s\n", strerror(errno)); - exit(EXIT_FAILURE); - } - - /* set a sane env, because we are setuid-root */ - if (clearenv() < 0) { + /* Set a sane env, because we are setuid-root. */ + ret = clearenv(); + if (ret) { usernic_error("%s", "Failed to clear environment\n"); exit(EXIT_FAILURE); } - if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1) < 0) { + + ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1); + if (ret < 0) { usernic_error("%s", "Failed to set PATH, exiting\n"); exit(EXIT_FAILURE); } - if ((me = get_username()) == NULL) { + + me = get_username(); + if (!me) { usernic_error("%s", "Failed to get username\n"); exit(EXIT_FAILURE); } @@ -952,9 +949,8 @@ int main(int argc, char *argv[]) lxcpath = argv[1]; lxcname = argv[2]; - errno = 0; - pid = strtol(argv[3], NULL, 10); - if (errno) { + ret = lxc_safe_int(argv[3], &pid); + if (ret < 0) { usernic_error("Could not read pid: %s\n", argv[1]); exit(EXIT_FAILURE); } @@ -964,7 +960,8 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - if ((fd = open_and_lock(LXC_USERNIC_DB)) < 0) { + fd = open_and_lock(LXC_USERNIC_DB); + if (fd < 0) { usernic_error("Failed to lock %s\n", LXC_USERNIC_DB); exit(EXIT_FAILURE); } @@ -976,7 +973,7 @@ int main(int argc, char *argv[]) n = get_alloted(me, argv[4], argv[5], &alloted); if (n > 0) - gotone = get_nic_if_avail(fd, alloted, pid, argv[4], argv[5], n, &nicname, &cnic); + gotone = get_nic_if_avail(fd, alloted, pid, argv[4], argv[5], n, (char **)&nicname, &cnic); close(fd); free_alloted(&alloted); @@ -986,16 +983,16 @@ int main(int argc, char *argv[]) } /* Now rename the link. 
*/ - if (rename_in_ns(pid, cnic, &vethname) < 0) { + ret = rename_in_ns(pid, cnic, &vethname); + if (ret < 0) { usernic_error("%s", "Failed to rename the link\n"); - if (lxc_netdev_delete_by_name(cnic) < 0) - usernic_error("Failed to delete link \"%s\" the link. Manual cleanup needed\n", cnic); + ret = lxc_netdev_delete_by_name(cnic); + if (ret < 0) + usernic_error("Failed to delete \"%s\"\n", cnic); exit(EXIT_FAILURE); } - /* Write the name of the interface pair to the stdout - like - * eth0:veth9MT2L4. - */ + /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ fprintf(stdout, "%s:%s\n", vethname, nicname); exit(EXIT_SUCCESS); } From 8285dcfb504e02520c9b4cac395dd5a45b1b08d5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Aug 2017 23:10:18 +0200 Subject: [PATCH 07/18] lxc-user-nic: fix memleak get_new_nicname() calls lxc_mkifname() which allocates memory and returns it to the caller. The way get_new_nicname() and get_nic_if_avail() were implemented they hid that fact by returning a boolean. That doesn't make sense. Let's rather have them return a pointer to the allocated nic name which the caller needs to free. Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 90 +++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 0825e0b11..2afd6b14a 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -510,25 +510,29 @@ out_del: return false; } -/* - * Get a new nic. - * *dest will contain the name (vethXXXXXX) which is attached - * on the host to the lxc bridge +/* get_new_nicname() will return the name (vethXXXXXX) which is attached on the + * host to the lxc bridge. The returned string must be freed by caller. 
*/ -static bool get_new_nicname(char **dest, char *br, int pid, char **cnic) +static char *get_new_nicname(char *br, int pid, char **cnic) { int ret; + char *nicname; char template[IFNAMSIZ]; ret = snprintf(template, sizeof(template), "vethXXXXXX"); if (ret < 0 || (size_t)ret >= sizeof(template)) - return false; + return NULL; - *dest = lxc_mkifname(template); - if (!create_nic(*dest, br, pid, cnic)) - return false; + nicname = lxc_mkifname(template); + if (!nicname) + return NULL; - return true; + if (!create_nic(nicname, br, pid, cnic)) { + free(nicname); + return NULL; + } + + return nicname; } static bool get_nic_from_line(char *p, char **nic) @@ -642,13 +646,12 @@ static int count_entries(char *buf, off_t len, char *me, char *t, char *br) * The dbfile has lines of the format: * user type bridge nicname */ -static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, - char *intype, char *br, int allowed, - char **nicname, char **cnic) +static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, + char *intype, char *br, int allowed, char **cnic) { int ret; off_t len, slen; - char *newline, *owner; + char *newline, *nicname, *owner; struct stat sb; struct alloted_s *n; int count = 0; @@ -658,13 +661,13 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, cull_entries(fd, n->name, intype, br); if (allowed == 0) - return false; + return NULL; owner = names->name; if (fstat(fd, &sb) < 0) { usernic_error("Failed to fstat: %s\n", strerror(errno)); - return false; + return NULL; } len = sb.st_size; @@ -672,7 +675,7 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); - return false; + return NULL; } owner = NULL; @@ -688,24 +691,29 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, } if (owner == NULL) - return 
false; + return NULL; - if (!get_new_nicname(nicname, br, pid, cnic)) - return false; - - /* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ - slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(*nicname) + 5; - newline = alloca(slen); - if (!newline) { - usernic_error("Failed allocate memory: %s\n", strerror(errno)); - return false; + nicname = get_new_nicname(br, pid, cnic); + if (!nicname) { + usernic_error("%s", "Failed to get new nic name\n"); + return NULL; } - ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, *nicname); + /* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ + slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 5; + newline = alloca(slen); + if (!newline) { + free(nicname); + usernic_error("Failed allocate memory: %s\n", strerror(errno)); + return NULL; + } + + ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, nicname); if (ret < 0 || ret >= slen) { - if (lxc_netdev_delete_by_name(*nicname) != 0) - usernic_error("Error unlinking %s\n", *nicname); - return false; + if (lxc_netdev_delete_by_name(nicname) != 0) + usernic_error("Error unlinking %s\n", nicname); + free(nicname); + return NULL; } if (len) munmap(buf, len); @@ -716,15 +724,16 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, buf = mmap(NULL, len + slen, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); - if (lxc_netdev_delete_by_name(*nicname) != 0) - usernic_error("Error unlinking %s\n", *nicname); - return false; + if (lxc_netdev_delete_by_name(nicname) != 0) + usernic_error("Error unlinking %s\n", nicname); + free(nicname); + return NULL; } strcpy(buf + len, newline); munmap(buf, len + slen); - return true; + return nicname; } static bool create_db_dir(char *fnam) @@ -916,9 +925,7 @@ int main(int argc, char *argv[]) { int fd, n, pid, ret; char *me; - char nicname[100]; - char *cnic = NULL, 
*vethname = NULL; - bool gotone = false; + char *cnic = NULL, *nicname = NULL, *vethname = NULL; struct alloted_s *alloted = NULL; /* Set a sane env, because we are setuid-root. */ @@ -973,11 +980,12 @@ int main(int argc, char *argv[]) n = get_alloted(me, argv[4], argv[5], &alloted); if (n > 0) - gotone = get_nic_if_avail(fd, alloted, pid, argv[4], argv[5], n, (char **)&nicname, &cnic); + nicname = get_nic_if_avail(fd, alloted, pid, argv[4], + argv[5], n, &cnic); close(fd); free_alloted(&alloted); - if (!gotone) { + if (!nicname) { usernic_error("%s", "Quota reached\n"); exit(EXIT_FAILURE); } @@ -989,10 +997,12 @@ int main(int argc, char *argv[]) ret = lxc_netdev_delete_by_name(cnic); if (ret < 0) usernic_error("Failed to delete \"%s\"\n", cnic); + free(nicname); exit(EXIT_FAILURE); } /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ fprintf(stdout, "%s:%s\n", vethname, nicname); + free(nicname); exit(EXIT_SUCCESS); } From 900e5f94afa9c835eecc509d8b09db7898195aa3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Aug 2017 23:16:03 +0200 Subject: [PATCH 08/18] lxc-user-nic: add new {create,delete} subcommands Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 64 ++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 2afd6b14a..d40519136 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -59,12 +59,17 @@ static void usage(char *me, bool fail) { - fprintf(stderr, "Usage: %s lxcpath name pid type bridge nicname\n", me); - fprintf(stderr, " nicname is the name to use inside the container\n"); - exit(fail ? 
1 : 0); -} + fprintf(stderr, "Usage: %s create {lxcpath} {name} {pid} {type} " + "{bridge} {nicname}\n", me); + fprintf(stderr, "Usage: %s delete {lxcpath} {name} {pid} {type} " + "{bridge} {nicname}\n", me); + fprintf(stderr, "{nicname} is the name to use inside the container\n"); -static char *lxcpath, *lxcname; + if (fail) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); +} static int open_and_lock(char *path) { @@ -921,12 +926,38 @@ static bool may_access_netns(int pid) return may_access; } +struct user_nic_args { + char *cmd; + char *lxc_path; + char *lxc_name; + char *pid; + char *type; + char *link; + char *veth_name; +}; + int main(int argc, char *argv[]) { int fd, n, pid, ret; char *me; char *cnic = NULL, *nicname = NULL, *vethname = NULL; struct alloted_s *alloted = NULL; + struct user_nic_args args; + + if (argc < 7 || argc > 8) { + usage(argv[0], true); + exit(EXIT_FAILURE); + } + + memset(&args, 0, sizeof(struct user_nic_args)); + args.cmd = argv[1]; + args.lxc_path = argv[2]; + args.lxc_name = argv[3]; + args.pid = argv[4]; + args.type = argv[5]; + args.link = argv[6]; + if (argc >= 8) + args.veth_name = argv[7]; /* Set a sane env, because we are setuid-root. 
*/ ret = clearenv(); @@ -947,18 +978,9 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - if (argc < 6) - usage(argv[0], true); - - if (argc >= 7) - vethname = argv[6]; - - lxcpath = argv[1]; - lxcname = argv[2]; - - ret = lxc_safe_int(argv[3], &pid); + ret = lxc_safe_int(args.pid, &pid); if (ret < 0) { - usernic_error("Could not read pid: %s\n", argv[1]); + usernic_error("Could not read pid: %s\n", args.pid); exit(EXIT_FAILURE); } @@ -978,10 +1000,10 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - n = get_alloted(me, argv[4], argv[5], &alloted); + n = get_alloted(me, args.type, args.link, &alloted); if (n > 0) - nicname = get_nic_if_avail(fd, alloted, pid, argv[4], - argv[5], n, &cnic); + nicname = get_nic_if_avail(fd, alloted, pid, args.type, + args.link, n, &cnic); close(fd); free_alloted(&alloted); @@ -991,7 +1013,7 @@ int main(int argc, char *argv[]) } /* Now rename the link. */ - ret = rename_in_ns(pid, cnic, &vethname); + ret = rename_in_ns(pid, cnic, &args.veth_name); if (ret < 0) { usernic_error("%s", "Failed to rename the link\n"); ret = lxc_netdev_delete_by_name(cnic); @@ -1002,7 +1024,7 @@ int main(int argc, char *argv[]) } /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ - fprintf(stdout, "%s:%s\n", vethname, nicname); + fprintf(stdout, "%s:%s\n", args.veth_name, nicname); free(nicname); exit(EXIT_SUCCESS); } From f703d9904a799e9b4993be835d376879b757a830 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 00:00:58 +0200 Subject: [PATCH 09/18] tests: adapt lxc-user-nic tests to new syntax Signed-off-by: Christian Brauner --- src/tests/lxc-test-usernic.in | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tests/lxc-test-usernic.in b/src/tests/lxc-test-usernic.in index 08b9b55fc..53bc8166c 100755 --- a/src/tests/lxc-test-usernic.in +++ b/src/tests/lxc-test-usernic.in @@ -153,7 +153,7 @@ lxcpath=/home/usernic-user/.local/share/lxc lxcname=b1 # Assign one 
veth, should fail as no allowed entries yet -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx1"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx1"; then echo "FAIL: able to create nic with no entries" exit 1 fi @@ -164,24 +164,24 @@ sed -i '/^usernic-user/d' /etc/lxc/lxc-usernet echo "usernic-user veth usernic-br0 2" >> /etc/lxc/lxc-usernet # Assign one veth to second bridge, should fail -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br1 xx1"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br1 xx1"; then echo "FAIL: able to create nic with no entries" exit 1 fi # Assign two veths, should succeed -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx2"; then +if ! run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx2"; then echo "FAIL: unable to create first nic" exit 1 fi -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx3"; then +if ! run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx3"; then echo "FAIL: unable to create second nic" exit 1 fi # Assign one more veth, should fail. -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx4"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx4"; then echo "FAIL: able to create third nic" exit 1 fi @@ -191,7 +191,7 @@ run_cmd "lxc-stop -n b1 -k" run_cmd "lxc-start -n b1 -d" p1=$(run_cmd "lxc-info -n b1 -p -H") -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx5"; then +if ! 
run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx5"; then echo "FAIL: unable to create nic after destroying the old" cleanup 1 fi @@ -204,7 +204,7 @@ lxc-start -n usernic-c1 -d p2=$(lxc-info -n usernic-c1 -p -H) # assign veth to it - should fail -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p2 veth usernic-br0 xx6"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p2 veth usernic-br0 xx6"; then echo "FAIL: able to attach nic to root-owned container" cleanup 1 fi From 25aead3fdd4e296585e275b69357f642ba594420 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 26 Aug 2017 23:04:01 +0200 Subject: [PATCH 10/18] conf: adapt to lxc-user-nic usage - lxc-user-nic gains the subcommands {create,delete} - dup2() STDERR_FILENO as well so that we can show helpful messages in our logs on failure - initialize output buffer so that we don't print garbage Signed-off-by: Christian Brauner --- src/lxc/conf.c | 7 +++++-- src/lxc/lxc_user_nic.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 6171a017c..5adeebbc3 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3277,8 +3277,8 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, pid_t child; int bytes, pipefd[2]; char *token, *saveptr = NULL; - char buffer[MAX_BUFFER_SIZE]; char netdev_link[IFNAMSIZ + 1]; + char buffer[MAX_BUFFER_SIZE] = {0}; if (netdev->type != LXC_NET_VETH) { ERROR("nic type %d not support for unprivileged use", @@ -3308,6 +3308,8 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, /* Redirect stdout to write-end of the pipe. */ ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); close(pipefd[1]); /* Close the write-end of the pipe. 
*/ if (ret < 0) { SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); @@ -3351,7 +3353,8 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, buffer[bytes - 1] = '\0'; if (wait_for_pid(child) != 0) { - TRACE("lxc-user-nic failed to configure requested network"); + ERROR("lxc-user-nic failed to configure requested network: %s", + buffer[0] != '\0' ? buffer : "(null)"); close(pipefd[0]); return -1; } diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index d40519136..24a7bb199 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -940,7 +940,7 @@ int main(int argc, char *argv[]) { int fd, n, pid, ret; char *me; - char *cnic = NULL, *nicname = NULL, *vethname = NULL; + char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; struct user_nic_args args; From c92dfebd9eb246e1b0e159e41e903f66aeac5274 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 00:39:17 +0200 Subject: [PATCH 11/18] lxc-user-nic: rework renaming net devices This should make things a little less convoluted. 
Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 96 ++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 45 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 24a7bb199..5769dfb48 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -767,18 +767,18 @@ again: goto again; } -#define VETH_DEF_NAME "eth%d" -static int rename_in_ns(int pid, char *oldname, char **newnamep) +static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname) { + int ret; uid_t ruid, suid, euid; - int fret = -1; - int fd = -1, ifindex = -1, ofd = -1, ret; - bool grab_newname = false; + char ifname[IFNAMSIZ]; + char *string_ret = NULL, *name = NULL; + int fd = -1, ifindex = -1, ofd = -1; ofd = lxc_preserve_ns(getpid(), "net"); if (ofd < 0) { usernic_error("Failed opening network namespace path for %d", getpid()); - return fret; + return NULL; } fd = lxc_preserve_ns(pid, "net"); @@ -818,63 +818,68 @@ static int rename_in_ns(int pid, char *oldname, char **newnamep) goto do_full_cleanup; } - if (!*newnamep) { - grab_newname = true; - *newnamep = VETH_DEF_NAME; - - ifindex = if_nametoindex(oldname); - if (!ifindex) { - usernic_error("Failed to get netdev index: %s\n", strerror(errno)); - goto do_full_cleanup; - } - } - - ret = lxc_netdev_rename_by_name(oldname, *newnamep); - if (ret < 0) { - usernic_error("Error %d renaming netdev %s to %s in container\n", ret, oldname, *newnamep); + /* Check if old interface exists. */ + ifindex = if_nametoindex(oldname); + if (!ifindex) { + usernic_error("Failed to get netdev index: %s\n", strerror(errno)); goto do_full_cleanup; } - if (grab_newname) { - char ifname[IFNAMSIZ]; - char *namep = ifname; + /* When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + * So we pass "eth%d". 
+ */ + if (newname) + name = newname; + else + name = "eth%d"; - if (!if_indextoname(ifindex, namep)) { - usernic_error("Failed to get new netdev name: %s\n", strerror(errno)); - goto do_full_cleanup; - } - - *newnamep = strdup(namep); - if (!*newnamep) - goto do_full_cleanup; + ret = lxc_netdev_rename_by_name(oldname, name); + if (ret < 0) { + usernic_error("Error %d renaming netdev %s to %s in container\n", + ret, oldname, name); + goto do_full_cleanup; } - fret = 0; + /* Retrieve new name for interface. */ + if (!if_indextoname(ifindex, ifname)) { + usernic_error("Failed to get new netdev name: %s\n", strerror(errno)); + goto do_full_cleanup; + } + + /* Allocation failure for strdup() is checked below. */ + name = strdup(ifname); + string_ret = name; do_full_cleanup: ret = setresuid(ruid, euid, suid); if (ret < 0) { - usernic_error("Failed to restore privilege by setting effective " - "user id to %d, real user id to %d, and saved user " - "ID to %d: %s\n", - ruid, euid, suid, strerror(errno)); - fret = -1; + usernic_error("Failed to restore privilege by setting " + "effective user id to %d, real user id to %d, " + "and saved user ID to %d: %s\n", ruid, euid, suid, + strerror(errno)); + + string_ret = NULL; } ret = setns(ofd, CLONE_NEWNET); if (ret < 0) { usernic_error("Failed to setns() to original network namespace " - "of PID %d: %s\n", - ofd, strerror(errno)); - fret = -1; + "of PID %d: %s\n", ofd, strerror(errno)); + + string_ret = NULL; } do_partial_cleanup: if (fd >= 0) close(fd); + + if (!string_ret && name) + free(name); + close(ofd); - return fret; + return string_ret; } /* If the caller (real uid, not effective uid) may read the /proc/[pid]/ns/net, @@ -939,7 +944,7 @@ struct user_nic_args { int main(int argc, char *argv[]) { int fd, n, pid, ret; - char *me; + char *me, *newname; char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; struct user_nic_args args; @@ -1013,8 +1018,8 @@ int main(int argc, char *argv[]) } /* Now rename the 
link. */ - ret = rename_in_ns(pid, cnic, &args.veth_name); - if (ret < 0) { + newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name); + if (!newname) { usernic_error("%s", "Failed to rename the link\n"); ret = lxc_netdev_delete_by_name(cnic); if (ret < 0) @@ -1024,7 +1029,8 @@ int main(int argc, char *argv[]) } /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ - fprintf(stdout, "%s:%s\n", args.veth_name, nicname); + fprintf(stdout, "%s:%s\n", newname, nicname); + free(newname); free(nicname); exit(EXIT_SUCCESS); } From 0cffb6769da95b9c5980d7e2e01e9a75238461d3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 04:59:57 +0200 Subject: [PATCH 12/18] network: send ifindex for unpriv networks We use the ifindex as an indicator that liblxc created the network so let's record it for the unprivileged case as well. Signed-off-by: Christian Brauner --- src/lxc/conf.c | 12 ++++++++++++ src/lxc/lxc_user_nic.c | 10 ++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 5adeebbc3..2be6d2ed3 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3274,6 +3274,7 @@ bool lxc_delete_network(struct lxc_handler *handler) static int unpriv_assign_nic(const char *lxcpath, char *lxcname, struct lxc_netdev *netdev, pid_t pid) { + int ret; pid_t child; int bytes, pipefd[2]; char *token, *saveptr = NULL; @@ -3387,6 +3388,17 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, return -1; } + /* fill netdev->veth_attr.pair field */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + ret = lxc_safe_int(token, &netdev->ifindex); + if (ret < 0) { + ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); + return -1; + } + return 0; } diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 5769dfb48..3486e5cdd 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -767,7 +767,8 @@ again: goto again; } -static char 
*lxc_secure_rename_in_ns(int pid, char *oldname, char *newname) +static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, + int *ifidx) { int ret; uid_t ruid, suid, euid; @@ -850,6 +851,7 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname) /* Allocation failure for strdup() is checked below. */ name = strdup(ifname); string_ret = name; + *ifidx = ifindex; do_full_cleanup: ret = setresuid(ruid, euid, suid); @@ -943,7 +945,7 @@ struct user_nic_args { int main(int argc, char *argv[]) { - int fd, n, pid, ret; + int fd, ifindex, n, pid, ret; char *me, *newname; char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; @@ -1018,7 +1020,7 @@ int main(int argc, char *argv[]) } /* Now rename the link. */ - newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name); + newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name, &ifindex); if (!newname) { usernic_error("%s", "Failed to rename the link\n"); ret = lxc_netdev_delete_by_name(cnic); @@ -1029,7 +1031,7 @@ int main(int argc, char *argv[]) } /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ - fprintf(stdout, "%s:%s\n", newname, nicname); + fprintf(stdout, "%s:%s:%d\n", newname, nicname, ifindex); free(newname); free(nicname); exit(EXIT_SUCCESS); From 7a582518b38d6e712f82e0c2bb9b4ef84f29997a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 05:01:14 +0200 Subject: [PATCH 13/18] network: log ifindex Signed-off-by: Christian Brauner --- src/lxc/confile_utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index fa3f64598..a74e2dc6d 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -253,6 +253,7 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) netdev = it->elem; TRACE("index: %zd", netdev->idx); + TRACE("ifindex: %d", netdev->ifindex); switch (netdev->type) { case LXC_NET_VETH: TRACE("type: veth"); From 
a055595ca6ebc0d7b64b3b0fcd454a9443a044b9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 05:02:23 +0200 Subject: [PATCH 14/18] network: delete ovs for unprivileged networks Signed-off-by: Christian Brauner --- src/lxc/conf.c | 134 ++++++++++++++++++++++++++++++++++++----- src/lxc/conf.h | 2 + src/lxc/lxc_user_nic.c | 22 ++++++- 3 files changed, 143 insertions(+), 15 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 2be6d2ed3..73b1089e5 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3207,26 +3207,44 @@ bool lxc_delete_network(struct lxc_handler *handler) * namespace is destroyed but in case we did not move the * interface to the network namespace, we have to destroy it. */ - ret = lxc_netdev_delete_by_index(netdev->ifindex); - if (-ret == ENODEV) { - INFO("Interface \"%s\" with index %d already deleted " - "or existing in different network namespace", - netdev->name ? netdev->name : "(null)", netdev->ifindex); - } else if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" with index %d: " - "%s", netdev->name ? netdev->name : "(null)", - netdev->ifindex, strerror(-ret)); - continue; + if (!am_unpriv()) { + ret = lxc_netdev_delete_by_index(netdev->ifindex); + if (-ret == ENODEV) { + INFO("Interface \"%s\" with index %d already " + "deleted or existing in different network " + "namespace", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } else if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" with " + "index %d: %s", + netdev->name ? netdev->name : "(null)", + netdev->ifindex, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" with index %d", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); } - INFO("Removed interface \"%s\" with index %d", - netdev->name ? 
netdev->name : "(null)", netdev->ifindex); if (netdev->type != LXC_NET_VETH) continue; - if (am_unpriv()) + if (am_unpriv()) { + if (is_ovs_bridge(netdev->link)) { + ret = lxc_unpriv_delete_nic(handler->lxcpath, + handler->name, "ovs", + netdev, getpid()); + if (ret < 0) + WARN("Failed to remove port \"%s\" " + "from openvswitch bridge \"%s\"", + netdev->priv.veth_attr.pair, + netdev->link); + } + continue; + } /* Explicitly delete host veth device to prevent lingering * devices. We had issues in LXD around this. @@ -5124,3 +5142,91 @@ struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings) return result; } + +int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, char *type, + struct lxc_netdev *netdev, pid_t pid) +{ + pid_t child; + int bytes, pipefd[2]; + char netdev_link[IFNAMSIZ + 1]; + char buffer[MAX_BUFFER_SIZE] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("nic type %d not support for unprivileged use", + netdev->type); + return -1; + } + + if (pipe(pipefd) < 0) { + SYSERROR("pipe failed"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("fork"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { /* child */ + /* Call lxc-user-nic pid type bridge. */ + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); /* Close the read-end of the pipe. */ + + /* Redirect stdout to write-end of the pipe. */ + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); /* Close the write-end of the pipe. 
*/ + if (ret < 0) { + SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); + exit(EXIT_FAILURE); + } + + if (netdev->link) + strncpy(netdev_link, netdev->link, IFNAMSIZ); + else + strncpy(netdev_link, "none", IFNAMSIZ); + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic delete %s %s %s ovs %s %s", lxcpath, + lxcname, pidstr, netdev_link, netdev->priv.veth_attr.pair); + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, + lxcname, pidstr, "ovs", netdev_link, + netdev->priv.veth_attr.pair, (char *)NULL); + SYSERROR("Failed to exec lxc-user-nic."); + exit(EXIT_FAILURE); + } + + /* close the write-end of the pipe */ + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + buffer[bytes - 1] = '\0'; + + if (wait_for_pid(child) != 0) { + ERROR("lxc-user-nic failed to delete requested network: %s", + buffer[0] != '\0' ? 
buffer : "(null)"); + close(pipefd[0]); + return -1; + } + TRACE("Received output \"%s\" from lxc-user-nic", buffer); + + /* close the read-end of the pipe */ + close(pipefd[0]); + + return 0; +} diff --git a/src/lxc/conf.h b/src/lxc/conf.h index f085bc94c..89e792348 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -500,5 +500,7 @@ extern FILE *make_anonymous_mount_file(struct lxc_list *mount); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); +extern int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, char *type, + struct lxc_netdev *netdev, pid_t pid); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 3486e5cdd..0c2911d3e 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -836,9 +836,10 @@ static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, name = "eth%d"; ret = lxc_netdev_rename_by_name(oldname, name); + name = NULL; if (ret < 0) { usernic_error("Error %d renaming netdev %s to %s in container\n", - ret, oldname, name); + ret, oldname, newname ? 
newname : "eth%d"); goto do_full_cleanup; } @@ -1007,6 +1008,25 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } + if (!strcmp(args.cmd, "delete")) { + close(fd); + + if (strcmp(args.type, "ovs")) { + usernic_error("%s", "Deletion of non ovs type network " + "devics not implemented\n"); + exit(EXIT_FAILURE); + } + + ret = lxc_ovs_delete_port(args.link, args.veth_name); + if (ret < 0) { + usernic_error("Failed to remove port \"%s\" from " + "openvswitch bridge \"%s\"", + args.veth_name, args.link); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + } + n = get_alloted(me, args.type, args.link, &alloted); if (n > 0) nicname = get_nic_if_avail(fd, alloted, pid, args.type, From af256970563c3e95e6cd6d396f2e27da8bd2756f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 15:03:16 +0200 Subject: [PATCH 15/18] lxc-user-nic: non-functional changes Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 103 +++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 41 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 0c2911d3e..628a9c70b 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -73,12 +73,13 @@ static void usage(char *me, bool fail) static int open_and_lock(char *path) { - int fd; + int fd, ret; struct flock lk; - fd = open(path, O_RDWR|O_CREAT, S_IWUSR | S_IRUSR); + fd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); if (fd < 0) { - usernic_error("Failed to open %s: %s\n", path, strerror(errno)); + usernic_error("Failed to open \"%s\": %s\n", path, + strerror(errno)); return -1; } @@ -86,8 +87,11 @@ static int open_and_lock(char *path) lk.l_whence = SEEK_SET; lk.l_start = 0; lk.l_len = 0; - if (fcntl(fd, F_SETLKW, &lk) < 0) { - usernic_error("Failed to lock %s: %s\n", path, strerror(errno)); + + ret = fcntl(fd, F_SETLKW, &lk); + if (ret < 0) { + usernic_error("Failed to lock \"%s\": %s\n", path, + strerror(errno)); close(fd); return -1; } @@ -95,14 +99,13 @@ static 
int open_and_lock(char *path) return fd; } - static char *get_username(void) { struct passwd *pwd; pwd = getpwuid(getuid()); if (!pwd) { - usernic_error("Failed to call get username: %s\n", strerror(errno)); + usernic_error("Failed to get username: %s\n", strerror(errno)); return NULL; } @@ -132,9 +135,8 @@ static char **get_groupnames(void) ngroups = getgroups(0, NULL); if (ngroups < 0) { - usernic_error( - "Failed to get number of groups the user belongs to: %s\n", - strerror(errno)); + usernic_error("Failed to get number of groups the user " + "belongs to: %s\n", strerror(errno)); return NULL; } if (ngroups == 0) @@ -208,19 +210,21 @@ struct alloted_s { struct alloted_s *next; }; -static struct alloted_s *append_alloted(struct alloted_s **head, char *name, int n) +static struct alloted_s *append_alloted(struct alloted_s **head, char *name, + int n) { struct alloted_s *cur, *al; if (!head || !name) { - /* sanity check. parameters should not be null */ + /* Sanity check. Parameters should not be null. */ usernic_error("%s\n", "Unexpected NULL argument"); return NULL; } al = malloc(sizeof(struct alloted_s)); if (!al) { - usernic_error("Failed to allocate memory: %s\n", strerror(errno)); + usernic_error("Failed to allocate memory: %s\n", + strerror(errno)); return NULL; } @@ -271,7 +275,8 @@ static void free_alloted(struct alloted_s **head) * Return the count entry for the calling user if there is one. Else * return -1. 
*/ -static int get_alloted(char *me, char *intype, char *link, struct alloted_s **alloted) +static int get_alloted(char *me, char *intype, char *link, + struct alloted_s **alloted) { int n, ret; char name[100], type[100], br[100]; @@ -284,13 +289,15 @@ static int get_alloted(char *me, char *intype, char *link, struct alloted_s **al fin = fopen(LXC_USERNIC_CONF, "r"); if (!fin) { - usernic_error("Failed to open \"%s\": %s\n", LXC_USERNIC_CONF, strerror(errno)); + usernic_error("Failed to open \"%s\": %s\n", LXC_USERNIC_CONF, + strerror(errno)); return -1; } groups = get_groupnames(); while ((getline(&line, &len, fin)) != -1) { - ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name, type, br, &n); + ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name, + type, br, &n); if (ret != 4) continue; @@ -363,7 +370,8 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(u) || strncmp(p, u, strlen(u))) + if (!p2 || ((size_t)(p2 - p)) != strlen(u) || + strncmp(p, u, strlen(u))) goto next; p = p2 + 1; @@ -371,7 +379,8 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(t) || strncmp(p, t, strlen(t))) + if (!p2 || ((size_t)(p2 - p)) != strlen(t) || + strncmp(p, t, strlen(t))) goto next; p = p2 + 1; @@ -379,11 +388,12 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(l) || strncmp(p, l, strlen(l))) + if (!p2 || ((size_t)(p2 - p)) != strlen(l) || + strncmp(p, l, strlen(l))) goto next; return ret; -next: + next: p = p1 + 1; } @@ -422,7 +432,8 @@ static int instantiate_veth(char *n1, char **n2) err = lxc_veth_create(n1, *n2); if (err) { - usernic_error("Failed to create %s-%s : %s.\n", n1, *n2, strerror(-err)); + usernic_error("Failed to create %s-%s : %s.\n", n1, *n2, + strerror(-err)); return -1; } @@ -432,8 
+443,7 @@ static int instantiate_veth(char *n1, char **n2) err = setup_private_host_hw_addr(n1); if (err) usernic_error("Failed to change mac address of host interface " - "%s : %s\n", - n1, strerror(-err)); + "%s : %s\n", n1, strerror(-err)); return netdev_set_flag(n1, IFF_UP); } @@ -476,13 +486,15 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) if (mtu > 0) { ret = lxc_netdev_set_mtu(veth1buf, mtu); if (ret < 0) { - usernic_error("Failed to set mtu to %d on %s\n", mtu, veth1buf); + usernic_error("Failed to set mtu to %d on %s\n", + mtu, veth1buf); goto out_del; } ret = lxc_netdev_set_mtu(veth2buf, mtu); if (ret < 0) { - usernic_error("Failed to set mtu to %d on %s\n", mtu, veth2buf); + usernic_error("Failed to set mtu to %d on %s\n", + mtu, veth2buf); goto out_del; } } @@ -498,7 +510,8 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) /* pass veth2 to target netns */ ret = lxc_netdev_move_by_name(veth2buf, pid, NULL); if (ret < 0) { - usernic_error("Error moving %s to network namespace of %d\n", veth2buf, pid); + usernic_error("Error moving %s to network namespace of %d\n", + veth2buf, pid); goto out_del; } @@ -545,7 +558,8 @@ static bool get_nic_from_line(char *p, char **nic) int ret; char user[100], type[100], br[100]; - ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, type, br, *nic); + ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, + type, br, *nic); if (ret != 4) return false; @@ -579,9 +593,10 @@ static bool cull_entries(int fd, char *me, char *t, char *br) if (len == 0) return true; - buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); return false; } @@ -626,7 +641,8 @@ static bool 
cull_entries(int fd, char *me, char *t, char *br) munmap(buf, sb.st_size); if (ftruncate(fd, p - buf)) - usernic_error("Failed to set new file size: %s\n", strerror(errno)); + usernic_error("Failed to set new file size: %s\n", + strerror(errno)); return true; } @@ -647,10 +663,7 @@ static int count_entries(char *buf, off_t len, char *me, char *t, char *br) return count; } -/* - * The dbfile has lines of the format: - * user type bridge nicname - */ +/* The dbfile has lines of the format: user type bridge nicname. */ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, char *intype, char *br, int allowed, char **cnic) { @@ -677,9 +690,11 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, len = sb.st_size; if (len > 0) { - buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = + mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); return NULL; } @@ -724,11 +739,13 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, munmap(buf, len); if (ftruncate(fd, len + slen)) - usernic_error("Failed to set new file size: %s\n", strerror(errno)); + usernic_error("Failed to set new file size: %s\n", + strerror(errno)); - buf = mmap(NULL, len + slen, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = mmap(NULL, len + slen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); if (lxc_netdev_delete_by_name(nicname) != 0) usernic_error("Error unlinking %s\n", nicname); free(nicname); @@ -743,6 +760,7 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, static bool create_db_dir(char *fnam) { + int 
ret; char *p; p = alloca(strlen(fnam) + 1); @@ -757,8 +775,11 @@ again: return true; *p = '\0'; - if (mkdir(fnam, 0755) && errno != EEXIST) { - usernic_error("Failed to create %s: %s\n", fnam, strerror(errno)); + + ret = mkdir(fnam, 0755); + if (ret < 0 && errno != EEXIST) { + usernic_error("Failed to create %s: %s\n", fnam, + strerror(errno)); *p = '/'; return false; } From 8b8e00a24d3eafa57bfd5db3f364c6570f0c8f10 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 09:17:10 +0200 Subject: [PATCH 16/18] lxc-user-nic: check db before trying to delete Signed-off-by: Christian Brauner --- src/lxc/conf.c | 18 +++++------ src/lxc/conf.h | 2 +- src/lxc/lxc_user_nic.c | 73 +++++++++++++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 26 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 73b1089e5..d2a1f7cad 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3234,7 +3234,7 @@ bool lxc_delete_network(struct lxc_handler *handler) if (am_unpriv()) { if (is_ovs_bridge(netdev->link)) { ret = lxc_unpriv_delete_nic(handler->lxcpath, - handler->name, "ovs", + handler->name, netdev, getpid()); if (ret < 0) WARN("Failed to remove port \"%s\" " @@ -5143,12 +5143,11 @@ struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings) return result; } -int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, char *type, +int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, struct lxc_netdev *netdev, pid_t pid) { pid_t child; int bytes, pipefd[2]; - char netdev_link[IFNAMSIZ + 1]; char buffer[MAX_BUFFER_SIZE] = {0}; if (netdev->type != LXC_NET_VETH) { @@ -5187,20 +5186,19 @@ int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, char *type, exit(EXIT_FAILURE); } - if (netdev->link) - strncpy(netdev_link, netdev->link, IFNAMSIZ); - else - strncpy(netdev_link, "none", IFNAMSIZ); + if (!netdev->link) + SYSERROR("Network link for network device \"%s\" is " + "missing", netdev->priv.veth_attr.pair); ret = 
snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); if (ret < 0 || ret >= LXC_NUMSTRLEN64) exit(EXIT_FAILURE); pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - INFO("Execing lxc-user-nic delete %s %s %s ovs %s %s", lxcpath, - lxcname, pidstr, netdev_link, netdev->priv.veth_attr.pair); + INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, - lxcname, pidstr, "ovs", netdev_link, + lxcname, pidstr, "veth", netdev->link, netdev->priv.veth_attr.pair, (char *)NULL); SYSERROR("Failed to exec lxc-user-nic."); exit(EXIT_FAILURE); diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 89e792348..c54abb3ab 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -500,7 +500,7 @@ extern FILE *make_anonymous_mount_file(struct lxc_list *mount); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); -extern int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, char *type, +extern int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, struct lxc_netdev *netdev, pid_t pid); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 628a9c70b..db36142c1 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -572,19 +572,22 @@ struct entry_line { bool keep; }; -static bool cull_entries(int fd, char *me, char *t, char *br) +static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname, + bool *found_nicname) { - int i, n = 0; + int i, ret; off_t len; - char *buf, *p, *e, *nic; + char *buf, *e, *nic, *p; struct stat sb; + int n = 0; struct entry_line *entry_lines = NULL; nic = alloca(100); if (!nic) return false; - if (fstat(fd, &sb) < 0) { + ret = fstat(fd, &sb); + if (ret < 0) { usernic_error("Failed to fstat: %s\n", strerror(errno)); return false; } @@ -593,7 +596,7 @@ static 
bool cull_entries(int fd, char *me, char *t, char *br) if (len == 0) return true; - buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); @@ -622,6 +625,10 @@ static bool cull_entries(int fd, char *me, char *t, char *br) if (nic && !nic_exists(nic)) entry_lines[n - 1].keep = false; + if (nicname) + if (!strcmp(nic, nicname)) + *found_nicname = true; + p += entry_lines[n - 1].len + 1; if (p >= e) break; @@ -639,8 +646,9 @@ static bool cull_entries(int fd, char *me, char *t, char *br) } free(entry_lines); - munmap(buf, sb.st_size); - if (ftruncate(fd, p - buf)) + lxc_strmunmap(buf, sb.st_size); + ret = ftruncate(fd, p - buf); + if (ret < 0) usernic_error("Failed to set new file size: %s\n", strerror(errno)); @@ -676,7 +684,7 @@ static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, char *buf = NULL; for (n = names; n != NULL; n = n->next) - cull_entries(fd, n->name, intype, br); + cull_entries(fd, n->name, intype, br, NULL, NULL); if (allowed == 0) return NULL; @@ -965,9 +973,12 @@ struct user_nic_args { char *veth_name; }; +#define LXC_USERNIC_CREATE 0 +#define LXC_USERNIC_DELETE 1 + int main(int argc, char *argv[]) { - int fd, ifindex, n, pid, ret; + int fd, ifindex, n, pid, request, ret; char *me, *newname; char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; @@ -988,6 +999,15 @@ int main(int argc, char *argv[]) if (argc >= 8) args.veth_name = argv[7]; + if (!strcmp(args.cmd, "create")) { + request = LXC_USERNIC_CREATE; + } else if (!strcmp(args.cmd, "delete")) { + request = LXC_USERNIC_DELETE; + } else { + usage(argv[0], true); + exit(EXIT_FAILURE); + } + /* Set a sane env, because we are setuid-root. 
*/ ret = clearenv(); if (ret) { @@ -1029,12 +1049,36 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - if (!strcmp(args.cmd, "delete")) { - close(fd); + n = get_alloted(me, args.type, args.link, &alloted); - if (strcmp(args.type, "ovs")) { + if (request == LXC_USERNIC_DELETE) { + int ret; + struct alloted_s *it; + bool found_nicname = false; + + if (!is_ovs_bridge(args.link)) { usernic_error("%s", "Deletion of non ovs type network " - "devics not implemented\n"); + "devices not implemented\n"); + close(fd); + free_alloted(&alloted); + exit(EXIT_FAILURE); + } + + /* Check whether the network device we are supposed to delete + * exists in the db. If it doesn't we will not delete it as we + * need to assume the network device is not under our control. + * As a side effect we also clear any invalid entries from the + * database. + */ + for (it = alloted; it; it = it->next) + cull_entries(fd, it->name, args.type, args.link, + args.veth_name, &found_nicname); + close(fd); + free_alloted(&alloted); + + if (!found_nicname) { + usernic_error("%s", "Caller is not allowed to delete " + "network device\n"); exit(EXIT_FAILURE); } @@ -1045,10 +1089,9 @@ int main(int argc, char *argv[]) args.veth_name, args.link); exit(EXIT_FAILURE); } + exit(EXIT_SUCCESS); } - - n = get_alloted(me, args.type, args.link, &alloted); if (n > 0) nicname = get_nic_if_avail(fd, alloted, pid, args.type, args.link, n, &cnic); From 890928153e36299f7f1fc7a1147a63ae748acd61 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 27 Aug 2017 14:48:52 +0200 Subject: [PATCH 17/18] conf: increase lxc-user-nic buffer This will allow us to log more detailed failures. 
Signed-off-by: Christian Brauner --- src/lxc/conf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index d2a1f7cad..eeefeee6b 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3286,9 +3286,6 @@ bool lxc_delete_network(struct lxc_handler *handler) } #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" - -/* lxc-user-nic returns "interface_name:interface_name\n" */ -#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2 static int unpriv_assign_nic(const char *lxcpath, char *lxcname, struct lxc_netdev *netdev, pid_t pid) { @@ -3297,7 +3294,7 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, int bytes, pipefd[2]; char *token, *saveptr = NULL; char netdev_link[IFNAMSIZ + 1]; - char buffer[MAX_BUFFER_SIZE] = {0}; + char buffer[MAXPATHLEN] = {0}; if (netdev->type != LXC_NET_VETH) { ERROR("nic type %d not support for unprivileged use", @@ -3363,7 +3360,7 @@ static int unpriv_assign_nic(const char *lxcpath, char *lxcname, /* close the write-end of the pipe */ close(pipefd[1]); - bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE); + bytes = read(pipefd[0], &buffer, MAXPATHLEN); if (bytes < 0) { SYSERROR("Failed to read from pipe file descriptor."); close(pipefd[0]); @@ -5148,7 +5145,7 @@ int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, { pid_t child; int bytes, pipefd[2]; - char buffer[MAX_BUFFER_SIZE] = {0}; + char buffer[MAXPATHLEN] = {0}; if (netdev->type != LXC_NET_VETH) { ERROR("nic type %d not support for unprivileged use", @@ -5207,7 +5204,7 @@ int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, /* close the write-end of the pipe */ close(pipefd[1]); - bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE); + bytes = read(pipefd[0], &buffer, MAXPATHLEN); if (bytes < 0) { SYSERROR("Failed to read from pipe file descriptor."); close(pipefd[0]); From 811ef482114116b82376b6edad2df801ade0da36 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 28 Aug 2017 12:23:29 +0200 
Subject: [PATCH 18/18] network: non-functional changes This moves all of the network handling code into network.{c,h}. This makes what is going on much clearer. Also it's easier to find relevant code if it is all in one place. Signed-off-by: Christian Brauner --- src/lxc/conf.c | 1212 +-------------------------------------- src/lxc/conf.h | 114 +--- src/lxc/confile_utils.c | 1 + src/lxc/lxc_user_nic.c | 2 +- src/lxc/network.c | 1195 +++++++++++++++++++++++++++++++++++++- src/lxc/network.h | 124 +++- src/lxc/start.c | 5 +- 7 files changed, 1324 insertions(+), 1329 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index eeefeee6b..276de98c1 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -238,8 +238,6 @@ char *lxchook_names[NUM_LXC_HOOKS] = {"pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy"}; -typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); - struct mount_opt { char *name; int clear; @@ -270,38 +268,6 @@ struct lxc_conf *current_config; /* Declare this here, since we don't want to reshuffle the whole file. 
*/ static int in_caplist(int cap, struct lxc_list *caps); -static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = instantiate_veth, - [LXC_NET_MACVLAN] = instantiate_macvlan, - [LXC_NET_VLAN] = instantiate_vlan, - [LXC_NET_PHYS] = instantiate_phys, - [LXC_NET_EMPTY] = instantiate_empty, - [LXC_NET_NONE] = instantiate_none, -}; - -static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = shutdown_veth, - [LXC_NET_MACVLAN] = shutdown_macvlan, - [LXC_NET_VLAN] = shutdown_vlan, - [LXC_NET_PHYS] = shutdown_phys, - [LXC_NET_EMPTY] = shutdown_empty, - [LXC_NET_NONE] = shutdown_none, -}; - static struct mount_opt mount_opt[] = { { "async", 1, MS_SYNCHRONOUS }, { "atime", 1, MS_NOATIME }, @@ -530,8 +496,7 @@ static int run_script_argv(const char *name, const char *section, return run_buffer(buffer); } -static int run_script(const char *name, const char *section, const char *script, - ...) +int run_script(const char *name, const char *section, const char *script, ...) 
{ int ret; char *buffer, *p; @@ -2325,314 +2290,6 @@ static int dropcaps_except(struct lxc_list *caps) return 0; } -static int setup_hw_addr(char *hwaddr, const char *ifname) -{ - struct sockaddr sockaddr; - struct ifreq ifr; - int ret, fd, saved_errno; - - ret = lxc_convert_mac(hwaddr, &sockaddr); - if (ret) { - ERROR("mac address '%s' conversion failed : %s", - hwaddr, strerror(-ret)); - return -1; - } - - memcpy(ifr.ifr_name, ifname, IFNAMSIZ); - ifr.ifr_name[IFNAMSIZ-1] = '\0'; - memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - ERROR("socket failure : %s", strerror(errno)); - return -1; - } - - ret = ioctl(fd, SIOCSIFHWADDR, &ifr); - saved_errno = errno; - close(fd); - if (ret) - ERROR("ioctl failure : %s", strerror(saved_errno)); - - DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name); - - return ret; -} - -static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inetdev *inetdev; - int err; - - lxc_list_for_each(iterator, ip) { - - inetdev = iterator->elem; - - err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, - &inetdev->bcast, inetdev->prefix); - if (err) { - ERROR("failed to setup_ipv4_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inet6dev *inet6dev; - int err; - - lxc_list_for_each(iterator, ip) { - - inet6dev = iterator->elem; - - err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, - &inet6dev->mcast, &inet6dev->acast, - inet6dev->prefix); - if (err) { - ERROR("failed to setup_ipv6_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) -{ - char ifname[IFNAMSIZ]; - int err; - const char *net_type_name; - char *current_ifname = ifname; - - /* empty 
network namespace */ - if (!netdev->ifindex) { - if (netdev->flags & IFF_UP) { - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - - if (netdev->type == LXC_NET_EMPTY) - return 0; - - if (netdev->type == LXC_NET_NONE) - return 0; - - if (netdev->type != LXC_NET_VETH) { - net_type_name = lxc_net_type_to_str(netdev->type); - ERROR("%s networks are not supported for containers " - "not setup up by privileged users", - net_type_name); - return -1; - } - - netdev->ifindex = if_nametoindex(netdev->name); - } - - /* get the new ifindex in case of physical netdev */ - if (netdev->type == LXC_NET_PHYS) { - if (!(netdev->ifindex = if_nametoindex(netdev->link))) { - ERROR("failed to get ifindex for %s", - netdev->link); - return -1; - } - } - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* Default: let the system to choose one interface name. - * When the IFLA_IFNAME attribute is passed something like "%d" - * netlink will replace the format specifier with an appropriate index. - */ - if (!netdev->name) - netdev->name = netdev->type == LXC_NET_PHYS ? 
- netdev->link : "eth%d"; - - /* rename the interface name */ - if (strcmp(ifname, netdev->name) != 0) { - err = lxc_netdev_rename_by_name(ifname, netdev->name); - if (err) { - ERROR("failed to rename %s->%s : %s", ifname, netdev->name, - strerror(-err)); - return -1; - } - } - - /* Re-read the name of the interface because its name has changed - * and would be automatically allocated by the system - */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* set a mac address */ - if (netdev->hwaddr) { - if (setup_hw_addr(netdev->hwaddr, current_ifname)) { - ERROR("failed to setup hw address for '%s'", - current_ifname); - return -1; - } - } - - /* setup ipv4 addresses on the interface */ - if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { - ERROR("failed to setup ip addresses for '%s'", - ifname); - return -1; - } - - /* setup ipv6 addresses on the interface */ - if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { - ERROR("failed to setup ipv6 addresses for '%s'", - ifname); - return -1; - } - - /* set the network device up */ - if (netdev->flags & IFF_UP) { - int err; - - err = lxc_netdev_up(current_ifname); - if (err) { - ERROR("failed to set '%s' up : %s", current_ifname, - strerror(-err)); - return -1; - } - - /* the network is up, make the loopback up too */ - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - - /* We can only set up the default routes after bringing - * up the interface, sine bringing up the interface adds - * the link-local routes and we can't add a default - * route if the gateway is not reachable. 
*/ - - /* setup ipv4 gateway on the interface */ - if (netdev->ipv4_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv4)) { - ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to add ipv4 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to setup ipv4 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv4_gateway_auto) { - char buf[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv4 gateway '%s'", buf); - } - return -1; - } - } - } - - /* setup ipv6 gateway on the interface */ - if (netdev->ipv6_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { - ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to add ipv6 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to setup ipv6 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv6_gateway_auto) { - char buf[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv6 gateway '%s'", buf); - } - return -1; - } - } 
- } - - DEBUG("'%s' has been setup", current_ifname); - - return 0; -} - -static int lxc_setup_networks_in_child_namespaces(const struct lxc_conf *conf, - struct lxc_list *network) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - - lxc_log_configured_netdevs(conf); - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - /* REMOVE in LXC 3.0 */ - if (netdev->idx < 0) { - ERROR("WARNING: using \"lxc.network.*\" keys to define " - "networks is DEPRECATED, please switch to using " - "\"lxc.net.[i].* keys\""); - } - - if (lxc_setup_netdev_in_child_namespaces(netdev)) { - ERROR("failed to setup netdev"); - return -1; - } - } - - if (!lxc_list_empty(network)) - INFO("network has been setup"); - - return 0; -} - static int parse_resource(const char *res) { size_t i; int resid = -1; @@ -2672,46 +2329,6 @@ int setup_resource_limits(struct lxc_list *limits, pid_t pid) { return 0; } -/* try to move physical nics to the init netns */ -void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) -{ - int i, oldfd; - char ifname[IFNAMSIZ]; - - if (netnsfd < 0 || conf->num_savednics == 0) - return; - - INFO("Running to reset %d nic names.", conf->num_savednics); - - oldfd = lxc_preserve_ns(getpid(), "net"); - if (oldfd < 0) { - SYSERROR("Failed to open monitor netns fd."); - return; - } - - if (setns(netnsfd, 0) != 0) { - SYSERROR("Failed to enter container netns to reset nics"); - close(oldfd); - return; - } - for (i=0; inum_savednics; i++) { - struct saved_nic *s = &conf->saved_nics[i]; - /* retrieve the name of the interface */ - if (!if_indextoname(s->ifindex, ifname)) { - WARN("no interface corresponding to index '%d'", s->ifindex); - continue; - } - if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) - WARN("Error moving nic name:%s back to host netns", ifname); - free(s->orig_name); - } - conf->num_savednics = 0; - - if (setns(oldfd, 0) != 0) - SYSERROR("Failed to re-enter monitor's netns"); - close(oldfd); -} - static char 
*default_rootfs_mount = LXCROOTFSMOUNT; struct lxc_conf *lxc_conf_init(void) @@ -2777,695 +2394,6 @@ struct lxc_conf *lxc_conf_init(void) return new; } -static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1, *veth2; - char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; - int bridge_index, err; - unsigned int mtu = 0; - - if (netdev->priv.veth_attr.pair) { - veth1 = netdev->priv.veth_attr.pair; - if (handler->conf->reboot) - lxc_netdev_delete_by_name(veth1); - } else { - err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); - if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */ - ERROR("veth1 name too long"); - return -1; - } - veth1 = lxc_mkifname(veth1buf); - if (!veth1) { - ERROR("failed to allocate a temporary name"); - return -1; - } - /* store away for deconf */ - memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); - } - - snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); - veth2 = lxc_mkifname(veth2buf); - if (!veth2) { - ERROR("failed to allocate a temporary name"); - goto out_delete; - } - - err = lxc_veth_create(veth1, veth2); - if (err) { - ERROR("failed to create veth pair \"%s\" and \"%s\": %s", veth1, - veth2, strerror(-err)); - goto out_delete; - } - - /* changing the high byte of the mac address to 0xfe, the bridge interface - * will always keep the host's mac address and not take the mac address - * of a container */ - err = setup_private_host_hw_addr(veth1); - if (err) { - ERROR("failed to change mac address of host interface \"%s\": %s", - veth1, strerror(-err)); - goto out_delete; - } - - netdev->ifindex = if_nametoindex(veth2); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for \"%s\"", veth2); - goto out_delete; - } - - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) - WARN("failed to parse mtu from"); - else - INFO("retrieved mtu %d", mtu); - } else if (netdev->link) { - bridge_index = if_nametoindex(netdev->link); - if (bridge_index) { - mtu = 
netdev_get_mtu(bridge_index); - INFO("retrieved mtu %d from %s", mtu, netdev->link); - } else { - mtu = netdev_get_mtu(netdev->ifindex); - INFO("retrieved mtu %d from %s", mtu, veth2); - } - } - - if (mtu) { - err = lxc_netdev_set_mtu(veth1, mtu); - if (!err) - err = lxc_netdev_set_mtu(veth2, mtu); - if (err) { - ERROR("failed to set mtu \"%d\" for veth pair \"%s\" " - "and \"%s\": %s", - mtu, veth1, veth2, strerror(-err)); - goto out_delete; - } - } - - if (netdev->link) { - err = lxc_bridge_attach(netdev->link, veth1); - if (err) { - ERROR("failed to attach \"%s\" to bridge \"%s\": %s", - veth1, netdev->link, strerror(-err)); - goto out_delete; - } - INFO("attached \"%s\" to bridge \"%s\"", veth1, netdev->link); - } - - err = lxc_netdev_up(veth1); - if (err) { - ERROR("failed to set \"%s\" up: %s", veth1, strerror(-err)); - goto out_delete; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "veth", veth1, (char*) NULL); - if (err) - goto out_delete; - } - - DEBUG("instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, - netdev->ifindex); - - return 0; - -out_delete: - if (netdev->ifindex != 0) - lxc_netdev_delete_by_name(veth1); - if (!netdev->priv.veth_attr.pair) - free(veth1); - free(veth2); - return -1; -} - -static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1; - int err; - - if (netdev->priv.veth_attr.pair) - veth1 = netdev->priv.veth_attr.pair; - else - veth1 = netdev->priv.veth_attr.veth1; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "veth", veth1, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peerbuf[IFNAMSIZ], *peer; - int err; - - if (!netdev->link) { - ERROR("no link specified for macvlan netdev"); - return -1; - } - - err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); - if (err >= 
sizeof(peerbuf)) - return -1; - - peer = lxc_mkifname(peerbuf); - if (!peer) { - ERROR("failed to make a temporary name"); - return -1; - } - - err = lxc_macvlan_create(netdev->link, peer, - netdev->priv.macvlan_attr.mode); - if (err) { - ERROR("failed to create macvlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - goto out; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", peer); - goto out; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "macvlan", netdev->link, (char*) NULL); - if (err) - goto out; - } - - DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'", - peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); - - return 0; -out: - lxc_netdev_delete_by_name(peer); - free(peer); - return -1; -} - -static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "macvlan", netdev->link, - (char*) NULL); - if (err) - return -1; - } - return 0; -} - -/* XXX: merge with instantiate_macvlan */ -static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peer[IFNAMSIZ]; - int err; - static uint16_t vlan_cntr = 0; - unsigned int mtu = 0; - - if (!netdev->link) { - ERROR("no link specified for vlan netdev"); - return -1; - } - - err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); - if (err >= sizeof(peer)) { - ERROR("peer name too long"); - return -1; - } - - err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); - if (err) { - ERROR("failed to create vlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - return -1; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the ifindex for %s", peer); - 
lxc_netdev_delete_by_name(peer); - return -1; - } - - DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000", - netdev->ifindex); - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { - ERROR("Failed to retrieve mtu from: '%d'/'%s'.", - netdev->ifindex, netdev->name); - return -1; - } - err = lxc_netdev_set_mtu(peer, mtu); - if (err) { - ERROR("failed to set mtu '%s' for %s : %s", - netdev->mtu, peer, strerror(-err)); - lxc_netdev_delete_by_name(peer); - return -1; - } - } - - return 0; -} - -static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - if (!netdev->link) { - ERROR("no link specified for the physical interface"); - return -1; - } - - netdev->ifindex = if_nametoindex(netdev->link); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", netdev->link); - return -1; - } - - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - - return 0; -} - -static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - return 0; -} - -static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = 
run_script(handler->name, "net", netdev->downscript, - "down", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -int lxc_requests_empty_network(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - bool found_none = false, found_nic = false; - - if (lxc_list_empty(network)) - return 0; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_NONE) - found_none = true; - else - found_nic = true; - } - if (found_none && !found_nic) - return 1; - return 0; -} - -int lxc_setup_networks_in_parent_namespaces(struct lxc_handler *handler) -{ - bool am_root; - struct lxc_netdev *netdev; - struct lxc_list *iterator; - struct lxc_list *network = &handler->conf->network; - - /* We need to be root. */ - am_root = (getuid() == 0); - if (!am_root) - return 0; - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) { - ERROR("invalid network configuration type '%d'", - netdev->type); - return -1; - } - - if (netdev_conf[netdev->type](handler, netdev)) { - ERROR("failed to create netdev"); - return -1; - } - - } - - return 0; -} - -bool lxc_delete_network(struct lxc_handler *handler) -{ - int ret; - struct lxc_list *iterator; - struct lxc_list *network = &handler->conf->network; - bool deleted_all = true; - - lxc_list_for_each(iterator, network) { - char *hostveth = NULL; - struct lxc_netdev *netdev = iterator->elem; - - /* We can only delete devices whose ifindex we have. If we don't - * have the index it means that we didn't create it. 
- */ - if (!netdev->ifindex) - continue; - - if (netdev->type == LXC_NET_PHYS) { - ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); - if (ret < 0) - WARN("Failed to rename interface with index %d " - "to its initial name \"%s\"", - netdev->ifindex, netdev->link); - else - TRACE("Renamed interface with index %d to its " - "initial name \"%s\"", - netdev->ifindex, netdev->link); - continue; - } - - ret = netdev_deconf[netdev->type](handler, netdev); - if (ret < 0) - WARN("Failed to deconfigure network device"); - - /* Recent kernels remove the virtual interfaces when the network - * namespace is destroyed but in case we did not move the - * interface to the network namespace, we have to destroy it. - */ - if (!am_unpriv()) { - ret = lxc_netdev_delete_by_index(netdev->ifindex); - if (-ret == ENODEV) { - INFO("Interface \"%s\" with index %d already " - "deleted or existing in different network " - "namespace", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); - } else if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" with " - "index %d: %s", - netdev->name ? netdev->name : "(null)", - netdev->ifindex, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" with index %d", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); - } - - if (netdev->type != LXC_NET_VETH) - continue; - - if (am_unpriv()) { - if (is_ovs_bridge(netdev->link)) { - ret = lxc_unpriv_delete_nic(handler->lxcpath, - handler->name, - netdev, getpid()); - if (ret < 0) - WARN("Failed to remove port \"%s\" " - "from openvswitch bridge \"%s\"", - netdev->priv.veth_attr.pair, - netdev->link); - } - - continue; - } - - /* Explicitly delete host veth device to prevent lingering - * devices. We had issues in LXD around this. 
- */ - if (netdev->priv.veth_attr.pair) - hostveth = netdev->priv.veth_attr.pair; - else - hostveth = netdev->priv.veth_attr.veth1; - if (*hostveth == '\0') - continue; - - ret = lxc_netdev_delete_by_name(hostveth); - if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" from \"%s\": %s", - hostveth, netdev->link, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); - - if (!is_ovs_bridge(netdev->link)) { - netdev->priv.veth_attr.veth1[0] = '\0'; - continue; - } - - /* Delete the openvswitch port. */ - ret = lxc_ovs_delete_port(netdev->link, hostveth); - if (ret < 0) - WARN("Failed to remove port \"%s\" from openvswitch " - "bridge \"%s\"", hostveth, netdev->link); - else - INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", - hostveth, netdev->link); - - netdev->priv.veth_attr.veth1[0] = '\0'; - } - - return deleted_all; -} - -#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" -static int unpriv_assign_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) -{ - int ret; - pid_t child; - int bytes, pipefd[2]; - char *token, *saveptr = NULL; - char netdev_link[IFNAMSIZ + 1]; - char buffer[MAXPATHLEN] = {0}; - - if (netdev->type != LXC_NET_VETH) { - ERROR("nic type %d not support for unprivileged use", - netdev->type); - return -1; - } - - if (pipe(pipefd) < 0) { - SYSERROR("pipe failed"); - return -1; - } - - child = fork(); - if (child < 0) { - SYSERROR("fork"); - close(pipefd[0]); - close(pipefd[1]); - return -1; - } - - if (child == 0) { /* child */ - /* Call lxc-user-nic pid type bridge. */ - int ret; - char pidstr[LXC_NUMSTRLEN64]; - - close(pipefd[0]); /* Close the read-end of the pipe. */ - - /* Redirect stdout to write-end of the pipe. */ - ret = dup2(pipefd[1], STDOUT_FILENO); - if (ret >= 0) - ret = dup2(pipefd[1], STDERR_FILENO); - close(pipefd[1]); /* Close the write-end of the pipe. 
*/ - if (ret < 0) { - SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); - exit(EXIT_FAILURE); - } - - if (netdev->link) - strncpy(netdev_link, netdev->link, IFNAMSIZ); - else - strncpy(netdev_link, "none", IFNAMSIZ); - - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - - INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev_link, - netdev->name ? netdev->name : "(null)"); - if (netdev->name) - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", - lxcpath, lxcname, pidstr, "veth", netdev_link, - netdev->name, (char *)NULL); - else - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", - lxcpath, lxcname, pidstr, "veth", netdev_link, - (char *)NULL); - SYSERROR("Failed to exec lxc-user-nic."); - exit(EXIT_FAILURE); - } - - /* close the write-end of the pipe */ - close(pipefd[1]); - - bytes = read(pipefd[0], &buffer, MAXPATHLEN); - if (bytes < 0) { - SYSERROR("Failed to read from pipe file descriptor."); - close(pipefd[0]); - return -1; - } - buffer[bytes - 1] = '\0'; - - if (wait_for_pid(child) != 0) { - ERROR("lxc-user-nic failed to configure requested network: %s", - buffer[0] != '\0' ? 
buffer : "(null)"); - close(pipefd[0]); - return -1; - } - TRACE("Received output \"%s\" from lxc-user-nic", buffer); - - /* close the read-end of the pipe */ - close(pipefd[0]); - - /* fill netdev->name field */ - token = strtok_r(buffer, ":", &saveptr); - if (!token) - return -1; - - netdev->name = malloc(IFNAMSIZ + 1); - if (!netdev->name) { - SYSERROR("Failed to allocate memory."); - return -1; - } - memset(netdev->name, 0, IFNAMSIZ + 1); - strncpy(netdev->name, token, IFNAMSIZ); - - /* fill netdev->veth_attr.pair field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - return -1; - - netdev->priv.veth_attr.pair = strdup(token); - if (!netdev->priv.veth_attr.pair) { - ERROR("Failed to allocate memory."); - return -1; - } - - /* fill netdev->veth_attr.pair field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - return -1; - - ret = lxc_safe_int(token, &netdev->ifindex); - if (ret < 0) { - ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); - return -1; - } - - return 0; -} - -int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *network, pid_t pid) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - char ifname[IFNAMSIZ]; - int am_root = (getuid() == 0); - int err; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_VETH && !am_root) { - if (netdev->mtu) - INFO("mtu ignored due to insufficient privilege"); - if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid)) - return -1; - /* lxc-user-nic has moved the nic to the new ns. - * unpriv_assign_nic() fills in netdev->name. - * netdev->ifindex will be filed in at - * lxc_setup_netdev_in_child_namespaces. 
- */ - continue; - } - - /* empty network namespace, nothing to move */ - if (!netdev->ifindex) - continue; - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, ifname)) { - ERROR("no interface corresponding to index '%d'", netdev->ifindex); - return -1; - } - - err = lxc_netdev_move_by_name(ifname, pid, NULL); - if (err) { - ERROR("failed to move '%s' to the container : %s", - netdev->link, strerror(-err)); - return -1; - } - - DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid); - } - - return 0; -} - static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size) { @@ -3737,54 +2665,6 @@ again: return freeid; } -int lxc_find_gateway_addresses(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - int link_index; - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) - continue; - - if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { - ERROR("gateway = auto only supported for " - "veth and macvlan"); - return -1; - } - - if (!netdev->link) { - ERROR("gateway = auto needs a link interface"); - return -1; - } - - link_index = if_nametoindex(netdev->link); - if (!link_index) - return -EINVAL; - - if (netdev->ipv4_gateway_auto) { - if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { - ERROR("failed to automatically find ipv4 gateway " - "address from link interface '%s'", netdev->link); - return -1; - } - } - - if (netdev->ipv6_gateway_auto) { - if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { - ERROR("failed to automatically find ipv6 gateway " - "address from link interface '%s'", netdev->link); - return -1; - } - } - } - - return 0; -} - int lxc_create_tty(const char *name, struct lxc_conf *conf) { struct lxc_tty_info *tty_info = &conf->tty_info; @@ -4244,8 +3124,7 @@ int lxc_setup(struct 
lxc_handler *handler) } } - if (lxc_setup_networks_in_child_namespaces(lxc_conf, - &lxc_conf->network)) { + if (lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network)) { ERROR("failed to setup the network for '%s'", name); return -1; } @@ -4531,7 +3410,6 @@ int lxc_clear_environment(struct lxc_conf *c) return 0; } - int lxc_clear_mount_entries(struct lxc_conf *c) { struct lxc_list *it,*next; @@ -5139,89 +4017,3 @@ struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings) return result; } - -int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) -{ - pid_t child; - int bytes, pipefd[2]; - char buffer[MAXPATHLEN] = {0}; - - if (netdev->type != LXC_NET_VETH) { - ERROR("nic type %d not support for unprivileged use", - netdev->type); - return -1; - } - - if (pipe(pipefd) < 0) { - SYSERROR("pipe failed"); - return -1; - } - - child = fork(); - if (child < 0) { - SYSERROR("fork"); - close(pipefd[0]); - close(pipefd[1]); - return -1; - } - - if (child == 0) { /* child */ - /* Call lxc-user-nic pid type bridge. */ - int ret; - char pidstr[LXC_NUMSTRLEN64]; - - close(pipefd[0]); /* Close the read-end of the pipe. */ - - /* Redirect stdout to write-end of the pipe. */ - ret = dup2(pipefd[1], STDOUT_FILENO); - if (ret >= 0) - ret = dup2(pipefd[1], STDERR_FILENO); - close(pipefd[1]); /* Close the write-end of the pipe. 
*/ - if (ret < 0) { - SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); - exit(EXIT_FAILURE); - } - - if (!netdev->link) - SYSERROR("Network link for network device \"%s\" is " - "missing", netdev->priv.veth_attr.pair); - - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - - INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, - lxcname, pidstr, "veth", netdev->link, - netdev->priv.veth_attr.pair, (char *)NULL); - SYSERROR("Failed to exec lxc-user-nic."); - exit(EXIT_FAILURE); - } - - /* close the write-end of the pipe */ - close(pipefd[1]); - - bytes = read(pipefd[0], &buffer, MAXPATHLEN); - if (bytes < 0) { - SYSERROR("Failed to read from pipe file descriptor."); - close(pipefd[0]); - return -1; - } - buffer[bytes - 1] = '\0'; - - if (wait_for_pid(child) != 0) { - ERROR("lxc-user-nic failed to delete requested network: %s", - buffer[0] != '\0' ? 
buffer : "(null)"); - close(pipefd[0]); - return -1; - } - TRACE("Received output \"%s\" from lxc-user-nic", buffer); - - /* close the read-end of the pipe */ - close(pipefd[0]); - - return 0; -} diff --git a/src/lxc/conf.h b/src/lxc/conf.h index c54abb3ab..bd525a2be 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -46,103 +46,6 @@ typedef void * scmp_filter_ctx; #define subuidfile "/etc/subuid" #define subgidfile "/etc/subgid" -enum { - LXC_NET_EMPTY, - LXC_NET_VETH, - LXC_NET_MACVLAN, - LXC_NET_PHYS, - LXC_NET_VLAN, - LXC_NET_NONE, - LXC_NET_MAXCONFTYPE, -}; - -/* - * Defines the structure to configure an ipv4 address - * @address : ipv4 address - * @broadcast : ipv4 broadcast address - * @mask : network mask - */ -struct lxc_inetdev { - struct in_addr addr; - struct in_addr bcast; - unsigned int prefix; -}; - -struct lxc_route { - struct in_addr addr; -}; - -/* - * Defines the structure to configure an ipv6 address - * @flags : set the address up - * @address : ipv6 address - * @broadcast : ipv6 broadcast address - * @mask : network mask - */ -struct lxc_inet6dev { - struct in6_addr addr; - struct in6_addr mcast; - struct in6_addr acast; - unsigned int prefix; -}; - -struct lxc_route6 { - struct in6_addr addr; -}; - -struct ifla_veth { - char *pair; /* pair name */ - char veth1[IFNAMSIZ]; /* needed for deconf */ -}; - -struct ifla_vlan { - unsigned int flags; - unsigned int fmask; - unsigned short vid; - unsigned short pad; -}; - -struct ifla_macvlan { - int mode; /* private, vepa, bridge, passthru */ -}; - -union netdev_p { - struct ifla_veth veth_attr; - struct ifla_vlan vlan_attr; - struct ifla_macvlan macvlan_attr; -}; - -/* - * Defines a structure to configure a network device - * @link : lxc.net.[i].link, name of bridge or host iface to attach if any - * @name : lxc.net.[i].name, name of iface on the container side - * @flags : flag of the network device (IFF_UP, ... 
) - * @ipv4 : a list of ipv4 addresses to be set on the network device - * @ipv6 : a list of ipv6 addresses to be set on the network device - * @upscript : a script filename to be executed during interface configuration - * @downscript : a script filename to be executed during interface destruction - * @idx : network counter - */ -struct lxc_netdev { - ssize_t idx; - int type; - int flags; - int ifindex; - char *link; - char *name; - char *hwaddr; - char *mtu; - union netdev_p priv; - struct lxc_list ipv4; - struct lxc_list ipv6; - struct in_addr *ipv4_gateway; - bool ipv4_gateway_auto; - struct in6_addr *ipv6_gateway; - bool ipv6_gateway_auto; - char *upscript; - char *downscript; -}; - /* * Defines a generic struct to configure the control group. It is up to the * programmer to specify the right subsystem. @@ -327,12 +230,8 @@ enum lxchooks { LXCHOOK_DESTROY, NUM_LXC_HOOKS }; -extern char *lxchook_names[NUM_LXC_HOOKS]; -struct saved_nic { - int ifindex; - char *orig_name; -}; +extern char *lxchook_names[NUM_LXC_HOOKS]; struct lxc_conf { int is_execute; @@ -458,13 +357,7 @@ extern int detect_shared_rootfs(void); extern struct lxc_conf *lxc_conf_init(void); extern void lxc_conf_free(struct lxc_conf *conf); extern int pin_rootfs(const char *rootfs); -extern int lxc_requests_empty_network(struct lxc_handler *handler); -extern int lxc_setup_networks_in_parent_namespaces(struct lxc_handler *handler); -extern bool lxc_delete_network(struct lxc_handler *handler); -extern int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *networks, pid_t pid); extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); -extern int lxc_find_gateway_addresses(struct lxc_handler *handler); extern int lxc_create_tty(const char *name, struct lxc_conf *conf); extern void lxc_delete_tty(struct lxc_tty_info *tty_info); extern int lxc_clear_config_caps(struct lxc_conf *c); @@ -483,7 +376,6 @@ extern int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char 
*lxcpath); extern int lxc_setup(struct lxc_handler *handler); extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); -extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype); @@ -500,7 +392,7 @@ extern FILE *make_anonymous_mount_file(struct lxc_list *mount); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); -extern int lxc_unpriv_delete_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid); +extern int run_script(const char *name, const char *section, const char *script, + ...); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index a74e2dc6d..0d9ff66e6 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -31,6 +31,7 @@ #include "error.h" #include "log.h" #include "list.h" +#include "network.h" #include "parse.h" #include "utils.h" diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index db36142c1..0fb788877 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -17,7 +17,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#define _GNU_SOURCE /* See feature_test_macros(7) */ +#define _GNU_SOURCE #include #include #include diff --git a/src/lxc/network.c b/src/lxc/network.c index d1353eafe..12b7d697e 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -21,8 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" - +#define _GNU_SOURCE #include #include #include @@ -47,6 +46,8 @@ #include #include "conf.h" +#include "config.h" +#include "confile_utils.h" #include "log.h" #include "network.h" #include "nl.h" @@ -92,6 +93,355 @@ lxc_log_define(lxc_network, lxc); +typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); + +static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int bridge_index, err; + char *veth1, *veth2; + char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; + unsigned int mtu = 0; + + if (netdev->priv.veth_attr.pair) { + veth1 = netdev->priv.veth_attr.pair; + if (handler->conf->reboot) + lxc_netdev_delete_by_name(veth1); + } else { + err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); + if (err < 0 || (size_t)err >= sizeof(veth1buf)) + return -1; + + veth1 = lxc_mkifname(veth1buf); + if (!veth1) + return -1; + + /* store away for deconf */ + memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); + } + + snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); + veth2 = lxc_mkifname(veth2buf); + if (!veth2) + goto out_delete; + + err = lxc_veth_create(veth1, veth2); + if (err) { + ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1, + veth2, strerror(-err)); + goto out_delete; + } + + /* changing the high byte of the mac address to 0xfe, the bridge interface + * will always keep the host's mac address and not take the mac address + * of a container */ + err = setup_private_host_hw_addr(veth1); + if (err) { + ERROR("Failed to change mac address of host interface \"%s\": %s", + veth1, strerror(-err)); + goto out_delete; + } + + netdev->ifindex = 
if_nametoindex(veth2); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", veth2); + goto out_delete; + } + + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) + WARN("Failed to parse mtu"); + else + INFO("Retrieved mtu %d", mtu); + } else if (netdev->link) { + bridge_index = if_nametoindex(netdev->link); + if (bridge_index) { + mtu = netdev_get_mtu(bridge_index); + INFO("Retrieved mtu %d from %s", mtu, netdev->link); + } else { + mtu = netdev_get_mtu(netdev->ifindex); + INFO("Retrieved mtu %d from %s", mtu, veth2); + } + } + + if (mtu) { + err = lxc_netdev_set_mtu(veth1, mtu); + if (!err) + err = lxc_netdev_set_mtu(veth2, mtu); + if (err) { + ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" " + "and \"%s\": %s", + mtu, veth1, veth2, strerror(-err)); + goto out_delete; + } + } + + if (netdev->link) { + err = lxc_bridge_attach(netdev->link, veth1); + if (err) { + ERROR("Failed to attach \"%s\" to bridge \"%s\": %s", + veth1, netdev->link, strerror(-err)); + goto out_delete; + } + INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link); + } + + err = lxc_netdev_up(veth1); + if (err) { + ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err)); + goto out_delete; + } + + if (netdev->upscript) { + err = run_script(handler->name, "net", netdev->upscript, "up", + "veth", veth1, (char*) NULL); + if (err) + goto out_delete; + } + + DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, + netdev->ifindex); + + return 0; + +out_delete: + if (netdev->ifindex != 0) + lxc_netdev_delete_by_name(veth1); + if (!netdev->priv.veth_attr.pair) + free(veth1); + free(veth2); + return -1; +} + +static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peerbuf[IFNAMSIZ], *peer; + int err; + + if (!netdev->link) { + ERROR("No link for macvlan network device specified"); + return -1; + } + + err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); + if (err < 0 || (size_t)err >= 
sizeof(peerbuf)) + return -1; + + peer = lxc_mkifname(peerbuf); + if (!peer) + return -1; + + err = lxc_macvlan_create(netdev->link, peer, + netdev->priv.macvlan_attr.mode); + if (err) { + ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + goto out; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + goto out; + } + + if (netdev->upscript) { + err = run_script(handler->name, "net", netdev->upscript, "up", + "macvlan", netdev->link, (char*) NULL); + if (err) + goto out; + } + + DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d", + peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); + + return 0; +out: + lxc_netdev_delete_by_name(peer); + free(peer); + return -1; +} + +static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peer[IFNAMSIZ]; + int err; + static uint16_t vlan_cntr = 0; + unsigned int mtu = 0; + + if (!netdev->link) { + ERROR("No link for vlan network device specified"); + return -1; + } + + err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); + if (err < 0 || (size_t)err >= sizeof(peer)) + return -1; + + err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); + if (err) { + ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + return -1; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + lxc_netdev_delete_by_name(peer); + return -1; + } + + DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)", + peer, netdev->ifindex); + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { + ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".", + netdev->ifindex, + netdev->name ? 
netdev->name : "(null)"); + return -1; + } + err = lxc_netdev_set_mtu(peer, mtu); + if (err) { + ERROR("Failed to set mtu \"%s\" for \"%s\": %s", + netdev->mtu, peer, strerror(-err)); + lxc_netdev_delete_by_name(peer); + return -1; + } + } + + return 0; +} + +static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + if (!netdev->link) { + ERROR("No link for physical interface specified"); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link); + return -1; + } + + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + + return 0; +} + +static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + return 0; +} + +static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = instantiate_veth, + [LXC_NET_MACVLAN] = instantiate_macvlan, + [LXC_NET_VLAN] = instantiate_vlan, + [LXC_NET_PHYS] = instantiate_phys, + [LXC_NET_EMPTY] = instantiate_empty, + [LXC_NET_NONE] = instantiate_none, +}; + +static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char *veth1; + int err; + + if (netdev->priv.veth_attr.pair) + veth1 = netdev->priv.veth_attr.pair; + else + veth1 = netdev->priv.veth_attr.veth1; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "veth", veth1, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ 
+ int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "macvlan", netdev->link, + (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = shutdown_veth, + [LXC_NET_MACVLAN] = shutdown_macvlan, + [LXC_NET_VLAN] = shutdown_vlan, + [LXC_NET_PHYS] = shutdown_phys, + [LXC_NET_EMPTY] = shutdown_empty, + [LXC_NET_NONE] = shutdown_none, +}; + int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname) { int err; @@ -1544,7 +1894,7 @@ const char *lxc_net_type_to_str(int type) static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -char *lxc_mkifname(char *template) +char *lxc_mkifname(const char *template) { int ifexists = 0; size_t i = 0; @@ -1633,3 +1983,842 @@ int setup_private_host_hw_addr(char *veth1) return 0; } + +int lxc_find_gateway_addresses(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + struct lxc_netdev *netdev; + int link_index; + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) + continue; + + if 
(netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { + ERROR("Automatic gateway detection is only supported " + "for veth and macvlan"); + return -1; + } + + if (!netdev->link) { + ERROR("Automatic gateway detection needs a link interface"); + return -1; + } + + link_index = if_nametoindex(netdev->link); + if (!link_index) + return -EINVAL; + + if (netdev->ipv4_gateway_auto) { + if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { + ERROR("Failed to automatically find ipv4 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + + if (netdev->ipv6_gateway_auto) { + if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { + ERROR("Failed to automatically find ipv6 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + } + + return 0; +} + +#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" +static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int ret; + pid_t child; + int bytes, pipefd[2]; + char *token, *saveptr = NULL; + char netdev_link[IFNAMSIZ + 1]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (netdev->link) + strncpy(netdev_link, netdev->link, IFNAMSIZ); + else + strncpy(netdev_link, "none", IFNAMSIZ); + + ret = snprintf(pidstr, 
LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev_link, + netdev->name ? netdev->name : "(null)"); + if (netdev->name) + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + netdev->name, (char *)NULL); + else + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + (char *)NULL); + SYSERROR("Failed to execute lxc-user-nic"); + exit(EXIT_FAILURE); + } + + /* close the write-end of the pipe */ + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + buffer[bytes - 1] = '\0'; + + ret = wait_for_pid(child); + close(pipefd[0]); + if (ret != 0) { + ERROR("lxc-user-nic failed to configure requested network: %s", + buffer[0] != '\0' ? 
buffer : "(null)"); + return -1; + } + TRACE("Received output \"%s\" from lxc-user-nic", buffer); + + /* netdev->name */ + token = strtok_r(buffer, ":", &saveptr); + if (!token) + return -1; + + netdev->name = malloc(IFNAMSIZ + 1); + if (!netdev->name) { + SYSERROR("Failed to allocate memory."); + return -1; + } + memset(netdev->name, 0, IFNAMSIZ + 1); + strncpy(netdev->name, token, IFNAMSIZ); + + /* netdev->priv.veth_attr.pair */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + netdev->priv.veth_attr.pair = strdup(token); + if (!netdev->priv.veth_attr.pair) { + ERROR("Failed to allocate memory."); + return -1; + } + + /* netdev->ifindex */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + ret = lxc_safe_int(token, &netdev->ifindex); + if (ret < 0) { + ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); + return -1; + } + + return 0; +} + +static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int bytes, ret; + pid_t child; + int pipefd[2]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (!netdev->link) + SYSERROR("Network link for network device \"%s\" is " + "missing", netdev->priv.veth_attr.pair); + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= 
LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, + lxcname, pidstr, "veth", netdev->link, + netdev->priv.veth_attr.pair, (char *)NULL); + SYSERROR("Failed to exec lxc-user-nic."); + exit(EXIT_FAILURE); + } + + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + buffer[bytes - 1] = '\0'; + + if (wait_for_pid(child) != 0) { + ERROR("lxc-user-nic failed to delete requested network: %s", + buffer[0] != '\0' ? buffer : "(null)"); + close(pipefd[0]); + return -1; + } + + close(pipefd[0]); + + return 0; +} + +int lxc_create_network_priv(struct lxc_handler *handler) +{ + bool am_root; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + + /* We need to be root. 
*/ + am_root = (getuid() == 0); + if (!am_root) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) { + ERROR("Invalid network configuration type %d", netdev->type); + return -1; + } + + if (netdev_conf[netdev->type](handler, netdev)) { + ERROR("Failed to create network device"); + return -1; + } + + } + + return 0; +} + +int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid) +{ + int err; + bool am_root; + char ifname[IFNAMSIZ]; + struct lxc_list *iterator; + + am_root = (getuid() == 0); + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_VETH && !am_root) { + if (netdev->mtu) + INFO("mtu ignored due to insufficient privilege"); + if (lxc_create_network_unpriv(lxcpath, lxcname, netdev, pid)) + return -1; + /* lxc-user-nic has moved the nic to the new ns. + * unpriv_assign_nic() fills in netdev->name. + * netdev->ifindex will be filled in at + * lxc_setup_netdev_in_child_namespaces(). + */ + continue; + } + + /* empty network namespace, nothing to move */ + if (!netdev->ifindex) + continue; + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, ifname)) { + ERROR("No interface corresponding to ifindex \"%d\"", + netdev->ifindex); + return -1; + } + + err = lxc_netdev_move_by_name(ifname, pid, NULL); + if (err) { + ERROR("Failed to move network device \"%s\" to " + "network namespace %d: %s", ifname, pid, + strerror(-err)); + return -1; + } + + DEBUG("Moved network device \"%s\"/\"%s\" to network namespace " + "of %d:", ifname, netdev->name ? 
netdev->name : "(null)", + pid); + } + + return 0; +} + +bool lxc_delete_network(struct lxc_handler *handler) +{ + int ret; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + bool deleted_all = true; + + lxc_list_for_each(iterator, network) { + char *hostveth = NULL; + struct lxc_netdev *netdev = iterator->elem; + + /* We can only delete devices whose ifindex we have. If we don't + * have the index it means that we didn't create it. + */ + if (!netdev->ifindex) + continue; + + if (netdev->type == LXC_NET_PHYS) { + ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); + if (ret < 0) + WARN("Failed to rename interface with index %d " + "to its initial name \"%s\"", + netdev->ifindex, netdev->link); + else + TRACE("Renamed interface with index %d to its " + "initial name \"%s\"", + netdev->ifindex, netdev->link); + continue; + } + + ret = netdev_deconf[netdev->type](handler, netdev); + if (ret < 0) + WARN("Failed to deconfigure network device"); + + /* Recent kernels remove the virtual interfaces when the network + * namespace is destroyed but in case we did not move the + * interface to the network namespace, we have to destroy it. + */ + if (!am_unpriv()) { + ret = lxc_netdev_delete_by_index(netdev->ifindex); + if (-ret == ENODEV) { + INFO("Interface \"%s\" with index %d already " + "deleted or existing in different network " + "namespace", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } else if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" with " + "index %d: %s", + netdev->name ? netdev->name : "(null)", + netdev->ifindex, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" with index %d", + netdev->name ? 
netdev->name : "(null)", + netdev->ifindex); + } + + if (netdev->type != LXC_NET_VETH) + continue; + + if (am_unpriv()) { + if (is_ovs_bridge(netdev->link)) { + ret = lxc_delete_network_unpriv(handler->lxcpath, + handler->name, + netdev, getpid()); + if (ret < 0) + WARN("Failed to remove port \"%s\" " + "from openvswitch bridge \"%s\"", + netdev->priv.veth_attr.pair, + netdev->link); + } + + continue; + } + + /* Explicitly delete host veth device to prevent lingering + * devices. We had issues in LXD around this. + */ + if (netdev->priv.veth_attr.pair) + hostveth = netdev->priv.veth_attr.pair; + else + hostveth = netdev->priv.veth_attr.veth1; + if (*hostveth == '\0') + continue; + + ret = lxc_netdev_delete_by_name(hostveth); + if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" from \"%s\": %s", + hostveth, netdev->link, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); + + if (!is_ovs_bridge(netdev->link)) { + netdev->priv.veth_attr.veth1[0] = '\0'; + continue; + } + + /* Delete the openvswitch port. 
*/ + ret = lxc_ovs_delete_port(netdev->link, hostveth); + if (ret < 0) + WARN("Failed to remove port \"%s\" from openvswitch " + "bridge \"%s\"", hostveth, netdev->link); + else + INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", + hostveth, netdev->link); + + netdev->priv.veth_attr.veth1[0] = '\0'; + } + + return deleted_all; +} + +int lxc_requests_empty_network(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + bool found_none = false, found_nic = false; + + if (lxc_list_empty(network)) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_NONE) + found_none = true; + else + found_nic = true; + } + if (found_none && !found_nic) + return 1; + return 0; +} + +/* try to move physical nics to the init netns */ +void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) +{ + int ret; + int i, oldfd; + char ifname[IFNAMSIZ]; + + if (netnsfd < 0 || conf->num_savednics == 0) + return; + + INFO("Trying to restore network device names in original namespace for " + "%d network devices", conf->num_savednics); + + oldfd = lxc_preserve_ns(getpid(), "net"); + if (oldfd < 0) { + SYSERROR("Failed to preserve network namespace"); + return; + } + + ret = setns(netnsfd, 0); + if (ret < 0) { + SYSERROR("Failed to enter network namespace"); + close(oldfd); + return; + } + + for (i = 0; i < conf->num_savednics; i++) { + struct saved_nic *s = &conf->saved_nics[i]; + + /* retrieve the name of the interface */ + if (!if_indextoname(s->ifindex, ifname)) { + WARN("No interface corresponding to ifindex %d", + s->ifindex); + continue; + } + if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) + WARN("Error moving network device \"%s\" back to " + "network namespace", ifname); + free(s->orig_name); + } + conf->num_savednics = 0; + + ret = setns(oldfd, 0); + if (ret < 0) + SYSERROR("Failed to enter network namespace"); + 
close(oldfd); +} + +static int setup_hw_addr(char *hwaddr, const char *ifname) +{ + struct sockaddr sockaddr; + struct ifreq ifr; + int ret, fd, saved_errno; + + ret = lxc_convert_mac(hwaddr, &sockaddr); + if (ret) { + ERROR("Mac address \"%s\" conversion failed: %s", hwaddr, + strerror(-ret)); + return -1; + } + + memcpy(ifr.ifr_name, ifname, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; + memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + ret = ioctl(fd, SIOCSIFHWADDR, &ifr); + saved_errno = errno; + close(fd); + if (ret) + ERROR("Failed to perform ioctl: %s", strerror(saved_errno)); + + DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, + ifr.ifr_name); + + return ret; +} + +static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inetdev *inetdev = iterator->elem; + + err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, + &inetdev->bcast, inetdev->prefix); + if (err) { + ERROR("Failed to setup ipv4 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inet6dev *inet6dev = iterator->elem; + + err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, + &inet6dev->mcast, &inet6dev->acast, + inet6dev->prefix); + if (err) { + ERROR("Failed to setup ipv6 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) +{ + char ifname[IFNAMSIZ]; + int err; + const char *net_type_name; + char *current_ifname = ifname; + + /* empty network namespace */ + if (!netdev->ifindex) { + if (netdev->flags & IFF_UP) { + err = 
lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network " + "device up: %s", + strerror(-err)); + return -1; + } + } + + if (netdev->type == LXC_NET_EMPTY) + return 0; + + if (netdev->type == LXC_NET_NONE) + return 0; + + if (netdev->type != LXC_NET_VETH) { + net_type_name = lxc_net_type_to_str(netdev->type); + ERROR("%s networks are not supported for containers " + "not setup up by privileged users", + net_type_name); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->name); + } + + /* get the new ifindex in case of physical netdev */ + if (netdev->type == LXC_NET_PHYS) { + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to get ifindex for network device \"%s\"", + netdev->link); + return -1; + } + } + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* Default: let the system to choose one interface name. + * When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + */ + if (!netdev->name) + netdev->name = netdev->type == LXC_NET_PHYS ? 
+ netdev->link : "eth%d"; + + /* rename the interface name */ + if (strcmp(ifname, netdev->name) != 0) { + err = lxc_netdev_rename_by_name(ifname, netdev->name); + if (err) { + ERROR("Failed to rename network device \"%s\" to " + "\"%s\": %s", ifname, netdev->name, strerror(-err)); + return -1; + } + } + + /* Re-read the name of the interface because its name has changed + * and would be automatically allocated by the system + */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* set a mac address */ + if (netdev->hwaddr) { + if (setup_hw_addr(netdev->hwaddr, current_ifname)) { + ERROR("Failed to setup hw address for network device \"%s\"", + current_ifname); + return -1; + } + } + + /* setup ipv4 addresses on the interface */ + if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { + ERROR("Failed to setup ip addresses for network device \"%s\"", + ifname); + return -1; + } + + /* setup ipv6 addresses on the interface */ + if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { + ERROR("Failed to setup ipv6 addresses for network device \"%s\"", + ifname); + return -1; + } + + /* set the network device up */ + if (netdev->flags & IFF_UP) { + int err; + + err = lxc_netdev_up(current_ifname); + if (err) { + ERROR("Failed to set network device \"%s\" up: %s", + current_ifname, strerror(-err)); + return -1; + } + + /* the network is up, make the loopback up too */ + err = lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network device up: %s", + strerror(-err)); + return -1; + } + } + + /* We can only set up the default routes after bringing + * up the interface, sine bringing up the interface adds + * the link-local routes and we can't add a default + * route if the gateway is not reachable. 
*/ + + /* setup ipv4 gateway on the interface */ + if (netdev->ipv4_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv4)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to add ipv4 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to setup ipv4 gateway for " + "network device \"%s\": %s", + ifname, strerror(-err)); + if (netdev->ipv4_gateway_auto) { + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); + ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf); + } + return -1; + } + } + } + + /* setup ipv6 gateway on the interface */ + if (netdev->ipv6_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to add ipv6 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to setup ipv6 gateway for " + "network device \"%s\": %s", ifname, + strerror(-err)); + if (netdev->ipv6_gateway_auto) 
{ + char buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); + ERROR("Tried to set autodetected ipv6 " + "gateway for network device " + "\"%s\"", buf); + } + return -1; + } + } + } + + DEBUG("Network devie \"%s\" has been setup", current_ifname); + + return 0; +} + +int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network) +{ + struct lxc_list *iterator; + struct lxc_netdev *netdev; + + lxc_log_configured_netdevs(conf); + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + /* REMOVE in LXC 3.0 */ + if (netdev->idx < 0) { + ERROR("WARNING: using \"lxc.network.*\" keys to define " + "networks is DEPRECATED, please switch to using " + "\"lxc.net.[i].* keys\""); + } + + if (lxc_setup_netdev_in_child_namespaces(netdev)) { + ERROR("failed to setup netdev"); + return -1; + } + } + + if (!lxc_list_empty(network)) + INFO("network has been setup"); + + return 0; +} diff --git a/src/lxc/network.h b/src/lxc/network.h index 8a79a0622..d1b8de9b7 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -23,11 +23,120 @@ #ifndef __LXC_NETWORK_H #define __LXC_NETWORK_H +#include #include #include #include #include +#include "list.h" + +struct lxc_conf; +struct lxc_handler; +struct lxc_netdev; + +enum { + LXC_NET_EMPTY, + LXC_NET_VETH, + LXC_NET_MACVLAN, + LXC_NET_PHYS, + LXC_NET_VLAN, + LXC_NET_NONE, + LXC_NET_MAXCONFTYPE, +}; + +/* + * Defines the structure to configure an ipv4 address + * @address : ipv4 address + * @broadcast : ipv4 broadcast address + * @mask : network mask + */ +struct lxc_inetdev { + struct in_addr addr; + struct in_addr bcast; + unsigned int prefix; +}; + +struct lxc_route { + struct in_addr addr; +}; + +/* + * Defines the structure to configure an ipv6 address + * @flags : set the address up + * @address : ipv6 address + * @broadcast : ipv6 broadcast address + * @mask : network mask + */ +struct lxc_inet6dev { + struct in6_addr addr; + struct in6_addr mcast; + 
struct in6_addr acast; + unsigned int prefix; +}; + +struct lxc_route6 { + struct in6_addr addr; +}; + +struct ifla_veth { + char *pair; /* pair name */ + char veth1[IFNAMSIZ]; /* needed for deconf */ +}; + +struct ifla_vlan { + unsigned int flags; + unsigned int fmask; + unsigned short vid; + unsigned short pad; +}; + +struct ifla_macvlan { + int mode; /* private, vepa, bridge, passthru */ +}; + +union netdev_p { + struct ifla_veth veth_attr; + struct ifla_vlan vlan_attr; + struct ifla_macvlan macvlan_attr; +}; + +/* + * Defines a structure to configure a network device + * @link : lxc.net.[i].link, name of bridge or host iface to attach if any + * @name : lxc.net.[i].name, name of iface on the container side + * @flags : flag of the network device (IFF_UP, ... ) + * @ipv4 : a list of ipv4 addresses to be set on the network device + * @ipv6 : a list of ipv6 addresses to be set on the network device + * @upscript : a script filename to be executed during interface configuration + * @downscript : a script filename to be executed during interface destruction + * @idx : network counter + */ +struct lxc_netdev { + ssize_t idx; + int type; + int flags; + int ifindex; + char *link; + char *name; + char *hwaddr; + char *mtu; + union netdev_p priv; + struct lxc_list ipv4; + struct lxc_list ipv6; + struct in_addr *ipv4_gateway; + bool ipv4_gateway_auto; + struct in6_addr *ipv6_gateway; + bool ipv6_gateway_auto; + char *upscript; + char *downscript; +}; + +struct saved_nic { + int ifindex; + char *orig_name; +}; + /* Convert a string mac address to a socket structure. */ extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr); @@ -106,11 +215,22 @@ extern int lxc_neigh_proxy_on(const char *name, int family); /* Disable neighbor proxying. */ extern int lxc_neigh_proxy_off(const char *name, int family); -/* Generate a new unique network interface name. */ -extern char *lxc_mkifname(char *template); +/* Generate a new unique network interface name. 
+ * Allocated memory must be freed by caller. + */ +extern char *lxc_mkifname(const char *template); extern const char *lxc_net_type_to_str(int type); extern int setup_private_host_hw_addr(char *veth1); extern int netdev_get_mtu(int ifindex); +extern int lxc_create_network_priv(struct lxc_handler *handler); +extern bool lxc_delete_network(struct lxc_handler *handler); +extern int lxc_find_gateway_addresses(struct lxc_handler *handler); +extern int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid); +extern int lxc_requests_empty_network(struct lxc_handler *handler); +extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); +extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network); #endif /* __LXC_NETWORK_H */ diff --git a/src/lxc/start.c b/src/lxc/start.c index a360f784c..ac37a091c 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -77,6 +77,7 @@ #include "mainloop.h" #include "monitor.h" #include "namespace.h" +#include "network.h" #include "start.h" #include "storage.h" #include "storage_utils.h" @@ -1246,7 +1247,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* That should be done before the clone because we will * fill the netdev index and use them in the child. */ - if (lxc_setup_networks_in_parent_namespaces(handler)) { + if (lxc_create_network_priv(handler)) { ERROR("Failed to create the network."); lxc_sync_fini(handler); return -1; @@ -1364,7 +1365,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* Create the network configuration. */ if (handler->clone_flags & CLONE_NEWNET) { - if (lxc_assign_network(handler->lxcpath, handler->name, + if (lxc_create_network(handler->lxcpath, handler->name, &handler->conf->network, handler->pid)) { ERROR("Failed to create the configured network."); goto out_delete_net;