diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index fc658faf2..390923eca 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1199,6 +1199,7 @@ out_free: static int cgroup_rmdir(char *dirname) { + int ret; struct dirent *direntp; DIR *dir; int r = 0; @@ -1208,8 +1209,8 @@ static int cgroup_rmdir(char *dirname) return -1; while ((direntp = readdir(dir))) { - struct stat mystat; char *pathname; + struct stat mystat; if (!direntp) break; @@ -1220,32 +1221,40 @@ static int cgroup_rmdir(char *dirname) pathname = must_make_path(dirname, direntp->d_name, NULL); - if (lstat(pathname, &mystat)) { + ret = lstat(pathname, &mystat); + if (ret < 0) { if (!r) - WARN("failed to stat %s", pathname); + WARN("Failed to stat %s", pathname); r = -1; goto next; } if (!S_ISDIR(mystat.st_mode)) goto next; - if (cgroup_rmdir(pathname) < 0) + + ret = cgroup_rmdir(pathname); + if (ret < 0) r = -1; next: free(pathname); } - if (rmdir(dirname) < 0) { + ret = rmdir(dirname); + if (ret < 0) { if (!r) - WARN("failed to delete %s: %s", dirname, strerror(errno)); + WARN("Failed to delete \"%s\": %s", dirname, + strerror(errno)); r = -1; } - if (closedir(dir) < 0) { + ret = closedir(dir); + if (ret < 0) { if (!r) - WARN("failed to delete %s: %s", dirname, strerror(errno)); + WARN("Failed to delete \"%s\": %s", dirname, + strerror(errno)); r = -1; } + return r; } @@ -1263,35 +1272,91 @@ static int rmdir_wrapper(void *data) return cgroup_rmdir(path); } -void recursive_destroy(char *path, struct lxc_conf *conf) +int recursive_destroy(char *path, struct lxc_conf *conf) { int r; + if (conf && !lxc_list_empty(&conf->id_map)) r = userns_exec_1(conf, rmdir_wrapper, path, "rmdir_wrapper"); else r = cgroup_rmdir(path); - if (r < 0) ERROR("Error destroying %s", path); + + return r; } static void cgfsng_destroy(void *hdata, struct lxc_conf *conf) { + int i; + char *clean_parent, *clean_fullcgpath; + char **fields; + size_t recurse_upwards = 0; struct cgfsng_handler_data 
*d = hdata; if (!d) return; - if (d->container_cgroup && hierarchies) { - int i; - for (i = 0; hierarchies[i]; i++) { - struct hierarchy *h = hierarchies[i]; - if (h->fullcgpath) { - recursive_destroy(h->fullcgpath, conf); - free(h->fullcgpath); - h->fullcgpath = NULL; - } + if (!d->container_cgroup || !hierarchies) + return; + + if (d->cgroup_meta.dir) + clean_parent = d->cgroup_meta.dir; + else + clean_parent = d->cgroup_pattern; + fields = lxc_normalize_path(clean_parent); + if (fields) { + recurse_upwards = lxc_array_len((void **)fields); + if (recurse_upwards > 0 && clean_parent == d->cgroup_pattern) + recurse_upwards--; + lxc_free_array((void **)fields, free); + } + + for (i = 0; hierarchies[i]; i++) { + int ret; + size_t j; + struct hierarchy *h = hierarchies[i]; + + if (!h->fullcgpath) + continue; + + clean_fullcgpath = lxc_deslashify(h->fullcgpath); + if (!clean_fullcgpath) + clean_fullcgpath = h->fullcgpath; + + /* Delete the container's cgroup */ + ret = recursive_destroy(clean_fullcgpath, conf); + if (ret < 0) + goto next; + + if (h->fullcgpath == clean_fullcgpath) + goto next; + + /* Delete parent cgroups as specified in the containers config + * file. This takes care of not having useless empty cgroups + * around. + */ + for (j = 0; j < recurse_upwards; j++) { + char *s = clean_fullcgpath; + + s = strrchr(s, '/'); + if (!s) + break; + *s = '\0'; + + /* If we fail to delete a cgroup we know that any parent + * cgroup also cannot be removed. 
+ */ + ret = recursive_destroy(clean_fullcgpath, conf); + if (ret < 0) + break; } + +next: + if (h->fullcgpath != clean_fullcgpath) + free(clean_fullcgpath); + free(h->fullcgpath); + h->fullcgpath = NULL; } free_handler_data(d); @@ -1336,11 +1401,11 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) */ static inline bool cgfsng_create(void *hdata) { - struct cgfsng_handler_data *d = hdata; - char *tmp, *cgname, *offset; int i; - int idx = 0; size_t len; + char *cgname, *offset, *tmp; + int idx = 0; + struct cgfsng_handler_data *d = hdata; if (!d) return false; @@ -1351,7 +1416,7 @@ static inline bool cgfsng_create(void *hdata) } if (d->cgroup_meta.dir) - tmp = strdup(d->cgroup_meta.dir); + tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false); else tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern); if (!tmp) { diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 3a993c6e1..276de98c1 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -238,8 +238,6 @@ char *lxchook_names[NUM_LXC_HOOKS] = {"pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy"}; -typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); - struct mount_opt { char *name; int clear; @@ -270,38 +268,6 @@ struct lxc_conf *current_config; /* Declare this here, since we don't want to reshuffle the whole file. 
*/ static int in_caplist(int cap, struct lxc_list *caps); -static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *); -static int instantiate_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = instantiate_veth, - [LXC_NET_MACVLAN] = instantiate_macvlan, - [LXC_NET_VLAN] = instantiate_vlan, - [LXC_NET_PHYS] = instantiate_phys, - [LXC_NET_EMPTY] = instantiate_empty, - [LXC_NET_NONE] = instantiate_none, -}; - -static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *); -static int shutdown_none(struct lxc_handler *, struct lxc_netdev *); - -static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_VETH] = shutdown_veth, - [LXC_NET_MACVLAN] = shutdown_macvlan, - [LXC_NET_VLAN] = shutdown_vlan, - [LXC_NET_PHYS] = shutdown_phys, - [LXC_NET_EMPTY] = shutdown_empty, - [LXC_NET_NONE] = shutdown_none, -}; - static struct mount_opt mount_opt[] = { { "async", 1, MS_SYNCHRONOUS }, { "atime", 1, MS_NOATIME }, @@ -530,8 +496,7 @@ static int run_script_argv(const char *name, const char *section, return run_buffer(buffer); } -static int run_script(const char *name, const char *section, const char *script, - ...) +int run_script(const char *name, const char *section, const char *script, ...) 
{ int ret; char *buffer, *p; @@ -2325,311 +2290,6 @@ static int dropcaps_except(struct lxc_list *caps) return 0; } -static int setup_hw_addr(char *hwaddr, const char *ifname) -{ - struct sockaddr sockaddr; - struct ifreq ifr; - int ret, fd, saved_errno; - - ret = lxc_convert_mac(hwaddr, &sockaddr); - if (ret) { - ERROR("mac address '%s' conversion failed : %s", - hwaddr, strerror(-ret)); - return -1; - } - - memcpy(ifr.ifr_name, ifname, IFNAMSIZ); - ifr.ifr_name[IFNAMSIZ-1] = '\0'; - memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) { - ERROR("socket failure : %s", strerror(errno)); - return -1; - } - - ret = ioctl(fd, SIOCSIFHWADDR, &ifr); - saved_errno = errno; - close(fd); - if (ret) - ERROR("ioctl failure : %s", strerror(saved_errno)); - - DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name); - - return ret; -} - -static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inetdev *inetdev; - int err; - - lxc_list_for_each(iterator, ip) { - - inetdev = iterator->elem; - - err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, - &inetdev->bcast, inetdev->prefix); - if (err) { - ERROR("failed to setup_ipv4_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) -{ - struct lxc_list *iterator; - struct lxc_inet6dev *inet6dev; - int err; - - lxc_list_for_each(iterator, ip) { - - inet6dev = iterator->elem; - - err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, - &inet6dev->mcast, &inet6dev->acast, - inet6dev->prefix); - if (err) { - ERROR("failed to setup_ipv6_addr ifindex %d : %s", - ifindex, strerror(-err)); - return -1; - } - } - - return 0; -} - -static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) -{ - char ifname[IFNAMSIZ]; - int err; - const char *net_type_name; - char *current_ifname = ifname; - - /* empty 
network namespace */ - if (!netdev->ifindex) { - if (netdev->flags & IFF_UP) { - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - - if (netdev->type == LXC_NET_EMPTY) - return 0; - - if (netdev->type == LXC_NET_NONE) - return 0; - - if (netdev->type != LXC_NET_VETH) { - net_type_name = lxc_net_type_to_str(netdev->type); - ERROR("%s networks are not supported for containers " - "not setup up by privileged users", - net_type_name); - return -1; - } - - netdev->ifindex = if_nametoindex(netdev->name); - } - - /* get the new ifindex in case of physical netdev */ - if (netdev->type == LXC_NET_PHYS) { - if (!(netdev->ifindex = if_nametoindex(netdev->link))) { - ERROR("failed to get ifindex for %s", - netdev->link); - return -1; - } - } - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* default: let the system to choose one interface name */ - if (!netdev->name) - netdev->name = netdev->type == LXC_NET_PHYS ? 
- netdev->link : "eth%d"; - - /* rename the interface name */ - if (strcmp(ifname, netdev->name) != 0) { - err = lxc_netdev_rename_by_name(ifname, netdev->name); - if (err) { - ERROR("failed to rename %s->%s : %s", ifname, netdev->name, - strerror(-err)); - return -1; - } - } - - /* Re-read the name of the interface because its name has changed - * and would be automatically allocated by the system - */ - if (!if_indextoname(netdev->ifindex, current_ifname)) { - ERROR("no interface corresponding to index '%d'", - netdev->ifindex); - return -1; - } - - /* set a mac address */ - if (netdev->hwaddr) { - if (setup_hw_addr(netdev->hwaddr, current_ifname)) { - ERROR("failed to setup hw address for '%s'", - current_ifname); - return -1; - } - } - - /* setup ipv4 addresses on the interface */ - if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { - ERROR("failed to setup ip addresses for '%s'", - ifname); - return -1; - } - - /* setup ipv6 addresses on the interface */ - if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { - ERROR("failed to setup ipv6 addresses for '%s'", - ifname); - return -1; - } - - /* set the network device up */ - if (netdev->flags & IFF_UP) { - int err; - - err = lxc_netdev_up(current_ifname); - if (err) { - ERROR("failed to set '%s' up : %s", current_ifname, - strerror(-err)); - return -1; - } - - /* the network is up, make the loopback up too */ - err = lxc_netdev_up("lo"); - if (err) { - ERROR("failed to set the loopback up : %s", - strerror(-err)); - return -1; - } - } - - /* We can only set up the default routes after bringing - * up the interface, sine bringing up the interface adds - * the link-local routes and we can't add a default - * route if the gateway is not reachable. 
*/ - - /* setup ipv4 gateway on the interface */ - if (netdev->ipv4_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv4)) { - ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to add ipv4 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); - if (err) { - ERROR("failed to setup ipv4 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv4_gateway_auto) { - char buf[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv4 gateway '%s'", buf); - } - return -1; - } - } - } - - /* setup ipv6 gateway on the interface */ - if (netdev->ipv6_gateway) { - if (!(netdev->flags & IFF_UP)) { - ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname); - return -1; - } - - if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { - ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname); - return -1; - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to add ipv6 dest for '%s': %s", - ifname, strerror(-err)); - } - - err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); - if (err) { - ERROR("failed to setup ipv6 gateway for '%s': %s", - ifname, strerror(-err)); - if (netdev->ipv6_gateway_auto) { - char buf[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); - ERROR("tried to set autodetected ipv6 gateway '%s'", buf); - } - return -1; - } - } 
- } - - DEBUG("'%s' has been setup", current_ifname); - - return 0; -} - -static int lxc_setup_networks_in_child_namespaces(const struct lxc_conf *conf, - struct lxc_list *network) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - - lxc_log_configured_netdevs(conf); - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - /* REMOVE in LXC 3.0 */ - if (netdev->idx < 0) { - ERROR("WARNING: using \"lxc.network.*\" keys to define " - "networks is DEPRECATED, please switch to using " - "\"lxc.net.[i].* keys\""); - } - - if (lxc_setup_netdev_in_child_namespaces(netdev)) { - ERROR("failed to setup netdev"); - return -1; - } - } - - if (!lxc_list_empty(network)) - INFO("network has been setup"); - - return 0; -} - static int parse_resource(const char *res) { size_t i; int resid = -1; @@ -2669,46 +2329,6 @@ int setup_resource_limits(struct lxc_list *limits, pid_t pid) { return 0; } -/* try to move physical nics to the init netns */ -void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) -{ - int i, oldfd; - char ifname[IFNAMSIZ]; - - if (netnsfd < 0 || conf->num_savednics == 0) - return; - - INFO("Running to reset %d nic names.", conf->num_savednics); - - oldfd = lxc_preserve_ns(getpid(), "net"); - if (oldfd < 0) { - SYSERROR("Failed to open monitor netns fd."); - return; - } - - if (setns(netnsfd, 0) != 0) { - SYSERROR("Failed to enter container netns to reset nics"); - close(oldfd); - return; - } - for (i=0; i<conf->num_savednics; i++) { - struct saved_nic *s = &conf->saved_nics[i]; - /* retrieve the name of the interface */ - if (!if_indextoname(s->ifindex, ifname)) { - WARN("no interface corresponding to index '%d'", s->ifindex); - continue; - } - if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) - WARN("Error moving nic name:%s back to host netns", ifname); - free(s->orig_name); - } - conf->num_savednics = 0; - - if (setns(oldfd, 0) != 0) - SYSERROR("Failed to re-enter monitor's netns"); - close(oldfd); -} - static char 
*default_rootfs_mount = LXCROOTFSMOUNT; struct lxc_conf *lxc_conf_init(void) @@ -2774,659 +2394,6 @@ struct lxc_conf *lxc_conf_init(void) return new; } -static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1, *veth2; - char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; - int bridge_index, err; - unsigned int mtu = 0; - - if (netdev->priv.veth_attr.pair) { - veth1 = netdev->priv.veth_attr.pair; - if (handler->conf->reboot) - lxc_netdev_delete_by_name(veth1); - } else { - err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); - if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */ - ERROR("veth1 name too long"); - return -1; - } - veth1 = lxc_mkifname(veth1buf); - if (!veth1) { - ERROR("failed to allocate a temporary name"); - return -1; - } - /* store away for deconf */ - memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); - } - - snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); - veth2 = lxc_mkifname(veth2buf); - if (!veth2) { - ERROR("failed to allocate a temporary name"); - goto out_delete; - } - - err = lxc_veth_create(veth1, veth2); - if (err) { - ERROR("failed to create veth pair \"%s\" and \"%s\": %s", veth1, - veth2, strerror(-err)); - goto out_delete; - } - - /* changing the high byte of the mac address to 0xfe, the bridge interface - * will always keep the host's mac address and not take the mac address - * of a container */ - err = setup_private_host_hw_addr(veth1); - if (err) { - ERROR("failed to change mac address of host interface \"%s\": %s", - veth1, strerror(-err)); - goto out_delete; - } - - netdev->ifindex = if_nametoindex(veth2); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for \"%s\"", veth2); - goto out_delete; - } - - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) - WARN("failed to parse mtu from"); - else - INFO("retrieved mtu %d", mtu); - } else if (netdev->link) { - bridge_index = if_nametoindex(netdev->link); - if (bridge_index) { - mtu = 
netdev_get_mtu(bridge_index); - INFO("retrieved mtu %d from %s", mtu, netdev->link); - } else { - mtu = netdev_get_mtu(netdev->ifindex); - INFO("retrieved mtu %d from %s", mtu, veth2); - } - } - - if (mtu) { - err = lxc_netdev_set_mtu(veth1, mtu); - if (!err) - err = lxc_netdev_set_mtu(veth2, mtu); - if (err) { - ERROR("failed to set mtu \"%d\" for veth pair \"%s\" " - "and \"%s\": %s", - mtu, veth1, veth2, strerror(-err)); - goto out_delete; - } - } - - if (netdev->link) { - err = lxc_bridge_attach(netdev->link, veth1); - if (err) { - ERROR("failed to attach \"%s\" to bridge \"%s\": %s", - veth1, netdev->link, strerror(-err)); - goto out_delete; - } - INFO("attached \"%s\" to bridge \"%s\"", veth1, netdev->link); - } - - err = lxc_netdev_up(veth1); - if (err) { - ERROR("failed to set \"%s\" up: %s", veth1, strerror(-err)); - goto out_delete; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "veth", veth1, (char*) NULL); - if (err) - goto out_delete; - } - - DEBUG("instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, - netdev->ifindex); - - return 0; - -out_delete: - if (netdev->ifindex != 0) - lxc_netdev_delete_by_name(veth1); - if (!netdev->priv.veth_attr.pair) - free(veth1); - free(veth2); - return -1; -} - -static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char *veth1; - int err; - - if (netdev->priv.veth_attr.pair) - veth1 = netdev->priv.veth_attr.pair; - else - veth1 = netdev->priv.veth_attr.veth1; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "veth", veth1, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peerbuf[IFNAMSIZ], *peer; - int err; - - if (!netdev->link) { - ERROR("no link specified for macvlan netdev"); - return -1; - } - - err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); - if (err >= 
sizeof(peerbuf)) - return -1; - - peer = lxc_mkifname(peerbuf); - if (!peer) { - ERROR("failed to make a temporary name"); - return -1; - } - - err = lxc_macvlan_create(netdev->link, peer, - netdev->priv.macvlan_attr.mode); - if (err) { - ERROR("failed to create macvlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - goto out; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", peer); - goto out; - } - - if (netdev->upscript) { - err = run_script(handler->name, "net", netdev->upscript, "up", - "macvlan", netdev->link, (char*) NULL); - if (err) - goto out; - } - - DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'", - peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); - - return 0; -out: - lxc_netdev_delete_by_name(peer); - free(peer); - return -1; -} - -static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "macvlan", netdev->link, - (char*) NULL); - if (err) - return -1; - } - return 0; -} - -/* XXX: merge with instantiate_macvlan */ -static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - char peer[IFNAMSIZ]; - int err; - static uint16_t vlan_cntr = 0; - unsigned int mtu = 0; - - if (!netdev->link) { - ERROR("no link specified for vlan netdev"); - return -1; - } - - err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); - if (err >= sizeof(peer)) { - ERROR("peer name too long"); - return -1; - } - - err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); - if (err) { - ERROR("failed to create vlan interface '%s' on '%s' : %s", - peer, netdev->link, strerror(-err)); - return -1; - } - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { - ERROR("failed to retrieve the ifindex for %s", peer); - 
lxc_netdev_delete_by_name(peer); - return -1; - } - - DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000", - netdev->ifindex); - if (netdev->mtu) { - if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { - ERROR("Failed to retrieve mtu from: '%d'/'%s'.", - netdev->ifindex, netdev->name); - return -1; - } - err = lxc_netdev_set_mtu(peer, mtu); - if (err) { - ERROR("failed to set mtu '%s' for %s : %s", - netdev->mtu, peer, strerror(-err)); - lxc_netdev_delete_by_name(peer); - return -1; - } - } - - return 0; -} - -static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - if (!netdev->link) { - ERROR("no link specified for the physical interface"); - return -1; - } - - netdev->ifindex = if_nametoindex(netdev->link); - if (!netdev->ifindex) { - ERROR("failed to retrieve the index for %s", netdev->link); - return -1; - } - - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - - return 0; -} - -static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = run_script(handler->name, "net", netdev->downscript, - "down", "phys", netdev->link, (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - return 0; -} - -static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - netdev->ifindex = 0; - if (netdev->upscript) { - int err; - err = run_script(handler->name, "net", netdev->upscript, - "up", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - int err; - - if (netdev->downscript) { - err = 
run_script(handler->name, "net", netdev->downscript, - "down", "empty", (char*) NULL); - if (err) - return -1; - } - return 0; -} - -static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) -{ - return 0; -} - -int lxc_requests_empty_network(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - bool found_none = false, found_nic = false; - - if (lxc_list_empty(network)) - return 0; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_NONE) - found_none = true; - else - found_nic = true; - } - if (found_none && !found_nic) - return 1; - return 0; -} - -int lxc_setup_networks_in_parent_namespaces(struct lxc_handler *handler) -{ - bool am_root; - struct lxc_netdev *netdev; - struct lxc_list *iterator; - struct lxc_list *network = &handler->conf->network; - - /* We need to be root. */ - am_root = (getuid() == 0); - if (!am_root) - return 0; - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) { - ERROR("invalid network configuration type '%d'", - netdev->type); - return -1; - } - - if (netdev_conf[netdev->type](handler, netdev)) { - ERROR("failed to create netdev"); - return -1; - } - - } - - return 0; -} - -bool lxc_delete_network(struct lxc_handler *handler) -{ - int ret; - struct lxc_list *iterator; - struct lxc_list *network = &handler->conf->network; - bool deleted_all = true; - - lxc_list_for_each(iterator, network) { - char *hostveth = NULL; - struct lxc_netdev *netdev = iterator->elem; - - /* We can only delete devices whose ifindex we have. If we don't - * have the index it means that we didn't create it. 
- */ - if (!netdev->ifindex) - continue; - - if (netdev->type == LXC_NET_PHYS) { - ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); - if (ret < 0) - WARN("Failed to rename interface with index %d " - "to its initial name \"%s\"", - netdev->ifindex, netdev->link); - else - TRACE("Renamed interface with index %d to its " - "initial name \"%s\"", - netdev->ifindex, netdev->link); - continue; - } - - ret = netdev_deconf[netdev->type](handler, netdev); - if (ret < 0) - WARN("Failed to deconfigure network device"); - - /* Recent kernels remove the virtual interfaces when the network - * namespace is destroyed but in case we did not move the - * interface to the network namespace, we have to destroy it. - */ - ret = lxc_netdev_delete_by_index(netdev->ifindex); - if (-ret == ENODEV) { - INFO("Interface \"%s\" with index %d already deleted " - "or existing in different network namespace", - netdev->name ? netdev->name : "(null)", netdev->ifindex); - } else if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" with index %d: " - "%s", netdev->name ? netdev->name : "(null)", - netdev->ifindex, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" with index %d", - netdev->name ? netdev->name : "(null)", netdev->ifindex); - - if (netdev->type != LXC_NET_VETH) - continue; - - if (am_unpriv()) - continue; - - /* Explicitly delete host veth device to prevent lingering - * devices. We had issues in LXD around this. 
- */ - if (netdev->priv.veth_attr.pair) - hostveth = netdev->priv.veth_attr.pair; - else - hostveth = netdev->priv.veth_attr.veth1; - if (*hostveth == '\0') - continue; - - ret = lxc_netdev_delete_by_name(hostveth); - if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" from \"%s\": %s", - hostveth, netdev->link, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); - - if (!is_ovs_bridge(netdev->link)) { - netdev->priv.veth_attr.veth1[0] = '\0'; - continue; - } - - /* Delete the openvswitch port. */ - ret = lxc_ovs_delete_port(netdev->link, hostveth); - if (ret < 0) - WARN("Failed to remove port \"%s\" from openvswitch " - "bridge \"%s\"", hostveth, netdev->link); - else - INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", - hostveth, netdev->link); - - netdev->priv.veth_attr.veth1[0] = '\0'; - } - - return deleted_all; -} - -#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" - -/* lxc-user-nic returns "interface_name:interface_name\n" */ -#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2 -static int unpriv_assign_nic(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) -{ - pid_t child; - int bytes, pipefd[2]; - char *token, *saveptr = NULL; - char buffer[MAX_BUFFER_SIZE]; - char netdev_link[IFNAMSIZ + 1]; - - if (netdev->type != LXC_NET_VETH) { - ERROR("nic type %d not support for unprivileged use", - netdev->type); - return -1; - } - - if (pipe(pipefd) < 0) { - SYSERROR("pipe failed"); - return -1; - } - - child = fork(); - if (child < 0) { - SYSERROR("fork"); - close(pipefd[0]); - close(pipefd[1]); - return -1; - } - - if (child == 0) { /* child */ - /* Call lxc-user-nic pid type bridge. */ - int ret; - char pidstr[LXC_NUMSTRLEN64]; - - close(pipefd[0]); /* Close the read-end of the pipe. */ - - /* Redirect stdout to write-end of the pipe. */ - ret = dup2(pipefd[1], STDOUT_FILENO); - close(pipefd[1]); /* Close the write-end of the pipe. 
*/ - if (ret < 0) { - SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor."); - exit(EXIT_FAILURE); - } - - if (netdev->link) - strncpy(netdev_link, netdev->link, IFNAMSIZ); - else - strncpy(netdev_link, "none", IFNAMSIZ); - - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - - INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev_link, netdev->name); - execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname, - pidstr, "veth", netdev_link, netdev->name, NULL); - - SYSERROR("Failed to exec lxc-user-nic."); - exit(EXIT_FAILURE); - } - - /* close the write-end of the pipe */ - close(pipefd[1]); - - bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE); - if (bytes < 0) { - SYSERROR("Failed to read from pipe file descriptor."); - close(pipefd[0]); - return -1; - } - buffer[bytes - 1] = '\0'; - - if (wait_for_pid(child) != 0) { - TRACE("lxc-user-nic failed to configure requested network"); - close(pipefd[0]); - return -1; - } - TRACE("Received output \"%s\" from lxc-user-nic", buffer); - - /* close the read-end of the pipe */ - close(pipefd[0]); - - /* fill netdev->name field */ - token = strtok_r(buffer, ":", &saveptr); - if (!token) - return -1; - - netdev->name = malloc(IFNAMSIZ + 1); - if (!netdev->name) { - SYSERROR("Failed to allocate memory."); - return -1; - } - memset(netdev->name, 0, IFNAMSIZ + 1); - strncpy(netdev->name, token, IFNAMSIZ); - - /* fill netdev->veth_attr.pair field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - return -1; - - netdev->priv.veth_attr.pair = strdup(token); - if (!netdev->priv.veth_attr.pair) { - ERROR("Failed to allocate memory."); - return -1; - } - - return 0; -} - -int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *network, pid_t pid) -{ - struct lxc_list *iterator; - struct lxc_netdev *netdev; - char ifname[IFNAMSIZ]; - int am_root = 
(getuid() == 0); - int err; - - lxc_list_for_each(iterator, network) { - - netdev = iterator->elem; - - if (netdev->type == LXC_NET_VETH && !am_root) { - if (netdev->mtu) - INFO("mtu ignored due to insufficient privilege"); - if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid)) - return -1; - /* lxc-user-nic has moved the nic to the new ns. - * unpriv_assign_nic() fills in netdev->name. - * netdev->ifindex will be filed in at - * lxc_setup_netdev_in_child_namespaces. - */ - continue; - } - - /* empty network namespace, nothing to move */ - if (!netdev->ifindex) - continue; - - /* retrieve the name of the interface */ - if (!if_indextoname(netdev->ifindex, ifname)) { - ERROR("no interface corresponding to index '%d'", netdev->ifindex); - return -1; - } - - err = lxc_netdev_move_by_name(ifname, pid, NULL); - if (err) { - ERROR("failed to move '%s' to the container : %s", - netdev->link, strerror(-err)); - return -1; - } - - DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid); - } - - return 0; -} - static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size) { @@ -3698,54 +2665,6 @@ again: return freeid; } -int lxc_find_gateway_addresses(struct lxc_handler *handler) -{ - struct lxc_list *network = &handler->conf->network; - struct lxc_list *iterator; - struct lxc_netdev *netdev; - int link_index; - - lxc_list_for_each(iterator, network) { - netdev = iterator->elem; - - if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) - continue; - - if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { - ERROR("gateway = auto only supported for " - "veth and macvlan"); - return -1; - } - - if (!netdev->link) { - ERROR("gateway = auto needs a link interface"); - return -1; - } - - link_index = if_nametoindex(netdev->link); - if (!link_index) - return -EINVAL; - - if (netdev->ipv4_gateway_auto) { - if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { - ERROR("failed to automatically find ipv4 gateway " - 
"address from link interface '%s'", netdev->link); - return -1; - } - } - - if (netdev->ipv6_gateway_auto) { - if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { - ERROR("failed to automatically find ipv6 gateway " - "address from link interface '%s'", netdev->link); - return -1; - } - } - } - - return 0; -} - int lxc_create_tty(const char *name, struct lxc_conf *conf) { struct lxc_tty_info *tty_info = &conf->tty_info; @@ -4205,8 +3124,7 @@ int lxc_setup(struct lxc_handler *handler) } } - if (lxc_setup_networks_in_child_namespaces(lxc_conf, - &lxc_conf->network)) { + if (lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network)) { ERROR("failed to setup the network for '%s'", name); return -1; } @@ -4492,7 +3410,6 @@ int lxc_clear_environment(struct lxc_conf *c) return 0; } - int lxc_clear_mount_entries(struct lxc_conf *c) { struct lxc_list *it,*next; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index f085bc94c..bd525a2be 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -46,103 +46,6 @@ typedef void * scmp_filter_ctx; #define subuidfile "/etc/subuid" #define subgidfile "/etc/subgid" -enum { - LXC_NET_EMPTY, - LXC_NET_VETH, - LXC_NET_MACVLAN, - LXC_NET_PHYS, - LXC_NET_VLAN, - LXC_NET_NONE, - LXC_NET_MAXCONFTYPE, -}; - -/* - * Defines the structure to configure an ipv4 address - * @address : ipv4 address - * @broadcast : ipv4 broadcast address - * @mask : network mask - */ -struct lxc_inetdev { - struct in_addr addr; - struct in_addr bcast; - unsigned int prefix; -}; - -struct lxc_route { - struct in_addr addr; -}; - -/* - * Defines the structure to configure an ipv6 address - * @flags : set the address up - * @address : ipv6 address - * @broadcast : ipv6 broadcast address - * @mask : network mask - */ -struct lxc_inet6dev { - struct in6_addr addr; - struct in6_addr mcast; - struct in6_addr acast; - unsigned int prefix; -}; - -struct lxc_route6 { - struct in6_addr addr; -}; - -struct ifla_veth { - char *pair; /* pair name */ - char 
veth1[IFNAMSIZ]; /* needed for deconf */ -}; - -struct ifla_vlan { - unsigned int flags; - unsigned int fmask; - unsigned short vid; - unsigned short pad; -}; - -struct ifla_macvlan { - int mode; /* private, vepa, bridge, passthru */ -}; - -union netdev_p { - struct ifla_veth veth_attr; - struct ifla_vlan vlan_attr; - struct ifla_macvlan macvlan_attr; -}; - -/* - * Defines a structure to configure a network device - * @link : lxc.net.[i].link, name of bridge or host iface to attach if any - * @name : lxc.net.[i].name, name of iface on the container side - * @flags : flag of the network device (IFF_UP, ... ) - * @ipv4 : a list of ipv4 addresses to be set on the network device - * @ipv6 : a list of ipv6 addresses to be set on the network device - * @upscript : a script filename to be executed during interface configuration - * @downscript : a script filename to be executed during interface destruction - * @idx : network counter - */ -struct lxc_netdev { - ssize_t idx; - int type; - int flags; - int ifindex; - char *link; - char *name; - char *hwaddr; - char *mtu; - union netdev_p priv; - struct lxc_list ipv4; - struct lxc_list ipv6; - struct in_addr *ipv4_gateway; - bool ipv4_gateway_auto; - struct in6_addr *ipv6_gateway; - bool ipv6_gateway_auto; - char *upscript; - char *downscript; -}; - /* * Defines a generic struct to configure the control group. It is up to the * programmer to specify the right subsystem. 
@@ -327,12 +230,8 @@ enum lxchooks { LXCHOOK_DESTROY, NUM_LXC_HOOKS }; -extern char *lxchook_names[NUM_LXC_HOOKS]; -struct saved_nic { - int ifindex; - char *orig_name; -}; +extern char *lxchook_names[NUM_LXC_HOOKS]; struct lxc_conf { int is_execute; @@ -458,13 +357,7 @@ extern int detect_shared_rootfs(void); extern struct lxc_conf *lxc_conf_init(void); extern void lxc_conf_free(struct lxc_conf *conf); extern int pin_rootfs(const char *rootfs); -extern int lxc_requests_empty_network(struct lxc_handler *handler); -extern int lxc_setup_networks_in_parent_namespaces(struct lxc_handler *handler); -extern bool lxc_delete_network(struct lxc_handler *handler); -extern int lxc_assign_network(const char *lxcpath, char *lxcname, - struct lxc_list *networks, pid_t pid); extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); -extern int lxc_find_gateway_addresses(struct lxc_handler *handler); extern int lxc_create_tty(const char *name, struct lxc_conf *conf); extern void lxc_delete_tty(struct lxc_tty_info *tty_info); extern int lxc_clear_config_caps(struct lxc_conf *c); @@ -483,7 +376,6 @@ extern int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath); extern int lxc_setup(struct lxc_handler *handler); extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); -extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); extern int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype); extern int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype); @@ -500,5 +392,7 @@ extern FILE *make_anonymous_mount_file(struct lxc_list *mount); extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); extern unsigned long add_required_remount_flags(const char *s, const char *d, unsigned long flags); +extern int run_script(const char *name, const char *section, const char *script, + ...); #endif /* __LXC_CONF_H */ diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 
62337289e..e66bae314 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -1431,9 +1431,6 @@ static int set_config_cgroup_dir(const char *key, const char *value, if (lxc_config_value_empty(value)) return clr_config_cgroup_dir(key, lxc_conf, NULL); - if (lxc_conf->cgroup_meta.dir) - clr_config_cgroup_dir(key, lxc_conf, NULL); - return set_config_string_item(&lxc_conf->cgroup_meta.dir, value); } diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c index fa3f64598..0d9ff66e6 100644 --- a/src/lxc/confile_utils.c +++ b/src/lxc/confile_utils.c @@ -31,6 +31,7 @@ #include "error.h" #include "log.h" #include "list.h" +#include "network.h" #include "parse.h" #include "utils.h" @@ -253,6 +254,7 @@ void lxc_log_configured_netdevs(const struct lxc_conf *conf) netdev = it->elem; TRACE("index: %zd", netdev->idx); + TRACE("ifindex: %d", netdev->ifindex); switch (netdev->type) { case LXC_NET_VETH: TRACE("type: veth"); diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index af44a3210..0fb788877 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -17,7 +17,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#define _GNU_SOURCE /* See feature_test_macros(7) */ +#define _GNU_SOURCE #include #include #include @@ -59,21 +59,27 @@ static void usage(char *me, bool fail) { - fprintf(stderr, "Usage: %s lxcpath name pid type bridge nicname\n", me); - fprintf(stderr, " nicname is the name to use inside the container\n"); - exit(fail ? 
1 : 0); -} + fprintf(stderr, "Usage: %s create {lxcpath} {name} {pid} {type} " + "{bridge} {nicname}\n", me); + fprintf(stderr, "Usage: %s delete {lxcpath} {name} {pid} {type} " + "{bridge} {nicname}\n", me); + fprintf(stderr, "{nicname} is the name to use inside the container\n"); -static char *lxcpath, *lxcname; + if (fail) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); +} static int open_and_lock(char *path) { - int fd; + int fd, ret; struct flock lk; - fd = open(path, O_RDWR|O_CREAT, S_IWUSR | S_IRUSR); + fd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); if (fd < 0) { - usernic_error("Failed to open %s: %s\n", path, strerror(errno)); + usernic_error("Failed to open \"%s\": %s\n", path, + strerror(errno)); return -1; } @@ -81,8 +87,11 @@ static int open_and_lock(char *path) lk.l_whence = SEEK_SET; lk.l_start = 0; lk.l_len = 0; - if (fcntl(fd, F_SETLKW, &lk) < 0) { - usernic_error("Failed to lock %s: %s\n", path, strerror(errno)); + + ret = fcntl(fd, F_SETLKW, &lk); + if (ret < 0) { + usernic_error("Failed to lock \"%s\": %s\n", path, + strerror(errno)); close(fd); return -1; } @@ -90,14 +99,13 @@ static int open_and_lock(char *path) return fd; } - static char *get_username(void) { struct passwd *pwd; pwd = getpwuid(getuid()); if (!pwd) { - usernic_error("Failed to call get username: %s\n", strerror(errno)); + usernic_error("Failed to get username: %s\n", strerror(errno)); return NULL; } @@ -127,9 +135,8 @@ static char **get_groupnames(void) ngroups = getgroups(0, NULL); if (ngroups < 0) { - usernic_error( - "Failed to get number of groups the user belongs to: %s\n", - strerror(errno)); + usernic_error("Failed to get number of groups the user " + "belongs to: %s\n", strerror(errno)); return NULL; } if (ngroups == 0) @@ -203,19 +210,21 @@ struct alloted_s { struct alloted_s *next; }; -static struct alloted_s *append_alloted(struct alloted_s **head, char *name, int n) +static struct alloted_s *append_alloted(struct alloted_s **head, char *name, + int n) { struct 
alloted_s *cur, *al; if (!head || !name) { - /* sanity check. parameters should not be null */ + /* Sanity check. Parameters should not be null. */ usernic_error("%s\n", "Unexpected NULL argument"); return NULL; } al = malloc(sizeof(struct alloted_s)); if (!al) { - usernic_error("Failed to allocate memory: %s\n", strerror(errno)); + usernic_error("Failed to allocate memory: %s\n", + strerror(errno)); return NULL; } @@ -266,7 +275,8 @@ static void free_alloted(struct alloted_s **head) * Return the count entry for the calling user if there is one. Else * return -1. */ -static int get_alloted(char *me, char *intype, char *link, struct alloted_s **alloted) +static int get_alloted(char *me, char *intype, char *link, + struct alloted_s **alloted) { int n, ret; char name[100], type[100], br[100]; @@ -279,13 +289,15 @@ static int get_alloted(char *me, char *intype, char *link, struct alloted_s **al fin = fopen(LXC_USERNIC_CONF, "r"); if (!fin) { - usernic_error("Failed to open \"%s\": %s\n", LXC_USERNIC_CONF, strerror(errno)); + usernic_error("Failed to open \"%s\": %s\n", LXC_USERNIC_CONF, + strerror(errno)); return -1; } groups = get_groupnames(); while ((getline(&line, &len, fin)) != -1) { - ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name, type, br, &n); + ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name, + type, br, &n); if (ret != 4) continue; @@ -358,7 +370,8 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(u) || strncmp(p, u, strlen(u))) + if (!p2 || ((size_t)(p2 - p)) != strlen(u) || + strncmp(p, u, strlen(u))) goto next; p = p2 + 1; @@ -366,7 +379,8 @@ static char *find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(t) || strncmp(p, t, strlen(t))) + if (!p2 || ((size_t)(p2 - p)) != strlen(t) || + strncmp(p, t, strlen(t))) goto next; p = p2 + 1; @@ -374,11 +388,12 @@ static char 
*find_line(char *p, char *e, char *u, char *t, char *l) p++; p2 = get_eow(p, e); - if (!p2 || ((size_t)(p2 - p)) != strlen(l) || strncmp(p, l, strlen(l))) + if (!p2 || ((size_t)(p2 - p)) != strlen(l) || + strncmp(p, l, strlen(l))) goto next; return ret; -next: + next: p = p1 + 1; } @@ -417,7 +432,8 @@ static int instantiate_veth(char *n1, char **n2) err = lxc_veth_create(n1, *n2); if (err) { - usernic_error("Failed to create %s-%s : %s.\n", n1, *n2, strerror(-err)); + usernic_error("Failed to create %s-%s : %s.\n", n1, *n2, + strerror(-err)); return -1; } @@ -427,8 +443,7 @@ static int instantiate_veth(char *n1, char **n2) err = setup_private_host_hw_addr(n1); if (err) usernic_error("Failed to change mac address of host interface " - "%s : %s\n", - n1, strerror(-err)); + "%s : %s\n", n1, strerror(-err)); return netdev_set_flag(n1, IFF_UP); } @@ -471,13 +486,15 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) if (mtu > 0) { ret = lxc_netdev_set_mtu(veth1buf, mtu); if (ret < 0) { - usernic_error("Failed to set mtu to %d on %s\n", mtu, veth1buf); + usernic_error("Failed to set mtu to %d on %s\n", + mtu, veth1buf); goto out_del; } ret = lxc_netdev_set_mtu(veth2buf, mtu); if (ret < 0) { - usernic_error("Failed to set mtu to %d on %s\n", mtu, veth2buf); + usernic_error("Failed to set mtu to %d on %s\n", + mtu, veth2buf); goto out_del; } } @@ -493,7 +510,8 @@ static bool create_nic(char *nic, char *br, int pid, char **cnic) /* pass veth2 to target netns */ ret = lxc_netdev_move_by_name(veth2buf, pid, NULL); if (ret < 0) { - usernic_error("Error moving %s to network namespace of %d\n", veth2buf, pid); + usernic_error("Error moving %s to network namespace of %d\n", + veth2buf, pid); goto out_del; } @@ -510,25 +528,29 @@ out_del: return false; } -/* - * Get a new nic. 
- * *dest will contain the name (vethXXXXXX) which is attached - * on the host to the lxc bridge +/* get_new_nicname() will return the name (vethXXXXXX) which is attached on the + * host to the lxc bridge. The returned string must be freed by caller. */ -static bool get_new_nicname(char **dest, char *br, int pid, char **cnic) +static char *get_new_nicname(char *br, int pid, char **cnic) { int ret; + char *nicname; char template[IFNAMSIZ]; ret = snprintf(template, sizeof(template), "vethXXXXXX"); if (ret < 0 || (size_t)ret >= sizeof(template)) - return false; + return NULL; - *dest = lxc_mkifname(template); - if (!create_nic(*dest, br, pid, cnic)) - return false; + nicname = lxc_mkifname(template); + if (!nicname) + return NULL; - return true; + if (!create_nic(nicname, br, pid, cnic)) { + free(nicname); + return NULL; + } + + return nicname; } static bool get_nic_from_line(char *p, char **nic) @@ -536,7 +558,8 @@ static bool get_nic_from_line(char *p, char **nic) int ret; char user[100], type[100], br[100]; - ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, type, br, *nic); + ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, + type, br, *nic); if (ret != 4) return false; @@ -549,19 +572,22 @@ struct entry_line { bool keep; }; -static bool cull_entries(int fd, char *me, char *t, char *br) +static bool cull_entries(int fd, char *me, char *t, char *br, char *nicname, + bool *found_nicname) { - int i, n = 0; + int i, ret; off_t len; - char *buf, *p, *e, *nic; + char *buf, *e, *nic, *p; struct stat sb; + int n = 0; struct entry_line *entry_lines = NULL; nic = alloca(100); if (!nic) return false; - if (fstat(fd, &sb) < 0) { + ret = fstat(fd, &sb); + if (ret < 0) { usernic_error("Failed to fstat: %s\n", strerror(errno)); return false; } @@ -570,9 +596,10 @@ static bool cull_entries(int fd, char *me, char *t, char *br) if (len == 0) return true; - buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = 
lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); return false; } @@ -598,6 +625,10 @@ static bool cull_entries(int fd, char *me, char *t, char *br) if (nic && !nic_exists(nic)) entry_lines[n - 1].keep = false; + if (nicname) + if (!strcmp(nic, nicname)) + *found_nicname = true; + p += entry_lines[n - 1].len + 1; if (p >= e) break; @@ -615,9 +646,11 @@ static bool cull_entries(int fd, char *me, char *t, char *br) } free(entry_lines); - munmap(buf, sb.st_size); - if (ftruncate(fd, p - buf)) - usernic_error("Failed to set new file size: %s\n", strerror(errno)); + lxc_strmunmap(buf, sb.st_size); + ret = ftruncate(fd, p - buf); + if (ret < 0) + usernic_error("Failed to set new file size: %s\n", + strerror(errno)); return true; } @@ -638,41 +671,39 @@ static int count_entries(char *buf, off_t len, char *me, char *t, char *br) return count; } -/* - * The dbfile has lines of the format: - * user type bridge nicname - */ -static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, - char *intype, char *br, int allowed, - char **nicname, char **cnic) +/* The dbfile has lines of the format: user type bridge nicname. 
*/ +static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid, + char *intype, char *br, int allowed, char **cnic) { int ret; off_t len, slen; - char *newline, *owner; + char *newline, *nicname, *owner; struct stat sb; struct alloted_s *n; int count = 0; char *buf = NULL; for (n = names; n != NULL; n = n->next) - cull_entries(fd, n->name, intype, br); + cull_entries(fd, n->name, intype, br, NULL, NULL); if (allowed == 0) - return false; + return NULL; owner = names->name; if (fstat(fd, &sb) < 0) { usernic_error("Failed to fstat: %s\n", strerror(errno)); - return false; + return NULL; } len = sb.st_size; if (len > 0) { - buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = + mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); - return false; + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); + return NULL; } owner = NULL; @@ -688,47 +719,56 @@ static bool get_nic_if_avail(int fd, struct alloted_s *names, int pid, } if (owner == NULL) - return false; + return NULL; - if (!get_new_nicname(nicname, br, pid, cnic)) - return false; - - /* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ - slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(*nicname) + 5; - newline = alloca(slen); - if (!newline) { - usernic_error("Failed allocate memory: %s\n", strerror(errno)); - return false; + nicname = get_new_nicname(br, pid, cnic); + if (!nicname) { + usernic_error("%s", "Failed to get new nic name\n"); + return NULL; } - ret = snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, *nicname); + /* owner ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ + slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 5; + newline = alloca(slen); + if (!newline) { + free(nicname); + usernic_error("Failed allocate memory: %s\n", strerror(errno)); + return NULL; + } + + ret = 
snprintf(newline, slen, "%s %s %s %s\n", owner, intype, br, nicname); if (ret < 0 || ret >= slen) { - if (lxc_netdev_delete_by_name(*nicname) != 0) - usernic_error("Error unlinking %s\n", *nicname); - return false; + if (lxc_netdev_delete_by_name(nicname) != 0) + usernic_error("Error unlinking %s\n", nicname); + free(nicname); + return NULL; } if (len) munmap(buf, len); if (ftruncate(fd, len + slen)) - usernic_error("Failed to set new file size: %s\n", strerror(errno)); + usernic_error("Failed to set new file size: %s\n", + strerror(errno)); - buf = mmap(NULL, len + slen, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + buf = mmap(NULL, len + slen, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == MAP_FAILED) { - usernic_error("Failed to establish shared memory mapping: %s\n", strerror(errno)); - if (lxc_netdev_delete_by_name(*nicname) != 0) - usernic_error("Error unlinking %s\n", *nicname); - return false; + usernic_error("Failed to establish shared memory mapping: %s\n", + strerror(errno)); + if (lxc_netdev_delete_by_name(nicname) != 0) + usernic_error("Error unlinking %s\n", nicname); + free(nicname); + return NULL; } strcpy(buf + len, newline); munmap(buf, len + slen); - return true; + return nicname; } static bool create_db_dir(char *fnam) { + int ret; char *p; p = alloca(strlen(fnam) + 1); @@ -743,8 +783,11 @@ again: return true; *p = '\0'; - if (mkdir(fnam, 0755) && errno != EEXIST) { - usernic_error("Failed to create %s: %s\n", fnam, strerror(errno)); + + ret = mkdir(fnam, 0755); + if (ret < 0 && errno != EEXIST) { + usernic_error("Failed to create %s: %s\n", fnam, + strerror(errno)); *p = '/'; return false; } @@ -753,18 +796,19 @@ again: goto again; } -#define VETH_DEF_NAME "eth%d" -static int rename_in_ns(int pid, char *oldname, char **newnamep) +static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname, + int *ifidx) { + int ret; uid_t ruid, suid, euid; - int fret = -1; - int fd = -1, ifindex = -1, ofd = -1, ret; - bool grab_newname = 
false; + char ifname[IFNAMSIZ]; + char *string_ret = NULL, *name = NULL; + int fd = -1, ifindex = -1, ofd = -1; ofd = lxc_preserve_ns(getpid(), "net"); if (ofd < 0) { usernic_error("Failed opening network namespace path for %d", getpid()); - return fret; + return NULL; } fd = lxc_preserve_ns(pid, "net"); @@ -804,63 +848,70 @@ static int rename_in_ns(int pid, char *oldname, char **newnamep) goto do_full_cleanup; } - if (!*newnamep) { - grab_newname = true; - *newnamep = VETH_DEF_NAME; - - ifindex = if_nametoindex(oldname); - if (!ifindex) { - usernic_error("Failed to get netdev index: %s\n", strerror(errno)); - goto do_full_cleanup; - } - } - - ret = lxc_netdev_rename_by_name(oldname, *newnamep); - if (ret < 0) { - usernic_error("Error %d renaming netdev %s to %s in container\n", ret, oldname, *newnamep); + /* Check if old interface exists. */ + ifindex = if_nametoindex(oldname); + if (!ifindex) { + usernic_error("Failed to get netdev index: %s\n", strerror(errno)); goto do_full_cleanup; } - if (grab_newname) { - char ifname[IFNAMSIZ]; - char *namep = ifname; + /* When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + * So we pass "eth%d". + */ + if (newname) + name = newname; + else + name = "eth%d"; - if (!if_indextoname(ifindex, namep)) { - usernic_error("Failed to get new netdev name: %s\n", strerror(errno)); - goto do_full_cleanup; - } - - *newnamep = strdup(namep); - if (!*newnamep) - goto do_full_cleanup; + ret = lxc_netdev_rename_by_name(oldname, name); + name = NULL; + if (ret < 0) { + usernic_error("Error %d renaming netdev %s to %s in container\n", + ret, oldname, newname ? newname : "eth%d"); + goto do_full_cleanup; } - fret = 0; + /* Retrieve new name for interface. */ + if (!if_indextoname(ifindex, ifname)) { + usernic_error("Failed to get new netdev name: %s\n", strerror(errno)); + goto do_full_cleanup; + } + + /* Allocation failure for strdup() is checked below. 
*/ + name = strdup(ifname); + string_ret = name; + *ifidx = ifindex; do_full_cleanup: ret = setresuid(ruid, euid, suid); if (ret < 0) { - usernic_error("Failed to restore privilege by setting effective " - "user id to %d, real user id to %d, and saved user " - "ID to %d: %s\n", - ruid, euid, suid, strerror(errno)); - fret = -1; + usernic_error("Failed to restore privilege by setting " + "effective user id to %d, real user id to %d, " + "and saved user ID to %d: %s\n", ruid, euid, suid, + strerror(errno)); + + string_ret = NULL; } ret = setns(ofd, CLONE_NEWNET); if (ret < 0) { usernic_error("Failed to setns() to original network namespace " - "of PID %d: %s\n", - ofd, strerror(errno)); - fret = -1; + "of PID %d: %s\n", ofd, strerror(errno)); + + string_ret = NULL; } do_partial_cleanup: if (fd >= 0) close(fd); + + if (!string_ret && name) + free(name); + close(ofd); - return fret; + return string_ret; } /* If the caller (real uid, not effective uid) may read the /proc/[pid]/ns/net, @@ -912,50 +963,73 @@ static bool may_access_netns(int pid) return may_access; } +struct user_nic_args { + char *cmd; + char *lxc_path; + char *lxc_name; + char *pid; + char *type; + char *link; + char *veth_name; +}; + +#define LXC_USERNIC_CREATE 0 +#define LXC_USERNIC_DELETE 1 + int main(int argc, char *argv[]) { - int n, fd; - char *me; - char *nicname; - int pid; - char *cnic = NULL; /* Created nic name in container is returned here. 
*/ - char *vethname = NULL; - bool gotone = false; + int fd, ifindex, n, pid, request, ret; + char *me, *newname; + char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; + struct user_nic_args args; - nicname = alloca(40); - if (!nicname) { - usernic_error("Failed allocate memory: %s\n", strerror(errno)); + if (argc < 7 || argc > 8) { + usage(argv[0], true); exit(EXIT_FAILURE); } - /* set a sane env, because we are setuid-root */ - if (clearenv() < 0) { + memset(&args, 0, sizeof(struct user_nic_args)); + args.cmd = argv[1]; + args.lxc_path = argv[2]; + args.lxc_name = argv[3]; + args.pid = argv[4]; + args.type = argv[5]; + args.link = argv[6]; + if (argc >= 8) + args.veth_name = argv[7]; + + if (!strcmp(args.cmd, "create")) { + request = LXC_USERNIC_CREATE; + } else if (!strcmp(args.cmd, "delete")) { + request = LXC_USERNIC_DELETE; + } else { + usage(argv[0], true); + exit(EXIT_FAILURE); + } + + /* Set a sane env, because we are setuid-root. */ + ret = clearenv(); + if (ret) { usernic_error("%s", "Failed to clear environment\n"); exit(EXIT_FAILURE); } - if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1) < 0) { + + ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1); + if (ret < 0) { usernic_error("%s", "Failed to set PATH, exiting\n"); exit(EXIT_FAILURE); } - if ((me = get_username()) == NULL) { + + me = get_username(); + if (!me) { usernic_error("%s", "Failed to get username\n"); exit(EXIT_FAILURE); } - if (argc < 6) - usage(argv[0], true); - - if (argc >= 7) - vethname = argv[6]; - - lxcpath = argv[1]; - lxcname = argv[2]; - - errno = 0; - pid = strtol(argv[3], NULL, 10); - if (errno) { - usernic_error("Could not read pid: %s\n", argv[1]); + ret = lxc_safe_int(args.pid, &pid); + if (ret < 0) { + usernic_error("Could not read pid: %s\n", args.pid); exit(EXIT_FAILURE); } @@ -964,7 +1038,8 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - if ((fd = 
open_and_lock(LXC_USERNIC_DB)) < 0) { + fd = open_and_lock(LXC_USERNIC_DB); + if (fd < 0) { usernic_error("Failed to lock %s\n", LXC_USERNIC_DB); exit(EXIT_FAILURE); } @@ -974,28 +1049,74 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - n = get_alloted(me, argv[4], argv[5], &alloted); + n = get_alloted(me, args.type, args.link, &alloted); + + if (request == LXC_USERNIC_DELETE) { + int ret; + struct alloted_s *it; + bool found_nicname = false; + + if (!is_ovs_bridge(args.link)) { + usernic_error("%s", "Deletion of non ovs type network " + "devices not implemented\n"); + close(fd); + free_alloted(&alloted); + exit(EXIT_FAILURE); + } + + /* Check whether the network device we are supposed to delete + * exists in the db. If it doesn't we will not delete it as we + * need to assume the network device is not under our control. + * As a side effect we also clear any invalid entries from the + * database. + */ + for (it = alloted; it; it = it->next) + cull_entries(fd, it->name, args.type, args.link, + args.veth_name, &found_nicname); + close(fd); + free_alloted(&alloted); + + if (!found_nicname) { + usernic_error("%s", "Caller is not allowed to delete " + "network device\n"); + exit(EXIT_FAILURE); + } + + ret = lxc_ovs_delete_port(args.link, args.veth_name); + if (ret < 0) { + usernic_error("Failed to remove port \"%s\" from " + "openvswitch bridge \"%s\"", + args.veth_name, args.link); + exit(EXIT_FAILURE); + } + + exit(EXIT_SUCCESS); + } if (n > 0) - gotone = get_nic_if_avail(fd, alloted, pid, argv[4], argv[5], n, &nicname, &cnic); + nicname = get_nic_if_avail(fd, alloted, pid, args.type, + args.link, n, &cnic); close(fd); free_alloted(&alloted); - if (!gotone) { + if (!nicname) { usernic_error("%s", "Quota reached\n"); exit(EXIT_FAILURE); } /* Now rename the link. 
*/ - if (rename_in_ns(pid, cnic, &vethname) < 0) { + newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name, &ifindex); + if (!newname) { usernic_error("%s", "Failed to rename the link\n"); - if (lxc_netdev_delete_by_name(cnic) < 0) - usernic_error("Failed to delete link \"%s\" the link. Manual cleanup needed\n", cnic); + ret = lxc_netdev_delete_by_name(cnic); + if (ret < 0) + usernic_error("Failed to delete \"%s\"\n", cnic); + free(nicname); exit(EXIT_FAILURE); } - /* Write the name of the interface pair to the stdout - like - * eth0:veth9MT2L4. - */ - fprintf(stdout, "%s:%s\n", vethname, nicname); + /* Write the name of the interface pair to the stdout: eth0:veth9MT2L4 */ + fprintf(stdout, "%s:%s:%d\n", newname, nicname, ifindex); + free(newname); + free(nicname); exit(EXIT_SUCCESS); } diff --git a/src/lxc/network.c b/src/lxc/network.c index d1353eafe..12b7d697e 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -21,8 +21,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" - +#define _GNU_SOURCE #include #include #include @@ -47,6 +46,8 @@ #include #include "conf.h" +#include "config.h" +#include "confile_utils.h" #include "log.h" #include "network.h" #include "nl.h" @@ -92,6 +93,355 @@ lxc_log_define(lxc_network, lxc); +typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *); + +static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int bridge_index, err; + char *veth1, *veth2; + char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ]; + unsigned int mtu = 0; + + if (netdev->priv.veth_attr.pair) { + veth1 = netdev->priv.veth_attr.pair; + if (handler->conf->reboot) + lxc_netdev_delete_by_name(veth1); + } else { + err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX"); + if (err < 0 || (size_t)err >= sizeof(veth1buf)) + return -1; + + veth1 = lxc_mkifname(veth1buf); + if (!veth1) + return -1; + + /* store away for deconf */ + 
memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); + } + + snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); + veth2 = lxc_mkifname(veth2buf); + if (!veth2) + goto out_delete; + + err = lxc_veth_create(veth1, veth2); + if (err) { + ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1, + veth2, strerror(-err)); + goto out_delete; + } + + /* changing the high byte of the mac address to 0xfe, the bridge interface + * will always keep the host's mac address and not take the mac address + * of a container */ + err = setup_private_host_hw_addr(veth1); + if (err) { + ERROR("Failed to change mac address of host interface \"%s\": %s", + veth1, strerror(-err)); + goto out_delete; + } + + netdev->ifindex = if_nametoindex(veth2); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", veth2); + goto out_delete; + } + + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) + WARN("Failed to parse mtu"); + else + INFO("Retrieved mtu %d", mtu); + } else if (netdev->link) { + bridge_index = if_nametoindex(netdev->link); + if (bridge_index) { + mtu = netdev_get_mtu(bridge_index); + INFO("Retrieved mtu %d from %s", mtu, netdev->link); + } else { + mtu = netdev_get_mtu(netdev->ifindex); + INFO("Retrieved mtu %d from %s", mtu, veth2); + } + } + + if (mtu) { + err = lxc_netdev_set_mtu(veth1, mtu); + if (!err) + err = lxc_netdev_set_mtu(veth2, mtu); + if (err) { + ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" " + "and \"%s\": %s", + mtu, veth1, veth2, strerror(-err)); + goto out_delete; + } + } + + if (netdev->link) { + err = lxc_bridge_attach(netdev->link, veth1); + if (err) { + ERROR("Failed to attach \"%s\" to bridge \"%s\": %s", + veth1, netdev->link, strerror(-err)); + goto out_delete; + } + INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link); + } + + err = lxc_netdev_up(veth1); + if (err) { + ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err)); + goto out_delete; + } + + if (netdev->upscript) { + err = 
run_script(handler->name, "net", netdev->upscript, "up", + "veth", veth1, (char*) NULL); + if (err) + goto out_delete; + } + + DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, + netdev->ifindex); + + return 0; + +out_delete: + if (netdev->ifindex != 0) + lxc_netdev_delete_by_name(veth1); + if (!netdev->priv.veth_attr.pair) + free(veth1); + free(veth2); + return -1; +} + +static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peerbuf[IFNAMSIZ], *peer; + int err; + + if (!netdev->link) { + ERROR("No link for macvlan network device specified"); + return -1; + } + + err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX"); + if (err < 0 || (size_t)err >= sizeof(peerbuf)) + return -1; + + peer = lxc_mkifname(peerbuf); + if (!peer) + return -1; + + err = lxc_macvlan_create(netdev->link, peer, + netdev->priv.macvlan_attr.mode); + if (err) { + ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + goto out; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + goto out; + } + + if (netdev->upscript) { + err = run_script(handler->name, "net", netdev->upscript, "up", + "macvlan", netdev->link, (char*) NULL); + if (err) + goto out; + } + + DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d", + peer, netdev->ifindex, netdev->priv.macvlan_attr.mode); + + return 0; +out: + lxc_netdev_delete_by_name(peer); + free(peer); + return -1; +} + +static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char peer[IFNAMSIZ]; + int err; + static uint16_t vlan_cntr = 0; + unsigned int mtu = 0; + + if (!netdev->link) { + ERROR("No link for vlan network device specified"); + return -1; + } + + err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++); + if (err < 0 || (size_t)err >= sizeof(peer)) + return -1; + + err = 
lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid); + if (err) { + ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s", + peer, netdev->link, strerror(-err)); + return -1; + } + + netdev->ifindex = if_nametoindex(peer); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", peer); + lxc_netdev_delete_by_name(peer); + return -1; + } + + DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)", + peer, netdev->ifindex); + if (netdev->mtu) { + if (lxc_safe_uint(netdev->mtu, &mtu) < 0) { + ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".", + netdev->ifindex, + netdev->name ? netdev->name : "(null)"); + /* The vlan interface was already created above: delete it so an unparsable mtu does not leave it behind, as the other error paths do. */ + lxc_netdev_delete_by_name(peer); + return -1; + } + err = lxc_netdev_set_mtu(peer, mtu); + if (err) { + ERROR("Failed to set mtu \"%s\" for \"%s\": %s", + netdev->mtu, peer, strerror(-err)); + lxc_netdev_delete_by_name(peer); + return -1; + } + } + + return 0; +} + +static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + if (!netdev->link) { + ERROR("No link for physical interface specified"); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link); + return -1; + } + + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + + return 0; +} + +static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + if (netdev->upscript) { + int err; + err = run_script(handler->name, "net", netdev->upscript, + "up", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + netdev->ifindex = 0; + return 0; +} + +static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = instantiate_veth, + [LXC_NET_MACVLAN] = instantiate_macvlan, + [LXC_NET_VLAN] = 
instantiate_vlan, + [LXC_NET_PHYS] = instantiate_phys, + [LXC_NET_EMPTY] = instantiate_empty, + [LXC_NET_NONE] = instantiate_none, +}; + +static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + char *veth1; + int err; + + if (netdev->priv.veth_attr.pair) + veth1 = netdev->priv.veth_attr.pair; + else + veth1 = netdev->priv.veth_attr.veth1; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "veth", veth1, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "macvlan", netdev->link, + (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "phys", netdev->link, (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + int err; + + if (netdev->downscript) { + err = run_script(handler->name, "net", netdev->downscript, + "down", "empty", (char*) NULL); + if (err) + return -1; + } + return 0; +} + +static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev) +{ + return 0; +} + +static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = { + [LXC_NET_VETH] = shutdown_veth, + [LXC_NET_MACVLAN] = shutdown_macvlan, + [LXC_NET_VLAN] = shutdown_vlan, + [LXC_NET_PHYS] = shutdown_phys, + [LXC_NET_EMPTY] = shutdown_empty, + [LXC_NET_NONE] = shutdown_none, +}; + int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname) { int err; @@ -1544,7 +1894,7 @@ const char 
*lxc_net_type_to_str(int type) static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -char *lxc_mkifname(char *template) +char *lxc_mkifname(const char *template) { int ifexists = 0; size_t i = 0; @@ -1633,3 +1983,842 @@ int setup_private_host_hw_addr(char *veth1) return 0; } + +int lxc_find_gateway_addresses(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + struct lxc_netdev *netdev; + int link_index; + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto) + continue; + + if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) { + ERROR("Automatic gateway detection is only supported " + "for veth and macvlan"); + return -1; + } + + if (!netdev->link) { + ERROR("Automatic gateway detection needs a link interface"); + return -1; + } + + link_index = if_nametoindex(netdev->link); + if (!link_index) + return -EINVAL; + + if (netdev->ipv4_gateway_auto) { + if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) { + ERROR("Failed to automatically find ipv4 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + + if (netdev->ipv6_gateway_auto) { + if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) { + ERROR("Failed to automatically find ipv6 gateway " + "address from link interface \"%s\"", netdev->link); + return -1; + } + } + } + + return 0; +} + +#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic" +static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int ret; + pid_t child; + int bytes, pipefd[2]; + char *token, *saveptr = NULL; + char netdev_link[IFNAMSIZ + 1]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to 
create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (netdev->link) + strncpy(netdev_link, netdev->link, IFNAMSIZ); + else + strncpy(netdev_link, "none", IFNAMSIZ); + /* strncpy() does not terminate the copy when the source fills all IFNAMSIZ bytes; the buffer has one spare byte for this. */ + netdev_link[IFNAMSIZ] = '\0'; + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev_link, + netdev->name ? netdev->name : "(null)"); + if (netdev->name) + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + netdev->name, (char *)NULL); + else + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create", + lxcpath, lxcname, pidstr, "veth", netdev_link, + (char *)NULL); + SYSERROR("Failed to execute lxc-user-nic"); + exit(EXIT_FAILURE); + } + + /* close the write-end of the pipe */ + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + /* read() returns 0 when the child wrote nothing; only replace the last byte read when there is one, otherwise this would write to buffer[-1]. The buffer is zero-initialized, so it is an empty string in that case. */ + if (bytes > 0) + buffer[bytes - 1] = '\0'; + + ret = wait_for_pid(child); + close(pipefd[0]); + if (ret != 0) { + ERROR("lxc-user-nic failed to configure requested network: %s", + buffer[0] != '\0' ? 
buffer : "(null)"); + return -1; + } + TRACE("Received output \"%s\" from lxc-user-nic", buffer); + + /* netdev->name */ + token = strtok_r(buffer, ":", &saveptr); + if (!token) + return -1; + + netdev->name = malloc(IFNAMSIZ + 1); + if (!netdev->name) { + SYSERROR("Failed to allocate memory."); + return -1; + } + memset(netdev->name, 0, IFNAMSIZ + 1); + strncpy(netdev->name, token, IFNAMSIZ); + + /* netdev->priv.veth_attr.pair */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + netdev->priv.veth_attr.pair = strdup(token); + if (!netdev->priv.veth_attr.pair) { + ERROR("Failed to allocate memory."); + return -1; + } + + /* netdev->ifindex */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + return -1; + + ret = lxc_safe_int(token, &netdev->ifindex); + if (ret < 0) { + ERROR("Failed to parse ifindex for network device \"%s\"", netdev->name); + return -1; + } + + return 0; +} + +static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, pid_t pid) +{ + int bytes, ret; + pid_t child; + int pipefd[2]; + char buffer[MAXPATHLEN] = {0}; + + if (netdev->type != LXC_NET_VETH) { + ERROR("Network type %d not support for unprivileged use", netdev->type); + return -1; + } + + ret = pipe(pipefd); + if (ret < 0) { + SYSERROR("Failed to create pipe"); + return -1; + } + + child = fork(); + if (child < 0) { + SYSERROR("Failed to create new process"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (child == 0) { + int ret; + char pidstr[LXC_NUMSTRLEN64]; + + close(pipefd[0]); + + ret = dup2(pipefd[1], STDOUT_FILENO); + if (ret >= 0) + ret = dup2(pipefd[1], STDERR_FILENO); + close(pipefd[1]); + if (ret < 0) { + SYSERROR("Failed to duplicate std{err,out} file descriptor"); + exit(EXIT_FAILURE); + } + + if (!netdev->link) + SYSERROR("Network link for network device \"%s\" is " + "missing", netdev->priv.veth_attr.pair); + + ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); + if (ret < 0 || ret >= 
LXC_NUMSTRLEN64) + exit(EXIT_FAILURE); + pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; + + INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, + lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); + execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, + lxcname, pidstr, "veth", netdev->link, + netdev->priv.veth_attr.pair, (char *)NULL); + SYSERROR("Failed to exec lxc-user-nic."); + exit(EXIT_FAILURE); + } + + close(pipefd[1]); + + bytes = read(pipefd[0], &buffer, MAXPATHLEN); + if (bytes < 0) { + SYSERROR("Failed to read from pipe file descriptor."); + close(pipefd[0]); + return -1; + } + /* read() returns 0 when the child wrote nothing; only replace the last byte read when there is one, otherwise this would write to buffer[-1]. The buffer is zero-initialized, so it is an empty string in that case. */ + if (bytes > 0) + buffer[bytes - 1] = '\0'; + + if (wait_for_pid(child) != 0) { + ERROR("lxc-user-nic failed to delete requested network: %s", + buffer[0] != '\0' ? buffer : "(null)"); + close(pipefd[0]); + return -1; + } + + close(pipefd[0]); + + return 0; +} + +int lxc_create_network_priv(struct lxc_handler *handler) +{ + bool am_root; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + + /* We need to be root. 
*/ + am_root = (getuid() == 0); + if (!am_root) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + /* netdev_conf[] has no handler at index LXC_NET_MAXCONFTYPE, so that value must be rejected as well or a NULL callback would be invoked. */ + if (netdev->type < 0 || netdev->type >= LXC_NET_MAXCONFTYPE) { + ERROR("Invalid network configuration type %d", netdev->type); + return -1; + } + + if (netdev_conf[netdev->type](handler, netdev)) { + ERROR("Failed to create network device"); + return -1; + } + + } + + return 0; +} + +int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid) +{ + int err; + bool am_root; + char ifname[IFNAMSIZ]; + struct lxc_list *iterator; + + am_root = (getuid() == 0); + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_VETH && !am_root) { + if (netdev->mtu) + INFO("mtu ignored due to insufficient privilege"); + if (lxc_create_network_unpriv(lxcpath, lxcname, netdev, pid)) + return -1; + /* lxc-user-nic has moved the nic to the new ns. + * lxc_create_network_unpriv() fills in netdev->name. + * netdev->ifindex will be filled in at + * lxc_setup_netdev_in_child_namespaces(). + */ + continue; + } + + /* empty network namespace, nothing to move */ + if (!netdev->ifindex) + continue; + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, ifname)) { + ERROR("No interface corresponding to ifindex \"%d\"", + netdev->ifindex); + return -1; + } + + err = lxc_netdev_move_by_name(ifname, pid, NULL); + if (err) { + ERROR("Failed to move network device \"%s\" to " + "network namespace %d: %s", ifname, pid, + strerror(-err)); + return -1; + } + + DEBUG("Moved network device \"%s\"/\"%s\" to network namespace " + "of %d:", ifname, netdev->name ? 
netdev->name : "(null)", + pid); + } + + return 0; +} + +bool lxc_delete_network(struct lxc_handler *handler) +{ + int ret; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + bool deleted_all = true; + + lxc_list_for_each(iterator, network) { + char *hostveth = NULL; + struct lxc_netdev *netdev = iterator->elem; + + /* We can only delete devices whose ifindex we have. If we don't + * have the index it means that we didn't create it. + */ + if (!netdev->ifindex) + continue; + + if (netdev->type == LXC_NET_PHYS) { + ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link); + if (ret < 0) + WARN("Failed to rename interface with index %d " + "to its initial name \"%s\"", + netdev->ifindex, netdev->link); + else + TRACE("Renamed interface with index %d to its " + "initial name \"%s\"", + netdev->ifindex, netdev->link); + continue; + } + + ret = netdev_deconf[netdev->type](handler, netdev); + if (ret < 0) + WARN("Failed to deconfigure network device"); + + /* Recent kernels remove the virtual interfaces when the network + * namespace is destroyed but in case we did not move the + * interface to the network namespace, we have to destroy it. + */ + if (!am_unpriv()) { + ret = lxc_netdev_delete_by_index(netdev->ifindex); + if (-ret == ENODEV) { + INFO("Interface \"%s\" with index %d already " + "deleted or existing in different network " + "namespace", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } else if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" with " + "index %d: %s", + netdev->name ? netdev->name : "(null)", + netdev->ifindex, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" with index %d", + netdev->name ? 
netdev->name : "(null)", + netdev->ifindex); + } + + if (netdev->type != LXC_NET_VETH) + continue; + + if (am_unpriv()) { + if (is_ovs_bridge(netdev->link)) { + ret = lxc_delete_network_unpriv(handler->lxcpath, + handler->name, + netdev, getpid()); + if (ret < 0) + WARN("Failed to remove port \"%s\" " + "from openvswitch bridge \"%s\"", + netdev->priv.veth_attr.pair, + netdev->link); + } + + continue; + } + + /* Explicitly delete host veth device to prevent lingering + * devices. We had issues in LXD around this. + */ + if (netdev->priv.veth_attr.pair) + hostveth = netdev->priv.veth_attr.pair; + else + hostveth = netdev->priv.veth_attr.veth1; + if (*hostveth == '\0') + continue; + + ret = lxc_netdev_delete_by_name(hostveth); + if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" from \"%s\": %s", + hostveth, netdev->link, strerror(-ret)); + continue; + } + INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link); + + if (!is_ovs_bridge(netdev->link)) { + netdev->priv.veth_attr.veth1[0] = '\0'; + continue; + } + + /* Delete the openvswitch port. 
*/ + ret = lxc_ovs_delete_port(netdev->link, hostveth); + if (ret < 0) + WARN("Failed to remove port \"%s\" from openvswitch " + "bridge \"%s\"", hostveth, netdev->link); + else + INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", + hostveth, netdev->link); + + netdev->priv.veth_attr.veth1[0] = '\0'; + } + + return deleted_all; +} + +int lxc_requests_empty_network(struct lxc_handler *handler) +{ + struct lxc_list *network = &handler->conf->network; + struct lxc_list *iterator; + bool found_none = false, found_nic = false; + + if (lxc_list_empty(network)) + return 0; + + lxc_list_for_each(iterator, network) { + struct lxc_netdev *netdev = iterator->elem; + + if (netdev->type == LXC_NET_NONE) + found_none = true; + else + found_nic = true; + } + if (found_none && !found_nic) + return 1; + return 0; +} + +/* try to move physical nics to the init netns */ +void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf) +{ + int ret; + int i, oldfd; + char ifname[IFNAMSIZ]; + + if (netnsfd < 0 || conf->num_savednics == 0) + return; + + INFO("Trying to restore network device names in original namespace for " + "%d network devices", conf->num_savednics); + + oldfd = lxc_preserve_ns(getpid(), "net"); + if (oldfd < 0) { + SYSERROR("Failed to preserve network namespace"); + return; + } + + ret = setns(netnsfd, 0); + if (ret < 0) { + SYSERROR("Failed to enter network namespace"); + close(oldfd); + return; + } + + for (i = 0; i < conf->num_savednics; i++) { + struct saved_nic *s = &conf->saved_nics[i]; + + /* retrieve the name of the interface */ + if (!if_indextoname(s->ifindex, ifname)) { + WARN("No interface corresponding to ifindex %d", + s->ifindex); + /* num_savednics is reset to 0 below, so release the saved name here as well or it is never freed. */ + free(s->orig_name); + continue; + } + if (lxc_netdev_move_by_name(ifname, 1, s->orig_name)) + WARN("Error moving network device \"%s\" back to " + "network namespace", ifname); + free(s->orig_name); + } + conf->num_savednics = 0; + + ret = setns(oldfd, 0); + if (ret < 0) + SYSERROR("Failed to enter network namespace"); + 
close(oldfd); +} + +static int setup_hw_addr(char *hwaddr, const char *ifname) +{ + struct sockaddr sockaddr; + struct ifreq ifr; + int ret, fd, saved_errno; + + ret = lxc_convert_mac(hwaddr, &sockaddr); + if (ret) { + ERROR("Mac address \"%s\" conversion failed: %s", hwaddr, + strerror(-ret)); + return -1; + } + + memcpy(ifr.ifr_name, ifname, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; + memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr)); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + ret = ioctl(fd, SIOCSIFHWADDR, &ifr); + saved_errno = errno; + close(fd); + if (ret) + ERROR("Failed to perform ioctl: %s", strerror(saved_errno)); + + DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, + ifr.ifr_name); + + return ret; +} + +static int setup_ipv4_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inetdev *inetdev = iterator->elem; + + err = lxc_ipv4_addr_add(ifindex, &inetdev->addr, + &inetdev->bcast, inetdev->prefix); + if (err) { + ERROR("Failed to setup ipv4 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int setup_ipv6_addr(struct lxc_list *ip, int ifindex) +{ + struct lxc_list *iterator; + int err; + + lxc_list_for_each(iterator, ip) { + struct lxc_inet6dev *inet6dev = iterator->elem; + + err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr, + &inet6dev->mcast, &inet6dev->acast, + inet6dev->prefix); + if (err) { + ERROR("Failed to setup ipv6 address for network device " + "with eifindex %d: %s", ifindex, strerror(-err)); + return -1; + } + } + + return 0; +} + +static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev) +{ + char ifname[IFNAMSIZ]; + int err; + const char *net_type_name; + char *current_ifname = ifname; + + /* empty network namespace */ + if (!netdev->ifindex) { + if (netdev->flags & IFF_UP) { + err = 
lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network " + "device up: %s", + strerror(-err)); + return -1; + } + } + + if (netdev->type == LXC_NET_EMPTY) + return 0; + + if (netdev->type == LXC_NET_NONE) + return 0; + + if (netdev->type != LXC_NET_VETH) { + net_type_name = lxc_net_type_to_str(netdev->type); + ERROR("%s networks are not supported for containers " + "not setup up by privileged users", + net_type_name); + return -1; + } + + netdev->ifindex = if_nametoindex(netdev->name); + } + + /* get the new ifindex in case of physical netdev */ + if (netdev->type == LXC_NET_PHYS) { + netdev->ifindex = if_nametoindex(netdev->link); + if (!netdev->ifindex) { + ERROR("Failed to get ifindex for network device \"%s\"", + netdev->link); + return -1; + } + } + + /* retrieve the name of the interface */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* Default: let the system to choose one interface name. + * When the IFLA_IFNAME attribute is passed something like "%d" + * netlink will replace the format specifier with an appropriate index. + */ + if (!netdev->name) + netdev->name = netdev->type == LXC_NET_PHYS ? 
+ netdev->link : "eth%d"; + + /* rename the interface name */ + if (strcmp(ifname, netdev->name) != 0) { + err = lxc_netdev_rename_by_name(ifname, netdev->name); + if (err) { + ERROR("Failed to rename network device \"%s\" to " + "\"%s\": %s", ifname, netdev->name, strerror(-err)); + return -1; + } + } + + /* Re-read the name of the interface because its name has changed + * and would be automatically allocated by the system + */ + if (!if_indextoname(netdev->ifindex, current_ifname)) { + ERROR("Failed get name for network device with ifindex %d", + netdev->ifindex); + return -1; + } + + /* set a mac address */ + if (netdev->hwaddr) { + if (setup_hw_addr(netdev->hwaddr, current_ifname)) { + ERROR("Failed to setup hw address for network device \"%s\"", + current_ifname); + return -1; + } + } + + /* setup ipv4 addresses on the interface */ + if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) { + ERROR("Failed to setup ip addresses for network device \"%s\"", + ifname); + return -1; + } + + /* setup ipv6 addresses on the interface */ + if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) { + ERROR("Failed to setup ipv6 addresses for network device \"%s\"", + ifname); + return -1; + } + + /* set the network device up */ + if (netdev->flags & IFF_UP) { + err = lxc_netdev_up(current_ifname); + if (err) { + ERROR("Failed to set network device \"%s\" up: %s", + current_ifname, strerror(-err)); + return -1; + } + + /* the network is up, make the loopback up too */ + err = lxc_netdev_up("lo"); + if (err) { + ERROR("Failed to set the loopback network device up: %s", + strerror(-err)); + return -1; + } + } + + /* We can only set up the default routes after bringing + * up the interface, since bringing up the interface adds + * the link-local routes and we can't add a default + * route if the gateway is not reachable. 
*/ + + /* setup ipv4 gateway on the interface */ + if (netdev->ipv4_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv4)) { + ERROR("Cannot add ipv4 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to add ipv4 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway); + if (err) { + ERROR("Failed to setup ipv4 gateway for " + "network device \"%s\": %s", + ifname, strerror(-err)); + if (netdev->ipv4_gateway_auto) { + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf)); + ERROR("Tried to set autodetected ipv4 gateway \"%s\"", buf); + } + return -1; + } + } + } + + /* setup ipv6 gateway on the interface */ + if (netdev->ipv6_gateway) { + if (!(netdev->flags & IFF_UP)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not bringing up the interface", ifname); + return -1; + } + + if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) { + ERROR("Cannot add ipv6 gateway for network device " + "\"%s\" when not assigning an address", ifname); + return -1; + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to add ipv6 dest for network " + "device \"%s\": %s", ifname, strerror(-err)); + } + + err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway); + if (err) { + ERROR("Failed to setup ipv6 gateway for " + "network device \"%s\": %s", ifname, + strerror(-err)); + if (netdev->ipv6_gateway_auto) 
{ + char buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf)); + ERROR("Tried to set autodetected ipv6 " + "gateway for network device " + "\"%s\"", buf); + } + return -1; + } + } + } + + DEBUG("Network devie \"%s\" has been setup", current_ifname); + + return 0; +} + +int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network) +{ + struct lxc_list *iterator; + struct lxc_netdev *netdev; + + lxc_log_configured_netdevs(conf); + + lxc_list_for_each(iterator, network) { + netdev = iterator->elem; + + /* REMOVE in LXC 3.0 */ + if (netdev->idx < 0) { + ERROR("WARNING: using \"lxc.network.*\" keys to define " + "networks is DEPRECATED, please switch to using " + "\"lxc.net.[i].* keys\""); + } + + if (lxc_setup_netdev_in_child_namespaces(netdev)) { + ERROR("failed to setup netdev"); + return -1; + } + } + + if (!lxc_list_empty(network)) + INFO("network has been setup"); + + return 0; +} diff --git a/src/lxc/network.h b/src/lxc/network.h index 8a79a0622..d1b8de9b7 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -23,11 +23,120 @@ #ifndef __LXC_NETWORK_H #define __LXC_NETWORK_H +#include #include #include #include #include +#include "list.h" + +struct lxc_conf; +struct lxc_handler; +struct lxc_netdev; + +enum { + LXC_NET_EMPTY, + LXC_NET_VETH, + LXC_NET_MACVLAN, + LXC_NET_PHYS, + LXC_NET_VLAN, + LXC_NET_NONE, + LXC_NET_MAXCONFTYPE, +}; + +/* + * Defines the structure to configure an ipv4 address + * @address : ipv4 address + * @broadcast : ipv4 broadcast address + * @mask : network mask + */ +struct lxc_inetdev { + struct in_addr addr; + struct in_addr bcast; + unsigned int prefix; +}; + +struct lxc_route { + struct in_addr addr; +}; + +/* + * Defines the structure to configure an ipv6 address + * @flags : set the address up + * @address : ipv6 address + * @broadcast : ipv6 broadcast address + * @mask : network mask + */ +struct lxc_inet6dev { + struct in6_addr addr; + struct in6_addr mcast; + 
struct in6_addr acast; + unsigned int prefix; +}; + +struct lxc_route6 { + struct in6_addr addr; +}; + +struct ifla_veth { + char *pair; /* pair name */ + char veth1[IFNAMSIZ]; /* needed for deconf */ +}; + +struct ifla_vlan { + unsigned int flags; + unsigned int fmask; + unsigned short vid; + unsigned short pad; +}; + +struct ifla_macvlan { + int mode; /* private, vepa, bridge, passthru */ +}; + +union netdev_p { + struct ifla_veth veth_attr; + struct ifla_vlan vlan_attr; + struct ifla_macvlan macvlan_attr; +}; + +/* + * Defines a structure to configure a network device + * @link : lxc.net.[i].link, name of bridge or host iface to attach if any + * @name : lxc.net.[i].name, name of iface on the container side + * @flags : flag of the network device (IFF_UP, ... ) + * @ipv4 : a list of ipv4 addresses to be set on the network device + * @ipv6 : a list of ipv6 addresses to be set on the network device + * @upscript : a script filename to be executed during interface configuration + * @downscript : a script filename to be executed during interface destruction + * @idx : network counter + */ +struct lxc_netdev { + ssize_t idx; + int type; + int flags; + int ifindex; + char *link; + char *name; + char *hwaddr; + char *mtu; + union netdev_p priv; + struct lxc_list ipv4; + struct lxc_list ipv6; + struct in_addr *ipv4_gateway; + bool ipv4_gateway_auto; + struct in6_addr *ipv6_gateway; + bool ipv6_gateway_auto; + char *upscript; + char *downscript; +}; + +struct saved_nic { + int ifindex; + char *orig_name; +}; + /* Convert a string mac address to a socket structure. */ extern int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr); @@ -106,11 +215,22 @@ extern int lxc_neigh_proxy_on(const char *name, int family); /* Disable neighbor proxying. */ extern int lxc_neigh_proxy_off(const char *name, int family); -/* Generate a new unique network interface name. */ -extern char *lxc_mkifname(char *template); +/* Generate a new unique network interface name. 
+ * Allocated memory must be freed by caller. + */ +extern char *lxc_mkifname(const char *template); extern const char *lxc_net_type_to_str(int type); extern int setup_private_host_hw_addr(char *veth1); extern int netdev_get_mtu(int ifindex); +extern int lxc_create_network_priv(struct lxc_handler *handler); +extern bool lxc_delete_network(struct lxc_handler *handler); +extern int lxc_find_gateway_addresses(struct lxc_handler *handler); +extern int lxc_create_network(const char *lxcpath, char *lxcname, + struct lxc_list *network, pid_t pid); +extern int lxc_requests_empty_network(struct lxc_handler *handler); +extern void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf); +extern int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf, + struct lxc_list *network); #endif /* __LXC_NETWORK_H */ diff --git a/src/lxc/start.c b/src/lxc/start.c index a360f784c..ac37a091c 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -77,6 +77,7 @@ #include "mainloop.h" #include "monitor.h" #include "namespace.h" +#include "network.h" #include "start.h" #include "storage.h" #include "storage_utils.h" @@ -1246,7 +1247,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* That should be done before the clone because we will * fill the netdev index and use them in the child. */ - if (lxc_setup_networks_in_parent_namespaces(handler)) { + if (lxc_create_network_priv(handler)) { ERROR("Failed to create the network."); lxc_sync_fini(handler); return -1; @@ -1364,7 +1365,7 @@ static int lxc_spawn(struct lxc_handler *handler) /* Create the network configuration. 
*/ if (handler->clone_flags & CLONE_NEWNET) { - if (lxc_assign_network(handler->lxcpath, handler->name, + if (lxc_create_network(handler->lxcpath, handler->name, &handler->conf->network, handler->pid)) { ERROR("Failed to create the configured network."); goto out_delete_net; diff --git a/src/tests/lxc-test-usernic.in b/src/tests/lxc-test-usernic.in index 08b9b55fc..53bc8166c 100755 --- a/src/tests/lxc-test-usernic.in +++ b/src/tests/lxc-test-usernic.in @@ -153,7 +153,7 @@ lxcpath=/home/usernic-user/.local/share/lxc lxcname=b1 # Assign one veth, should fail as no allowed entries yet -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx1"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx1"; then echo "FAIL: able to create nic with no entries" exit 1 fi @@ -164,24 +164,24 @@ sed -i '/^usernic-user/d' /etc/lxc/lxc-usernet echo "usernic-user veth usernic-br0 2" >> /etc/lxc/lxc-usernet # Assign one veth to second bridge, should fail -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br1 xx1"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br1 xx1"; then echo "FAIL: able to create nic with no entries" exit 1 fi # Assign two veths, should succeed -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx2"; then +if ! run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx2"; then echo "FAIL: unable to create first nic" exit 1 fi -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx3"; then +if ! run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx3"; then echo "FAIL: unable to create second nic" exit 1 fi # Assign one more veth, should fail. 
-if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx4"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx4"; then echo "FAIL: able to create third nic" exit 1 fi @@ -191,7 +191,7 @@ run_cmd "lxc-stop -n b1 -k" run_cmd "lxc-start -n b1 -d" p1=$(run_cmd "lxc-info -n b1 -p -H") -if ! run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p1 veth usernic-br0 xx5"; then +if ! run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p1 veth usernic-br0 xx5"; then echo "FAIL: unable to create nic after destroying the old" cleanup 1 fi @@ -204,7 +204,7 @@ lxc-start -n usernic-c1 -d p2=$(lxc-info -n usernic-c1 -p -H) # assign veth to it - should fail -if run_cmd "$LXC_USER_NIC $lxcpath $lxcname $p2 veth usernic-br0 xx6"; then +if run_cmd "$LXC_USER_NIC create $lxcpath $lxcname $p2 veth usernic-br0 xx6"; then echo "FAIL: able to attach nic to root-owned container" cleanup 1 fi