diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
index 5d19f2b1e..96940039b 100755
--- a/INSTALL.DPDK-ADVANCED.md
+++ b/INSTALL.DPDK-ADVANCED.md
@@ -570,6 +570,49 @@ For users wanting to do packet forwarding using kernel stack below are the steps
     where `-L`: Changes the numbers of channels of the specified network device
     and `combined`: Changes the number of multi-purpose channels.
 
+  4. OVS vHost client-mode & vHost reconnect (OPTIONAL)
+
+     By default, OVS DPDK acts as the vHost socket server for dpdkvhostuser
+     ports and QEMU acts as the vHost client. This means OVS creates and
+     manages the vHost socket and QEMU is the client which connects to the
+     vHost server (OVS). In QEMU v2.7 the option is available for QEMU to
+     act as the vHost server, meaning the roles can be reversed and OVS can
+     become the vHost client. To enable client mode for a given
+     dpdkvhostuser port, one must specify a valid 'vhost-server-path' like
+     so:
+
+     ```
+     ovs-vsctl set Interface dpdkvhostuser0 options:vhost-server-path=/path/to/socket
+     ```
+
+     Setting this value automatically switches the port to client mode
+     (from OVS' perspective). 'vhost-server-path' reflects the full path of
+     the socket that has been or will be created by QEMU for the given
+     vHost User port. Once a path is specified, the port will remain in
+     'client' mode for the remainder of its lifetime, i.e. it cannot be
+     reverted back to server mode.
+
+     To instruct QEMU to use vHost server mode for a given interface, one
+     must append ',server' to the 'chardev' arguments on the QEMU command
+     line, like so:
+
+     ```
+     -chardev socket,id=char0,path=/path/to/socket,server
+     ```
+
+     If the corresponding dpdkvhostuser port has not yet been configured in
+     OVS with vhost-server-path=/path/to/socket, QEMU will print a log
+     similar to the following:
+
+     `QEMU waiting for connection on: disconnected:unix:/path/to/socket,server`
+
+     QEMU will wait until the port has been created successfully in OVS
+     before booting the VM.
+
+     One benefit of using this mode is the ability for vHost ports to
+     'reconnect' in the event of the switch crashing or being brought down.
+     Once it is brought back up, the vHost ports will reconnect
+     automatically and normal service will resume.
+
   - VM Configuration with libvirt
 
     * change the user/group, access control policty and restart libvirtd.
diff --git a/NEWS b/NEWS
index 5dbcd1d7a..e989d6429 100644
--- a/NEWS
+++ b/NEWS
@@ -72,6 +72,7 @@ Post-v2.5.0
      * Optional support for DPDK pdump enabled.
      * Jumbo frame support
      * Remove dpdkvhostcuse port type.
+     * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7)
    - Increase number of registers to 16.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 9c39f5619..e5f2cdd09 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -356,10 +356,9 @@ struct netdev_dpdk {
     /* True if vHost device is 'up' and has been reconfigured at least once */
     bool vhost_reconfigured;
 
-    /* Identifier used to distinguish vhost devices from each other. It does
-     * not change during the lifetime of a struct netdev_dpdk. It can be read
-     * without holding any mutex. */
-    const char vhost_id[PATH_MAX];
+    /* Identifiers used to distinguish vhost devices from each other. */
+    char vhost_server_id[PATH_MAX];
+    char vhost_client_id[PATH_MAX];
 
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 
@@ -378,6 +377,9 @@ struct netdev_dpdk {
     /* Socket ID detected when vHost device is brought up */
     int requested_socket_id;
 
+    /* Denotes whether vHost port is client/server mode */
+    uint64_t vhost_driver_flags;
+
     /* Ingress Policer */
     OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
     uint32_t policer_rate;
@@ -812,6 +814,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
     ovsrcu_index_init(&dev->vid, -1);
     dev->vhost_reconfigured = false;
+    /* initialise vHost port in server mode */
+    dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
 
     err = netdev_dpdk_mempool_configure(dev);
     if (err) {
@@ -874,13 +878,22 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     }
 }
 
+/* Returns a pointer to the relevant vHost socket ID depending on the mode in
+ * use */
+static char *
+get_vhost_id(struct netdev_dpdk *dev)
+    OVS_REQUIRES(dev->mutex)
+{
+    return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ?
+           dev->vhost_client_id : dev->vhost_server_id;
+}
+
 static int
 netdev_dpdk_vhost_construct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     const char *name = netdev->name;
     int err;
-    uint64_t flags = 0;
 
     /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
      * the file system. '/' or '\' would traverse directories, so they're not
@@ -898,19 +911,24 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
     ovs_mutex_lock(&dpdk_mutex);
     /* Take the name of the vhost-user port and append it to the location where
-     * the socket is to be created, then register the socket.
+     * the socket is to be created, then register the socket. Sockets are
+     * registered initially in 'server' mode.
      */
-    snprintf(CONST_CAST(char *, dev->vhost_id), sizeof dev->vhost_id, "%s/%s",
+    snprintf(dev->vhost_server_id, sizeof dev->vhost_server_id, "%s/%s",
              vhost_sock_dir, name);
 
-    err = rte_vhost_driver_register(dev->vhost_id, flags);
+    err = rte_vhost_driver_register(dev->vhost_server_id,
+                                    dev->vhost_driver_flags);
     if (err) {
         VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
-                 dev->vhost_id);
+                 dev->vhost_server_id);
     } else {
-        fatal_signal_add_file_to_unlink(dev->vhost_id);
-        VLOG_INFO("Socket %s created for vhost-user port %s\n",
-                  dev->vhost_id, name);
+        if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+            /* OVS server mode - add this socket to list for deletion */
+            fatal_signal_add_file_to_unlink(dev->vhost_server_id);
+            VLOG_INFO("Socket %s created for vhost-user port %s\n",
+                      dev->vhost_server_id, name);
+        }
         err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
     }
 
@@ -964,28 +982,31 @@ netdev_dpdk_destruct(struct netdev *netdev)
  * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a
  * deadlock, none of the mutexes must be held while calling this function. */
 static int
-dpdk_vhost_driver_unregister(struct netdev_dpdk *dev)
+dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED,
+                             char *vhost_id)
     OVS_EXCLUDED(dpdk_mutex)
     OVS_EXCLUDED(dev->mutex)
 {
-    return rte_vhost_driver_unregister(dev->vhost_id);
+    return rte_vhost_driver_unregister(vhost_id);
 }
 
 static void
 netdev_dpdk_vhost_destruct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    char *vhost_id;
 
     ovs_mutex_lock(&dpdk_mutex);
     ovs_mutex_lock(&dev->mutex);
 
     /* Guest becomes an orphan if still attached. */
-    if (netdev_dpdk_get_vid(dev) >= 0) {
+    if (netdev_dpdk_get_vid(dev) >= 0
+        && !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
         VLOG_ERR("Removing port '%s' while vhost device still attached.",
                  netdev->name);
         VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
                  " '%s' must be restarted.",
-                 dev->vhost_id);
+                 get_vhost_id(dev));
     }
 
     free(ovsrcu_get_protected(struct ingress_policer *,
@@ -995,14 +1016,18 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
     ovs_list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
 
+    vhost_id = xstrdup(get_vhost_id(dev));
+
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 
-    if (dpdk_vhost_driver_unregister(dev)) {
-        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
-    } else {
-        fatal_signal_remove_file_to_unlink(dev->vhost_id);
+    if (dpdk_vhost_driver_unregister(dev, vhost_id)) {
+        VLOG_ERR("Unable to remove vhost-user socket %s", vhost_id);
+    } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+        /* OVS server mode - remove this socket from list for deletion */
+        fatal_signal_remove_file_to_unlink(vhost_id);
     }
+    free(vhost_id);
 }
 
 static void
@@ -1082,6 +1107,23 @@ netdev_dpdk_ring_set_config(struct netdev *netdev, const struct smap *args)
     return 0;
 }
 
+static int
+netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    const char *path;
+
+    if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+        path = smap_get(args, "vhost-server-path");
+        if (path && strcmp(path, dev->vhost_client_id)) {
+            strcpy(dev->vhost_client_id, path);
+            netdev_request_reconfigure(netdev);
+        }
+    }
+
+    return 0;
+}
+
 static int
 netdev_dpdk_get_numa_id(const struct netdev *netdev)
 {
@@ -2284,7 +2326,7 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
         }
     }
 
-    VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id);
+    VLOG_DBG("TX queue mapping for %s\n", get_vhost_id(dev));
     for (i = 0; i < total_txqs; i++) {
         VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map);
     }
@@ -2308,10 +2350,10 @@ new_device(int vid)
     ovs_mutex_lock(&dpdk_mutex);
     /* Add device to the vhost port with the same name as that passed down. */
     LIST_FOR_EACH(dev, list_node, &dpdk_list) {
-        if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
+        ovs_mutex_lock(&dev->mutex);
+        if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
             uint32_t qp_num = rte_vhost_get_queue_num(vid);
 
-            ovs_mutex_lock(&dev->mutex);
             /* Get NUMA information */
             newnode = rte_vhost_get_numa_node(vid);
             if (newnode == -1) {
@@ -2341,6 +2383,7 @@ new_device(int vid)
             ovs_mutex_unlock(&dev->mutex);
             break;
         }
+        ovs_mutex_unlock(&dev->mutex);
     }
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -2434,8 +2477,8 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
 
     ovs_mutex_lock(&dpdk_mutex);
     LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            ovs_mutex_lock(&dev->mutex);
+        ovs_mutex_lock(&dev->mutex);
+        if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
             if (enable) {
                 dev->tx_q[qid].map = qid;
             } else {
@@ -2446,6 +2489,7 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
             ovs_mutex_unlock(&dev->mutex);
             break;
         }
+        ovs_mutex_unlock(&dev->mutex);
     }
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -2931,6 +2975,7 @@ static int
 netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    int err = 0;
 
     ovs_mutex_lock(&dpdk_mutex);
     ovs_mutex_lock(&dev->mutex);
@@ -2956,6 +3001,45 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
         dev->vhost_reconfigured = true;
     }
 
+    /* Configure vHost client mode if requested and if the following criteria
+     * are met:
+     *  1. Device is currently in 'server' mode.
+     *  2. Device is currently not active.
+     *  3. A path has been specified.
+     */
+    if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)
+        && !(netdev_dpdk_get_vid(dev) >= 0)
+        && strlen(dev->vhost_client_id)) {
+        /* Unregister server-mode device */
+        char *vhost_id = xstrdup(get_vhost_id(dev));
+
+        ovs_mutex_unlock(&dev->mutex);
+        ovs_mutex_unlock(&dpdk_mutex);
+        err = dpdk_vhost_driver_unregister(dev, vhost_id);
+        free(vhost_id);
+        ovs_mutex_lock(&dpdk_mutex);
+        ovs_mutex_lock(&dev->mutex);
+        if (err) {
+            VLOG_ERR("Unable to remove vhost-user socket %s",
+                     get_vhost_id(dev));
+        } else {
+            fatal_signal_remove_file_to_unlink(get_vhost_id(dev));
+            /* Register client-mode device */
+            err = rte_vhost_driver_register(dev->vhost_client_id,
+                                            RTE_VHOST_USER_CLIENT);
+            if (err) {
+                VLOG_ERR("vhost-user device setup failure for device %s\n",
+                         dev->vhost_client_id);
+            } else {
+                /* Configuration successful */
+                dev->vhost_driver_flags |= RTE_VHOST_USER_CLIENT;
+                VLOG_INFO("vHost User device '%s' changed to 'client' mode, "
+                          "using client socket '%s'",
+                          dev->up.name, get_vhost_id(dev));
+            }
+        }
+    }
+
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -3459,7 +3543,7 @@ static const struct netdev_class dpdk_vhost_class =
         "dpdkvhostuser",
         netdev_dpdk_vhost_construct,
         netdev_dpdk_vhost_destruct,
-        NULL,
+        netdev_dpdk_vhost_set_config,
         NULL,
         netdev_dpdk_vhost_send,
         netdev_dpdk_vhost_get_carrier,
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 8331b49c1..69b559202 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -2366,6 +2366,17 @@
 
+      <column name="options" key="vhost-server-path">
+        <p>
+          When specified, switches the given port permanently to 'client'
+          mode. The value specifies the path to the socket associated with a
+          vHost User client mode device that has been or will be created by
+          QEMU.
+        </p>
+        <p>
+          Only supported by DPDK vHost interfaces.
+        </p>
+      </column>
+
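
---

For reviewers, a minimal end-to-end sketch of the client-mode flow documented in the INSTALL.DPDK-ADVANCED.md hunk above. The bridge name, port name, socket path, guest image and memory sizing are illustrative placeholders, not part of this patch; only the 'vhost-server-path' option and the trailing ',server' on the QEMU chardev come from the change itself.

```
# Switch the vhost-user port to client mode: OVS will connect to the socket
# that QEMU creates at this (placeholder) path.
ovs-vsctl add-port br0 dpdkvhostuser0 -- set Interface dpdkvhostuser0 \
    type=dpdkvhostuser options:vhost-server-path=/tmp/dpdkvhostuser0.sock

# Boot the guest with QEMU (>= 2.7) acting as the vhost-user server; note the
# trailing ',server' on the chardev. Hugepage-backed shared memory is still
# required for vhost-user, exactly as for server-mode ports.
qemu-system-x86_64 -enable-kvm -m 1024 \
    -object memory-backend-file,id=mem,size=1024M,mem-path=/dev/hugepages,share=on \
    -numa node,memdev=mem -mem-prealloc \
    -chardev socket,id=char0,path=/tmp/dpdkvhostuser0.sock,server \
    -netdev type=vhost-user,id=net0,chardev=char0,vhostforce \
    -device virtio-net-pci,netdev=net0 \
    -drive file=guest.img
```

With this setup, restarting ovs-vswitchd does not require restarting the guest: the client-mode port reconnects to the QEMU-owned socket once the switch comes back up, which is the reconnect behaviour this patch enables.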