2016-08-24 14:53:52 -07:00
|
|
|
/* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
|
2009-07-08 13:19:16 -07:00
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at:
|
2009-07-08 13:19:16 -07:00
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2009-07-08 13:19:16 -07:00
|
|
|
*
|
2009-06-15 15:11:30 -07:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
2009-07-08 13:19:16 -07:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#include <getopt.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2009-11-30 13:17:34 -08:00
|
|
|
#ifdef HAVE_MLOCKALL
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
#include "bridge.h"
|
|
|
|
#include "command-line.h"
|
|
|
|
#include "compiler.h"
|
|
|
|
#include "daemon.h"
|
2011-07-26 10:13:07 -07:00
|
|
|
#include "dirs.h"
|
2011-11-17 18:06:55 -08:00
|
|
|
#include "dpif.h"
|
2010-11-29 12:21:08 -08:00
|
|
|
#include "dummy.h"
|
2014-02-26 10:44:46 -08:00
|
|
|
#include "fatal-signal.h"
|
2012-05-08 15:44:21 -07:00
|
|
|
#include "memory.h"
|
2009-07-30 16:04:45 -07:00
|
|
|
#include "netdev.h"
|
2011-10-03 21:52:39 -07:00
|
|
|
#include "openflow/openflow.h"
|
2009-12-03 11:28:40 -08:00
|
|
|
#include "ovsdb-idl.h"
|
2018-01-25 15:39:48 -08:00
|
|
|
#include "ovs-rcu.h"
|
2018-03-31 17:12:55 -07:00
|
|
|
#include "ovs-router.h"
|
2018-06-04 10:07:36 +02:00
|
|
|
#include "ovs-thread.h"
|
2017-11-03 13:53:53 +08:00
|
|
|
#include "openvswitch/poll-loop.h"
|
2012-05-08 15:44:21 -07:00
|
|
|
#include "simap.h"
|
2010-01-06 14:35:20 -08:00
|
|
|
#include "stream-ssl.h"
|
2009-12-03 11:28:40 -08:00
|
|
|
#include "stream.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
#include "svec.h"
|
|
|
|
#include "timeval.h"
|
|
|
|
#include "unixctl.h"
|
|
|
|
#include "util.h"
|
2021-12-22 10:17:12 +01:00
|
|
|
#include "openvswitch/usdt-probes.h"
|
2014-12-15 14:10:38 +01:00
|
|
|
#include "openvswitch/vconn.h"
|
2014-12-15 14:10:38 +01:00
|
|
|
#include "openvswitch/vlog.h"
|
2012-03-27 15:57:52 -07:00
|
|
|
#include "lib/vswitch-idl.h"
|
2018-06-26 14:06:21 -07:00
|
|
|
#include "lib/dns-resolve.h"
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2010-10-19 14:47:01 -07:00
|
|
|
/* Declares this file's vlog module under the name "vswitchd".  Presumably the
 * VLOG_* calls in this file log through it -- confirm against
 * openvswitch/vlog.h. */
VLOG_DEFINE_THIS_MODULE(vswitchd);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
vswitchd: Only lock pages that are faulted in.
The main purpose of locking the memory is to ensure that OVS can keep
doing what it did before in case of increased memory pressure, e.g.,
during VM ingest / migration. Fulfilling this requirement can be
achieved without locking all the allocated memory, but only the pages
already accessed in the past (faulted in). Processing of the new
traffic involves new memory allocations. Latency on these operations
can't be guaranteed by the locking. The main difference would be
the pre-faulting of the stack memory. However, in order to revalidate
or process upcalls on the same traffic, the same amount of stack is
likely needed, so all the necessary memory will already be faulted in.
Switch 'mlockall' to MCL_ONFAULT to avoid consuming unnecessarily
large amounts of RAM on systems with high core counts. For example,
in a densely populated OVN cluster this saves about 650 MB of RAM per
node on a system with 64 cores. This equates to 320 GB of allocated
but unused RAM in a 500 node cluster.
This also makes OVS better suited by default for small systems with
limited amount of memory.
The MCL_ONFAULT flag was introduced in Linux kernel 4.4 and wasn't
available at the time of '--mlockall' introduction, but we can use it
now. Falling back to an old way of locking in case we're running on
an older kernel just in case.
Only locking the faulted in pages also makes locking compatible with
vhost post-copy live migration by default, because we'll no longer
pre-fault all the guest's memory. Post-copy relies on userfaultfd
to work on shared huge pages, which is only available in 4.11+ kernels.
So, technically, it should not be possible for MCL_ONFAULT to fail and
the call without it to succeed. But keeping the check just in case
for now.
Acked-by: Simon Horman <horms@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-06-14 14:22:47 +02:00
|
|
|
/* Set by --mlockall.  When true, main() asks the kernel to lock the process's
 * memory into physical RAM -- the pages already present, plus new pages at the
 * moment they are faulted in -- so that none of it is paged out to disk. */
static bool want_mlockall;
|
|
|
|
|
dpdk: Allow retaining CAP_SYS_RAWIO privileges.
Open vSwitch generally tries to let the underlying operating system
manage the low level details of hardware, for example DMA mapping,
bus arbitration, etc. However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.
In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems. These
calls are dangerous, and can allow a process to completely compromise
a system. However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).
Here, we create an opt-in flag passed to the command line to allow
this access. We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping. There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Gaetan Rivet <gaetanr@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-03-16 08:00:39 -04:00
|
|
|
/* Set by --hw-rawio-access.  When true, the process keeps its CAP_SYS_RAWIO
 * privileges; main() hands this flag to daemonize_start(). */
static bool hw_rawio_access;
|
|
|
|
|
2010-05-03 15:43:49 -07:00
|
|
|
static unixctl_cb_func ovs_vswitchd_exit;
|
|
|
|
|
2012-01-19 10:26:03 -08:00
|
|
|
static char *parse_options(int argc, char *argv[], char **unixctl_path);
|
2014-12-15 14:10:38 +01:00
|
|
|
OVS_NO_RETURN static void usage(void);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2023-07-18 14:40:03 +02:00
|
|
|
/* State shared between the "exit" unixctl handler and the main loop. */
static struct ovs_vswitchd_exit_args {
    /* Connections that issued "exit"; main() replies to each of them after
     * bridge_exit() and then frees the array. */
    struct unixctl_conn **conns;

    /* Number of entries in 'conns'. */
    size_t n_conns;

    /* True once the main loop should terminate. */
    bool exiting;

    /* Passed to bridge_exit(); set when "exit --cleanup" was requested. */
    bool cleanup;
} exit_args;
|
2017-04-24 18:55:04 -07:00
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
struct unixctl_server *unixctl;
|
2023-07-18 14:40:03 +02:00
|
|
|
char *unixctl_path = NULL;
|
2011-07-26 10:13:07 -07:00
|
|
|
char *remote;
|
2009-07-08 13:19:16 -07:00
|
|
|
int retval;
|
|
|
|
|
|
|
|
set_program_name(argv[0]);
|
2018-06-04 10:07:36 +02:00
|
|
|
ovsthread_id_init();
|
2014-03-24 19:23:08 -07:00
|
|
|
|
2018-06-26 14:06:21 -07:00
|
|
|
dns_resolve_init(true);
|
2015-03-16 12:01:55 -04:00
|
|
|
ovs_cmdl_proctitle_init(argc, argv);
|
2014-01-17 10:43:03 -08:00
|
|
|
service_start(&argc, &argv);
|
2012-01-19 10:26:03 -08:00
|
|
|
remote = parse_options(argc, argv, &unixctl_path);
|
2014-02-26 10:44:46 -08:00
|
|
|
fatal_ignore_sigpipe();
|
2009-07-08 13:19:16 -07:00
|
|
|
|
dpdk: Allow retaining CAP_SYS_RAWIO privileges.
Open vSwitch generally tries to let the underlying operating system
managed the low level details of hardware, for example DMA mapping,
bus arbitration, etc. However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.
In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems. These
calls are dangerous, and can allow a process to completely compromise
a system. However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).
Here, we create an opt-in flag passed to the command line to allow
this access. We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping. There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Gaetan Rivet <gaetanr@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-03-16 08:00:39 -04:00
|
|
|
daemonize_start(true, hw_rawio_access);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2012-06-29 09:22:59 -07:00
|
|
|
if (want_mlockall) {
|
|
|
|
#ifdef HAVE_MLOCKALL
|
vswitchd: Only lock pages that are faulted in.
The main purpose of locking the memory is to ensure that OVS can keep
doing what it did before in case of increased memory pressure, e.g.,
during VM ingest / migration. Fulfilling this requirement can be
achieved without locking all the allocated memory, but only the pages
already accessed in the past (faulted in). Processing of the new
traffic involves new memory allocations. Latency on these operations
can't be guaranteed by the locking. The main difference would be
the pre-faulting of the stack memory. However, in order to revalidate
or process upcalls on the same traffic, the same amount of stack is
likely needed, so all the necessary memory will already be faulted in.
Switch 'mlockall' to MCL_ONFAULT to avoid consuming unnecessarily
large amounts of RAM on systems with high core counts. For example,
in a densely populated OVN cluster this saves about 650 MB of RAM per
node on a system with 64 cores. This equates to 320 GB of allocated
but unused RAM in a 500 node cluster.
This also makes OVS better suited by default for small systems with
limited amount of memory.
The MCL_ONFAULT flag was introduced in Linux kernel 4.4 and wasn't
available at the time of '--mlockall' introduction, but we can use it
now. Falling back to an old way of locking in case we're running on
an older kernel just in case.
Only locking the faulted in pages also makes locking compatible with
vhost post-copy live migration by default, because we'll no longer
pre-fault all the guest's memory. Post-copy relies on userfaultfd
to work on shared huge pages, which is only available in 4.11+ kernels.
So, technically, it should not be possible for MCL_ONFAULT to fail and
the call without it to succeed. But keeping the check just in case
for now.
Acked-by: Simon Horman <horms@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2024-06-14 14:22:47 +02:00
|
|
|
/* MCL_ONFAULT introduced in Linux kernel 4.4. */
|
|
|
|
#ifndef MCL_ONFAULT
|
|
|
|
#define MCL_ONFAULT 4
|
|
|
|
#endif
|
|
|
|
if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) {
|
|
|
|
if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
|
|
|
|
VLOG_ERR("mlockall failed: %s", ovs_strerror(errno));
|
|
|
|
} else {
|
|
|
|
set_all_memory_locked();
|
|
|
|
}
|
2012-06-29 09:22:59 -07:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
VLOG_ERR("mlockall not supported on this system");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2012-01-19 10:26:03 -08:00
|
|
|
retval = unixctl_server_create(unixctl_path, &unixctl);
|
2009-07-08 13:19:16 -07:00
|
|
|
if (retval) {
|
2010-01-15 10:31:57 -08:00
|
|
|
exit(EXIT_FAILURE);
|
2009-07-08 13:19:16 -07:00
|
|
|
}
|
2017-04-24 18:55:04 -07:00
|
|
|
unixctl_command_register("exit", "[--cleanup]", 0, 1,
|
2023-07-18 14:40:03 +02:00
|
|
|
ovs_vswitchd_exit, NULL);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2010-06-10 14:17:41 -07:00
|
|
|
bridge_init(remote);
|
2011-07-26 10:13:07 -07:00
|
|
|
free(remote);
|
|
|
|
|
2023-07-18 14:40:03 +02:00
|
|
|
while (!exit_args.exiting) {
|
2021-12-22 10:17:12 +01:00
|
|
|
OVS_USDT_PROBE(main, run_start);
|
2012-05-08 15:44:21 -07:00
|
|
|
memory_run();
|
|
|
|
if (memory_should_report()) {
|
|
|
|
struct simap usage;
|
|
|
|
|
|
|
|
simap_init(&usage);
|
|
|
|
bridge_get_memory_usage(&usage);
|
|
|
|
memory_report(&usage);
|
|
|
|
simap_destroy(&usage);
|
|
|
|
}
|
2010-06-10 14:17:41 -07:00
|
|
|
bridge_run();
|
2009-07-08 13:19:16 -07:00
|
|
|
unixctl_server_run(unixctl);
|
2009-07-30 16:04:45 -07:00
|
|
|
netdev_run();
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2012-05-08 15:44:21 -07:00
|
|
|
memory_wait();
|
2010-06-10 14:17:41 -07:00
|
|
|
bridge_wait();
|
2009-07-08 13:19:16 -07:00
|
|
|
unixctl_server_wait(unixctl);
|
2009-07-30 16:04:45 -07:00
|
|
|
netdev_wait();
|
2023-07-18 14:40:03 +02:00
|
|
|
if (exit_args.exiting) {
|
2010-11-16 15:14:58 -08:00
|
|
|
poll_immediate_wake();
|
|
|
|
}
|
2021-12-22 10:17:12 +01:00
|
|
|
OVS_USDT_PROBE(main, poll_block);
|
2009-07-08 13:19:16 -07:00
|
|
|
poll_block();
|
2014-01-17 10:43:03 -08:00
|
|
|
if (should_service_stop()) {
|
2023-07-18 14:40:03 +02:00
|
|
|
exit_args.exiting = true;
|
2014-01-17 10:43:03 -08:00
|
|
|
}
|
2009-07-08 13:19:16 -07:00
|
|
|
}
|
2023-07-18 14:40:03 +02:00
|
|
|
bridge_exit(exit_args.cleanup);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < exit_args.n_conns; i++) {
|
|
|
|
unixctl_command_reply(exit_args.conns[i], NULL);
|
|
|
|
}
|
|
|
|
free(exit_args.conns);
|
|
|
|
|
2010-12-13 13:08:31 -08:00
|
|
|
unixctl_server_destroy(unixctl);
|
2014-01-17 10:43:03 -08:00
|
|
|
service_stop();
|
2018-01-25 15:39:49 -08:00
|
|
|
vlog_disable_async();
|
2018-01-25 15:39:48 -08:00
|
|
|
ovsrcu_exit();
|
2018-06-26 14:06:21 -07:00
|
|
|
dns_resolve_destroy();
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-07-26 10:13:07 -07:00
|
|
|
static char *
|
2012-01-19 10:26:03 -08:00
|
|
|
parse_options(int argc, char *argv[], char **unixctl_pathp)
|
2009-07-08 13:19:16 -07:00
|
|
|
{
|
|
|
|
enum {
|
|
|
|
OPT_PEER_CA_CERT = UCHAR_MAX + 1,
|
2009-11-30 13:17:34 -08:00
|
|
|
OPT_MLOCKALL,
|
2012-01-19 10:26:03 -08:00
|
|
|
OPT_UNIXCTL,
|
2009-07-08 13:19:16 -07:00
|
|
|
VLOG_OPTION_ENUMS,
|
2010-11-29 12:21:08 -08:00
|
|
|
OPT_BOOTSTRAP_CA_CERT,
|
2011-01-28 12:39:15 -08:00
|
|
|
OPT_ENABLE_DUMMY,
|
2011-11-17 18:06:55 -08:00
|
|
|
OPT_DISABLE_SYSTEM,
|
2019-06-25 14:52:38 -07:00
|
|
|
OPT_DISABLE_SYSTEM_ROUTE,
|
2014-03-24 19:23:08 -07:00
|
|
|
DAEMON_OPTION_ENUMS,
|
|
|
|
OPT_DPDK,
|
2016-10-06 16:21:33 -07:00
|
|
|
SSL_OPTION_ENUMS,
|
2016-06-06 17:05:49 -07:00
|
|
|
OPT_DUMMY_NUMA,
|
dpdk: Allow retaining CAP_SYS_RAWIO privileges.
Open vSwitch generally tries to let the underlying operating system
managed the low level details of hardware, for example DMA mapping,
bus arbitration, etc. However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.
In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems. These
calls are dangerous, and can allow a process to completely compromise
a system. However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).
Here, we create an opt-in flag passed to the command line to allow
this access. We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping. There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Gaetan Rivet <gaetanr@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-03-16 08:00:39 -04:00
|
|
|
OPT_HW_RAWIO_ACCESS,
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
2013-04-23 16:40:56 -07:00
|
|
|
static const struct option long_options[] = {
|
2011-05-04 13:49:42 -07:00
|
|
|
{"help", no_argument, NULL, 'h'},
|
|
|
|
{"version", no_argument, NULL, 'V'},
|
|
|
|
{"mlockall", no_argument, NULL, OPT_MLOCKALL},
|
2012-01-19 10:26:03 -08:00
|
|
|
{"unixctl", required_argument, NULL, OPT_UNIXCTL},
|
2009-07-08 13:19:16 -07:00
|
|
|
DAEMON_LONG_OPTIONS,
|
|
|
|
VLOG_LONG_OPTIONS,
|
2011-05-10 09:17:37 -07:00
|
|
|
STREAM_SSL_LONG_OPTIONS,
|
2011-05-04 13:49:42 -07:00
|
|
|
{"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT},
|
|
|
|
{"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT},
|
2012-01-19 10:24:46 -08:00
|
|
|
{"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
|
2011-11-17 18:06:55 -08:00
|
|
|
{"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
|
2019-06-25 14:52:38 -07:00
|
|
|
{"disable-system-route", no_argument, NULL, OPT_DISABLE_SYSTEM_ROUTE},
|
2016-04-29 13:44:01 -04:00
|
|
|
{"dpdk", optional_argument, NULL, OPT_DPDK},
|
2016-06-06 17:05:49 -07:00
|
|
|
{"dummy-numa", required_argument, NULL, OPT_DUMMY_NUMA},
|
dpdk: Allow retaining CAP_SYS_RAWIO privileges.
Open vSwitch generally tries to let the underlying operating system
managed the low level details of hardware, for example DMA mapping,
bus arbitration, etc. However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.
In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems. These
calls are dangerous, and can allow a process to completely compromise
a system. However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).
Here, we create an opt-in flag passed to the command line to allow
this access. We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping. There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Gaetan Rivet <gaetanr@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-03-16 08:00:39 -04:00
|
|
|
{"hw-rawio-access", no_argument, NULL, OPT_HW_RAWIO_ACCESS},
|
2011-05-04 13:49:42 -07:00
|
|
|
{NULL, 0, NULL, 0},
|
2009-07-08 13:19:16 -07:00
|
|
|
};
|
2015-03-16 12:01:55 -04:00
|
|
|
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
|
2009-07-08 13:19:16 -07:00
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
int c;
|
|
|
|
|
|
|
|
c = getopt_long(argc, argv, short_options, long_options, NULL);
|
|
|
|
if (c == -1) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case 'h':
|
|
|
|
usage();
|
|
|
|
|
|
|
|
case 'V':
|
2015-04-15 13:26:32 -07:00
|
|
|
ovs_print_version(0, 0);
|
2018-01-15 19:21:12 +01:00
|
|
|
print_dpdk_version();
|
2009-07-08 13:19:16 -07:00
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
|
2009-11-30 13:17:34 -08:00
|
|
|
case OPT_MLOCKALL:
|
2012-06-29 09:22:59 -07:00
|
|
|
want_mlockall = true;
|
2009-11-30 13:17:34 -08:00
|
|
|
break;
|
|
|
|
|
2012-01-19 10:26:03 -08:00
|
|
|
case OPT_UNIXCTL:
|
|
|
|
*unixctl_pathp = optarg;
|
|
|
|
break;
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
VLOG_OPTION_HANDLERS
|
|
|
|
DAEMON_OPTION_HANDLERS
|
2010-01-06 14:35:20 -08:00
|
|
|
STREAM_SSL_OPTION_HANDLERS
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
case OPT_PEER_CA_CERT:
|
2010-01-06 14:35:20 -08:00
|
|
|
stream_ssl_set_peer_ca_cert_file(optarg);
|
2009-07-08 13:19:16 -07:00
|
|
|
break;
|
2009-12-21 13:06:47 -08:00
|
|
|
|
|
|
|
case OPT_BOOTSTRAP_CA_CERT:
|
|
|
|
stream_ssl_set_ca_cert_file(optarg, true);
|
|
|
|
break;
|
2009-07-08 13:19:16 -07:00
|
|
|
|
2010-11-29 12:21:08 -08:00
|
|
|
case OPT_ENABLE_DUMMY:
|
2015-06-13 16:58:49 -07:00
|
|
|
dummy_enable(optarg);
|
2010-11-29 12:21:08 -08:00
|
|
|
break;
|
|
|
|
|
2011-11-17 18:06:55 -08:00
|
|
|
case OPT_DISABLE_SYSTEM:
|
2020-06-17 14:22:47 -07:00
|
|
|
dp_disallow_provider("system");
|
2019-06-25 14:52:38 -07:00
|
|
|
break;
|
|
|
|
|
|
|
|
case OPT_DISABLE_SYSTEM_ROUTE:
|
2018-03-31 17:12:55 -07:00
|
|
|
ovs_router_disable_system_routing_table();
|
2011-11-17 18:06:55 -08:00
|
|
|
break;
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
case '?':
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
|
2014-03-24 19:23:08 -07:00
|
|
|
case OPT_DPDK:
|
2016-04-29 13:44:01 -04:00
|
|
|
ovs_fatal(0, "Using --dpdk to configure DPDK is not supported.");
|
2014-03-24 19:23:08 -07:00
|
|
|
break;
|
|
|
|
|
2016-06-06 17:05:49 -07:00
|
|
|
case OPT_DUMMY_NUMA:
|
|
|
|
ovs_numa_set_dummy(optarg);
|
|
|
|
break;
|
|
|
|
|
dpdk: Allow retaining CAP_SYS_RAWIO privileges.
Open vSwitch generally tries to let the underlying operating system
managed the low level details of hardware, for example DMA mapping,
bus arbitration, etc. However, when using DPDK, the underlying
operating system yields control of many of these details to userspace
for management.
In the case of some DPDK port drivers, configuring rte_flow or even
allocating resources may require access to iopl/ioperm calls, which
are guarded by the CAP_SYS_RAWIO privilege on linux systems. These
calls are dangerous, and can allow a process to completely compromise
a system. However, they are needed in the case of some userspace
driver code which manages the hardware (for example, the mlx
implementation of backend support for rte_flow).
Here, we create an opt-in flag passed to the command line to allow
this access. We need to do this before ever accessing the database,
because we want to drop all privileges asap, and cannot wait for
a connection to the database to be established and functional before
dropping. There may be distribution specific ways to do capability
management as well (using for example, systemd), but they are not
as universal to the vswitchd as a flag.
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Gaetan Rivet <gaetanr@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-03-16 08:00:39 -04:00
|
|
|
case OPT_HW_RAWIO_ACCESS:
|
|
|
|
hw_rawio_access = true;
|
|
|
|
break;
|
|
|
|
|
2009-07-08 13:19:16 -07:00
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(short_options);
|
|
|
|
|
|
|
|
argc -= optind;
|
|
|
|
argv += optind;
|
|
|
|
|
2011-07-26 10:13:07 -07:00
|
|
|
switch (argc) {
|
|
|
|
case 0:
|
|
|
|
return xasprintf("unix:%s/db.sock", ovs_rundir());
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
return xstrdup(argv[0]);
|
|
|
|
|
|
|
|
default:
|
|
|
|
VLOG_FATAL("at most one non-option argument accepted; "
|
2011-03-31 16:23:50 -07:00
|
|
|
"use --help for usage");
|
2009-07-08 13:19:16 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
2009-06-23 14:18:43 -07:00
|
|
|
printf("%s: Open vSwitch daemon\n"
|
2011-07-26 10:13:07 -07:00
|
|
|
"usage: %s [OPTIONS] [DATABASE]\n"
|
|
|
|
"where DATABASE is a socket on which ovsdb-server is listening\n"
|
|
|
|
" (default: \"unix:%s/db.sock\").\n",
|
|
|
|
program_name, program_name, ovs_rundir());
|
2009-12-21 13:13:48 -08:00
|
|
|
stream_usage("DATABASE", true, false, true);
|
2009-07-08 13:19:16 -07:00
|
|
|
daemon_usage();
|
|
|
|
vlog_usage();
|
2014-08-07 12:40:34 -07:00
|
|
|
printf("\nDPDK options:\n"
|
2016-04-29 13:44:01 -04:00
|
|
|
"Configuration of DPDK via command-line is removed from this\n"
|
|
|
|
"version of Open vSwitch. DPDK is configured through ovsdb.\n"
|
|
|
|
);
|
2011-02-11 13:16:28 -08:00
|
|
|
printf("\nOther options:\n"
|
2015-03-05 13:42:04 -08:00
|
|
|
" --unixctl=SOCKET override default control socket name\n"
|
|
|
|
" -h, --help display this help message\n"
|
|
|
|
" -V, --version display version information\n");
|
2009-07-08 13:19:16 -07:00
|
|
|
exit(EXIT_SUCCESS);
|
|
|
|
}
|
2010-05-03 15:43:49 -07:00
|
|
|
|
|
|
|
static void
|
2017-04-24 18:55:04 -07:00
|
|
|
ovs_vswitchd_exit(struct unixctl_conn *conn, int argc,
|
2023-07-18 14:40:03 +02:00
|
|
|
const char *argv[], void *args OVS_UNUSED)
|
2010-05-03 15:43:49 -07:00
|
|
|
{
|
2023-07-18 14:40:03 +02:00
|
|
|
exit_args.n_conns++;
|
|
|
|
exit_args.conns = xrealloc(exit_args.conns,
|
|
|
|
exit_args.n_conns * sizeof *exit_args.conns);
|
|
|
|
exit_args.conns[exit_args.n_conns - 1] = conn;
|
|
|
|
exit_args.exiting = true;
|
|
|
|
if (!exit_args.cleanup) {
|
|
|
|
exit_args.cleanup = argc == 2 && !strcmp(argv[1], "--cleanup");
|
|
|
|
}
|
2010-05-03 15:43:49 -07:00
|
|
|
}
|