2009-06-24 10:24:09 -07:00
/*
* Copyright ( c ) 2009 Nicira Networks .
*
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at :
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
# include <config.h>
2009-07-30 16:04:45 -07:00
# include <assert.h>
2009-06-24 10:24:09 -07:00
# include <errno.h>
2009-07-30 16:04:45 -07:00
# include <fcntl.h>
# include <arpa/inet.h>
# include <inttypes.h>
# include <linux/if_tun.h>
# include <linux/types.h>
# include <linux/ethtool.h>
2009-06-24 10:24:09 -07:00
# include <linux/rtnetlink.h>
2009-07-30 16:04:45 -07:00
# include <linux/sockios.h>
# include <linux/version.h>
# include <sys/types.h>
# include <sys/ioctl.h>
# include <sys/socket.h>
# include <netpacket/packet.h>
# include <net/ethernet.h>
# include <net/if.h>
# include <net/if_arp.h>
# include <net/if_packet.h>
# include <net/route.h>
# include <netinet/in.h>
2009-06-24 10:24:09 -07:00
# include <poll.h>
2009-07-30 16:04:45 -07:00
# include <stdlib.h>
# include <string.h>
# include <unistd.h>
2009-06-24 10:24:09 -07:00
# include "coverage.h"
2009-07-30 16:04:45 -07:00
# include "dynamic-string.h"
# include "fatal-signal.h"
# include "netdev-provider.h"
2009-06-24 10:24:09 -07:00
# include "netlink.h"
# include "ofpbuf.h"
2009-07-30 16:04:45 -07:00
# include "openflow/openflow.h"
# include "packets.h"
# include "poll-loop.h"
2009-07-28 13:05:20 -07:00
# include "rtnetlink.h"
2009-07-30 16:04:45 -07:00
# include "socket-util.h"
# include "shash.h"
# include "svec.h"
2009-06-24 10:24:09 -07:00
# define THIS_MODULE VLM_netdev_linux
# include "vlog.h"
2009-07-30 16:04:45 -07:00
/* ADVERTISED_Pause and ADVERTISED_Asym_Pause were introduced in Linux 2.6.14,
 * so they might be missing if we have old headers.  Provide fallback
 * definitions so that the advertisement code in this file still compiles. */
# ifndef ADVERTISED_Pause
# define ADVERTISED_Pause (1 << 13)
# endif
# ifndef ADVERTISED_Asym_Pause
# define ADVERTISED_Asym_Pause (1 << 14)
# endif
/* A Linux network device: the generic 'struct netdev' plus the file
 * descriptors and cached state used by this implementation. */
struct netdev_linux {
    /* Generic part.  netdev_linux_cast() recovers the enclosing structure
     * from a pointer to this member. */
    struct netdev netdev;

    /* File descriptors, each -1 when not open.  For ordinary network devices
     * the two fds below are the same; for tap devices they differ. */
    int netdev_fd;              /* Network device. */
    int tap_fd;                 /* TAP character device, if any, otherwise
                                 * the network device. */

    /* Cached device information, found in 'cache_map' by device name and
     * reference-counted across open netdevs. */
    struct netdev_linux_cache *cache;
};
/* Bits for the 'valid' member of struct netdev_linux_cache.  A set bit means
 * that the corresponding cached value may be used without querying the
 * kernel again. */
enum {
    VALID_IFINDEX     = 1 << 0, /* Presumably guards 'ifindex'. */
    VALID_ETHERADDR   = 1 << 1, /* 'etheraddr'. */
    VALID_IN4         = 1 << 2, /* 'address' and 'netmask'. */
    VALID_IN6         = 1 << 3, /* 'in6'. */
    VALID_MTU         = 1 << 4, /* 'mtu'. */
    VALID_CARRIER     = 1 << 5, /* 'carrier'. */
    VALID_IS_INTERNAL = 1 << 6  /* 'is_internal'. */
};
/* Cached network device information.
 *
 * One instance exists per device name in 'cache_map' and is shared by every
 * open netdev of that name.  Only the members whose VALID_* bit is set in
 * 'valid' hold current data. */
struct netdev_linux_cache {
    struct shash_node *shash_node;      /* This entry's node in 'cache_map'. */
    unsigned int valid;                 /* Bitmap of VALID_* bits. */
    int ref_cnt;                        /* Number of netdevs using this. */

    int ifindex;
    uint8_t etheraddr[ETH_ADDR_LEN];
    struct in_addr address, netmask;
    struct in6_addr in6;
    int mtu;
    int carrier;
    bool is_internal;
};
/* Maps a network device name to its struct netdev_linux_cache.  Entries are
 * added by netdev_linux_open() and removed by netdev_linux_close() when their
 * reference count drops to zero. */
static struct shash cache_map = SHASH_INITIALIZER ( & cache_map ) ;
2009-07-28 12:36:32 -07:00
/* rtnetlink notifier whose callback, netdev_linux_cache_cb(), invalidates
 * cache entries.  It is registered while 'cache_map' is non-empty. */
static struct rtnetlink_notifier netdev_linux_cache_notifier ;
2009-07-30 16:04:45 -07:00
/* An AF_INET socket (used for ioctl operations).  Created by
 * netdev_linux_init(); -1 until then. */
static int af_inet_sock = - 1 ;
/* A generic netdev_notifier together with a list node. */
struct netdev_linux_notifier {
struct netdev_notifier notifier ;
struct list node ;
} ;
/* NOTE(review): presumably maps a device name to the list of
 * netdev_linux_notifiers registered for it; the code that uses this map is
 * not visible here -- confirm. */
static struct shash netdev_linux_notifiers =
SHASH_INITIALIZER ( & netdev_linux_notifiers ) ;
2009-07-28 12:36:32 -07:00
/* NOTE(review): presumably the rtnetlink notifier that feeds
 * 'netdev_linux_notifiers'; its registration is not visible here. */
static struct rtnetlink_notifier netdev_linux_poll_notifier ;
2009-07-30 16:04:45 -07:00
/* This is set pretty low because we probably won't learn anything from the
 * additional log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT ( 5 , 20 ) ;
/* Forward declarations of file-local helpers. */

/* ethtool and ioctl wrappers; 'cmd_name' is the textual name of 'cmd'. */
static int netdev_linux_do_ethtool ( struct netdev * , struct ethtool_cmd * ,
int cmd , const char * cmd_name ) ;
static int netdev_linux_do_ioctl ( const struct netdev * , struct ifreq * ,
int cmd , const char * cmd_name ) ;
2009-09-02 10:14:53 -07:00
static int netdev_linux_get_ipv4 ( const struct netdev * , struct in_addr * ,
int cmd , const char * cmd_name ) ;
2009-07-30 16:04:45 -07:00
/* Interface flags and index. */
static int get_flags ( const struct netdev * , int * flagsp ) ;
static int set_flags ( struct netdev * , int flags ) ;
static int do_get_ifindex ( const char * netdev_name ) ;
static int get_ifindex ( const struct netdev * , int * ifindexp ) ;
/* IPv4 and Ethernet address helpers. */
static int do_set_addr ( struct netdev * netdev ,
int ioctl_nr , const char * ioctl_name ,
struct in_addr addr ) ;
static int get_etheraddr ( const char * netdev_name , uint8_t ea [ ETH_ADDR_LEN ] ) ;
static int set_etheraddr ( const char * netdev_name , int hwaddr_family ,
const uint8_t [ ETH_ADDR_LEN ] ) ;
/* Statistics back ends used by netdev_linux_get_stats(). */
static int get_stats_via_netlink ( int ifindex , struct netdev_stats * stats ) ;
static int get_stats_via_proc ( const char * netdev_name , struct netdev_stats * stats ) ;
/* Converts generic 'netdev' into the struct netdev_linux that embeds it, after
 * checking with netdev_assert_class() that it belongs to
 * 'netdev_linux_class'. */
static struct netdev_linux *
netdev_linux_cast(const struct netdev *netdev)
{
    struct netdev_linux *dev;

    netdev_assert_class(netdev, &netdev_linux_class);
    dev = CONTAINER_OF(netdev, struct netdev_linux, netdev);
    return dev;
}
/* Creates 'af_inet_sock', the AF_INET datagram socket used for ioctls.
 *
 * The outcome of the first call is remembered in a static variable, so later
 * calls return it without creating another socket.  Returns 0 if successful,
 * otherwise a positive errno value. */
static int
netdev_linux_init(void)
{
    static int status = -1;

    if (status >= 0) {
        /* Already attempted; report the remembered result. */
        return status;
    }

    af_inet_sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (af_inet_sock >= 0) {
        status = 0;
    } else {
        status = errno;
    }
    if (status) {
        VLOG_ERR(" failed to create inet socket: %s ", strerror(status));
    }
    return status;
}
/* Periodic work for this netdev class: simply lets the rtnetlink notifier
 * run. */
static void
netdev_linux_run(void)
{
    rtnetlink_notifier_run();
}
/* Counterpart of netdev_linux_run(): asks the rtnetlink notifier to arrange
 * its own wakeup. */
static void
netdev_linux_wait(void)
{
    rtnetlink_notifier_wait();
}
static void
2009-07-28 12:36:32 -07:00
netdev_linux_cache_cb ( const struct rtnetlink_change * change ,
2009-07-30 16:04:45 -07:00
void * aux UNUSED )
{
struct netdev_linux_cache * cache ;
if ( change ) {
cache = shash_find_data ( & cache_map , change - > ifname ) ;
if ( cache ) {
cache - > valid = 0 ;
}
} else {
struct shash_node * node ;
SHASH_FOR_EACH ( node , & cache_map ) {
cache = node - > data ;
cache - > valid = 0 ;
}
}
}
static int
netdev_linux_open ( const char * name , char * suffix , int ethertype ,
struct netdev * * netdevp )
{
struct netdev_linux * netdev ;
enum netdev_flags flags ;
int error ;
/* Allocate network device. */
netdev = xcalloc ( 1 , sizeof * netdev ) ;
netdev_init ( & netdev - > netdev , suffix , & netdev_linux_class ) ;
netdev - > netdev_fd = - 1 ;
netdev - > tap_fd = - 1 ;
netdev - > cache = shash_find_data ( & cache_map , suffix ) ;
if ( ! netdev - > cache ) {
if ( shash_is_empty ( & cache_map ) ) {
2009-07-28 12:36:32 -07:00
int error = rtnetlink_notifier_register (
2009-07-30 16:04:45 -07:00
& netdev_linux_cache_notifier , netdev_linux_cache_cb , NULL ) ;
if ( error ) {
netdev_close ( & netdev - > netdev ) ;
return error ;
}
}
netdev - > cache = xmalloc ( sizeof * netdev - > cache ) ;
netdev - > cache - > shash_node = shash_add ( & cache_map , suffix ,
netdev - > cache ) ;
netdev - > cache - > valid = 0 ;
netdev - > cache - > ref_cnt = 0 ;
}
netdev - > cache - > ref_cnt + + ;
if ( ! strncmp ( name , " tap: " , 4 ) ) {
static const char tap_dev [ ] = " /dev/net/tun " ;
struct ifreq ifr ;
/* Open tap device. */
netdev - > tap_fd = open ( tap_dev , O_RDWR ) ;
if ( netdev - > tap_fd < 0 ) {
error = errno ;
VLOG_WARN ( " opening \" %s \" failed: %s " , tap_dev , strerror ( error ) ) ;
goto error ;
}
/* Create tap device. */
ifr . ifr_flags = IFF_TAP | IFF_NO_PI ;
error = netdev_linux_do_ioctl ( & netdev - > netdev , & ifr ,
TUNSETIFF , " TUNSETIFF " ) ;
if ( error ) {
goto error ;
}
/* Make non-blocking. */
error = set_nonblocking ( netdev - > tap_fd ) ;
if ( error ) {
goto error ;
}
}
error = netdev_get_flags ( & netdev - > netdev , & flags ) ;
if ( error = = ENODEV ) {
goto error ;
}
if ( netdev - > tap_fd > = 0 | | ethertype ! = NETDEV_ETH_TYPE_NONE ) {
struct sockaddr_ll sll ;
int protocol ;
int ifindex ;
/* Create file descriptor. */
protocol = ( ethertype = = NETDEV_ETH_TYPE_ANY ? ETH_P_ALL
: ethertype = = NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2
: ethertype ) ;
netdev - > netdev_fd = socket ( PF_PACKET , SOCK_RAW , htons ( protocol ) ) ;
if ( netdev - > netdev_fd < 0 ) {
error = errno ;
goto error ;
}
if ( netdev - > tap_fd < 0 ) {
netdev - > tap_fd = netdev - > netdev_fd ;
}
/* Set non-blocking mode. */
error = set_nonblocking ( netdev - > netdev_fd ) ;
if ( error ) {
goto error ;
}
/* Get ethernet device index. */
error = get_ifindex ( & netdev - > netdev , & ifindex ) ;
if ( error ) {
goto error ;
}
/* Bind to specific ethernet device. */
memset ( & sll , 0 , sizeof sll ) ;
sll . sll_family = AF_PACKET ;
sll . sll_ifindex = ifindex ;
if ( bind ( netdev - > netdev_fd ,
( struct sockaddr * ) & sll , sizeof sll ) < 0 ) {
error = errno ;
VLOG_ERR ( " bind to %s failed: %s " , suffix , strerror ( error ) ) ;
goto error ;
}
/* Between the socket() and bind() calls above, the socket receives all
* packets of the requested type on all system interfaces . We do not
* want to receive that data , but there is no way to avoid it . So we
* must now drain out the receive queue . */
error = drain_rcvbuf ( netdev - > netdev_fd ) ;
if ( error ) {
goto error ;
}
}
* netdevp = & netdev - > netdev ;
return 0 ;
error :
netdev_close ( & netdev - > netdev ) ;
return error ;
}
/* Closes and destroys 'netdev'. */
static void
netdev_linux_close ( struct netdev * netdev_ )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( netdev - > cache & & ! - - netdev - > cache - > ref_cnt ) {
shash_delete ( & cache_map , netdev - > cache - > shash_node ) ;
free ( netdev - > cache ) ;
if ( shash_is_empty ( & cache_map ) ) {
2009-07-28 12:36:32 -07:00
rtnetlink_notifier_unregister ( & netdev_linux_cache_notifier ) ;
2009-07-30 16:04:45 -07:00
}
}
if ( netdev - > netdev_fd > = 0 ) {
close ( netdev - > netdev_fd ) ;
}
if ( netdev - > tap_fd > = 0 & & netdev - > netdev_fd ! = netdev - > tap_fd ) {
close ( netdev - > tap_fd ) ;
}
free ( netdev ) ;
}
2009-06-24 10:24:09 -07:00
2009-07-30 16:04:45 -07:00
/* Adds the names of all known network devices, as reported by
 * if_nameindex(), to 'svec'.  'svec' is only appended to, so the caller must
 * have initialized it.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
netdev_linux_enumerate(struct svec *svec)
{
    struct if_nameindex *names;
    size_t i;

    names = if_nameindex();
    if (!names) {
        /* Capture errno before logging, since logging may overwrite it. */
        int error = errno;

        VLOG_WARN(" could not obtain list of network device names: %s ",
                  strerror(error));
        return error;
    }

    for (i = 0; names[i].if_name != NULL; i++) {
        svec_add(svec, names[i].if_name);
    }
    if_freenameindex(names);
    return 0;
}
static int
netdev_linux_recv ( struct netdev * netdev_ , void * data , size_t size )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( netdev - > tap_fd < 0 ) {
/* Device was opened with NETDEV_ETH_TYPE_NONE. */
return EAGAIN ;
}
for ( ; ; ) {
ssize_t retval = read ( netdev - > tap_fd , data , size ) ;
if ( retval > = 0 ) {
return retval ;
} else if ( errno ! = EINTR ) {
if ( errno ! = EAGAIN ) {
VLOG_WARN_RL ( & rl , " error receiving Ethernet packet on %s: %s " ,
strerror ( errno ) , netdev_get_name ( netdev_ ) ) ;
}
return errno ;
}
}
}
/* Registers with the poll loop to wake up from the next call to poll_block()
* when a packet is ready to be received with netdev_recv ( ) on ' netdev ' . */
static void
netdev_linux_recv_wait ( struct netdev * netdev_ )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( netdev - > tap_fd > = 0 ) {
poll_fd_wait ( netdev - > tap_fd , POLLIN ) ;
}
}
/* Discards all packets waiting to be received from 'netdev'. */
static int
netdev_linux_drain ( struct netdev * netdev_ )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( netdev - > tap_fd < 0 & & netdev - > netdev_fd < 0 ) {
return 0 ;
} else if ( netdev - > tap_fd ! = netdev - > netdev_fd ) {
struct ifreq ifr ;
int error = netdev_linux_do_ioctl ( netdev_ , & ifr ,
SIOCGIFTXQLEN , " SIOCGIFTXQLEN " ) ;
if ( error ) {
return error ;
}
drain_fd ( netdev - > tap_fd , ifr . ifr_qlen ) ;
return 0 ;
} else {
return drain_rcvbuf ( netdev - > netdev_fd ) ;
}
}
/* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive
* errno value . Returns EAGAIN without blocking if the packet cannot be queued
* immediately . Returns EMSGSIZE if a partial packet was transmitted or if
* the packet is too big or too small to transmit on the device .
*
* The caller retains ownership of ' buffer ' in all cases .
*
* The kernel maintains a packet transmission queue , so the caller is not
* expected to do additional queuing of packets . */
static int
netdev_linux_send ( struct netdev * netdev_ , const void * data , size_t size )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
/* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE.
*/
if ( netdev - > tap_fd < 0 ) {
return EPIPE ;
}
for ( ; ; ) {
ssize_t retval = write ( netdev - > tap_fd , data , size ) ;
if ( retval < 0 ) {
/* The Linux AF_PACKET implementation never blocks waiting for room
* for packets , instead returning ENOBUFS . Translate this into
* EAGAIN for the caller . */
if ( errno = = ENOBUFS ) {
return EAGAIN ;
} else if ( errno = = EINTR ) {
continue ;
} else if ( errno ! = EAGAIN ) {
VLOG_WARN_RL ( & rl , " error sending Ethernet packet on %s: %s " ,
netdev_get_name ( netdev_ ) , strerror ( errno ) ) ;
}
return errno ;
} else if ( retval ! = size ) {
VLOG_WARN_RL ( & rl , " sent partial Ethernet packet (%zd bytes of "
" %zu) on %s " , retval , size , netdev_get_name ( netdev_ ) ) ;
return EMSGSIZE ;
} else {
return 0 ;
}
}
}
/* Registers with the poll loop to wake up from the next call to poll_block()
* when the packet transmission queue has sufficient room to transmit a packet
* with netdev_send ( ) .
*
* The kernel maintains a packet transmission queue , so the client is not
* expected to do additional queuing of packets . Thus , this function is
* unlikely to ever be used . It is included for completeness . */
static void
netdev_linux_send_wait ( struct netdev * netdev_ )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( netdev - > tap_fd < 0 & & netdev - > netdev_fd < 0 ) {
/* Nothing to do. */
} else if ( netdev - > tap_fd = = netdev - > netdev_fd ) {
poll_fd_wait ( netdev - > tap_fd , POLLOUT ) ;
} else {
/* TAP device always accepts packets.*/
poll_immediate_wake ( ) ;
}
}
/* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful,
* otherwise a positive errno value . */
static int
netdev_linux_set_etheraddr ( struct netdev * netdev_ ,
const uint8_t mac [ ETH_ADDR_LEN ] )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
int error = set_etheraddr ( netdev_get_name ( netdev_ ) , ARPHRD_ETHER , mac ) ;
if ( ! error ) {
memcpy ( netdev - > cache - > etheraddr , mac , ETH_ADDR_LEN ) ;
}
return error ;
}
/* Returns a pointer to 'netdev''s MAC address. The caller must not modify or
* free the returned buffer . */
static int
netdev_linux_get_etheraddr ( const struct netdev * netdev_ ,
uint8_t mac [ ETH_ADDR_LEN ] )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( ! ( netdev - > cache - > valid & VALID_ETHERADDR ) ) {
int error = get_etheraddr ( netdev_get_name ( netdev_ ) ,
netdev - > cache - > etheraddr ) ;
if ( error ) {
return error ;
}
netdev - > cache - > valid | = VALID_ETHERADDR ;
}
memcpy ( mac , netdev - > cache - > etheraddr , ETH_ADDR_LEN ) ;
return 0 ;
}
/* Returns the maximum size of transmitted (and received) packets on 'netdev',
* in bytes , not including the hardware header ; thus , this is typically 1500
* bytes for Ethernet devices . */
static int
netdev_linux_get_mtu ( const struct netdev * netdev_ , int * mtup )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( ! ( netdev - > cache - > valid & VALID_MTU ) ) {
struct ifreq ifr ;
int error ;
error = netdev_linux_do_ioctl ( netdev_ , & ifr , SIOCGIFMTU , " SIOCGIFMTU " ) ;
if ( error ) {
return error ;
}
netdev - > cache - > mtu = ifr . ifr_mtu ;
netdev - > cache - > valid | = VALID_MTU ;
}
* mtup = netdev - > cache - > mtu ;
return 0 ;
}
static int
netdev_linux_get_carrier ( const struct netdev * netdev_ , bool * carrier )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
int error = 0 ;
char * fn = NULL ;
int fd = - 1 ;
if ( ! ( netdev - > cache - > valid & VALID_CARRIER ) ) {
char line [ 8 ] ;
int retval ;
fn = xasprintf ( " /sys/class/net/%s/carrier " , netdev_get_name ( netdev_ ) ) ;
fd = open ( fn , O_RDONLY ) ;
if ( fd < 0 ) {
error = errno ;
VLOG_WARN_RL ( & rl , " %s: open failed: %s " , fn , strerror ( error ) ) ;
goto exit ;
}
retval = read ( fd , line , sizeof line ) ;
if ( retval < 0 ) {
error = errno ;
if ( error = = EINVAL ) {
/* This is the normal return value when we try to check carrier
* if the network device is not up . */
} else {
VLOG_WARN_RL ( & rl , " %s: read failed: %s " , fn , strerror ( error ) ) ;
}
goto exit ;
} else if ( retval = = 0 ) {
error = EPROTO ;
VLOG_WARN_RL ( & rl , " %s: unexpected end of file " , fn ) ;
goto exit ;
}
if ( line [ 0 ] ! = ' 0 ' & & line [ 0 ] ! = ' 1 ' ) {
error = EPROTO ;
VLOG_WARN_RL ( & rl , " %s: value is %c (expected 0 or 1) " ,
fn , line [ 0 ] ) ;
goto exit ;
}
netdev - > cache - > carrier = line [ 0 ] ! = ' 0 ' ;
netdev - > cache - > valid | = VALID_CARRIER ;
}
* carrier = netdev - > cache - > carrier ;
error = 0 ;
exit :
if ( fd > = 0 ) {
close ( fd ) ;
}
free ( fn ) ;
return error ;
}
/* Check whether we can we use RTM_GETLINK to get network device statistics.
* In pre - 2.6 .19 kernels , this was only available if wireless extensions were
* enabled . */
static bool
check_for_working_netlink_stats ( void )
{
/* Decide on the netdev_get_stats() implementation to use. Netlink is
* preferable , so if that works , we ' ll use it . */
int ifindex = do_get_ifindex ( " lo " ) ;
if ( ifindex < 0 ) {
VLOG_WARN ( " failed to get ifindex for lo, "
" obtaining netdev stats from proc " ) ;
return false ;
} else {
struct netdev_stats stats ;
int error = get_stats_via_netlink ( ifindex , & stats ) ;
if ( ! error ) {
VLOG_DBG ( " obtaining netdev stats via rtnetlink " ) ;
return true ;
} else {
VLOG_INFO ( " RTM_GETLINK failed (%s), obtaining netdev stats "
" via proc (you are probably running a pre-2.6.19 "
" kernel) " , strerror ( error ) ) ;
return false ;
}
}
}
/* Retrieves current device stats for 'netdev'.
*
* XXX All of the members of struct netdev_stats are 64 bits wide , but on
* 32 - bit architectures the Linux network stats are only 32 bits . */
static int
2009-09-14 14:12:23 -07:00
netdev_linux_get_stats ( const struct netdev * netdev_ , struct netdev_stats * stats )
2009-07-30 16:04:45 -07:00
{
2009-09-14 14:12:23 -07:00
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
2009-07-30 16:04:45 -07:00
static int use_netlink_stats = - 1 ;
int error ;
2009-09-14 14:12:23 -07:00
struct netdev_stats raw_stats ;
struct netdev_stats * collect_stats = stats ;
2009-07-30 16:04:45 -07:00
COVERAGE_INC ( netdev_get_stats ) ;
2009-09-14 14:12:23 -07:00
if ( ! ( netdev - > cache - > valid & VALID_IS_INTERNAL ) ) {
netdev - > cache - > is_internal = ( netdev - > tap_fd ! = - 1 ) ;
if ( ! netdev - > cache - > is_internal ) {
struct ethtool_drvinfo drvinfo ;
memset ( & drvinfo , 0 , sizeof drvinfo ) ;
error = netdev_linux_do_ethtool ( & netdev - > netdev ,
( struct ethtool_cmd * ) & drvinfo ,
ETHTOOL_GDRVINFO ,
" ETHTOOL_GDRVINFO " ) ;
if ( ! error ) {
netdev - > cache - > is_internal = ! strcmp ( drvinfo . driver ,
" openvswitch " ) ;
}
}
2009-09-16 11:03:42 -07:00
netdev - > cache - > valid | = VALID_IS_INTERNAL ;
2009-09-14 14:12:23 -07:00
}
if ( netdev - > cache - > is_internal ) {
collect_stats = & raw_stats ;
}
2009-07-30 16:04:45 -07:00
if ( use_netlink_stats < 0 ) {
use_netlink_stats = check_for_working_netlink_stats ( ) ;
}
if ( use_netlink_stats ) {
int ifindex ;
2009-09-14 14:12:23 -07:00
error = get_ifindex ( & netdev - > netdev , & ifindex ) ;
2009-07-30 16:04:45 -07:00
if ( ! error ) {
2009-09-14 14:12:23 -07:00
error = get_stats_via_netlink ( ifindex , collect_stats ) ;
2009-07-30 16:04:45 -07:00
}
} else {
2009-09-14 14:12:23 -07:00
error = get_stats_via_proc ( netdev - > netdev . name , collect_stats ) ;
2009-07-30 16:04:45 -07:00
}
2009-09-14 14:12:23 -07:00
/* If this port is an internal port then the transmit and receive stats
* will appear to be swapped relative to the other ports since we are the
* one sending the data , not a remote computer . For consistency , we swap
* them back here . */
if ( netdev - > cache - > is_internal ) {
stats - > rx_packets = raw_stats . tx_packets ;
stats - > tx_packets = raw_stats . rx_packets ;
stats - > rx_bytes = raw_stats . tx_bytes ;
stats - > tx_bytes = raw_stats . rx_bytes ;
stats - > rx_errors = raw_stats . tx_errors ;
stats - > tx_errors = raw_stats . rx_errors ;
stats - > rx_dropped = raw_stats . tx_dropped ;
stats - > tx_dropped = raw_stats . rx_dropped ;
stats - > multicast = raw_stats . multicast ;
stats - > collisions = raw_stats . collisions ;
stats - > rx_length_errors = 0 ;
stats - > rx_over_errors = 0 ;
stats - > rx_crc_errors = 0 ;
stats - > rx_frame_errors = 0 ;
stats - > rx_fifo_errors = 0 ;
stats - > rx_missed_errors = 0 ;
stats - > tx_aborted_errors = 0 ;
stats - > tx_carrier_errors = 0 ;
stats - > tx_fifo_errors = 0 ;
stats - > tx_heartbeat_errors = 0 ;
stats - > tx_window_errors = 0 ;
}
2009-07-30 16:04:45 -07:00
return error ;
}
/* Stores the features supported by 'netdev' into each of '*current',
* ' * advertised ' , ' * supported ' , and ' * peer ' that are non - null . Each value is a
* bitmap of " enum ofp_port_features " bits , in host byte order . Returns 0 if
* successful , otherwise a positive errno value . On failure , all of the
* passed - in values are set to 0. */
static int
netdev_linux_get_features ( struct netdev * netdev ,
uint32_t * current , uint32_t * advertised ,
uint32_t * supported , uint32_t * peer )
{
struct ethtool_cmd ecmd ;
int error ;
memset ( & ecmd , 0 , sizeof ecmd ) ;
error = netdev_linux_do_ethtool ( netdev , & ecmd ,
ETHTOOL_GSET , " ETHTOOL_GSET " ) ;
if ( error ) {
return error ;
}
/* Supported features. */
* supported = 0 ;
if ( ecmd . supported & SUPPORTED_10baseT_Half ) {
* supported | = OFPPF_10MB_HD ;
}
if ( ecmd . supported & SUPPORTED_10baseT_Full ) {
* supported | = OFPPF_10MB_FD ;
}
if ( ecmd . supported & SUPPORTED_100baseT_Half ) {
* supported | = OFPPF_100MB_HD ;
}
if ( ecmd . supported & SUPPORTED_100baseT_Full ) {
* supported | = OFPPF_100MB_FD ;
}
if ( ecmd . supported & SUPPORTED_1000baseT_Half ) {
* supported | = OFPPF_1GB_HD ;
}
if ( ecmd . supported & SUPPORTED_1000baseT_Full ) {
* supported | = OFPPF_1GB_FD ;
}
if ( ecmd . supported & SUPPORTED_10000baseT_Full ) {
* supported | = OFPPF_10GB_FD ;
}
if ( ecmd . supported & SUPPORTED_TP ) {
* supported | = OFPPF_COPPER ;
}
if ( ecmd . supported & SUPPORTED_FIBRE ) {
* supported | = OFPPF_FIBER ;
}
if ( ecmd . supported & SUPPORTED_Autoneg ) {
* supported | = OFPPF_AUTONEG ;
}
if ( ecmd . supported & SUPPORTED_Pause ) {
* supported | = OFPPF_PAUSE ;
}
if ( ecmd . supported & SUPPORTED_Asym_Pause ) {
* supported | = OFPPF_PAUSE_ASYM ;
}
/* Advertised features. */
* advertised = 0 ;
if ( ecmd . advertising & ADVERTISED_10baseT_Half ) {
* advertised | = OFPPF_10MB_HD ;
}
if ( ecmd . advertising & ADVERTISED_10baseT_Full ) {
* advertised | = OFPPF_10MB_FD ;
}
if ( ecmd . advertising & ADVERTISED_100baseT_Half ) {
* advertised | = OFPPF_100MB_HD ;
}
if ( ecmd . advertising & ADVERTISED_100baseT_Full ) {
* advertised | = OFPPF_100MB_FD ;
}
if ( ecmd . advertising & ADVERTISED_1000baseT_Half ) {
* advertised | = OFPPF_1GB_HD ;
}
if ( ecmd . advertising & ADVERTISED_1000baseT_Full ) {
* advertised | = OFPPF_1GB_FD ;
}
if ( ecmd . advertising & ADVERTISED_10000baseT_Full ) {
* advertised | = OFPPF_10GB_FD ;
}
if ( ecmd . advertising & ADVERTISED_TP ) {
* advertised | = OFPPF_COPPER ;
}
if ( ecmd . advertising & ADVERTISED_FIBRE ) {
* advertised | = OFPPF_FIBER ;
}
if ( ecmd . advertising & ADVERTISED_Autoneg ) {
* advertised | = OFPPF_AUTONEG ;
}
if ( ecmd . advertising & ADVERTISED_Pause ) {
* advertised | = OFPPF_PAUSE ;
}
if ( ecmd . advertising & ADVERTISED_Asym_Pause ) {
* advertised | = OFPPF_PAUSE_ASYM ;
}
/* Current settings. */
if ( ecmd . speed = = SPEED_10 ) {
* current = ecmd . duplex ? OFPPF_10MB_FD : OFPPF_10MB_HD ;
} else if ( ecmd . speed = = SPEED_100 ) {
* current = ecmd . duplex ? OFPPF_100MB_FD : OFPPF_100MB_HD ;
} else if ( ecmd . speed = = SPEED_1000 ) {
* current = ecmd . duplex ? OFPPF_1GB_FD : OFPPF_1GB_HD ;
} else if ( ecmd . speed = = SPEED_10000 ) {
* current = OFPPF_10GB_FD ;
} else {
* current = 0 ;
}
if ( ecmd . port = = PORT_TP ) {
* current | = OFPPF_COPPER ;
} else if ( ecmd . port = = PORT_FIBRE ) {
* current | = OFPPF_FIBER ;
}
if ( ecmd . autoneg ) {
* current | = OFPPF_AUTONEG ;
}
/* Peer advertisements. */
* peer = 0 ; /* XXX */
return 0 ;
}
/* Set the features advertised by 'netdev' to 'advertise'. */
static int
netdev_linux_set_advertisements ( struct netdev * netdev , uint32_t advertise )
{
struct ethtool_cmd ecmd ;
int error ;
memset ( & ecmd , 0 , sizeof ecmd ) ;
error = netdev_linux_do_ethtool ( netdev , & ecmd ,
ETHTOOL_GSET , " ETHTOOL_GSET " ) ;
if ( error ) {
return error ;
}
ecmd . advertising = 0 ;
if ( advertise & OFPPF_10MB_HD ) {
ecmd . advertising | = ADVERTISED_10baseT_Half ;
}
if ( advertise & OFPPF_10MB_FD ) {
ecmd . advertising | = ADVERTISED_10baseT_Full ;
}
if ( advertise & OFPPF_100MB_HD ) {
ecmd . advertising | = ADVERTISED_100baseT_Half ;
}
if ( advertise & OFPPF_100MB_FD ) {
ecmd . advertising | = ADVERTISED_100baseT_Full ;
}
if ( advertise & OFPPF_1GB_HD ) {
ecmd . advertising | = ADVERTISED_1000baseT_Half ;
}
if ( advertise & OFPPF_1GB_FD ) {
ecmd . advertising | = ADVERTISED_1000baseT_Full ;
}
if ( advertise & OFPPF_10GB_FD ) {
ecmd . advertising | = ADVERTISED_10000baseT_Full ;
}
if ( advertise & OFPPF_COPPER ) {
ecmd . advertising | = ADVERTISED_TP ;
}
if ( advertise & OFPPF_FIBER ) {
ecmd . advertising | = ADVERTISED_FIBRE ;
}
if ( advertise & OFPPF_AUTONEG ) {
ecmd . advertising | = ADVERTISED_Autoneg ;
}
if ( advertise & OFPPF_PAUSE ) {
ecmd . advertising | = ADVERTISED_Pause ;
}
if ( advertise & OFPPF_PAUSE_ASYM ) {
ecmd . advertising | = ADVERTISED_Asym_Pause ;
}
return netdev_linux_do_ethtool ( netdev , & ecmd ,
ETHTOOL_SSET , " ETHTOOL_SSET " ) ;
}
/* If 'netdev_name' is the name of a VLAN network device (e.g. one created with
* vconfig ( 8 ) ) , sets ' * vlan_vid ' to the VLAN VID associated with that device
* and returns 0. Otherwise returns a errno value ( specifically ENOENT if
* ' netdev_name ' is the name of a network device that is not a VLAN device ) and
* sets ' * vlan_vid ' to - 1. */
static int
netdev_linux_get_vlan_vid ( const struct netdev * netdev , int * vlan_vid )
{
const char * netdev_name = netdev_get_name ( netdev ) ;
struct ds line = DS_EMPTY_INITIALIZER ;
FILE * stream = NULL ;
int error ;
char * fn ;
COVERAGE_INC ( netdev_get_vlan_vid ) ;
fn = xasprintf ( " /proc/net/vlan/%s " , netdev_name ) ;
stream = fopen ( fn , " r " ) ;
if ( ! stream ) {
error = errno ;
goto done ;
}
if ( ds_get_line ( & line , stream ) ) {
if ( ferror ( stream ) ) {
error = errno ;
VLOG_ERR_RL ( & rl , " error reading \" %s \" : %s " , fn , strerror ( errno ) ) ;
} else {
error = EPROTO ;
VLOG_ERR_RL ( & rl , " unexpected end of file reading \" %s \" " , fn ) ;
}
goto done ;
}
if ( ! sscanf ( ds_cstr ( & line ) , " %*s VID: %d " , vlan_vid ) ) {
error = EPROTO ;
VLOG_ERR_RL ( & rl , " parse error reading \" %s \" line 1: \" %s \" " ,
fn , ds_cstr ( & line ) ) ;
goto done ;
}
error = 0 ;
done :
free ( fn ) ;
if ( stream ) {
fclose ( stream ) ;
}
ds_destroy ( & line ) ;
if ( error ) {
* vlan_vid = - 1 ;
}
return error ;
}
/* Shell command templates used by netdev_linux_set_policing().  Each takes
 * the device name as its first printf-style argument; POLICE_CONFIG_CMD
 * additionally takes the rate (in kbit) and the burst size. */
# define POLICE_ADD_CMD " / sbin / tc qdisc add dev %s handle ffff: ingress"
# define POLICE_CONFIG_CMD " / sbin / tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0 / 0 police rate %dkbit burst %dk mtu 65535 drop flowid :1"
/* We redirect stderr to /dev/null because we often want to remove all
 * traffic control configuration on a port so it's in a known state.  If
 * this is done when there is no such configuration, tc complains, so we just
 * always ignore it.
 */
# define POLICE_DEL_CMD " / sbin / tc qdisc del dev %s handle ffff: ingress 2> / dev / null"
/* Attempts to set input rate limiting (policing) policy on 'netdev'.
 *
 * Any existing ingress policing is removed first; failure to run that command
 * is only logged.  If 'kbits_rate' is nonzero, policing is then added and
 * configured with that rate and a burst of 'kbits_burst', defaulting to 10
 * when 'kbits_burst' is zero.
 *
 * Returns 0 if successful, or -1 if adding or configuring policing failed. */
static int
netdev_linux_set_policing(struct netdev *netdev,
                          uint32_t kbits_rate, uint32_t kbits_burst)
{
    const char *netdev_name = netdev_get_name(netdev);
    char command[1024];

    COVERAGE_INC(netdev_set_policing);

    /* xxx This should be more careful about only adding if it
     * xxx actually exists, as opposed to always deleting it. */
    snprintf(command, sizeof(command), POLICE_DEL_CMD, netdev_name);
    if (system(command) == -1) {
        VLOG_WARN_RL(&rl, " %s: problem removing policing ", netdev_name);
    }

    if (!kbits_rate) {
        /* Policing disabled: removing the old configuration was all there
         * was to do. */
        return 0;
    }

    if (!kbits_burst) {
        /* Default to 10 kilobits if not specified. */
        kbits_burst = 10;
    }

    snprintf(command, sizeof(command), POLICE_ADD_CMD, netdev_name);
    if (system(command) != 0) {
        VLOG_WARN_RL(&rl, " %s: problem adding policing ", netdev_name);
        return -1;
    }

    snprintf(command, sizeof(command), POLICE_CONFIG_CMD, netdev_name,
             kbits_rate, kbits_burst);
    if (system(command) != 0) {
        VLOG_WARN_RL(&rl, " %s: problem configuring policing ",
                     netdev_name);
        return -1;
    }

    return 0;
}
static int
2009-09-02 10:14:53 -07:00
netdev_linux_get_in4 ( const struct netdev * netdev_ ,
struct in_addr * address , struct in_addr * netmask )
2009-07-30 16:04:45 -07:00
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( ! ( netdev - > cache - > valid & VALID_IN4 ) ) {
int error ;
2009-09-02 10:14:53 -07:00
error = netdev_linux_get_ipv4 ( netdev_ , & netdev - > cache - > address ,
2009-07-30 16:04:45 -07:00
SIOCGIFADDR , " SIOCGIFADDR " ) ;
if ( error ) {
return error ;
}
2009-09-02 10:14:53 -07:00
error = netdev_linux_get_ipv4 ( netdev_ , & netdev - > cache - > netmask ,
SIOCGIFNETMASK , " SIOCGIFNETMASK " ) ;
if ( error ) {
return error ;
}
2009-07-30 16:04:45 -07:00
netdev - > cache - > valid | = VALID_IN4 ;
}
2009-09-02 10:14:53 -07:00
* address = netdev - > cache - > address ;
* netmask = netdev - > cache - > netmask ;
return address - > s_addr = = INADDR_ANY ? EADDRNOTAVAIL : 0 ;
2009-07-30 16:04:45 -07:00
}
static int
2009-09-02 10:14:53 -07:00
netdev_linux_set_in4 ( struct netdev * netdev_ , struct in_addr address ,
struct in_addr netmask )
2009-07-30 16:04:45 -07:00
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
int error ;
2009-09-02 10:14:53 -07:00
error = do_set_addr ( netdev_ , SIOCSIFADDR , " SIOCSIFADDR " , address ) ;
2009-07-30 16:04:45 -07:00
if ( ! error ) {
netdev - > cache - > valid | = VALID_IN4 ;
2009-09-02 10:14:53 -07:00
netdev - > cache - > address = address ;
netdev - > cache - > netmask = netmask ;
if ( address . s_addr ! = INADDR_ANY ) {
2009-07-30 16:04:45 -07:00
error = do_set_addr ( netdev_ , SIOCSIFNETMASK ,
2009-09-02 10:14:53 -07:00
" SIOCSIFNETMASK " , netmask ) ;
2009-07-30 16:04:45 -07:00
}
}
return error ;
}
/* Parses 'line', one line in the format of the kernel's if_inet6 proc file:
 * 32 hex digits of IPv6 address, four hex fields that are skipped, and an
 * interface name of at most 16 characters.
 *
 * On a complete match stores the address bytes in '*in6' and the interface
 * name in 'ifname' and returns true.  Otherwise returns false, with '*in6'
 * and 'ifname' possibly partly written. */
static bool
parse_if_inet6_line(const char *line,
                    struct in6_addr *in6, char ifname[16 + 1])
{
    /* 16 address bytes plus the interface name. */
    enum { N_FIELDS = 17 };
    uint8_t *b = in6->s6_addr;
    int n_matched;

#define X8 "%2"SCNx8
    n_matched = sscanf(line,
                       " " X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
                       " %*x %*x %*x %*x %16s \n ",
                       &b[0], &b[1], &b[2], &b[3],
                       &b[4], &b[5], &b[6], &b[7],
                       &b[8], &b[9], &b[10], &b[11],
                       &b[12], &b[13], &b[14], &b[15],
                       ifname);
    return n_matched == N_FIELDS;
}
/* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address (if
* ' in6 ' is non - null ) and returns true . Otherwise , returns false . */
static int
netdev_linux_get_in6 ( const struct netdev * netdev_ , struct in6_addr * in6 )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
if ( ! ( netdev - > cache - > valid & VALID_IN6 ) ) {
FILE * file ;
char line [ 128 ] ;
netdev - > cache - > in6 = in6addr_any ;
file = fopen ( " /proc/net/if_inet6 " , " r " ) ;
if ( file ! = NULL ) {
const char * name = netdev_get_name ( netdev_ ) ;
while ( fgets ( line , sizeof line , file ) ) {
struct in6_addr in6 ;
char ifname [ 16 + 1 ] ;
if ( parse_if_inet6_line ( line , & in6 , ifname )
& & ! strcmp ( name , ifname ) )
{
netdev - > cache - > in6 = in6 ;
break ;
}
}
fclose ( file ) ;
}
netdev - > cache - > valid | = VALID_IN6 ;
}
* in6 = netdev - > cache - > in6 ;
return 0 ;
}
/* Fills '*sa' with an AF_INET socket address that holds IPv4 address 'addr'
 * and port 0.  All of '*sa' is zeroed before the address is copied in. */
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
    struct sockaddr_in in4;

    memset(sa, 0, sizeof *sa);

    memset(&in4, 0, sizeof in4);
    in4.sin_family = AF_INET;
    in4.sin_port = 0;
    in4.sin_addr = addr;

    memcpy(sa, &in4, sizeof in4);
}
static int
do_set_addr ( struct netdev * netdev ,
int ioctl_nr , const char * ioctl_name , struct in_addr addr )
{
struct ifreq ifr ;
strncpy ( ifr . ifr_name , netdev - > name , sizeof ifr . ifr_name ) ;
make_in4_sockaddr ( & ifr . ifr_addr , addr ) ;
return netdev_linux_do_ioctl ( netdev , & ifr , ioctl_nr , ioctl_name ) ;
}
/* Adds 'router' as a default IP gateway, by issuing SIOCADDRT for a route
 * whose destination and mask are both INADDR_ANY.  'netdev' is not used.
 *
 * Returns 0 if successful, otherwise the errno value from ioctl(), which is
 * also logged. */
static int
netdev_linux_add_router(struct netdev *netdev UNUSED, struct in_addr router)
{
    struct in_addr any = { INADDR_ANY };
    struct rtentry route;
    int error = 0;

    memset(&route, 0, sizeof route);
    route.rt_flags = RTF_UP | RTF_GATEWAY;
    make_in4_sockaddr(&route.rt_dst, any);
    make_in4_sockaddr(&route.rt_gateway, router);
    make_in4_sockaddr(&route.rt_genmask, any);

    COVERAGE_INC(netdev_add_router);
    if (ioctl(af_inet_sock, SIOCADDRT, &route) < 0) {
        /* Capture errno right away, before anything else can change it. */
        error = errno;
    }
    if (error) {
        VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error));
    }
    return error;
}
2009-09-02 10:14:53 -07:00
static int
netdev_linux_get_next_hop ( const struct in_addr * host , struct in_addr * next_hop ,
char * * netdev_name )
{
static const char fn [ ] = " /proc/net/route " ;
FILE * stream ;
char line [ 256 ] ;
int ln ;
* netdev_name = NULL ;
stream = fopen ( fn , " r " ) ;
if ( stream = = NULL ) {
VLOG_WARN_RL ( & rl , " %s: open failed: %s " , fn , strerror ( errno ) ) ;
return errno ;
}
ln = 0 ;
while ( fgets ( line , sizeof line , stream ) ) {
if ( + + ln > = 2 ) {
char iface [ 17 ] ;
uint32_t dest , gateway , mask ;
int refcnt , metric , mtu ;
unsigned int flags , use , window , irtt ;
if ( sscanf ( line ,
" %16s % " SCNx32 " % " SCNx32 " %04X %d %u %d % " SCNx32
" %d %u %u \n " ,
iface , & dest , & gateway , & flags , & refcnt ,
& use , & metric , & mask , & mtu , & window , & irtt ) ! = 11 ) {
VLOG_WARN_RL ( & rl , " %s: could not parse line %d: %s " ,
fn , ln , line ) ;
continue ;
}
if ( ! ( flags & RTF_UP ) ) {
/* Skip routes that aren't up. */
continue ;
}
/* The output of 'dest', 'mask', and 'gateway' were given in
* network byte order , so we don ' t need need any endian
* conversions here . */
if ( ( dest & mask ) = = ( host - > s_addr & mask ) ) {
if ( ! gateway ) {
/* The host is directly reachable. */
next_hop - > s_addr = 0 ;
} else {
/* To reach the host, we must go through a gateway. */
next_hop - > s_addr = gateway ;
}
* netdev_name = xstrdup ( iface ) ;
fclose ( stream ) ;
return 0 ;
}
}
}
fclose ( stream ) ;
return ENXIO ;
}
2009-07-30 16:04:45 -07:00
/* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
* successfully retrieved , it stores the corresponding MAC address in ' mac ' and
* returns 0. Otherwise , it returns a positive errno value ; in particular ,
* ENXIO indicates that there is not ARP table entry for ' ip ' on ' netdev ' . */
static int
netdev_linux_arp_lookup ( const struct netdev * netdev ,
uint32_t ip , uint8_t mac [ ETH_ADDR_LEN ] )
{
struct arpreq r ;
struct sockaddr_in * pa ;
int retval ;
memset ( & r , 0 , sizeof r ) ;
pa = ( struct sockaddr_in * ) & r . arp_pa ;
pa - > sin_family = AF_INET ;
pa - > sin_addr . s_addr = ip ;
pa - > sin_port = 0 ;
r . arp_ha . sa_family = ARPHRD_ETHER ;
r . arp_flags = 0 ;
strncpy ( r . arp_dev , netdev - > name , sizeof r . arp_dev ) ;
COVERAGE_INC ( netdev_arp_lookup ) ;
retval = ioctl ( af_inet_sock , SIOCGARP , & r ) < 0 ? errno : 0 ;
if ( ! retval ) {
memcpy ( mac , r . arp_ha . sa_data , ETH_ADDR_LEN ) ;
} else if ( retval ! = ENXIO ) {
VLOG_WARN_RL ( & rl , " %s: could not look up ARP entry for " IP_FMT " : %s " ,
netdev - > name , IP_ARGS ( & ip ) , strerror ( retval ) ) ;
}
return retval ;
}
/* Translates the NETDEV_UP and NETDEV_PROMISC bits of 'nd' into the
 * corresponding IFF_UP and IFF_PROMISC bits.  Other bits in 'nd' are
 * ignored. */
static int
nd_to_iff_flags(enum netdev_flags nd)
{
    return ((nd & NETDEV_UP ? IFF_UP : 0)
            | (nd & NETDEV_PROMISC ? IFF_PROMISC : 0));
}
/* Translates the IFF_UP and IFF_PROMISC bits of 'iff' into the corresponding
 * NETDEV_UP and NETDEV_PROMISC bits.  Other bits in 'iff' are ignored. */
static int
iff_to_nd_flags(int iff)
{
    return ((iff & IFF_UP ? NETDEV_UP : 0)
            | (iff & IFF_PROMISC ? NETDEV_PROMISC : 0));
}
static int
netdev_linux_update_flags ( struct netdev * netdev , enum netdev_flags off ,
enum netdev_flags on , enum netdev_flags * old_flagsp )
{
int old_flags , new_flags ;
int error ;
error = get_flags ( netdev , & old_flags ) ;
if ( ! error ) {
* old_flagsp = iff_to_nd_flags ( old_flags ) ;
new_flags = ( old_flags & ~ nd_to_iff_flags ( off ) ) | nd_to_iff_flags ( on ) ;
if ( new_flags ! = old_flags ) {
error = set_flags ( netdev , new_flags ) ;
}
}
return error ;
}
/* Invokes the callback of every notifier on 'list', passing each callback its
 * own 'struct netdev_notifier'. */
static void
poll_notify(struct list *list)
{
    struct netdev_linux_notifier *entry;

    LIST_FOR_EACH (entry, struct netdev_linux_notifier, node, list) {
        entry->notifier.cb(&entry->notifier);
    }
}
static void
2009-07-28 12:36:32 -07:00
netdev_linux_poll_cb ( const struct rtnetlink_change * change ,
2009-07-30 16:04:45 -07:00
void * aux UNUSED )
{
if ( change ) {
struct list * list = shash_find_data ( & netdev_linux_notifiers ,
change - > ifname ) ;
if ( list ) {
poll_notify ( list ) ;
}
} else {
struct shash_node * node ;
SHASH_FOR_EACH ( node , & netdev_linux_notifiers ) {
poll_notify ( node - > data ) ;
}
}
}
static int
netdev_linux_poll_add ( struct netdev * netdev ,
void ( * cb ) ( struct netdev_notifier * ) , void * aux ,
struct netdev_notifier * * notifierp )
{
const char * netdev_name = netdev_get_name ( netdev ) ;
struct netdev_linux_notifier * notifier ;
struct list * list ;
if ( shash_is_empty ( & netdev_linux_notifiers ) ) {
2009-07-28 12:36:32 -07:00
int error = rtnetlink_notifier_register ( & netdev_linux_poll_notifier ,
2009-07-30 16:04:45 -07:00
netdev_linux_poll_cb , NULL ) ;
if ( error ) {
return error ;
}
}
list = shash_find_data ( & netdev_linux_notifiers , netdev_name ) ;
if ( ! list ) {
list = xmalloc ( sizeof * list ) ;
list_init ( list ) ;
shash_add ( & netdev_linux_notifiers , netdev_name , list ) ;
}
notifier = xmalloc ( sizeof * notifier ) ;
netdev_notifier_init ( & notifier - > notifier , netdev , cb , aux ) ;
list_push_back ( list , & notifier - > node ) ;
* notifierp = & notifier - > notifier ;
return 0 ;
}
static void
netdev_linux_poll_remove ( struct netdev_notifier * notifier_ )
{
struct netdev_linux_notifier * notifier =
CONTAINER_OF ( notifier_ , struct netdev_linux_notifier , notifier ) ;
struct list * list ;
/* Remove 'notifier' from its list. */
list = list_remove ( & notifier - > node ) ;
if ( list_is_empty ( list ) ) {
/* The list is now empty. Remove it from the hash and free it. */
const char * netdev_name = netdev_get_name ( notifier - > notifier . netdev ) ;
shash_delete ( & netdev_linux_notifiers ,
shash_find ( & netdev_linux_notifiers , netdev_name ) ) ;
free ( list ) ;
}
free ( notifier ) ;
/* If that was the last notifier, unregister. */
if ( shash_is_empty ( & netdev_linux_notifiers ) ) {
2009-07-28 12:36:32 -07:00
rtnetlink_notifier_unregister ( & netdev_linux_poll_notifier ) ;
2009-07-30 16:04:45 -07:00
}
}
/* Function table for the "linux" netdev class (empty prefix).
 *
 * The initializer is positional: each entry below fills the next member of
 * 'struct netdev_class' in declaration order, so entries must not be
 * reordered, added, or removed without matching the struct definition
 * (not visible in this file; presumably in netdev-provider.h). */
const struct netdev_class netdev_linux_class = {
" " , /* prefix */
" linux " , /* name */
netdev_linux_init ,
netdev_linux_run ,
netdev_linux_wait ,
netdev_linux_open ,
netdev_linux_close ,
netdev_linux_enumerate ,
netdev_linux_recv ,
netdev_linux_recv_wait ,
netdev_linux_drain ,
netdev_linux_send ,
netdev_linux_send_wait ,
netdev_linux_set_etheraddr ,
netdev_linux_get_etheraddr ,
netdev_linux_get_mtu ,
netdev_linux_get_carrier ,
netdev_linux_get_stats ,
netdev_linux_get_features ,
netdev_linux_set_advertisements ,
netdev_linux_get_vlan_vid ,
netdev_linux_set_policing ,
netdev_linux_get_in4 ,
netdev_linux_set_in4 ,
netdev_linux_get_in6 ,
netdev_linux_add_router ,
2009-09-02 10:14:53 -07:00
netdev_linux_get_next_hop ,
2009-07-30 16:04:45 -07:00
netdev_linux_arp_lookup ,
netdev_linux_update_flags ,
netdev_linux_poll_add ,
netdev_linux_poll_remove ,
} ;
/* Function table for the "tap" netdev class (prefix "tap").
 *
 * It uses the same netdev_linux_* functions as 'netdev_linux_class', except
 * that the 'run' and 'wait' entries are NULL.
 *
 * The initializer is positional: each entry below fills the next member of
 * 'struct netdev_class' in declaration order, so entries must not be
 * reordered, added, or removed without matching the struct definition
 * (not visible in this file; presumably in netdev-provider.h). */
const struct netdev_class netdev_tap_class = {
" tap " , /* prefix */
" tap " , /* name */
netdev_linux_init ,
NULL , /* run */
NULL , /* wait */
netdev_linux_open ,
netdev_linux_close ,
netdev_linux_enumerate ,
netdev_linux_recv ,
netdev_linux_recv_wait ,
netdev_linux_drain ,
netdev_linux_send ,
netdev_linux_send_wait ,
netdev_linux_set_etheraddr ,
netdev_linux_get_etheraddr ,
netdev_linux_get_mtu ,
netdev_linux_get_carrier ,
netdev_linux_get_stats ,
netdev_linux_get_features ,
netdev_linux_set_advertisements ,
netdev_linux_get_vlan_vid ,
netdev_linux_set_policing ,
netdev_linux_get_in4 ,
netdev_linux_set_in4 ,
netdev_linux_get_in6 ,
netdev_linux_add_router ,
2009-09-02 10:14:53 -07:00
netdev_linux_get_next_hop ,
2009-07-30 16:04:45 -07:00
netdev_linux_arp_lookup ,
netdev_linux_update_flags ,
netdev_linux_poll_add ,
netdev_linux_poll_remove ,
} ;
static int
get_stats_via_netlink ( int ifindex , struct netdev_stats * stats )
{
2009-07-28 13:05:20 -07:00
/* Policy for RTNLGRP_LINK messages.
*
* There are * many * more fields in these messages , but currently we only
* care about these fields . */
static const struct nl_policy rtnlgrp_link_policy [ ] = {
[ IFLA_IFNAME ] = { . type = NL_A_STRING , . optional = false } ,
[ IFLA_STATS ] = { . type = NL_A_UNSPEC , . optional = true ,
. min_len = sizeof ( struct rtnl_link_stats ) } ,
} ;
2009-07-30 16:04:45 -07:00
static struct nl_sock * rtnl_sock ;
struct ofpbuf request ;
struct ofpbuf * reply ;
struct ifinfomsg * ifi ;
const struct rtnl_link_stats * rtnl_stats ;
struct nlattr * attrs [ ARRAY_SIZE ( rtnlgrp_link_policy ) ] ;
int error ;
if ( ! rtnl_sock ) {
error = nl_sock_create ( NETLINK_ROUTE , 0 , 0 , 0 , & rtnl_sock ) ;
if ( error ) {
VLOG_ERR_RL ( & rl , " failed to create rtnetlink socket: %s " ,
strerror ( error ) ) ;
return error ;
}
}
ofpbuf_init ( & request , 0 ) ;
nl_msg_put_nlmsghdr ( & request , rtnl_sock , sizeof * ifi ,
RTM_GETLINK , NLM_F_REQUEST ) ;
ifi = ofpbuf_put_zeros ( & request , sizeof * ifi ) ;
ifi - > ifi_family = PF_UNSPEC ;
ifi - > ifi_index = ifindex ;
error = nl_sock_transact ( rtnl_sock , & request , & reply ) ;
ofpbuf_uninit ( & request ) ;
if ( error ) {
return error ;
}
if ( ! nl_policy_parse ( reply , NLMSG_HDRLEN + sizeof ( struct ifinfomsg ) ,
rtnlgrp_link_policy ,
attrs , ARRAY_SIZE ( rtnlgrp_link_policy ) ) ) {
ofpbuf_delete ( reply ) ;
return EPROTO ;
}
if ( ! attrs [ IFLA_STATS ] ) {
VLOG_WARN_RL ( & rl , " RTM_GETLINK reply lacks stats " ) ;
2009-09-22 10:17:44 -07:00
ofpbuf_delete ( reply ) ;
2009-07-30 16:04:45 -07:00
return EPROTO ;
}
rtnl_stats = nl_attr_get ( attrs [ IFLA_STATS ] ) ;
stats - > rx_packets = rtnl_stats - > rx_packets ;
stats - > tx_packets = rtnl_stats - > tx_packets ;
stats - > rx_bytes = rtnl_stats - > rx_bytes ;
stats - > tx_bytes = rtnl_stats - > tx_bytes ;
stats - > rx_errors = rtnl_stats - > rx_errors ;
stats - > tx_errors = rtnl_stats - > tx_errors ;
stats - > rx_dropped = rtnl_stats - > rx_dropped ;
stats - > tx_dropped = rtnl_stats - > tx_dropped ;
stats - > multicast = rtnl_stats - > multicast ;
stats - > collisions = rtnl_stats - > collisions ;
stats - > rx_length_errors = rtnl_stats - > rx_length_errors ;
stats - > rx_over_errors = rtnl_stats - > rx_over_errors ;
stats - > rx_crc_errors = rtnl_stats - > rx_crc_errors ;
stats - > rx_frame_errors = rtnl_stats - > rx_frame_errors ;
stats - > rx_fifo_errors = rtnl_stats - > rx_fifo_errors ;
stats - > rx_missed_errors = rtnl_stats - > rx_missed_errors ;
stats - > tx_aborted_errors = rtnl_stats - > tx_aborted_errors ;
stats - > tx_carrier_errors = rtnl_stats - > tx_carrier_errors ;
stats - > tx_fifo_errors = rtnl_stats - > tx_fifo_errors ;
stats - > tx_heartbeat_errors = rtnl_stats - > tx_heartbeat_errors ;
stats - > tx_window_errors = rtnl_stats - > tx_window_errors ;
2009-09-22 10:17:44 -07:00
ofpbuf_delete ( reply ) ;
2009-07-30 16:04:45 -07:00
return 0 ;
}
static int
get_stats_via_proc ( const char * netdev_name , struct netdev_stats * stats )
{
static const char fn [ ] = " /proc/net/dev " ;
char line [ 1024 ] ;
FILE * stream ;
int ln ;
stream = fopen ( fn , " r " ) ;
if ( ! stream ) {
VLOG_WARN_RL ( & rl , " %s: open failed: %s " , fn , strerror ( errno ) ) ;
return errno ;
}
ln = 0 ;
while ( fgets ( line , sizeof line , stream ) ) {
if ( + + ln > = 3 ) {
char devname [ 16 ] ;
# define X64 "%"SCNu64
if ( sscanf ( line ,
" %15[^:]: "
X64 X64 X64 X64 X64 X64 X64 " %*u "
X64 X64 X64 X64 X64 X64 X64 " %*u " ,
devname ,
& stats - > rx_bytes ,
& stats - > rx_packets ,
& stats - > rx_errors ,
& stats - > rx_dropped ,
& stats - > rx_fifo_errors ,
& stats - > rx_frame_errors ,
& stats - > multicast ,
& stats - > tx_bytes ,
& stats - > tx_packets ,
& stats - > tx_errors ,
& stats - > tx_dropped ,
& stats - > tx_fifo_errors ,
& stats - > collisions ,
& stats - > tx_carrier_errors ) ! = 15 ) {
VLOG_WARN_RL ( & rl , " %s:%d: parse error " , fn , ln ) ;
} else if ( ! strcmp ( devname , netdev_name ) ) {
stats - > rx_length_errors = UINT64_MAX ;
stats - > rx_over_errors = UINT64_MAX ;
stats - > rx_crc_errors = UINT64_MAX ;
stats - > rx_missed_errors = UINT64_MAX ;
stats - > tx_aborted_errors = UINT64_MAX ;
stats - > tx_heartbeat_errors = UINT64_MAX ;
stats - > tx_window_errors = UINT64_MAX ;
fclose ( stream ) ;
return 0 ;
}
}
}
VLOG_WARN_RL ( & rl , " %s: no stats for %s " , fn , netdev_name ) ;
fclose ( stream ) ;
return ENODEV ;
}
static int
get_flags ( const struct netdev * netdev , int * flags )
{
struct ifreq ifr ;
int error ;
error = netdev_linux_do_ioctl ( netdev , & ifr , SIOCGIFFLAGS , " SIOCGIFFLAGS " ) ;
* flags = ifr . ifr_flags ;
return error ;
}
/* Sets the interface flags of 'netdev' to 'flags' with SIOCSIFFLAGS.
 *
 * The request is zeroed first, as the other ifreq-based helpers in this file
 * do, so that no uninitialized bytes are handed to the ioctl.
 *
 * Returns 0 if successful, otherwise the error from
 * netdev_linux_do_ioctl(). */
static int
set_flags(struct netdev *netdev, int flags)
{
    struct ifreq ifr;

    memset(&ifr, 0, sizeof ifr);
    ifr.ifr_flags = flags;
    return netdev_linux_do_ioctl(netdev, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
}
/* Looks up the interface index of the device named 'netdev_name' with
 * SIOCGIFINDEX.
 *
 * Returns the index if successful, otherwise a negative errno value (the
 * failure is also logged). */
static int
do_get_ifindex(const char *netdev_name)
{
    struct ifreq ifr;

    memset(&ifr, 0, sizeof ifr);
    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
    COVERAGE_INC(netdev_get_ifindex);
    if (ioctl(af_inet_sock, SIOCGIFINDEX, &ifr) < 0) {
        /* Save errno before logging, because the logging call may itself
         * change errno and we must report ioctl()'s error to the caller. */
        int error = errno;

        VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
                     netdev_name, strerror(error));
        return -error;
    }
    return ifr.ifr_ifindex;
}
static int
get_ifindex ( const struct netdev * netdev_ , int * ifindexp )
{
struct netdev_linux * netdev = netdev_linux_cast ( netdev_ ) ;
* ifindexp = 0 ;
if ( ! ( netdev - > cache - > valid & VALID_IFINDEX ) ) {
int ifindex = do_get_ifindex ( netdev_get_name ( netdev_ ) ) ;
if ( ifindex < 0 ) {
return - ifindex ;
}
netdev - > cache - > valid | = VALID_IFINDEX ;
netdev - > cache - > ifindex = ifindex ;
}
* ifindexp = netdev - > cache - > ifindex ;
return 0 ;
}
/* Reads the hardware address of the device named 'netdev_name' with
 * SIOCGIFHWADDR and copies its first ETH_ADDR_LEN bytes into 'ea'.
 *
 * A warning is logged, but the address is still returned, if the reported
 * address family is neither AF_UNSPEC nor ARPHRD_ETHER.
 *
 * Returns 0 if successful, otherwise the positive errno value from ioctl()
 * (the failure is also logged). */
static int
get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN])
{
    struct ifreq ifr;
    int hwaddr_family;

    memset(&ifr, 0, sizeof ifr);
    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
    COVERAGE_INC(netdev_get_hwaddr);
    if (ioctl(af_inet_sock, SIOCGIFHWADDR, &ifr) < 0) {
        /* Save errno before logging, because the logging call may itself
         * change errno and we must report ioctl()'s error to the caller. */
        int error = errno;

        VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s",
                 netdev_name, strerror(error));
        return error;
    }
    hwaddr_family = ifr.ifr_hwaddr.sa_family;
    if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
        VLOG_WARN("%s device has unknown hardware address family %d",
                  netdev_name, hwaddr_family);
    }
    memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN);
    return 0;
}
/* Sets the hardware address of the device named 'netdev_name' to the
 * ETH_ADDR_LEN bytes in 'mac', with address family 'hwaddr_family', using
 * SIOCSIFHWADDR.
 *
 * Returns 0 if successful, otherwise the positive errno value from ioctl()
 * (the failure is also logged). */
static int
set_etheraddr(const char *netdev_name, int hwaddr_family,
              const uint8_t mac[ETH_ADDR_LEN])
{
    struct ifreq ifr;

    memset(&ifr, 0, sizeof ifr);
    strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
    ifr.ifr_hwaddr.sa_family = hwaddr_family;
    memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN);
    COVERAGE_INC(netdev_set_hwaddr);
    if (ioctl(af_inet_sock, SIOCSIFHWADDR, &ifr) < 0) {
        /* Save errno before logging, because the logging call may itself
         * change errno and we must report ioctl()'s error to the caller. */
        int error = errno;

        VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
                 netdev_name, strerror(error));
        return error;
    }
    return 0;
}
static int
netdev_linux_do_ethtool ( struct netdev * netdev , struct ethtool_cmd * ecmd ,
int cmd , const char * cmd_name )
{
struct ifreq ifr ;
memset ( & ifr , 0 , sizeof ifr ) ;
strncpy ( ifr . ifr_name , netdev - > name , sizeof ifr . ifr_name ) ;
ifr . ifr_data = ( caddr_t ) ecmd ;
ecmd - > cmd = cmd ;
COVERAGE_INC ( netdev_ethtool ) ;
if ( ioctl ( af_inet_sock , SIOCETHTOOL , & ifr ) = = 0 ) {
return 0 ;
} else {
if ( errno ! = EOPNOTSUPP ) {
VLOG_WARN_RL ( & rl , " ethtool command %s on network device %s "
" failed: %s " , cmd_name , netdev - > name ,
strerror ( errno ) ) ;
} else {
/* The device doesn't support this operation. That's pretty
* common , so there ' s no point in logging anything . */
}
return errno ;
}
}
static int
netdev_linux_do_ioctl ( const struct netdev * netdev , struct ifreq * ifr ,
int cmd , const char * cmd_name )
{
strncpy ( ifr - > ifr_name , netdev_get_name ( netdev ) , sizeof ifr - > ifr_name ) ;
if ( ioctl ( af_inet_sock , cmd , ifr ) = = - 1 ) {
VLOG_DBG_RL ( & rl , " %s: ioctl(%s) failed: %s " ,
netdev_get_name ( netdev ) , cmd_name , strerror ( errno ) ) ;
return errno ;
}
return 0 ;
}
2009-09-02 10:14:53 -07:00
static int
netdev_linux_get_ipv4 ( const struct netdev * netdev , struct in_addr * ip ,
int cmd , const char * cmd_name )
{
struct ifreq ifr ;
int error ;
ifr . ifr_addr . sa_family = AF_INET ;
error = netdev_linux_do_ioctl ( netdev , & ifr , cmd , cmd_name ) ;
if ( ! error ) {
const struct sockaddr_in * sin = ( struct sockaddr_in * ) & ifr . ifr_addr ;
* ip = sin - > sin_addr ;
}
return error ;
}