#include <unistd.h>
#include "coverage.h"
-#include "dpif-linux.h"
+#include "dpif-netlink.h"
#include "dpif-netdev.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "ovs-atomic.h"
+#include "packet-dpif.h"
#include "packets.h"
#include "poll-loop.h"
#include "rtnetlink-link.h"
/* Polling miimon status for all ports causes performance degradation when
* handling a large number of ports. If there are no devices using miimon, then
- * we skip netdev_linux_miimon_run() and netdev_linux_miimon_wait(). */
-static atomic_int miimon_cnt = ATOMIC_VAR_INIT(0);
+ * we skip netdev_linux_miimon_run() and netdev_linux_miimon_wait().
+ *
+ * Readers do not depend on this variable synchronizing with the related
+ * changes in the device miimon status, so we can use atomic_count. */
+static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
static void netdev_linux_run(void);
static bool
netdev_linux_miimon_enabled(void)
{
-    int miimon;
-
-    atomic_read(&miimon_cnt, &miimon);
-    return miimon > 0;
+    /* A plain counter read is sufficient here; see the comment on
+     * 'miimon_cnt' above for why no stronger ordering is required. */
+    return atomic_count_get(&miimon_cnt) > 0;
}
static void
}
if (netdev->miimon_interval > 0) {
- int junk;
- atomic_sub(&miimon_cnt, 1, &junk);
+ atomic_count_dec(&miimon_cnt);
}
ovs_mutex_destroy(&netdev->mutex);
}
static int
-netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct ofpbuf **packet, int *c)
+netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **packets,
+ int *c)
{
struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
struct netdev *netdev = rx->up.netdev;
+ struct dpif_packet *packet;
struct ofpbuf *buffer;
ssize_t retval;
int mtu;
mtu = ETH_PAYLOAD_MAX;
}
- buffer = ofpbuf_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu, DP_NETDEV_HEADROOM);
+ packet = dpif_packet_new_with_headroom(VLAN_ETH_HEADER_LEN + mtu,
+ DP_NETDEV_HEADROOM);
+ buffer = &packet->ofpbuf;
retval = (rx->is_tap
? netdev_linux_rxq_recv_tap(rx->fd, buffer)
VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s",
ovs_strerror(errno), netdev_rxq_get_name(rxq_));
}
- ofpbuf_delete(buffer);
+ dpif_packet_delete(packet);
} else {
dp_packet_pad(buffer);
- packet[0] = buffer;
+ dpif_packet_set_dp_hash(packet, 0);
+ packets[0] = packet;
*c = 1;
}
* The kernel maintains a packet transmission queue, so the caller is not
* expected to do additional queuing of packets. */
static int
-netdev_linux_send(struct netdev *netdev_, struct ofpbuf *pkt, bool may_steal)
+netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
+                  struct dpif_packet **pkts, int cnt, bool may_steal)
{
-    const void *data = ofpbuf_data(pkt);
-    size_t size = ofpbuf_size(pkt);
+    int i;
+    int error = 0;
-    for (;;) {
+    /* 'i' is incremented only if there's no error */
+    for (i = 0; i < cnt;) {
+        const void *data = ofpbuf_data(&pkts[i]->ofpbuf);
+        size_t size = ofpbuf_size(&pkts[i]->ofpbuf);
        ssize_t retval;
        if (!is_tap_netdev(netdev_)) {
            retval = write(netdev->tap_fd, data, size);
        }
-        if (may_steal) {
-            ofpbuf_delete(pkt);
-        }
-
        if (retval < 0) {
            /* The Linux AF_PACKET implementation never blocks waiting for room
             * for packets, instead returning ENOBUFS. Translate this into
             * EAGAIN for the caller. */
-            if (errno == ENOBUFS) {
-                return EAGAIN;
-            } else if (errno == EINTR) {
+            error = errno == ENOBUFS ? EAGAIN : errno;
+            if (error == EINTR) {
+                /* continue without incrementing 'i', i.e. retry this packet */
+                /* NOTE(review): 'error' still holds EINTR across this retry
+                 * and is never reset, so if the retried write succeeds the
+                 * function can still return a stale EINTR after the loop.
+                 * Verify whether 'error' should be reset to 0 here. */
                continue;
-            } else if (errno != EAGAIN) {
-                VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
-                             netdev_get_name(netdev_), ovs_strerror(errno));
            }
-            return errno;
+            break;
        } else if (retval != size) {
-            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE"d bytes of "
-                         "%"PRIuSIZE") on %s", retval, size, netdev_get_name(netdev_));
-            return EMSGSIZE;
-        } else {
-            return 0;
+            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE" bytes"
+                         " of %"PRIuSIZE") on %s", retval, size,
+                         netdev_get_name(netdev_));
+            error = EMSGSIZE;
+            break;
        }
+
+        /* Process the next packet in the batch */
+        i++;
+    }
+
+    /* When 'may_steal' is true the caller hands over ownership of every
+     * packet in the batch, so free them all, including any that were not
+     * sent because of an error above. */
+    if (may_steal) {
+        for (i = 0; i < cnt; i++) {
+            dpif_packet_delete(pkts[i]);
+        }
+    }
+
+    if (error && error != EAGAIN) {
+        VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                     netdev_get_name(netdev_), ovs_strerror(error));
    }
+
+    return error;
+
}
/* Registers with the poll loop to wake up from the next call to poll_block()
* expected to do additional queuing of packets. Thus, this function is
* unlikely to ever be used. It is included for completeness. */
static void
-netdev_linux_send_wait(struct netdev *netdev)
+netdev_linux_send_wait(struct netdev *netdev, int qid OVS_UNUSED)
{
if (is_tap_netdev(netdev)) {
/* TAP device always accepts packets.*/
ovs_mutex_lock(&netdev->mutex);
interval = interval > 0 ? MAX(interval, 100) : 0;
if (netdev->miimon_interval != interval) {
- int junk;
-
if (interval && !netdev->miimon_interval) {
- atomic_add(&miimon_cnt, 1, &junk);
+ atomic_count_inc(&miimon_cnt);
} else if (!interval && netdev->miimon_interval) {
- atomic_sub(&miimon_cnt, 1, &junk);
+ atomic_count_dec(&miimon_cnt);
}
netdev->miimon_interval = interval;
netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
const struct ovs_vport_stats *src)
{
- dst->rx_packets = get_unaligned_u64(&src->rx_packets);
- dst->tx_packets = get_unaligned_u64(&src->tx_packets);
- dst->rx_bytes = get_unaligned_u64(&src->rx_bytes);
- dst->tx_bytes = get_unaligned_u64(&src->tx_bytes);
- dst->rx_errors = get_unaligned_u64(&src->rx_errors);
- dst->tx_errors = get_unaligned_u64(&src->tx_errors);
- dst->rx_dropped = get_unaligned_u64(&src->rx_dropped);
- dst->tx_dropped = get_unaligned_u64(&src->tx_dropped);
+ dst->rx_packets = get_32aligned_u64(&src->rx_packets);
+ dst->tx_packets = get_32aligned_u64(&src->tx_packets);
+ dst->rx_bytes = get_32aligned_u64(&src->rx_bytes);
+ dst->tx_bytes = get_32aligned_u64(&src->tx_bytes);
+ dst->rx_errors = get_32aligned_u64(&src->rx_errors);
+ dst->tx_errors = get_32aligned_u64(&src->tx_errors);
+ dst->rx_dropped = get_32aligned_u64(&src->rx_dropped);
+ dst->tx_dropped = get_32aligned_u64(&src->tx_dropped);
dst->multicast = 0;
dst->collisions = 0;
dst->rx_length_errors = 0;
static int
get_stats_via_vport__(const struct netdev *netdev, struct netdev_stats *stats)
{
- struct dpif_linux_vport reply;
+ struct dpif_netlink_vport reply;
struct ofpbuf *buf;
int error;
- error = dpif_linux_vport_get(netdev_get_name(netdev), &reply, &buf);
+ error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf);
if (error) {
return error;
} else if (!reply.stats) {
return error;
}
-static int
-netdev_internal_set_stats(struct netdev *netdev,
- const struct netdev_stats *stats)
-{
- struct ovs_vport_stats vport_stats;
- struct dpif_linux_vport vport;
- int err;
-
- vport_stats.rx_packets = stats->rx_packets;
- vport_stats.tx_packets = stats->tx_packets;
- vport_stats.rx_bytes = stats->rx_bytes;
- vport_stats.tx_bytes = stats->tx_bytes;
- vport_stats.rx_errors = stats->rx_errors;
- vport_stats.tx_errors = stats->tx_errors;
- vport_stats.rx_dropped = stats->rx_dropped;
- vport_stats.tx_dropped = stats->tx_dropped;
-
- dpif_linux_vport_init(&vport);
- vport.cmd = OVS_VPORT_CMD_SET;
- vport.name = netdev_get_name(netdev);
- vport.stats = &vport_stats;
-
- err = dpif_linux_vport_transact(&vport, NULL, NULL);
-
- /* If the vport layer doesn't know about the device, that doesn't mean it
- * doesn't exist (after all were able to open it when netdev_open() was
- * called), it just means that it isn't attached and we'll be getting
- * stats a different way. */
- if (err == ENODEV) {
- err = EOPNOTSUPP;
- }
-
- return err;
-}
-
static void
netdev_linux_read_features(struct netdev_linux *netdev)
{
return error;
}
-#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, SET_STATS, \
+#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, \
GET_FEATURES, GET_STATUS) \
{ \
NAME, \
NULL, /* get_config */ \
NULL, /* set_config */ \
NULL, /* get_tunnel_config */ \
+ NULL, /* get_numa_id */ \
+ NULL, /* set_multiq */ \
\
netdev_linux_send, \
netdev_linux_send_wait, \
netdev_linux_get_carrier_resets, \
netdev_linux_set_miimon_interval, \
GET_STATS, \
- SET_STATS, \
\
GET_FEATURES, \
netdev_linux_set_advertisements, \
"system",
netdev_linux_construct,
netdev_linux_get_stats,
- NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_get_status);
"tap",
netdev_linux_construct_tap,
netdev_tap_get_stats,
- NULL, /* set_stats */
netdev_linux_get_features,
netdev_linux_get_status);
"internal",
netdev_linux_construct,
netdev_internal_get_stats,
- netdev_internal_set_stats,
NULL, /* get_features */
netdev_internal_get_status);
\f
memset(&tc_police, 0, sizeof tc_police);
tc_police.action = TC_POLICE_SHOT;
tc_police.mtu = mtu;
- tc_fill_rate(&tc_police.rate, (kbits_rate * 1000)/8, mtu);
+ tc_fill_rate(&tc_police.rate, ((uint64_t) kbits_rate * 1000)/8, mtu);
tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate,
kbits_burst * 1024);