Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
@@ -19,11 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
/*
* MAC data path
@@ -298,13 +298,12 @@
* When there is no fanout, MAC does not create any soft rings for a device and
* the device has frames delivered directly to the MAC client.
*
* Otherwise, all fanout is performed by software. MAC divides incoming frames
* into one of three buckets -- IPv4 TCP traffic, IPv4 UDP traffic, and
- * everything else. Note, VLAN tagged traffic is considered other, regardless of
- * the interior EtherType. Regardless of the type of fanout, these three
- * categories or buckets are always used.
+ * everything else. Regardless of the type of fanout, these three categories
+ * or buckets are always used.
*
* The difference between protocol level fanout and full software ring protocol
* fanout is the number of software rings that end up getting created. The
* system always uses the same number of software rings per protocol bucket. So
* in the first case when we're just doing protocol level fanout, we just create
@@ -1473,20 +1472,19 @@
* Pair of local and remote ports in the transport header
*/
#define PORTS_SIZE 4
/*
- * mac_rx_srs_proto_fanout
- *
- * This routine delivers packets destined to an SRS into one of the
+ * This routine delivers packets destined for an SRS into one of the
* protocol soft rings.
*
- * Given a chain of packets we need to split it up into multiple sub chains
- * destined into TCP, UDP or OTH soft ring. Instead of entering
- * the soft ring one packet at a time, we want to enter it in the form of a
- * chain otherwise we get this start/stop behaviour where the worker thread
- * goes to sleep and then next packets comes in forcing it to wake up etc.
+ * Given a chain of packets we need to split it up into multiple sub
+ * chains, each destined for the TCP, UDP or OTH soft ring. Instead
+ * of entering a soft ring one packet at a time, we enter it with a
+ * chain; otherwise we get start/stop behaviour where the worker
+ * thread goes to sleep and then the next packet comes in, forcing
+ * it to wake up.
*/
static void
mac_rx_srs_proto_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *head)
{
struct ether_header *ehp;
@@ -1521,13 +1519,13 @@
*/
hw_classified = mac_srs->srs_ring != NULL &&
mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER;
/*
- * Special clients (eg. VLAN, non ether, etc) need DLS
- * processing in the Rx path. SRST_DLS_BYPASS will be clear for
- * such SRSs. Another way of disabling bypass is to set the
+ * Some clients, such as non-Ethernet, need DLS processing in
+ * the Rx path. Such clients clear the SRST_DLS_BYPASS flag.
+ * DLS bypass may also be disabled via the
* MCIS_RX_BYPASS_DISABLE flag.
*/
dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) &&
((mcip->mci_state_flags & MCIS_RX_BYPASS_DISABLE) == 0);
@@ -1535,14 +1533,15 @@
bzero(tailmp, MAX_SR_TYPES * sizeof (mblk_t *));
bzero(cnt, MAX_SR_TYPES * sizeof (int));
bzero(sz, MAX_SR_TYPES * sizeof (size_t));
/*
- * We got a chain from SRS that we need to send to the soft rings.
- * Since squeues for TCP & IPv4 sap poll their soft rings (for
- * performance reasons), we need to separate out v4_tcp, v4_udp
- * and the rest goes in other.
+ * We have a chain from SRS that we need to split across the
+ * soft rings. The squeues for the TCP and IPv4 SAPs use their
+ * own soft rings to allow polling from the squeue. The rest of
+ * the packets are delivered on the OTH soft ring which cannot
+ * be polled.
*/
while (head != NULL) {
mp = head;
head = head->b_next;
mp->b_next = NULL;
@@ -1566,13 +1565,18 @@
*/
if ((sap = ntohs(ehp->ether_type)) == VLAN_TPID) {
evhp = (struct ether_vlan_header *)mp->b_rptr;
sap = ntohs(evhp->ether_type);
hdrsize = sizeof (struct ether_vlan_header);
+
/*
- * Check if the VID of the packet, if any,
- * belongs to this client.
+ * Check if the VID of the packet, if
+ * any, belongs to this client.
+ * Technically, if this packet came up
+ * via a HW classified ring then we
+ * don't need to perform this check.
+ * Perhaps a future optimization.
*/
if (!mac_client_check_flow_vid(mcip,
VLAN_ID(ntohs(evhp->ether_tci)))) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
@@ -1633,11 +1637,10 @@
* a packet that is looped back and when the stack uses the
* fastpath mechanism. The normal case is optimized for
* performance and may bypass DLS. All other cases go through
* the 'OTH' type path without DLS bypass.
*/
-
ipha = (ipha_t *)(mp->b_rptr + hdrsize);
if ((type != OTH) && MBLK_RX_FANOUT_SLOWPATH(mp, ipha))
type = OTH;
if (type == OTH) {
@@ -1645,15 +1648,17 @@
cnt[type], bw_ctl, sz[type], sz1, mp);
continue;
}
ASSERT(type == UNDEF);
+
/*
- * We look for at least 4 bytes past the IP header to get
- * the port information. If we get an IP fragment, we don't
- * have the port information, and we use just the protocol
- * information.
+ * Determine the type from the IP protocol value. If
+ * classified as TCP or UDP, then update the read
+ * pointer to the beginning of the IP header.
+ * Otherwise leave the message as is for further
+ * processing by DLS.
*/
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
type = V4_TCP;
mp->b_rptr += hdrsize;
@@ -1693,15 +1698,14 @@
}
int fanout_unaligned = 0;
/*
- * mac_rx_srs_long_fanout
- *
- * The fanout routine for VLANs, and for anything else that isn't performing
- * explicit dls bypass. Returns -1 on an error (drop the packet due to a
- * malformed packet), 0 on success, with values written in *indx and *type.
+ * The fanout routine for any clients with DLS bypass disabled or for
+ * traffic classified as "other". Returns -1 on error (the packet is
+ * malformed and should be dropped) or 0 on success, with values
+ * written in *indx and *type.
*/
static int
mac_rx_srs_long_fanout(mac_soft_ring_set_t *mac_srs, mblk_t *mp,
uint32_t sap, size_t hdrsize, enum pkt_type *type, uint_t *indx)
{
@@ -1863,20 +1867,19 @@
*type = OTH;
return (0);
}
/*
- * mac_rx_srs_fanout
- *
- * This routine delivers packets destined to an SRS into a soft ring member
+ * This routine delivers packets destined for an SRS into a soft ring member
* of the set.
*
- * Given a chain of packets we need to split it up into multiple sub chains
- * destined for one of the TCP, UDP or OTH soft rings. Instead of entering
- * the soft ring one packet at a time, we want to enter it in the form of a
- * chain otherwise we get this start/stop behaviour where the worker thread
- * goes to sleep and then next packets comes in forcing it to wake up etc.
+ * Given a chain of packets we need to split it up into multiple sub
+ * chains, each destined for one of the TCP, UDP or OTH soft rings.
+ * Instead of entering a soft ring one packet at a time, we enter it
+ * with a chain; otherwise we get start/stop behaviour where the
+ * worker thread goes to sleep and then the next packet comes in,
+ * forcing it to wake up.
*
* Note:
* Since we know what is the maximum fanout possible, we create a 2D array
* of 'softring types * MAX_SR_FANOUT' for the head, tail, cnt and sz
* variables so that we can enter the softrings with chain. We need the
@@ -1933,14 +1936,15 @@
*/
hw_classified = mac_srs->srs_ring != NULL &&
mac_srs->srs_ring->mr_classify_type == MAC_HW_CLASSIFIER;
/*
- * Special clients (eg. VLAN, non ether, etc) need DLS
- * processing in the Rx path. SRST_DLS_BYPASS will be clear for
- * such SRSs. Another way of disabling bypass is to set the
- * MCIS_RX_BYPASS_DISABLE flag.
+ * Some clients, such as non-Ethernet, need DLS processing in
+ * the Rx path. Such clients clear the SRST_DLS_BYPASS flag.
+ * DLS bypass may also be disabled via the
+ * MCIS_RX_BYPASS_DISABLE flag, but this is only consumed by
+ * sun4v vsw currently.
*/
dls_bypass = ((mac_srs->srs_type & SRST_DLS_BYPASS) != 0) &&
((mcip->mci_state_flags & MCIS_RX_BYPASS_DISABLE) == 0);
/*
@@ -1958,11 +1962,11 @@
bzero(cnt, MAX_SR_TYPES * MAX_SR_FANOUT * sizeof (int));
bzero(sz, MAX_SR_TYPES * MAX_SR_FANOUT * sizeof (size_t));
/*
* We got a chain from SRS that we need to send to the soft rings.
- * Since squeues for TCP & IPv4 sap poll their soft rings (for
+ * Since squeues for TCP & IPv4 SAP poll their soft rings (for
* performance reasons), we need to separate out v4_tcp, v4_udp
* and the rest goes in other.
*/
while (head != NULL) {
mp = head;
@@ -1988,13 +1992,18 @@
*/
if ((sap = ntohs(ehp->ether_type)) == VLAN_TPID) {
evhp = (struct ether_vlan_header *)mp->b_rptr;
sap = ntohs(evhp->ether_type);
hdrsize = sizeof (struct ether_vlan_header);
+
/*
- * Check if the VID of the packet, if any,
- * belongs to this client.
+ * Check if the VID of the packet, if
+ * any, belongs to this client.
+ * Technically, if this packet came up
+ * via a HW classified ring then we
+ * don't need to perform this check.
+ * Perhaps a future optimization.
*/
if (!mac_client_check_flow_vid(mcip,
VLAN_ID(ntohs(evhp->ether_tci)))) {
mac_rx_drop_pkt(mac_srs, mp);
continue;
@@ -2030,11 +2039,10 @@
tailmp[type][indx], cnt[type][indx], bw_ctl,
sz[type][indx], sz1, mp);
continue;
}
-
/*
* If we are using the default Rx ring where H/W or S/W
* classification has not happened, we need to verify if
* this unicast packet really belongs to us.
*/
@@ -2619,11 +2627,10 @@
if ((tid = mac_srs->srs_tid) != NULL)
mac_srs->srs_tid = NULL;
mac_srs->srs_state |= (SRS_PROC|proc_type);
-
/*
* mcip is NULL for broadcast and multicast flows. The promisc
* callbacks for broadcast and multicast packets are delivered from
* mac_rx() and we don't need to worry about that case in this path
*/
@@ -2639,14 +2646,12 @@
mutex_enter(&mac_srs->srs_lock);
}
}
/*
- * Check if SRS itself is doing the processing
- * This direct path does not apply when subflows are present. In this
- * case, packets need to be dispatched to a soft ring according to the
- * flow's bandwidth and other resources contraints.
+ * Check if SRS itself is doing the processing. This direct
+ * path does not apply when subflows are present.
*/
if (mac_srs->srs_type & SRST_NO_SOFT_RINGS) {
mac_direct_rx_t proc;
void *arg1;
mac_resource_handle_t arg2;
@@ -4654,26 +4659,27 @@
* passing it on to the client's receive callback.
* Note that this needs to be done after we dispatch
* the packet to the promiscuous listeners of the
* client, since they expect to see the whole
* frame including the VLAN headers.
+ *
+ * The MCIS_STRIP_DISABLE flag is only set when sun4v
+ * vsw is in play.
*/
mp_chain = mac_strip_vlan_tag_chain(mp_chain);
}
mcip->mci_rx_fn(mcip->mci_rx_arg, mrh, mp_chain, B_FALSE);
}
/*
- * mac_rx_soft_ring_process
+ * Process a chain for a given soft ring. If the number of packets
+ * queued in the SRS and its associated soft rings (including this
+ * one) is very small (tracked by srs_poll_pkt_cnt) then allow the
+ * entering thread (interrupt or poll thread) to process the chain
+ * inline. This is meant to reduce latency under low load.
*
- * process a chain for a given soft ring. The number of packets queued
- * in the SRS and its associated soft rings (including this one) is
- * very small (tracked by srs_poll_pkt_cnt), then allow the entering
- * thread (interrupt or poll thread) to do inline processing. This
- * helps keep the latency down under low load.
- *
* The proc and arg for each mblk is already stored in the mblk in
* appropriate places.
*/
/* ARGSUSED */
void
@@ -4727,17 +4733,17 @@
(*proc)(arg1, arg2, mp_chain, NULL);
ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock));
/*
- * If we have a soft ring set which is doing
- * bandwidth control, we need to decrement
- * srs_size and count so it the SRS can have a
- * accurate idea of what is the real data
- * queued between SRS and its soft rings. We
- * decrement the counters only when the packet
- * gets processed by both SRS and the soft ring.
+ * If we have an SRS performing bandwidth
+ * control then we need to decrement the size
+ * and count so the SRS has an accurate count
+ * of the data queued between the SRS and its
+ * soft rings. We decrement the counters only
+ * when the packet is processed by both the
+ * SRS and the soft ring.
*/
mutex_enter(&mac_srs->srs_lock);
MAC_UPDATE_SRS_COUNT_LOCKED(mac_srs, cnt);
MAC_UPDATE_SRS_SIZE_LOCKED(mac_srs, sz);
mutex_exit(&mac_srs->srs_lock);
@@ -4749,12 +4755,12 @@
cv_signal(&ringp->s_ring_client_cv);
if ((ringp->s_ring_first == NULL) ||
(ringp->s_ring_state & S_RING_BLANK)) {
/*
- * We processed inline our packet and
- * nothing new has arrived or our
+ * We processed a single packet inline
+ * and nothing new has arrived or our
* receiver doesn't want to receive
* any packets. We are done.
*/
mutex_exit(&ringp->s_ring_lock);
return;