Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2017, Joyent, Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/callb.h>
  28 #include <sys/cpupart.h>
  29 #include <sys/pool.h>
  30 #include <sys/pool_pset.h>
  31 #include <sys/sdt.h>
  32 #include <sys/strsubr.h>
  33 #include <sys/strsun.h>
  34 #include <sys/vlan.h>
  35 #include <inet/ipsec_impl.h>
  36 #include <inet/ip_impl.h>
  37 #include <inet/sadb.h>
  38 #include <inet/ipsecesp.h>
  39 #include <inet/ipsecah.h>
  40 
  41 #include <sys/mac_impl.h>
  42 #include <sys/mac_client_impl.h>
  43 #include <sys/mac_client_priv.h>


1169  *
1170  * The underlying device can expose up to MAX_RINGS_PER_GROUP worth of
1171  * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of
1172  * array space is needed to store Tx soft rings. Thus we allocate so
1173  * much array space for srs_tx_soft_rings.
1174  *
1175  * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth
1176  * of space to st_soft_rings. This array is used for quick access to
1177  * soft ring associated with a pseudo Tx ring based on the pseudo
1178  * ring's index (mr_index).
1179  */
1180 static void
1181 mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 {
1183         mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184 
1185         if (mac_srs->srs_type & SRST_TX) {
1186                 mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187                     kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188                     MAX_RINGS_PER_GROUP, KM_SLEEP);
1189                 if (mcip->mci_state_flags & MCIS_IS_AGGR) {
1190                         mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191 
1192                         tx->st_soft_rings = (mac_soft_ring_t **)
1193                             kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194                             MAX_RINGS_PER_GROUP, KM_SLEEP);
1195                 }
1196         } else {
1197                 mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199                     KM_SLEEP);
1200                 mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **)
1201                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1202                     KM_SLEEP);
1203                 mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **)
1204                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1205                     KM_SLEEP);
1206         }
1207 }
1208 
1209 static void


1578         mutex_exit(&srs->srs_lock);
1579 
1580         mac_tx_client_restart((mac_client_handle_t)mcip);
1581 }
1582 
1583 /*
1584  * The uber function that deals with any update to bandwidth limits.
1585  */
1586 void
1587 mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
1588 {
1589         int                     count;
1590 
1591         for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592                 mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593         mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 }
1595 
1596 /*
1597  * When the first sub-flow is added to a link, we disable polling on the
1598  * link and also modify the entry point to mac_rx_srs_subflow_process.
1599  * (polling is disabled because with the subflow added, accounting
1600  * for polling needs additional logic, it is assumed that when a subflow is
1601  * added, we can take some hit as a result of disabling polling rather than
1602  * adding more complexity - if this becomes a perf. issue we need to
1603  * re-evaluate this logic).  When the last subflow is removed, we turn
1604  * polling back on and also reset the entry point to mac_rx_srs_process.
1605  *
1606  * In the future if there are multiple SRS, we can simply
1607  * take one and give it to the flow rather than disabling polling and
1608  * resetting the entry point.
1609  */
1610 void
1611 mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 {
1613         flow_entry_t            *flent = mcip->mci_flent;
1614         int                     i;
1615         mac_impl_t              *mip = mcip->mci_mip;
1616         mac_rx_func_t           rx_func;
1617         uint_t                  rx_srs_cnt;
1618         boolean_t               enable_classifier;
1619 
1620         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1621 
1622         enable_classifier = !FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && enable;
1623 
1624         rx_func = enable_classifier ? mac_rx_srs_subflow_process :


1629                 enable_classifier = B_TRUE;
1630 
1631         /*
1632          * If receive function has already been configured correctly for
1633          * current subflow configuration, do nothing.
1634          */
1635         if (flent->fe_cb_fn == (flow_fn_t)rx_func)
1636                 return;
1637 
1638         rx_srs_cnt = flent->fe_rx_srs_cnt;
1639         for (i = 0; i < rx_srs_cnt; i++) {
1640                 ASSERT(flent->fe_rx_srs[i] != NULL);
1641                 mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642                     enable_classifier, rx_func);
1643         }
1644 
1645         /*
1646          * Change the S/W classifier so that we can land in the
1647          * correct processing function with correct argument.
1648          * If all subflows have been removed we can revert to
1649          * mac_rx_srs_process, else we need mac_rx_srs_subflow_process.
1650          */
1651         mutex_enter(&flent->fe_lock);
1652         flent->fe_cb_fn = (flow_fn_t)rx_func;
1653         flent->fe_cb_arg1 = (void *)mip;
1654         flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655         mutex_exit(&flent->fe_lock);
1656 }
1657 
1658 static void
1659 mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
1660 {
1661         int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0;
1662         mac_soft_ring_t *softring;
1663 
1664         softring = mac_srs->srs_soft_ring_head;
1665         if (softring == NULL) {
1666                 ASSERT(mac_srs->srs_soft_ring_count == 0);
1667                 mac_srs->srs_tcp_ring_count = 0;
1668                 mac_srs->srs_udp_ring_count = 0;
1669                 mac_srs->srs_oth_ring_count = 0;


2168                 mutex_exit(&mac_bw->mac_bw_lock);
2169                 mac_srs->srs_type |= SRST_BW_CONTROL;
2170         } else {
2171                 mac_srs->srs_drain_func = mac_rx_srs_drain;
2172         }
2173 
2174         /*
2175          * We use the following policy to control Receive
2176          * Side Dynamic Polling:
2177          * 1) We switch to poll mode anytime the processing thread causes
2178          *    a backlog to build up in SRS and its associated Soft Rings
2179          *    (sr_poll_pkt_cnt > 0).
2180          * 2) As long as the backlog stays under the low water mark
2181          *    (sr_lowat), we poll the H/W for more packets.
2182          * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183          *    stay in poll mode but don't poll the H/W for more packets.
2184          * 4) Anytime in polling mode, if we poll the H/W for packets and
2185          *    find nothing plus we have an existing backlog
2186          *    (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187          *    the H/W for packets anymore (let the polling thread go to sleep).
2188          * 5) Once the backlog is relieved (packets are processed) we reenable
2189          *    polling (by signalling the poll thread) only when the backlog
2190          *    dips below sr_poll_thres.
2191          * 6) sr_hiwat is used exclusively when we are not polling capable
2192          *    and is used to decide when to drop packets so the SRS queue
2193          *    length doesn't grow infinitely.
2194          */
2195         if (!is_tx_srs) {
2196                 srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197                 /* Low water mark needs to be less than high water mark */
2198                 srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
2199                     mac_soft_ring_max_q_cnt ? mac_soft_ring_min_q_cnt :
2200                     (mac_soft_ring_max_q_cnt >> 2);
2201                 /* Poll threshold needs to be half of low water mark or less */
2202                 srs_rx->sr_poll_thres = mac_soft_ring_poll_thres <=
2203                     (srs_rx->sr_lowat >> 1) ? mac_soft_ring_poll_thres :
2204                     (srs_rx->sr_lowat >> 1);
2205                 if (mac_latency_optimize)
2206                         mac_srs->srs_state |= SRS_LATENCY_OPT;
2207                 else
2208                         mac_srs->srs_state |= SRS_SOFTRING_QUEUE;


2239                 /* Is the mac_srs created over the RX default group? */
2240                 if (ring->mr_gh == (mac_group_handle_t)
2241                     MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) {
2242                         mac_srs->srs_type |= SRST_DEFAULT_GRP;
2243                 }
2244                 mac_srs->srs_ring = ring;
2245                 ring->mr_srs = mac_srs;
2246                 ring->mr_classify_type = MAC_HW_CLASSIFIER;
2247                 ring->mr_flag |= MR_INCIPIENT;
2248 
2249                 if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250                     FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251                         mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252 
2253                 mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254                     mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255                     mac_srs->srs_pri);
2256                 /*
2257                  * Some drivers require serialization and don't send
2258                  * packet chains in interrupt context. For such
2259                  * drivers, we should always queue in soft ring
2260                  * so that we get a chance to switch into a polling
2261                  * mode under backlog.
2262                  */
2263                 ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264                 if (ring_info & MAC_RING_RX_ENQUEUE)
2265                         mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266         }
2267 done:
2268         mac_srs_stat_create(mac_srs);
2269         return (mac_srs);
2270 }
2271 
2272 /*
2273  * Figure out the number of soft rings required. It depends on
2274  * whether protocol fanout is required (for LINKs), global settings
2275  * require us to do fanout for performance (based on mac_soft_ring_enable),
2276  * or user has specifically requested fanout.
2277  */
2278 static uint32_t
2279 mac_find_fanout(flow_entry_t *flent, uint32_t link_type)
2280 {


2347 mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2348     uint32_t link_type)
2349 {
2350         cpupart_t               *cpupart;
2351         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
2352         mac_resource_props_t    *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2353         boolean_t               use_default = B_FALSE;
2354 
2355         mac_rx_srs_group_setup(mcip, flent, link_type);
2356         mac_tx_srs_group_setup(mcip, flent, link_type);
2357 
2358         pool_lock();
2359         cpupart = mac_pset_find(mrp, &use_default);
2360         mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
2361             mac_rx_deliver, mcip, NULL, cpupart);
2362         mac_set_pool_effective(use_default, cpupart, mrp, emrp);
2363         pool_unlock();
2364 }
2365 
2366 /*
2367  * Set up the RX SRSs. If the S/W SRS is not set, set  it up, if there
2368  * is a group associated with this MAC client, set up SRSs for individual
2369  * h/w rings.


2370  */
2371 void
2372 mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2373     uint32_t link_type)
2374 {
2375         mac_impl_t              *mip = mcip->mci_mip;
2376         mac_soft_ring_set_t     *mac_srs;
2377         mac_ring_t              *ring;
2378         uint32_t                fanout_type;
2379         mac_group_t             *rx_group = flent->fe_rx_ring_group;

2380 
2381         fanout_type = mac_find_fanout(flent, link_type);

2382 
2383         /* Create the SRS for S/W classification if none exists */
2384         if (flent->fe_rx_srs[0] == NULL) {
2385                 ASSERT(flent->fe_rx_srs_cnt == 0);
2386                 /* Setup the Rx SRS */
2387                 mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
2388                     mac_rx_deliver, mcip, NULL, NULL);
2389                 mutex_enter(&flent->fe_lock);
2390                 flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
2391                 flent->fe_cb_arg1 = (void *)mip;
2392                 flent->fe_cb_arg2 = (void *)mac_srs;
2393                 mutex_exit(&flent->fe_lock);
2394         }
2395 
2396         if (rx_group == NULL)
2397                 return;

2398         /*
2399          * fanout for default SRS is done when default SRS are created
2400          * above. As each ring is added to the group, we setup the
2401          * SRS and fanout to it.
2402          */
2403         switch (rx_group->mrg_state) {
2404         case MAC_GROUP_STATE_RESERVED:
2405                 for (ring = rx_group->mrg_rings; ring != NULL;
2406                     ring = ring->mr_next) {


2407                         switch (ring->mr_state) {
2408                         case MR_INUSE:
2409                         case MR_FREE:
2410                                 if (ring->mr_srs != NULL)
2411                                         break;
2412                                 if (ring->mr_state != MR_INUSE)
2413                                         (void) mac_start_ring(ring);
2414 
2415                                 /*
2416                                  * Since the group is exclusively ours create
2417                                  * an SRS for this ring to allow the
2418                                  * individual SRS to dynamically poll the
2419                                  * ring. Do this only if the  client is not
2420                                  * a VLAN MAC client, since for VLAN we do
2421                                  * s/w classification for the VID check, and
2422                                  * if it has a unicast address.
2423                                  */
2424                                 if ((mcip->mci_state_flags &
2425                                     MCIS_NO_UNICAST_ADDR) ||
2426                                     i_mac_flow_vid(mcip->mci_flent) !=
2427                                     VLAN_ID_NONE) {
2428                                         break;
2429                                 }








2430                                 mac_srs = mac_srs_create(mcip, flent,
2431                                     fanout_type | link_type,
2432                                     mac_rx_deliver, mcip, NULL, ring);
2433                                 break;
2434                         default:
2435                                 cmn_err(CE_PANIC,
2436                                     "srs_setup: mcip = %p "
2437                                     "trying to add UNKNOWN ring = %p\n",
2438                                     (void *)mcip, (void *)ring);
2439                                 break;
2440                         }
2441                 }
2442                 break;
2443         case MAC_GROUP_STATE_SHARED:
2444                 /*
2445                  * Set all rings of this group to software classified.
2446                  *
2447                  * If the group is current RESERVED, the existing mac
2448                  * client (the only client on this group) is using
2449                  * this group exclusively.  In that case we need to
2450                  * disable polling on the rings of the group (if it
2451                  * was enabled), and free the SRS associated with the
2452                  * rings.
2453                  */
2454                 mac_rx_switch_grp_to_sw(rx_group);
2455                 break;
2456         default:
2457                 ASSERT(B_FALSE);
2458                 break;
2459         }
2460 }
2461 
2462 /*
2463  * Set up the TX SRS.
2464  */
2465 void
2466 mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2467     uint32_t link_type)
2468 {
2469         int                     cnt;
2470         int                     ringcnt;
2471         mac_ring_t              *ring;
2472         mac_group_t             *grp;


2485                         return;
2486                 grp = (mac_group_t *)flent->fe_tx_ring_group;
2487                 ringcnt = grp->mrg_cur_count;
2488                 ring = grp->mrg_rings;
2489                 for (cnt = 0; cnt < ringcnt; cnt++) {
2490                         if (ring->mr_state != MR_INUSE) {
2491                                 (void) mac_start_ring(ring);
2492                         }
2493                         ring = ring->mr_next;
2494                 }
2495                 return;
2496         }
2497         if (flent->fe_tx_srs == NULL) {
2498                 (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2499                     NULL, mcip, NULL, NULL);
2500         }
2501         mac_tx_srs_setup(mcip, flent);
2502 }
2503 
2504 /*
2505  * Remove all the RX SRSs. If we want to remove only the SRSs associated
2506  * with h/w rings, leave the S/W SRS alone. This is used when we want to
2507  * move the MAC client from one group to another, so we need to teardown
2508  * on the h/w SRSs.

2509  */
2510 void
2511 mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2512 {
2513         mac_soft_ring_set_t     *mac_srs;
2514         int                     i;
2515         int                     count = flent->fe_rx_srs_cnt;
2516 
2517         for (i = 0; i < count; i++) {
2518                 if (i == 0 && hwonly)
2519                         continue;
2520                 mac_srs = flent->fe_rx_srs[i];
2521                 mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2522                 mac_srs_free(mac_srs);
2523                 flent->fe_rx_srs[i] = NULL;
2524                 flent->fe_rx_srs_cnt--;
2525         }
2526         ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1);
2527         ASSERT(hwonly || flent->fe_rx_srs_cnt == 0);








2528 }
2529 
2530 /*
2531  * Remove the TX SRS.
2532  */
2533 void
2534 mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2535     uint32_t link_type)
2536 {
2537         mac_soft_ring_set_t     *tx_srs;
2538         mac_srs_tx_t            *tx;
2539 
2540         if ((tx_srs = flent->fe_tx_srs) == NULL)
2541                 return;
2542 
2543         tx = &tx_srs->srs_tx;
2544         switch (link_type) {
2545         case SRST_FLOW:
2546                 /*
2547                  * For flows, we need to work with passed


2809  *
2810  * Note: In future, if no fanout is specified, we try to assign 2 Rx
2811  * rings for the primary Link with the primary MAC address + TCP going
2812  * to one ring and primary MAC address + UDP|SCTP going to other ring.
2813  * Any remaining traffic for primary MAC address can go to the default
2814  * Rx ring and get S/W classified. This way the respective SRSs don't
2815  * need to do proto fanout and don't need to have softrings at all and
2816  * can poll their respective Rx rings.
2817  *
2818  * As an optimization, when a new NIC or VNIC is created, we can get
2819  * only one Rx ring and make it a TCP specific Rx ring and use the
2820  * H/W default Rx ring for the rest (this Rx ring is never polled).
2821  *
2822  * For clients that don't have MAC address, but want to receive and
2823  * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath.
2824  * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we
2825  * always give the default group and use software classification (i.e.
2826  * even if this is the only client in the default group, we will
2827  * leave group as shared).
2828  */

2829 int
2830 mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2831     uint32_t link_type)
2832 {
2833         mac_impl_t              *mip = mcip->mci_mip;
2834         mac_group_t             *rgroup = NULL;
2835         mac_group_t             *tgroup = NULL;
2836         mac_group_t             *default_rgroup;
2837         mac_group_t             *default_tgroup;
2838         int                     err;

2839         uint8_t                 *mac_addr;
2840         mac_group_state_t       next_state;
2841         mac_client_impl_t       *group_only_mcip;
2842         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
2843         mac_resource_props_t    *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2844         boolean_t               rxhw;
2845         boolean_t               txhw;
2846         boolean_t               use_default = B_FALSE;
2847         cpupart_t               *cpupart;
2848         boolean_t               no_unicast;
2849         boolean_t               isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2850         mac_client_impl_t       *reloc_pmcip = NULL;

2851 
2852         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2853 
2854         switch (link_type) {
2855         case SRST_FLOW:
2856                 mac_srs_group_setup(mcip, flent, link_type);
2857                 return (0);
2858 
2859         case SRST_LINK:
2860                 no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2861                 mac_addr = flent->fe_flow_desc.fd_dst_mac;
2862 
2863                 /* Default RX group */
2864                 default_rgroup = MAC_DEFAULT_RX_GROUP(mip);
2865 
2866                 /* Default TX group */
2867                 default_tgroup = MAC_DEFAULT_TX_GROUP(mip);
2868 
2869                 if (no_unicast) {
2870                         rgroup = default_rgroup;
2871                         tgroup = default_tgroup;
2872                         goto grp_found;
2873                 }
2874                 rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2875                     (mrp->mrp_nrxrings > 0 ||
2876                     (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2877                 txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2878                     (mrp->mrp_ntxrings > 0 ||
2879                     (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2880 
2881                 /*
2882                  * By default we have given the primary all the rings
2883                  * i.e. the default group. Let's see if the primary
2884                  * needs to be relocated so that the addition of this
2885                  * client doesn't impact the primary's performance,
2886                  * i.e. if the primary is in the default group and
2887                  * we add this client, the primary will lose polling.
2888                  * We do this only for NICs supporting dynamic ring
2889                  * grouping and only when this is the first client
2890                  * after the primary (i.e. nactiveclients is 2)




2891                  */
2892                 if (!isprimary && mip->mi_nactiveclients == 2 &&
2893                     (group_only_mcip = mac_primary_client_handle(mip)) !=
2894                     NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2895                         reloc_pmcip = mac_check_primary_relocation(
2896                             group_only_mcip, rxhw);
2897                 }

2898                 /*
2899                  * Check to see if we can get an exclusive group for
2900                  * this mac address or if there already exists a
2901                  * group that has this mac address (case of VLANs).
2902                  * If no groups are available, use the default group.
2903                  */
2904                 rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2905                 if (rgroup == NULL && rxhw) {
2906                         err = ENOSPC;
2907                         goto setup_failed;
2908                 } else if (rgroup == NULL) {
2909                         rgroup = default_rgroup;
2910                 }

2911                 /*



















2912                  * Check to see if we can get an exclusive group for
2913                  * this mac client. If no groups are available, use
2914                  * the default group.
2915                  */
2916                 tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2917                 if (tgroup == NULL && txhw) {
2918                         if (rgroup != NULL && rgroup != default_rgroup)
2919                                 mac_release_rx_group(mcip, rgroup);
2920                         err = ENOSPC;
2921                         goto setup_failed;
2922                 } else if (tgroup == NULL) {
2923                         tgroup = default_tgroup;
2924                 }
2925 
2926                 /*
2927                  * Some NICs don't support any Rx rings, so there may not
2928                  * even be a default group.
2929                  */
2930         grp_found:
2931                 if (rgroup != NULL) {
2932                         if (rgroup != default_rgroup &&
2933                             MAC_GROUP_NO_CLIENT(rgroup) &&
2934                             (rxhw || mcip->mci_share != 0)) {
2935                                 MAC_RX_GRP_RESERVED(mip);
2936                                 if (mip->mi_rx_group_type ==
2937                                     MAC_GROUP_TYPE_DYNAMIC) {
2938                                         MAC_RX_RING_RESERVED(mip,
2939                                             rgroup->mrg_cur_count);
2940                                 }
2941                         }

2942                         flent->fe_rx_ring_group = rgroup;
2943                         /*
2944                          * Add the client to the group. This could cause
2945                          * either this group to move to the shared state or
2946                          * cause the default group to move to the shared state.
2947                          * The actions on this group are done here, while the
2948                          * actions on the default group are postponed to
2949                          * the end of this function.


2950                          */
2951                         mac_group_add_client(rgroup, mcip);
2952                         next_state = mac_group_next_state(rgroup,
2953                             &group_only_mcip, default_rgroup, B_TRUE);
2954                         mac_set_group_state(rgroup, next_state);
2955                 }
2956 
2957                 if (tgroup != NULL) {
2958                         if (tgroup != default_tgroup &&
2959                             MAC_GROUP_NO_CLIENT(tgroup) &&
2960                             (txhw || mcip->mci_share != 0)) {
2961                                 MAC_TX_GRP_RESERVED(mip);
2962                                 if (mip->mi_tx_group_type ==
2963                                     MAC_GROUP_TYPE_DYNAMIC) {
2964                                         MAC_TX_RING_RESERVED(mip,
2965                                             tgroup->mrg_cur_count);
2966                                 }
2967                         }
2968                         flent->fe_tx_ring_group = tgroup;
2969                         mac_group_add_client(tgroup, mcip);
2970                         next_state = mac_group_next_state(tgroup,
2971                             &group_only_mcip, default_tgroup, B_FALSE);
2972                         tgroup->mrg_state = next_state;
2973                 }
2974                 /*
2975                  * Setup the Rx and Tx SRSes. If we got a pristine group
2976                  * exclusively above, mac_srs_group_setup would simply create
2977                  * the required SRSes. If we ended up sharing a previously
2978                  * reserved group, mac_srs_group_setup would also dismantle the
2979                  * SRSes of the previously exclusive group
2980                  */
2981                 mac_srs_group_setup(mcip, flent, link_type);
2982 
2983                 /* We are setting up minimal datapath only */
2984                 if (no_unicast)

2985                         break;
2986                 /* Program the S/W Classifier */


2987                 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
2988                         goto setup_failed;
2989 
2990                 /* Program the H/W Classifier */
2991                 if ((err = mac_add_macaddr(mip, rgroup, mac_addr,
2992                     (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0)



2993                         goto setup_failed;

2994                 mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
2995                 ASSERT(mcip->mci_unicast != NULL);










2996                 /* (Re)init the v6 token & local addr used by link protection */
2997                 mac_protect_update_mac_token(mcip);
2998                 break;
2999 
3000         default:
3001                 ASSERT(B_FALSE);
3002                 break;
3003         }
3004 
3005         /*
3006          * All broadcast and multicast traffic is received only on the default
3007          * group. If we have setup the datapath for a non-default group above
3008          * then move the default group to shared state to allow distribution of
3009          * incoming broadcast traffic to the other groups and dismantle the
3010          * SRSes over the default group.
3011          */
3012         if (rgroup != NULL) {
3013                 if (rgroup != default_rgroup) {
3014                         if (default_rgroup->mrg_state ==
3015                             MAC_GROUP_STATE_RESERVED) {


3019                                     mip->mi_nactiveclients > 1);
3020 
3021                                 mac_set_group_state(default_rgroup,
3022                                     MAC_GROUP_STATE_SHARED);
3023                                 mac_rx_srs_group_setup(group_only_mcip,
3024                                     group_only_mcip->mci_flent, SRST_LINK);
3025                                 pool_lock();
3026                                 cpupart = mac_pset_find(mrp, &use_default);
3027                                 mac_fanout_setup(group_only_mcip,
3028                                     group_only_mcip->mci_flent,
3029                                     MCIP_RESOURCE_PROPS(group_only_mcip),
3030                                     mac_rx_deliver, group_only_mcip, NULL,
3031                                     cpupart);
3032                                 mac_set_pool_effective(use_default, cpupart,
3033                                     mrp, emrp);
3034                                 pool_unlock();
3035                         }
3036                         ASSERT(default_rgroup->mrg_state ==
3037                             MAC_GROUP_STATE_SHARED);
3038                 }

3039                 /*
3040                  * If we get an exclusive group for a VLAN MAC client we
3041                  * need to take the s/w path to make the additional check for
3042                  * the vid. Disable polling and set it to s/w classification.
3043                  * Similarly for clients that don't have a unicast address.


3044                  */
3045                 if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3046                     (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) {

3047                         mac_rx_switch_grp_to_sw(rgroup);
3048                 }

3049         }

3050         mac_set_rings_effective(mcip);
3051         return (0);
3052 
3053 setup_failed:
3054         /* Switch the primary back to default group */
3055         if (reloc_pmcip != NULL) {
3056                 (void) mac_rx_switch_group(reloc_pmcip,
3057                     reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3058         }
3059         mac_datapath_teardown(mcip, flent, link_type);
3060         return (err);
3061 }
3062 
3063 void
3064 mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
3065     uint32_t link_type)
3066 {
3067         mac_impl_t              *mip = mcip->mci_mip;
3068         mac_group_t             *group = NULL;
3069         mac_client_impl_t       *grp_only_mcip;
3070         flow_entry_t            *group_only_flent;
3071         mac_group_t             *default_group;
3072         boolean_t               check_default_group = B_FALSE;
3073         mac_group_state_t       next_state;
3074         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);

3075 
3076         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3077 
3078         switch (link_type) {
3079         case SRST_FLOW:
3080                 mac_rx_srs_group_teardown(flent, B_FALSE);
3081                 mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3082                 return;
3083 
3084         case SRST_LINK:
3085                 /* Stop sending packets */
3086                 mac_tx_client_block(mcip);


3087 
3088                 /* Stop the packets coming from the H/W */



3089                 if (mcip->mci_unicast != NULL) {
3090                         int err;
3091                         err = mac_remove_macaddr(mcip->mci_unicast);


3092                         if (err != 0) {
3093                                 cmn_err(CE_WARN, "%s: failed to remove a MAC"
3094                                     " address because of error 0x%x",
3095                                     mip->mi_name, err);
3096                         }

3097                         mcip->mci_unicast = NULL;
3098                 }
3099 
3100                 /* Stop the packets coming from the S/W classifier */
3101                 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3102                 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3103 
3104                 /* Now quiesce and destroy all SRS and soft rings */
3105                 mac_rx_srs_group_teardown(flent, B_FALSE);
3106                 mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3107 
3108                 ASSERT((mcip->mci_flent == flent) &&
3109                     (flent->fe_next == NULL));
3110 
3111                 /*
3112                  * Release our hold on the group as well. We need
3113                  * to check if the shared group has only one client
3114                  * left who can use it exclusively. Also, if we
3115                  * were the last client, release the group.
3116                  */
3117                 group = flent->fe_rx_ring_group;
3118                 default_group = MAC_DEFAULT_RX_GROUP(mip);
3119                 if (group != NULL) {
3120                         mac_group_remove_client(group, mcip);
3121                         next_state = mac_group_next_state(group,
3122                             &grp_only_mcip, default_group, B_TRUE);

3123                         if (next_state == MAC_GROUP_STATE_RESERVED) {
3124                                 /*
3125                                  * Only one client left on this RX group.
3126                                  */
3127                                 ASSERT(grp_only_mcip != NULL);
3128                                 mac_set_group_state(group,
3129                                     MAC_GROUP_STATE_RESERVED);
3130                                 group_only_flent = grp_only_mcip->mci_flent;
3131 
3132                                 /*
3133                                  * The only remaining client has exclusive
3134                                  * access on the group. Allow it to
3135                                  * dynamically poll the H/W rings etc.
3136                                  */
3137                                 mac_rx_srs_group_setup(grp_only_mcip,
3138                                     group_only_flent, SRST_LINK);
3139                                 mac_fanout_setup(grp_only_mcip,
3140                                     group_only_flent,
3141                                     MCIP_RESOURCE_PROPS(grp_only_mcip),
3142                                     mac_rx_deliver, grp_only_mcip, NULL, NULL);
3143                                 mac_rx_group_unmark(group, MR_INCIPIENT);
3144                                 mac_set_rings_effective(grp_only_mcip);
3145                         } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3146                                 /*
3147                                  * This is a non-default group being freed up.
3148                                  * We need to reevaluate the default group
3149                                  * to see if the primary client can get
3150                                  * exclusive access to the default group.
3151                                  */
3152                                 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
3153                                 if (mrp->mrp_mask & MRP_RX_RINGS) {
3154                                         MAC_RX_GRP_RELEASED(mip);
3155                                         if (mip->mi_rx_group_type ==
3156                                             MAC_GROUP_TYPE_DYNAMIC) {
3157                                                 MAC_RX_RING_RELEASED(mip,
3158                                                     group->mrg_cur_count);
3159                                         }
3160                                 }
3161                                 mac_release_rx_group(mcip, group);
3162                                 mac_set_group_state(group,
3163                                     MAC_GROUP_STATE_REGISTERED);
3164                                 check_default_group = B_TRUE;
3165                         } else {
3166                                 ASSERT(next_state == MAC_GROUP_STATE_SHARED);

3167                                 mac_set_group_state(group,
3168                                     MAC_GROUP_STATE_SHARED);
3169                                 mac_rx_group_unmark(group, MR_CONDEMNED);
3170                         }
3171                         flent->fe_rx_ring_group = NULL;
3172                 }
3173                 /*
3174                  * Remove the client from the TX group. Additionally, if
3175                  * this a non-default group, then we also need to release
3176                  * the group.
3177                  */
3178                 group = flent->fe_tx_ring_group;
3179                 default_group = MAC_DEFAULT_TX_GROUP(mip);
3180                 if (group != NULL) {
3181                         mac_group_remove_client(group, mcip);
3182                         next_state = mac_group_next_state(group,
3183                             &grp_only_mcip, default_group, B_FALSE);
3184                         if (next_state == MAC_GROUP_STATE_REGISTERED) {
3185                                 if (group != default_group) {
3186                                         if (mrp->mrp_mask & MRP_TX_RINGS) {


3235                         } else if (next_state == MAC_GROUP_STATE_RESERVED) {
3236                                 mac_set_rings_effective(grp_only_mcip);
3237                         }
3238                         flent->fe_tx_ring_group = NULL;
3239                         group->mrg_state = next_state;
3240                 }
3241                 break;
3242         default:
3243                 ASSERT(B_FALSE);
3244                 break;
3245         }
3246 
3247         /*
3248          * The mac client using the default group gets exclusive access to the
3249          * default group if and only if it is the sole client on the entire
3250          * mip. If so set the group state to reserved, and set up the SRSes
3251          * over the default group.
3252          */
3253         if (check_default_group) {
3254                 default_group = MAC_DEFAULT_RX_GROUP(mip);
3255                 ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED);
3256                 next_state = mac_group_next_state(default_group,
3257                     &grp_only_mcip, default_group, B_TRUE);
3258                 if (next_state == MAC_GROUP_STATE_RESERVED) {
3259                         ASSERT(grp_only_mcip != NULL &&
3260                             mip->mi_nactiveclients == 1);
3261                         mac_set_group_state(default_group,
3262                             MAC_GROUP_STATE_RESERVED);
3263                         mac_rx_srs_group_setup(grp_only_mcip,
3264                             grp_only_mcip->mci_flent, SRST_LINK);
3265                         mac_fanout_setup(grp_only_mcip,
3266                             grp_only_mcip->mci_flent,
3267                             MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3268                             grp_only_mcip, NULL, NULL);
3269                         mac_rx_group_unmark(default_group, MR_INCIPIENT);
3270                         mac_set_rings_effective(grp_only_mcip);
3271                 }
3272         }
3273 
3274         /*
3275          * If the primary is the only one left and the MAC supports
3276          * dynamic grouping, we need to see if the primary needs to
3277          * be moved to the default group so that it can use all the
3278          * H/W rings.
3279          */
3280         if (!(flent->fe_type & FLOW_PRIMARY_MAC) &&


3764 mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
3765 {
3766         int i;
3767         mac_soft_ring_t *soft_ring, *remove_sring;
3768         mac_client_impl_t *mcip = mac_srs->srs_mcip;
3769 
3770         mutex_enter(&mac_srs->srs_lock);
3771         for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3772                 soft_ring =  mac_srs->srs_tx_soft_rings[i];
3773                 if (soft_ring->s_ring_tx_arg2 == tx_ring)
3774                         break;
3775         }
3776         mutex_exit(&mac_srs->srs_lock);
3777         ASSERT(i < mac_srs->srs_tx_ring_count);
3778         remove_sring = soft_ring;
3779         /*
3780          * In the case of aggr, the soft ring associated with a Tx ring
3781          * is also stored in st_soft_rings[] array. That entry should
3782          * be removed.
3783          */
3784         if (mcip->mci_state_flags & MCIS_IS_AGGR) {
3785                 mac_srs_tx_t *tx = &mac_srs->srs_tx;
3786 
3787                 ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
3788                 tx->st_soft_rings[tx_ring->mr_index] = NULL;
3789         }
3790         mac_soft_ring_remove(mac_srs, remove_sring);
3791         mac_srs_update_fanout_list(mac_srs);
3792 }
3793 
3794 /*
3795  * mac_tx_srs_setup():
3796  * Used to setup Tx rings. If no free Tx ring is available, then default
3797  * Tx ring is used.
3798  */
3799 void
3800 mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
3801 {
3802         mac_impl_t              *mip = mcip->mci_mip;
3803         mac_soft_ring_set_t     *tx_srs = flent->fe_tx_srs;
3804         int                     i;
3805         int                     tx_ring_count = 0;
3806         uint32_t                soft_ring_type;
3807         mac_group_t             *grp = NULL;
3808         mac_ring_t              *ring;
3809         mac_srs_tx_t            *tx = &tx_srs->srs_tx;
3810         boolean_t               is_aggr;
3811         uint_t                  ring_info = 0;
3812 
3813         is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0;
3814         grp = flent->fe_tx_ring_group;
3815         if (grp == NULL) {
3816                 ring = (mac_ring_t *)mip->mi_default_tx_ring;
3817                 goto no_group;
3818         }
3819         tx_ring_count = grp->mrg_cur_count;
3820         ring = grp->mrg_rings;
3821         /*
3822          * An attempt is made to reserve 'tx_ring_count' number
3823          * of Tx rings. If tx_ring_count is 0, default Tx ring
3824          * is used. If it is 1, an attempt is made to reserve one
3825          * Tx ring. In both the cases, the ring information is
3826          * stored in Tx SRS. If multiple Tx rings are specified,
3827          * then each Tx ring will have a Tx-side soft ring. All
3828          * these soft rings will hang off the Tx SRS.
3829          */
3830         switch (grp->mrg_state) {
3831                 case MAC_GROUP_STATE_SHARED:
3832                 case MAC_GROUP_STATE_RESERVED:
3833                         if (tx_ring_count <= 1 && !is_aggr) {




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Joyent, Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/callb.h>
  28 #include <sys/cpupart.h>
  29 #include <sys/pool.h>
  30 #include <sys/pool_pset.h>
  31 #include <sys/sdt.h>
  32 #include <sys/strsubr.h>
  33 #include <sys/strsun.h>
  34 #include <sys/vlan.h>
  35 #include <inet/ipsec_impl.h>
  36 #include <inet/ip_impl.h>
  37 #include <inet/sadb.h>
  38 #include <inet/ipsecesp.h>
  39 #include <inet/ipsecah.h>
  40 
  41 #include <sys/mac_impl.h>
  42 #include <sys/mac_client_impl.h>
  43 #include <sys/mac_client_priv.h>


1169  *
1170  * The underlying device can expose up to MAX_RINGS_PER_GROUP worth of
1171  * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of
1172  * array space is needed to store Tx soft rings. Thus we allocate so
1173  * much array space for srs_tx_soft_rings.
1174  *
1175  * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth
1176  * of space to st_soft_rings. This array is used for quick access to
1177  * soft ring associated with a pseudo Tx ring based on the pseudo
1178  * ring's index (mr_index).
1179  */
1180 static void
1181 mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 {
1183         mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184 
             /*
              * A Tx SRS (SRST_TX set in srs_type) gets a zeroed
              * srs_tx_soft_rings array of MAX_RINGS_PER_GROUP pointers. Any
              * other SRS gets three zeroed arrays of MAX_SR_FANOUT pointers:
              * srs_tcp_soft_rings, srs_udp_soft_rings and srs_oth_soft_rings.
              * All allocations use KM_SLEEP and are not NULL-checked here.
              */
1185         if (mac_srs->srs_type & SRST_TX) {
1186                 mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187                     kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188                     MAX_RINGS_PER_GROUP, KM_SLEEP);
                     /*
                      * Clients flagged MCIS_IS_AGGR_CLIENT also get the
                      * st_soft_rings array in the SRS's Tx state, sized the
                      * same as srs_tx_soft_rings.
                      */
1189                 if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
1190                         mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191 
1192                         tx->st_soft_rings = (mac_soft_ring_t **)
1193                             kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194                             MAX_RINGS_PER_GROUP, KM_SLEEP);
1195                 }
1196         } else {
1197                 mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199                     KM_SLEEP);
1200                 mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **)
1201                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1202                     KM_SLEEP);
1203                 mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **)
1204                     kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1205                     KM_SLEEP);
1206         }
1207 }
1208 
1209 static void


1578         mutex_exit(&srs->srs_lock);
1579 
1580         mac_tx_client_restart((mac_client_handle_t)mcip);
1581 }
1582 
1583 /*
1584  * The uber function that deals with any update to bandwidth limits.
      *
      * The resource properties in mrp are handed to
      * mac_rx_srs_update_bwlimit() for each of the flent's fe_rx_srs_cnt
      * Rx SRSes, and then to mac_tx_srs_update_bwlimit() for the flent's
      * Tx SRS (fe_tx_srs).
1585  */
1586 void
1587 mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
1588 {
1589         int                     count;
1590 
1591         for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592                 mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593         mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 }
1595 
1596 /*
1597  * When the first sub-flow is added to a link, we disable polling on the
1598  * link and also modify the entry point to mac_rx_srs_subflow_process().
1599  * (polling is disabled because with the subflow added, accounting
1600  * for polling needs additional logic, it is assumed that when a subflow is
1601  * added, we can take some hit as a result of disabling polling rather than
1602  * adding more complexity - if this becomes a perf. issue we need to
1603  * re-evaluate this logic).  When the last subflow is removed, we turn
1604  * polling back on and also reset the entry point to mac_rx_srs_process().
1605  *
1606  * In the future if there are multiple SRS, we can simply
1607  * take one and give it to the flow rather than disabling polling and
1608  * resetting the entry point.
1609  */
1610 void
1611 mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 {
1613         flow_entry_t            *flent = mcip->mci_flent;
1614         int                     i;
1615         mac_impl_t              *mip = mcip->mci_mip;
1616         mac_rx_func_t           rx_func;
1617         uint_t                  rx_srs_cnt;
1618         boolean_t               enable_classifier;
1619 
1620         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1621 
1622         enable_classifier = !FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && enable;
1623 
1624         rx_func = enable_classifier ? mac_rx_srs_subflow_process :


1629                 enable_classifier = B_TRUE;
1630 
1631         /*
1632          * If receive function has already been configured correctly for
1633          * current subflow configuration, do nothing.
1634          */
1635         if (flent->fe_cb_fn == (flow_fn_t)rx_func)
1636                 return;
1637 
1638         rx_srs_cnt = flent->fe_rx_srs_cnt;
1639         for (i = 0; i < rx_srs_cnt; i++) {
1640                 ASSERT(flent->fe_rx_srs[i] != NULL);
1641                 mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642                     enable_classifier, rx_func);
1643         }
1644 
1645         /*
1646          * Change the S/W classifier so that we can land in the
1647          * correct processing function with correct argument.
1648          * If all subflows have been removed we can revert to
1649          * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process().
1650          */
1651         mutex_enter(&flent->fe_lock);
1652         flent->fe_cb_fn = (flow_fn_t)rx_func;
1653         flent->fe_cb_arg1 = (void *)mip;
1654         flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655         mutex_exit(&flent->fe_lock);
1656 }
1657 
1658 static void
1659 mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
1660 {
1661         int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0;
1662         mac_soft_ring_t *softring;
1663 
1664         softring = mac_srs->srs_soft_ring_head;
1665         if (softring == NULL) {
1666                 ASSERT(mac_srs->srs_soft_ring_count == 0);
1667                 mac_srs->srs_tcp_ring_count = 0;
1668                 mac_srs->srs_udp_ring_count = 0;
1669                 mac_srs->srs_oth_ring_count = 0;


2168                 mutex_exit(&mac_bw->mac_bw_lock);
2169                 mac_srs->srs_type |= SRST_BW_CONTROL;
2170         } else {
2171                 mac_srs->srs_drain_func = mac_rx_srs_drain;
2172         }
2173 
2174         /*
2175          * We use the following policy to control Receive
2176          * Side Dynamic Polling:
2177          * 1) We switch to poll mode anytime the processing thread causes
2178          *    a backlog to build up in SRS and its associated Soft Rings
2179          *    (sr_poll_pkt_cnt > 0).
2180          * 2) As long as the backlog stays under the low water mark
2181          *    (sr_lowat), we poll the H/W for more packets.
2182          * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183          *    stay in poll mode but don't poll the H/W for more packets.
2184          * 4) Anytime in polling mode, if we poll the H/W for packets and
2185          *    find nothing plus we have an existing backlog
2186          *    (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187          *    the H/W for packets anymore (let the polling thread go to sleep).
2188          * 5) Once the backlog is relieved (packets are processed) we reenable
2189          *    polling (by signalling the poll thread) only when the backlog
2190          *    dips below sr_poll_thres.
2191          * 6) sr_hiwat is used exclusively when we are not polling capable
2192          *    and is used to decide when to drop packets so the SRS queue
2193          *    length doesn't grow infinitely.
2194          */
2195         if (!is_tx_srs) {
2196                 srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197                 /* Low water mark needs to be less than high water mark */
2198                 srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
2199                     mac_soft_ring_max_q_cnt ? mac_soft_ring_min_q_cnt :
2200                     (mac_soft_ring_max_q_cnt >> 2);
2201                 /* Poll threshold need to be half of low water mark or less */
2202                 srs_rx->sr_poll_thres = mac_soft_ring_poll_thres <=
2203                     (srs_rx->sr_lowat >> 1) ? mac_soft_ring_poll_thres :
2204                     (srs_rx->sr_lowat >> 1);
2205                 if (mac_latency_optimize)
2206                         mac_srs->srs_state |= SRS_LATENCY_OPT;
2207                 else
2208                         mac_srs->srs_state |= SRS_SOFTRING_QUEUE;


2239                 /* Is the mac_srs created over the RX default group? */
2240                 if (ring->mr_gh == (mac_group_handle_t)
2241                     MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) {
2242                         mac_srs->srs_type |= SRST_DEFAULT_GRP;
2243                 }
2244                 mac_srs->srs_ring = ring;
2245                 ring->mr_srs = mac_srs;
2246                 ring->mr_classify_type = MAC_HW_CLASSIFIER;
2247                 ring->mr_flag |= MR_INCIPIENT;
2248 
2249                 if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250                     FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251                         mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252 
2253                 mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254                     mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255                     mac_srs->srs_pri);
2256                 /*
2257                  * Some drivers require serialization and don't send
2258                  * packet chains in interrupt context. For such
2259                  * drivers, we should always queue in the soft ring
2260                  * so that we get a chance to switch into polling
2261                  * mode under backlog.
2262                  */
2263                 ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264                 if (ring_info & MAC_RING_RX_ENQUEUE)
2265                         mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266         }
2267 done:
2268         mac_srs_stat_create(mac_srs);
2269         return (mac_srs);
2270 }
2271 
2272 /*
2273  * Figure out the number of soft rings required. It depends on
2274  * whether protocol fanout is required (for LINKs), whether global
2275  * settings require fanout for performance (mac_soft_ring_enable),
2276  * or whether the user has specifically requested fanout.
2277  */
2278 static uint32_t
2279 mac_find_fanout(flow_entry_t *flent, uint32_t link_type)
2280 {


2347 mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2348     uint32_t link_type)
2349 {
2350         cpupart_t               *cpupart;
2351         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
2352         mac_resource_props_t    *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2353         boolean_t               use_default = B_FALSE;
2354 
2355         mac_rx_srs_group_setup(mcip, flent, link_type);
2356         mac_tx_srs_group_setup(mcip, flent, link_type);
2357 
2358         pool_lock();
2359         cpupart = mac_pset_find(mrp, &use_default);
2360         mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
2361             mac_rx_deliver, mcip, NULL, cpupart);
2362         mac_set_pool_effective(use_default, cpupart, mrp, emrp);
2363         pool_unlock();
2364 }
2365 
2366 /*
2367  * Set up the Rx SRSes. If there is no group associated with the
2368  * client, then only setup SW classification. If the client has
2369  * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an
2370  * SRS for each HW ring. If the client is sharing a group, then make
2371  * sure to teardown the HW SRSes.
2372  */
2373 void
2374 mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2375     uint32_t link_type)
2376 {
2377         mac_impl_t              *mip = mcip->mci_mip;
2378         mac_soft_ring_set_t     *mac_srs;
2379         mac_ring_t              *ring;
2380         uint32_t                fanout_type;
2381         mac_group_t             *rx_group = flent->fe_rx_ring_group;
2382         boolean_t               no_unicast;
2383 
2384         fanout_type = mac_find_fanout(flent, link_type);
2385         no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0;
2386 
2387         /* Create the SRS for SW classification if none exists */
2388         if (flent->fe_rx_srs[0] == NULL) {
2389                 ASSERT(flent->fe_rx_srs_cnt == 0);
                     /* Create it and make it the flow entry's callback. */
2390                 mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
2391                     mac_rx_deliver, mcip, NULL, NULL);
2392                 mutex_enter(&flent->fe_lock);
2393                 flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
2394                 flent->fe_cb_arg1 = (void *)mip;
2395                 flent->fe_cb_arg2 = (void *)mac_srs;
2396                 mutex_exit(&flent->fe_lock);
2397         }
2398 
2399         if (rx_group == NULL)
2400                 return;
2401 
2402         /*
2403          * If the group is marked RESERVED then setup an SRS and
2404          * fanout for each HW ring.
              * A SHARED group is switched to SW classification instead.
2405          */
2406         switch (rx_group->mrg_state) {
2407         case MAC_GROUP_STATE_RESERVED:
2408                 for (ring = rx_group->mrg_rings; ring != NULL;
2409                     ring = ring->mr_next) {
2410                         uint16_t vid = i_mac_flow_vid(mcip->mci_flent);
2411 
2412                         switch (ring->mr_state) {
2413                         case MR_INUSE:
2414                         case MR_FREE:
                                     /* Ring already has an SRS attached. */
2415                                 if (ring->mr_srs != NULL)
2416                                         break;
2417                                 if (ring->mr_state != MR_INUSE)
2418                                         (void) mac_start_ring(ring);
2419 
2420                                 /*
2421                                  * If a client requires SW VLAN
2422                                  * filtering or has no unicast address
2423                                  * then we don't create any HW ring
2424                                  * SRSes.
                                      * mac_start_ring() was still called above
                                      * for a ring that was not MR_INUSE; its
                                      * return value is ignored.
2425                                  */
2426                                 if ((!MAC_GROUP_HW_VLAN(rx_group) &&
2427                                     vid != VLAN_ID_NONE) || no_unicast)


2428                                         break;
2429 
2430                                 /*
2431                                  * When a client has exclusive use of
2432                                  * a group, and that group's traffic
2433                                  * is fully HW classified, we create
2434                                  * an SRS for each HW ring in order to
2435                                  * make use of dynamic polling of said
2436                                  * HW rings.
2437                                  */
2438                                 mac_srs = mac_srs_create(mcip, flent,
2439                                     fanout_type | link_type,
2440                                     mac_rx_deliver, mcip, NULL, ring);
2441                                 break;
2442                         default:
2443                                 cmn_err(CE_PANIC,
2444                                     "srs_setup: mcip = %p "
2445                                     "trying to add UNKNOWN ring = %p\n",
2446                                     (void *)mcip, (void *)ring);
2447                                 break;
2448                         }
2449                 }
2450                 break;
2451         case MAC_GROUP_STATE_SHARED:
2452                 /*
2453                  * When a group is shared by multiple clients, we must
2454                  * use SW classification to ensure packets are
2455                  * delivered to the correct client.





2456                  */
2457                 mac_rx_switch_grp_to_sw(rx_group);
2458                 break;
2459         default:
2460                 ASSERT(B_FALSE);
2461                 break;
2462         }
2463 }
2464 
2465 /*
2466  * Set up the TX SRS.
2467  */
2468 void
2469 mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2470     uint32_t link_type)
2471 {
2472         int                     cnt;
2473         int                     ringcnt;
2474         mac_ring_t              *ring;
2475         mac_group_t             *grp;


2488                         return;
2489                 grp = (mac_group_t *)flent->fe_tx_ring_group;
2490                 ringcnt = grp->mrg_cur_count;
2491                 ring = grp->mrg_rings;
2492                 for (cnt = 0; cnt < ringcnt; cnt++) {
2493                         if (ring->mr_state != MR_INUSE) {
2494                                 (void) mac_start_ring(ring);
2495                         }
2496                         ring = ring->mr_next;
2497                 }
2498                 return;
2499         }
2500         if (flent->fe_tx_srs == NULL) {
2501                 (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2502                     NULL, mcip, NULL, NULL);
2503         }
2504         mac_tx_srs_setup(mcip, flent);
2505 }
2506 
2507 /*
2508  * Tear down the Rx SRSes of a flow entry. If hwonly is set, only the
2509  * Rx HW SRSes are torn down and the SW SRS (slot 0 of fe_rx_srs[]) is
2510  * kept. hwonly is set when we wish to move a MAC client from one group
2511  * to another: the current HW SRSes must be released, but the SW SRS
2512  * is kept for continued traffic classification.
2513  */
2514 void
2515 mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2516 {
2517         mac_soft_ring_set_t     *mac_srs;
2518         int                     i;
2519         int                     count = flent->fe_rx_srs_cnt;   /* snapshot: fe_rx_srs_cnt is decremented in the loop */
2520 
2521         for (i = 0; i < count; i++) {
2522                 if (i == 0 && hwonly)
2523                         continue;       /* slot 0 is spared when hwonly is set */
2524                 mac_srs = flent->fe_rx_srs[i];
2525                 mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2526                 mac_srs_free(mac_srs);
2527                 flent->fe_rx_srs[i] = NULL;     /* clear the slot of the freed SRS */
2528                 flent->fe_rx_srs_cnt--;
2529         }
2530 
2531         /*
2532          * If we are only tearing down the HW SRSes then there must be
2533          * one SRS left for SW classification. Otherwise we are tearing
2534          * down both HW and SW and there should be no SRSes left.
2535          */
2536         if (hwonly)
2537                 VERIFY3S(flent->fe_rx_srs_cnt, ==, 1);
2538         else
2539                 VERIFY3S(flent->fe_rx_srs_cnt, ==, 0);
2540 }
2541 
2542 /*
2543  * Remove the TX SRS.
2544  */
2545 void
2546 mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2547     uint32_t link_type)
2548 {
2549         mac_soft_ring_set_t     *tx_srs;
2550         mac_srs_tx_t            *tx;
2551 
2552         if ((tx_srs = flent->fe_tx_srs) == NULL)
2553                 return;
2554 
2555         tx = &tx_srs->srs_tx;
2556         switch (link_type) {
2557         case SRST_FLOW:
2558                 /*
2559                  * For flows, we need to work with passed


2821  *
2822  * Note: In future, if no fanout is specified, we try to assign 2 Rx
2823  * rings for the primary Link with the primary MAC address + TCP going
2824  * to one ring and primary MAC address + UDP|SCTP going to other ring.
2825  * Any remaining traffic for primary MAC address can go to the default
2826  * Rx ring and get S/W classified. This way the respective SRSs don't
2827  * need to do proto fanout and don't need to have softrings at all and
2828  * can poll their respective Rx rings.
2829  *
2830  * As an optimization, when a new NIC or VNIC is created, we can get
2831  * only one Rx ring and make it a TCP specific Rx ring and use the
2832  * H/W default Rx ring for the rest (this Rx ring is never polled).
2833  *
2834  * For clients that don't have a MAC address, but want to receive and
2835  * transmit packets (e.g., bpf, gvrp etc.), we need to set up the datapath.
2836  * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we
2837  * always give the default group and use software classification (i.e.
2838  * even if this is the only client in the default group, we will
2839  * leave the group as shared).
2840  */
2841 /* Returns 0 on success; on failure calls mac_datapath_teardown() and returns an errno (e.g. ENOSPC). */
2842 int
2843 mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2844     uint32_t link_type)
2845 {
2846         mac_impl_t              *mip = mcip->mci_mip;
2847         mac_group_t             *rgroup = NULL;
2848         mac_group_t             *tgroup = NULL;
2849         mac_group_t             *default_rgroup;
2850         mac_group_t             *default_tgroup;
2851         int                     err;
2852         uint16_t                vid;    /* assigned only on the unicast SRST_LINK path */
2853         uint8_t                 *mac_addr;
2854         mac_group_state_t       next_state;
2855         mac_client_impl_t       *group_only_mcip;
2856         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
2857         mac_resource_props_t    *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2858         boolean_t               rxhw;
2859         boolean_t               txhw;
2860         boolean_t               use_default = B_FALSE;
2861         cpupart_t               *cpupart;
2862         boolean_t               no_unicast;
2863         boolean_t               isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2864         mac_client_impl_t       *reloc_pmcip = NULL;
2865         boolean_t               use_hw;
2866 
2867         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2868 
2869         switch (link_type) {
2870         case SRST_FLOW:
2871                 mac_srs_group_setup(mcip, flent, link_type);
2872                 return (0);
2873 
2874         case SRST_LINK:
2875                 no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2876                 mac_addr = flent->fe_flow_desc.fd_dst_mac;
2877 
2878                 /* Default RX group */
2879                 default_rgroup = MAC_DEFAULT_RX_GROUP(mip);
2880 
2881                 /* Default TX group */
2882                 default_tgroup = MAC_DEFAULT_TX_GROUP(mip);
2883 
2884                 if (no_unicast) {
2885                         rgroup = default_rgroup;
2886                         tgroup = default_tgroup;
2887                         goto grp_found; /* rxhw/txhw stay unset; the default-group checks at grp_found short-circuit before reading them */
2888                 }
2889                 rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2890                     (mrp->mrp_nrxrings > 0 ||
2891                     (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2892                 txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2893                     (mrp->mrp_ntxrings > 0 ||
2894                     (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2895 
2896                 /*
2897                  * All the rings initially belong to the default group
2898                  * under dynamic grouping. The primary client uses the
2899                  * default group when it is the only client. The
2900                  * default group is also used as the destination for
2901                  * all multicast and broadcast traffic of all clients.
2902                  * Therefore, the primary client loses its ability to
2903                  * poll the softrings on addition of a second client.
2904                  * To avoid a performance penalty, MAC will move the
2905                  * primary client to a dedicated group when it can.
2906                  *
2907                  * When using static grouping, the primary client
2908                  * begins life on a non-default group. There is
2909                  * no moving needed upon addition of a second client.
2910                  */
2911                 if (!isprimary && mip->mi_nactiveclients == 2 &&
2912                     (group_only_mcip = mac_primary_client_handle(mip)) !=
2913                     NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2914                         reloc_pmcip = mac_check_primary_relocation(
2915                             group_only_mcip, rxhw);
2916                 }
2917 
2918                 /*
2919                  * Check to see if we can get an exclusive group for
2920                  * this mac address or if there already exists a
2921                  * group that has this mac address (case of VLANs).
2922                  * If no groups are available, use the default group.
2923                  */
2924                 rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2925                 if (rgroup == NULL && rxhw) {
2926                         err = ENOSPC;
2927                         goto setup_failed;
2928                 } else if (rgroup == NULL) {
2929                         rgroup = default_rgroup;
2930                 }
2931 
2932                 /*
2933                  * If we are adding a second client to a
2934                  * non-default group then we need to move the
2935                  * existing client to the default group and
2936                  * add the new client to the default group as
2937                  * well.
2938                  */
2939                 if (rgroup != default_rgroup &&
2940                     rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) {
2941                         group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup);
2942                         err = mac_rx_switch_group(group_only_mcip, rgroup,
2943                             default_rgroup);
2944 
2945                         if (err != 0)
2946                                 goto setup_failed;
2947 
2948                         rgroup = default_rgroup;
2949                 }
2950 
2951                 /*
2952                  * Check to see if we can get an exclusive group for
2953                  * this mac client. If no groups are available, use
2954                  * the default group.
2955                  */
2956                 tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2957                 if (tgroup == NULL && txhw) {
2958                         if (rgroup != NULL && rgroup != default_rgroup)
2959                                 mac_release_rx_group(mcip, rgroup);
2960                         err = ENOSPC;
2961                         goto setup_failed;
2962                 } else if (tgroup == NULL) {
2963                         tgroup = default_tgroup;
2964                 }
2965 
2966                 /*
2967                  * Some NICs don't support any Rx rings, so there may not
2968                  * even be a default group.
2969                  */
2970         grp_found:
2971                 if (rgroup != NULL) {
2972                         if (rgroup != default_rgroup &&
2973                             MAC_GROUP_NO_CLIENT(rgroup) &&
2974                             (rxhw || mcip->mci_share != 0)) {
2975                                 MAC_RX_GRP_RESERVED(mip);
2976                                 if (mip->mi_rx_group_type ==
2977                                     MAC_GROUP_TYPE_DYNAMIC) {
2978                                         MAC_RX_RING_RESERVED(mip,
2979                                             rgroup->mrg_cur_count);
2980                                 }
2981                         }
2982 
2983                         flent->fe_rx_ring_group = rgroup;
2984                         /*
2985                          * Add the client to the group and update the
2986                          * group's state. If rgroup != default_rgroup
2987                          * then the rgroup should only ever have one
2988                          * client and be in the RESERVED state. But no
2989                          * matter what, the default_rgroup will enter
2990                          * the SHARED state since it has to receive
2991                          * all broadcast and multicast traffic. This
2992                          * case is handled later in the function.
2993                          */
2994                         mac_group_add_client(rgroup, mcip);
2995                         next_state = mac_group_next_state(rgroup,
2996                             &group_only_mcip, default_rgroup, B_TRUE);
2997                         mac_set_group_state(rgroup, next_state);
2998                 }
2999 
3000                 if (tgroup != NULL) {
3001                         if (tgroup != default_tgroup &&
3002                             MAC_GROUP_NO_CLIENT(tgroup) &&
3003                             (txhw || mcip->mci_share != 0)) {
3004                                 MAC_TX_GRP_RESERVED(mip);
3005                                 if (mip->mi_tx_group_type ==
3006                                     MAC_GROUP_TYPE_DYNAMIC) {
3007                                         MAC_TX_RING_RESERVED(mip,
3008                                             tgroup->mrg_cur_count);
3009                                 }
3010                         }
3011                         flent->fe_tx_ring_group = tgroup;
3012                         mac_group_add_client(tgroup, mcip);
3013                         next_state = mac_group_next_state(tgroup,
3014                             &group_only_mcip, default_tgroup, B_FALSE);
3015                         tgroup->mrg_state = next_state;
3016                 }








3017 
3018                 /* We are setting up minimal datapath only */
3019                 if (no_unicast) {
3020                         mac_srs_group_setup(mcip, flent, link_type);
3021                         break;
3022                 }
3023 
3024                 /* Program software classification. */
3025                 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
3026                         goto setup_failed;
3027 
3028                 /* Program hardware classification. */
3029                 vid = i_mac_flow_vid(flent);
3030                 use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
3031                 err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);
3032 
3033                 if (err != 0)
3034                         goto setup_failed;
3035 
3036                 mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
3037                 VERIFY3P(mcip->mci_unicast, !=, NULL);
3038 
3039                 /*
3040                  * Setup the Rx and Tx SRSes. If the client has a
3041                  * reserved group, then mac_srs_group_setup() creates
3042                  * the required SRSes for the HW rings. If we have a
3043                  * shared group, mac_srs_group_setup() dismantles the
3044                  * HW SRSes of the previously exclusive group.
3045                  */
3046                 mac_srs_group_setup(mcip, flent, link_type);
3047 
3048                 /* (Re)init the v6 token & local addr used by link protection */
3049                 mac_protect_update_mac_token(mcip);
3050                 break;
3051 
3052         default:
3053                 ASSERT(B_FALSE);
3054                 break;
3055         }
3056 
3057         /*
3058          * All broadcast and multicast traffic is received only on the default
3059          * group. If we have setup the datapath for a non-default group above
3060          * then move the default group to shared state to allow distribution of
3061          * incoming broadcast traffic to the other groups and dismantle the
3062          * SRSes over the default group.
3063          */
3064         if (rgroup != NULL) {
3065                 if (rgroup != default_rgroup) {
3066                         if (default_rgroup->mrg_state ==
3067                             MAC_GROUP_STATE_RESERVED) {


3071                                     mip->mi_nactiveclients > 1);
3072 
3073                                 mac_set_group_state(default_rgroup,
3074                                     MAC_GROUP_STATE_SHARED);
3075                                 mac_rx_srs_group_setup(group_only_mcip,
3076                                     group_only_mcip->mci_flent, SRST_LINK);
3077                                 pool_lock();
3078                                 cpupart = mac_pset_find(mrp, &use_default);     /* NOTE(review): mrp/emrp belong to mcip, but the fanout below is for group_only_mcip; confirm intended */
3079                                 mac_fanout_setup(group_only_mcip,
3080                                     group_only_mcip->mci_flent,
3081                                     MCIP_RESOURCE_PROPS(group_only_mcip),
3082                                     mac_rx_deliver, group_only_mcip, NULL,
3083                                     cpupart);
3084                                 mac_set_pool_effective(use_default, cpupart,
3085                                     mrp, emrp);
3086                                 pool_unlock();
3087                         }
3088                         ASSERT(default_rgroup->mrg_state ==
3089                             MAC_GROUP_STATE_SHARED);
3090                 }
3091 
3092                 /*
3093                  * A VLAN MAC client on a reserved group still
3094                  * requires SW classification if the MAC doesn't
3095                  * provide VLAN HW filtering. Clients with no unicast
3096                  * address also require SW classification.
3097                  * NOTE(review): vid is never assigned on the no_unicast
3098                  * path but may be read here; consider initializing it.
3099                  */
3100                 if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3101                     ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) ||
3102                     no_unicast)) {
3103                         mac_rx_switch_grp_to_sw(rgroup);
3104                 }
3105 
3106         }
3107 
3108         mac_set_rings_effective(mcip);
3109         return (0);
3110 
3111 setup_failed:
3112         /* Switch the primary back to default group */
3113         if (reloc_pmcip != NULL) {
3114                 (void) mac_rx_switch_group(reloc_pmcip,
3115                     reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3116         }
3117         mac_datapath_teardown(mcip, flent, link_type);
3118         return (err);
3119 }
3120 
3121 void
3122 mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
3123     uint32_t link_type)
3124 {
3125         mac_impl_t              *mip = mcip->mci_mip;
3126         mac_group_t             *group = NULL;
3127         mac_client_impl_t       *grp_only_mcip;
3128         flow_entry_t            *group_only_flent;
3129         mac_group_t             *default_group;
3130         boolean_t               check_default_group = B_FALSE;
3131         mac_group_state_t       next_state;
3132         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
3133         uint16_t                vid;
3134 
3135         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3136 
3137         switch (link_type) {
3138         case SRST_FLOW:
3139                 mac_rx_srs_group_teardown(flent, B_FALSE);
3140                 mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3141                 return;
3142 
3143         case SRST_LINK:
3144                 /* Stop sending packets */
3145                 mac_tx_client_block(mcip);
3146                 group = flent->fe_rx_ring_group;
3147                 vid = i_mac_flow_vid(flent);
3148 
3149                 /*
3150                  * Stop the packet flow from the hardware by disabling
3151                  * any hardware filters assigned to this client.
3152                  */
3153                 if (mcip->mci_unicast != NULL) {
3154                         int err;
3155 
3156                         err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
3157 
3158                         if (err != 0) {
3159                                 cmn_err(CE_WARN, "%s: failed to remove a MAC HW"
3160                                     " filters because of error 0x%x",
3161                                     mip->mi_name, err);
3162                         }
3163 
3164                         mcip->mci_unicast = NULL;
3165                 }
3166 
3167                 /* Stop the packets coming from the S/W classifier */
3168                 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3169                 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3170 
3171                 /* Now quiesce and destroy all SRS and soft rings */
3172                 mac_rx_srs_group_teardown(flent, B_FALSE);
3173                 mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3174 
3175                 ASSERT((mcip->mci_flent == flent) &&
3176                     (flent->fe_next == NULL));
3177 
3178                 /*
3179                  * Release our hold on the group as well. We need
3180                  * to check if the shared group has only one client
3181                  * left who can use it exclusively. Also, if we
3182                  * were the last client, release the group.
3183                  */

3184                 default_group = MAC_DEFAULT_RX_GROUP(mip);
3185                 if (group != NULL) {
3186                         mac_group_remove_client(group, mcip);
3187                         next_state = mac_group_next_state(group,
3188                             &grp_only_mcip, default_group, B_TRUE);
3189 
3190                         if (next_state == MAC_GROUP_STATE_RESERVED) {
3191                                 /*
3192                                  * Only one client left on this RX group.
3193                                  */
3194                                 VERIFY3P(grp_only_mcip, !=, NULL);
3195                                 mac_set_group_state(group,
3196                                     MAC_GROUP_STATE_RESERVED);
3197                                 group_only_flent = grp_only_mcip->mci_flent;
3198 
3199                                 /*
3200                                  * The only remaining client has exclusive
3201                                  * access on the group. Allow it to
3202                                  * dynamically poll the H/W rings etc.
3203                                  */
3204                                 mac_rx_srs_group_setup(grp_only_mcip,
3205                                     group_only_flent, SRST_LINK);
3206                                 mac_fanout_setup(grp_only_mcip,
3207                                     group_only_flent,
3208                                     MCIP_RESOURCE_PROPS(grp_only_mcip),
3209                                     mac_rx_deliver, grp_only_mcip, NULL, NULL);
3210                                 mac_rx_group_unmark(group, MR_INCIPIENT);
3211                                 mac_set_rings_effective(grp_only_mcip);
3212                         } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3213                                 /*
3214                                  * This is a non-default group being freed up.
3215                                  * We need to reevaluate the default group
3216                                  * to see if the primary client can get
3217                                  * exclusive access to the default group.
3218                                  */
3219                                 VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip));
3220                                 if (mrp->mrp_mask & MRP_RX_RINGS) {
3221                                         MAC_RX_GRP_RELEASED(mip);
3222                                         if (mip->mi_rx_group_type ==
3223                                             MAC_GROUP_TYPE_DYNAMIC) {
3224                                                 MAC_RX_RING_RELEASED(mip,
3225                                                     group->mrg_cur_count);
3226                                         }
3227                                 }
3228                                 mac_release_rx_group(mcip, group);
3229                                 mac_set_group_state(group,
3230                                     MAC_GROUP_STATE_REGISTERED);
3231                                 check_default_group = B_TRUE;
3232                         } else {
3233                                 VERIFY3S(next_state, ==,
3234                                     MAC_GROUP_STATE_SHARED);
3235                                 mac_set_group_state(group,
3236                                     MAC_GROUP_STATE_SHARED);
3237                                 mac_rx_group_unmark(group, MR_CONDEMNED);
3238                         }
3239                         flent->fe_rx_ring_group = NULL;
3240                 }
3241                 /*
3242                  * Remove the client from the TX group. Additionally, if
3243                  * this is a non-default group, then we also need to release
3244                  * the group.
3245                  */
3246                 group = flent->fe_tx_ring_group;
3247                 default_group = MAC_DEFAULT_TX_GROUP(mip);
3248                 if (group != NULL) {
3249                         mac_group_remove_client(group, mcip);
3250                         next_state = mac_group_next_state(group,
3251                             &grp_only_mcip, default_group, B_FALSE);
3252                         if (next_state == MAC_GROUP_STATE_REGISTERED) {
3253                                 if (group != default_group) {
3254                                         if (mrp->mrp_mask & MRP_TX_RINGS) {


3303                         } else if (next_state == MAC_GROUP_STATE_RESERVED) {
3304                                 mac_set_rings_effective(grp_only_mcip);
3305                         }
3306                         flent->fe_tx_ring_group = NULL;
3307                         group->mrg_state = next_state;
3308                 }
3309                 break;
3310         default:
3311                 ASSERT(B_FALSE);
3312                 break;
3313         }
3314 
3315         /*
3316          * The mac client using the default group gets exclusive access to the
3317          * default group if and only if it is the sole client on the entire
3318          * mip. If so set the group state to reserved, and set up the SRSes
3319          * over the default group.
3320          */
3321         if (check_default_group) {
3322                 default_group = MAC_DEFAULT_RX_GROUP(mip);
3323                 VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED);
3324                 next_state = mac_group_next_state(default_group,
3325                     &grp_only_mcip, default_group, B_TRUE);
3326                 if (next_state == MAC_GROUP_STATE_RESERVED) {
3327                         VERIFY3P(grp_only_mcip, !=, NULL);
3328                         VERIFY3U(mip->mi_nactiveclients, ==, 1);
3329                         mac_set_group_state(default_group,
3330                             MAC_GROUP_STATE_RESERVED);
3331                         mac_rx_srs_group_setup(grp_only_mcip,
3332                             grp_only_mcip->mci_flent, SRST_LINK);
3333                         mac_fanout_setup(grp_only_mcip,
3334                             grp_only_mcip->mci_flent,
3335                             MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3336                             grp_only_mcip, NULL, NULL);
3337                         mac_rx_group_unmark(default_group, MR_INCIPIENT);
3338                         mac_set_rings_effective(grp_only_mcip);
3339                 }
3340         }
3341 
3342         /*
3343          * If the primary is the only one left and the MAC supports
3344          * dynamic grouping, we need to see if the primary needs to
3345          * be moved to the default group so that it can use all the
3346          * H/W rings.
3347          */
3348         if (!(flent->fe_type & FLOW_PRIMARY_MAC) &&


3832 mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
3833 {
3834         int i;
3835         mac_soft_ring_t *soft_ring, *remove_sring;
3836         mac_client_impl_t *mcip = mac_srs->srs_mcip;
3837 
3838         mutex_enter(&mac_srs->srs_lock);
3839         for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3840                 soft_ring =  mac_srs->srs_tx_soft_rings[i];
3841                 if (soft_ring->s_ring_tx_arg2 == tx_ring)
3842                         break;
3843         }
3844         mutex_exit(&mac_srs->srs_lock);
3845         ASSERT(i < mac_srs->srs_tx_ring_count);
3846         remove_sring = soft_ring;
3847         /*
3848          * In the case of aggr, the soft ring associated with a Tx ring
3849          * is also stored in st_soft_rings[] array. That entry should
3850          * be removed.
3851          */
3852         if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
3853                 mac_srs_tx_t *tx = &mac_srs->srs_tx;
3854 
3855                 ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
3856                 tx->st_soft_rings[tx_ring->mr_index] = NULL;
3857         }
3858         mac_soft_ring_remove(mac_srs, remove_sring);
3859         mac_srs_update_fanout_list(mac_srs);
3860 }
3861 
3862 /*
3863  * mac_tx_srs_setup():
3864  * Used to setup Tx rings. If no free Tx ring is available, then default
3865  * Tx ring is used.
3866  */
3867 void
3868 mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
3869 {
3870         mac_impl_t              *mip = mcip->mci_mip;
3871         mac_soft_ring_set_t     *tx_srs = flent->fe_tx_srs;
3872         int                     i;
3873         int                     tx_ring_count = 0;
3874         uint32_t                soft_ring_type;
3875         mac_group_t             *grp = NULL;
3876         mac_ring_t              *ring;
3877         mac_srs_tx_t            *tx = &tx_srs->srs_tx;
3878         boolean_t               is_aggr;
3879         uint_t                  ring_info = 0;
3880 
3881         is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0;
3882         grp = flent->fe_tx_ring_group;
3883         if (grp == NULL) {
3884                 ring = (mac_ring_t *)mip->mi_default_tx_ring;
3885                 goto no_group;
3886         }
3887         tx_ring_count = grp->mrg_cur_count;
3888         ring = grp->mrg_rings;
3889         /*
3890          * An attempt is made to reserve 'tx_ring_count' number
3891          * of Tx rings. If tx_ring_count is 0, default Tx ring
3892          * is used. If it is 1, an attempt is made to reserve one
3893          * Tx ring. In both the cases, the ring information is
3894          * stored in Tx SRS. If multiple Tx rings are specified,
3895          * then each Tx ring will have a Tx-side soft ring. All
3896          * these soft rings will hang off the Tx SRS.
3897          */
3898         switch (grp->mrg_state) {
3899                 case MAC_GROUP_STATE_SHARED:
3900                 case MAC_GROUP_STATE_RESERVED:
3901                         if (tx_ring_count <= 1 && !is_aggr) {