Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/io/mac/mac_datapath_setup.c
          +++ new/usr/src/uts/common/io/mac/mac_datapath_setup.c
↓ open down ↓ 12 lines elided ↑ open up ↑
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright 2017, Joyent, Inc.
       23 + * Copyright 2018 Joyent, Inc.
  24   24   */
  25   25  
  26   26  #include <sys/types.h>
  27   27  #include <sys/callb.h>
  28   28  #include <sys/cpupart.h>
  29   29  #include <sys/pool.h>
  30   30  #include <sys/pool_pset.h>
  31   31  #include <sys/sdt.h>
  32   32  #include <sys/strsubr.h>
  33   33  #include <sys/strsun.h>
↓ open down ↓ 1145 lines elided ↑ open up ↑
1179 1179   */
1180 1180  static void
1181 1181  mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 1182  {
1183 1183          mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184 1184  
1185 1185          if (mac_srs->srs_type & SRST_TX) {
1186 1186                  mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187 1187                      kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188 1188                      MAX_RINGS_PER_GROUP, KM_SLEEP);
1189      -                if (mcip->mci_state_flags & MCIS_IS_AGGR) {
     1189 +                if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
1190 1190                          mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191 1191  
1192 1192                          tx->st_soft_rings = (mac_soft_ring_t **)
1193 1193                              kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194 1194                              MAX_RINGS_PER_GROUP, KM_SLEEP);
1195 1195                  }
1196 1196          } else {
1197 1197                  mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198 1198                      kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199 1199                      KM_SLEEP);
↓ open down ↓ 388 lines elided ↑ open up ↑
1588 1588  {
1589 1589          int                     count;
1590 1590  
1591 1591          for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592 1592                  mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593 1593          mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 1594  }
1595 1595  
1596 1596  /*
1597 1597   * When the first sub-flow is added to a link, we disable polling on the
1598      - * link and also modify the entry point to mac_rx_srs_subflow_process.
     1598 + * link and also modify the entry point to mac_rx_srs_subflow_process().
1599 1599   * (polling is disabled because with the subflow added, accounting
1600 1600   * for polling needs additional logic, it is assumed that when a subflow is
1601 1601   * added, we can take some hit as a result of disabling polling rather than
1602 1602   * adding more complexity - if this becomes a perf. issue we need to
1603 1603   * re-evaluate this logic).  When the last subflow is removed, we turn back
1604      - * polling and also reset the entry point to mac_rx_srs_process.
     1604 + * polling and also reset the entry point to mac_rx_srs_process().
1605 1605   *
1606 1606   * In the future if there are multiple SRS, we can simply
1607 1607   * take one and give it to the flow rather than disabling polling and
1608 1608   * resetting the entry point.
1609 1609   */
1610 1610  void
1611 1611  mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 1612  {
1613 1613          flow_entry_t            *flent = mcip->mci_flent;
1614 1614          int                     i;
↓ open down ↓ 24 lines elided ↑ open up ↑
1639 1639          for (i = 0; i < rx_srs_cnt; i++) {
1640 1640                  ASSERT(flent->fe_rx_srs[i] != NULL);
1641 1641                  mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642 1642                      enable_classifier, rx_func);
1643 1643          }
1644 1644  
1645 1645          /*
1646 1646           * Change the S/W classifier so that we can land in the
1647 1647           * correct processing function with correct argument.
1648 1648           * If all subflows have been removed we can revert to
1649      -         * mac_rx_srsprocess, else we need mac_rx_srs_subflow_process.
     1649 +         * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process().
1650 1650           */
1651 1651          mutex_enter(&flent->fe_lock);
1652 1652          flent->fe_cb_fn = (flow_fn_t)rx_func;
1653 1653          flent->fe_cb_arg1 = (void *)mip;
1654 1654          flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655 1655          mutex_exit(&flent->fe_lock);
1656 1656  }
1657 1657  
1658 1658  static void
1659 1659  mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
↓ open down ↓ 518 lines elided ↑ open up ↑
2178 2178           *    a backlog to build up in SRS and its associated Soft Rings
2179 2179           *    (sr_poll_pkt_cnt > 0).
2180 2180           * 2) As long as the backlog stays under the low water mark
2181 2181           *    (sr_lowat), we poll the H/W for more packets.
2182 2182           * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183 2183           *    stay in poll mode but don't poll the H/W for more packets.
2184 2184           * 4) Anytime in polling mode, if we poll the H/W for packets and
2185 2185           *    find nothing plus we have an existing backlog
2186 2186           *    (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187 2187           *    the H/W for packets anymore (let the polling thread go to sleep).
2188      -         * 5) Once the backlog is relived (packets are processed) we reenable
     2188 +         * 5) Once the backlog is relieved (packets are processed) we reenable
2189 2189           *    polling (by signalling the poll thread) only when the backlog
2190 2190           *    dips below sr_poll_thres.
2191 2191           * 6) sr_hiwat is used exclusively when we are not polling capable
2192 2192           *    and is used to decide when to drop packets so the SRS queue
2193 2193           *    length doesn't grow infinitely.
2194 2194           */
2195 2195          if (!is_tx_srs) {
2196 2196                  srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197 2197                  /* Low water mark needs to be less than high water mark */
2198 2198                  srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
↓ open down ↓ 50 lines elided ↑ open up ↑
2249 2249                  if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250 2250                      FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251 2251                          mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252 2252  
2253 2253                  mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254 2254                      mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255 2255                      mac_srs->srs_pri);
2256 2256                  /*
2257 2257                   * Some drivers require serialization and don't send
2258 2258                   * packet chains in interrupt context. For such
2259      -                 * drivers, we should always queue in soft ring
2260      -                 * so that we get a chance to switch into a polling
     2259 +                 * drivers, we should always queue in the soft ring
     2260 +                 * so that we get a chance to switch into polling
2261 2261                   * mode under backlog.
2262 2262                   */
2263 2263                  ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264 2264                  if (ring_info & MAC_RING_RX_ENQUEUE)
2265 2265                          mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266 2266          }
2267 2267  done:
2268 2268          mac_srs_stat_create(mac_srs);
2269 2269          return (mac_srs);
2270 2270  }
↓ open down ↓ 86 lines elided ↑ open up ↑
2357 2357  
2358 2358          pool_lock();
2359 2359          cpupart = mac_pset_find(mrp, &use_default);
2360 2360          mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
2361 2361              mac_rx_deliver, mcip, NULL, cpupart);
2362 2362          mac_set_pool_effective(use_default, cpupart, mrp, emrp);
2363 2363          pool_unlock();
2364 2364  }
2365 2365  
2366 2366  /*
2367      - * Set up the RX SRSs. If the S/W SRS is not set, set  it up, if there
2368      - * is a group associated with this MAC client, set up SRSs for individual
2369      - * h/w rings.
     2367 + * Set up the Rx SRSes. If there is no group associated with the
     2368 + * client, then only set up SW classification. If the client has
     2369 + * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an
     2370 + * SRS for each HW ring. If the client is sharing a group, then make
     2371 + * sure to tear down the HW SRSes.
2370 2372   */
2371 2373  void
2372 2374  mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2373 2375      uint32_t link_type)
2374 2376  {
2375 2377          mac_impl_t              *mip = mcip->mci_mip;
2376 2378          mac_soft_ring_set_t     *mac_srs;
2377 2379          mac_ring_t              *ring;
2378 2380          uint32_t                fanout_type;
2379 2381          mac_group_t             *rx_group = flent->fe_rx_ring_group;
     2382 +        boolean_t               no_unicast;
2380 2383  
2381 2384          fanout_type = mac_find_fanout(flent, link_type);
     2385 +        no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0;
2382 2386  
2383      -        /* Create the SRS for S/W classification if none exists */
     2387 +        /* Create the SRS for SW classification if none exists */
2384 2388          if (flent->fe_rx_srs[0] == NULL) {
2385 2389                  ASSERT(flent->fe_rx_srs_cnt == 0);
2386      -                /* Setup the Rx SRS */
2387 2390                  mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
2388 2391                      mac_rx_deliver, mcip, NULL, NULL);
2389 2392                  mutex_enter(&flent->fe_lock);
2390 2393                  flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
2391 2394                  flent->fe_cb_arg1 = (void *)mip;
2392 2395                  flent->fe_cb_arg2 = (void *)mac_srs;
2393 2396                  mutex_exit(&flent->fe_lock);
2394 2397          }
2395 2398  
2396 2399          if (rx_group == NULL)
2397 2400                  return;
     2401 +
2398 2402          /*
2399      -         * fanout for default SRS is done when default SRS are created
2400      -         * above. As each ring is added to the group, we setup the
2401      -         * SRS and fanout to it.
     2403 +         * If the group is marked RESERVED then setup an SRS and
     2404 +         * fanout for each HW ring.
2402 2405           */
2403 2406          switch (rx_group->mrg_state) {
2404 2407          case MAC_GROUP_STATE_RESERVED:
2405 2408                  for (ring = rx_group->mrg_rings; ring != NULL;
2406 2409                      ring = ring->mr_next) {
     2410 +                        uint16_t vid = i_mac_flow_vid(mcip->mci_flent);
     2411 +
2407 2412                          switch (ring->mr_state) {
2408 2413                          case MR_INUSE:
2409 2414                          case MR_FREE:
2410 2415                                  if (ring->mr_srs != NULL)
2411 2416                                          break;
2412 2417                                  if (ring->mr_state != MR_INUSE)
2413 2418                                          (void) mac_start_ring(ring);
2414 2419  
2415 2420                                  /*
2416      -                                 * Since the group is exclusively ours create
2417      -                                 * an SRS for this ring to allow the
2418      -                                 * individual SRS to dynamically poll the
2419      -                                 * ring. Do this only if the  client is not
2420      -                                 * a VLAN MAC client, since for VLAN we do
2421      -                                 * s/w classification for the VID check, and
2422      -                                 * if it has a unicast address.
     2421 +                                 * If a client requires SW VLAN
     2422 +                                 * filtering or has no unicast address
     2423 +                                 * then we don't create any HW ring
     2424 +                                 * SRSes.
2423 2425                                   */
2424      -                                if ((mcip->mci_state_flags &
2425      -                                    MCIS_NO_UNICAST_ADDR) ||
2426      -                                    i_mac_flow_vid(mcip->mci_flent) !=
2427      -                                    VLAN_ID_NONE) {
     2426 +                                if ((!MAC_GROUP_HW_VLAN(rx_group) &&
     2427 +                                    vid != VLAN_ID_NONE) || no_unicast)
2428 2428                                          break;
2429      -                                }
     2429 +
     2430 +                                /*
     2431 +                                 * When a client has exclusive use of
     2432 +                                 * a group, and that group's traffic
     2433 +                                 * is fully HW classified, we create
     2434 +                                 * an SRS for each HW ring in order to
     2435 +                                 * make use of dynamic polling of said
     2436 +                                 * HW rings.
     2437 +                                 */
2430 2438                                  mac_srs = mac_srs_create(mcip, flent,
2431 2439                                      fanout_type | link_type,
2432 2440                                      mac_rx_deliver, mcip, NULL, ring);
2433 2441                                  break;
2434 2442                          default:
2435 2443                                  cmn_err(CE_PANIC,
2436 2444                                      "srs_setup: mcip = %p "
2437 2445                                      "trying to add UNKNOWN ring = %p\n",
2438 2446                                      (void *)mcip, (void *)ring);
2439 2447                                  break;
2440 2448                          }
2441 2449                  }
2442 2450                  break;
2443 2451          case MAC_GROUP_STATE_SHARED:
2444 2452                  /*
2445      -                 * Set all rings of this group to software classified.
2446      -                 *
2447      -                 * If the group is current RESERVED, the existing mac
2448      -                 * client (the only client on this group) is using
2449      -                 * this group exclusively.  In that case we need to
2450      -                 * disable polling on the rings of the group (if it
2451      -                 * was enabled), and free the SRS associated with the
2452      -                 * rings.
     2453 +                 * When a group is shared by multiple clients, we must
     2454 +                 * use SW classification to ensure packets are
     2455 +                 * delivered to the correct client.
2453 2456                   */
2454 2457                  mac_rx_switch_grp_to_sw(rx_group);
2455 2458                  break;
2456 2459          default:
2457 2460                  ASSERT(B_FALSE);
2458 2461                  break;
2459 2462          }
2460 2463  }
2461 2464  
2462 2465  /*
↓ open down ↓ 32 lines elided ↑ open up ↑
2495 2498                  return;
2496 2499          }
2497 2500          if (flent->fe_tx_srs == NULL) {
2498 2501                  (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2499 2502                      NULL, mcip, NULL, NULL);
2500 2503          }
2501 2504          mac_tx_srs_setup(mcip, flent);
2502 2505  }
2503 2506  
2504 2507  /*
2505      - * Remove all the RX SRSs. If we want to remove only the SRSs associated
2506      - * with h/w rings, leave the S/W SRS alone. This is used when we want to
2507      - * move the MAC client from one group to another, so we need to teardown
2508      - * on the h/w SRSs.
     2508 + * Tear down all the Rx SRSes, unless hwonly is set, in which case
     2509 + * tear down only the Rx HW SRSes and leave the SW SRS alone. The
     2510 + * hwonly flag is set when we wish to move a MAC client from one group
     2511 + * to another. In that case, we need to release the current HW SRSes
     2512 + * but keep the SW SRS for continued traffic classification.
2509 2513   */
2510 2514  void
2511 2515  mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2512 2516  {
2513 2517          mac_soft_ring_set_t     *mac_srs;
2514 2518          int                     i;
2515 2519          int                     count = flent->fe_rx_srs_cnt;
2516 2520  
2517 2521          for (i = 0; i < count; i++) {
2518 2522                  if (i == 0 && hwonly)
2519 2523                          continue;
2520 2524                  mac_srs = flent->fe_rx_srs[i];
2521 2525                  mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2522 2526                  mac_srs_free(mac_srs);
2523 2527                  flent->fe_rx_srs[i] = NULL;
2524 2528                  flent->fe_rx_srs_cnt--;
2525 2529          }
2526      -        ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1);
2527      -        ASSERT(hwonly || flent->fe_rx_srs_cnt == 0);
     2530 +
     2531 +        /*
     2532 +         * If we are only tearing down the HW SRSes then there must be
     2533 +         * one SRS left for SW classification. Otherwise we are tearing
     2534 +         * down both HW and SW and there should be no SRSes left.
     2535 +         */
     2536 +        if (hwonly)
     2537 +                VERIFY3S(flent->fe_rx_srs_cnt, ==, 1);
     2538 +        else
     2539 +                VERIFY3S(flent->fe_rx_srs_cnt, ==, 0);
2528 2540  }
2529 2541  
2530 2542  /*
2531 2543   * Remove the TX SRS.
2532 2544   */
2533 2545  void
2534 2546  mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2535 2547      uint32_t link_type)
2536 2548  {
2537 2549          mac_soft_ring_set_t     *tx_srs;
↓ open down ↓ 281 lines elided ↑ open up ↑
2819 2831   * only one Rx ring and make it a TCP specific Rx ring and use the
2820 2832   * H/W default Rx ring for the rest (this Rx ring is never polled).
2821 2833   *
2822 2834   * For clients that don't have MAC address, but want to receive and
2823 2835   * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath.
2824 2836   * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we
2825 2837   * always give the default group and use software classification (i.e.
2826 2838   * even if this is the only client in the default group, we will
2827 2839   * leave group as shared).
2828 2840   */
     2841 +
2829 2842  int
2830 2843  mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2831 2844      uint32_t link_type)
2832 2845  {
2833 2846          mac_impl_t              *mip = mcip->mci_mip;
2834 2847          mac_group_t             *rgroup = NULL;
2835 2848          mac_group_t             *tgroup = NULL;
2836 2849          mac_group_t             *default_rgroup;
2837 2850          mac_group_t             *default_tgroup;
2838 2851          int                     err;
     2852 +        uint16_t                vid;
2839 2853          uint8_t                 *mac_addr;
2840 2854          mac_group_state_t       next_state;
2841 2855          mac_client_impl_t       *group_only_mcip;
2842 2856          mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
2843 2857          mac_resource_props_t    *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2844 2858          boolean_t               rxhw;
2845 2859          boolean_t               txhw;
2846 2860          boolean_t               use_default = B_FALSE;
2847 2861          cpupart_t               *cpupart;
2848 2862          boolean_t               no_unicast;
2849 2863          boolean_t               isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2850 2864          mac_client_impl_t       *reloc_pmcip = NULL;
     2865 +        boolean_t               use_hw;
2851 2866  
2852 2867          ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2853 2868  
2854 2869          switch (link_type) {
2855 2870          case SRST_FLOW:
2856 2871                  mac_srs_group_setup(mcip, flent, link_type);
2857 2872                  return (0);
2858 2873  
2859 2874          case SRST_LINK:
2860 2875                  no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
↓ open down ↓ 11 lines elided ↑ open up ↑
2872 2887                          goto grp_found;
2873 2888                  }
2874 2889                  rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2875 2890                      (mrp->mrp_nrxrings > 0 ||
2876 2891                      (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2877 2892                  txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2878 2893                      (mrp->mrp_ntxrings > 0 ||
2879 2894                      (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2880 2895  
2881 2896                  /*
2882      -                 * By default we have given the primary all the rings
2883      -                 * i.e. the default group. Let's see if the primary
2884      -                 * needs to be relocated so that the addition of this
2885      -                 * client doesn't impact the primary's performance,
2886      -                 * i.e. if the primary is in the default group and
2887      -                 * we add this client, the primary will lose polling.
2888      -                 * We do this only for NICs supporting dynamic ring
2889      -                 * grouping and only when this is the first client
2890      -                 * after the primary (i.e. nactiveclients is 2)
     2897 +                 * All the rings initially belong to the default group
     2898 +                 * under dynamic grouping. The primary client uses the
     2899 +                 * default group when it is the only client. The
     2900 +                 * default group is also used as the destination for
     2901 +                 * all multicast and broadcast traffic of all clients.
     2902 +                 * Therefore, the primary client loses its ability to
     2903 +                 * poll the softrings on addition of a second client.
     2904 +                 * To avoid a performance penalty, MAC will move the
     2905 +                 * primary client to a dedicated group when it can.
     2906 +                 *
     2907 +                 * When using static grouping, the primary client
     2908 +                 * begins life on a non-default group. There is
     2909 +                 * no moving needed upon addition of a second client.
2891 2910                   */
2892 2911                  if (!isprimary && mip->mi_nactiveclients == 2 &&
2893 2912                      (group_only_mcip = mac_primary_client_handle(mip)) !=
2894 2913                      NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2895 2914                          reloc_pmcip = mac_check_primary_relocation(
2896 2915                              group_only_mcip, rxhw);
2897 2916                  }
     2917 +
2898 2918                  /*
2899 2919                   * Check to see if we can get an exclusive group for
2900 2920                   * this mac address or if there already exists a
2901 2921                   * group that has this mac address (case of VLANs).
2902 2922                   * If no groups are available, use the default group.
2903 2923                   */
2904 2924                  rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2905 2925                  if (rgroup == NULL && rxhw) {
2906 2926                          err = ENOSPC;
2907 2927                          goto setup_failed;
2908 2928                  } else if (rgroup == NULL) {
2909 2929                          rgroup = default_rgroup;
2910 2930                  }
     2931 +
2911 2932                  /*
     2933 +                 * If we are adding a second client to a
     2934 +                 * non-default group then we need to move the
     2935 +                 * existing client to the default group and
     2936 +                 * add the new client to the default group as
     2937 +                 * well.
     2938 +                 */
     2939 +                if (rgroup != default_rgroup &&
     2940 +                    rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) {
     2941 +                        group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup);
     2942 +                        err = mac_rx_switch_group(group_only_mcip, rgroup,
     2943 +                            default_rgroup);
     2944 +
     2945 +                        if (err != 0)
     2946 +                                goto setup_failed;
     2947 +
     2948 +                        rgroup = default_rgroup;
     2949 +                }
     2950 +
     2951 +                /*
2912 2952                   * Check to see if we can get an exclusive group for
2913 2953                   * this mac client. If no groups are available, use
2914 2954                   * the default group.
2915 2955                   */
2916 2956                  tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2917 2957                  if (tgroup == NULL && txhw) {
2918 2958                          if (rgroup != NULL && rgroup != default_rgroup)
2919 2959                                  mac_release_rx_group(mcip, rgroup);
2920 2960                          err = ENOSPC;
2921 2961                          goto setup_failed;
↓ open down ↓ 10 lines elided ↑ open up ↑
2932 2972                          if (rgroup != default_rgroup &&
2933 2973                              MAC_GROUP_NO_CLIENT(rgroup) &&
2934 2974                              (rxhw || mcip->mci_share != 0)) {
2935 2975                                  MAC_RX_GRP_RESERVED(mip);
2936 2976                                  if (mip->mi_rx_group_type ==
2937 2977                                      MAC_GROUP_TYPE_DYNAMIC) {
2938 2978                                          MAC_RX_RING_RESERVED(mip,
2939 2979                                              rgroup->mrg_cur_count);
2940 2980                                  }
2941 2981                          }
     2982 +
2942 2983                          flent->fe_rx_ring_group = rgroup;
2943 2984                          /*
2944      -                         * Add the client to the group. This could cause
2945      -                         * either this group to move to the shared state or
2946      -                         * cause the default group to move to the shared state.
2947      -                         * The actions on this group are done here, while the
2948      -                         * actions on the default group are postponed to
2949      -                         * the end of this function.
     2985 +                         * Add the client to the group and update the
     2986 +                         * group's state. If rgroup != default_rgroup
     2987 +                         * then the rgroup should only ever have one
     2988 +                         * client and be in the RESERVED state. But no
     2989 +                         * matter what, the default_rgroup will enter
     2990 +                         * the SHARED state since it has to receive
     2991 +                         * all broadcast and multicast traffic. This
     2992 +                         * case is handled later in the function.
2950 2993                           */
2951 2994                          mac_group_add_client(rgroup, mcip);
2952 2995                          next_state = mac_group_next_state(rgroup,
2953 2996                              &group_only_mcip, default_rgroup, B_TRUE);
2954 2997                          mac_set_group_state(rgroup, next_state);
2955 2998                  }
2956 2999  
2957 3000                  if (tgroup != NULL) {
2958 3001                          if (tgroup != default_tgroup &&
2959 3002                              MAC_GROUP_NO_CLIENT(tgroup) &&
↓ open down ↓ 4 lines elided ↑ open up ↑
2964 3007                                          MAC_TX_RING_RESERVED(mip,
2965 3008                                              tgroup->mrg_cur_count);
2966 3009                                  }
2967 3010                          }
2968 3011                          flent->fe_tx_ring_group = tgroup;
2969 3012                          mac_group_add_client(tgroup, mcip);
2970 3013                          next_state = mac_group_next_state(tgroup,
2971 3014                              &group_only_mcip, default_tgroup, B_FALSE);
2972 3015                          tgroup->mrg_state = next_state;
2973 3016                  }
2974      -                /*
2975      -                 * Setup the Rx and Tx SRSes. If we got a pristine group
2976      -                 * exclusively above, mac_srs_group_setup would simply create
2977      -                 * the required SRSes. If we ended up sharing a previously
2978      -                 * reserved group, mac_srs_group_setup would also dismantle the
2979      -                 * SRSes of the previously exclusive group
2980      -                 */
2981      -                mac_srs_group_setup(mcip, flent, link_type);
2982 3017  
2983 3018                  /* We are setting up minimal datapath only */
2984      -                if (no_unicast)
     3019 +                if (no_unicast) {
     3020 +                        mac_srs_group_setup(mcip, flent, link_type);
2985 3021                          break;
2986      -                /* Program the S/W Classifer */
     3022 +                }
     3023 +
     3024 +                /* Program software classification. */
2987 3025                  if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
2988 3026                          goto setup_failed;
2989 3027  
2990      -                /* Program the H/W Classifier */
2991      -                if ((err = mac_add_macaddr(mip, rgroup, mac_addr,
2992      -                    (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0)
     3028 +                /* Program hardware classification. */
     3029 +                vid = i_mac_flow_vid(flent);
     3030 +                use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
     3031 +                err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);
     3032 +
     3033 +                if (err != 0)
2993 3034                          goto setup_failed;
     3035 +
2994 3036                  mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
2995      -                ASSERT(mcip->mci_unicast != NULL);
     3037 +                VERIFY3P(mcip->mci_unicast, !=, NULL);
     3038 +
     3039 +                /*
     3040 +                 * Set up the Rx and Tx SRSes. If the client has a
     3041 +                 * reserved group, then mac_srs_group_setup() creates
     3042 +                 * the required SRSes for the HW rings. If we have a
     3043 +                 * shared group, mac_srs_group_setup() dismantles the
     3044 +                 * HW SRSes of the previously exclusive group.
     3045 +                 */
     3046 +                mac_srs_group_setup(mcip, flent, link_type);
     3047 +
2996 3048                  /* (Re)init the v6 token & local addr used by link protection */
2997 3049                  mac_protect_update_mac_token(mcip);
2998 3050                  break;
2999 3051  
3000 3052          default:
3001 3053                  ASSERT(B_FALSE);
3002 3054                  break;
3003 3055          }
3004 3056  
3005 3057          /*
↓ open down ↓ 23 lines elided ↑ open up ↑
3029 3081                                      MCIP_RESOURCE_PROPS(group_only_mcip),
3030 3082                                      mac_rx_deliver, group_only_mcip, NULL,
3031 3083                                      cpupart);
3032 3084                                  mac_set_pool_effective(use_default, cpupart,
3033 3085                                      mrp, emrp);
3034 3086                                  pool_unlock();
3035 3087                          }
3036 3088                          ASSERT(default_rgroup->mrg_state ==
3037 3089                              MAC_GROUP_STATE_SHARED);
3038 3090                  }
     3091 +
3039 3092                  /*
3040      -                 * If we get an exclusive group for a VLAN MAC client we
3041      -                 * need to take the s/w path to make the additional check for
3042      -                 * the vid. Disable polling and set it to s/w classification.
3043      -                 * Similarly for clients that don't have a unicast address.
     3093 +                 * A VLAN MAC client on a reserved group still
     3094 +                 * requires SW classification if the MAC doesn't
     3095 +                 * provide VLAN HW filtering.
     3096 +                 *
     3097 +                 * Clients with no unicast address also require SW
     3098 +                 * classification.
3044 3099                   */
3045 3100                  if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3046      -                    (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) {
     3101 +                    ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) ||
     3102 +                    no_unicast)) {
3047 3103                          mac_rx_switch_grp_to_sw(rgroup);
3048 3104                  }
     3105 +
3049 3106          }
     3107 +
3050 3108          mac_set_rings_effective(mcip);
3051 3109          return (0);
3052 3110  
3053 3111  setup_failed:
3054 3112          /* Switch the primary back to default group */
3055 3113          if (reloc_pmcip != NULL) {
3056 3114                  (void) mac_rx_switch_group(reloc_pmcip,
3057 3115                      reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3058 3116          }
3059 3117          mac_datapath_teardown(mcip, flent, link_type);
↓ open down ↓ 5 lines elided ↑ open up ↑
3065 3123      uint32_t link_type)
3066 3124  {
3067 3125          mac_impl_t              *mip = mcip->mci_mip;
3068 3126          mac_group_t             *group = NULL;
3069 3127          mac_client_impl_t       *grp_only_mcip;
3070 3128          flow_entry_t            *group_only_flent;
3071 3129          mac_group_t             *default_group;
3072 3130          boolean_t               check_default_group = B_FALSE;
3073 3131          mac_group_state_t       next_state;
3074 3132          mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
     3133 +        uint16_t                vid;
3075 3134  
3076 3135          ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3077 3136  
3078 3137          switch (link_type) {
3079 3138          case SRST_FLOW:
3080 3139                  mac_rx_srs_group_teardown(flent, B_FALSE);
3081 3140                  mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3082 3141                  return;
3083 3142  
3084 3143          case SRST_LINK:
3085 3144                  /* Stop sending packets */
3086 3145                  mac_tx_client_block(mcip);
     3146 +                group = flent->fe_rx_ring_group;
     3147 +                vid = i_mac_flow_vid(flent);
3087 3148  
3088      -                /* Stop the packets coming from the H/W */
     3149 +                /*
     3150 +                 * Stop the packet flow from the hardware by disabling
     3151 +                 * any hardware filters assigned to this client.
     3152 +                 */
3089 3153                  if (mcip->mci_unicast != NULL) {
3090 3154                          int err;
3091      -                        err = mac_remove_macaddr(mcip->mci_unicast);
     3155 +
     3156 +                        err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
     3157 +
3092 3158                          if (err != 0) {
3093      -                                cmn_err(CE_WARN, "%s: failed to remove a MAC"
3094      -                                    " address because of error 0x%x",
     3159 +                                cmn_err(CE_WARN, "%s: failed to remove a MAC HW"
     3160 +                                    " filters because of error 0x%x",
3095 3161                                      mip->mi_name, err);
3096 3162                          }
     3163 +
3097 3164                          mcip->mci_unicast = NULL;
3098 3165                  }
3099 3166  
3100 3167                  /* Stop the packets coming from the S/W classifier */
3101 3168                  mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3102 3169                  mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3103 3170  
3104 3171                  /* Now quiesce and destroy all SRS and soft rings */
3105 3172                  mac_rx_srs_group_teardown(flent, B_FALSE);
3106 3173                  mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3107 3174  
3108 3175                  ASSERT((mcip->mci_flent == flent) &&
3109 3176                      (flent->fe_next == NULL));
3110 3177  
3111 3178                  /*
3112 3179                   * Release our hold on the group as well. We need
3113 3180                   * to check if the shared group has only one client
3114 3181                   * left who can use it exclusively. Also, if we
3115 3182                   * were the last client, release the group.
3116 3183                   */
3117      -                group = flent->fe_rx_ring_group;
3118 3184                  default_group = MAC_DEFAULT_RX_GROUP(mip);
3119 3185                  if (group != NULL) {
3120 3186                          mac_group_remove_client(group, mcip);
3121 3187                          next_state = mac_group_next_state(group,
3122 3188                              &grp_only_mcip, default_group, B_TRUE);
     3189 +
3123 3190                          if (next_state == MAC_GROUP_STATE_RESERVED) {
3124 3191                                  /*
3125 3192                                   * Only one client left on this RX group.
3126 3193                                   */
3127      -                                ASSERT(grp_only_mcip != NULL);
     3194 +                                VERIFY3P(grp_only_mcip, !=, NULL);
3128 3195                                  mac_set_group_state(group,
3129 3196                                      MAC_GROUP_STATE_RESERVED);
3130 3197                                  group_only_flent = grp_only_mcip->mci_flent;
3131 3198  
3132 3199                                  /*
3133 3200                                   * The only remaining client has exclusive
3134 3201                                   * access on the group. Allow it to
3135 3202                                   * dynamically poll the H/W rings etc.
3136 3203                                   */
3137 3204                                  mac_rx_srs_group_setup(grp_only_mcip,
↓ open down ↓ 4 lines elided ↑ open up ↑
3142 3209                                      mac_rx_deliver, grp_only_mcip, NULL, NULL);
3143 3210                                  mac_rx_group_unmark(group, MR_INCIPIENT);
3144 3211                                  mac_set_rings_effective(grp_only_mcip);
3145 3212                          } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3146 3213                                  /*
3147 3214                                   * This is a non-default group being freed up.
3148 3215                                   * We need to reevaluate the default group
3149 3216                                   * to see if the primary client can get
3150 3217                                   * exclusive access to the default group.
3151 3218                                   */
3152      -                                ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
     3219 +                                VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip));
3153 3220                                  if (mrp->mrp_mask & MRP_RX_RINGS) {
3154 3221                                          MAC_RX_GRP_RELEASED(mip);
3155 3222                                          if (mip->mi_rx_group_type ==
3156 3223                                              MAC_GROUP_TYPE_DYNAMIC) {
3157 3224                                                  MAC_RX_RING_RELEASED(mip,
3158 3225                                                      group->mrg_cur_count);
3159 3226                                          }
3160 3227                                  }
3161 3228                                  mac_release_rx_group(mcip, group);
3162 3229                                  mac_set_group_state(group,
3163 3230                                      MAC_GROUP_STATE_REGISTERED);
3164 3231                                  check_default_group = B_TRUE;
3165 3232                          } else {
3166      -                                ASSERT(next_state == MAC_GROUP_STATE_SHARED);
     3233 +                                VERIFY3S(next_state, ==,
     3234 +                                    MAC_GROUP_STATE_SHARED);
3167 3235                                  mac_set_group_state(group,
3168 3236                                      MAC_GROUP_STATE_SHARED);
3169 3237                                  mac_rx_group_unmark(group, MR_CONDEMNED);
3170 3238                          }
3171 3239                          flent->fe_rx_ring_group = NULL;
3172 3240                  }
3173 3241                  /*
3174 3242                   * Remove the client from the TX group. Additionally, if
3175 3243                   * this is a non-default group, then we also need to release
3176 3244                   * the group.
↓ open down ↓ 68 lines elided ↑ open up ↑
3245 3313          }
3246 3314  
3247 3315          /*
3248 3316           * The mac client using the default group gets exclusive access to the
3249 3317           * default group if and only if it is the sole client on the entire
3250 3318           * mip. If so set the group state to reserved, and set up the SRSes
3251 3319           * over the default group.
3252 3320           */
3253 3321          if (check_default_group) {
3254 3322                  default_group = MAC_DEFAULT_RX_GROUP(mip);
3255      -                ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED);
     3323 +                VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED);
3256 3324                  next_state = mac_group_next_state(default_group,
3257 3325                      &grp_only_mcip, default_group, B_TRUE);
3258 3326                  if (next_state == MAC_GROUP_STATE_RESERVED) {
3259      -                        ASSERT(grp_only_mcip != NULL &&
3260      -                            mip->mi_nactiveclients == 1);
     3327 +                        VERIFY3P(grp_only_mcip, !=, NULL);
     3328 +                        VERIFY3U(mip->mi_nactiveclients, ==, 1);
3261 3329                          mac_set_group_state(default_group,
3262 3330                              MAC_GROUP_STATE_RESERVED);
3263 3331                          mac_rx_srs_group_setup(grp_only_mcip,
3264 3332                              grp_only_mcip->mci_flent, SRST_LINK);
3265 3333                          mac_fanout_setup(grp_only_mcip,
3266 3334                              grp_only_mcip->mci_flent,
3267 3335                              MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3268 3336                              grp_only_mcip, NULL, NULL);
3269 3337                          mac_rx_group_unmark(default_group, MR_INCIPIENT);
3270 3338                          mac_set_rings_effective(grp_only_mcip);
↓ open down ↓ 503 lines elided ↑ open up ↑
3774 3842                          break;
3775 3843          }
3776 3844          mutex_exit(&mac_srs->srs_lock);
3777 3845          ASSERT(i < mac_srs->srs_tx_ring_count);
3778 3846          remove_sring = soft_ring;
3779 3847          /*
3780 3848           * In the case of aggr, the soft ring associated with a Tx ring
3781 3849           * is also stored in st_soft_rings[] array. That entry should
3782 3850           * be removed.
3783 3851           */
3784      -        if (mcip->mci_state_flags & MCIS_IS_AGGR) {
     3852 +        if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
3785 3853                  mac_srs_tx_t *tx = &mac_srs->srs_tx;
3786 3854  
3787 3855                  ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
3788 3856                  tx->st_soft_rings[tx_ring->mr_index] = NULL;
3789 3857          }
3790 3858          mac_soft_ring_remove(mac_srs, remove_sring);
3791 3859          mac_srs_update_fanout_list(mac_srs);
3792 3860  }
3793 3861  
3794 3862  /*
↓ open down ↓ 8 lines elided ↑ open up ↑
3803 3871          mac_soft_ring_set_t     *tx_srs = flent->fe_tx_srs;
3804 3872          int                     i;
3805 3873          int                     tx_ring_count = 0;
3806 3874          uint32_t                soft_ring_type;
3807 3875          mac_group_t             *grp = NULL;
3808 3876          mac_ring_t              *ring;
3809 3877          mac_srs_tx_t            *tx = &tx_srs->srs_tx;
3810 3878          boolean_t               is_aggr;
3811 3879          uint_t                  ring_info = 0;
3812 3880  
3813      -        is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0;
     3881 +        is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0;
3814 3882          grp = flent->fe_tx_ring_group;
3815 3883          if (grp == NULL) {
3816 3884                  ring = (mac_ring_t *)mip->mi_default_tx_ring;
3817 3885                  goto no_group;
3818 3886          }
3819 3887          tx_ring_count = grp->mrg_cur_count;
3820 3888          ring = grp->mrg_rings;
3821 3889          /*
3822 3890           * An attempt is made to reserve 'tx_ring_count' number
3823 3891           * of Tx rings. If tx_ring_count is 0, default Tx ring
↓ open down ↓ 193 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX