3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2017, Joyent, Inc.
24 */
25
26 #include <sys/types.h>
27 #include <sys/callb.h>
28 #include <sys/cpupart.h>
29 #include <sys/pool.h>
30 #include <sys/pool_pset.h>
31 #include <sys/sdt.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <inet/ipsec_impl.h>
36 #include <inet/ip_impl.h>
37 #include <inet/sadb.h>
38 #include <inet/ipsecesp.h>
39 #include <inet/ipsecah.h>
40
41 #include <sys/mac_impl.h>
42 #include <sys/mac_client_impl.h>
43 #include <sys/mac_client_priv.h>
1169 *
1170 * The underlying device can expose upto MAX_RINGS_PER_GROUP worth of
1171 * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of
1172 * array space is needed to store Tx soft rings. Thus we allocate so
1173 * much array space for srs_tx_soft_rings.
1174 *
1175 * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth
1176 * of space to st_soft_rings. This array is used for quick access to
1177 * soft ring associated with a pseudo Tx ring based on the pseudo
1178 * ring's index (mr_index).
1179 */
1180 static void
1181 mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 {
1183 mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184
1185 if (mac_srs->srs_type & SRST_TX) {
1186 mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188 MAX_RINGS_PER_GROUP, KM_SLEEP);
1189 if (mcip->mci_state_flags & MCIS_IS_AGGR) {
1190 mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191
1192 tx->st_soft_rings = (mac_soft_ring_t **)
1193 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194 MAX_RINGS_PER_GROUP, KM_SLEEP);
1195 }
1196 } else {
1197 mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199 KM_SLEEP);
1200 mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **)
1201 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1202 KM_SLEEP);
1203 mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **)
1204 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1205 KM_SLEEP);
1206 }
1207 }
1208
1209 static void
1578 mutex_exit(&srs->srs_lock);
1579
1580 mac_tx_client_restart((mac_client_handle_t)mcip);
1581 }
1582
1583 /*
1584 * The uber function that deals with any update to bandwidth limits.
1585 */
1586 void
1587 mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
1588 {
1589 int count;
1590
1591 for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592 mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593 mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 }
1595
1596 /*
1597 * When the first sub-flow is added to a link, we disable polling on the
1598 * link and also modify the entry point to mac_rx_srs_subflow_process.
1599 * (polling is disabled because with the subflow added, accounting
1600 * for polling needs additional logic, it is assumed that when a subflow is
1601 * added, we can take some hit as a result of disabling polling rather than
1602 * adding more complexity - if this becomes a perf. issue we need to
 * re-evaluate this logic). When the last subflow is removed, we turn
 * polling back on and also reset the entry point to mac_rx_srs_process.
1605 *
1606 * In the future if there are multiple SRS, we can simply
1607 * take one and give it to the flow rather than disabling polling and
1608 * resetting the entry point.
1609 */
1610 void
1611 mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 {
1613 flow_entry_t *flent = mcip->mci_flent;
1614 int i;
1615 mac_impl_t *mip = mcip->mci_mip;
1616 mac_rx_func_t rx_func;
1617 uint_t rx_srs_cnt;
1618 boolean_t enable_classifier;
1619
1620 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1621
1622 enable_classifier = !FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && enable;
1623
1624 rx_func = enable_classifier ? mac_rx_srs_subflow_process :
1629 enable_classifier = B_TRUE;
1630
1631 /*
1632 * If receive function has already been configured correctly for
1633 * current subflow configuration, do nothing.
1634 */
1635 if (flent->fe_cb_fn == (flow_fn_t)rx_func)
1636 return;
1637
1638 rx_srs_cnt = flent->fe_rx_srs_cnt;
1639 for (i = 0; i < rx_srs_cnt; i++) {
1640 ASSERT(flent->fe_rx_srs[i] != NULL);
1641 mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642 enable_classifier, rx_func);
1643 }
1644
1645 /*
1646 * Change the S/W classifier so that we can land in the
1647 * correct processing function with correct argument.
1648 * If all subflows have been removed we can revert to
 * mac_rx_srs_process, else we need mac_rx_srs_subflow_process.
1650 */
1651 mutex_enter(&flent->fe_lock);
1652 flent->fe_cb_fn = (flow_fn_t)rx_func;
1653 flent->fe_cb_arg1 = (void *)mip;
1654 flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655 mutex_exit(&flent->fe_lock);
1656 }
1657
1658 static void
1659 mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
1660 {
1661 int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0;
1662 mac_soft_ring_t *softring;
1663
1664 softring = mac_srs->srs_soft_ring_head;
1665 if (softring == NULL) {
1666 ASSERT(mac_srs->srs_soft_ring_count == 0);
1667 mac_srs->srs_tcp_ring_count = 0;
1668 mac_srs->srs_udp_ring_count = 0;
1669 mac_srs->srs_oth_ring_count = 0;
2168 mutex_exit(&mac_bw->mac_bw_lock);
2169 mac_srs->srs_type |= SRST_BW_CONTROL;
2170 } else {
2171 mac_srs->srs_drain_func = mac_rx_srs_drain;
2172 }
2173
2174 /*
2175 * We use the following policy to control Receive
2176 * Side Dynamic Polling:
2177 * 1) We switch to poll mode anytime the processing thread causes
2178 * a backlog to build up in SRS and its associated Soft Rings
2179 * (sr_poll_pkt_cnt > 0).
2180 * 2) As long as the backlog stays under the low water mark
2181 * (sr_lowat), we poll the H/W for more packets.
2182 * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183 * stay in poll mode but don't poll the H/W for more packets.
2184 * 4) Anytime in polling mode, if we poll the H/W for packets and
2185 * find nothing plus we have an existing backlog
2186 * (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187 * the H/W for packets anymore (let the polling thread go to sleep).
2188 * 5) Once the backlog is relived (packets are processed) we reenable
2189 * polling (by signalling the poll thread) only when the backlog
2190 * dips below sr_poll_thres.
2191 * 6) sr_hiwat is used exclusively when we are not polling capable
2192 * and is used to decide when to drop packets so the SRS queue
2193 * length doesn't grow infinitely.
2194 */
2195 if (!is_tx_srs) {
2196 srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197 /* Low water mark needs to be less than high water mark */
2198 srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
2199 mac_soft_ring_max_q_cnt ? mac_soft_ring_min_q_cnt :
2200 (mac_soft_ring_max_q_cnt >> 2);
2201 /* Poll threshold need to be half of low water mark or less */
2202 srs_rx->sr_poll_thres = mac_soft_ring_poll_thres <=
2203 (srs_rx->sr_lowat >> 1) ? mac_soft_ring_poll_thres :
2204 (srs_rx->sr_lowat >> 1);
2205 if (mac_latency_optimize)
2206 mac_srs->srs_state |= SRS_LATENCY_OPT;
2207 else
2208 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2239 /* Is the mac_srs created over the RX default group? */
2240 if (ring->mr_gh == (mac_group_handle_t)
2241 MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) {
2242 mac_srs->srs_type |= SRST_DEFAULT_GRP;
2243 }
2244 mac_srs->srs_ring = ring;
2245 ring->mr_srs = mac_srs;
2246 ring->mr_classify_type = MAC_HW_CLASSIFIER;
2247 ring->mr_flag |= MR_INCIPIENT;
2248
2249 if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250 FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251 mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252
2253 mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254 mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255 mac_srs->srs_pri);
2256 /*
2257 * Some drivers require serialization and don't send
2258 * packet chains in interrupt context. For such
2259 * drivers, we should always queue in soft ring
2260 * so that we get a chance to switch into a polling
2261 * mode under backlog.
2262 */
2263 ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264 if (ring_info & MAC_RING_RX_ENQUEUE)
2265 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266 }
2267 done:
2268 mac_srs_stat_create(mac_srs);
2269 return (mac_srs);
2270 }
2271
2272 /*
 * Figure out the number of soft rings required. It's dependent on
 * whether protocol fanout is required (for LINKs), global settings
2275 * require us to do fanout for performance (based on mac_soft_ring_enable),
2276 * or user has specifically requested fanout.
2277 */
2278 static uint32_t
2279 mac_find_fanout(flow_entry_t *flent, uint32_t link_type)
2280 {
mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
    uint32_t link_type)
{
	cpupart_t *cpupart;
	mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
	mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
	boolean_t use_default = B_FALSE;

	/* Build both the Rx and Tx SRSes for this flow. */
	mac_rx_srs_group_setup(mcip, flent, link_type);
	mac_tx_srs_group_setup(mcip, flent, link_type);

	/*
	 * Look up the processor set implied by the client's resource
	 * properties and set up fanout over it, all under pool_lock().
	 * use_default is set by mac_pset_find() when no configured pset
	 * applies, and is reflected in the effective properties below.
	 */
	pool_lock();
	cpupart = mac_pset_find(mrp, &use_default);
	mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
	    mac_rx_deliver, mcip, NULL, cpupart);
	mac_set_pool_effective(use_default, cpupart, mrp, emrp);
	pool_unlock();
}
2365
/*
 * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there
 * is a group associated with this MAC client, set up SRSs for individual
 * h/w rings.
 */
void
mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
    uint32_t link_type)
{
	mac_impl_t *mip = mcip->mci_mip;
	mac_soft_ring_set_t *mac_srs;
	mac_ring_t *ring;
	uint32_t fanout_type;
	mac_group_t *rx_group = flent->fe_rx_ring_group;

	fanout_type = mac_find_fanout(flent, link_type);

	/* Create the SRS for S/W classification if none exists */
	if (flent->fe_rx_srs[0] == NULL) {
		ASSERT(flent->fe_rx_srs_cnt == 0);
		/* Setup the Rx SRS */
		mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
		    mac_rx_deliver, mcip, NULL, NULL);
		/*
		 * Point the flow's callback at the new SRS's lower-level
		 * receive routine; fe_lock protects the callback triple.
		 */
		mutex_enter(&flent->fe_lock);
		flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
		flent->fe_cb_arg1 = (void *)mip;
		flent->fe_cb_arg2 = (void *)mac_srs;
		mutex_exit(&flent->fe_lock);
	}

	/* Without a h/w group there is nothing more to set up. */
	if (rx_group == NULL)
		return;
	/*
	 * fanout for default SRS is done when default SRS are created
	 * above. As each ring is added to the group, we setup the
	 * SRS and fanout to it.
	 */
	switch (rx_group->mrg_state) {
	case MAC_GROUP_STATE_RESERVED:
		for (ring = rx_group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			switch (ring->mr_state) {
			case MR_INUSE:
			case MR_FREE:
				/* Ring already has an SRS; nothing to do. */
				if (ring->mr_srs != NULL)
					break;
				/* A free ring must be started before use. */
				if (ring->mr_state != MR_INUSE)
					(void) mac_start_ring(ring);

				/*
				 * Since the group is exclusively ours create
				 * an SRS for this ring to allow the
				 * individual SRS to dynamically poll the
				 * ring. Do this only if the client is not
				 * a VLAN MAC client, since for VLAN we do
				 * s/w classification for the VID check, and
				 * if it has a unicast address.
				 */
				if ((mcip->mci_state_flags &
				    MCIS_NO_UNICAST_ADDR) ||
				    i_mac_flow_vid(mcip->mci_flent) !=
				    VLAN_ID_NONE) {
					break;
				}
				mac_srs = mac_srs_create(mcip, flent,
				    fanout_type | link_type,
				    mac_rx_deliver, mcip, NULL, ring);
				break;
			default:
				cmn_err(CE_PANIC,
				    "srs_setup: mcip = %p "
				    "trying to add UNKNOWN ring = %p\n",
				    (void *)mcip, (void *)ring);
				break;
			}
		}
		break;
	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 *
		 * If the group is currently RESERVED, the existing mac
		 * client (the only client on this group) is using
		 * this group exclusively. In that case we need to
		 * disable polling on the rings of the group (if it
		 * was enabled), and free the SRS associated with the
		 * rings.
		 */
		mac_rx_switch_grp_to_sw(rx_group);
		break;
	default:
		/* No other group state is valid at this point. */
		ASSERT(B_FALSE);
		break;
	}
}
2461
2462 /*
2463 * Set up the TX SRS.
2464 */
2465 void
2466 mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2467 uint32_t link_type)
2468 {
2469 int cnt;
2470 int ringcnt;
2471 mac_ring_t *ring;
2472 mac_group_t *grp;
2485 return;
2486 grp = (mac_group_t *)flent->fe_tx_ring_group;
2487 ringcnt = grp->mrg_cur_count;
2488 ring = grp->mrg_rings;
2489 for (cnt = 0; cnt < ringcnt; cnt++) {
2490 if (ring->mr_state != MR_INUSE) {
2491 (void) mac_start_ring(ring);
2492 }
2493 ring = ring->mr_next;
2494 }
2495 return;
2496 }
2497 if (flent->fe_tx_srs == NULL) {
2498 (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2499 NULL, mcip, NULL, NULL);
2500 }
2501 mac_tx_srs_setup(mcip, flent);
2502 }
2503
2504 /*
2505 * Remove all the RX SRSs. If we want to remove only the SRSs associated
2506 * with h/w rings, leave the S/W SRS alone. This is used when we want to
2507 * move the MAC client from one group to another, so we need to teardown
2508 * on the h/w SRSs.
2509 */
2510 void
2511 mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2512 {
2513 mac_soft_ring_set_t *mac_srs;
2514 int i;
2515 int count = flent->fe_rx_srs_cnt;
2516
2517 for (i = 0; i < count; i++) {
2518 if (i == 0 && hwonly)
2519 continue;
2520 mac_srs = flent->fe_rx_srs[i];
2521 mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2522 mac_srs_free(mac_srs);
2523 flent->fe_rx_srs[i] = NULL;
2524 flent->fe_rx_srs_cnt--;
2525 }
2526 ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1);
2527 ASSERT(hwonly || flent->fe_rx_srs_cnt == 0);
2528 }
2529
2530 /*
2531 * Remove the TX SRS.
2532 */
2533 void
2534 mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2535 uint32_t link_type)
2536 {
2537 mac_soft_ring_set_t *tx_srs;
2538 mac_srs_tx_t *tx;
2539
2540 if ((tx_srs = flent->fe_tx_srs) == NULL)
2541 return;
2542
2543 tx = &tx_srs->srs_tx;
2544 switch (link_type) {
2545 case SRST_FLOW:
2546 /*
2547 * For flows, we need to work with passed
2809 *
2810 * Note: In future, if no fanout is specified, we try to assign 2 Rx
2811 * rings for the primary Link with the primary MAC address + TCP going
2812 * to one ring and primary MAC address + UDP|SCTP going to other ring.
2813 * Any remaining traffic for primary MAC address can go to the default
2814 * Rx ring and get S/W classified. This way the respective SRSs don't
2815 * need to do proto fanout and don't need to have softrings at all and
2816 * can poll their respective Rx rings.
2817 *
2818 * As an optimization, when a new NIC or VNIC is created, we can get
2819 * only one Rx ring and make it a TCP specific Rx ring and use the
2820 * H/W default Rx ring for the rest (this Rx ring is never polled).
2821 *
2822 * For clients that don't have MAC address, but want to receive and
 * transmit packets (e.g., bpf, gvrp, etc.), we need to set up the datapath.
2824 * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we
2825 * always give the default group and use software classification (i.e.
2826 * even if this is the only client in the default group, we will
2827 * leave group as shared).
2828 */
2829 int
2830 mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2831 uint32_t link_type)
2832 {
2833 mac_impl_t *mip = mcip->mci_mip;
2834 mac_group_t *rgroup = NULL;
2835 mac_group_t *tgroup = NULL;
2836 mac_group_t *default_rgroup;
2837 mac_group_t *default_tgroup;
2838 int err;
2839 uint8_t *mac_addr;
2840 mac_group_state_t next_state;
2841 mac_client_impl_t *group_only_mcip;
2842 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
2843 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2844 boolean_t rxhw;
2845 boolean_t txhw;
2846 boolean_t use_default = B_FALSE;
2847 cpupart_t *cpupart;
2848 boolean_t no_unicast;
2849 boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2850 mac_client_impl_t *reloc_pmcip = NULL;
2851
2852 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2853
2854 switch (link_type) {
2855 case SRST_FLOW:
2856 mac_srs_group_setup(mcip, flent, link_type);
2857 return (0);
2858
2859 case SRST_LINK:
2860 no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2861 mac_addr = flent->fe_flow_desc.fd_dst_mac;
2862
2863 /* Default RX group */
2864 default_rgroup = MAC_DEFAULT_RX_GROUP(mip);
2865
2866 /* Default TX group */
2867 default_tgroup = MAC_DEFAULT_TX_GROUP(mip);
2868
2869 if (no_unicast) {
2870 rgroup = default_rgroup;
2871 tgroup = default_tgroup;
2872 goto grp_found;
2873 }
2874 rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2875 (mrp->mrp_nrxrings > 0 ||
2876 (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2877 txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2878 (mrp->mrp_ntxrings > 0 ||
2879 (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2880
2881 /*
2882 * By default we have given the primary all the rings
2883 * i.e. the default group. Let's see if the primary
2884 * needs to be relocated so that the addition of this
2885 * client doesn't impact the primary's performance,
2886 * i.e. if the primary is in the default group and
2887 * we add this client, the primary will lose polling.
2888 * We do this only for NICs supporting dynamic ring
2889 * grouping and only when this is the first client
2890 * after the primary (i.e. nactiveclients is 2)
2891 */
2892 if (!isprimary && mip->mi_nactiveclients == 2 &&
2893 (group_only_mcip = mac_primary_client_handle(mip)) !=
2894 NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2895 reloc_pmcip = mac_check_primary_relocation(
2896 group_only_mcip, rxhw);
2897 }
2898 /*
2899 * Check to see if we can get an exclusive group for
2900 * this mac address or if there already exists a
2901 * group that has this mac address (case of VLANs).
2902 * If no groups are available, use the default group.
2903 */
2904 rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2905 if (rgroup == NULL && rxhw) {
2906 err = ENOSPC;
2907 goto setup_failed;
2908 } else if (rgroup == NULL) {
2909 rgroup = default_rgroup;
2910 }
2911 /*
2912 * Check to see if we can get an exclusive group for
2913 * this mac client. If no groups are available, use
2914 * the default group.
2915 */
2916 tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2917 if (tgroup == NULL && txhw) {
2918 if (rgroup != NULL && rgroup != default_rgroup)
2919 mac_release_rx_group(mcip, rgroup);
2920 err = ENOSPC;
2921 goto setup_failed;
2922 } else if (tgroup == NULL) {
2923 tgroup = default_tgroup;
2924 }
2925
2926 /*
2927 * Some NICs don't support any Rx rings, so there may not
2928 * even be a default group.
2929 */
2930 grp_found:
2931 if (rgroup != NULL) {
2932 if (rgroup != default_rgroup &&
2933 MAC_GROUP_NO_CLIENT(rgroup) &&
2934 (rxhw || mcip->mci_share != 0)) {
2935 MAC_RX_GRP_RESERVED(mip);
2936 if (mip->mi_rx_group_type ==
2937 MAC_GROUP_TYPE_DYNAMIC) {
2938 MAC_RX_RING_RESERVED(mip,
2939 rgroup->mrg_cur_count);
2940 }
2941 }
2942 flent->fe_rx_ring_group = rgroup;
2943 /*
2944 * Add the client to the group. This could cause
2945 * either this group to move to the shared state or
2946 * cause the default group to move to the shared state.
2947 * The actions on this group are done here, while the
2948 * actions on the default group are postponed to
2949 * the end of this function.
2950 */
2951 mac_group_add_client(rgroup, mcip);
2952 next_state = mac_group_next_state(rgroup,
2953 &group_only_mcip, default_rgroup, B_TRUE);
2954 mac_set_group_state(rgroup, next_state);
2955 }
2956
2957 if (tgroup != NULL) {
2958 if (tgroup != default_tgroup &&
2959 MAC_GROUP_NO_CLIENT(tgroup) &&
2960 (txhw || mcip->mci_share != 0)) {
2961 MAC_TX_GRP_RESERVED(mip);
2962 if (mip->mi_tx_group_type ==
2963 MAC_GROUP_TYPE_DYNAMIC) {
2964 MAC_TX_RING_RESERVED(mip,
2965 tgroup->mrg_cur_count);
2966 }
2967 }
2968 flent->fe_tx_ring_group = tgroup;
2969 mac_group_add_client(tgroup, mcip);
2970 next_state = mac_group_next_state(tgroup,
2971 &group_only_mcip, default_tgroup, B_FALSE);
2972 tgroup->mrg_state = next_state;
2973 }
2974 /*
2975 * Setup the Rx and Tx SRSes. If we got a pristine group
2976 * exclusively above, mac_srs_group_setup would simply create
2977 * the required SRSes. If we ended up sharing a previously
2978 * reserved group, mac_srs_group_setup would also dismantle the
2979 * SRSes of the previously exclusive group
2980 */
2981 mac_srs_group_setup(mcip, flent, link_type);
2982
2983 /* We are setting up minimal datapath only */
2984 if (no_unicast)
2985 break;
		/* Program the S/W Classifier */
2987 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
2988 goto setup_failed;
2989
2990 /* Program the H/W Classifier */
2991 if ((err = mac_add_macaddr(mip, rgroup, mac_addr,
2992 (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0)
2993 goto setup_failed;
2994 mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
2995 ASSERT(mcip->mci_unicast != NULL);
2996 /* (Re)init the v6 token & local addr used by link protection */
2997 mac_protect_update_mac_token(mcip);
2998 break;
2999
3000 default:
3001 ASSERT(B_FALSE);
3002 break;
3003 }
3004
3005 /*
3006 * All broadcast and multicast traffic is received only on the default
3007 * group. If we have setup the datapath for a non-default group above
3008 * then move the default group to shared state to allow distribution of
3009 * incoming broadcast traffic to the other groups and dismantle the
3010 * SRSes over the default group.
3011 */
3012 if (rgroup != NULL) {
3013 if (rgroup != default_rgroup) {
3014 if (default_rgroup->mrg_state ==
3015 MAC_GROUP_STATE_RESERVED) {
3019 mip->mi_nactiveclients > 1);
3020
3021 mac_set_group_state(default_rgroup,
3022 MAC_GROUP_STATE_SHARED);
3023 mac_rx_srs_group_setup(group_only_mcip,
3024 group_only_mcip->mci_flent, SRST_LINK);
3025 pool_lock();
3026 cpupart = mac_pset_find(mrp, &use_default);
3027 mac_fanout_setup(group_only_mcip,
3028 group_only_mcip->mci_flent,
3029 MCIP_RESOURCE_PROPS(group_only_mcip),
3030 mac_rx_deliver, group_only_mcip, NULL,
3031 cpupart);
3032 mac_set_pool_effective(use_default, cpupart,
3033 mrp, emrp);
3034 pool_unlock();
3035 }
3036 ASSERT(default_rgroup->mrg_state ==
3037 MAC_GROUP_STATE_SHARED);
3038 }
3039 /*
3040 * If we get an exclusive group for a VLAN MAC client we
3041 * need to take the s/w path to make the additional check for
3042 * the vid. Disable polling and set it to s/w classification.
3043 * Similarly for clients that don't have a unicast address.
3044 */
3045 if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3046 (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) {
3047 mac_rx_switch_grp_to_sw(rgroup);
3048 }
3049 }
3050 mac_set_rings_effective(mcip);
3051 return (0);
3052
3053 setup_failed:
3054 /* Switch the primary back to default group */
3055 if (reloc_pmcip != NULL) {
3056 (void) mac_rx_switch_group(reloc_pmcip,
3057 reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3058 }
3059 mac_datapath_teardown(mcip, flent, link_type);
3060 return (err);
3061 }
3062
3063 void
3064 mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
3065 uint32_t link_type)
3066 {
3067 mac_impl_t *mip = mcip->mci_mip;
3068 mac_group_t *group = NULL;
3069 mac_client_impl_t *grp_only_mcip;
3070 flow_entry_t *group_only_flent;
3071 mac_group_t *default_group;
3072 boolean_t check_default_group = B_FALSE;
3073 mac_group_state_t next_state;
3074 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
3075
3076 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3077
3078 switch (link_type) {
3079 case SRST_FLOW:
3080 mac_rx_srs_group_teardown(flent, B_FALSE);
3081 mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3082 return;
3083
3084 case SRST_LINK:
3085 /* Stop sending packets */
3086 mac_tx_client_block(mcip);
3087
3088 /* Stop the packets coming from the H/W */
3089 if (mcip->mci_unicast != NULL) {
3090 int err;
3091 err = mac_remove_macaddr(mcip->mci_unicast);
3092 if (err != 0) {
3093 cmn_err(CE_WARN, "%s: failed to remove a MAC"
3094 " address because of error 0x%x",
3095 mip->mi_name, err);
3096 }
3097 mcip->mci_unicast = NULL;
3098 }
3099
3100 /* Stop the packets coming from the S/W classifier */
3101 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3102 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3103
3104 /* Now quiesce and destroy all SRS and soft rings */
3105 mac_rx_srs_group_teardown(flent, B_FALSE);
3106 mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3107
3108 ASSERT((mcip->mci_flent == flent) &&
3109 (flent->fe_next == NULL));
3110
3111 /*
3112 * Release our hold on the group as well. We need
3113 * to check if the shared group has only one client
3114 * left who can use it exclusively. Also, if we
3115 * were the last client, release the group.
3116 */
3117 group = flent->fe_rx_ring_group;
3118 default_group = MAC_DEFAULT_RX_GROUP(mip);
3119 if (group != NULL) {
3120 mac_group_remove_client(group, mcip);
3121 next_state = mac_group_next_state(group,
3122 &grp_only_mcip, default_group, B_TRUE);
3123 if (next_state == MAC_GROUP_STATE_RESERVED) {
3124 /*
3125 * Only one client left on this RX group.
3126 */
3127 ASSERT(grp_only_mcip != NULL);
3128 mac_set_group_state(group,
3129 MAC_GROUP_STATE_RESERVED);
3130 group_only_flent = grp_only_mcip->mci_flent;
3131
3132 /*
3133 * The only remaining client has exclusive
3134 * access on the group. Allow it to
3135 * dynamically poll the H/W rings etc.
3136 */
3137 mac_rx_srs_group_setup(grp_only_mcip,
3138 group_only_flent, SRST_LINK);
3139 mac_fanout_setup(grp_only_mcip,
3140 group_only_flent,
3141 MCIP_RESOURCE_PROPS(grp_only_mcip),
3142 mac_rx_deliver, grp_only_mcip, NULL, NULL);
3143 mac_rx_group_unmark(group, MR_INCIPIENT);
3144 mac_set_rings_effective(grp_only_mcip);
3145 } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3146 /*
3147 * This is a non-default group being freed up.
3148 * We need to reevaluate the default group
3149 * to see if the primary client can get
3150 * exclusive access to the default group.
3151 */
3152 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
3153 if (mrp->mrp_mask & MRP_RX_RINGS) {
3154 MAC_RX_GRP_RELEASED(mip);
3155 if (mip->mi_rx_group_type ==
3156 MAC_GROUP_TYPE_DYNAMIC) {
3157 MAC_RX_RING_RELEASED(mip,
3158 group->mrg_cur_count);
3159 }
3160 }
3161 mac_release_rx_group(mcip, group);
3162 mac_set_group_state(group,
3163 MAC_GROUP_STATE_REGISTERED);
3164 check_default_group = B_TRUE;
3165 } else {
3166 ASSERT(next_state == MAC_GROUP_STATE_SHARED);
3167 mac_set_group_state(group,
3168 MAC_GROUP_STATE_SHARED);
3169 mac_rx_group_unmark(group, MR_CONDEMNED);
3170 }
3171 flent->fe_rx_ring_group = NULL;
3172 }
3173 /*
3174 * Remove the client from the TX group. Additionally, if
3175 * this a non-default group, then we also need to release
3176 * the group.
3177 */
3178 group = flent->fe_tx_ring_group;
3179 default_group = MAC_DEFAULT_TX_GROUP(mip);
3180 if (group != NULL) {
3181 mac_group_remove_client(group, mcip);
3182 next_state = mac_group_next_state(group,
3183 &grp_only_mcip, default_group, B_FALSE);
3184 if (next_state == MAC_GROUP_STATE_REGISTERED) {
3185 if (group != default_group) {
3186 if (mrp->mrp_mask & MRP_TX_RINGS) {
3235 } else if (next_state == MAC_GROUP_STATE_RESERVED) {
3236 mac_set_rings_effective(grp_only_mcip);
3237 }
3238 flent->fe_tx_ring_group = NULL;
3239 group->mrg_state = next_state;
3240 }
3241 break;
3242 default:
3243 ASSERT(B_FALSE);
3244 break;
3245 }
3246
3247 /*
3248 * The mac client using the default group gets exclusive access to the
3249 * default group if and only if it is the sole client on the entire
3250 * mip. If so set the group state to reserved, and set up the SRSes
3251 * over the default group.
3252 */
3253 if (check_default_group) {
3254 default_group = MAC_DEFAULT_RX_GROUP(mip);
3255 ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED);
3256 next_state = mac_group_next_state(default_group,
3257 &grp_only_mcip, default_group, B_TRUE);
3258 if (next_state == MAC_GROUP_STATE_RESERVED) {
3259 ASSERT(grp_only_mcip != NULL &&
3260 mip->mi_nactiveclients == 1);
3261 mac_set_group_state(default_group,
3262 MAC_GROUP_STATE_RESERVED);
3263 mac_rx_srs_group_setup(grp_only_mcip,
3264 grp_only_mcip->mci_flent, SRST_LINK);
3265 mac_fanout_setup(grp_only_mcip,
3266 grp_only_mcip->mci_flent,
3267 MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3268 grp_only_mcip, NULL, NULL);
3269 mac_rx_group_unmark(default_group, MR_INCIPIENT);
3270 mac_set_rings_effective(grp_only_mcip);
3271 }
3272 }
3273
3274 /*
3275 * If the primary is the only one left and the MAC supports
3276 * dynamic grouping, we need to see if the primary needs to
3277 * be moved to the default group so that it can use all the
3278 * H/W rings.
3279 */
3280 if (!(flent->fe_type & FLOW_PRIMARY_MAC) &&
mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
{
	int i;
	mac_soft_ring_t *soft_ring, *remove_sring;
	mac_client_impl_t *mcip = mac_srs->srs_mcip;

	/*
	 * Find the Tx soft ring bound to tx_ring; the backing ring is
	 * stored in the soft ring's s_ring_tx_arg2.
	 */
	mutex_enter(&mac_srs->srs_lock);
	for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
		soft_ring = mac_srs->srs_tx_soft_rings[i];
		if (soft_ring->s_ring_tx_arg2 == tx_ring)
			break;
	}
	mutex_exit(&mac_srs->srs_lock);
	/* The caller must pass a ring that is actually on this SRS. */
	ASSERT(i < mac_srs->srs_tx_ring_count);
	remove_sring = soft_ring;
	/*
	 * In the case of aggr, the soft ring associated with a Tx ring
	 * is also stored in st_soft_rings[] array. That entry should
	 * be removed.
	 */
	if (mcip->mci_state_flags & MCIS_IS_AGGR) {
		mac_srs_tx_t *tx = &mac_srs->srs_tx;

		ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
		tx->st_soft_rings[tx_ring->mr_index] = NULL;
	}
	mac_soft_ring_remove(mac_srs, remove_sring);
	/* Recompute the fanout arrays now that a soft ring is gone. */
	mac_srs_update_fanout_list(mac_srs);
}
3793
3794 /*
3795 * mac_tx_srs_setup():
3796 * Used to setup Tx rings. If no free Tx ring is available, then default
3797 * Tx ring is used.
3798 */
3799 void
3800 mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
3801 {
3802 mac_impl_t *mip = mcip->mci_mip;
3803 mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs;
3804 int i;
3805 int tx_ring_count = 0;
3806 uint32_t soft_ring_type;
3807 mac_group_t *grp = NULL;
3808 mac_ring_t *ring;
3809 mac_srs_tx_t *tx = &tx_srs->srs_tx;
3810 boolean_t is_aggr;
3811 uint_t ring_info = 0;
3812
3813 is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0;
3814 grp = flent->fe_tx_ring_group;
3815 if (grp == NULL) {
3816 ring = (mac_ring_t *)mip->mi_default_tx_ring;
3817 goto no_group;
3818 }
3819 tx_ring_count = grp->mrg_cur_count;
3820 ring = grp->mrg_rings;
3821 /*
3822 * An attempt is made to reserve 'tx_ring_count' number
3823 * of Tx rings. If tx_ring_count is 0, default Tx ring
3824 * is used. If it is 1, an attempt is made to reserve one
3825 * Tx ring. In both the cases, the ring information is
3826 * stored in Tx SRS. If multiple Tx rings are specified,
3827 * then each Tx ring will have a Tx-side soft ring. All
3828 * these soft rings will be hang off Tx SRS.
3829 */
3830 switch (grp->mrg_state) {
3831 case MAC_GROUP_STATE_SHARED:
3832 case MAC_GROUP_STATE_RESERVED:
3833 if (tx_ring_count <= 1 && !is_aggr) {
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2018 Joyent, Inc.
24 */
25
26 #include <sys/types.h>
27 #include <sys/callb.h>
28 #include <sys/cpupart.h>
29 #include <sys/pool.h>
30 #include <sys/pool_pset.h>
31 #include <sys/sdt.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/vlan.h>
35 #include <inet/ipsec_impl.h>
36 #include <inet/ip_impl.h>
37 #include <inet/sadb.h>
38 #include <inet/ipsecesp.h>
39 #include <inet/ipsecah.h>
40
41 #include <sys/mac_impl.h>
42 #include <sys/mac_client_impl.h>
43 #include <sys/mac_client_priv.h>
1169 *
 1170 * The underlying device can expose up to MAX_RINGS_PER_GROUP worth of
1171 * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of
1172 * array space is needed to store Tx soft rings. Thus we allocate so
1173 * much array space for srs_tx_soft_rings.
1174 *
1175 * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth
1176 * of space to st_soft_rings. This array is used for quick access to
1177 * soft ring associated with a pseudo Tx ring based on the pseudo
1178 * ring's index (mr_index).
1179 */
1180 static void
1181 mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 {
1183 mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184
1185 if (mac_srs->srs_type & SRST_TX) {
1186 mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188 MAX_RINGS_PER_GROUP, KM_SLEEP);
1189 if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
1190 mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191
1192 tx->st_soft_rings = (mac_soft_ring_t **)
1193 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194 MAX_RINGS_PER_GROUP, KM_SLEEP);
1195 }
1196 } else {
1197 mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199 KM_SLEEP);
1200 mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **)
1201 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1202 KM_SLEEP);
1203 mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **)
1204 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1205 KM_SLEEP);
1206 }
1207 }
1208
1209 static void
1578 mutex_exit(&srs->srs_lock);
1579
1580 mac_tx_client_restart((mac_client_handle_t)mcip);
1581 }
1582
1583 /*
1584 * The uber function that deals with any update to bandwidth limits.
1585 */
1586 void
1587 mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
1588 {
1589 int count;
1590
1591 for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592 mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593 mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 }
1595
1596 /*
1597 * When the first sub-flow is added to a link, we disable polling on the
1598 * link and also modify the entry point to mac_rx_srs_subflow_process().
1599 * (polling is disabled because with the subflow added, accounting
1600 * for polling needs additional logic, it is assumed that when a subflow is
1601 * added, we can take some hit as a result of disabling polling rather than
1602 * adding more complexity - if this becomes a perf. issue we need to
 1603 * re-evaluate this logic). When the last subflow is removed, we turn back
1604 * polling and also reset the entry point to mac_rx_srs_process().
1605 *
1606 * In the future if there are multiple SRS, we can simply
1607 * take one and give it to the flow rather than disabling polling and
1608 * resetting the entry point.
1609 */
1610 void
1611 mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 {
1613 flow_entry_t *flent = mcip->mci_flent;
1614 int i;
1615 mac_impl_t *mip = mcip->mci_mip;
1616 mac_rx_func_t rx_func;
1617 uint_t rx_srs_cnt;
1618 boolean_t enable_classifier;
1619
1620 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1621
1622 enable_classifier = !FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && enable;
1623
1624 rx_func = enable_classifier ? mac_rx_srs_subflow_process :
1629 enable_classifier = B_TRUE;
1630
1631 /*
1632 * If receive function has already been configured correctly for
1633 * current subflow configuration, do nothing.
1634 */
1635 if (flent->fe_cb_fn == (flow_fn_t)rx_func)
1636 return;
1637
1638 rx_srs_cnt = flent->fe_rx_srs_cnt;
1639 for (i = 0; i < rx_srs_cnt; i++) {
1640 ASSERT(flent->fe_rx_srs[i] != NULL);
1641 mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642 enable_classifier, rx_func);
1643 }
1644
1645 /*
1646 * Change the S/W classifier so that we can land in the
1647 * correct processing function with correct argument.
1648 * If all subflows have been removed we can revert to
1649 * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process().
1650 */
1651 mutex_enter(&flent->fe_lock);
1652 flent->fe_cb_fn = (flow_fn_t)rx_func;
1653 flent->fe_cb_arg1 = (void *)mip;
1654 flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655 mutex_exit(&flent->fe_lock);
1656 }
1657
1658 static void
1659 mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
1660 {
1661 int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0;
1662 mac_soft_ring_t *softring;
1663
1664 softring = mac_srs->srs_soft_ring_head;
1665 if (softring == NULL) {
1666 ASSERT(mac_srs->srs_soft_ring_count == 0);
1667 mac_srs->srs_tcp_ring_count = 0;
1668 mac_srs->srs_udp_ring_count = 0;
1669 mac_srs->srs_oth_ring_count = 0;
2168 mutex_exit(&mac_bw->mac_bw_lock);
2169 mac_srs->srs_type |= SRST_BW_CONTROL;
2170 } else {
2171 mac_srs->srs_drain_func = mac_rx_srs_drain;
2172 }
2173
2174 /*
2175 * We use the following policy to control Receive
2176 * Side Dynamic Polling:
2177 * 1) We switch to poll mode anytime the processing thread causes
2178 * a backlog to build up in SRS and its associated Soft Rings
2179 * (sr_poll_pkt_cnt > 0).
2180 * 2) As long as the backlog stays under the low water mark
2181 * (sr_lowat), we poll the H/W for more packets.
2182 * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183 * stay in poll mode but don't poll the H/W for more packets.
2184 * 4) Anytime in polling mode, if we poll the H/W for packets and
2185 * find nothing plus we have an existing backlog
2186 * (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187 * the H/W for packets anymore (let the polling thread go to sleep).
2188 * 5) Once the backlog is relieved (packets are processed) we reenable
2189 * polling (by signalling the poll thread) only when the backlog
2190 * dips below sr_poll_thres.
2191 * 6) sr_hiwat is used exclusively when we are not polling capable
2192 * and is used to decide when to drop packets so the SRS queue
2193 * length doesn't grow infinitely.
2194 */
2195 if (!is_tx_srs) {
2196 srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197 /* Low water mark needs to be less than high water mark */
2198 srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
2199 mac_soft_ring_max_q_cnt ? mac_soft_ring_min_q_cnt :
2200 (mac_soft_ring_max_q_cnt >> 2);
2201 /* Poll threshold need to be half of low water mark or less */
2202 srs_rx->sr_poll_thres = mac_soft_ring_poll_thres <=
2203 (srs_rx->sr_lowat >> 1) ? mac_soft_ring_poll_thres :
2204 (srs_rx->sr_lowat >> 1);
2205 if (mac_latency_optimize)
2206 mac_srs->srs_state |= SRS_LATENCY_OPT;
2207 else
2208 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2239 /* Is the mac_srs created over the RX default group? */
2240 if (ring->mr_gh == (mac_group_handle_t)
2241 MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) {
2242 mac_srs->srs_type |= SRST_DEFAULT_GRP;
2243 }
2244 mac_srs->srs_ring = ring;
2245 ring->mr_srs = mac_srs;
2246 ring->mr_classify_type = MAC_HW_CLASSIFIER;
2247 ring->mr_flag |= MR_INCIPIENT;
2248
2249 if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250 FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251 mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252
2253 mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254 mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255 mac_srs->srs_pri);
2256 /*
2257 * Some drivers require serialization and don't send
2258 * packet chains in interrupt context. For such
2259 * drivers, we should always queue in the soft ring
2260 * so that we get a chance to switch into polling
2261 * mode under backlog.
2262 */
2263 ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264 if (ring_info & MAC_RING_RX_ENQUEUE)
2265 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266 }
2267 done:
2268 mac_srs_stat_create(mac_srs);
2269 return (mac_srs);
2270 }
2271
2272 /*
 2273 * Figure out the number of soft rings required. It's dependent on
2274 * if protocol fanout is required (for LINKs), global settings
2275 * require us to do fanout for performance (based on mac_soft_ring_enable),
2276 * or user has specifically requested fanout.
2277 */
2278 static uint32_t
2279 mac_find_fanout(flow_entry_t *flent, uint32_t link_type)
2280 {
2347 mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2348 uint32_t link_type)
2349 {
2350 cpupart_t *cpupart;
2351 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
2352 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2353 boolean_t use_default = B_FALSE;
2354
2355 mac_rx_srs_group_setup(mcip, flent, link_type);
2356 mac_tx_srs_group_setup(mcip, flent, link_type);
2357
2358 pool_lock();
2359 cpupart = mac_pset_find(mrp, &use_default);
2360 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
2361 mac_rx_deliver, mcip, NULL, cpupart);
2362 mac_set_pool_effective(use_default, cpupart, mrp, emrp);
2363 pool_unlock();
2364 }
2365
2366 /*
2367 * Set up the Rx SRSes. If there is no group associated with the
2368 * client, then only setup SW classification. If the client has
2369 * exlusive (MAC_GROUP_STATE_RESERVED) use of the group, then create an
2370 * SRS for each HW ring. If the client is sharing a group, then make
2371 * sure to teardown the HW SRSes.
2372 */
void
mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
    uint32_t link_type)
{
	mac_impl_t *mip = mcip->mci_mip;
	mac_soft_ring_set_t *mac_srs;
	mac_ring_t *ring;
	uint32_t fanout_type;
	mac_group_t *rx_group = flent->fe_rx_ring_group;
	boolean_t no_unicast;

	fanout_type = mac_find_fanout(flent, link_type);

	/* A client with no unicast address always needs SW classification. */
	no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0;

	/* Create the SRS for SW classification if none exists */
	if (flent->fe_rx_srs[0] == NULL) {
		ASSERT(flent->fe_rx_srs_cnt == 0);
		mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
		    mac_rx_deliver, mcip, NULL, NULL);

		/*
		 * Point the flow entry's callback at the new SRS so SW
		 * classified packets land in its lower receive path.
		 */
		mutex_enter(&flent->fe_lock);
		flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
		flent->fe_cb_arg1 = (void *)mip;
		flent->fe_cb_arg2 = (void *)mac_srs;
		mutex_exit(&flent->fe_lock);
	}

	/* No Rx group: only the SW classification SRS above is needed. */
	if (rx_group == NULL)
		return;

	/*
	 * If the group is marked RESERVED then setup an SRS and
	 * fanout for each HW ring.
	 */
	switch (rx_group->mrg_state) {
	case MAC_GROUP_STATE_RESERVED:
		for (ring = rx_group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			uint16_t vid = i_mac_flow_vid(mcip->mci_flent);

			switch (ring->mr_state) {
			case MR_INUSE:
			case MR_FREE:
				/* Ring already has an SRS; nothing to do. */
				if (ring->mr_srs != NULL)
					break;
				if (ring->mr_state != MR_INUSE)
					(void) mac_start_ring(ring);

				/*
				 * If a client requires SW VLAN
				 * filtering or has no unicast address
				 * then we don't create any HW ring
				 * SRSes.
				 */
				if ((!MAC_GROUP_HW_VLAN(rx_group) &&
				    vid != VLAN_ID_NONE) || no_unicast)
					break;

				/*
				 * When a client has exclusive use of
				 * a group, and that group's traffic
				 * is fully HW classified, we create
				 * an SRS for each HW ring in order to
				 * make use of dynamic polling of said
				 * HW rings.
				 */
				mac_srs = mac_srs_create(mcip, flent,
				    fanout_type | link_type,
				    mac_rx_deliver, mcip, NULL, ring);
				break;
			default:
				/* Unexpected ring state: fatal. */
				cmn_err(CE_PANIC,
				    "srs_setup: mcip = %p "
				    "trying to add UNKNOWN ring = %p\n",
				    (void *)mcip, (void *)ring);
				break;
			}
		}
		break;
	case MAC_GROUP_STATE_SHARED:
		/*
		 * When a group is shared by multiple clients, we must
		 * use SW classification to ensure packets are
		 * delivered to the correct client.
		 */
		mac_rx_switch_grp_to_sw(rx_group);
		break;
	default:
		ASSERT(B_FALSE);
		break;
	}
}
2464
2465 /*
2466 * Set up the TX SRS.
2467 */
2468 void
2469 mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2470 uint32_t link_type)
2471 {
2472 int cnt;
2473 int ringcnt;
2474 mac_ring_t *ring;
2475 mac_group_t *grp;
2488 return;
2489 grp = (mac_group_t *)flent->fe_tx_ring_group;
2490 ringcnt = grp->mrg_cur_count;
2491 ring = grp->mrg_rings;
2492 for (cnt = 0; cnt < ringcnt; cnt++) {
2493 if (ring->mr_state != MR_INUSE) {
2494 (void) mac_start_ring(ring);
2495 }
2496 ring = ring->mr_next;
2497 }
2498 return;
2499 }
2500 if (flent->fe_tx_srs == NULL) {
2501 (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2502 NULL, mcip, NULL, NULL);
2503 }
2504 mac_tx_srs_setup(mcip, flent);
2505 }
2506
2507 /*
2508 * Teardown all the Rx SRSes. Unless hwonly is set, then only teardown
2509 * the Rx HW SRSes and leave the SW SRS alone. The hwonly flag is set
2510 * when we wish to move a MAC client from one group to another. In
2511 * that case, we need to release the current HW SRSes but keep the SW
 2512 * SRS for continued traffic classification.
2513 */
2514 void
2515 mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2516 {
2517 mac_soft_ring_set_t *mac_srs;
2518 int i;
2519 int count = flent->fe_rx_srs_cnt;
2520
2521 for (i = 0; i < count; i++) {
2522 if (i == 0 && hwonly)
2523 continue;
2524 mac_srs = flent->fe_rx_srs[i];
2525 mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2526 mac_srs_free(mac_srs);
2527 flent->fe_rx_srs[i] = NULL;
2528 flent->fe_rx_srs_cnt--;
2529 }
2530
2531 /*
2532 * If we are only tearing down the HW SRSes then there must be
2533 * one SRS left for SW classification. Otherwise we are tearing
2534 * down both HW and SW and there should be no SRSes left.
2535 */
2536 if (hwonly)
2537 VERIFY3S(flent->fe_rx_srs_cnt, ==, 1);
2538 else
2539 VERIFY3S(flent->fe_rx_srs_cnt, ==, 0);
2540 }
2541
2542 /*
2543 * Remove the TX SRS.
2544 */
2545 void
2546 mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2547 uint32_t link_type)
2548 {
2549 mac_soft_ring_set_t *tx_srs;
2550 mac_srs_tx_t *tx;
2551
2552 if ((tx_srs = flent->fe_tx_srs) == NULL)
2553 return;
2554
2555 tx = &tx_srs->srs_tx;
2556 switch (link_type) {
2557 case SRST_FLOW:
2558 /*
2559 * For flows, we need to work with passed
2821 *
2822 * Note: In future, if no fanout is specified, we try to assign 2 Rx
2823 * rings for the primary Link with the primary MAC address + TCP going
2824 * to one ring and primary MAC address + UDP|SCTP going to other ring.
2825 * Any remaining traffic for primary MAC address can go to the default
2826 * Rx ring and get S/W classified. This way the respective SRSs don't
2827 * need to do proto fanout and don't need to have softrings at all and
2828 * can poll their respective Rx rings.
2829 *
2830 * As an optimization, when a new NIC or VNIC is created, we can get
2831 * only one Rx ring and make it a TCP specific Rx ring and use the
2832 * H/W default Rx ring for the rest (this Rx ring is never polled).
2833 *
2834 * For clients that don't have MAC address, but want to receive and
2835 * transmit packets (e.g, bpf, gvrp etc.), we need to setup the datapath.
2836 * For such clients (identified by the MCIS_NO_UNICAST_ADDR flag) we
2837 * always give the default group and use software classification (i.e.
2838 * even if this is the only client in the default group, we will
2839 * leave group as shared).
2840 */
2841
2842 int
2843 mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2844 uint32_t link_type)
2845 {
2846 mac_impl_t *mip = mcip->mci_mip;
2847 mac_group_t *rgroup = NULL;
2848 mac_group_t *tgroup = NULL;
2849 mac_group_t *default_rgroup;
2850 mac_group_t *default_tgroup;
2851 int err;
2852 uint16_t vid;
2853 uint8_t *mac_addr;
2854 mac_group_state_t next_state;
2855 mac_client_impl_t *group_only_mcip;
2856 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
2857 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2858 boolean_t rxhw;
2859 boolean_t txhw;
2860 boolean_t use_default = B_FALSE;
2861 cpupart_t *cpupart;
2862 boolean_t no_unicast;
2863 boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2864 mac_client_impl_t *reloc_pmcip = NULL;
2865 boolean_t use_hw;
2866
2867 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2868
2869 switch (link_type) {
2870 case SRST_FLOW:
2871 mac_srs_group_setup(mcip, flent, link_type);
2872 return (0);
2873
2874 case SRST_LINK:
2875 no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2876 mac_addr = flent->fe_flow_desc.fd_dst_mac;
2877
2878 /* Default RX group */
2879 default_rgroup = MAC_DEFAULT_RX_GROUP(mip);
2880
2881 /* Default TX group */
2882 default_tgroup = MAC_DEFAULT_TX_GROUP(mip);
2883
2884 if (no_unicast) {
2885 rgroup = default_rgroup;
2886 tgroup = default_tgroup;
2887 goto grp_found;
2888 }
2889 rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2890 (mrp->mrp_nrxrings > 0 ||
2891 (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2892 txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2893 (mrp->mrp_ntxrings > 0 ||
2894 (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2895
2896 /*
2897 * All the rings initially belong to the default group
2898 * under dynamic grouping. The primary client uses the
2899 * default group when it is the only client. The
2900 * default group is also used as the destination for
2901 * all multicast and broadcast traffic of all clients.
2902 * Therefore, the primary client loses its ability to
2903 * poll the softrings on addition of a second client.
2904 * To avoid a performance penalty, MAC will move the
2905 * primary client to a dedicated group when it can.
2906 *
2907 * When using static grouping, the primary client
2908 * begins life on a non-default group. There is
2909 * no moving needed upon addition of a second client.
2910 */
2911 if (!isprimary && mip->mi_nactiveclients == 2 &&
2912 (group_only_mcip = mac_primary_client_handle(mip)) !=
2913 NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2914 reloc_pmcip = mac_check_primary_relocation(
2915 group_only_mcip, rxhw);
2916 }
2917
2918 /*
2919 * Check to see if we can get an exclusive group for
2920 * this mac address or if there already exists a
2921 * group that has this mac address (case of VLANs).
2922 * If no groups are available, use the default group.
2923 */
2924 rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2925 if (rgroup == NULL && rxhw) {
2926 err = ENOSPC;
2927 goto setup_failed;
2928 } else if (rgroup == NULL) {
2929 rgroup = default_rgroup;
2930 }
2931
2932 /*
2933 * If we are adding a second client to a
2934 * non-default group then we need to move the
2935 * existing client to the default group and
2936 * add the new client to the default group as
2937 * well.
2938 */
2939 if (rgroup != default_rgroup &&
2940 rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) {
2941 group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup);
2942 err = mac_rx_switch_group(group_only_mcip, rgroup,
2943 default_rgroup);
2944
2945 if (err != 0)
2946 goto setup_failed;
2947
2948 rgroup = default_rgroup;
2949 }
2950
2951 /*
2952 * Check to see if we can get an exclusive group for
2953 * this mac client. If no groups are available, use
2954 * the default group.
2955 */
2956 tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2957 if (tgroup == NULL && txhw) {
2958 if (rgroup != NULL && rgroup != default_rgroup)
2959 mac_release_rx_group(mcip, rgroup);
2960 err = ENOSPC;
2961 goto setup_failed;
2962 } else if (tgroup == NULL) {
2963 tgroup = default_tgroup;
2964 }
2965
2966 /*
2967 * Some NICs don't support any Rx rings, so there may not
2968 * even be a default group.
2969 */
2970 grp_found:
2971 if (rgroup != NULL) {
2972 if (rgroup != default_rgroup &&
2973 MAC_GROUP_NO_CLIENT(rgroup) &&
2974 (rxhw || mcip->mci_share != 0)) {
2975 MAC_RX_GRP_RESERVED(mip);
2976 if (mip->mi_rx_group_type ==
2977 MAC_GROUP_TYPE_DYNAMIC) {
2978 MAC_RX_RING_RESERVED(mip,
2979 rgroup->mrg_cur_count);
2980 }
2981 }
2982
2983 flent->fe_rx_ring_group = rgroup;
2984 /*
2985 * Add the client to the group and update the
2986 * group's state. If rgroup != default_group
2987 * then the rgroup should only ever have one
2988 * client and be in the RESERVED state. But no
2989 * matter what, the default_rgroup will enter
2990 * the SHARED state since it has to receive
2991 * all broadcast and multicast traffic. This
2992 * case is handled later in the function.
2993 */
2994 mac_group_add_client(rgroup, mcip);
2995 next_state = mac_group_next_state(rgroup,
2996 &group_only_mcip, default_rgroup, B_TRUE);
2997 mac_set_group_state(rgroup, next_state);
2998 }
2999
3000 if (tgroup != NULL) {
3001 if (tgroup != default_tgroup &&
3002 MAC_GROUP_NO_CLIENT(tgroup) &&
3003 (txhw || mcip->mci_share != 0)) {
3004 MAC_TX_GRP_RESERVED(mip);
3005 if (mip->mi_tx_group_type ==
3006 MAC_GROUP_TYPE_DYNAMIC) {
3007 MAC_TX_RING_RESERVED(mip,
3008 tgroup->mrg_cur_count);
3009 }
3010 }
3011 flent->fe_tx_ring_group = tgroup;
3012 mac_group_add_client(tgroup, mcip);
3013 next_state = mac_group_next_state(tgroup,
3014 &group_only_mcip, default_tgroup, B_FALSE);
3015 tgroup->mrg_state = next_state;
3016 }
3017
3018 /* We are setting up minimal datapath only */
3019 if (no_unicast) {
3020 mac_srs_group_setup(mcip, flent, link_type);
3021 break;
3022 }
3023
3024 /* Program software classification. */
3025 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
3026 goto setup_failed;
3027
3028 /* Program hardware classification. */
3029 vid = i_mac_flow_vid(flent);
3030 use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
3031 err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);
3032
3033 if (err != 0)
3034 goto setup_failed;
3035
3036 mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
3037 VERIFY3P(mcip->mci_unicast, !=, NULL);
3038
3039 /*
3040 * Setup the Rx and Tx SRSes. If the client has a
3041 * reserved group, then mac_srs_group_setup() creates
3042 * the required SRSes for the HW rings. If we have a
3043 * shared group, mac_srs_group_setup() dismantles the
3044 * HW SRSes of the previously exclusive group.
3045 */
3046 mac_srs_group_setup(mcip, flent, link_type);
3047
3048 /* (Re)init the v6 token & local addr used by link protection */
3049 mac_protect_update_mac_token(mcip);
3050 break;
3051
3052 default:
3053 ASSERT(B_FALSE);
3054 break;
3055 }
3056
3057 /*
3058 * All broadcast and multicast traffic is received only on the default
3059 * group. If we have setup the datapath for a non-default group above
3060 * then move the default group to shared state to allow distribution of
3061 * incoming broadcast traffic to the other groups and dismantle the
3062 * SRSes over the default group.
3063 */
3064 if (rgroup != NULL) {
3065 if (rgroup != default_rgroup) {
3066 if (default_rgroup->mrg_state ==
3067 MAC_GROUP_STATE_RESERVED) {
3071 mip->mi_nactiveclients > 1);
3072
3073 mac_set_group_state(default_rgroup,
3074 MAC_GROUP_STATE_SHARED);
3075 mac_rx_srs_group_setup(group_only_mcip,
3076 group_only_mcip->mci_flent, SRST_LINK);
3077 pool_lock();
3078 cpupart = mac_pset_find(mrp, &use_default);
3079 mac_fanout_setup(group_only_mcip,
3080 group_only_mcip->mci_flent,
3081 MCIP_RESOURCE_PROPS(group_only_mcip),
3082 mac_rx_deliver, group_only_mcip, NULL,
3083 cpupart);
3084 mac_set_pool_effective(use_default, cpupart,
3085 mrp, emrp);
3086 pool_unlock();
3087 }
3088 ASSERT(default_rgroup->mrg_state ==
3089 MAC_GROUP_STATE_SHARED);
3090 }
3091
3092 /*
3093 * A VLAN MAC client on a reserved group still
3094 * requires SW classification if the MAC doesn't
3095 * provide VLAN HW filtering.
3096 *
3097 * Clients with no unicast address also require SW
3098 * classification.
3099 */
3100 if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3101 ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) ||
3102 no_unicast)) {
3103 mac_rx_switch_grp_to_sw(rgroup);
3104 }
3105
3106 }
3107
3108 mac_set_rings_effective(mcip);
3109 return (0);
3110
3111 setup_failed:
3112 /* Switch the primary back to default group */
3113 if (reloc_pmcip != NULL) {
3114 (void) mac_rx_switch_group(reloc_pmcip,
3115 reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3116 }
3117 mac_datapath_teardown(mcip, flent, link_type);
3118 return (err);
3119 }
3120
3121 void
3122 mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
3123 uint32_t link_type)
3124 {
3125 mac_impl_t *mip = mcip->mci_mip;
3126 mac_group_t *group = NULL;
3127 mac_client_impl_t *grp_only_mcip;
3128 flow_entry_t *group_only_flent;
3129 mac_group_t *default_group;
3130 boolean_t check_default_group = B_FALSE;
3131 mac_group_state_t next_state;
3132 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
3133 uint16_t vid;
3134
3135 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3136
3137 switch (link_type) {
3138 case SRST_FLOW:
3139 mac_rx_srs_group_teardown(flent, B_FALSE);
3140 mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3141 return;
3142
3143 case SRST_LINK:
3144 /* Stop sending packets */
3145 mac_tx_client_block(mcip);
3146 group = flent->fe_rx_ring_group;
3147 vid = i_mac_flow_vid(flent);
3148
3149 /*
3150 * Stop the packet flow from the hardware by disabling
3151 * any hardware filters assigned to this client.
3152 */
3153 if (mcip->mci_unicast != NULL) {
3154 int err;
3155
3156 err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
3157
3158 if (err != 0) {
3159 cmn_err(CE_WARN, "%s: failed to remove a MAC HW"
3160 " filters because of error 0x%x",
3161 mip->mi_name, err);
3162 }
3163
3164 mcip->mci_unicast = NULL;
3165 }
3166
3167 /* Stop the packets coming from the S/W classifier */
3168 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3169 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3170
3171 /* Now quiesce and destroy all SRS and soft rings */
3172 mac_rx_srs_group_teardown(flent, B_FALSE);
3173 mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3174
3175 ASSERT((mcip->mci_flent == flent) &&
3176 (flent->fe_next == NULL));
3177
3178 /*
3179 * Release our hold on the group as well. We need
3180 * to check if the shared group has only one client
3181 * left who can use it exclusively. Also, if we
3182 * were the last client, release the group.
3183 */
3184 default_group = MAC_DEFAULT_RX_GROUP(mip);
3185 if (group != NULL) {
3186 mac_group_remove_client(group, mcip);
3187 next_state = mac_group_next_state(group,
3188 &grp_only_mcip, default_group, B_TRUE);
3189
3190 if (next_state == MAC_GROUP_STATE_RESERVED) {
3191 /*
3192 * Only one client left on this RX group.
3193 */
3194 VERIFY3P(grp_only_mcip, !=, NULL);
3195 mac_set_group_state(group,
3196 MAC_GROUP_STATE_RESERVED);
3197 group_only_flent = grp_only_mcip->mci_flent;
3198
3199 /*
3200 * The only remaining client has exclusive
3201 * access on the group. Allow it to
3202 * dynamically poll the H/W rings etc.
3203 */
3204 mac_rx_srs_group_setup(grp_only_mcip,
3205 group_only_flent, SRST_LINK);
3206 mac_fanout_setup(grp_only_mcip,
3207 group_only_flent,
3208 MCIP_RESOURCE_PROPS(grp_only_mcip),
3209 mac_rx_deliver, grp_only_mcip, NULL, NULL);
3210 mac_rx_group_unmark(group, MR_INCIPIENT);
3211 mac_set_rings_effective(grp_only_mcip);
3212 } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3213 /*
3214 * This is a non-default group being freed up.
3215 * We need to reevaluate the default group
3216 * to see if the primary client can get
3217 * exclusive access to the default group.
3218 */
3219 VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip));
3220 if (mrp->mrp_mask & MRP_RX_RINGS) {
3221 MAC_RX_GRP_RELEASED(mip);
3222 if (mip->mi_rx_group_type ==
3223 MAC_GROUP_TYPE_DYNAMIC) {
3224 MAC_RX_RING_RELEASED(mip,
3225 group->mrg_cur_count);
3226 }
3227 }
3228 mac_release_rx_group(mcip, group);
3229 mac_set_group_state(group,
3230 MAC_GROUP_STATE_REGISTERED);
3231 check_default_group = B_TRUE;
3232 } else {
3233 VERIFY3S(next_state, ==,
3234 MAC_GROUP_STATE_SHARED);
3235 mac_set_group_state(group,
3236 MAC_GROUP_STATE_SHARED);
3237 mac_rx_group_unmark(group, MR_CONDEMNED);
3238 }
3239 flent->fe_rx_ring_group = NULL;
3240 }
3241 /*
3242 * Remove the client from the TX group. Additionally, if
3243 * this a non-default group, then we also need to release
3244 * the group.
3245 */
3246 group = flent->fe_tx_ring_group;
3247 default_group = MAC_DEFAULT_TX_GROUP(mip);
3248 if (group != NULL) {
3249 mac_group_remove_client(group, mcip);
3250 next_state = mac_group_next_state(group,
3251 &grp_only_mcip, default_group, B_FALSE);
3252 if (next_state == MAC_GROUP_STATE_REGISTERED) {
3253 if (group != default_group) {
3254 if (mrp->mrp_mask & MRP_TX_RINGS) {
3303 } else if (next_state == MAC_GROUP_STATE_RESERVED) {
3304 mac_set_rings_effective(grp_only_mcip);
3305 }
3306 flent->fe_tx_ring_group = NULL;
3307 group->mrg_state = next_state;
3308 }
3309 break;
3310 default:
3311 ASSERT(B_FALSE);
3312 break;
3313 }
3314
3315 /*
3316 * The mac client using the default group gets exclusive access to the
3317 * default group if and only if it is the sole client on the entire
3318 * mip. If so set the group state to reserved, and set up the SRSes
3319 * over the default group.
3320 */
3321 if (check_default_group) {
3322 default_group = MAC_DEFAULT_RX_GROUP(mip);
3323 VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED);
3324 next_state = mac_group_next_state(default_group,
3325 &grp_only_mcip, default_group, B_TRUE);
3326 if (next_state == MAC_GROUP_STATE_RESERVED) {
3327 VERIFY3P(grp_only_mcip, !=, NULL);
3328 VERIFY3U(mip->mi_nactiveclients, ==, 1);
3329 mac_set_group_state(default_group,
3330 MAC_GROUP_STATE_RESERVED);
3331 mac_rx_srs_group_setup(grp_only_mcip,
3332 grp_only_mcip->mci_flent, SRST_LINK);
3333 mac_fanout_setup(grp_only_mcip,
3334 grp_only_mcip->mci_flent,
3335 MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3336 grp_only_mcip, NULL, NULL);
3337 mac_rx_group_unmark(default_group, MR_INCIPIENT);
3338 mac_set_rings_effective(grp_only_mcip);
3339 }
3340 }
3341
3342 /*
3343 * If the primary is the only one left and the MAC supports
3344 * dynamic grouping, we need to see if the primary needs to
3345 * be moved to the default group so that it can use all the
3346 * H/W rings.
3347 */
3348 if (!(flent->fe_type & FLOW_PRIMARY_MAC) &&
3832 mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
3833 {
3834 int i;
3835 mac_soft_ring_t *soft_ring, *remove_sring;
3836 mac_client_impl_t *mcip = mac_srs->srs_mcip;
3837
3838 mutex_enter(&mac_srs->srs_lock);
3839 for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3840 soft_ring = mac_srs->srs_tx_soft_rings[i];
3841 if (soft_ring->s_ring_tx_arg2 == tx_ring)
3842 break;
3843 }
3844 mutex_exit(&mac_srs->srs_lock);
3845 ASSERT(i < mac_srs->srs_tx_ring_count);
3846 remove_sring = soft_ring;
3847 /*
3848 * In the case of aggr, the soft ring associated with a Tx ring
3849 * is also stored in st_soft_rings[] array. That entry should
3850 * be removed.
3851 */
3852 if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
3853 mac_srs_tx_t *tx = &mac_srs->srs_tx;
3854
3855 ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
3856 tx->st_soft_rings[tx_ring->mr_index] = NULL;
3857 }
3858 mac_soft_ring_remove(mac_srs, remove_sring);
3859 mac_srs_update_fanout_list(mac_srs);
3860 }
3861
3862 /*
3863 * mac_tx_srs_setup():
3864 * Used to setup Tx rings. If no free Tx ring is available, then default
3865 * Tx ring is used.
3866 */
3867 void
3868 mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
3869 {
3870 mac_impl_t *mip = mcip->mci_mip;
3871 mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs;
3872 int i;
3873 int tx_ring_count = 0;
3874 uint32_t soft_ring_type;
3875 mac_group_t *grp = NULL;
3876 mac_ring_t *ring;
3877 mac_srs_tx_t *tx = &tx_srs->srs_tx;
3878 boolean_t is_aggr;
3879 uint_t ring_info = 0;
3880
3881 is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0;
3882 grp = flent->fe_tx_ring_group;
3883 if (grp == NULL) {
3884 ring = (mac_ring_t *)mip->mi_default_tx_ring;
3885 goto no_group;
3886 }
3887 tx_ring_count = grp->mrg_cur_count;
3888 ring = grp->mrg_rings;
3889 /*
3890 * An attempt is made to reserve 'tx_ring_count' number
3891 * of Tx rings. If tx_ring_count is 0, default Tx ring
3892 * is used. If it is 1, an attempt is made to reserve one
3893 * Tx ring. In both the cases, the ring information is
3894 * stored in Tx SRS. If multiple Tx rings are specified,
3895 * then each Tx ring will have a Tx-side soft ring. All
3896 * these soft rings will be hang off Tx SRS.
3897 */
3898 switch (grp->mrg_state) {
3899 case MAC_GROUP_STATE_SHARED:
3900 case MAC_GROUP_STATE_RESERVED:
3901 if (tx_ring_count <= 1 && !is_aggr) {
|