3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2017, Joyent, Inc.
24 */
25
26 /*
27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28 *
29 * An instance of the structure aggr_grp_t is allocated for each
30 * link aggregation group. When created, aggr_grp_t objects are
31 * entered into the aggr_grp_hash hash table maintained by the modhash
32 * module. The hash key is the linkid associated with the link
33 * aggregation group.
34 *
 * A set of MAC ports is associated with each aggregation group.
36 *
37 * Aggr pseudo TX rings
38 * --------------------
39 * The underlying ports (NICs) in an aggregation can have TX rings. To
40 * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * they are already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side where
107 mac_prop_info_handle_t);
108
109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
111 boolean_t *);
112
113 static void aggr_grp_capab_set(aggr_grp_t *);
114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
115 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
116 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
119
120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
122 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
123 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
125 static int aggr_addmac(void *, const uint8_t *);
126 static int aggr_remmac(void *, const uint8_t *);
127 static mblk_t *aggr_rx_poll(void *, int);
128 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
129 const int, mac_ring_info_t *, mac_ring_handle_t);
130 static void aggr_fill_group(void *, mac_ring_type_t, const int,
131 mac_group_info_t *, mac_group_handle_t);
132
/* kmem cache that aggr_grp_t structures are returned to by aggr_grp_free(). */
static kmem_cache_t *aggr_grp_cache;
/* modhash table of aggregation groups, keyed by linkid (see GRP_HASH_KEY). */
static mod_hash_t *aggr_grp_hash;
/* NOTE(review): presumably protects aggr_grp_hash and aggr_grp_cnt -- confirm. */
static krwlock_t aggr_grp_lock;
/* NOTE(review): presumably the number of existing groups -- confirm. */
static uint_t aggr_grp_cnt;
/* ID space a group key is allocated from when the caller passes key == 0. */
static id_space_t *key_ids;

/* NOTE(review): presumably the size passed when creating aggr_grp_hash. */
#define GRP_HASHSZ 64
/* Convert a datalink linkid into the opaque key type used by modhash. */
#define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid)
#define AGGR_PORT_NAME_DELIMIT '-'

/* All-zero Ethernet address; a fixed group MAC address equal to it is invalid. */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};

/* Optional MAC callbacks implemented by aggr. */
#define AGGR_M_CALLBACK_FLAGS \
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
307 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
308 ASSERT(MAC_PERIM_HELD(port->lp_mh));
309
310 if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
311 return (B_FALSE);
312
313 /*
314 * Validate the MAC port link speed and update the group
315 * link speed if needed.
316 */
317 if (port->lp_ifspeed == 0 ||
318 port->lp_link_state != LINK_STATE_UP ||
319 port->lp_link_duplex != LINK_DUPLEX_FULL) {
320 /*
321 * Can't attach a MAC port with unknown link speed,
322 * down link, or not in full duplex mode.
323 */
324 return (B_FALSE);
325 }
326
327 if (grp->lg_ifspeed == 0) {
328 /*
329 * The group inherits the speed of the first link being
330 * attached.
331 */
332 grp->lg_ifspeed = port->lp_ifspeed;
333 link_state_changed = B_TRUE;
334 } else if (grp->lg_ifspeed != port->lp_ifspeed) {
335 /*
336 * The link speed of the MAC port must be the same as
337 * the group link speed, as per 802.3ad. Since it is
338 * not, the attach is cancelled.
339 */
340 return (B_FALSE);
341 }
342
343 grp->lg_nattached_ports++;
344
345 /*
346 * Update the group link state.
347 */
348 if (grp->lg_link_state != LINK_STATE_UP) {
349 grp->lg_link_state = LINK_STATE_UP;
350 grp->lg_link_duplex = LINK_DUPLEX_FULL;
351 link_state_changed = B_TRUE;
352 }
353
354 /*
355 * Update port's state.
356 */
357 port->lp_state = AGGR_PORT_STATE_ATTACHED;
358
359 aggr_grp_multicst_port(port, B_TRUE);
360
361 /*
362 * Set port's receive callback
363 */
364 mac_rx_set(port->lp_mch, aggr_recv_cb, port);
365
366 /*
367 * If LACP is OFF, the port can be used to send data as soon
368 * as its link is up and verified to be compatible with the
369 * aggregation.
370 *
388 ASSERT(MAC_PERIM_HELD(port->lp_mh));
389
390 /* update state */
391 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
392 return (B_FALSE);
393
394 mac_rx_clear(port->lp_mch);
395
396 aggr_grp_multicst_port(port, B_FALSE);
397
398 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
399 aggr_send_port_disable(port);
400 else
401 aggr_lacp_port_detached(port);
402
403 port->lp_state = AGGR_PORT_STATE_STANDBY;
404
405 grp->lg_nattached_ports--;
406 if (grp->lg_nattached_ports == 0) {
407 /* the last attached MAC port of the group is being detached */
408 grp->lg_ifspeed = 0;
409 grp->lg_link_state = LINK_STATE_DOWN;
410 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
411 link_state_changed = B_TRUE;
412 }
413
414 return (link_state_changed);
415 }
416
417 /*
418 * Update the MAC addresses of the constituent ports of the specified
419 * group. This function is invoked:
420 * - after creating a new aggregation group.
421 * - after adding new ports to an aggregation group.
422 * - after removing a port from a group when the MAC address of
423 * that port was used for the MAC address of the group.
424 * - after the MAC address of a port changed when the MAC address
425 * of that port was used for the MAC address of the group.
426 *
427 * Return true if the link state of the aggregation changed, for example
428 * as a result of a failure changing the MAC address of one of the
429 * constituent ports.
430 */
658
659 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
660 ring = rx_grp->arg_rings + j;
661 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
662 ring->arr_hw_rh != hw_rh) {
663 continue;
664 }
665
666 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
667
668 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
669 ring->arr_hw_rh = NULL;
670 ring->arr_port = NULL;
671 rx_grp->arg_ring_cnt--;
672 mac_hwring_teardown(hw_rh);
673 break;
674 }
675 }
676
677 /*
678 * This function is called to create pseudo rings over the hardware rings of
679 * the underlying device. Note that there is a 1:1 mapping between the pseudo
680 * RX rings of the aggr and the hardware rings of the underlying port.
681 */
682 static int
683 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
684 {
685 aggr_grp_t *grp = port->lp_grp;
686 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
687 aggr_unicst_addr_t *addr, *a;
688 mac_perim_handle_t pmph;
689 int hw_rh_cnt, i = 0, j;
690 int err = 0;
691
692 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
693 mac_perim_enter_by_mh(port->lp_mh, &pmph);
694
695 /*
696 * This function must be called after the aggr registers its mac
697 * and its RX group has been initialized.
698 */
699 ASSERT(rx_grp->arg_gh != NULL);
700
701 /*
702 * Get the list the the underlying HW rings.
703 */
704 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
705 &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
706
707 if (port->lp_hwgh != NULL) {
708 /*
709 * Quiesce the HW ring and the mac srs on the ring. Note
710 * that the HW ring will be restarted when the pseudo ring
711 * is started. At that time all the packets will be
712 * directly passed up to the pseudo RX ring and handled
713 * by mac srs created over the pseudo RX ring.
714 */
715 mac_rx_client_quiesce(port->lp_mch);
716 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
717 }
718
719 /*
720 * Add all the unicast addresses to the newly added port.
721 */
722 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
723 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
724 break;
725 }
726
727 for (i = 0; err == 0 && i < hw_rh_cnt; i++)
728 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
729
730 if (err != 0) {
731 for (j = 0; j < i; j++)
732 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
733
734 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
735 aggr_port_remmac(port, a->aua_addr);
736
737 if (port->lp_hwgh != NULL) {
738 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
739 mac_rx_client_restart(port->lp_mch);
740 port->lp_hwgh = NULL;
741 }
742 } else {
743 port->lp_rx_grp_added = B_TRUE;
744 }
745 done:
746 mac_perim_exit(pmph);
747 return (err);
748 }
749
750 /*
751 * This function is called by aggr to remove pseudo RX rings over the
752 * HW rings of the underlying port.
753 */
754 static void
755 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
756 {
757 aggr_grp_t *grp = port->lp_grp;
758 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
759 aggr_unicst_addr_t *addr;
760 mac_group_handle_t hwgh;
761 mac_perim_handle_t pmph;
762 int hw_rh_cnt, i;
763
764 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
765 mac_perim_enter_by_mh(port->lp_mh, &pmph);
766
767 if (!port->lp_rx_grp_added)
768 goto done;
769
770 ASSERT(rx_grp->arg_gh != NULL);
771 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
772 &hwgh, hw_rh, MAC_RING_TYPE_RX);
773
774 /*
775 * If hw_rh_cnt is 0, it means that the underlying port does not
776 * support RX rings. Directly return in this case.
777 */
778 for (i = 0; i < hw_rh_cnt; i++)
779 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
780
781 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
782 aggr_port_remmac(port, addr->aua_addr);
783
784 if (port->lp_hwgh != NULL) {
785 port->lp_hwgh = NULL;
786
787 /*
788 * First clear the permanent-quiesced flag of the RX srs then
789 * restart the HW ring and the mac srs on the ring. Note that
790 * the HW ring and associated SRS will soon been removed when
791 * the port is removed from the aggr.
792 */
793 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
794 mac_rx_client_restart(port->lp_mch);
795 }
796
797 port->lp_rx_grp_added = B_FALSE;
798 done:
799 mac_perim_exit(pmph);
800 }
801
802 /*
803 * Add a pseudo TX ring for the given HW ring handle.
1290 grp->lg_zoneid = crgetzoneid(credp);
1291 grp->lg_ifspeed = 0;
1292 grp->lg_link_state = LINK_STATE_UNKNOWN;
1293 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1294 grp->lg_started = B_FALSE;
1295 grp->lg_promisc = B_FALSE;
1296 grp->lg_lacp_done = B_FALSE;
1297 grp->lg_tx_notify_done = B_FALSE;
1298 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1299 grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1300 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1301 grp->lg_tx_notify_thread = thread_create(NULL, 0,
1302 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1303 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1304 MAX_RINGS_PER_GROUP), KM_SLEEP);
1305 grp->lg_tx_blocked_cnt = 0;
1306 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1307 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1308 aggr_lacp_init_grp(grp);
1309
1310 /* add MAC ports to group */
1311 grp->lg_ports = NULL;
1312 grp->lg_nports = 0;
1313 grp->lg_nattached_ports = 0;
1314 grp->lg_ntx_ports = 0;
1315
1316 /*
1317 * If key is not specified by the user, allocate the key.
1318 */
1319 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1320 err = ENOMEM;
1321 goto bail;
1322 }
1323 grp->lg_key = key;
1324
1325 for (i = 0; i < nports; i++) {
1326 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1327 if (err != 0)
1328 goto bail;
1329 }
1330
1331 /*
1332 * If no explicit MAC address was specified by the administrator,
1333 * set it to the MAC address of the first port.
1334 */
1335 grp->lg_addr_fixed = mac_fixed;
1336 if (grp->lg_addr_fixed) {
1337 /* validate specified address */
1338 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1339 err = EINVAL;
1340 goto bail;
1341 }
1342 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1343 } else {
1344 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1345 grp->lg_mac_addr_port = grp->lg_ports;
1346 }
1528 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1529 grp->lg_mac_addr_port = grp->lg_ports;
1530 mac_addr_changed = B_TRUE;
1531 }
1532
1533 link_state_changed = aggr_grp_detach_port(grp, port);
1534
1535 /*
1536 * Add the counter statistics of the ports while it was aggregated
1537 * to the group's residual statistics. This is done by obtaining
1538 * the current counter from the underlying MAC then subtracting the
1539 * value of the counter at the moment it was added to the
1540 * aggregation.
1541 */
1542 for (i = 0; i < MAC_NSTAT; i++) {
1543 stat = i + MAC_STAT_MIN;
1544 if (!MAC_STAT_ISACOUNTER(stat))
1545 continue;
1546 val = aggr_port_stat(port, stat);
1547 val -= port->lp_stat[i];
1548 grp->lg_stat[i] += val;
1549 }
1550 for (i = 0; i < ETHER_NSTAT; i++) {
1551 stat = i + MACTYPE_STAT_MIN;
1552 if (!ETHER_STAT_ISACOUNTER(stat))
1553 continue;
1554 val = aggr_port_stat(port, stat);
1555 val -= port->lp_ether_stat[i];
1556 grp->lg_ether_stat[i] += val;
1557 }
1558
1559 grp->lg_nports--;
1560 mac_perim_exit(mph);
1561
1562 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1563 aggr_port_delete(port);
1564
1565 /*
1566 * If the group MAC address has changed, update the MAC address of
1567 * the remaining constituent ports according to the new MAC
1568 * address of the group.
1569 */
1570 if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1571 link_state_changed = B_TRUE;
1572
1573 done:
1574 if (mac_addr_changedp != NULL)
1575 *mac_addr_changedp = mac_addr_changed;
1576 if (link_state_changedp != NULL)
1785 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1786 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1787 aggr_port_delete(port);
1788 port = cport;
1789 }
1790
1791 mac_perim_exit(mph);
1792
1793 kmem_free(grp->lg_tx_blocked_rings,
1794 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1795 /*
1796 * Wait for the port's lacp timer thread and its notification callback
1797 * to exit before calling mac_unregister() since both needs to access
1798 * the mac perimeter of the grp.
1799 */
1800 aggr_grp_port_wait(grp);
1801
1802 VERIFY(mac_unregister(grp->lg_mh) == 0);
1803 grp->lg_mh = NULL;
1804
1805 AGGR_GRP_REFRELE(grp);
1806 return (0);
1807 }
1808
1809 void
1810 aggr_grp_free(aggr_grp_t *grp)
1811 {
1812 ASSERT(grp->lg_refs == 0);
1813 ASSERT(grp->lg_port_ref == 0);
1814 if (grp->lg_key > AGGR_MAX_KEY) {
1815 id_free(key_ids, grp->lg_key);
1816 grp->lg_key = 0;
1817 }
1818 kmem_cache_free(aggr_grp_cache, grp);
1819 }
1820
1821 int
1822 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1823 aggr_grp_info_new_grp_fn_t new_grp_fn,
1824 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1867
1868 bail:
1869 mac_perim_exit(mph);
1870 AGGR_GRP_REFRELE(grp);
1871 return (rc);
1872 }
1873
/*
 * ioctl entry point for the aggr MAC. No ioctls are handled here: every
 * message is negatively acknowledged with ENOTSUP. The arg parameter is
 * not used.
 */
/*ARGSUSED*/
static void
aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}
1880
1881 static int
1882 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1883 {
1884 aggr_port_t *port;
1885 uint_t stat_index;
1886
1887 /* We only aggregate counter statistics. */
1888 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1889 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1890 return (ENOTSUP);
1891 }
1892
1893 /*
1894 * Counter statistics for a group are computed by aggregating the
1895 * counters of the members MACs while they were aggregated, plus
1896 * the residual counter of the group itself, which is updated each
1897 * time a MAC is removed from the group.
1898 */
1899 *val = 0;
1900 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1901 /* actual port statistic */
1902 *val += aggr_port_stat(port, stat);
1903 /*
1904 * minus the port stat when it was added, plus any residual
1905 * amount for the group.
1906 */
1935
1936 int
1937 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1938 {
1939 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1940
1941 if (tx_ring->atr_hw_rh != NULL) {
1942 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1943 } else {
1944 aggr_port_t *port = tx_ring->atr_port;
1945
1946 *val = mac_stat_get(port->lp_mh, stat);
1947 }
1948 return (0);
1949 }
1950
1951 static int
1952 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1953 {
1954 aggr_grp_t *grp = arg;
1955 mac_perim_handle_t mph;
1956 int rval = 0;
1957
1958 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1959
1960 switch (stat) {
1961 case MAC_STAT_IFSPEED:
1962 *val = grp->lg_ifspeed;
1963 break;
1964
1965 case ETHER_STAT_LINK_DUPLEX:
1966 *val = grp->lg_link_duplex;
1967 break;
1968
1969 default:
1970 /*
1971 * For all other statistics, we return the aggregated stat
1972 * from the underlying ports. aggr_grp_stat() will set
1973 * rval appropriately if the statistic isn't a counter.
1974 */
1975 rval = aggr_grp_stat(grp, stat, val);
1976 }
1977
1978 mac_perim_exit(mph);
1979 return (rval);
1980 }
1981
1982 static int
1983 aggr_m_start(void *arg)
1984 {
1985 aggr_grp_t *grp = arg;
1986 aggr_port_t *port;
1987 mac_perim_handle_t mph, pmph;
1988
1989 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1990
1991 /*
1992 * Attempts to start all configured members of the group.
1993 * Group members will be attached when their link-up notification
1994 * is received.
1995 */
1996 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1997 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1998 if (aggr_port_start(port) != 0) {
2190 case MAC_CAPAB_AGGR:
2191 {
2192 mac_capab_aggr_t *aggr_cap;
2193
2194 if (cap_data != NULL) {
2195 aggr_cap = cap_data;
2196 aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2197 aggr_cap->mca_unicst = aggr_m_unicst;
2198 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2199 aggr_cap->mca_arg = arg;
2200 }
2201 return (B_TRUE);
2202 }
2203 default:
2204 return (B_FALSE);
2205 }
2206 return (B_TRUE);
2207 }
2208
2209 /*
2210 * Callback funtion for MAC layer to register groups.
2211 */
2212 static void
2213 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2214 mac_group_info_t *infop, mac_group_handle_t gh)
2215 {
2216 aggr_grp_t *grp = arg;
2217 aggr_pseudo_rx_group_t *rx_group;
2218 aggr_pseudo_tx_group_t *tx_group;
2219
2220 ASSERT(index == 0);
2221 if (rtype == MAC_RING_TYPE_RX) {
2222 rx_group = &grp->lg_rx_group;
2223 rx_group->arg_gh = gh;
2224 rx_group->arg_grp = grp;
2225
2226 infop->mgi_driver = (mac_group_driver_t)rx_group;
2227 infop->mgi_start = NULL;
2228 infop->mgi_stop = NULL;
2229 infop->mgi_addmac = aggr_addmac;
2230 infop->mgi_remmac = aggr_remmac;
2231 infop->mgi_count = rx_group->arg_ring_cnt;
2232 } else {
2233 tx_group = &grp->lg_tx_group;
2234 tx_group->atg_gh = gh;
2235 }
2236 }
2237
2238 /*
 * Callback function for MAC layer to register all rings.
2240 */
2241 static void
2242 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2243 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2244 {
2245 aggr_grp_t *grp = arg;
2246
2247 switch (rtype) {
2248 case MAC_RING_TYPE_RX: {
2249 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group;
2250 aggr_pseudo_rx_ring_t *rx_ring;
2251 mac_intr_t aggr_mac_intr;
2420 while ((addr = *pprev) != NULL) {
2421 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2422 pprev = &addr->aua_next;
2423 continue;
2424 }
2425 break;
2426 }
2427 if (addr == NULL) {
2428 mac_perim_exit(mph);
2429 return (EINVAL);
2430 }
2431
2432 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2433 aggr_port_remmac(port, mac_addr);
2434
2435 *pprev = addr->aua_next;
2436 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2437
2438 mac_perim_exit(mph);
2439 return (err);
2440 }
2441
2442 /*
2443 * Add or remove the multicast addresses that are defined for the group
2444 * to or from the specified port.
2445 *
2446 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2447 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2448 * called when the port is either stopped or detached.
2449 */
2450 void
2451 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2452 {
2453 aggr_grp_t *grp = port->lp_grp;
2454
2455 ASSERT(MAC_PERIM_HELD(port->lp_mh));
2456 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2457
2458 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2459 return;
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2018 Joyent, Inc.
24 */
25
26 /*
27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28 *
29 * An instance of the structure aggr_grp_t is allocated for each
30 * link aggregation group. When created, aggr_grp_t objects are
31 * entered into the aggr_grp_hash hash table maintained by the modhash
32 * module. The hash key is the linkid associated with the link
33 * aggregation group.
34 *
 * A set of MAC ports is associated with each aggregation group.
36 *
37 * Aggr pseudo TX rings
38 * --------------------
39 * The underlying ports (NICs) in an aggregation can have TX rings. To
40 * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * they are already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side where
107 mac_prop_info_handle_t);
108
109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
111 boolean_t *);
112
113 static void aggr_grp_capab_set(aggr_grp_t *);
114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
115 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
116 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
119
120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
122 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
123 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
125 static int aggr_addmac(void *, const uint8_t *);
126 static int aggr_remmac(void *, const uint8_t *);
127 static int aggr_addvlan(mac_group_driver_t, uint16_t);
128 static int aggr_remvlan(mac_group_driver_t, uint16_t);
129 static mblk_t *aggr_rx_poll(void *, int);
130 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
131 const int, mac_ring_info_t *, mac_ring_handle_t);
132 static void aggr_fill_group(void *, mac_ring_type_t, const int,
133 mac_group_info_t *, mac_group_handle_t);
134
/* NOTE(review): presumably the kmem cache aggr_grp_t objects come from. */
static kmem_cache_t *aggr_grp_cache;
/* modhash table of aggregation groups, keyed by linkid (see GRP_HASH_KEY). */
static mod_hash_t *aggr_grp_hash;
/* NOTE(review): presumably protects aggr_grp_hash and aggr_grp_cnt -- confirm. */
static krwlock_t aggr_grp_lock;
/* NOTE(review): presumably the number of existing groups -- confirm. */
static uint_t aggr_grp_cnt;
/* NOTE(review): presumably the ID space group keys are allocated from. */
static id_space_t *key_ids;

/* NOTE(review): presumably the size passed when creating aggr_grp_hash. */
#define GRP_HASHSZ 64
/* Convert a datalink linkid into the opaque key type used by modhash. */
#define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid)
#define AGGR_PORT_NAME_DELIMIT '-'

/* All-zero Ethernet address. */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};

/* Optional MAC callbacks implemented by aggr. */
#define AGGR_M_CALLBACK_FLAGS \
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
309 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
310 ASSERT(MAC_PERIM_HELD(port->lp_mh));
311
312 if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
313 return (B_FALSE);
314
315 /*
316 * Validate the MAC port link speed and update the group
317 * link speed if needed.
318 */
319 if (port->lp_ifspeed == 0 ||
320 port->lp_link_state != LINK_STATE_UP ||
321 port->lp_link_duplex != LINK_DUPLEX_FULL) {
322 /*
323 * Can't attach a MAC port with unknown link speed,
324 * down link, or not in full duplex mode.
325 */
326 return (B_FALSE);
327 }
328
329 mutex_enter(&grp->lg_stat_lock);
330 if (grp->lg_ifspeed == 0) {
331 /*
332 * The group inherits the speed of the first link being
333 * attached.
334 */
335 grp->lg_ifspeed = port->lp_ifspeed;
336 link_state_changed = B_TRUE;
337 } else if (grp->lg_ifspeed != port->lp_ifspeed) {
338 /*
339 * The link speed of the MAC port must be the same as
340 * the group link speed, as per 802.3ad. Since it is
341 * not, the attach is cancelled.
342 */
343 mutex_exit(&grp->lg_stat_lock);
344 return (B_FALSE);
345 }
346 mutex_exit(&grp->lg_stat_lock);
347
348 grp->lg_nattached_ports++;
349
350 /*
351 * Update the group link state.
352 */
353 if (grp->lg_link_state != LINK_STATE_UP) {
354 grp->lg_link_state = LINK_STATE_UP;
355 mutex_enter(&grp->lg_stat_lock);
356 grp->lg_link_duplex = LINK_DUPLEX_FULL;
357 mutex_exit(&grp->lg_stat_lock);
358 link_state_changed = B_TRUE;
359 }
360
361 /*
362 * Update port's state.
363 */
364 port->lp_state = AGGR_PORT_STATE_ATTACHED;
365
366 aggr_grp_multicst_port(port, B_TRUE);
367
368 /*
369 * Set port's receive callback
370 */
371 mac_rx_set(port->lp_mch, aggr_recv_cb, port);
372
373 /*
374 * If LACP is OFF, the port can be used to send data as soon
375 * as its link is up and verified to be compatible with the
376 * aggregation.
377 *
395 ASSERT(MAC_PERIM_HELD(port->lp_mh));
396
397 /* update state */
398 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
399 return (B_FALSE);
400
401 mac_rx_clear(port->lp_mch);
402
403 aggr_grp_multicst_port(port, B_FALSE);
404
405 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
406 aggr_send_port_disable(port);
407 else
408 aggr_lacp_port_detached(port);
409
410 port->lp_state = AGGR_PORT_STATE_STANDBY;
411
412 grp->lg_nattached_ports--;
413 if (grp->lg_nattached_ports == 0) {
414 /* the last attached MAC port of the group is being detached */
415 grp->lg_link_state = LINK_STATE_DOWN;
416 mutex_enter(&grp->lg_stat_lock);
417 grp->lg_ifspeed = 0;
418 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
419 mutex_exit(&grp->lg_stat_lock);
420 link_state_changed = B_TRUE;
421 }
422
423 return (link_state_changed);
424 }
425
426 /*
427 * Update the MAC addresses of the constituent ports of the specified
428 * group. This function is invoked:
429 * - after creating a new aggregation group.
430 * - after adding new ports to an aggregation group.
431 * - after removing a port from a group when the MAC address of
432 * that port was used for the MAC address of the group.
433 * - after the MAC address of a port changed when the MAC address
434 * of that port was used for the MAC address of the group.
435 *
436 * Return true if the link state of the aggregation changed, for example
437 * as a result of a failure changing the MAC address of one of the
438 * constituent ports.
439 */
667
668 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
669 ring = rx_grp->arg_rings + j;
670 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
671 ring->arr_hw_rh != hw_rh) {
672 continue;
673 }
674
675 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
676
677 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
678 ring->arr_hw_rh = NULL;
679 ring->arr_port = NULL;
680 rx_grp->arg_ring_cnt--;
681 mac_hwring_teardown(hw_rh);
682 break;
683 }
684 }
685
686 /*
687 * Create pseudo rings over the HW rings of the port.
688 *
689 * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
690 *
691 * o Program existing unicast filters on the pseudo group into the HW group.
692 *
693 * o Program existing VLAN filters on the pseudo group into the HW group.
694 */
695 static int
696 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
697 {
698 aggr_grp_t *grp = port->lp_grp;
699 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
700 aggr_unicst_addr_t *addr, *a;
701 mac_perim_handle_t pmph;
702 aggr_vlan_t *avp;
703 int hw_rh_cnt, i = 0, j;
704 int err = 0;
705
706 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
707 mac_perim_enter_by_mh(port->lp_mh, &pmph);
708
709 /*
710 * This function must be called after the aggr registers its MAC
711 * and its Rx group has been initialized.
712 */
713 ASSERT(rx_grp->arg_gh != NULL);
714
715 /*
716 * Get the list of the underlying HW rings.
717 */
718 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
719 &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
720
721 if (port->lp_hwgh != NULL) {
722 /*
723 * Quiesce the HW ring and the MAC SRS on the ring. Note
724 * that the HW ring will be restarted when the pseudo ring
725 * is started. At that time all the packets will be
726 * directly passed up to the pseudo Rx ring and handled
727 * by MAC SRS created over the pseudo Rx ring.
728 */
729 mac_rx_client_quiesce(port->lp_mch);
730 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
731 }
732
733 /*
734 * Add existing VLAN and unicast address filters to the port.
735 */
736 for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
737 avp = list_next(&rx_grp->arg_vlans, avp)) {
738 if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0)
739 goto err;
740 }
741
742 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
743 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
744 goto err;
745 }
746
747 for (i = 0; i < hw_rh_cnt; i++) {
748 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
749 if (err != 0)
750 goto err;
751 }
752
753 port->lp_rx_grp_added = B_TRUE;
754 mac_perim_exit(pmph);
755 return (0);
756
757 err:
758 ASSERT(err != 0);
759
760 for (j = 0; j < i; j++)
761 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
762
763 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
764 aggr_port_remmac(port, a->aua_addr);
765
766 if (avp != NULL)
767 avp = list_prev(&rx_grp->arg_vlans, avp);
768
769 for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
770 int err2;
771
772 if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) {
773 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
774 ": errno %d.", avp->av_vid,
775 mac_client_name(port->lp_mch), err2);
776 }
777 }
778
779 if (port->lp_hwgh != NULL) {
780 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
781 mac_rx_client_restart(port->lp_mch);
782 port->lp_hwgh = NULL;
783 }
784
785 mac_perim_exit(pmph);
786 return (err);
787 }
788
789 /*
790 * Destroy the pseudo rings mapping to this port and remove all VLAN
791 * and unicast filters from this port. Even if there are no underlying
792 * HW rings we must still remove the unicast filters to take the port
793 * out of promisc mode.
794 */
795 static void
796 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
797 {
798 aggr_grp_t *grp = port->lp_grp;
799 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
800 aggr_unicst_addr_t *addr;
801 mac_group_handle_t hwgh;
802 mac_perim_handle_t pmph;
803 int hw_rh_cnt, i;
804
805 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
806 mac_perim_enter_by_mh(port->lp_mh, &pmph);
807
808 if (!port->lp_rx_grp_added)
809 goto done;
810
811 ASSERT(rx_grp->arg_gh != NULL);
812 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
813 &hwgh, hw_rh, MAC_RING_TYPE_RX);
814
815 for (i = 0; i < hw_rh_cnt; i++)
816 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
817
818 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
819 aggr_port_remmac(port, addr->aua_addr);
820
821 for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
822 avp = list_next(&rx_grp->arg_vlans, avp)) {
823 int err;
824
825 if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) {
826 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
827 ": errno %d.", avp->av_vid,
828 mac_client_name(port->lp_mch), err);
829 }
830 }
831
832 if (port->lp_hwgh != NULL) {
833 port->lp_hwgh = NULL;
834
835 /*
836 * First clear the permanent-quiesced flag of the RX srs then
837 * restart the HW ring and the mac srs on the ring. Note that
838 * the HW ring and associated SRS will soon been removed when
839 * the port is removed from the aggr.
840 */
841 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
842 mac_rx_client_restart(port->lp_mch);
843 }
844
845 port->lp_rx_grp_added = B_FALSE;
846 done:
847 mac_perim_exit(pmph);
848 }
849
850 /*
851 * Add a pseudo TX ring for the given HW ring handle.
1338 grp->lg_zoneid = crgetzoneid(credp);
1339 grp->lg_ifspeed = 0;
1340 grp->lg_link_state = LINK_STATE_UNKNOWN;
1341 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1342 grp->lg_started = B_FALSE;
1343 grp->lg_promisc = B_FALSE;
1344 grp->lg_lacp_done = B_FALSE;
1345 grp->lg_tx_notify_done = B_FALSE;
1346 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1347 grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1348 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1349 grp->lg_tx_notify_thread = thread_create(NULL, 0,
1350 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1351 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1352 MAX_RINGS_PER_GROUP), KM_SLEEP);
1353 grp->lg_tx_blocked_cnt = 0;
1354 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1355 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1356 aggr_lacp_init_grp(grp);
1357
1358 grp->lg_rx_group.arg_untagged = 0;
1359 list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t),
1360 offsetof(aggr_vlan_t, av_link));
1361
1362 /* add MAC ports to group */
1363 grp->lg_ports = NULL;
1364 grp->lg_nports = 0;
1365 grp->lg_nattached_ports = 0;
1366 grp->lg_ntx_ports = 0;
1367
1368 /*
1369 * If key is not specified by the user, allocate the key.
1370 */
1371 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1372 err = ENOMEM;
1373 goto bail;
1374 }
1375 grp->lg_key = key;
1376
1377 for (i = 0; i < nports; i++) {
1378 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
1379 if (err != 0)
1380 goto bail;
1381 }
1382
1383 /*
1384 * If no explicit MAC address was specified by the administrator,
1385 * set it to the MAC address of the first port.
1386 */
1387 grp->lg_addr_fixed = mac_fixed;
1388 if (grp->lg_addr_fixed) {
1389 /* validate specified address */
1390 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1391 err = EINVAL;
1392 goto bail;
1393 }
1394 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1395 } else {
1396 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1397 grp->lg_mac_addr_port = grp->lg_ports;
1398 }
1580 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1581 grp->lg_mac_addr_port = grp->lg_ports;
1582 mac_addr_changed = B_TRUE;
1583 }
1584
1585 link_state_changed = aggr_grp_detach_port(grp, port);
1586
1587 /*
1588 * Add the counter statistics of the ports while it was aggregated
1589 * to the group's residual statistics. This is done by obtaining
1590 * the current counter from the underlying MAC then subtracting the
1591 * value of the counter at the moment it was added to the
1592 * aggregation.
1593 */
1594 for (i = 0; i < MAC_NSTAT; i++) {
1595 stat = i + MAC_STAT_MIN;
1596 if (!MAC_STAT_ISACOUNTER(stat))
1597 continue;
1598 val = aggr_port_stat(port, stat);
1599 val -= port->lp_stat[i];
1600 mutex_enter(&grp->lg_stat_lock);
1601 grp->lg_stat[i] += val;
1602 mutex_exit(&grp->lg_stat_lock);
1603 }
1604 for (i = 0; i < ETHER_NSTAT; i++) {
1605 stat = i + MACTYPE_STAT_MIN;
1606 if (!ETHER_STAT_ISACOUNTER(stat))
1607 continue;
1608 val = aggr_port_stat(port, stat);
1609 val -= port->lp_ether_stat[i];
1610 mutex_enter(&grp->lg_stat_lock);
1611 grp->lg_ether_stat[i] += val;
1612 mutex_exit(&grp->lg_stat_lock);
1613 }
1614
1615 grp->lg_nports--;
1616 mac_perim_exit(mph);
1617
1618 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1619 aggr_port_delete(port);
1620
1621 /*
1622 * If the group MAC address has changed, update the MAC address of
1623 * the remaining constituent ports according to the new MAC
1624 * address of the group.
1625 */
1626 if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1627 link_state_changed = B_TRUE;
1628
1629 done:
1630 if (mac_addr_changedp != NULL)
1631 *mac_addr_changedp = mac_addr_changed;
1632 if (link_state_changedp != NULL)
1841 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1842 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1843 aggr_port_delete(port);
1844 port = cport;
1845 }
1846
1847 mac_perim_exit(mph);
1848
1849 kmem_free(grp->lg_tx_blocked_rings,
1850 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1851 /*
1852 * Wait for the port's lacp timer thread and its notification callback
1853 * to exit before calling mac_unregister() since both needs to access
1854 * the mac perimeter of the grp.
1855 */
1856 aggr_grp_port_wait(grp);
1857
1858 VERIFY(mac_unregister(grp->lg_mh) == 0);
1859 grp->lg_mh = NULL;
1860
1861 list_destroy(&(grp->lg_rx_group.arg_vlans));
1862
1863 AGGR_GRP_REFRELE(grp);
1864 return (0);
1865 }
1866
1867 void
1868 aggr_grp_free(aggr_grp_t *grp)
1869 {
1870 ASSERT(grp->lg_refs == 0);
1871 ASSERT(grp->lg_port_ref == 0);
1872 if (grp->lg_key > AGGR_MAX_KEY) {
1873 id_free(key_ids, grp->lg_key);
1874 grp->lg_key = 0;
1875 }
1876 kmem_cache_free(aggr_grp_cache, grp);
1877 }
1878
1879 int
1880 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1881 aggr_grp_info_new_grp_fn_t new_grp_fn,
1882 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1925
1926 bail:
1927 mac_perim_exit(mph);
1928 AGGR_GRP_REFRELE(grp);
1929 return (rc);
1930 }
1931
1932 /*ARGSUSED*/
1933 static void
1934 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1935 {
1936 miocnak(q, mp, 0, ENOTSUP);
1937 }
1938
1939 static int
1940 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1941 {
1942 aggr_port_t *port;
1943 uint_t stat_index;
1944
1945 ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
1946
1947 /* We only aggregate counter statistics. */
1948 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1949 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1950 return (ENOTSUP);
1951 }
1952
1953 /*
1954 * Counter statistics for a group are computed by aggregating the
1955 * counters of the members MACs while they were aggregated, plus
1956 * the residual counter of the group itself, which is updated each
1957 * time a MAC is removed from the group.
1958 */
1959 *val = 0;
1960 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1961 /* actual port statistic */
1962 *val += aggr_port_stat(port, stat);
1963 /*
1964 * minus the port stat when it was added, plus any residual
1965 * amount for the group.
1966 */
1995
1996 int
1997 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1998 {
1999 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
2000
2001 if (tx_ring->atr_hw_rh != NULL) {
2002 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
2003 } else {
2004 aggr_port_t *port = tx_ring->atr_port;
2005
2006 *val = mac_stat_get(port->lp_mh, stat);
2007 }
2008 return (0);
2009 }
2010
2011 static int
2012 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
2013 {
2014 aggr_grp_t *grp = arg;
2015 int rval = 0;
2016
2017 mutex_enter(&grp->lg_stat_lock);
2018
2019 switch (stat) {
2020 case MAC_STAT_IFSPEED:
2021 *val = grp->lg_ifspeed;
2022 break;
2023
2024 case ETHER_STAT_LINK_DUPLEX:
2025 *val = grp->lg_link_duplex;
2026 break;
2027
2028 default:
2029 /*
2030 * For all other statistics, we return the aggregated stat
2031 * from the underlying ports. aggr_grp_stat() will set
2032 * rval appropriately if the statistic isn't a counter.
2033 */
2034 rval = aggr_grp_stat(grp, stat, val);
2035 }
2036
2037 mutex_exit(&grp->lg_stat_lock);
2038 return (rval);
2039 }
2040
2041 static int
2042 aggr_m_start(void *arg)
2043 {
2044 aggr_grp_t *grp = arg;
2045 aggr_port_t *port;
2046 mac_perim_handle_t mph, pmph;
2047
2048 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2049
2050 /*
2051 * Attempts to start all configured members of the group.
2052 * Group members will be attached when their link-up notification
2053 * is received.
2054 */
2055 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2056 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2057 if (aggr_port_start(port) != 0) {
2249 case MAC_CAPAB_AGGR:
2250 {
2251 mac_capab_aggr_t *aggr_cap;
2252
2253 if (cap_data != NULL) {
2254 aggr_cap = cap_data;
2255 aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2256 aggr_cap->mca_unicst = aggr_m_unicst;
2257 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2258 aggr_cap->mca_arg = arg;
2259 }
2260 return (B_TRUE);
2261 }
2262 default:
2263 return (B_FALSE);
2264 }
2265 return (B_TRUE);
2266 }
2267
2268 /*
2269 * Callback function for MAC layer to register groups.
2270 */
2271 static void
2272 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2273 mac_group_info_t *infop, mac_group_handle_t gh)
2274 {
2275 aggr_grp_t *grp = arg;
2276 aggr_pseudo_rx_group_t *rx_group;
2277 aggr_pseudo_tx_group_t *tx_group;
2278
2279 ASSERT(index == 0);
2280 if (rtype == MAC_RING_TYPE_RX) {
2281 rx_group = &grp->lg_rx_group;
2282 rx_group->arg_gh = gh;
2283 rx_group->arg_grp = grp;
2284
2285 infop->mgi_driver = (mac_group_driver_t)rx_group;
2286 infop->mgi_start = NULL;
2287 infop->mgi_stop = NULL;
2288 infop->mgi_addmac = aggr_addmac;
2289 infop->mgi_remmac = aggr_remmac;
2290 infop->mgi_count = rx_group->arg_ring_cnt;
2291
2292 /*
2293 * Always set the HW VLAN callbacks. They are smart
2294 * enough to know when a port has HW VLAN filters to
2295 * program and when it doesn't.
2296 */
2297 infop->mgi_addvlan = aggr_addvlan;
2298 infop->mgi_remvlan = aggr_remvlan;
2299 } else {
2300 tx_group = &grp->lg_tx_group;
2301 tx_group->atg_gh = gh;
2302 }
2303 }
2304
/*
 * Callback function for MAC layer to register all rings.
 */
2308 static void
2309 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2310 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2311 {
2312 aggr_grp_t *grp = arg;
2313
2314 switch (rtype) {
2315 case MAC_RING_TYPE_RX: {
2316 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group;
2317 aggr_pseudo_rx_ring_t *rx_ring;
2318 mac_intr_t aggr_mac_intr;
2487 while ((addr = *pprev) != NULL) {
2488 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2489 pprev = &addr->aua_next;
2490 continue;
2491 }
2492 break;
2493 }
2494 if (addr == NULL) {
2495 mac_perim_exit(mph);
2496 return (EINVAL);
2497 }
2498
2499 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2500 aggr_port_remmac(port, mac_addr);
2501
2502 *pprev = addr->aua_next;
2503 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2504
2505 mac_perim_exit(mph);
2506 return (err);
2507 }
2508
2509 /*
2510 * Search for VID in the Rx group's list and return a pointer if
2511 * found. Otherwise return NULL.
2512 */
2513 static aggr_vlan_t *
2514 aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
2515 {
2516 ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
2517 for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
2518 avp = list_next(&rx_group->arg_vlans, avp)) {
2519 if (avp->av_vid == vid)
2520 return (avp);
2521 }
2522
2523 return (NULL);
2524 }
2525
2526 /*
2527 * Accept traffic on the specified VID.
2528 *
2529 * Persist VLAN state in the aggr so that ports added later will
2530 * receive the correct filters. In the future it would be nice to
2531 * allow aggr to iterate its clients instead of duplicating state.
2532 */
2533 static int
2534 aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
2535 {
2536 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2537 aggr_grp_t *aggr = rx_group->arg_grp;
2538 aggr_port_t *port, *p;
2539 mac_perim_handle_t mph;
2540 int err = 0;
2541 aggr_vlan_t *avp = NULL;
2542
2543 mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2544
2545 if (vid == MAC_VLAN_UNTAGGED) {
2546 /*
2547 * Aggr is both a MAC provider and MAC client. As a
2548 * MAC provider it is passed MAC_VLAN_UNTAGGED by its
2549 * client. As a client itself, it should pass
2550 * VLAN_ID_NONE to its ports.
2551 */
2552 vid = VLAN_ID_NONE;
2553 rx_group->arg_untagged++;
2554 goto update_ports;
2555 }
2556
2557 avp = aggr_find_vlan(rx_group, vid);
2558
2559 if (avp != NULL) {
2560 avp->av_refs++;
2561 mac_perim_exit(mph);
2562 return (0);
2563 }
2564
2565 avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
2566 avp->av_vid = vid;
2567 avp->av_refs = 1;
2568
2569 update_ports:
2570 for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2571 if ((err = aggr_port_addvlan(port, vid)) != 0)
2572 break;
2573
2574 if (err != 0) {
2575 /*
2576 * If any of these calls fail then we are in a
2577 * situation where the ports have different HW state.
2578 * There's no reasonable action the MAC client can
2579 * take in this scenario to rectify the situation.
2580 */
2581 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2582 int err2;
2583
2584 if ((err2 = aggr_port_remvlan(p, vid)) != 0) {
2585 cmn_err(CE_WARN, "Failed to remove VLAN %u"
2586 " from port %s: errno %d.", vid,
2587 mac_client_name(p->lp_mch), err2);
2588 }
2589
2590 }
2591
2592 if (vid == VLAN_ID_NONE)
2593 rx_group->arg_untagged--;
2594
2595 if (avp != NULL) {
2596 kmem_free(avp, sizeof (aggr_vlan_t));
2597 avp = NULL;
2598 }
2599 }
2600
2601 if (avp != NULL)
2602 list_insert_tail(&rx_group->arg_vlans, avp);
2603
2604 done:
2605 mac_perim_exit(mph);
2606 return (err);
2607 }
2608
2609 /*
2610 * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
2611 */
2612 static int
2613 aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
2614 {
2615 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2616 aggr_grp_t *aggr = rx_group->arg_grp;
2617 aggr_port_t *port, *p;
2618 mac_perim_handle_t mph;
2619 int err = 0;
2620 aggr_vlan_t *avp = NULL;
2621
2622 mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2623
2624 /*
2625 * See the comment in aggr_addvlan().
2626 */
2627 if (vid == MAC_VLAN_UNTAGGED) {
2628 vid = VLAN_ID_NONE;
2629 rx_group->arg_untagged--;
2630
2631 if (rx_group->arg_untagged > 0)
2632 goto done;
2633
2634 goto update_ports;
2635 }
2636
2637 avp = aggr_find_vlan(rx_group, vid);
2638
2639 if (avp == NULL) {
2640 err = ENOENT;
2641 goto done;
2642 }
2643
2644 avp->av_refs--;
2645
2646 if (avp->av_refs > 0)
2647 goto done;
2648
2649 update_ports:
2650 for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2651 if ((err = aggr_port_remvlan(port, vid)) != 0)
2652 break;
2653
2654 /*
2655 * See the comment in aggr_addvlan() for justification of the
2656 * use of VERIFY here.
2657 */
2658 if (err != 0) {
2659 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2660 int err2;
2661
2662 if ((err2 = aggr_port_addvlan(p, vid)) != 0) {
2663 cmn_err(CE_WARN, "Failed to add VLAN %u"
2664 " to port %s: errno %d.", vid,
2665 mac_client_name(p->lp_mch), err2);
2666 }
2667 }
2668
2669 if (avp != NULL)
2670 avp->av_refs++;
2671
2672 if (vid == VLAN_ID_NONE)
2673 rx_group->arg_untagged++;
2674
2675 goto done;
2676 }
2677
2678 if (err == 0 && avp != NULL) {
2679 VERIFY3U(avp->av_refs, ==, 0);
2680 list_remove(&rx_group->arg_vlans, avp);
2681 kmem_free(avp, sizeof (aggr_vlan_t));
2682 }
2683
2684 done:
2685 mac_perim_exit(mph);
2686 return (err);
2687 }
2688
2689 /*
2690 * Add or remove the multicast addresses that are defined for the group
2691 * to or from the specified port.
2692 *
2693 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2694 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2695 * called when the port is either stopped or detached.
2696 */
2697 void
2698 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2699 {
2700 aggr_grp_t *grp = port->lp_grp;
2701
2702 ASSERT(MAC_PERIM_HELD(port->lp_mh));
2703 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2704
2705 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2706 return;
|