Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2017, Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  28  *
  29  * An instance of the structure aggr_grp_t is allocated for each
  30  * link aggregation group. When created, aggr_grp_t objects are
  31  * entered into the aggr_grp_hash hash table maintained by the modhash
  32  * module. The hash key is the linkid associated with the link
  33  * aggregation group.
  34  *
  35  * A set of MAC ports is associated with each aggregation group.
  36  *
  37  * Aggr pseudo TX rings
  38  * --------------------
  39  * The underlying ports (NICs) in an aggregation can have TX rings. To
  40  * enhance aggr's performance, these TX rings are made available to the
  41  * aggr layer as pseudo TX rings. The concept of pseudo rings is not new.
  42  * Pseudo rings already exist on the RX side, where they are known
  43  * as pseudo RX rings. The same concept is extended to the TX side where


 107     mac_prop_info_handle_t);
 108 
 109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
 110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
 111     boolean_t *);
 112 
 113 static void aggr_grp_capab_set(aggr_grp_t *);
 114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
 115 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
 116 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
 117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
 118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
 119 
 120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
 121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
 122 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
 123 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
 124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
 125 static int aggr_addmac(void *, const uint8_t *);
 126 static int aggr_remmac(void *, const uint8_t *);


 127 static mblk_t *aggr_rx_poll(void *, int);
 128 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
 129     const int, mac_ring_info_t *, mac_ring_handle_t);
 130 static void aggr_fill_group(void *, mac_ring_type_t, const int,
 131     mac_group_info_t *, mac_group_handle_t);
 132 
 133 static kmem_cache_t     *aggr_grp_cache;   /* aggr_grp_t objects are freed to it */
 134 static mod_hash_t       *aggr_grp_hash;    /* groups, keyed by linkid */
 135 static krwlock_t        aggr_grp_lock;
 136 static uint_t           aggr_grp_cnt;
 137 static id_space_t       *key_ids;          /* keys for groups created without one */
 138 
 139 #define GRP_HASHSZ              64
     /* Convert a datalink id into the key type used with aggr_grp_hash. */
 140 #define GRP_HASH_KEY(linkid)    ((mod_hash_key_t)(uintptr_t)linkid)
 141 #define AGGR_PORT_NAME_DELIMIT '-'
 142 
     /*
      * All-zero MAC address. An administrator-specified (fixed) group address
      * that compares equal to it is rejected with EINVAL at group creation.
      */
 143 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
 144 
 145 #define AGGR_M_CALLBACK_FLAGS   \
 146         (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)


 307         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 308         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 309 
 310         if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
 311                 return (B_FALSE);
 312 
 313         /*
 314          * Validate the MAC port link speed and update the group
 315          * link speed if needed.
 316          */
 317         if (port->lp_ifspeed == 0 ||
 318             port->lp_link_state != LINK_STATE_UP ||
 319             port->lp_link_duplex != LINK_DUPLEX_FULL) {
 320                 /*
 321                  * Can't attach a MAC port with unknown link speed,
 322                  * down link, or not in full duplex mode.
 323                  */
 324                 return (B_FALSE);
 325         }
 326 

 327         if (grp->lg_ifspeed == 0) {
 328                 /*
 329                  * The group inherits the speed of the first link being
 330                  * attached.
 331                  */
 332                 grp->lg_ifspeed = port->lp_ifspeed;
 333                 link_state_changed = B_TRUE;
 334         } else if (grp->lg_ifspeed != port->lp_ifspeed) {
 335                 /*
 336                  * The link speed of the MAC port must be the same as
 337                  * the group link speed, as per 802.3ad. Since it is
 338                  * not, the attach is cancelled.
 339                  */

 340                 return (B_FALSE);
 341         }

 342 
 343         grp->lg_nattached_ports++;
 344 
 345         /*
 346          * Update the group link state.
 347          */
 348         if (grp->lg_link_state != LINK_STATE_UP) {
 349                 grp->lg_link_state = LINK_STATE_UP;

 350                 grp->lg_link_duplex = LINK_DUPLEX_FULL;

 351                 link_state_changed = B_TRUE;
 352         }
 353 
 354         /*
 355          * Update port's state.
 356          */
 357         port->lp_state = AGGR_PORT_STATE_ATTACHED;
 358 
 359         aggr_grp_multicst_port(port, B_TRUE);
 360 
 361         /*
 362          * Set port's receive callback
 363          */
 364         mac_rx_set(port->lp_mch, aggr_recv_cb, port);
 365 
 366         /*
 367          * If LACP is OFF, the port can be used to send data as soon
 368          * as its link is up and verified to be compatible with the
 369          * aggregation.
 370          *


 388         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 389 
 390         /* update state */
 391         if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
 392                 return (B_FALSE);
 393 
 394         mac_rx_clear(port->lp_mch);
 395 
 396         aggr_grp_multicst_port(port, B_FALSE);
 397 
 398         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 399                 aggr_send_port_disable(port);
 400         else
 401                 aggr_lacp_port_detached(port);
 402 
 403         port->lp_state = AGGR_PORT_STATE_STANDBY;
 404 
 405         grp->lg_nattached_ports--;
 406         if (grp->lg_nattached_ports == 0) {
 407                 /* the last attached MAC port of the group is being detached */
 408                 grp->lg_ifspeed = 0;
 409                 grp->lg_link_state = LINK_STATE_DOWN;


 410                 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;

 411                 link_state_changed = B_TRUE;
 412         }
 413 
 414         return (link_state_changed);
 415 }
 416 
 417 /*
 418  * Update the MAC addresses of the constituent ports of the specified
 419  * group. This function is invoked:
 420  * - after creating a new aggregation group.
 421  * - after adding new ports to an aggregation group.
 422  * - after removing a port from a group when the MAC address of
 423  *   that port was used for the MAC address of the group.
 424  * - after the MAC address of a port changed when the MAC address
 425  *   of that port was used for the MAC address of the group.
 426  *
 427  * Return true if the link state of the aggregation changed, for example
 428  * as a result of a failure changing the MAC address of one of the
 429  * constituent ports.
 430  */


 658 
 659         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 660                 ring = rx_grp->arg_rings + j;
 661                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
 662                     ring->arr_hw_rh != hw_rh) {
 663                         continue;
 664                 }
 665 
 666                 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
 667 
 668                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 669                 ring->arr_hw_rh = NULL;
 670                 ring->arr_port = NULL;
 671                 rx_grp->arg_ring_cnt--;
 672                 mac_hwring_teardown(hw_rh);
 673                 break;
 674         }
 675 }
 676 
 677 /*
 678  * This function is called to create pseudo rings over the hardware rings of
 679  * the underlying device. Note that there is a 1:1 mapping between the pseudo
 680  * RX rings of the aggr and the hardware rings of the underlying port.
      *
      * The caller must hold the group's MAC perimeter (asserted below); the
      * port's perimeter is entered here and released before returning.
      *
      * The unicast addresses recorded in rx_grp are first added to the port,
      * then one pseudo RX ring is created for each HW ring returned by
      * mac_hwrings_get(). If any step fails, the rings and addresses added so
      * far are removed again, the port's RX path is restarted if it had been
      * quiesced, and the error of the failing call is returned. On success
      * port->lp_rx_grp_added is set to B_TRUE and 0 is returned.




 681  */
 682 static int
 683 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 684 {
 685         aggr_grp_t              *grp = port->lp_grp;
 686         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 687         aggr_unicst_addr_t      *addr, *a;
 688         mac_perim_handle_t      pmph;

 689         int                     hw_rh_cnt, i = 0, j;
 690         int                     err = 0;
 691 
 692         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 693         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 694 
 695         /*
 696          * This function must be called after the aggr registers its mac
 697          * and its RX group has been initialized.
 698          */
 699         ASSERT(rx_grp->arg_gh != NULL);
 700 
 701         /*
 702          * Get the list of the underlying HW rings. The HW group handle is
              * stored in port->lp_hwgh and tested below.
 703          */
 704         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 705             &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
 706 
 707         if (port->lp_hwgh != NULL) {
 708                 /*
 709                  * Quiesce the HW ring and the mac srs on the ring. Note
 710                  * that the HW ring will be restarted when the pseudo ring
 711                  * is started. At that time all the packets will be
 712                  * directly passed up to the pseudo RX ring and handled
 713                  * by mac srs created over the pseudo RX ring.
 714                  */
 715                 mac_rx_client_quiesce(port->lp_mch);
 716                 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
 717         }
 718 
 719         /*
 720          * Add all the unicast addresses to the newly added port.
              * On failure the loop stops with addr pointing at the address
              * that could not be added; on success addr ends up NULL.
 721          */






 722         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
 723                 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
 724                         break;
 725         }
 726 
             /* Skipped entirely (i stays 0) if adding an address failed. */
 727         for (i = 0; err == 0 && i < hw_rh_cnt; i++)
 728                 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);



 729 
 730         if (err != 0) {






                     /*
                      * Undo the ring additions. After a ring failure i is one
                      * past the failing index, so the failed ring is included
                      * here as well.
                      * NOTE(review): presumably harmless because the removal
                      * only acts on pseudo rings marked in use -- confirm.
                      */
 731                 for (j = 0; j < i; j++)
 732                         aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
 733 
                     /*
                      * Remove the addresses that were added: everything before
                      * addr, or the whole list when addr is NULL.
                      */
 734                 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
 735                         aggr_port_remmac(port, a->aua_addr);
 736 













                     /* Reverse the quiesce done above and forget the HW group. */
 737                 if (port->lp_hwgh != NULL) {
 738                         mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 739                         mac_rx_client_restart(port->lp_mch);
 740                         port->lp_hwgh = NULL;
 741                 }
 742         } else {
 743                 port->lp_rx_grp_added = B_TRUE;
 744         }
     /* NOTE(review): no goto in the code shown here targets this label. */
 745 done:
 746         mac_perim_exit(pmph);
 747         return (err);
 748 }
 749 
 750 /*
 751  * This function is called by aggr to remove pseudo RX rings over the
 752  * HW rings of the underlying port.
      *
      * The caller must hold the group's MAC perimeter (asserted below); the
      * port's perimeter is entered here and released before returning.
      * Nothing is done unless port->lp_rx_grp_added is set. Otherwise the
      * pseudo rings and the group's unicast addresses are removed from the
      * port, the port's RX path is restarted if a HW group had been recorded
      * in port->lp_hwgh, and lp_rx_grp_added is cleared.


 753  */
 754 static void
 755 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 756 {
 757         aggr_grp_t              *grp = port->lp_grp;
 758         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 759         aggr_unicst_addr_t      *addr;
 760         mac_group_handle_t      hwgh;
 761         mac_perim_handle_t      pmph;
 762         int                     hw_rh_cnt, i;
 763 
 764         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 765         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 766 
 767         if (!port->lp_rx_grp_added)
 768                 goto done;
 769 
 770         ASSERT(rx_grp->arg_gh != NULL);
             /*
              * Only the ring list is used from this call; hwgh is not read
              * afterwards (the check further down uses port->lp_hwgh).
              */
 771         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 772             &hwgh, hw_rh, MAC_RING_TYPE_RX);
 773 
 774         /*
 775          * If hw_rh_cnt is 0, it means that the underlying port does not
 776          * support RX rings. In that case the loop below does nothing.
 777          */
 778         for (i = 0; i < hw_rh_cnt; i++)
 779                 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
 780 
             /* Remove every unicast address of the group from this port. */
 781         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
 782                 aggr_port_remmac(port, addr->aua_addr);
 783 











 784         if (port->lp_hwgh != NULL) {
 785                 port->lp_hwgh = NULL;
 786 
 787                 /*
 788                  * First clear the permanent-quiesced flag of the RX srs then
 789                  * restart the HW ring and the mac srs on the ring. Note that
 790                  * the HW ring and associated SRS will soon be removed when
 791                  * the port is removed from the aggr.
 792                  */
 793                 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 794                 mac_rx_client_restart(port->lp_mch);
 795         }
 796 
 797         port->lp_rx_grp_added = B_FALSE;
 798 done:
 799         mac_perim_exit(pmph);
 800 }
 801 
 802 /*
 803  * Add a pseudo TX ring for the given HW ring handle.


1290         grp->lg_zoneid = crgetzoneid(credp);
1291         grp->lg_ifspeed = 0;
1292         grp->lg_link_state = LINK_STATE_UNKNOWN;
1293         grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1294         grp->lg_started = B_FALSE;
1295         grp->lg_promisc = B_FALSE;
1296         grp->lg_lacp_done = B_FALSE;
1297         grp->lg_tx_notify_done = B_FALSE;
1298         grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1299         grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1300             aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1301         grp->lg_tx_notify_thread = thread_create(NULL, 0,
1302             aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1303         grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1304             MAX_RINGS_PER_GROUP), KM_SLEEP);
1305         grp->lg_tx_blocked_cnt = 0;
1306         bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1307         bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1308         aggr_lacp_init_grp(grp);
1309 




1310         /* add MAC ports to group */
1311         grp->lg_ports = NULL;
1312         grp->lg_nports = 0;
1313         grp->lg_nattached_ports = 0;
1314         grp->lg_ntx_ports = 0;
1315 
1316         /*
1317          * If key is not specified by the user, allocate the key.
1318          */
1319         if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1320                 err = ENOMEM;
1321                 goto bail;
1322         }
1323         grp->lg_key = key;
1324 
1325         for (i = 0; i < nports; i++) {
1326                 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1327                 if (err != 0)
1328                         goto bail;
1329         }
1330 
1331         /*
1332          * If no explicit MAC address was specified by the administrator,
1333          * set it to the MAC address of the first port.
1334          */
1335         grp->lg_addr_fixed = mac_fixed;
1336         if (grp->lg_addr_fixed) {
1337                 /* validate specified address */
1338                 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1339                         err = EINVAL;
1340                         goto bail;
1341                 }
1342                 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1343         } else {
1344                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1345                 grp->lg_mac_addr_port = grp->lg_ports;
1346         }


1528                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1529                 grp->lg_mac_addr_port = grp->lg_ports;
1530                 mac_addr_changed = B_TRUE;
1531         }
1532 
1533         link_state_changed = aggr_grp_detach_port(grp, port);
1534 
1535         /*
1536          * Add the counter statistics of the port while it was aggregated
1537          * to the group's residual statistics.  This is done by obtaining
1538          * the current counter from the underlying MAC then subtracting the
1539          * value of the counter at the moment it was added to the
1540          * aggregation.
1541          */
1542         for (i = 0; i < MAC_NSTAT; i++) {
1543                 stat = i + MAC_STAT_MIN;
1544                 if (!MAC_STAT_ISACOUNTER(stat))
1545                         continue;
1546                 val = aggr_port_stat(port, stat);
1547                 val -= port->lp_stat[i];

1548                 grp->lg_stat[i] += val;

1549         }
1550         for (i = 0; i < ETHER_NSTAT; i++) {
1551                 stat = i + MACTYPE_STAT_MIN;
1552                 if (!ETHER_STAT_ISACOUNTER(stat))
1553                         continue;
1554                 val = aggr_port_stat(port, stat);
1555                 val -= port->lp_ether_stat[i];

1556                 grp->lg_ether_stat[i] += val;

1557         }
1558 
1559         grp->lg_nports--;
1560         mac_perim_exit(mph);
1561 
1562         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1563         aggr_port_delete(port);
1564 
1565         /*
1566          * If the group MAC address has changed, update the MAC address of
1567          * the remaining constituent ports according to the new MAC
1568          * address of the group.
1569          */
1570         if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1571                 link_state_changed = B_TRUE;
1572 
1573 done:
1574         if (mac_addr_changedp != NULL)
1575                 *mac_addr_changedp = mac_addr_changed;
1576         if (link_state_changedp != NULL)


1785                 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1786                 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1787                 aggr_port_delete(port);
1788                 port = cport;
1789         }
1790 
1791         mac_perim_exit(mph);
1792 
1793         kmem_free(grp->lg_tx_blocked_rings,
1794             (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1795         /*
1796          * Wait for the port's lacp timer thread and its notification callback
1797          * to exit before calling mac_unregister() since both need to access
1798          * the mac perimeter of the grp.
1799          */
1800         aggr_grp_port_wait(grp);
1801 
1802         VERIFY(mac_unregister(grp->lg_mh) == 0);
1803         grp->lg_mh = NULL;
1804 


1805         AGGR_GRP_REFRELE(grp);
1806         return (0);
1807 }
1808 
1809 void
1810 aggr_grp_free(aggr_grp_t *grp)
1811 {
             /*
              * Release a group structure that has no remaining references:
              * both the group and port reference counts must already be zero.
              */
1812         ASSERT(grp->lg_refs == 0);
1813         ASSERT(grp->lg_port_ref == 0);
             /*
              * Keys above AGGR_MAX_KEY are returned to the key_ids id space.
              * NOTE(review): presumably these are exactly the keys that were
              * handed out by id_alloc(key_ids) when the user gave no key --
              * confirm against the id space setup.
              */
1814         if (grp->lg_key > AGGR_MAX_KEY) {
1815                 id_free(key_ids, grp->lg_key);
1816                 grp->lg_key = 0;
1817         }
1818         kmem_cache_free(aggr_grp_cache, grp);
1819 }
1820 
1821 int
1822 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1823     aggr_grp_info_new_grp_fn_t new_grp_fn,
1824     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)


1867 
1868 bail:
1869         mac_perim_exit(mph);
1870         AGGR_GRP_REFRELE(grp);
1871         return (rc);
1872 }
1873 
1874 /*ARGSUSED*/
1875 static void
1876 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1877 {
             /*
              * No ioctl is handled by this entry point: every request is
              * negatively acknowledged with ENOTSUP. arg is unused.
              */
1878         miocnak(q, mp, 0, ENOTSUP);
1879 }
1880 
1881 static int
1882 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1883 {
1884         aggr_port_t     *port;
1885         uint_t          stat_index;
1886 


1887         /* We only aggregate counter statistics. */
1888         if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1889             IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1890                 return (ENOTSUP);
1891         }
1892 
1893         /*
1894          * Counter statistics for a group are computed by aggregating the
1895          * counters of the member MACs while they were aggregated, plus
1896          * the residual counter of the group itself, which is updated each
1897          * time a MAC is removed from the group.
1898          */
1899         *val = 0;
1900         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1901                 /* actual port statistic */
1902                 *val += aggr_port_stat(port, stat);
1903                 /*
1904                  * minus the port stat when it was added, plus any residual
1905                  * amount for the group.
1906                  */


1935 
1936 int
1937 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1938 {
             /*
              * Report statistic `stat' for a pseudo TX ring through *val.
              * rdriver is the aggr_pseudo_tx_ring_t of the ring. This
              * function always returns 0.
              */
1939         aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1940 
1941         if (tx_ring->atr_hw_rh != NULL) {
                     /* A HW ring is attached: take the stat from that ring. */
1942                 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1943         } else {
                     /* No HW ring handle: use the stat of the port's MAC. */
1944                 aggr_port_t     *port = tx_ring->atr_port;
1945 
1946                 *val = mac_stat_get(port->lp_mh, stat);
1947         }
1948         return (0);
1949 }
1950 
1951 static int
1952 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1953 {
             /*
              * Return statistic `stat' of the aggregation `arg' through *val.
              * Link speed and duplex come from the group itself; every other
              * statistic is delegated to aggr_grp_stat(), whose return value
              * becomes the return value of this function.
              *
              * NOTE(review): the whole lookup runs inside the group's MAC
              * perimeter. The change list at the top of this listing mentions
              * a stat/kstat deadlock (11489) -- confirm whether this path is
              * the one involved.
              */
1954         aggr_grp_t              *grp = arg;
1955         mac_perim_handle_t      mph;
1956         int                     rval = 0;
1957 
1958         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1959 
1960         switch (stat) {
1961         case MAC_STAT_IFSPEED:
1962                 *val = grp->lg_ifspeed;
1963                 break;
1964 
1965         case ETHER_STAT_LINK_DUPLEX:
1966                 *val = grp->lg_link_duplex;
1967                 break;
1968 
1969         default:
1970                 /*
1971                  * For all other statistics, we return the aggregated stat
1972                  * from the underlying ports.  aggr_grp_stat() will set
1973                  * rval appropriately if the statistic isn't a counter.
1974                  */
1975                 rval = aggr_grp_stat(grp, stat, val);
1976         }
1977 
1978         mac_perim_exit(mph);
1979         return (rval);
1980 }
1981 
1982 static int
1983 aggr_m_start(void *arg)
1984 {
1985         aggr_grp_t *grp = arg;
1986         aggr_port_t *port;
1987         mac_perim_handle_t mph, pmph;
1988 
1989         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1990 
1991         /*
1992          * Attempts to start all configured members of the group.
1993          * Group members will be attached when their link-up notification
1994          * is received.
1995          */
1996         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1997                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1998                 if (aggr_port_start(port) != 0) {


2190         case MAC_CAPAB_AGGR:
2191         {
2192                 mac_capab_aggr_t *aggr_cap;
2193 
2194                 if (cap_data != NULL) {
2195                         aggr_cap = cap_data;
2196                         aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2197                         aggr_cap->mca_unicst = aggr_m_unicst;
2198                         aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2199                         aggr_cap->mca_arg = arg;
2200                 }
2201                 return (B_TRUE);
2202         }
2203         default:
2204                 return (B_FALSE);
2205         }
2206         return (B_TRUE);
2207 }
2208 
2209 /*
2210  * Callback function for MAC layer to register groups.
      *
      * arg is the aggr_grp_t. Only group index 0 is expected (asserted).
      * For an RX group the MAC group handle and a back pointer to the aggr
      * are stored in the pseudo RX group, and infop is filled in with the
      * add/remove MAC address entry points and the current ring count; no
      * start/stop entry points are provided. For any other ring type only
      * the MAC group handle is recorded in the pseudo TX group and infop is
      * left untouched.
2211  */
2212 static void
2213 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2214     mac_group_info_t *infop, mac_group_handle_t gh)
2215 {
2216         aggr_grp_t *grp = arg;
2217         aggr_pseudo_rx_group_t *rx_group;
2218         aggr_pseudo_tx_group_t *tx_group;
2219 
2220         ASSERT(index == 0);
2221         if (rtype == MAC_RING_TYPE_RX) {
2222                 rx_group = &grp->lg_rx_group;
2223                 rx_group->arg_gh = gh;
2224                 rx_group->arg_grp = grp;
2225 
                     /* The pseudo RX group itself is the driver handle. */
2226                 infop->mgi_driver = (mac_group_driver_t)rx_group;
2227                 infop->mgi_start = NULL;
2228                 infop->mgi_stop = NULL;
2229                 infop->mgi_addmac = aggr_addmac;
2230                 infop->mgi_remmac = aggr_remmac;
2231                 infop->mgi_count = rx_group->arg_ring_cnt;








2232         } else {
2233                 tx_group = &grp->lg_tx_group;
2234                 tx_group->atg_gh = gh;
2235         }
2236 }
2237 
2238 /*
2239  * Callback function for MAC layer to register all rings.
2240  */
2241 static void
2242 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2243     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2244 {
2245         aggr_grp_t      *grp = arg;
2246 
2247         switch (rtype) {
2248         case MAC_RING_TYPE_RX: {
2249                 aggr_pseudo_rx_group_t  *rx_group = &grp->lg_rx_group;
2250                 aggr_pseudo_rx_ring_t   *rx_ring;
2251                 mac_intr_t              aggr_mac_intr;


2420         while ((addr = *pprev) != NULL) {
2421                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2422                         pprev = &addr->aua_next;
2423                         continue;
2424                 }
2425                 break;
2426         }
2427         if (addr == NULL) {
2428                 mac_perim_exit(mph);
2429                 return (EINVAL);
2430         }
2431 
2432         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2433                 aggr_port_remmac(port, mac_addr);
2434 
2435         *pprev = addr->aua_next;
2436         kmem_free(addr, sizeof (aggr_unicst_addr_t));
2437 
2438         mac_perim_exit(mph);
2439         return (err);




















































































































































































2440 }
2441 
2442 /*
2443  * Add or remove the multicast addresses that are defined for the group
2444  * to or from the specified port.
2445  *
2446  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2447  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2448  * called when the port is either stopped or detached.
2449  */
2450 void
2451 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2452 {
2453         aggr_grp_t *grp = port->lp_grp;
2454 
2455         ASSERT(MAC_PERIM_HELD(port->lp_mh));
2456         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2457 
2458         if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2459                 return;




   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2018 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  28  *
  29  * An instance of the structure aggr_grp_t is allocated for each
  30  * link aggregation group. When created, aggr_grp_t objects are
  31  * entered into the aggr_grp_hash hash table maintained by the modhash
  32  * module. The hash key is the linkid associated with the link
  33  * aggregation group.
  34  *
  35  * A set of MAC ports is associated with each aggregation group.
  36  *
  37  * Aggr pseudo TX rings
  38  * --------------------
  39  * The underlying ports (NICs) in an aggregation can have TX rings. To
  40  * enhance aggr's performance, these TX rings are made available to the
  41  * aggr layer as pseudo TX rings. The concept of pseudo rings is not new.
  42  * Pseudo rings already exist on the RX side, where they are known
  43  * as pseudo RX rings. The same concept is extended to the TX side where


 107     mac_prop_info_handle_t);
 108 
 109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
 110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
 111     boolean_t *);
 112 
 113 static void aggr_grp_capab_set(aggr_grp_t *);
 114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
 115 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
 116 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
 117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
 118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
 119 
 120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
 121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
 122 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
 123 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
 124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
 125 static int aggr_addmac(void *, const uint8_t *);
 126 static int aggr_remmac(void *, const uint8_t *);
 127 static int aggr_addvlan(mac_group_driver_t, uint16_t);
 128 static int aggr_remvlan(mac_group_driver_t, uint16_t);
 129 static mblk_t *aggr_rx_poll(void *, int);
 130 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
 131     const int, mac_ring_info_t *, mac_ring_handle_t);
 132 static void aggr_fill_group(void *, mac_ring_type_t, const int,
 133     mac_group_info_t *, mac_group_handle_t);
 134 
 135 static kmem_cache_t     *aggr_grp_cache;   /* cache of aggr_grp_t objects */
 136 static mod_hash_t       *aggr_grp_hash;    /* groups, keyed by linkid */
 137 static krwlock_t        aggr_grp_lock;
 138 static uint_t           aggr_grp_cnt;
 139 static id_space_t       *key_ids;
 140 
 141 #define GRP_HASHSZ              64
     /* Convert a datalink id into the key type used with aggr_grp_hash. */
 142 #define GRP_HASH_KEY(linkid)    ((mod_hash_key_t)(uintptr_t)linkid)
 143 #define AGGR_PORT_NAME_DELIMIT '-'
 144 
     /* All-zero MAC address constant. */
 145 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
 146 
 147 #define AGGR_M_CALLBACK_FLAGS   \
 148         (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)


 309         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 310         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 311 
 312         if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
 313                 return (B_FALSE);
 314 
 315         /*
 316          * Validate the MAC port link speed and update the group
 317          * link speed if needed.
 318          */
 319         if (port->lp_ifspeed == 0 ||
 320             port->lp_link_state != LINK_STATE_UP ||
 321             port->lp_link_duplex != LINK_DUPLEX_FULL) {
 322                 /*
 323                  * Can't attach a MAC port with unknown link speed,
 324                  * down link, or not in full duplex mode.
 325                  */
 326                 return (B_FALSE);
 327         }
 328 
 329         mutex_enter(&grp->lg_stat_lock);
 330         if (grp->lg_ifspeed == 0) {
 331                 /*
 332                  * The group inherits the speed of the first link being
 333                  * attached.
 334                  */
 335                 grp->lg_ifspeed = port->lp_ifspeed;
 336                 link_state_changed = B_TRUE;
 337         } else if (grp->lg_ifspeed != port->lp_ifspeed) {
 338                 /*
 339                  * The link speed of the MAC port must be the same as
 340                  * the group link speed, as per 802.3ad. Since it is
 341                  * not, the attach is cancelled.
 342                  */
 343                 mutex_exit(&grp->lg_stat_lock);
 344                 return (B_FALSE);
 345         }
 346         mutex_exit(&grp->lg_stat_lock);
 347 
 348         grp->lg_nattached_ports++;
 349 
 350         /*
 351          * Update the group link state.
 352          */
 353         if (grp->lg_link_state != LINK_STATE_UP) {
 354                 grp->lg_link_state = LINK_STATE_UP;
 355                 mutex_enter(&grp->lg_stat_lock);
 356                 grp->lg_link_duplex = LINK_DUPLEX_FULL;
 357                 mutex_exit(&grp->lg_stat_lock);
 358                 link_state_changed = B_TRUE;
 359         }
 360 
 361         /*
 362          * Update port's state.
 363          */
 364         port->lp_state = AGGR_PORT_STATE_ATTACHED;
 365 
 366         aggr_grp_multicst_port(port, B_TRUE);
 367 
 368         /*
 369          * Set port's receive callback
 370          */
 371         mac_rx_set(port->lp_mch, aggr_recv_cb, port);
 372 
 373         /*
 374          * If LACP is OFF, the port can be used to send data as soon
 375          * as its link is up and verified to be compatible with the
 376          * aggregation.
 377          *


 395         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 396 
 397         /* update state */
 398         if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
 399                 return (B_FALSE);
 400 
 401         mac_rx_clear(port->lp_mch);
 402 
 403         aggr_grp_multicst_port(port, B_FALSE);
 404 
 405         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 406                 aggr_send_port_disable(port);
 407         else
 408                 aggr_lacp_port_detached(port);
 409 
 410         port->lp_state = AGGR_PORT_STATE_STANDBY;
 411 
 412         grp->lg_nattached_ports--;
 413         if (grp->lg_nattached_ports == 0) {
 414                 /* the last attached MAC port of the group is being detached */

 415                 grp->lg_link_state = LINK_STATE_DOWN;
 416                 mutex_enter(&grp->lg_stat_lock);
 417                 grp->lg_ifspeed = 0;
 418                 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
 419                 mutex_exit(&grp->lg_stat_lock);
 420                 link_state_changed = B_TRUE;
 421         }
 422 
 423         return (link_state_changed);
 424 }
 425 
 426 /*
 427  * Update the MAC addresses of the constituent ports of the specified
 428  * group. This function is invoked:
 429  * - after creating a new aggregation group.
 430  * - after adding new ports to an aggregation group.
 431  * - after removing a port from a group when the MAC address of
 432  *   that port was used for the MAC address of the group.
 433  * - after the MAC address of a port changed when the MAC address
 434  *   of that port was used for the MAC address of the group.
 435  *
 436  * Return true if the link state of the aggregation changed, for example
 437  * as a result of a failure changing the MAC address of one of the
 438  * constituent ports.
 439  */


 667 
 668         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 669                 ring = rx_grp->arg_rings + j;
 670                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
 671                     ring->arr_hw_rh != hw_rh) {
 672                         continue;
 673                 }
 674 
 675                 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
 676 
 677                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 678                 ring->arr_hw_rh = NULL;
 679                 ring->arr_port = NULL;
 680                 rx_grp->arg_ring_cnt--;
 681                 mac_hwring_teardown(hw_rh);
 682                 break;
 683         }
 684 }
 685 
 686 /*
 687  * Create pseudo rings over the HW rings of the port.
 688  *
 689  * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
 690  *
 691  * o Program existing unicast filters on the pseudo group into the HW group.
 692  *
 693  * o Program existing VLAN filters on the pseudo group into the HW group.
 694  */
 695 static int
 696 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 697 {
 698         aggr_grp_t              *grp = port->lp_grp;
 699         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 700         aggr_unicst_addr_t      *addr, *a;
 701         mac_perim_handle_t      pmph;
 702         aggr_vlan_t             *avp;
 703         int                     hw_rh_cnt, i = 0, j;
 704         int                     err = 0;
 705 
 706         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 707         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 708 
 709         /*
 710          * This function must be called after the aggr registers its MAC
 711          * and its Rx group has been initialized.
 712          */
 713         ASSERT(rx_grp->arg_gh != NULL);
 714 
 715         /*
 716          * Get the list of the underlying HW rings.
 717          */
 718         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 719             &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
 720 
 721         if (port->lp_hwgh != NULL) {
 722                 /*
 723                  * Quiesce the HW ring and the MAC SRS on the ring. Note
 724                  * that the HW ring will be restarted when the pseudo ring
 725                  * is started. At that time all the packets will be
 726                  * directly passed up to the pseudo Rx ring and handled
 727                  * by MAC SRS created over the pseudo Rx ring.
 728                  */
 729                 mac_rx_client_quiesce(port->lp_mch);
 730                 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
 731         }
 732 
 733         /*
 734          * Add existing VLAN and unicast address filters to the port.
 735          */
 736         for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
 737             avp = list_next(&rx_grp->arg_vlans, avp)) {
 738                 if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0)
 739                         goto err;
 740         }
 741 
 742         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
 743                 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
 744                         goto err;
 745         }
 746 
 747         for (i = 0; i < hw_rh_cnt; i++) {
 748                 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
 749                 if (err != 0)
 750                         goto err;
 751         }
 752 
 753         port->lp_rx_grp_added = B_TRUE;
 754         mac_perim_exit(pmph);
 755         return (0);
 756 
 757 err:
 758         ASSERT(err != 0);
 759 
 760         for (j = 0; j < i; j++)
 761                 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
 762 
 763         for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
 764                 aggr_port_remmac(port, a->aua_addr);
 765 
 766         if (avp != NULL)
 767                 avp = list_prev(&rx_grp->arg_vlans, avp);
 768 
 769         for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
 770                 int err2;
 771 
 772                 if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) {
 773                         cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
 774                             ": errno %d.", avp->av_vid,
 775                             mac_client_name(port->lp_mch), err2);
 776                 }
 777         }
 778 
 779         if (port->lp_hwgh != NULL) {
 780                 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 781                 mac_rx_client_restart(port->lp_mch);
 782                 port->lp_hwgh = NULL;
 783         }
 784 



 785         mac_perim_exit(pmph);
 786         return (err);
 787 }
 788 
 789 /*
 790  * Destroy the pseudo rings mapping to this port and remove all VLAN
 791  * and unicast filters from this port. Even if there are no underlying
 792  * HW rings we must still remove the unicast filters to take the port
 793  * out of promisc mode.
 794  */
 795 static void
 796 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 797 {
 798         aggr_grp_t              *grp = port->lp_grp;
 799         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 800         aggr_unicst_addr_t      *addr;
 801         mac_group_handle_t      hwgh;
 802         mac_perim_handle_t      pmph;
 803         int                     hw_rh_cnt, i;
 804 
 805         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 806         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 807 
 808         if (!port->lp_rx_grp_added)
 809                 goto done;
 810 
 811         ASSERT(rx_grp->arg_gh != NULL);
 812         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 813             &hwgh, hw_rh, MAC_RING_TYPE_RX);
 814 




 815         for (i = 0; i < hw_rh_cnt; i++)
 816                 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
 817 
 818         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
 819                 aggr_port_remmac(port, addr->aua_addr);
 820 
 821         for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
 822             avp = list_next(&rx_grp->arg_vlans, avp)) {
 823                 int err;
 824 
 825                 if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) {
 826                         cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
 827                             ": errno %d.", avp->av_vid,
 828                             mac_client_name(port->lp_mch), err);
 829                 }
 830         }
 831 
 832         if (port->lp_hwgh != NULL) {
 833                 port->lp_hwgh = NULL;
 834 
 835                 /*
 836                  * First clear the permanent-quiesced flag of the RX srs then
 837                  * restart the HW ring and the mac srs on the ring. Note that
 838                  * the HW ring and associated SRS will soon been removed when
 839                  * the port is removed from the aggr.
 840                  */
 841                 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 842                 mac_rx_client_restart(port->lp_mch);
 843         }
 844 
 845         port->lp_rx_grp_added = B_FALSE;
 846 done:
 847         mac_perim_exit(pmph);
 848 }
 849 
 850 /*
 851  * Add a pseudo TX ring for the given HW ring handle.


1338         grp->lg_zoneid = crgetzoneid(credp);
1339         grp->lg_ifspeed = 0;
1340         grp->lg_link_state = LINK_STATE_UNKNOWN;
1341         grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1342         grp->lg_started = B_FALSE;
1343         grp->lg_promisc = B_FALSE;
1344         grp->lg_lacp_done = B_FALSE;
1345         grp->lg_tx_notify_done = B_FALSE;
1346         grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1347         grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1348             aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1349         grp->lg_tx_notify_thread = thread_create(NULL, 0,
1350             aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1351         grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1352             MAX_RINGS_PER_GROUP), KM_SLEEP);
1353         grp->lg_tx_blocked_cnt = 0;
1354         bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1355         bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1356         aggr_lacp_init_grp(grp);
1357 
1358         grp->lg_rx_group.arg_untagged = 0;
1359         list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t),
1360             offsetof(aggr_vlan_t, av_link));
1361 
1362         /* add MAC ports to group */
1363         grp->lg_ports = NULL;
1364         grp->lg_nports = 0;
1365         grp->lg_nattached_ports = 0;
1366         grp->lg_ntx_ports = 0;
1367 
1368         /*
1369          * If key is not specified by the user, allocate the key.
1370          */
1371         if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1372                 err = ENOMEM;
1373                 goto bail;
1374         }
1375         grp->lg_key = key;
1376 
1377         for (i = 0; i < nports; i++) {
1378                 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
1379                 if (err != 0)
1380                         goto bail;
1381         }
1382 
1383         /*
1384          * If no explicit MAC address was specified by the administrator,
1385          * set it to the MAC address of the first port.
1386          */
1387         grp->lg_addr_fixed = mac_fixed;
1388         if (grp->lg_addr_fixed) {
1389                 /* validate specified address */
1390                 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1391                         err = EINVAL;
1392                         goto bail;
1393                 }
1394                 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1395         } else {
1396                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1397                 grp->lg_mac_addr_port = grp->lg_ports;
1398         }


1580                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1581                 grp->lg_mac_addr_port = grp->lg_ports;
1582                 mac_addr_changed = B_TRUE;
1583         }
1584 
1585         link_state_changed = aggr_grp_detach_port(grp, port);
1586 
1587         /*
 1588          * Add the counter statistics of the port while it was aggregated
1589          * to the group's residual statistics.  This is done by obtaining
1590          * the current counter from the underlying MAC then subtracting the
1591          * value of the counter at the moment it was added to the
1592          * aggregation.
1593          */
1594         for (i = 0; i < MAC_NSTAT; i++) {
1595                 stat = i + MAC_STAT_MIN;
1596                 if (!MAC_STAT_ISACOUNTER(stat))
1597                         continue;
1598                 val = aggr_port_stat(port, stat);
1599                 val -= port->lp_stat[i];
1600                 mutex_enter(&grp->lg_stat_lock);
1601                 grp->lg_stat[i] += val;
1602                 mutex_exit(&grp->lg_stat_lock);
1603         }
1604         for (i = 0; i < ETHER_NSTAT; i++) {
1605                 stat = i + MACTYPE_STAT_MIN;
1606                 if (!ETHER_STAT_ISACOUNTER(stat))
1607                         continue;
1608                 val = aggr_port_stat(port, stat);
1609                 val -= port->lp_ether_stat[i];
1610                 mutex_enter(&grp->lg_stat_lock);
1611                 grp->lg_ether_stat[i] += val;
1612                 mutex_exit(&grp->lg_stat_lock);
1613         }
1614 
1615         grp->lg_nports--;
1616         mac_perim_exit(mph);
1617 
1618         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1619         aggr_port_delete(port);
1620 
1621         /*
1622          * If the group MAC address has changed, update the MAC address of
1623          * the remaining constituent ports according to the new MAC
1624          * address of the group.
1625          */
1626         if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1627                 link_state_changed = B_TRUE;
1628 
1629 done:
1630         if (mac_addr_changedp != NULL)
1631                 *mac_addr_changedp = mac_addr_changed;
1632         if (link_state_changedp != NULL)


1841                 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1842                 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1843                 aggr_port_delete(port);
1844                 port = cport;
1845         }
1846 
1847         mac_perim_exit(mph);
1848 
1849         kmem_free(grp->lg_tx_blocked_rings,
1850             (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1851         /*
1852          * Wait for the port's lacp timer thread and its notification callback
 1853          * to exit before calling mac_unregister() since both need to access
1854          * the mac perimeter of the grp.
1855          */
1856         aggr_grp_port_wait(grp);
1857 
1858         VERIFY(mac_unregister(grp->lg_mh) == 0);
1859         grp->lg_mh = NULL;
1860 
1861         list_destroy(&(grp->lg_rx_group.arg_vlans));
1862 
1863         AGGR_GRP_REFRELE(grp);
1864         return (0);
1865 }
1866 
1867 void
1868 aggr_grp_free(aggr_grp_t *grp)
1869 {
1870         ASSERT(grp->lg_refs == 0);
1871         ASSERT(grp->lg_port_ref == 0);
1872         if (grp->lg_key > AGGR_MAX_KEY) {
1873                 id_free(key_ids, grp->lg_key);
1874                 grp->lg_key = 0;
1875         }
1876         kmem_cache_free(aggr_grp_cache, grp);
1877 }
1878 
1879 int
1880 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1881     aggr_grp_info_new_grp_fn_t new_grp_fn,
1882     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)


1925 
1926 bail:
1927         mac_perim_exit(mph);
1928         AGGR_GRP_REFRELE(grp);
1929         return (rc);
1930 }
1931 
1932 /*ARGSUSED*/
1933 static void
1934 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1935 {
1936         miocnak(q, mp, 0, ENOTSUP);
1937 }
1938 
1939 static int
1940 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1941 {
1942         aggr_port_t     *port;
1943         uint_t          stat_index;
1944 
1945         ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
1946 
1947         /* We only aggregate counter statistics. */
1948         if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1949             IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1950                 return (ENOTSUP);
1951         }
1952 
1953         /*
1954          * Counter statistics for a group are computed by aggregating the
 1955          * counters of the member MACs while they were aggregated, plus
1956          * the residual counter of the group itself, which is updated each
1957          * time a MAC is removed from the group.
1958          */
1959         *val = 0;
1960         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1961                 /* actual port statistic */
1962                 *val += aggr_port_stat(port, stat);
1963                 /*
1964                  * minus the port stat when it was added, plus any residual
1965                  * amount for the group.
1966                  */


1995 
1996 int
1997 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1998 {
1999         aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
2000 
2001         if (tx_ring->atr_hw_rh != NULL) {
2002                 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
2003         } else {
2004                 aggr_port_t     *port = tx_ring->atr_port;
2005 
2006                 *val = mac_stat_get(port->lp_mh, stat);
2007         }
2008         return (0);
2009 }
2010 
2011 static int
2012 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
2013 {
2014         aggr_grp_t              *grp = arg;

2015         int                     rval = 0;
2016 
2017         mutex_enter(&grp->lg_stat_lock);
2018 
2019         switch (stat) {
2020         case MAC_STAT_IFSPEED:
2021                 *val = grp->lg_ifspeed;
2022                 break;
2023 
2024         case ETHER_STAT_LINK_DUPLEX:
2025                 *val = grp->lg_link_duplex;
2026                 break;
2027 
2028         default:
2029                 /*
2030                  * For all other statistics, we return the aggregated stat
2031                  * from the underlying ports.  aggr_grp_stat() will set
2032                  * rval appropriately if the statistic isn't a counter.
2033                  */
2034                 rval = aggr_grp_stat(grp, stat, val);
2035         }
2036 
2037         mutex_exit(&grp->lg_stat_lock);
2038         return (rval);
2039 }
2040 
2041 static int
2042 aggr_m_start(void *arg)
2043 {
2044         aggr_grp_t *grp = arg;
2045         aggr_port_t *port;
2046         mac_perim_handle_t mph, pmph;
2047 
2048         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2049 
2050         /*
2051          * Attempts to start all configured members of the group.
2052          * Group members will be attached when their link-up notification
2053          * is received.
2054          */
2055         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2056                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2057                 if (aggr_port_start(port) != 0) {


2249         case MAC_CAPAB_AGGR:
2250         {
2251                 mac_capab_aggr_t *aggr_cap;
2252 
2253                 if (cap_data != NULL) {
2254                         aggr_cap = cap_data;
2255                         aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2256                         aggr_cap->mca_unicst = aggr_m_unicst;
2257                         aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2258                         aggr_cap->mca_arg = arg;
2259                 }
2260                 return (B_TRUE);
2261         }
2262         default:
2263                 return (B_FALSE);
2264         }
2265         return (B_TRUE);
2266 }
2267 
2268 /*
2269  * Callback function for MAC layer to register groups.
2270  */
2271 static void
2272 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2273     mac_group_info_t *infop, mac_group_handle_t gh)
2274 {
2275         aggr_grp_t *grp = arg;
2276         aggr_pseudo_rx_group_t *rx_group;
2277         aggr_pseudo_tx_group_t *tx_group;
2278 
2279         ASSERT(index == 0);
2280         if (rtype == MAC_RING_TYPE_RX) {
2281                 rx_group = &grp->lg_rx_group;
2282                 rx_group->arg_gh = gh;
2283                 rx_group->arg_grp = grp;
2284 
2285                 infop->mgi_driver = (mac_group_driver_t)rx_group;
2286                 infop->mgi_start = NULL;
2287                 infop->mgi_stop = NULL;
2288                 infop->mgi_addmac = aggr_addmac;
2289                 infop->mgi_remmac = aggr_remmac;
2290                 infop->mgi_count = rx_group->arg_ring_cnt;
2291 
2292                 /*
2293                  * Always set the HW VLAN callbacks. They are smart
2294                  * enough to know when a port has HW VLAN filters to
2295                  * program and when it doesn't.
2296                  */
2297                 infop->mgi_addvlan = aggr_addvlan;
2298                 infop->mgi_remvlan = aggr_remvlan;
2299         } else {
2300                 tx_group = &grp->lg_tx_group;
2301                 tx_group->atg_gh = gh;
2302         }
2303 }
2304 
2305 /*
 2306  * Callback function for MAC layer to register all rings.
2307  */
2308 static void
2309 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2310     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2311 {
2312         aggr_grp_t      *grp = arg;
2313 
2314         switch (rtype) {
2315         case MAC_RING_TYPE_RX: {
2316                 aggr_pseudo_rx_group_t  *rx_group = &grp->lg_rx_group;
2317                 aggr_pseudo_rx_ring_t   *rx_ring;
2318                 mac_intr_t              aggr_mac_intr;


2487         while ((addr = *pprev) != NULL) {
2488                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2489                         pprev = &addr->aua_next;
2490                         continue;
2491                 }
2492                 break;
2493         }
2494         if (addr == NULL) {
2495                 mac_perim_exit(mph);
2496                 return (EINVAL);
2497         }
2498 
2499         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2500                 aggr_port_remmac(port, mac_addr);
2501 
2502         *pprev = addr->aua_next;
2503         kmem_free(addr, sizeof (aggr_unicst_addr_t));
2504 
2505         mac_perim_exit(mph);
2506         return (err);
2507 }
2508 
2509 /*
2510  * Search for VID in the Rx group's list and return a pointer if
2511  * found. Otherwise return NULL.
2512  */
2513 static aggr_vlan_t *
2514 aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
2515 {
2516         ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
2517         for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
2518             avp = list_next(&rx_group->arg_vlans, avp)) {
2519                 if (avp->av_vid == vid)
2520                         return (avp);
2521         }
2522 
2523         return (NULL);
2524 }
2525 
2526 /*
2527  * Accept traffic on the specified VID.
2528  *
2529  * Persist VLAN state in the aggr so that ports added later will
2530  * receive the correct filters. In the future it would be nice to
2531  * allow aggr to iterate its clients instead of duplicating state.
2532  */
2533 static int
2534 aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
2535 {
2536         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2537         aggr_grp_t              *aggr = rx_group->arg_grp;
2538         aggr_port_t             *port, *p;
2539         mac_perim_handle_t      mph;
2540         int                     err = 0;
2541         aggr_vlan_t             *avp = NULL;
2542 
2543         mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2544 
2545         if (vid == MAC_VLAN_UNTAGGED) {
2546                 /*
2547                  * Aggr is both a MAC provider and MAC client. As a
2548                  * MAC provider it is passed MAC_VLAN_UNTAGGED by its
2549                  * client. As a client itself, it should pass
2550                  * VLAN_ID_NONE to its ports.
2551                  */
2552                 vid = VLAN_ID_NONE;
2553                 rx_group->arg_untagged++;
2554                 goto update_ports;
2555         }
2556 
2557         avp = aggr_find_vlan(rx_group, vid);
2558 
2559         if (avp != NULL) {
2560                 avp->av_refs++;
2561                 mac_perim_exit(mph);
2562                 return (0);
2563         }
2564 
2565         avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
2566         avp->av_vid = vid;
2567         avp->av_refs = 1;
2568 
2569 update_ports:
2570         for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2571                 if ((err = aggr_port_addvlan(port, vid)) != 0)
2572                         break;
2573 
2574         if (err != 0) {
2575                 /*
2576                  * If any of these calls fail then we are in a
2577                  * situation where the ports have different HW state.
2578                  * There's no reasonable action the MAC client can
2579                  * take in this scenario to rectify the situation.
2580                  */
2581                 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2582                         int err2;
2583 
2584                         if ((err2 = aggr_port_remvlan(p, vid)) != 0) {
2585                                 cmn_err(CE_WARN, "Failed to remove VLAN %u"
2586                                     " from port %s: errno %d.", vid,
2587                                     mac_client_name(p->lp_mch), err2);
2588                         }
2589 
2590                 }
2591 
2592                 if (vid == VLAN_ID_NONE)
2593                         rx_group->arg_untagged--;
2594 
2595                 if (avp != NULL) {
2596                         kmem_free(avp, sizeof (aggr_vlan_t));
2597                         avp = NULL;
2598                 }
2599         }
2600 
2601         if (avp != NULL)
2602                 list_insert_tail(&rx_group->arg_vlans, avp);
2603 
2604 done:
2605         mac_perim_exit(mph);
2606         return (err);
2607 }
2608 
2609 /*
2610  * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
2611  */
2612 static int
2613 aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
2614 {
2615         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2616         aggr_grp_t              *aggr = rx_group->arg_grp;
2617         aggr_port_t             *port, *p;
2618         mac_perim_handle_t      mph;
2619         int                     err = 0;
2620         aggr_vlan_t             *avp = NULL;
2621 
2622         mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2623 
2624         /*
2625          * See the comment in aggr_addvlan().
2626          */
2627         if (vid == MAC_VLAN_UNTAGGED) {
2628                 vid = VLAN_ID_NONE;
2629                 rx_group->arg_untagged--;
2630 
2631                 if (rx_group->arg_untagged > 0)
2632                         goto done;
2633 
2634                 goto update_ports;
2635         }
2636 
2637         avp = aggr_find_vlan(rx_group, vid);
2638 
2639         if (avp == NULL) {
2640                 err = ENOENT;
2641                 goto done;
2642         }
2643 
2644         avp->av_refs--;
2645 
2646         if (avp->av_refs > 0)
2647                 goto done;
2648 
2649 update_ports:
2650         for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2651                 if ((err = aggr_port_remvlan(port, vid)) != 0)
2652                         break;
2653 
2654         /*
2655          * See the comment in aggr_addvlan() for justification of the
2656          * use of VERIFY here.
2657          */
2658         if (err != 0) {
2659                 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2660                         int err2;
2661 
2662                         if ((err2 = aggr_port_addvlan(p, vid)) != 0) {
2663                                 cmn_err(CE_WARN, "Failed to add VLAN %u"
2664                                     " to port %s: errno %d.", vid,
2665                                     mac_client_name(p->lp_mch), err2);
2666                         }
2667                 }
2668 
2669                 if (avp != NULL)
2670                         avp->av_refs++;
2671 
2672                 if (vid == VLAN_ID_NONE)
2673                         rx_group->arg_untagged++;
2674 
2675                 goto done;
2676         }
2677 
2678         if (err == 0 && avp != NULL) {
2679                 VERIFY3U(avp->av_refs, ==, 0);
2680                 list_remove(&rx_group->arg_vlans, avp);
2681                 kmem_free(avp, sizeof (aggr_vlan_t));
2682         }
2683 
2684 done:
2685         mac_perim_exit(mph);
2686         return (err);
2687 }
2688 
2689 /*
2690  * Add or remove the multicast addresses that are defined for the group
2691  * to or from the specified port.
2692  *
2693  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2694  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2695  * called when the port is either stopped or detached.
2696  */
2697 void
2698 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2699 {
2700         aggr_grp_t *grp = port->lp_grp;
2701 
2702         ASSERT(MAC_PERIM_HELD(port->lp_mh));
2703         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2704 
2705         if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2706                 return;