1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2017, Joyent, Inc.
  24  */
  25 
  26 /*
  27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
  28  *
  29  * An instance of the structure aggr_grp_t is allocated for each
  30  * link aggregation group. When created, aggr_grp_t objects are
  31  * entered into the aggr_grp_hash hash table maintained by the modhash
  32  * module. The hash key is the linkid associated with the link
  33  * aggregation group.
  34  *
 * A set of MAC ports is associated with each aggregation group.
  36  *
  37  * Aggr pseudo TX rings
  38  * --------------------
  39  * The underlying ports (NICs) in an aggregation can have TX rings. To
  40  * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * it is already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side where
  44  * each TX ring of an underlying port is reflected in aggr as a pseudo
  45  * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
  46  * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
  47  * TX ring is given to the aggregation layer.
  48  *
  49  * With this change, the outgoing stack depth looks much better:
  50  *
  51  * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
  53  *
  54  * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
  55  * SRS_TX_AGGR and SRS_TX_BW_AGGR.
  56  *
  57  * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
  58  * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
  59  * ring belonging to a port on which the packet has to be sent.
  60  * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
  61  * policy and then uses the fanout_hint passed to it to pick a TX ring from
  62  * the selected port.
  63  *
  64  * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
  65  * bandwidth limit is applied first on the outgoing packet and the packets
  66  * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
  67  * particular TX ring.
  68  */
  69 
  70 #include <sys/types.h>
  71 #include <sys/sysmacros.h>
  72 #include <sys/conf.h>
  73 #include <sys/cmn_err.h>
  74 #include <sys/disp.h>
  75 #include <sys/list.h>
  76 #include <sys/ksynch.h>
  77 #include <sys/kmem.h>
  78 #include <sys/stream.h>
  79 #include <sys/modctl.h>
  80 #include <sys/ddi.h>
  81 #include <sys/sunddi.h>
  82 #include <sys/atomic.h>
  83 #include <sys/stat.h>
  84 #include <sys/modhash.h>
  85 #include <sys/id_space.h>
  86 #include <sys/strsun.h>
  87 #include <sys/cred.h>
  88 #include <sys/dlpi.h>
  89 #include <sys/zone.h>
  90 #include <sys/mac_provider.h>
  91 #include <sys/dls.h>
  92 #include <sys/vlan.h>
  93 #include <sys/aggr.h>
  94 #include <sys/aggr_impl.h>
  95 
/*
 * Forward declarations for the aggr_m_*() entry points. Most of them are
 * referenced by the aggr_m_callbacks table defined later in this file.
 */
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static int aggr_m_stat(void *, uint_t, uint64_t *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);

/* Port lookup and removal helpers. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
    boolean_t *);

/* Group capability, SDU and margin helpers. */
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
static uint_t aggr_grp_max_sdu(aggr_grp_t *);
static uint32_t aggr_grp_max_margin(aggr_grp_t *);
static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);

/* Pseudo RX group support and the ring/group fill routines. */
static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static int aggr_pseudo_disable_intr(mac_intr_handle_t);
static int aggr_pseudo_enable_intr(mac_intr_handle_t);
static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
static int aggr_addmac(void *, const uint8_t *);
static int aggr_remmac(void *, const uint8_t *);
static mblk_t *aggr_rx_poll(void *, int);
static void aggr_fill_ring(void *, mac_ring_type_t, const int,
    const int, mac_ring_info_t *, mac_ring_handle_t);
static void aggr_fill_group(void *, mac_ring_type_t, const int,
    mac_group_info_t *, mac_group_handle_t);
 132 
/* kmem cache of aggr_grp_t objects, created in aggr_grp_init() */
static kmem_cache_t     *aggr_grp_cache;
/* id hash of groups, created in aggr_grp_init() */
static mod_hash_t       *aggr_grp_hash;
/* reader/writer lock taken when aggr_grp_cnt is sampled */
static krwlock_t        aggr_grp_lock;
/* group counter, reset to 0 by aggr_grp_init() */
static uint_t           aggr_grp_cnt;
/* id space used to manage key values when no key is specified */
static id_space_t       *key_ids;
 138 
 139 #define GRP_HASHSZ              64
 140 #define GRP_HASH_KEY(linkid)    ((mod_hash_key_t)(uintptr_t)linkid)
 141 #define AGGR_PORT_NAME_DELIMIT '-'
 142 
/* Six zero bytes; presumably the all-zero MAC address (name-based). */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};

/* Flag word stored in the first slot of aggr_m_callbacks. */
#define AGGR_M_CALLBACK_FLAGS   \
        (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * MAC callback table for the aggregation. The initializer is positional,
 * so the entries must stay in the order of the mac_callbacks_t members;
 * NULL marks a slot for which this file supplies no function.
 * NOTE(review): the meaning of each slot comes from the mac_callbacks_t
 * definition, which is not visible here -- confirm before reordering.
 */
static mac_callbacks_t aggr_m_callbacks = {
        AGGR_M_CALLBACK_FLAGS,
        aggr_m_stat,
        aggr_m_start,
        aggr_m_stop,
        aggr_m_promisc,
        aggr_m_multicst,
        NULL,
        NULL,
        NULL,
        aggr_m_ioctl,
        aggr_m_capab_get,
        NULL,
        NULL,
        aggr_m_setprop,
        NULL,
        aggr_m_propinfo
};
 166 
 167 /*ARGSUSED*/
 168 static int
 169 aggr_grp_constructor(void *buf, void *arg, int kmflag)
 170 {
 171         aggr_grp_t *grp = buf;
 172 
 173         bzero(grp, sizeof (*grp));
 174         mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
 175         cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
 176         rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
 177         mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
 178         cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
 179         mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
 180         cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
 181         grp->lg_link_state = LINK_STATE_UNKNOWN;
 182         return (0);
 183 }
 184 
 185 /*ARGSUSED*/
 186 static void
 187 aggr_grp_destructor(void *buf, void *arg)
 188 {
 189         aggr_grp_t *grp = buf;
 190 
 191         if (grp->lg_tx_ports != NULL) {
 192                 kmem_free(grp->lg_tx_ports,
 193                     grp->lg_tx_ports_size * sizeof (aggr_port_t *));
 194         }
 195 
 196         mutex_destroy(&grp->lg_lacp_lock);
 197         cv_destroy(&grp->lg_lacp_cv);
 198         mutex_destroy(&grp->lg_port_lock);
 199         cv_destroy(&grp->lg_port_cv);
 200         rw_destroy(&grp->lg_tx_lock);
 201         mutex_destroy(&grp->lg_tx_flowctl_lock);
 202         cv_destroy(&grp->lg_tx_flowctl_cv);
 203 }
 204 
 205 void
 206 aggr_grp_init(void)
 207 {
 208         aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
 209             sizeof (aggr_grp_t), 0, aggr_grp_constructor,
 210             aggr_grp_destructor, NULL, NULL, NULL, 0);
 211 
 212         aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
 213             GRP_HASHSZ, mod_hash_null_valdtor);
 214         rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
 215         aggr_grp_cnt = 0;
 216 
 217         /*
 218          * Allocate an id space to manage key values (when key is not
 219          * specified). The range of the id space will be from
 220          * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
 221          * uses a 16-bit key.
 222          */
 223         key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
 224         ASSERT(key_ids != NULL);
 225 }
 226 
 227 void
 228 aggr_grp_fini(void)
 229 {
 230         id_space_destroy(key_ids);
 231         rw_destroy(&aggr_grp_lock);
 232         mod_hash_destroy_idhash(aggr_grp_hash);
 233         kmem_cache_destroy(aggr_grp_cache);
 234 }
 235 
 236 uint_t
 237 aggr_grp_count(void)
 238 {
 239         uint_t  count;
 240 
 241         rw_enter(&aggr_grp_lock, RW_READER);
 242         count = aggr_grp_cnt;
 243         rw_exit(&aggr_grp_lock);
 244         return (count);
 245 }
 246 
 247 /*
 248  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
 249  * requires the mac perimeter, this function holds a reference of the aggr
 250  * and aggr won't call mac_unregister() until this reference drops to 0.
 251  */
 252 void
 253 aggr_grp_port_hold(aggr_port_t *port)
 254 {
 255         aggr_grp_t      *grp = port->lp_grp;
 256 
 257         AGGR_PORT_REFHOLD(port);
 258         mutex_enter(&grp->lg_port_lock);
 259         grp->lg_port_ref++;
 260         mutex_exit(&grp->lg_port_lock);
 261 }
 262 
 263 /*
 264  * Release the reference of the grp and inform aggr_grp_delete() calling
 265  * mac_unregister() is now safe.
 266  */
 267 void
 268 aggr_grp_port_rele(aggr_port_t *port)
 269 {
 270         aggr_grp_t      *grp = port->lp_grp;
 271 
 272         mutex_enter(&grp->lg_port_lock);
 273         if (--grp->lg_port_ref == 0)
 274                 cv_signal(&grp->lg_port_cv);
 275         mutex_exit(&grp->lg_port_lock);
 276         AGGR_PORT_REFRELE(port);
 277 }
 278 
 279 /*
 280  * Wait for the port's lacp timer thread and the port's notification callback
 281  * to exit.
 282  */
 283 void
 284 aggr_grp_port_wait(aggr_grp_t *grp)
 285 {
 286         mutex_enter(&grp->lg_port_lock);
 287         if (grp->lg_port_ref != 0)
 288                 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
 289         mutex_exit(&grp->lg_port_lock);
 290 }
 291 
 292 /*
 293  * Attach a port to a link aggregation group.
 294  *
 295  * A port is attached to a link aggregation group once its speed
 296  * and link state have been verified.
 297  *
 298  * Returns B_TRUE if the group link state or speed has changed. If
 299  * it's the case, the caller must notify the MAC layer via a call
 300  * to mac_link().
 301  */
 302 boolean_t
 303 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
 304 {
 305         boolean_t link_state_changed = B_FALSE;
 306 
 307         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 308         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 309 
 310         if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
 311                 return (B_FALSE);
 312 
 313         /*
 314          * Validate the MAC port link speed and update the group
 315          * link speed if needed.
 316          */
 317         if (port->lp_ifspeed == 0 ||
 318             port->lp_link_state != LINK_STATE_UP ||
 319             port->lp_link_duplex != LINK_DUPLEX_FULL) {
 320                 /*
 321                  * Can't attach a MAC port with unknown link speed,
 322                  * down link, or not in full duplex mode.
 323                  */
 324                 return (B_FALSE);
 325         }
 326 
 327         if (grp->lg_ifspeed == 0) {
 328                 /*
 329                  * The group inherits the speed of the first link being
 330                  * attached.
 331                  */
 332                 grp->lg_ifspeed = port->lp_ifspeed;
 333                 link_state_changed = B_TRUE;
 334         } else if (grp->lg_ifspeed != port->lp_ifspeed) {
 335                 /*
 336                  * The link speed of the MAC port must be the same as
 337                  * the group link speed, as per 802.3ad. Since it is
 338                  * not, the attach is cancelled.
 339                  */
 340                 return (B_FALSE);
 341         }
 342 
 343         grp->lg_nattached_ports++;
 344 
 345         /*
 346          * Update the group link state.
 347          */
 348         if (grp->lg_link_state != LINK_STATE_UP) {
 349                 grp->lg_link_state = LINK_STATE_UP;
 350                 grp->lg_link_duplex = LINK_DUPLEX_FULL;
 351                 link_state_changed = B_TRUE;
 352         }
 353 
 354         /*
 355          * Update port's state.
 356          */
 357         port->lp_state = AGGR_PORT_STATE_ATTACHED;
 358 
 359         aggr_grp_multicst_port(port, B_TRUE);
 360 
 361         /*
 362          * Set port's receive callback
 363          */
 364         mac_rx_set(port->lp_mch, aggr_recv_cb, port);
 365 
 366         /*
 367          * If LACP is OFF, the port can be used to send data as soon
 368          * as its link is up and verified to be compatible with the
 369          * aggregation.
 370          *
 371          * If LACP is active or passive, notify the LACP subsystem, which
 372          * will enable sending on the port following the LACP protocol.
 373          */
 374         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 375                 aggr_send_port_enable(port);
 376         else
 377                 aggr_lacp_port_attached(port);
 378 
 379         return (link_state_changed);
 380 }
 381 
 382 boolean_t
 383 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
 384 {
 385         boolean_t link_state_changed = B_FALSE;
 386 
 387         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 388         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 389 
 390         /* update state */
 391         if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
 392                 return (B_FALSE);
 393 
 394         mac_rx_clear(port->lp_mch);
 395 
 396         aggr_grp_multicst_port(port, B_FALSE);
 397 
 398         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
 399                 aggr_send_port_disable(port);
 400         else
 401                 aggr_lacp_port_detached(port);
 402 
 403         port->lp_state = AGGR_PORT_STATE_STANDBY;
 404 
 405         grp->lg_nattached_ports--;
 406         if (grp->lg_nattached_ports == 0) {
 407                 /* the last attached MAC port of the group is being detached */
 408                 grp->lg_ifspeed = 0;
 409                 grp->lg_link_state = LINK_STATE_DOWN;
 410                 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
 411                 link_state_changed = B_TRUE;
 412         }
 413 
 414         return (link_state_changed);
 415 }
 416 
 417 /*
 418  * Update the MAC addresses of the constituent ports of the specified
 419  * group. This function is invoked:
 420  * - after creating a new aggregation group.
 421  * - after adding new ports to an aggregation group.
 422  * - after removing a port from a group when the MAC address of
 423  *   that port was used for the MAC address of the group.
 424  * - after the MAC address of a port changed when the MAC address
 425  *   of that port was used for the MAC address of the group.
 426  *
 427  * Return true if the link state of the aggregation changed, for example
 428  * as a result of a failure changing the MAC address of one of the
 429  * constituent ports.
 430  */
 431 boolean_t
 432 aggr_grp_update_ports_mac(aggr_grp_t *grp)
 433 {
 434         aggr_port_t *cport;
 435         boolean_t link_state_changed = B_FALSE;
 436         mac_perim_handle_t mph;
 437 
 438         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 439 
 440         for (cport = grp->lg_ports; cport != NULL;
 441             cport = cport->lp_next) {
 442                 mac_perim_enter_by_mh(cport->lp_mh, &mph);
 443                 if (aggr_port_unicst(cport) != 0) {
 444                         if (aggr_grp_detach_port(grp, cport))
 445                                 link_state_changed = B_TRUE;
 446                 } else {
 447                         /*
 448                          * If a port was detached because of a previous
 449                          * failure changing the MAC address, the port is
 450                          * reattached when it successfully changes the MAC
 451                          * address now, and this might cause the link state
 452                          * of the aggregation to change.
 453                          */
 454                         if (aggr_grp_attach_port(grp, cport))
 455                                 link_state_changed = B_TRUE;
 456                 }
 457                 mac_perim_exit(mph);
 458         }
 459         return (link_state_changed);
 460 }
 461 
 462 /*
 463  * Invoked when the MAC address of a port has changed. If the port's
 464  * MAC address was used for the group MAC address, set mac_addr_changedp
 465  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
 466  * notification. If the link state changes due to detach/attach of
 467  * the constituent port, set link_state_changedp to B_TRUE to indicate
 468  * to the caller that it should send a MAC_NOTE_LINK notification. In both
 469  * cases, it is the responsibility of the caller to invoke notification
 470  * functions after releasing the the port lock.
 471  */
 472 void
 473 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
 474     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
 475 {
 476         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 477         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 478         ASSERT(mac_addr_changedp != NULL);
 479         ASSERT(link_state_changedp != NULL);
 480 
 481         *mac_addr_changedp = B_FALSE;
 482         *link_state_changedp = B_FALSE;
 483 
 484         if (grp->lg_addr_fixed) {
 485                 /*
 486                  * The group is using a fixed MAC address or an automatic
 487                  * MAC address has not been set.
 488                  */
 489                 return;
 490         }
 491 
 492         if (grp->lg_mac_addr_port == port) {
 493                 /*
 494                  * The MAC address of the port was assigned to the group
 495                  * MAC address. Update the group MAC address.
 496                  */
 497                 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
 498                 *mac_addr_changedp = B_TRUE;
 499         } else {
 500                 /*
 501                  * Update the actual port MAC address to the MAC address
 502                  * of the group.
 503                  */
 504                 if (aggr_port_unicst(port) != 0) {
 505                         *link_state_changedp = aggr_grp_detach_port(grp, port);
 506                 } else {
 507                         /*
 508                          * If a port was detached because of a previous
 509                          * failure changing the MAC address, the port is
 510                          * reattached when it successfully changes the MAC
 511                          * address now, and this might cause the link state
 512                          * of the aggregation to change.
 513                          */
 514                         *link_state_changedp = aggr_grp_attach_port(grp, port);
 515                 }
 516         }
 517 }
 518 
 519 /*
 520  * Add a port to a link aggregation group.
 521  */
 522 static int
 523 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
 524     aggr_port_t **pp)
 525 {
 526         aggr_port_t *port, **cport;
 527         mac_perim_handle_t mph;
 528         zoneid_t port_zoneid = ALL_ZONES;
 529         int err;
 530 
 531         /* The port must be int the same zone as the aggregation. */
 532         if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
 533                 port_zoneid = GLOBAL_ZONEID;
 534         if (grp->lg_zoneid != port_zoneid)
 535                 return (EBUSY);
 536 
 537         /*
 538          * lg_mh could be NULL when the function is called during the creation
 539          * of the aggregation.
 540          */
 541         ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
 542 
 543         /* create new port */
 544         err = aggr_port_create(grp, port_linkid, force, &port);
 545         if (err != 0)
 546                 return (err);
 547 
 548         mac_perim_enter_by_mh(port->lp_mh, &mph);
 549 
 550         /* add port to list of group constituent ports */
 551         cport = &grp->lg_ports;
 552         while (*cport != NULL)
 553                 cport = &((*cport)->lp_next);
 554         *cport = port;
 555 
 556         /*
 557          * Back reference to the group it is member of. A port always
 558          * holds a reference to its group to ensure that the back
 559          * reference is always valid.
 560          */
 561         port->lp_grp = grp;
 562         AGGR_GRP_REFHOLD(grp);
 563         grp->lg_nports++;
 564 
 565         aggr_lacp_init_port(port);
 566         mac_perim_exit(mph);
 567 
 568         if (pp != NULL)
 569                 *pp = port;
 570 
 571         return (0);
 572 }
 573 
 574 /*
 575  * This is called in response to either our LACP state machine or a MAC
 576  * notification that the link has gone down via aggr_send_port_disable(). At
 577  * this point, we may need to update our default ring. To that end, we go
 578  * through the set of ports (underlying datalinks in an aggregation) that are
 579  * currently enabled to transmit data. If all our links have been disabled for
 580  * transmit, then we don't do anything.
 581  *
 582  * Note, because we only have a single TX group, we don't have to worry about
 583  * the rings moving between groups and the chance that mac will reassign it
 584  * unless someone removes a port, at which point, we play it safe and call this
 585  * again.
 586  */
 587 void
 588 aggr_grp_update_default(aggr_grp_t *grp)
 589 {
 590         aggr_port_t *port;
 591         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 592 
 593         rw_enter(&grp->lg_tx_lock, RW_WRITER);
 594 
 595         if (grp->lg_ntx_ports == 0) {
 596                 rw_exit(&grp->lg_tx_lock);
 597                 return;
 598         }
 599 
 600         port = grp->lg_tx_ports[0];
 601         ASSERT(port->lp_tx_ring_cnt > 0);
 602         mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
 603         rw_exit(&grp->lg_tx_lock);
 604 }
 605 
 606 /*
 607  * Add a pseudo RX ring for the given HW ring handle.
 608  */
 609 static int
 610 aggr_add_pseudo_rx_ring(aggr_port_t *port,
 611     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 612 {
 613         aggr_pseudo_rx_ring_t   *ring;
 614         int                     err;
 615         int                     j;
 616 
 617         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 618                 ring = rx_grp->arg_rings + j;
 619                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
 620                         break;
 621         }
 622 
 623         /*
 624          * No slot for this new RX ring.
 625          */
 626         if (j == MAX_RINGS_PER_GROUP)
 627                 return (EIO);
 628 
 629         ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
 630         ring->arr_hw_rh = hw_rh;
 631         ring->arr_port = port;
 632         rx_grp->arg_ring_cnt++;
 633 
 634         /*
 635          * The group is already registered, dynamically add a new ring to the
 636          * mac group.
 637          */
 638         if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
 639                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 640                 ring->arr_hw_rh = NULL;
 641                 ring->arr_port = NULL;
 642                 rx_grp->arg_ring_cnt--;
 643         } else {
 644                 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 645                     mac_find_ring(rx_grp->arg_gh, j));
 646         }
 647         return (err);
 648 }
 649 
 650 /*
 651  * Remove the pseudo RX ring of the given HW ring handle.
 652  */
 653 static void
 654 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
 655 {
 656         aggr_pseudo_rx_ring_t   *ring;
 657         int                     j;
 658 
 659         for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
 660                 ring = rx_grp->arg_rings + j;
 661                 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
 662                     ring->arr_hw_rh != hw_rh) {
 663                         continue;
 664                 }
 665 
 666                 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
 667 
 668                 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
 669                 ring->arr_hw_rh = NULL;
 670                 ring->arr_port = NULL;
 671                 rx_grp->arg_ring_cnt--;
 672                 mac_hwring_teardown(hw_rh);
 673                 break;
 674         }
 675 }
 676 
 677 /*
 678  * This function is called to create pseudo rings over the hardware rings of
 679  * the underlying device. Note that there is a 1:1 mapping between the pseudo
 680  * RX rings of the aggr and the hardware rings of the underlying port.
 681  */
 682 static int
 683 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 684 {
 685         aggr_grp_t              *grp = port->lp_grp;
 686         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 687         aggr_unicst_addr_t      *addr, *a;
 688         mac_perim_handle_t      pmph;
 689         int                     hw_rh_cnt, i = 0, j;
 690         int                     err = 0;
 691 
 692         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 693         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 694 
 695         /*
 696          * This function must be called after the aggr registers its mac
 697          * and its RX group has been initialized.
 698          */
 699         ASSERT(rx_grp->arg_gh != NULL);
 700 
 701         /*
 702          * Get the list the the underlying HW rings.
 703          */
 704         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 705             &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
 706 
 707         if (port->lp_hwgh != NULL) {
 708                 /*
 709                  * Quiesce the HW ring and the mac srs on the ring. Note
 710                  * that the HW ring will be restarted when the pseudo ring
 711                  * is started. At that time all the packets will be
 712                  * directly passed up to the pseudo RX ring and handled
 713                  * by mac srs created over the pseudo RX ring.
 714                  */
 715                 mac_rx_client_quiesce(port->lp_mch);
 716                 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
 717         }
 718 
 719         /*
 720          * Add all the unicast addresses to the newly added port.
 721          */
 722         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
 723                 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
 724                         break;
 725         }
 726 
 727         for (i = 0; err == 0 && i < hw_rh_cnt; i++)
 728                 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
 729 
 730         if (err != 0) {
 731                 for (j = 0; j < i; j++)
 732                         aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
 733 
 734                 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
 735                         aggr_port_remmac(port, a->aua_addr);
 736 
 737                 if (port->lp_hwgh != NULL) {
 738                         mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 739                         mac_rx_client_restart(port->lp_mch);
 740                         port->lp_hwgh = NULL;
 741                 }
 742         } else {
 743                 port->lp_rx_grp_added = B_TRUE;
 744         }
 745 done:
 746         mac_perim_exit(pmph);
 747         return (err);
 748 }
 749 
 750 /*
 751  * This function is called by aggr to remove pseudo RX rings over the
 752  * HW rings of the underlying port.
 753  */
 754 static void
 755 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
 756 {
 757         aggr_grp_t              *grp = port->lp_grp;
 758         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP];
 759         aggr_unicst_addr_t      *addr;
 760         mac_group_handle_t      hwgh;
 761         mac_perim_handle_t      pmph;
 762         int                     hw_rh_cnt, i;
 763 
 764         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 765         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 766 
 767         if (!port->lp_rx_grp_added)
 768                 goto done;
 769 
 770         ASSERT(rx_grp->arg_gh != NULL);
 771         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 772             &hwgh, hw_rh, MAC_RING_TYPE_RX);
 773 
 774         /*
 775          * If hw_rh_cnt is 0, it means that the underlying port does not
 776          * support RX rings. Directly return in this case.
 777          */
 778         for (i = 0; i < hw_rh_cnt; i++)
 779                 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
 780 
 781         for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
 782                 aggr_port_remmac(port, addr->aua_addr);
 783 
 784         if (port->lp_hwgh != NULL) {
 785                 port->lp_hwgh = NULL;
 786 
 787                 /*
 788                  * First clear the permanent-quiesced flag of the RX srs then
 789                  * restart the HW ring and the mac srs on the ring. Note that
 790                  * the HW ring and associated SRS will soon been removed when
 791                  * the port is removed from the aggr.
 792                  */
 793                 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
 794                 mac_rx_client_restart(port->lp_mch);
 795         }
 796 
 797         port->lp_rx_grp_added = B_FALSE;
 798 done:
 799         mac_perim_exit(pmph);
 800 }
 801 
 802 /*
 803  * Add a pseudo TX ring for the given HW ring handle.
 804  */
 805 static int
 806 aggr_add_pseudo_tx_ring(aggr_port_t *port,
 807     aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
 808     mac_ring_handle_t *pseudo_rh)
 809 {
 810         aggr_pseudo_tx_ring_t   *ring;
 811         int                     err;
 812         int                     i;
 813 
 814         ASSERT(MAC_PERIM_HELD(port->lp_mh));
 815         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 816                 ring = tx_grp->atg_rings + i;
 817                 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
 818                         break;
 819         }
 820         /*
 821          * No slot for this new TX ring.
 822          */
 823         if (i == MAX_RINGS_PER_GROUP)
 824                 return (EIO);
 825         /*
 826          * The following 4 statements needs to be done before
 827          * calling mac_group_add_ring(). Otherwise it will
 828          * result in an assertion failure in mac_init_ring().
 829          */
 830         ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
 831         ring->atr_hw_rh = hw_rh;
 832         ring->atr_port = port;
 833         tx_grp->atg_ring_cnt++;
 834 
 835         /*
 836          * The TX side has no concept of ring groups unlike RX groups.
 837          * There is just a single group which stores all the TX rings.
 838          * This group will be used to store aggr's pseudo TX rings.
 839          */
 840         if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
 841                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 842                 ring->atr_hw_rh = NULL;
 843                 ring->atr_port = NULL;
 844                 tx_grp->atg_ring_cnt--;
 845         } else {
 846                 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
 847                 if (hw_rh != NULL) {
 848                         mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
 849                             mac_find_ring(tx_grp->atg_gh, i));
 850                 }
 851         }
 852 
 853         return (err);
 854 }
 855 
 856 /*
 857  * Remove the pseudo TX ring of the given HW ring handle.
 858  */
 859 static void
 860 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
 861     mac_ring_handle_t pseudo_hw_rh)
 862 {
 863         aggr_pseudo_tx_ring_t   *ring;
 864         int                     i;
 865 
 866         for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
 867                 ring = tx_grp->atg_rings + i;
 868                 if (ring->atr_rh != pseudo_hw_rh)
 869                         continue;
 870 
 871                 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
 872                 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
 873                 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
 874                 mac_hwring_teardown(ring->atr_hw_rh);
 875                 ring->atr_hw_rh = NULL;
 876                 ring->atr_port = NULL;
 877                 tx_grp->atg_ring_cnt--;
 878                 break;
 879         }
 880 }
 881 
 882 /*
 883  * This function is called to create pseudo rings over hardware rings of
 884  * the underlying device. There is a 1:1 mapping between the pseudo TX
 885  * rings of the aggr and the hardware rings of the underlying port.
 886  */
 887 static int
 888 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 889 {
 890         aggr_grp_t              *grp = port->lp_grp;
 891         mac_ring_handle_t       hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
 892         mac_perim_handle_t      pmph;
 893         int                     hw_rh_cnt, i = 0, j;
 894         int                     err = 0;
 895 
 896         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 897         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 898 
 899         /*
 900          * Get the list the the underlying HW rings.
 901          */
 902         hw_rh_cnt = mac_hwrings_get(port->lp_mch,
 903             NULL, hw_rh, MAC_RING_TYPE_TX);
 904 
 905         /*
 906          * Even if the underlying NIC does not have TX rings, we
 907          * still make a psuedo TX ring for that NIC with NULL as
 908          * the ring handle.
 909          */
 910         if (hw_rh_cnt == 0)
 911                 port->lp_tx_ring_cnt = 1;
 912         else
 913                 port->lp_tx_ring_cnt = hw_rh_cnt;
 914 
 915         port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
 916             port->lp_tx_ring_cnt), KM_SLEEP);
 917         port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
 918             port->lp_tx_ring_cnt), KM_SLEEP);
 919 
 920         if (hw_rh_cnt == 0) {
 921                 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
 922                     NULL, &pseudo_rh)) == 0) {
 923                         port->lp_tx_rings[0] = NULL;
 924                         port->lp_pseudo_tx_rings[0] = pseudo_rh;
 925                 }
 926         } else {
 927                 for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
 928                         err = aggr_add_pseudo_tx_ring(port,
 929                             tx_grp, hw_rh[i], &pseudo_rh);
 930                         if (err != 0)
 931                                 break;
 932                         port->lp_tx_rings[i] = hw_rh[i];
 933                         port->lp_pseudo_tx_rings[i] = pseudo_rh;
 934                 }
 935         }
 936 
 937         if (err != 0) {
 938                 if (hw_rh_cnt != 0) {
 939                         for (j = 0; j < i; j++) {
 940                                 aggr_rem_pseudo_tx_ring(tx_grp,
 941                                     port->lp_pseudo_tx_rings[j]);
 942                         }
 943                 }
 944                 kmem_free(port->lp_tx_rings,
 945                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 946                 kmem_free(port->lp_pseudo_tx_rings,
 947                     (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 948                 port->lp_tx_ring_cnt = 0;
 949         } else {
 950                 port->lp_tx_grp_added = B_TRUE;
 951                 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
 952                     aggr_tx_ring_update, port);
 953         }
 954         mac_perim_exit(pmph);
 955         aggr_grp_update_default(grp);
 956         return (err);
 957 }
 958 
 959 /*
 960  * This function is called by aggr to remove pseudo TX rings over the
 961  * HW rings of the underlying port.
 962  */
 963 static void
 964 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
 965 {
 966         aggr_grp_t              *grp = port->lp_grp;
 967         mac_perim_handle_t      pmph;
 968         int                     i;
 969 
 970         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
 971         mac_perim_enter_by_mh(port->lp_mh, &pmph);
 972 
 973         if (!port->lp_tx_grp_added)
 974                 goto done;
 975 
 976         ASSERT(tx_grp->atg_gh != NULL);
 977 
 978         for (i = 0; i < port->lp_tx_ring_cnt; i++)
 979                 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
 980 
 981         kmem_free(port->lp_tx_rings,
 982             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 983         kmem_free(port->lp_pseudo_tx_rings,
 984             (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
 985 
 986         port->lp_tx_ring_cnt = 0;
 987         (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
 988         port->lp_tx_grp_added = B_FALSE;
 989         aggr_grp_update_default(grp);
 990 done:
 991         mac_perim_exit(pmph);
 992 }
 993 
 994 static int
 995 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
 996 {
 997         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
 998         return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
 999 }
1000 
1001 static int
1002 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1003 {
1004         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1005         return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1006 }
1007 
1008 /*
1009  * Here we need to start the pseudo-ring. As MAC already ensures that the
1010  * underlying device is set up, all we need to do is save the ring generation.
1011  *
1012  * Note, we don't end up wanting to use the underlying mac_hwring_start/stop
1013  * functions here as those don't actually stop and start the ring, they just
1014  * quiesce the ring. Regardless of whether the aggr is logically up or not, we
1015  * want to make sure that we can receive traffic for LACP.
1016  */
1017 static int
1018 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1019 {
1020         aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1021 
1022         rr_ring->arr_gen = mr_gen;
1023         return (0);
1024 }
1025 
1026 /*
1027  * Add one or more ports to an existing link aggregation group.
1028  */
1029 int
1030 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1031     laioc_port_t *ports)
1032 {
1033         int rc, i, nadded = 0;
1034         aggr_grp_t *grp = NULL;
1035         aggr_port_t *port;
1036         boolean_t link_state_changed = B_FALSE;
1037         mac_perim_handle_t mph, pmph;
1038 
1039         /* get group corresponding to linkid */
1040         rw_enter(&aggr_grp_lock, RW_READER);
1041         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1042             (mod_hash_val_t *)&grp) != 0) {
1043                 rw_exit(&aggr_grp_lock);
1044                 return (ENOENT);
1045         }
1046         AGGR_GRP_REFHOLD(grp);
1047 
1048         /*
1049          * Hold the perimeter so that the aggregation won't be destroyed.
1050          */
1051         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1052         rw_exit(&aggr_grp_lock);
1053 
1054         /* add the specified ports to group */
1055         for (i = 0; i < nports; i++) {
1056                 /* add port to group */
1057                 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1058                     force, &port)) != 0) {
1059                         goto bail;
1060                 }
1061                 ASSERT(port != NULL);
1062                 nadded++;
1063 
1064                 /* check capabilities */
1065                 if (!aggr_grp_capab_check(grp, port) ||
1066                     !aggr_grp_sdu_check(grp, port) ||
1067                     !aggr_grp_margin_check(grp, port)) {
1068                         rc = ENOTSUP;
1069                         goto bail;
1070                 }
1071 
1072                 /*
1073                  * Create the pseudo ring for each HW ring of the underlying
1074                  * port.
1075                  */
1076                 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
1077                 if (rc != 0)
1078                         goto bail;
1079                 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1080                 if (rc != 0)
1081                         goto bail;
1082 
1083                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1084 
1085                 /* set LACP mode */
1086                 aggr_port_lacp_set_mode(grp, port);
1087 
1088                 /* start port if group has already been started */
1089                 if (grp->lg_started) {
1090                         rc = aggr_port_start(port);
1091                         if (rc != 0) {
1092                                 mac_perim_exit(pmph);
1093                                 goto bail;
1094                         }
1095 
1096                         /*
1097                          * Turn on the promiscuous mode over the port when it
1098                          * is requested to be turned on to receive the
1099                          * non-primary address over a port, or the promiscous
1100                          * mode is enabled over the aggr.
1101                          */
1102                         if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1103                                 rc = aggr_port_promisc(port, B_TRUE);
1104                                 if (rc != 0) {
1105                                         mac_perim_exit(pmph);
1106                                         goto bail;
1107                                 }
1108                         }
1109                 }
1110                 mac_perim_exit(pmph);
1111 
1112                 /*
1113                  * Attach each port if necessary.
1114                  */
1115                 if (aggr_port_notify_link(grp, port))
1116                         link_state_changed = B_TRUE;
1117 
1118                 /*
1119                  * Initialize the callback functions for this port.
1120                  */
1121                 aggr_port_init_callbacks(port);
1122         }
1123 
1124         /* update the MAC address of the constituent ports */
1125         if (aggr_grp_update_ports_mac(grp))
1126                 link_state_changed = B_TRUE;
1127 
1128         if (link_state_changed)
1129                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1130 
1131 bail:
1132         if (rc != 0) {
1133                 /* stop and remove ports that have been added */
1134                 for (i = 0; i < nadded; i++) {
1135                         port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1136                         ASSERT(port != NULL);
1137                         if (grp->lg_started) {
1138                                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1139                                 (void) aggr_port_promisc(port, B_FALSE);
1140                                 aggr_port_stop(port);
1141                                 mac_perim_exit(pmph);
1142                         }
1143                         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1144                         aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1145                         (void) aggr_grp_rem_port(grp, port, NULL, NULL);
1146                 }
1147         }
1148 
1149         mac_perim_exit(mph);
1150         AGGR_GRP_REFRELE(grp);
1151         return (rc);
1152 }
1153 
1154 static int
1155 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1156     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1157     aggr_lacp_timer_t lacp_timer)
1158 {
1159         boolean_t mac_addr_changed = B_FALSE;
1160         boolean_t link_state_changed = B_FALSE;
1161         mac_perim_handle_t pmph;
1162 
1163         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1164 
1165         /* validate fixed address if specified */
1166         if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
1167             ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
1168             (mac_addr[0] & 0x01))) {
1169                 return (EINVAL);
1170         }
1171 
1172         /* update policy if requested */
1173         if (update_mask & AGGR_MODIFY_POLICY)
1174                 aggr_send_update_policy(grp, policy);
1175 
1176         /* update unicast MAC address if requested */
1177         if (update_mask & AGGR_MODIFY_MAC) {
1178                 if (mac_fixed) {
1179                         /* user-supplied MAC address */
1180                         grp->lg_mac_addr_port = NULL;
1181                         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
1182                                 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1183                                 mac_addr_changed = B_TRUE;
1184                         }
1185                 } else if (grp->lg_addr_fixed) {
1186                         /* switch from user-supplied to automatic */
1187                         aggr_port_t *port = grp->lg_ports;
1188 
1189                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1190                         bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
1191                         grp->lg_mac_addr_port = port;
1192                         mac_addr_changed = B_TRUE;
1193                         mac_perim_exit(pmph);
1194                 }
1195                 grp->lg_addr_fixed = mac_fixed;
1196         }
1197 
1198         if (mac_addr_changed)
1199                 link_state_changed = aggr_grp_update_ports_mac(grp);
1200 
1201         if (update_mask & AGGR_MODIFY_LACP_MODE)
1202                 aggr_lacp_update_mode(grp, lacp_mode);
1203 
1204         if (update_mask & AGGR_MODIFY_LACP_TIMER)
1205                 aggr_lacp_update_timer(grp, lacp_timer);
1206 
1207         if (link_state_changed)
1208                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1209 
1210         if (mac_addr_changed)
1211                 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1212 
1213         return (0);
1214 }
1215 
1216 /*
1217  * Update properties of an existing link aggregation group.
1218  */
1219 int
1220 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1221     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1222     aggr_lacp_timer_t lacp_timer)
1223 {
1224         aggr_grp_t *grp = NULL;
1225         mac_perim_handle_t mph;
1226         int err;
1227 
1228         /* get group corresponding to linkid */
1229         rw_enter(&aggr_grp_lock, RW_READER);
1230         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1231             (mod_hash_val_t *)&grp) != 0) {
1232                 rw_exit(&aggr_grp_lock);
1233                 return (ENOENT);
1234         }
1235         AGGR_GRP_REFHOLD(grp);
1236 
1237         /*
1238          * Hold the perimeter so that the aggregation won't be destroyed.
1239          */
1240         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1241         rw_exit(&aggr_grp_lock);
1242 
1243         err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1244             mac_addr, lacp_mode, lacp_timer);
1245 
1246         mac_perim_exit(mph);
1247         AGGR_GRP_REFRELE(grp);
1248         return (err);
1249 }
1250 
1251 /*
1252  * Create a new link aggregation group upon request from administrator.
1253  * Returns 0 on success, an errno on failure.
1254  */
1255 int
1256 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1257     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
1258     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
1259     cred_t *credp)
1260 {
1261         aggr_grp_t *grp = NULL;
1262         aggr_port_t *port;
1263         mac_register_t *mac;
1264         boolean_t link_state_changed;
1265         mac_perim_handle_t mph;
1266         int err;
1267         int i;
1268         kt_did_t tid = 0;
1269 
1270         /* need at least one port */
1271         if (nports == 0)
1272                 return (EINVAL);
1273 
1274         rw_enter(&aggr_grp_lock, RW_WRITER);
1275 
1276         /* does a group with the same linkid already exist? */
1277         err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1278             (mod_hash_val_t *)&grp);
1279         if (err == 0) {
1280                 rw_exit(&aggr_grp_lock);
1281                 return (EEXIST);
1282         }
1283 
1284         grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
1285 
1286         grp->lg_refs = 1;
1287         grp->lg_closing = B_FALSE;
1288         grp->lg_force = force;
1289         grp->lg_linkid = linkid;
1290         grp->lg_zoneid = crgetzoneid(credp);
1291         grp->lg_ifspeed = 0;
1292         grp->lg_link_state = LINK_STATE_UNKNOWN;
1293         grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1294         grp->lg_started = B_FALSE;
1295         grp->lg_promisc = B_FALSE;
1296         grp->lg_lacp_done = B_FALSE;
1297         grp->lg_tx_notify_done = B_FALSE;
1298         grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1299         grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1300             aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1301         grp->lg_tx_notify_thread = thread_create(NULL, 0,
1302             aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1303         grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1304             MAX_RINGS_PER_GROUP), KM_SLEEP);
1305         grp->lg_tx_blocked_cnt = 0;
1306         bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1307         bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1308         aggr_lacp_init_grp(grp);
1309 
1310         /* add MAC ports to group */
1311         grp->lg_ports = NULL;
1312         grp->lg_nports = 0;
1313         grp->lg_nattached_ports = 0;
1314         grp->lg_ntx_ports = 0;
1315 
1316         /*
1317          * If key is not specified by the user, allocate the key.
1318          */
1319         if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1320                 err = ENOMEM;
1321                 goto bail;
1322         }
1323         grp->lg_key = key;
1324 
1325         for (i = 0; i < nports; i++) {
1326                 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1327                 if (err != 0)
1328                         goto bail;
1329         }
1330 
1331         /*
1332          * If no explicit MAC address was specified by the administrator,
1333          * set it to the MAC address of the first port.
1334          */
1335         grp->lg_addr_fixed = mac_fixed;
1336         if (grp->lg_addr_fixed) {
1337                 /* validate specified address */
1338                 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1339                         err = EINVAL;
1340                         goto bail;
1341                 }
1342                 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1343         } else {
1344                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1345                 grp->lg_mac_addr_port = grp->lg_ports;
1346         }
1347 
1348         /* set the initial group capabilities */
1349         aggr_grp_capab_set(grp);
1350 
1351         if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1352                 err = ENOMEM;
1353                 goto bail;
1354         }
1355         mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1356         mac->m_driver = grp;
1357         mac->m_dip = aggr_dip;
1358         mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1359         mac->m_src_addr = grp->lg_addr;
1360         mac->m_callbacks = &aggr_m_callbacks;
1361         mac->m_min_sdu = 0;
1362         mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1363         mac->m_margin = aggr_grp_max_margin(grp);
1364         mac->m_v12n = MAC_VIRT_LEVEL1;
1365         err = mac_register(mac, &grp->lg_mh);
1366         mac_free(mac);
1367         if (err != 0)
1368                 goto bail;
1369 
1370         err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
1371         if (err != 0) {
1372                 (void) mac_unregister(grp->lg_mh);
1373                 grp->lg_mh = NULL;
1374                 goto bail;
1375         }
1376 
1377         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1378 
1379         /*
1380          * Update the MAC address of the constituent ports.
1381          * None of the port is attached at this time, the link state of the
1382          * aggregation will not change.
1383          */
1384         link_state_changed = aggr_grp_update_ports_mac(grp);
1385         ASSERT(!link_state_changed);
1386 
1387         /* update outbound load balancing policy */
1388         aggr_send_update_policy(grp, policy);
1389 
1390         /* set LACP mode */
1391         aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1392 
1393         /*
1394          * Attach each port if necessary.
1395          */
1396         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1397                 /*
1398                  * Create the pseudo ring for each HW ring of the underlying
1399                  * port. Note that this is done after the aggr registers the
1400                  * mac.
1401                  */
1402                 VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
1403                 VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1404                 if (aggr_port_notify_link(grp, port))
1405                         link_state_changed = B_TRUE;
1406 
1407                 /*
1408                  * Initialize the callback functions for this port.
1409                  */
1410                 aggr_port_init_callbacks(port);
1411         }
1412 
1413         if (link_state_changed)
1414                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1415 
1416         /* add new group to hash table */
1417         err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1418             (mod_hash_val_t)grp);
1419         ASSERT(err == 0);
1420         aggr_grp_cnt++;
1421 
1422         mac_perim_exit(mph);
1423         rw_exit(&aggr_grp_lock);
1424         return (0);
1425 
1426 bail:
1427 
1428         grp->lg_closing = B_TRUE;
1429 
1430         port = grp->lg_ports;
1431         while (port != NULL) {
1432                 aggr_port_t *cport;
1433 
1434                 cport = port->lp_next;
1435                 aggr_port_delete(port);
1436                 port = cport;
1437         }
1438 
1439         /*
1440          * Inform the lacp_rx thread to exit.
1441          */
1442         mutex_enter(&grp->lg_lacp_lock);
1443         grp->lg_lacp_done = B_TRUE;
1444         cv_signal(&grp->lg_lacp_cv);
1445         while (grp->lg_lacp_rx_thread != NULL)
1446                 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1447         mutex_exit(&grp->lg_lacp_lock);
1448         /*
1449          * Inform the tx_notify thread to exit.
1450          */
1451         mutex_enter(&grp->lg_tx_flowctl_lock);
1452         if (grp->lg_tx_notify_thread != NULL) {
1453                 tid = grp->lg_tx_notify_thread->t_did;
1454                 grp->lg_tx_notify_done = B_TRUE;
1455                 cv_signal(&grp->lg_tx_flowctl_cv);
1456         }
1457         mutex_exit(&grp->lg_tx_flowctl_lock);
1458         if (tid != 0)
1459                 thread_join(tid);
1460 
1461         kmem_free(grp->lg_tx_blocked_rings,
1462             (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1463         rw_exit(&aggr_grp_lock);
1464         AGGR_GRP_REFRELE(grp);
1465         return (err);
1466 }
1467 
1468 /*
1469  * Return a pointer to the member of a group with specified linkid.
1470  */
1471 static aggr_port_t *
1472 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1473 {
1474         aggr_port_t *port;
1475 
1476         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1477 
1478         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1479                 if (port->lp_linkid == linkid)
1480                         break;
1481         }
1482 
1483         return (port);
1484 }
1485 
1486 /*
1487  * Stop, detach and remove a port from a link aggregation group.
1488  */
1489 static int
1490 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1491     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1492 {
1493         int rc = 0;
1494         aggr_port_t **pport;
1495         boolean_t mac_addr_changed = B_FALSE;
1496         boolean_t link_state_changed = B_FALSE;
1497         mac_perim_handle_t mph;
1498         uint64_t val;
1499         uint_t i;
1500         uint_t stat;
1501 
1502         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1503         ASSERT(grp->lg_nports > 1);
1504         ASSERT(!grp->lg_closing);
1505 
1506         /* unlink port */
1507         for (pport = &grp->lg_ports; *pport != port;
1508             pport = &(*pport)->lp_next) {
1509                 if (*pport == NULL) {
1510                         rc = ENOENT;
1511                         goto done;
1512                 }
1513         }
1514         *pport = port->lp_next;
1515 
1516         mac_perim_enter_by_mh(port->lp_mh, &mph);
1517 
1518         /*
1519          * If the MAC address of the port being removed was assigned
1520          * to the group, update the group MAC address
1521          * using the MAC address of a different port.
1522          */
1523         if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1524                 /*
1525                  * Set the MAC address of the group to the
1526                  * MAC address of its first port.
1527                  */
1528                 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1529                 grp->lg_mac_addr_port = grp->lg_ports;
1530                 mac_addr_changed = B_TRUE;
1531         }
1532 
1533         link_state_changed = aggr_grp_detach_port(grp, port);
1534 
1535         /*
1536          * Add the counter statistics of the ports while it was aggregated
1537          * to the group's residual statistics.  This is done by obtaining
1538          * the current counter from the underlying MAC then subtracting the
1539          * value of the counter at the moment it was added to the
1540          * aggregation.
1541          */
1542         for (i = 0; i < MAC_NSTAT; i++) {
1543                 stat = i + MAC_STAT_MIN;
1544                 if (!MAC_STAT_ISACOUNTER(stat))
1545                         continue;
1546                 val = aggr_port_stat(port, stat);
1547                 val -= port->lp_stat[i];
1548                 grp->lg_stat[i] += val;
1549         }
1550         for (i = 0; i < ETHER_NSTAT; i++) {
1551                 stat = i + MACTYPE_STAT_MIN;
1552                 if (!ETHER_STAT_ISACOUNTER(stat))
1553                         continue;
1554                 val = aggr_port_stat(port, stat);
1555                 val -= port->lp_ether_stat[i];
1556                 grp->lg_ether_stat[i] += val;
1557         }
1558 
1559         grp->lg_nports--;
1560         mac_perim_exit(mph);
1561 
1562         aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1563         aggr_port_delete(port);
1564 
1565         /*
1566          * If the group MAC address has changed, update the MAC address of
1567          * the remaining constituent ports according to the new MAC
1568          * address of the group.
1569          */
1570         if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1571                 link_state_changed = B_TRUE;
1572 
1573 done:
1574         if (mac_addr_changedp != NULL)
1575                 *mac_addr_changedp = mac_addr_changed;
1576         if (link_state_changedp != NULL)
1577                 *link_state_changedp = link_state_changed;
1578 
1579         return (rc);
1580 }
1581 
1582 /*
1583  * Remove one or more ports from an existing link aggregation group.
1584  */
1585 int
1586 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1587 {
1588         int rc = 0, i;
1589         aggr_grp_t *grp = NULL;
1590         aggr_port_t *port;
1591         boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1592         boolean_t link_state_update = B_FALSE, link_state_changed;
1593         mac_perim_handle_t mph, pmph;
1594 
1595         /* get group corresponding to linkid */
1596         rw_enter(&aggr_grp_lock, RW_READER);
1597         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1598             (mod_hash_val_t *)&grp) != 0) {
1599                 rw_exit(&aggr_grp_lock);
1600                 return (ENOENT);
1601         }
1602         AGGR_GRP_REFHOLD(grp);
1603 
1604         /*
1605          * Hold the perimeter so that the aggregation won't be destroyed.
1606          */
1607         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1608         rw_exit(&aggr_grp_lock);
1609 
1610         /* we need to keep at least one port per group */
1611         if (nports >= grp->lg_nports) {
1612                 rc = EINVAL;
1613                 goto bail;
1614         }
1615 
1616         /* first verify that all the groups are valid */
1617         for (i = 0; i < nports; i++) {
1618                 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1619                         /* port not found */
1620                         rc = ENOENT;
1621                         goto bail;
1622                 }
1623         }
1624 
1625         /* clear the promiscous mode for the specified ports */
1626         for (i = 0; i < nports && rc == 0; i++) {
1627                 /* lookup port */
1628                 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1629                 ASSERT(port != NULL);
1630 
1631                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1632                 rc = aggr_port_promisc(port, B_FALSE);
1633                 mac_perim_exit(pmph);
1634         }
1635         if (rc != 0) {
1636                 for (i = 0; i < nports; i++) {
1637                         port = aggr_grp_port_lookup(grp,
1638                             ports[i].lp_linkid);
1639                         ASSERT(port != NULL);
1640 
1641                         /*
1642                          * Turn the promiscuous mode back on if it is required
1643                          * to receive the non-primary address over a port, or
1644                          * the promiscous mode is enabled over the aggr.
1645                          */
1646                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1647                         if (port->lp_started && (grp->lg_promisc ||
1648                             port->lp_prom_addr != NULL)) {
1649                                 (void) aggr_port_promisc(port, B_TRUE);
1650                         }
1651                         mac_perim_exit(pmph);
1652                 }
1653                 goto bail;
1654         }
1655 
1656         /* remove the specified ports from group */
1657         for (i = 0; i < nports; i++) {
1658                 /* lookup port */
1659                 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1660                 ASSERT(port != NULL);
1661 
1662                 /* stop port if group has already been started */
1663                 if (grp->lg_started) {
1664                         mac_perim_enter_by_mh(port->lp_mh, &pmph);
1665                         aggr_port_stop(port);
1666                         mac_perim_exit(pmph);
1667                 }
1668 
1669                 /*
1670                  * aggr_rem_pseudo_tx_group() is not called here. Instead
1671                  * it is called from inside aggr_grp_rem_port() after the
1672                  * port has been detached. The reason is that
1673                  * aggr_rem_pseudo_tx_group() removes one ring at a time
1674                  * and if there is still traffic going on, then there
1675                  * is the possibility of aggr_find_tx_ring() returning a
1676                  * removed ring for transmission. Once the port has been
1677                  * detached, that port will not be used and
1678                  * aggr_find_tx_ring() will not return any rings
1679                  * belonging to it.
1680                  */
1681                 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1682 
1683                 /* remove port from group */
1684                 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1685                     &link_state_changed);
1686                 ASSERT(rc == 0);
1687                 mac_addr_update = mac_addr_update || mac_addr_changed;
1688                 link_state_update = link_state_update || link_state_changed;
1689         }
1690 
1691 bail:
1692         if (mac_addr_update)
1693                 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1694         if (link_state_update)
1695                 mac_link_update(grp->lg_mh, grp->lg_link_state);
1696 
1697         mac_perim_exit(mph);
1698         AGGR_GRP_REFRELE(grp);
1699 
1700         return (rc);
1701 }
1702 
1703 int
1704 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
1705 {
1706         aggr_grp_t *grp = NULL;
1707         aggr_port_t *port, *cport;
1708         datalink_id_t tmpid;
1709         mod_hash_val_t val;
1710         mac_perim_handle_t mph, pmph;
1711         int err;
1712         kt_did_t tid = 0;
1713 
1714         rw_enter(&aggr_grp_lock, RW_WRITER);
1715 
1716         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1717             (mod_hash_val_t *)&grp) != 0) {
1718                 rw_exit(&aggr_grp_lock);
1719                 return (ENOENT);
1720         }
1721 
1722         /*
1723          * Note that dls_devnet_destroy() must be called before lg_lock is
1724          * held. Otherwise, it will deadlock if another thread is in
1725          * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1726          * dls_devnet_destroy() needs to delete.
1727          */
1728         if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1729                 rw_exit(&aggr_grp_lock);
1730                 return (err);
1731         }
1732         ASSERT(linkid == tmpid);
1733 
1734         /*
1735          * Unregister from the MAC service module. Since this can
1736          * fail if a client hasn't closed the MAC port, we gracefully
1737          * fail the operation.
1738          */
1739         if ((err = mac_disable(grp->lg_mh)) != 0) {
1740                 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1741                 rw_exit(&aggr_grp_lock);
1742                 return (err);
1743         }
1744         (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1745         ASSERT(grp == (aggr_grp_t *)val);
1746 
1747         ASSERT(aggr_grp_cnt > 0);
1748         aggr_grp_cnt--;
1749         rw_exit(&aggr_grp_lock);
1750 
1751         /*
1752          * Inform the lacp_rx thread to exit.
1753          */
1754         mutex_enter(&grp->lg_lacp_lock);
1755         grp->lg_lacp_done = B_TRUE;
1756         cv_signal(&grp->lg_lacp_cv);
1757         while (grp->lg_lacp_rx_thread != NULL)
1758                 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1759         mutex_exit(&grp->lg_lacp_lock);
1760         /*
1761          * Inform the tx_notify_thread to exit.
1762          */
1763         mutex_enter(&grp->lg_tx_flowctl_lock);
1764         if (grp->lg_tx_notify_thread != NULL) {
1765                 tid = grp->lg_tx_notify_thread->t_did;
1766                 grp->lg_tx_notify_done = B_TRUE;
1767                 cv_signal(&grp->lg_tx_flowctl_cv);
1768         }
1769         mutex_exit(&grp->lg_tx_flowctl_lock);
1770         if (tid != 0)
1771                 thread_join(tid);
1772 
1773         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1774 
1775         grp->lg_closing = B_TRUE;
1776         /* detach and free MAC ports associated with group */
1777         port = grp->lg_ports;
1778         while (port != NULL) {
1779                 cport = port->lp_next;
1780                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1781                 if (grp->lg_started)
1782                         aggr_port_stop(port);
1783                 (void) aggr_grp_detach_port(grp, port);
1784                 mac_perim_exit(pmph);
1785                 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1786                 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1787                 aggr_port_delete(port);
1788                 port = cport;
1789         }
1790 
1791         mac_perim_exit(mph);
1792 
1793         kmem_free(grp->lg_tx_blocked_rings,
1794             (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1795         /*
1796          * Wait for the port's lacp timer thread and its notification callback
1797          * to exit before calling mac_unregister() since both needs to access
1798          * the mac perimeter of the grp.
1799          */
1800         aggr_grp_port_wait(grp);
1801 
1802         VERIFY(mac_unregister(grp->lg_mh) == 0);
1803         grp->lg_mh = NULL;
1804 
1805         AGGR_GRP_REFRELE(grp);
1806         return (0);
1807 }
1808 
1809 void
1810 aggr_grp_free(aggr_grp_t *grp)
1811 {
1812         ASSERT(grp->lg_refs == 0);
1813         ASSERT(grp->lg_port_ref == 0);
1814         if (grp->lg_key > AGGR_MAX_KEY) {
1815                 id_free(key_ids, grp->lg_key);
1816                 grp->lg_key = 0;
1817         }
1818         kmem_cache_free(aggr_grp_cache, grp);
1819 }
1820 
1821 int
1822 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1823     aggr_grp_info_new_grp_fn_t new_grp_fn,
1824     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1825 {
1826         aggr_grp_t      *grp;
1827         aggr_port_t     *port;
1828         mac_perim_handle_t mph, pmph;
1829         int             rc = 0;
1830 
1831         /*
1832          * Make sure that the aggregation link is visible from the caller's
1833          * zone.
1834          */
1835         if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1836                 return (ENOENT);
1837 
1838         rw_enter(&aggr_grp_lock, RW_READER);
1839 
1840         if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1841             (mod_hash_val_t *)&grp) != 0) {
1842                 rw_exit(&aggr_grp_lock);
1843                 return (ENOENT);
1844         }
1845         AGGR_GRP_REFHOLD(grp);
1846 
1847         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1848         rw_exit(&aggr_grp_lock);
1849 
1850         rc = new_grp_fn(fn_arg, grp->lg_linkid,
1851             (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1852             grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1853             grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1854 
1855         if (rc != 0)
1856                 goto bail;
1857 
1858         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1859                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1860                 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1861                     port->lp_state, &port->lp_lacp.ActorOperPortState);
1862                 mac_perim_exit(pmph);
1863 
1864                 if (rc != 0)
1865                         goto bail;
1866         }
1867 
1868 bail:
1869         mac_perim_exit(mph);
1870         AGGR_GRP_REFRELE(grp);
1871         return (rc);
1872 }
1873 
1874 /*ARGSUSED*/
1875 static void
1876 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1877 {
1878         miocnak(q, mp, 0, ENOTSUP);
1879 }
1880 
1881 static int
1882 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1883 {
1884         aggr_port_t     *port;
1885         uint_t          stat_index;
1886 
1887         /* We only aggregate counter statistics. */
1888         if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1889             IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1890                 return (ENOTSUP);
1891         }
1892 
1893         /*
1894          * Counter statistics for a group are computed by aggregating the
1895          * counters of the members MACs while they were aggregated, plus
1896          * the residual counter of the group itself, which is updated each
1897          * time a MAC is removed from the group.
1898          */
1899         *val = 0;
1900         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1901                 /* actual port statistic */
1902                 *val += aggr_port_stat(port, stat);
1903                 /*
1904                  * minus the port stat when it was added, plus any residual
1905                  * amount for the group.
1906                  */
1907                 if (IS_MAC_STAT(stat)) {
1908                         stat_index = stat - MAC_STAT_MIN;
1909                         *val -= port->lp_stat[stat_index];
1910                         *val += grp->lg_stat[stat_index];
1911                 } else if (IS_MACTYPE_STAT(stat)) {
1912                         stat_index = stat - MACTYPE_STAT_MIN;
1913                         *val -= port->lp_ether_stat[stat_index];
1914                         *val += grp->lg_ether_stat[stat_index];
1915                 }
1916         }
1917         return (0);
1918 }
1919 
1920 int
1921 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1922 {
1923         aggr_pseudo_rx_ring_t   *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
1924 
1925         if (rx_ring->arr_hw_rh != NULL) {
1926                 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
1927         } else {
1928                 aggr_port_t     *port = rx_ring->arr_port;
1929 
1930                 *val = mac_stat_get(port->lp_mh, stat);
1931 
1932         }
1933         return (0);
1934 }
1935 
1936 int
1937 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1938 {
1939         aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1940 
1941         if (tx_ring->atr_hw_rh != NULL) {
1942                 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1943         } else {
1944                 aggr_port_t     *port = tx_ring->atr_port;
1945 
1946                 *val = mac_stat_get(port->lp_mh, stat);
1947         }
1948         return (0);
1949 }
1950 
1951 static int
1952 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1953 {
1954         aggr_grp_t              *grp = arg;
1955         mac_perim_handle_t      mph;
1956         int                     rval = 0;
1957 
1958         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1959 
1960         switch (stat) {
1961         case MAC_STAT_IFSPEED:
1962                 *val = grp->lg_ifspeed;
1963                 break;
1964 
1965         case ETHER_STAT_LINK_DUPLEX:
1966                 *val = grp->lg_link_duplex;
1967                 break;
1968 
1969         default:
1970                 /*
1971                  * For all other statistics, we return the aggregated stat
1972                  * from the underlying ports.  aggr_grp_stat() will set
1973                  * rval appropriately if the statistic isn't a counter.
1974                  */
1975                 rval = aggr_grp_stat(grp, stat, val);
1976         }
1977 
1978         mac_perim_exit(mph);
1979         return (rval);
1980 }
1981 
1982 static int
1983 aggr_m_start(void *arg)
1984 {
1985         aggr_grp_t *grp = arg;
1986         aggr_port_t *port;
1987         mac_perim_handle_t mph, pmph;
1988 
1989         mac_perim_enter_by_mh(grp->lg_mh, &mph);
1990 
1991         /*
1992          * Attempts to start all configured members of the group.
1993          * Group members will be attached when their link-up notification
1994          * is received.
1995          */
1996         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1997                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1998                 if (aggr_port_start(port) != 0) {
1999                         mac_perim_exit(pmph);
2000                         continue;
2001                 }
2002 
2003                 /*
2004                  * Turn on the promiscuous mode if it is required to receive
2005                  * the non-primary address over a port, or the promiscous
2006                  * mode is enabled over the aggr.
2007                  */
2008                 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2009                         if (aggr_port_promisc(port, B_TRUE) != 0)
2010                                 aggr_port_stop(port);
2011                 }
2012                 mac_perim_exit(pmph);
2013         }
2014 
2015         grp->lg_started = B_TRUE;
2016 
2017         mac_perim_exit(mph);
2018         return (0);
2019 }
2020 
2021 static void
2022 aggr_m_stop(void *arg)
2023 {
2024         aggr_grp_t *grp = arg;
2025         aggr_port_t *port;
2026         mac_perim_handle_t mph, pmph;
2027 
2028         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2029 
2030         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2031                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2032 
2033                 /* reset port promiscuous mode */
2034                 (void) aggr_port_promisc(port, B_FALSE);
2035 
2036                 aggr_port_stop(port);
2037                 mac_perim_exit(pmph);
2038         }
2039 
2040         grp->lg_started = B_FALSE;
2041         mac_perim_exit(mph);
2042 }
2043 
2044 static int
2045 aggr_m_promisc(void *arg, boolean_t on)
2046 {
2047         aggr_grp_t *grp = arg;
2048         aggr_port_t *port;
2049         boolean_t link_state_changed = B_FALSE;
2050         mac_perim_handle_t mph, pmph;
2051 
2052         AGGR_GRP_REFHOLD(grp);
2053         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2054 
2055         ASSERT(!grp->lg_closing);
2056 
2057         if (on == grp->lg_promisc)
2058                 goto bail;
2059 
2060         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2061                 int     err = 0;
2062 
2063                 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2064                 AGGR_PORT_REFHOLD(port);
2065                 if (!on && (port->lp_prom_addr == NULL))
2066                         err = aggr_port_promisc(port, B_FALSE);
2067                 else if (on && port->lp_started)
2068                         err = aggr_port_promisc(port, B_TRUE);
2069 
2070                 if (err != 0) {
2071                         if (aggr_grp_detach_port(grp, port))
2072                                 link_state_changed = B_TRUE;
2073                 } else {
2074                         /*
2075                          * If a port was detached because of a previous
2076                          * failure changing the promiscuity, the port
2077                          * is reattached when it successfully changes
2078                          * the promiscuity now, and this might cause
2079                          * the link state of the aggregation to change.
2080                          */
2081                         if (aggr_grp_attach_port(grp, port))
2082                                 link_state_changed = B_TRUE;
2083                 }
2084                 mac_perim_exit(pmph);
2085                 AGGR_PORT_REFRELE(port);
2086         }
2087 
2088         grp->lg_promisc = on;
2089 
2090         if (link_state_changed)
2091                 mac_link_update(grp->lg_mh, grp->lg_link_state);
2092 
2093 bail:
2094         mac_perim_exit(mph);
2095         AGGR_GRP_REFRELE(grp);
2096 
2097         return (0);
2098 }
2099 
2100 static void
2101 aggr_grp_port_rename(const char *new_name, void *arg)
2102 {
2103         /*
2104          * aggr port's mac client name is the format of "aggr link name" plus
2105          * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2106          */
2107         int aggr_len, link_len, clnt_name_len, i;
2108         char *str_end, *str_st, *str_del;
2109         char aggr_name[MAXNAMELEN];
2110         char link_name[MAXNAMELEN];
2111         char *clnt_name;
2112         aggr_grp_t *aggr_grp = arg;
2113         aggr_port_t *aggr_port = aggr_grp->lg_ports;
2114 
2115         for (i = 0; i < aggr_grp->lg_nports; i++) {
2116                 clnt_name = mac_client_name(aggr_port->lp_mch);
2117                 clnt_name_len = strlen(clnt_name);
2118                 str_st = clnt_name;
2119                 str_end = &(clnt_name[clnt_name_len]);
2120                 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2121                 ASSERT(str_del != NULL);
2122                 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2123                 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2124                 bzero(aggr_name, MAXNAMELEN);
2125                 bzero(link_name, MAXNAMELEN);
2126                 bcopy(clnt_name, aggr_name, aggr_len);
2127                 bcopy(str_del, link_name, link_len + 1);
2128                 bzero(clnt_name, MAXNAMELEN);
2129                 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2130                     link_name);
2131 
2132                 (void) mac_rename_primary(aggr_port->lp_mh, NULL);
2133                 aggr_port = aggr_port->lp_next;
2134         }
2135 }
2136 
2137 /*
2138  * Initialize the capabilities that are advertised for the group
2139  * according to the capabilities of the constituent ports.
2140  */
2141 static boolean_t
2142 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2143 {
2144         aggr_grp_t *grp = arg;
2145 
2146         switch (cap) {
2147         case MAC_CAPAB_HCKSUM: {
2148                 uint32_t *hcksum_txflags = cap_data;
2149                 *hcksum_txflags = grp->lg_hcksum_txflags;
2150                 break;
2151         }
2152         case MAC_CAPAB_LSO: {
2153                 mac_capab_lso_t *cap_lso = cap_data;
2154 
2155                 if (grp->lg_lso) {
2156                         *cap_lso = grp->lg_cap_lso;
2157                         break;
2158                 } else {
2159                         return (B_FALSE);
2160                 }
2161         }
2162         case MAC_CAPAB_NO_NATIVEVLAN:
2163                 return (!grp->lg_vlan);
2164         case MAC_CAPAB_NO_ZCOPY:
2165                 return (!grp->lg_zcopy);
2166         case MAC_CAPAB_RINGS: {
2167                 mac_capab_rings_t *cap_rings = cap_data;
2168 
2169                 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2170                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2171                         cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2172 
2173                         /*
2174                          * An aggregation advertises only one (pseudo) RX
2175                          * group, which virtualizes the main/primary group of
2176                          * the underlying devices.
2177                          */
2178                         cap_rings->mr_gnum = 1;
2179                         cap_rings->mr_gaddring = NULL;
2180                         cap_rings->mr_gremring = NULL;
2181                 } else {
2182                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2183                         cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
2184                         cap_rings->mr_gnum = 0;
2185                 }
2186                 cap_rings->mr_rget = aggr_fill_ring;
2187                 cap_rings->mr_gget = aggr_fill_group;
2188                 break;
2189         }
2190         case MAC_CAPAB_AGGR:
2191         {
2192                 mac_capab_aggr_t *aggr_cap;
2193 
2194                 if (cap_data != NULL) {
2195                         aggr_cap = cap_data;
2196                         aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2197                         aggr_cap->mca_unicst = aggr_m_unicst;
2198                         aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2199                         aggr_cap->mca_arg = arg;
2200                 }
2201                 return (B_TRUE);
2202         }
2203         default:
2204                 return (B_FALSE);
2205         }
2206         return (B_TRUE);
2207 }
2208 
2209 /*
2210  * Callback funtion for MAC layer to register groups.
2211  */
2212 static void
2213 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2214     mac_group_info_t *infop, mac_group_handle_t gh)
2215 {
2216         aggr_grp_t *grp = arg;
2217         aggr_pseudo_rx_group_t *rx_group;
2218         aggr_pseudo_tx_group_t *tx_group;
2219 
2220         ASSERT(index == 0);
2221         if (rtype == MAC_RING_TYPE_RX) {
2222                 rx_group = &grp->lg_rx_group;
2223                 rx_group->arg_gh = gh;
2224                 rx_group->arg_grp = grp;
2225 
2226                 infop->mgi_driver = (mac_group_driver_t)rx_group;
2227                 infop->mgi_start = NULL;
2228                 infop->mgi_stop = NULL;
2229                 infop->mgi_addmac = aggr_addmac;
2230                 infop->mgi_remmac = aggr_remmac;
2231                 infop->mgi_count = rx_group->arg_ring_cnt;
2232         } else {
2233                 tx_group = &grp->lg_tx_group;
2234                 tx_group->atg_gh = gh;
2235         }
2236 }
2237 
2238 /*
2239  * Callback funtion for MAC layer to register all rings.
2240  */
2241 static void
2242 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2243     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2244 {
2245         aggr_grp_t      *grp = arg;
2246 
2247         switch (rtype) {
2248         case MAC_RING_TYPE_RX: {
2249                 aggr_pseudo_rx_group_t  *rx_group = &grp->lg_rx_group;
2250                 aggr_pseudo_rx_ring_t   *rx_ring;
2251                 mac_intr_t              aggr_mac_intr;
2252 
2253                 ASSERT(rg_index == 0);
2254 
2255                 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2256                 rx_ring = rx_group->arg_rings + index;
2257                 rx_ring->arr_rh = rh;
2258 
2259                 /*
2260                  * Entrypoint to enable interrupt (disable poll) and
2261                  * disable interrupt (enable poll).
2262                  */
2263                 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2264                 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2265                 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
2266                 aggr_mac_intr.mi_ddi_handle = NULL;
2267 
2268                 infop->mri_driver = (mac_ring_driver_t)rx_ring;
2269                 infop->mri_start = aggr_pseudo_start_ring;
2270                 infop->mri_stop = NULL;
2271 
2272                 infop->mri_intr = aggr_mac_intr;
2273                 infop->mri_poll = aggr_rx_poll;
2274 
2275                 infop->mri_stat = aggr_rx_ring_stat;
2276                 break;
2277         }
2278         case MAC_RING_TYPE_TX: {
2279                 aggr_pseudo_tx_group_t  *tx_group = &grp->lg_tx_group;
2280                 aggr_pseudo_tx_ring_t   *tx_ring;
2281 
2282                 ASSERT(rg_index == -1);
2283                 ASSERT(index < tx_group->atg_ring_cnt);
2284 
2285                 tx_ring = &tx_group->atg_rings[index];
2286                 tx_ring->atr_rh = rh;
2287 
2288                 infop->mri_driver = (mac_ring_driver_t)tx_ring;
2289                 infop->mri_start = NULL;
2290                 infop->mri_stop = NULL;
2291                 infop->mri_tx = aggr_ring_tx;
2292                 infop->mri_stat = aggr_tx_ring_stat;
2293                 /*
2294                  * Use the hw TX ring handle to find if the ring needs
2295                  * serialization or not. For NICs that do not expose
2296                  * Tx rings, atr_hw_rh will be NULL.
2297                  */
2298                 if (tx_ring->atr_hw_rh != NULL) {
2299                         infop->mri_flags =
2300                             mac_hwring_getinfo(tx_ring->atr_hw_rh);
2301                 }
2302                 break;
2303         }
2304         default:
2305                 break;
2306         }
2307 }
2308 
2309 static mblk_t *
2310 aggr_rx_poll(void *arg, int bytes_to_pickup)
2311 {
2312         aggr_pseudo_rx_ring_t *rr_ring = arg;
2313         aggr_port_t *port = rr_ring->arr_port;
2314         aggr_grp_t *grp = port->lp_grp;
2315         mblk_t *mp_chain, *mp, **mpp;
2316 
2317         mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2318 
2319         if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2320                 return (mp_chain);
2321 
2322         mpp = &mp_chain;
2323         while ((mp = *mpp) != NULL) {
2324                 if (MBLKL(mp) >= sizeof (struct ether_header)) {
2325                         struct ether_header *ehp;
2326 
2327                         ehp = (struct ether_header *)mp->b_rptr;
2328                         if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2329                                 *mpp = mp->b_next;
2330                                 mp->b_next = NULL;
2331                                 aggr_recv_lacp(port,
2332                                     (mac_resource_handle_t)rr_ring, mp);
2333                                 continue;
2334                         }
2335                 }
2336 
2337                 if (!port->lp_collector_enabled) {
2338                         *mpp = mp->b_next;
2339                         mp->b_next = NULL;
2340                         freemsg(mp);
2341                         continue;
2342                 }
2343                 mpp = &mp->b_next;
2344         }
2345         return (mp_chain);
2346 }
2347 
2348 static int
2349 aggr_addmac(void *arg, const uint8_t *mac_addr)
2350 {
2351         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)arg;
2352         aggr_unicst_addr_t      *addr, **pprev;
2353         aggr_grp_t              *grp = rx_group->arg_grp;
2354         aggr_port_t             *port, *p;
2355         mac_perim_handle_t      mph;
2356         int                     err = 0;
2357 
2358         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2359 
2360         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2361                 mac_perim_exit(mph);
2362                 return (0);
2363         }
2364 
2365         /*
2366          * Insert this mac address into the list of mac addresses owned by
2367          * the aggregation pseudo group.
2368          */
2369         pprev = &rx_group->arg_macaddr;
2370         while ((addr = *pprev) != NULL) {
2371                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2372                         mac_perim_exit(mph);
2373                         return (EEXIST);
2374                 }
2375                 pprev = &addr->aua_next;
2376         }
2377         addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2378         bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2379         addr->aua_next = NULL;
2380         *pprev = addr;
2381 
2382         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2383                 if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2384                         break;
2385 
2386         if (err != 0) {
2387                 for (p = grp->lg_ports; p != port; p = p->lp_next)
2388                         aggr_port_remmac(p, mac_addr);
2389 
2390                 *pprev = NULL;
2391                 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2392         }
2393 
2394         mac_perim_exit(mph);
2395         return (err);
2396 }
2397 
2398 static int
2399 aggr_remmac(void *arg, const uint8_t *mac_addr)
2400 {
2401         aggr_pseudo_rx_group_t  *rx_group = (aggr_pseudo_rx_group_t *)arg;
2402         aggr_unicst_addr_t      *addr, **pprev;
2403         aggr_grp_t              *grp = rx_group->arg_grp;
2404         aggr_port_t             *port;
2405         mac_perim_handle_t      mph;
2406         int                     err = 0;
2407 
2408         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2409 
2410         if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2411                 mac_perim_exit(mph);
2412                 return (0);
2413         }
2414 
2415         /*
2416          * Insert this mac address into the list of mac addresses owned by
2417          * the aggregation pseudo group.
2418          */
2419         pprev = &rx_group->arg_macaddr;
2420         while ((addr = *pprev) != NULL) {
2421                 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2422                         pprev = &addr->aua_next;
2423                         continue;
2424                 }
2425                 break;
2426         }
2427         if (addr == NULL) {
2428                 mac_perim_exit(mph);
2429                 return (EINVAL);
2430         }
2431 
2432         for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2433                 aggr_port_remmac(port, mac_addr);
2434 
2435         *pprev = addr->aua_next;
2436         kmem_free(addr, sizeof (aggr_unicst_addr_t));
2437 
2438         mac_perim_exit(mph);
2439         return (err);
2440 }
2441 
2442 /*
2443  * Add or remove the multicast addresses that are defined for the group
2444  * to or from the specified port.
2445  *
2446  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2447  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2448  * called when the port is either stopped or detached.
2449  */
2450 void
2451 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2452 {
2453         aggr_grp_t *grp = port->lp_grp;
2454 
2455         ASSERT(MAC_PERIM_HELD(port->lp_mh));
2456         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2457 
2458         if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2459                 return;
2460 
2461         mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2462 }
2463 
2464 static int
2465 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2466 {
2467         aggr_grp_t *grp = arg;
2468         aggr_port_t *port = NULL, *errport = NULL;
2469         mac_perim_handle_t mph;
2470         int err = 0;
2471 
2472         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2473         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2474                 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2475                     !port->lp_started) {
2476                         continue;
2477                 }
2478                 err = aggr_port_multicst(port, add, addrp);
2479                 if (err != 0) {
2480                         errport = port;
2481                         break;
2482                 }
2483         }
2484 
2485         /*
2486          * At least one port caused error return and this error is returned to
2487          * mac, eventually a NAK would be sent upwards.
2488          * Some ports have this multicast address listed now, and some don't.
2489          * Treat this error as a whole aggr failure not individual port failure.
2490          * Therefore remove this multicast address from other ports.
2491          */
2492         if ((err != 0) && add) {
2493                 for (port = grp->lg_ports; port != errport;
2494                     port = port->lp_next) {
2495                         if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2496                             !port->lp_started) {
2497                                 continue;
2498                         }
2499                         (void) aggr_port_multicst(port, B_FALSE, addrp);
2500                 }
2501         }
2502         mac_perim_exit(mph);
2503         return (err);
2504 }
2505 
2506 static int
2507 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2508 {
2509         aggr_grp_t *grp = arg;
2510         mac_perim_handle_t mph;
2511         int err;
2512 
2513         mac_perim_enter_by_mh(grp->lg_mh, &mph);
2514         err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2515             0, 0);
2516         mac_perim_exit(mph);
2517         return (err);
2518 }
2519 
2520 /*
2521  * Initialize the capabilities that are advertised for the group
2522  * according to the capabilities of the constituent ports.
2523  */
2524 static void
2525 aggr_grp_capab_set(aggr_grp_t *grp)
2526 {
2527         uint32_t cksum;
2528         aggr_port_t *port;
2529         mac_capab_lso_t cap_lso;
2530 
2531         ASSERT(grp->lg_mh == NULL);
2532         ASSERT(grp->lg_ports != NULL);
2533 
2534         grp->lg_hcksum_txflags = (uint32_t)-1;
2535         grp->lg_zcopy = B_TRUE;
2536         grp->lg_vlan = B_TRUE;
2537 
2538         grp->lg_lso = B_TRUE;
2539         grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2540         grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2541 
2542         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2543                 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2544                         cksum = 0;
2545                 grp->lg_hcksum_txflags &= cksum;
2546 
2547                 grp->lg_vlan &=
2548                     !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2549 
2550                 grp->lg_zcopy &=
2551                     !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2552 
2553                 grp->lg_lso &=
2554                     mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2555                 if (grp->lg_lso) {
2556                         grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2557                         if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2558                             cap_lso.lso_basic_tcp_ipv4.lso_max)
2559                                 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2560                                     cap_lso.lso_basic_tcp_ipv4.lso_max;
2561                 }
2562         }
2563 }
2564 
2565 /*
2566  * Checks whether the capabilities of the port being added are compatible
2567  * with the current capabilities of the aggregation.
2568  */
2569 static boolean_t
2570 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2571 {
2572         uint32_t hcksum_txflags;
2573 
2574         ASSERT(grp->lg_ports != NULL);
2575 
2576         if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2577             grp->lg_vlan) != grp->lg_vlan) {
2578                 return (B_FALSE);
2579         }
2580 
2581         if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2582             grp->lg_zcopy) != grp->lg_zcopy) {
2583                 return (B_FALSE);
2584         }
2585 
2586         if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2587                 if (grp->lg_hcksum_txflags != 0)
2588                         return (B_FALSE);
2589         } else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2590             grp->lg_hcksum_txflags) {
2591                 return (B_FALSE);
2592         }
2593 
2594         if (grp->lg_lso) {
2595                 mac_capab_lso_t cap_lso;
2596 
2597                 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2598                         if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2599                             grp->lg_cap_lso.lso_flags)
2600                                 return (B_FALSE);
2601                         if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2602                             cap_lso.lso_basic_tcp_ipv4.lso_max)
2603                                 return (B_FALSE);
2604                 } else {
2605                         return (B_FALSE);
2606                 }
2607         }
2608 
2609         return (B_TRUE);
2610 }
2611 
2612 /*
2613  * Returns the maximum SDU according to the SDU of the constituent ports.
2614  */
2615 static uint_t
2616 aggr_grp_max_sdu(aggr_grp_t *grp)
2617 {
2618         uint_t max_sdu = (uint_t)-1;
2619         aggr_port_t *port;
2620 
2621         ASSERT(grp->lg_ports != NULL);
2622 
2623         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2624                 uint_t port_sdu_max;
2625 
2626                 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2627                 if (max_sdu > port_sdu_max)
2628                         max_sdu = port_sdu_max;
2629         }
2630 
2631         return (max_sdu);
2632 }
2633 
2634 /*
2635  * Checks if the maximum SDU of the specified port is compatible
2636  * with the maximum SDU of the specified aggregation group, returns
2637  * B_TRUE if it is, B_FALSE otherwise.
2638  */
2639 static boolean_t
2640 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2641 {
2642         uint_t port_sdu_max;
2643 
2644         mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2645         return (port_sdu_max >= grp->lg_max_sdu);
2646 }
2647 
2648 /*
2649  * Returns the maximum margin according to the margin of the constituent ports.
2650  */
2651 static uint32_t
2652 aggr_grp_max_margin(aggr_grp_t *grp)
2653 {
2654         uint32_t margin = UINT32_MAX;
2655         aggr_port_t *port;
2656 
2657         ASSERT(grp->lg_mh == NULL);
2658         ASSERT(grp->lg_ports != NULL);
2659 
2660         for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2661                 if (margin > port->lp_margin)
2662                         margin = port->lp_margin;
2663         }
2664 
2665         grp->lg_margin = margin;
2666         return (margin);
2667 }
2668 
2669 /*
2670  * Checks if the maximum margin of the specified port is compatible
2671  * with the maximum margin of the specified aggregation group, returns
2672  * B_TRUE if it is, B_FALSE otherwise.
2673  */
2674 static boolean_t
2675 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2676 {
2677         if (port->lp_margin >= grp->lg_margin)
2678                 return (B_TRUE);
2679 
2680         /*
2681          * See whether the current margin value is allowed to be changed to
2682          * the new value.
2683          */
2684         if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2685                 return (B_FALSE);
2686 
2687         grp->lg_margin = port->lp_margin;
2688         return (B_TRUE);
2689 }
2690 
2691 /*
2692  * Set MTU on individual ports of an aggregation group
2693  */
2694 static int
2695 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2696     uint32_t *old_mtu)
2697 {
2698         boolean_t               removed = B_FALSE;
2699         mac_perim_handle_t      mph;
2700         mac_diag_t              diag;
2701         int                     err, rv, retry = 0;
2702 
2703         if (port->lp_mah != NULL) {
2704                 (void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2705                 port->lp_mah = NULL;
2706                 removed = B_TRUE;
2707         }
2708         err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2709 try_again:
2710         if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2711             MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2712             &port->lp_mah, 0, &diag)) != 0) {
2713                 /*
2714                  * following is a workaround for a bug in 'bge' driver.
2715                  * See CR 6794654 for more information and this work around
2716                  * will be removed once the CR is fixed.
2717                  */
2718                 if (rv == EIO && retry++ < 3) {
2719                         delay(2 * hz);
2720                         goto try_again;
2721                 }
2722                 /*
2723                  * if mac_unicast_add() failed while setting the MTU,
2724                  * detach the port from the group.
2725                  */
2726                 mac_perim_enter_by_mh(port->lp_mh, &mph);
2727                 (void) aggr_grp_detach_port(grp, port);
2728                 mac_perim_exit(mph);
2729                 cmn_err(CE_WARN, "Unable to restart the port %s while "
2730                     "setting MTU. Detaching the port from the aggregation.",
2731                     mac_client_name(port->lp_mch));
2732         }
2733         return (err);
2734 }
2735 
2736 static int
2737 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2738 {
2739         int                     err = 0, i, rv;
2740         aggr_port_t             *port;
2741         uint32_t                *mtu;
2742 
2743         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2744 
2745         /*
2746          * If the MTU being set is equal to aggr group's maximum
2747          * allowable value, then there is nothing to change
2748          */
2749         if (sdu == grp->lg_max_sdu)
2750                 return (0);
2751 
2752         /* 0 is aggr group's min sdu */
2753         if (sdu == 0)
2754                 return (EINVAL);
2755 
2756         mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2757         for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2758             port = port->lp_next, i++) {
2759                 err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2760         }
2761         if (err != 0) {
2762                 /* recover from error: reset the mtus of the ports */
2763                 aggr_port_t *tmp;
2764 
2765                 for (tmp = grp->lg_ports, i = 0; tmp != port;
2766                     tmp = tmp->lp_next, i++) {
2767                         (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2768                 }
2769                 goto bail;
2770         }
2771         grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2772         rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2773         ASSERT(rv == 0);
2774 bail:
2775         kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2776         return (err);
2777 }
2778 
2779 /*
2780  * Callback functions for set/get of properties
2781  */
2782 /*ARGSUSED*/
2783 static int
2784 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2785     uint_t pr_valsize, const void *pr_val)
2786 {
2787         int             err = ENOTSUP;
2788         aggr_grp_t      *grp = m_driver;
2789 
2790         switch (pr_num) {
2791         case MAC_PROP_MTU: {
2792                 uint32_t        mtu;
2793 
2794                 if (pr_valsize < sizeof (mtu)) {
2795                         err = EINVAL;
2796                         break;
2797                 }
2798                 bcopy(pr_val, &mtu, sizeof (mtu));
2799                 err = aggr_sdu_update(grp, mtu);
2800                 break;
2801         }
2802         default:
2803                 break;
2804         }
2805         return (err);
2806 }
2807 
2808 typedef struct rboundary {
2809         uint32_t        bval;
2810         int             btype;
2811 } rboundary_t;
2812 
2813 /*
2814  * This function finds the intersection of mtu ranges stored in arrays -
2815  * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
2816  * Individual arrays are assumed to contain non-overlapping ranges.
2817  * Algorithm:
2818  *   A range has two boundaries - min and max. We scan all arrays and store
2819  * each boundary as a separate element in a temporary array. We also store
2820  * the boundary types, min or max, as +1 or -1 respectively in the temporary
2821  * array. Then we sort the temporary array in ascending order. We scan the
2822  * sorted array from lower to higher values and keep a cumulative sum of
2823  * boundary types. Element in the temporary array for which the sum reaches
2824  * mcount is a min boundary of a range in the result and next element will be
2825  * max boundary.
2826  *
2827  * Example for mcount = 3,
2828  *
2829  *  ----|_________|-------|_______|----|__|------ mrange[0]
2830  *
2831  *  -------|________|--|____________|-----|___|-- mrange[1]
2832  *
2833  *  --------|________________|-------|____|------ mrange[2]
2834  *
2835  *                                      3 2 1
2836  *                                       \|/
2837  *      1  23     2 1  2  3  2    1 01 2  V   0  <- the sum
2838  *  ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
2839  *
2840  *                                 same min and max
2841  *                                        V
2842  *  --------|_____|-------|__|------------|------ intersecting ranges
2843  */
2844 void
2845 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
2846     mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
2847 {
2848         mac_propval_uint32_range_t      *rval, *ur;
2849         int                             rmaxcnt, rcount;
2850         size_t                          sz_range32;
2851         rboundary_t                     *ta; /* temporary array */
2852         rboundary_t                     temp;
2853         boolean_t                       range_started = B_FALSE;
2854         int                             i, j, m, sum;
2855 
2856         sz_range32 = sizeof (mac_propval_uint32_range_t);
2857 
2858         for (i = 0, rmaxcnt = 0; i < mcount; i++)
2859                 rmaxcnt += mrange[i]->mpr_count;
2860 
2861         /* Allocate enough space to store the results */
2862         rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
2863 
2864         /* Number of boundaries are twice as many as ranges */
2865         ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
2866 
2867         for (i = 0, m = 0; i < mcount; i++) {
2868                 ur = &(mrange[i]->mpr_range_uint32[0]);
2869                 for (j = 0; j < mrange[i]->mpr_count; j++) {
2870                         ta[m].bval = ur[j].mpur_min;
2871                         ta[m++].btype = 1;
2872                         ta[m].bval = ur[j].mpur_max;
2873                         ta[m++].btype = -1;
2874                 }
2875         }
2876 
2877         /*
2878          * Sort the temporary array in ascending order of bval;
2879          * if boundary values are same then sort on btype.
2880          */
2881         for (i = 0; i < m-1; i++) {
2882                 for (j = i+1; j < m; j++) {
2883                         if ((ta[i].bval > ta[j].bval) ||
2884                             ((ta[i].bval == ta[j].bval) &&
2885                             (ta[i].btype < ta[j].btype))) {
2886                                 temp = ta[i];
2887                                 ta[i] = ta[j];
2888                                 ta[j] = temp;
2889                         }
2890                 }
2891         }
2892 
2893         /* Walk through temporary array to find all ranges in the results */
2894         for (i = 0, sum = 0, rcount = 0; i < m; i++) {
2895                 sum += ta[i].btype;
2896                 if (sum == mcount) {
2897                         rval[rcount].mpur_min = ta[i].bval;
2898                         range_started = B_TRUE;
2899                 } else if (sum < mcount && range_started) {
2900                         rval[rcount++].mpur_max = ta[i].bval;
2901                         range_started = B_FALSE;
2902                 }
2903         }
2904 
2905         *prval = rval;
2906         *prmaxcnt = rmaxcnt;
2907         *prcount = rcount;
2908 
2909         kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
2910 }
2911 
2912 /*
2913  * Returns the mtu ranges which could be supported by aggr group.
2914  * prmaxcnt returns the size of the buffer prval, prcount returns
2915  * the number of valid entries in prval. Caller is responsible
2916  * for freeing up prval.
2917  */
2918 int
2919 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
2920     int *prmaxcnt, int *prcount)
2921 {
2922         mac_propval_range_t             **vals;
2923         aggr_port_t                     *port;
2924         mac_perim_handle_t              mph;
2925         uint_t                          i, numr;
2926         int                             err = 0;
2927         size_t                          sz_propval, sz_range32;
2928         size_t                          size;
2929 
2930         sz_propval = sizeof (mac_propval_range_t);
2931         sz_range32 = sizeof (mac_propval_uint32_range_t);
2932 
2933         ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2934 
2935         vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
2936             KM_SLEEP);
2937 
2938         for (port = grp->lg_ports, i = 0; port != NULL;
2939             port = port->lp_next, i++) {
2940 
2941                 size = sz_propval;
2942                 vals[i] = kmem_alloc(size, KM_SLEEP);
2943                 vals[i]->mpr_count = 1;
2944 
2945                 mac_perim_enter_by_mh(port->lp_mh, &mph);
2946 
2947                 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2948                     NULL, 0, vals[i], NULL);
2949                 if (err == ENOSPC) {
2950                         /*
2951                          * Not enough space to hold all ranges.
2952                          * Allocate extra space as indicated and retry.
2953                          */
2954                         numr = vals[i]->mpr_count;
2955                         kmem_free(vals[i], sz_propval);
2956                         size = sz_propval + (numr - 1) * sz_range32;
2957                         vals[i] = kmem_alloc(size, KM_SLEEP);
2958                         vals[i]->mpr_count = numr;
2959                         err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2960                             NULL, 0, vals[i], NULL);
2961                         ASSERT(err != ENOSPC);
2962                 }
2963                 mac_perim_exit(mph);
2964                 if (err != 0) {
2965                         kmem_free(vals[i], size);
2966                         vals[i] = NULL;
2967                         break;
2968                 }
2969         }
2970 
2971         /*
2972          * if any of the underlying ports does not support changing MTU then
2973          * just return ENOTSUP
2974          */
2975         if (port != NULL) {
2976                 ASSERT(err != 0);
2977                 goto done;
2978         }
2979 
2980         aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
2981             prcount);
2982 
2983 done:
2984         for (i = 0; i < grp->lg_nports; i++) {
2985                 if (vals[i] != NULL) {
2986                         numr = vals[i]->mpr_count;
2987                         size = sz_propval + (numr - 1) * sz_range32;
2988                         kmem_free(vals[i], size);
2989                 }
2990         }
2991 
2992         kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
2993         return (err);
2994 }
2995 
2996 static void
2997 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2998     mac_prop_info_handle_t prh)
2999 {
3000         aggr_grp_t                      *grp = m_driver;
3001         mac_propval_uint32_range_t      *rval = NULL;
3002         int                             i, rcount, rmaxcnt;
3003         int                             err = 0;
3004 
3005         _NOTE(ARGUNUSED(pr_name));
3006 
3007         switch (pr_num) {
3008         case MAC_PROP_MTU:
3009 
3010                 err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
3011                     &rcount);
3012                 if (err != 0) {
3013                         ASSERT(rval == NULL);
3014                         return;
3015                 }
3016                 for (i = 0; i < rcount; i++) {
3017                         mac_prop_info_set_range_uint32(prh,
3018                             rval[i].mpur_min, rval[i].mpur_max);
3019                 }
3020                 kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
3021                 break;
3022         }
3023 }