/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2018 Joyent, Inc.
 */

/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the modhash
 * module. The hash key is the linkid associated with the link
 * aggregation group.
 *
 * A set of MAC ports is associated with each aggregation group.
 *
 * Aggr pseudo TX rings
 * --------------------
 * The underlying ports (NICs) in an aggregation can have TX rings. To
 * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * it is already implemented on the RX side, where they are called
 * pseudo RX rings. The same concept is extended to the TX side where
 * each TX ring of an underlying port is reflected in aggr as a pseudo
 * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
 * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
 * TX ring is given to the aggregation layer.
 *
 * With this change, the outgoing stack depth looks much better:
 *
 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_rx() -> <driver>_ring_tx()
 *
 * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
 *
 * In SRS_TX_AGGR mode, the mac_tx_aggr_mode() routine is called. This routine
 * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
 * ring belonging to a port on which the packet has to be sent.
 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
 * policy and then uses the fanout_hint passed to it to pick a TX ring from
 * the selected port.
 *
 * In SRS_TX_BW_AGGR mode, the mac_tx_bw_mode() function is called; the
 * bandwidth limit is applied first to the outgoing packet, and the packets
 * allowed to go out are then passed to mac_tx_aggr_mode() to be sent on a
 * particular TX ring.
68 */ 69 70 #include <sys/types.h> 71 #include <sys/sysmacros.h> 72 #include <sys/conf.h> 73 #include <sys/cmn_err.h> 74 #include <sys/disp.h> 75 #include <sys/list.h> 76 #include <sys/ksynch.h> 77 #include <sys/kmem.h> 78 #include <sys/stream.h> 79 #include <sys/modctl.h> 80 #include <sys/ddi.h> 81 #include <sys/sunddi.h> 82 #include <sys/atomic.h> 83 #include <sys/stat.h> 84 #include <sys/modhash.h> 85 #include <sys/id_space.h> 86 #include <sys/strsun.h> 87 #include <sys/cred.h> 88 #include <sys/dlpi.h> 89 #include <sys/zone.h> 90 #include <sys/mac_provider.h> 91 #include <sys/dls.h> 92 #include <sys/vlan.h> 93 #include <sys/aggr.h> 94 #include <sys/aggr_impl.h> 95 96 static int aggr_m_start(void *); 97 static void aggr_m_stop(void *); 98 static int aggr_m_promisc(void *, boolean_t); 99 static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 100 static int aggr_m_unicst(void *, const uint8_t *); 101 static int aggr_m_stat(void *, uint_t, uint64_t *); 102 static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 103 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); 104 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 105 const void *); 106 static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, 107 mac_prop_info_handle_t); 108 109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); 110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, 111 boolean_t *); 112 113 static void aggr_grp_capab_set(aggr_grp_t *); 114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 115 static uint_t aggr_grp_max_sdu(aggr_grp_t *); 116 static uint32_t aggr_grp_max_margin(aggr_grp_t *); 117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); 118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); 119 120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, 
aggr_pseudo_rx_group_t *); 122 static int aggr_pseudo_disable_intr(mac_intr_handle_t); 123 static int aggr_pseudo_enable_intr(mac_intr_handle_t); 124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); 125 static int aggr_addmac(void *, const uint8_t *); 126 static int aggr_remmac(void *, const uint8_t *); 127 static int aggr_addvlan(mac_group_driver_t, uint16_t); 128 static int aggr_remvlan(mac_group_driver_t, uint16_t); 129 static mblk_t *aggr_rx_poll(void *, int); 130 static void aggr_fill_ring(void *, mac_ring_type_t, const int, 131 const int, mac_ring_info_t *, mac_ring_handle_t); 132 static void aggr_fill_group(void *, mac_ring_type_t, const int, 133 mac_group_info_t *, mac_group_handle_t); 134 135 static kmem_cache_t *aggr_grp_cache; 136 static mod_hash_t *aggr_grp_hash; 137 static krwlock_t aggr_grp_lock; 138 static uint_t aggr_grp_cnt; 139 static id_space_t *key_ids; 140 141 #define GRP_HASHSZ 64 142 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) 143 #define AGGR_PORT_NAME_DELIMIT '-' 144 145 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 146 147 #define AGGR_M_CALLBACK_FLAGS \ 148 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) 149 150 static mac_callbacks_t aggr_m_callbacks = { 151 AGGR_M_CALLBACK_FLAGS, 152 aggr_m_stat, 153 aggr_m_start, 154 aggr_m_stop, 155 aggr_m_promisc, 156 aggr_m_multicst, 157 NULL, 158 NULL, 159 NULL, 160 aggr_m_ioctl, 161 aggr_m_capab_get, 162 NULL, 163 NULL, 164 aggr_m_setprop, 165 NULL, 166 aggr_m_propinfo 167 }; 168 169 /*ARGSUSED*/ 170 static int 171 aggr_grp_constructor(void *buf, void *arg, int kmflag) 172 { 173 aggr_grp_t *grp = buf; 174 175 bzero(grp, sizeof (*grp)); 176 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL); 177 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL); 178 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); 179 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); 180 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); 181 
mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); 182 cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); 183 grp->lg_link_state = LINK_STATE_UNKNOWN; 184 return (0); 185 } 186 187 /*ARGSUSED*/ 188 static void 189 aggr_grp_destructor(void *buf, void *arg) 190 { 191 aggr_grp_t *grp = buf; 192 193 if (grp->lg_tx_ports != NULL) { 194 kmem_free(grp->lg_tx_ports, 195 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 196 } 197 198 mutex_destroy(&grp->lg_lacp_lock); 199 cv_destroy(&grp->lg_lacp_cv); 200 mutex_destroy(&grp->lg_port_lock); 201 cv_destroy(&grp->lg_port_cv); 202 rw_destroy(&grp->lg_tx_lock); 203 mutex_destroy(&grp->lg_tx_flowctl_lock); 204 cv_destroy(&grp->lg_tx_flowctl_cv); 205 } 206 207 void 208 aggr_grp_init(void) 209 { 210 aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 211 sizeof (aggr_grp_t), 0, aggr_grp_constructor, 212 aggr_grp_destructor, NULL, NULL, NULL, 0); 213 214 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 215 GRP_HASHSZ, mod_hash_null_valdtor); 216 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 217 aggr_grp_cnt = 0; 218 219 /* 220 * Allocate an id space to manage key values (when key is not 221 * specified). The range of the id space will be from 222 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol 223 * uses a 16-bit key. 
224 */ 225 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); 226 ASSERT(key_ids != NULL); 227 } 228 229 void 230 aggr_grp_fini(void) 231 { 232 id_space_destroy(key_ids); 233 rw_destroy(&aggr_grp_lock); 234 mod_hash_destroy_idhash(aggr_grp_hash); 235 kmem_cache_destroy(aggr_grp_cache); 236 } 237 238 uint_t 239 aggr_grp_count(void) 240 { 241 uint_t count; 242 243 rw_enter(&aggr_grp_lock, RW_READER); 244 count = aggr_grp_cnt; 245 rw_exit(&aggr_grp_lock); 246 return (count); 247 } 248 249 /* 250 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions 251 * requires the mac perimeter, this function holds a reference of the aggr 252 * and aggr won't call mac_unregister() until this reference drops to 0. 253 */ 254 void 255 aggr_grp_port_hold(aggr_port_t *port) 256 { 257 aggr_grp_t *grp = port->lp_grp; 258 259 AGGR_PORT_REFHOLD(port); 260 mutex_enter(&grp->lg_port_lock); 261 grp->lg_port_ref++; 262 mutex_exit(&grp->lg_port_lock); 263 } 264 265 /* 266 * Release the reference of the grp and inform aggr_grp_delete() calling 267 * mac_unregister() is now safe. 268 */ 269 void 270 aggr_grp_port_rele(aggr_port_t *port) 271 { 272 aggr_grp_t *grp = port->lp_grp; 273 274 mutex_enter(&grp->lg_port_lock); 275 if (--grp->lg_port_ref == 0) 276 cv_signal(&grp->lg_port_cv); 277 mutex_exit(&grp->lg_port_lock); 278 AGGR_PORT_REFRELE(port); 279 } 280 281 /* 282 * Wait for the port's lacp timer thread and the port's notification callback 283 * to exit. 284 */ 285 void 286 aggr_grp_port_wait(aggr_grp_t *grp) 287 { 288 mutex_enter(&grp->lg_port_lock); 289 if (grp->lg_port_ref != 0) 290 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock); 291 mutex_exit(&grp->lg_port_lock); 292 } 293 294 /* 295 * Attach a port to a link aggregation group. 296 * 297 * A port is attached to a link aggregation group once its speed 298 * and link state have been verified. 299 * 300 * Returns B_TRUE if the group link state or speed has changed. 
If 301 * it's the case, the caller must notify the MAC layer via a call 302 * to mac_link(). 303 */ 304 boolean_t 305 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 306 { 307 boolean_t link_state_changed = B_FALSE; 308 309 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 310 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 311 312 if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 313 return (B_FALSE); 314 315 /* 316 * Validate the MAC port link speed and update the group 317 * link speed if needed. 318 */ 319 if (port->lp_ifspeed == 0 || 320 port->lp_link_state != LINK_STATE_UP || 321 port->lp_link_duplex != LINK_DUPLEX_FULL) { 322 /* 323 * Can't attach a MAC port with unknown link speed, 324 * down link, or not in full duplex mode. 325 */ 326 return (B_FALSE); 327 } 328 329 mutex_enter(&grp->lg_stat_lock); 330 if (grp->lg_ifspeed == 0) { 331 /* 332 * The group inherits the speed of the first link being 333 * attached. 334 */ 335 grp->lg_ifspeed = port->lp_ifspeed; 336 link_state_changed = B_TRUE; 337 } else if (grp->lg_ifspeed != port->lp_ifspeed) { 338 /* 339 * The link speed of the MAC port must be the same as 340 * the group link speed, as per 802.3ad. Since it is 341 * not, the attach is cancelled. 342 */ 343 mutex_exit(&grp->lg_stat_lock); 344 return (B_FALSE); 345 } 346 mutex_exit(&grp->lg_stat_lock); 347 348 grp->lg_nattached_ports++; 349 350 /* 351 * Update the group link state. 352 */ 353 if (grp->lg_link_state != LINK_STATE_UP) { 354 grp->lg_link_state = LINK_STATE_UP; 355 mutex_enter(&grp->lg_stat_lock); 356 grp->lg_link_duplex = LINK_DUPLEX_FULL; 357 mutex_exit(&grp->lg_stat_lock); 358 link_state_changed = B_TRUE; 359 } 360 361 /* 362 * Update port's state. 
363 */ 364 port->lp_state = AGGR_PORT_STATE_ATTACHED; 365 366 aggr_grp_multicst_port(port, B_TRUE); 367 368 /* 369 * Set port's receive callback 370 */ 371 mac_rx_set(port->lp_mch, aggr_recv_cb, port); 372 373 /* 374 * If LACP is OFF, the port can be used to send data as soon 375 * as its link is up and verified to be compatible with the 376 * aggregation. 377 * 378 * If LACP is active or passive, notify the LACP subsystem, which 379 * will enable sending on the port following the LACP protocol. 380 */ 381 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 382 aggr_send_port_enable(port); 383 else 384 aggr_lacp_port_attached(port); 385 386 return (link_state_changed); 387 } 388 389 boolean_t 390 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 391 { 392 boolean_t link_state_changed = B_FALSE; 393 394 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 395 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 396 397 /* update state */ 398 if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 399 return (B_FALSE); 400 401 mac_rx_clear(port->lp_mch); 402 403 aggr_grp_multicst_port(port, B_FALSE); 404 405 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 406 aggr_send_port_disable(port); 407 else 408 aggr_lacp_port_detached(port); 409 410 port->lp_state = AGGR_PORT_STATE_STANDBY; 411 412 grp->lg_nattached_ports--; 413 if (grp->lg_nattached_ports == 0) { 414 /* the last attached MAC port of the group is being detached */ 415 grp->lg_link_state = LINK_STATE_DOWN; 416 mutex_enter(&grp->lg_stat_lock); 417 grp->lg_ifspeed = 0; 418 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 419 mutex_exit(&grp->lg_stat_lock); 420 link_state_changed = B_TRUE; 421 } 422 423 return (link_state_changed); 424 } 425 426 /* 427 * Update the MAC addresses of the constituent ports of the specified 428 * group. This function is invoked: 429 * - after creating a new aggregation group. 430 * - after adding new ports to an aggregation group. 
431 * - after removing a port from a group when the MAC address of 432 * that port was used for the MAC address of the group. 433 * - after the MAC address of a port changed when the MAC address 434 * of that port was used for the MAC address of the group. 435 * 436 * Return true if the link state of the aggregation changed, for example 437 * as a result of a failure changing the MAC address of one of the 438 * constituent ports. 439 */ 440 boolean_t 441 aggr_grp_update_ports_mac(aggr_grp_t *grp) 442 { 443 aggr_port_t *cport; 444 boolean_t link_state_changed = B_FALSE; 445 mac_perim_handle_t mph; 446 447 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 448 449 for (cport = grp->lg_ports; cport != NULL; 450 cport = cport->lp_next) { 451 mac_perim_enter_by_mh(cport->lp_mh, &mph); 452 if (aggr_port_unicst(cport) != 0) { 453 if (aggr_grp_detach_port(grp, cport)) 454 link_state_changed = B_TRUE; 455 } else { 456 /* 457 * If a port was detached because of a previous 458 * failure changing the MAC address, the port is 459 * reattached when it successfully changes the MAC 460 * address now, and this might cause the link state 461 * of the aggregation to change. 462 */ 463 if (aggr_grp_attach_port(grp, cport)) 464 link_state_changed = B_TRUE; 465 } 466 mac_perim_exit(mph); 467 } 468 return (link_state_changed); 469 } 470 471 /* 472 * Invoked when the MAC address of a port has changed. If the port's 473 * MAC address was used for the group MAC address, set mac_addr_changedp 474 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST 475 * notification. If the link state changes due to detach/attach of 476 * the constituent port, set link_state_changedp to B_TRUE to indicate 477 * to the caller that it should send a MAC_NOTE_LINK notification. In both 478 * cases, it is the responsibility of the caller to invoke notification 479 * functions after releasing the the port lock. 
480 */ 481 void 482 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, 483 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 484 { 485 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 486 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 487 ASSERT(mac_addr_changedp != NULL); 488 ASSERT(link_state_changedp != NULL); 489 490 *mac_addr_changedp = B_FALSE; 491 *link_state_changedp = B_FALSE; 492 493 if (grp->lg_addr_fixed) { 494 /* 495 * The group is using a fixed MAC address or an automatic 496 * MAC address has not been set. 497 */ 498 return; 499 } 500 501 if (grp->lg_mac_addr_port == port) { 502 /* 503 * The MAC address of the port was assigned to the group 504 * MAC address. Update the group MAC address. 505 */ 506 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 507 *mac_addr_changedp = B_TRUE; 508 } else { 509 /* 510 * Update the actual port MAC address to the MAC address 511 * of the group. 512 */ 513 if (aggr_port_unicst(port) != 0) { 514 *link_state_changedp = aggr_grp_detach_port(grp, port); 515 } else { 516 /* 517 * If a port was detached because of a previous 518 * failure changing the MAC address, the port is 519 * reattached when it successfully changes the MAC 520 * address now, and this might cause the link state 521 * of the aggregation to change. 522 */ 523 *link_state_changedp = aggr_grp_attach_port(grp, port); 524 } 525 } 526 } 527 528 /* 529 * Add a port to a link aggregation group. 530 */ 531 static int 532 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, 533 aggr_port_t **pp) 534 { 535 aggr_port_t *port, **cport; 536 mac_perim_handle_t mph; 537 zoneid_t port_zoneid = ALL_ZONES; 538 int err; 539 540 /* The port must be int the same zone as the aggregation. */ 541 if (zone_check_datalink(&port_zoneid, port_linkid) != 0) 542 port_zoneid = GLOBAL_ZONEID; 543 if (grp->lg_zoneid != port_zoneid) 544 return (EBUSY); 545 546 /* 547 * lg_mh could be NULL when the function is called during the creation 548 * of the aggregation. 
549 */ 550 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh)); 551 552 /* create new port */ 553 err = aggr_port_create(grp, port_linkid, force, &port); 554 if (err != 0) 555 return (err); 556 557 mac_perim_enter_by_mh(port->lp_mh, &mph); 558 559 /* add port to list of group constituent ports */ 560 cport = &grp->lg_ports; 561 while (*cport != NULL) 562 cport = &((*cport)->lp_next); 563 *cport = port; 564 565 /* 566 * Back reference to the group it is member of. A port always 567 * holds a reference to its group to ensure that the back 568 * reference is always valid. 569 */ 570 port->lp_grp = grp; 571 AGGR_GRP_REFHOLD(grp); 572 grp->lg_nports++; 573 574 aggr_lacp_init_port(port); 575 mac_perim_exit(mph); 576 577 if (pp != NULL) 578 *pp = port; 579 580 return (0); 581 } 582 583 /* 584 * This is called in response to either our LACP state machine or a MAC 585 * notification that the link has gone down via aggr_send_port_disable(). At 586 * this point, we may need to update our default ring. To that end, we go 587 * through the set of ports (underlying datalinks in an aggregation) that are 588 * currently enabled to transmit data. If all our links have been disabled for 589 * transmit, then we don't do anything. 590 * 591 * Note, because we only have a single TX group, we don't have to worry about 592 * the rings moving between groups and the chance that mac will reassign it 593 * unless someone removes a port, at which point, we play it safe and call this 594 * again. 
595 */ 596 void 597 aggr_grp_update_default(aggr_grp_t *grp) 598 { 599 aggr_port_t *port; 600 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 601 602 rw_enter(&grp->lg_tx_lock, RW_WRITER); 603 604 if (grp->lg_ntx_ports == 0) { 605 rw_exit(&grp->lg_tx_lock); 606 return; 607 } 608 609 port = grp->lg_tx_ports[0]; 610 ASSERT(port->lp_tx_ring_cnt > 0); 611 mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]); 612 rw_exit(&grp->lg_tx_lock); 613 } 614 615 /* 616 * Add a pseudo RX ring for the given HW ring handle. 617 */ 618 static int 619 aggr_add_pseudo_rx_ring(aggr_port_t *port, 620 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 621 { 622 aggr_pseudo_rx_ring_t *ring; 623 int err; 624 int j; 625 626 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 627 ring = rx_grp->arg_rings + j; 628 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE)) 629 break; 630 } 631 632 /* 633 * No slot for this new RX ring. 634 */ 635 if (j == MAX_RINGS_PER_GROUP) 636 return (EIO); 637 638 ring->arr_flags |= MAC_PSEUDO_RING_INUSE; 639 ring->arr_hw_rh = hw_rh; 640 ring->arr_port = port; 641 rx_grp->arg_ring_cnt++; 642 643 /* 644 * The group is already registered, dynamically add a new ring to the 645 * mac group. 646 */ 647 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { 648 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 649 ring->arr_hw_rh = NULL; 650 ring->arr_port = NULL; 651 rx_grp->arg_ring_cnt--; 652 } else { 653 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 654 mac_find_ring(rx_grp->arg_gh, j)); 655 } 656 return (err); 657 } 658 659 /* 660 * Remove the pseudo RX ring of the given HW ring handle. 
661 */ 662 static void 663 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 664 { 665 aggr_pseudo_rx_ring_t *ring; 666 int j; 667 668 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 669 ring = rx_grp->arg_rings + j; 670 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) || 671 ring->arr_hw_rh != hw_rh) { 672 continue; 673 } 674 675 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh); 676 677 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 678 ring->arr_hw_rh = NULL; 679 ring->arr_port = NULL; 680 rx_grp->arg_ring_cnt--; 681 mac_hwring_teardown(hw_rh); 682 break; 683 } 684 } 685 686 /* 687 * Create pseudo rings over the HW rings of the port. 688 * 689 * o Create a pseudo ring in rx_grp per HW ring in the port's HW group. 690 * 691 * o Program existing unicast filters on the pseudo group into the HW group. 692 * 693 * o Program existing VLAN filters on the pseudo group into the HW group. 694 */ 695 static int 696 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 697 { 698 aggr_grp_t *grp = port->lp_grp; 699 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 700 aggr_unicst_addr_t *addr, *a; 701 mac_perim_handle_t pmph; 702 aggr_vlan_t *avp; 703 int hw_rh_cnt, i = 0, j; 704 int err = 0; 705 706 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 707 mac_perim_enter_by_mh(port->lp_mh, &pmph); 708 709 /* 710 * This function must be called after the aggr registers its MAC 711 * and its Rx group has been initialized. 712 */ 713 ASSERT(rx_grp->arg_gh != NULL); 714 715 /* 716 * Get the list of the underlying HW rings. 717 */ 718 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 719 &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); 720 721 if (port->lp_hwgh != NULL) { 722 /* 723 * Quiesce the HW ring and the MAC SRS on the ring. Note 724 * that the HW ring will be restarted when the pseudo ring 725 * is started. At that time all the packets will be 726 * directly passed up to the pseudo Rx ring and handled 727 * by MAC SRS created over the pseudo Rx ring. 
728 */ 729 mac_rx_client_quiesce(port->lp_mch); 730 mac_srs_perm_quiesce(port->lp_mch, B_TRUE); 731 } 732 733 /* 734 * Add existing VLAN and unicast address filters to the port. 735 */ 736 for (avp = list_head(&rx_grp->arg_vlans); avp != NULL; 737 avp = list_next(&rx_grp->arg_vlans, avp)) { 738 if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0) 739 goto err; 740 } 741 742 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { 743 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) 744 goto err; 745 } 746 747 for (i = 0; i < hw_rh_cnt; i++) { 748 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); 749 if (err != 0) 750 goto err; 751 } 752 753 port->lp_rx_grp_added = B_TRUE; 754 mac_perim_exit(pmph); 755 return (0); 756 757 err: 758 ASSERT(err != 0); 759 760 for (j = 0; j < i; j++) 761 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); 762 763 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) 764 aggr_port_remmac(port, a->aua_addr); 765 766 if (avp != NULL) 767 avp = list_prev(&rx_grp->arg_vlans, avp); 768 769 for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) { 770 int err2; 771 772 if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) { 773 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" 774 ": errno %d.", avp->av_vid, 775 mac_client_name(port->lp_mch), err2); 776 } 777 } 778 779 if (port->lp_hwgh != NULL) { 780 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 781 mac_rx_client_restart(port->lp_mch); 782 port->lp_hwgh = NULL; 783 } 784 785 mac_perim_exit(pmph); 786 return (err); 787 } 788 789 /* 790 * Destroy the pseudo rings mapping to this port and remove all VLAN 791 * and unicast filters from this port. Even if there are no underlying 792 * HW rings we must still remove the unicast filters to take the port 793 * out of promisc mode. 
794 */ 795 static void 796 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 797 { 798 aggr_grp_t *grp = port->lp_grp; 799 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 800 aggr_unicst_addr_t *addr; 801 mac_group_handle_t hwgh; 802 mac_perim_handle_t pmph; 803 int hw_rh_cnt, i; 804 805 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 806 mac_perim_enter_by_mh(port->lp_mh, &pmph); 807 808 if (!port->lp_rx_grp_added) 809 goto done; 810 811 ASSERT(rx_grp->arg_gh != NULL); 812 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 813 &hwgh, hw_rh, MAC_RING_TYPE_RX); 814 815 for (i = 0; i < hw_rh_cnt; i++) 816 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); 817 818 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) 819 aggr_port_remmac(port, addr->aua_addr); 820 821 for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL; 822 avp = list_next(&rx_grp->arg_vlans, avp)) { 823 int err; 824 825 if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) { 826 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s" 827 ": errno %d.", avp->av_vid, 828 mac_client_name(port->lp_mch), err); 829 } 830 } 831 832 if (port->lp_hwgh != NULL) { 833 port->lp_hwgh = NULL; 834 835 /* 836 * First clear the permanent-quiesced flag of the RX srs then 837 * restart the HW ring and the mac srs on the ring. Note that 838 * the HW ring and associated SRS will soon been removed when 839 * the port is removed from the aggr. 840 */ 841 mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 842 mac_rx_client_restart(port->lp_mch); 843 } 844 845 port->lp_rx_grp_added = B_FALSE; 846 done: 847 mac_perim_exit(pmph); 848 } 849 850 /* 851 * Add a pseudo TX ring for the given HW ring handle. 
852 */ 853 static int 854 aggr_add_pseudo_tx_ring(aggr_port_t *port, 855 aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, 856 mac_ring_handle_t *pseudo_rh) 857 { 858 aggr_pseudo_tx_ring_t *ring; 859 int err; 860 int i; 861 862 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 863 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 864 ring = tx_grp->atg_rings + i; 865 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) 866 break; 867 } 868 /* 869 * No slot for this new TX ring. 870 */ 871 if (i == MAX_RINGS_PER_GROUP) 872 return (EIO); 873 /* 874 * The following 4 statements needs to be done before 875 * calling mac_group_add_ring(). Otherwise it will 876 * result in an assertion failure in mac_init_ring(). 877 */ 878 ring->atr_flags |= MAC_PSEUDO_RING_INUSE; 879 ring->atr_hw_rh = hw_rh; 880 ring->atr_port = port; 881 tx_grp->atg_ring_cnt++; 882 883 /* 884 * The TX side has no concept of ring groups unlike RX groups. 885 * There is just a single group which stores all the TX rings. 886 * This group will be used to store aggr's pseudo TX rings. 887 */ 888 if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { 889 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 890 ring->atr_hw_rh = NULL; 891 ring->atr_port = NULL; 892 tx_grp->atg_ring_cnt--; 893 } else { 894 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); 895 if (hw_rh != NULL) { 896 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 897 mac_find_ring(tx_grp->atg_gh, i)); 898 } 899 } 900 901 return (err); 902 } 903 904 /* 905 * Remove the pseudo TX ring of the given HW ring handle. 
906 */ 907 static void 908 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, 909 mac_ring_handle_t pseudo_hw_rh) 910 { 911 aggr_pseudo_tx_ring_t *ring; 912 int i; 913 914 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 915 ring = tx_grp->atg_rings + i; 916 if (ring->atr_rh != pseudo_hw_rh) 917 continue; 918 919 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); 920 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); 921 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 922 mac_hwring_teardown(ring->atr_hw_rh); 923 ring->atr_hw_rh = NULL; 924 ring->atr_port = NULL; 925 tx_grp->atg_ring_cnt--; 926 break; 927 } 928 } 929 930 /* 931 * This function is called to create pseudo rings over hardware rings of 932 * the underlying device. There is a 1:1 mapping between the pseudo TX 933 * rings of the aggr and the hardware rings of the underlying port. 934 */ 935 static int 936 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 937 { 938 aggr_grp_t *grp = port->lp_grp; 939 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; 940 mac_perim_handle_t pmph; 941 int hw_rh_cnt, i = 0, j; 942 int err = 0; 943 944 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 945 mac_perim_enter_by_mh(port->lp_mh, &pmph); 946 947 /* 948 * Get the list the the underlying HW rings. 949 */ 950 hw_rh_cnt = mac_hwrings_get(port->lp_mch, 951 NULL, hw_rh, MAC_RING_TYPE_TX); 952 953 /* 954 * Even if the underlying NIC does not have TX rings, we 955 * still make a psuedo TX ring for that NIC with NULL as 956 * the ring handle. 
957 */ 958 if (hw_rh_cnt == 0) 959 port->lp_tx_ring_cnt = 1; 960 else 961 port->lp_tx_ring_cnt = hw_rh_cnt; 962 963 port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 964 port->lp_tx_ring_cnt), KM_SLEEP); 965 port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 966 port->lp_tx_ring_cnt), KM_SLEEP); 967 968 if (hw_rh_cnt == 0) { 969 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, 970 NULL, &pseudo_rh)) == 0) { 971 port->lp_tx_rings[0] = NULL; 972 port->lp_pseudo_tx_rings[0] = pseudo_rh; 973 } 974 } else { 975 for (i = 0; err == 0 && i < hw_rh_cnt; i++) { 976 err = aggr_add_pseudo_tx_ring(port, 977 tx_grp, hw_rh[i], &pseudo_rh); 978 if (err != 0) 979 break; 980 port->lp_tx_rings[i] = hw_rh[i]; 981 port->lp_pseudo_tx_rings[i] = pseudo_rh; 982 } 983 } 984 985 if (err != 0) { 986 if (hw_rh_cnt != 0) { 987 for (j = 0; j < i; j++) { 988 aggr_rem_pseudo_tx_ring(tx_grp, 989 port->lp_pseudo_tx_rings[j]); 990 } 991 } 992 kmem_free(port->lp_tx_rings, 993 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 994 kmem_free(port->lp_pseudo_tx_rings, 995 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 996 port->lp_tx_ring_cnt = 0; 997 } else { 998 port->lp_tx_grp_added = B_TRUE; 999 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, 1000 aggr_tx_ring_update, port); 1001 } 1002 mac_perim_exit(pmph); 1003 aggr_grp_update_default(grp); 1004 return (err); 1005 } 1006 1007 /* 1008 * This function is called by aggr to remove pseudo TX rings over the 1009 * HW rings of the underlying port. 
1010 */ 1011 static void 1012 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 1013 { 1014 aggr_grp_t *grp = port->lp_grp; 1015 mac_perim_handle_t pmph; 1016 int i; 1017 1018 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1019 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1020 1021 if (!port->lp_tx_grp_added) 1022 goto done; 1023 1024 ASSERT(tx_grp->atg_gh != NULL); 1025 1026 for (i = 0; i < port->lp_tx_ring_cnt; i++) 1027 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); 1028 1029 kmem_free(port->lp_tx_rings, 1030 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 1031 kmem_free(port->lp_pseudo_tx_rings, 1032 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 1033 1034 port->lp_tx_ring_cnt = 0; 1035 (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); 1036 port->lp_tx_grp_added = B_FALSE; 1037 aggr_grp_update_default(grp); 1038 done: 1039 mac_perim_exit(pmph); 1040 } 1041 1042 static int 1043 aggr_pseudo_disable_intr(mac_intr_handle_t ih) 1044 { 1045 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 1046 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh)); 1047 } 1048 1049 static int 1050 aggr_pseudo_enable_intr(mac_intr_handle_t ih) 1051 { 1052 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 1053 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh)); 1054 } 1055 1056 /* 1057 * Here we need to start the pseudo-ring. As MAC already ensures that the 1058 * underlying device is set up, all we need to do is save the ring generation. 1059 * 1060 * Note, we don't end up wanting to use the underlying mac_hwring_start/stop 1061 * functions here as those don't actually stop and start the ring, they just 1062 * quiesce the ring. Regardless of whether the aggr is logically up or not, we 1063 * want to make sure that we can receive traffic for LACP. 
1064 */ 1065 static int 1066 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen) 1067 { 1068 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 1069 1070 rr_ring->arr_gen = mr_gen; 1071 return (0); 1072 } 1073 1074 /* 1075 * Add one or more ports to an existing link aggregation group. 1076 */ 1077 int 1078 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, 1079 laioc_port_t *ports) 1080 { 1081 int rc, i, nadded = 0; 1082 aggr_grp_t *grp = NULL; 1083 aggr_port_t *port; 1084 boolean_t link_state_changed = B_FALSE; 1085 mac_perim_handle_t mph, pmph; 1086 1087 /* get group corresponding to linkid */ 1088 rw_enter(&aggr_grp_lock, RW_READER); 1089 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1090 (mod_hash_val_t *)&grp) != 0) { 1091 rw_exit(&aggr_grp_lock); 1092 return (ENOENT); 1093 } 1094 AGGR_GRP_REFHOLD(grp); 1095 1096 /* 1097 * Hold the perimeter so that the aggregation won't be destroyed. 1098 */ 1099 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1100 rw_exit(&aggr_grp_lock); 1101 1102 /* add the specified ports to group */ 1103 for (i = 0; i < nports; i++) { 1104 /* add port to group */ 1105 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, 1106 force, &port)) != 0) { 1107 goto bail; 1108 } 1109 ASSERT(port != NULL); 1110 nadded++; 1111 1112 /* check capabilities */ 1113 if (!aggr_grp_capab_check(grp, port) || 1114 !aggr_grp_sdu_check(grp, port) || 1115 !aggr_grp_margin_check(grp, port)) { 1116 rc = ENOTSUP; 1117 goto bail; 1118 } 1119 1120 /* 1121 * Create the pseudo ring for each HW ring of the underlying 1122 * port. 
1123 */ 1124 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); 1125 if (rc != 0) 1126 goto bail; 1127 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); 1128 if (rc != 0) 1129 goto bail; 1130 1131 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1132 1133 /* set LACP mode */ 1134 aggr_port_lacp_set_mode(grp, port); 1135 1136 /* start port if group has already been started */ 1137 if (grp->lg_started) { 1138 rc = aggr_port_start(port); 1139 if (rc != 0) { 1140 mac_perim_exit(pmph); 1141 goto bail; 1142 } 1143 1144 /* 1145 * Turn on the promiscuous mode over the port when it 1146 * is requested to be turned on to receive the 1147 * non-primary address over a port, or the promiscous 1148 * mode is enabled over the aggr. 1149 */ 1150 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1151 rc = aggr_port_promisc(port, B_TRUE); 1152 if (rc != 0) { 1153 mac_perim_exit(pmph); 1154 goto bail; 1155 } 1156 } 1157 } 1158 mac_perim_exit(pmph); 1159 1160 /* 1161 * Attach each port if necessary. 1162 */ 1163 if (aggr_port_notify_link(grp, port)) 1164 link_state_changed = B_TRUE; 1165 1166 /* 1167 * Initialize the callback functions for this port. 
1168 */ 1169 aggr_port_init_callbacks(port); 1170 } 1171 1172 /* update the MAC address of the constituent ports */ 1173 if (aggr_grp_update_ports_mac(grp)) 1174 link_state_changed = B_TRUE; 1175 1176 if (link_state_changed) 1177 mac_link_update(grp->lg_mh, grp->lg_link_state); 1178 1179 bail: 1180 if (rc != 0) { 1181 /* stop and remove ports that have been added */ 1182 for (i = 0; i < nadded; i++) { 1183 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1184 ASSERT(port != NULL); 1185 if (grp->lg_started) { 1186 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1187 (void) aggr_port_promisc(port, B_FALSE); 1188 aggr_port_stop(port); 1189 mac_perim_exit(pmph); 1190 } 1191 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1192 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1193 (void) aggr_grp_rem_port(grp, port, NULL, NULL); 1194 } 1195 } 1196 1197 mac_perim_exit(mph); 1198 AGGR_GRP_REFRELE(grp); 1199 return (rc); 1200 } 1201 1202 static int 1203 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy, 1204 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1205 aggr_lacp_timer_t lacp_timer) 1206 { 1207 boolean_t mac_addr_changed = B_FALSE; 1208 boolean_t link_state_changed = B_FALSE; 1209 mac_perim_handle_t pmph; 1210 1211 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1212 1213 /* validate fixed address if specified */ 1214 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 1215 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 1216 (mac_addr[0] & 0x01))) { 1217 return (EINVAL); 1218 } 1219 1220 /* update policy if requested */ 1221 if (update_mask & AGGR_MODIFY_POLICY) 1222 aggr_send_update_policy(grp, policy); 1223 1224 /* update unicast MAC address if requested */ 1225 if (update_mask & AGGR_MODIFY_MAC) { 1226 if (mac_fixed) { 1227 /* user-supplied MAC address */ 1228 grp->lg_mac_addr_port = NULL; 1229 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 1230 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1231 
mac_addr_changed = B_TRUE; 1232 } 1233 } else if (grp->lg_addr_fixed) { 1234 /* switch from user-supplied to automatic */ 1235 aggr_port_t *port = grp->lg_ports; 1236 1237 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1238 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 1239 grp->lg_mac_addr_port = port; 1240 mac_addr_changed = B_TRUE; 1241 mac_perim_exit(pmph); 1242 } 1243 grp->lg_addr_fixed = mac_fixed; 1244 } 1245 1246 if (mac_addr_changed) 1247 link_state_changed = aggr_grp_update_ports_mac(grp); 1248 1249 if (update_mask & AGGR_MODIFY_LACP_MODE) 1250 aggr_lacp_update_mode(grp, lacp_mode); 1251 1252 if (update_mask & AGGR_MODIFY_LACP_TIMER) 1253 aggr_lacp_update_timer(grp, lacp_timer); 1254 1255 if (link_state_changed) 1256 mac_link_update(grp->lg_mh, grp->lg_link_state); 1257 1258 if (mac_addr_changed) 1259 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1260 1261 return (0); 1262 } 1263 1264 /* 1265 * Update properties of an existing link aggregation group. 1266 */ 1267 int 1268 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, 1269 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1270 aggr_lacp_timer_t lacp_timer) 1271 { 1272 aggr_grp_t *grp = NULL; 1273 mac_perim_handle_t mph; 1274 int err; 1275 1276 /* get group corresponding to linkid */ 1277 rw_enter(&aggr_grp_lock, RW_READER); 1278 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1279 (mod_hash_val_t *)&grp) != 0) { 1280 rw_exit(&aggr_grp_lock); 1281 return (ENOENT); 1282 } 1283 AGGR_GRP_REFHOLD(grp); 1284 1285 /* 1286 * Hold the perimeter so that the aggregation won't be destroyed. 1287 */ 1288 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1289 rw_exit(&aggr_grp_lock); 1290 1291 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed, 1292 mac_addr, lacp_mode, lacp_timer); 1293 1294 mac_perim_exit(mph); 1295 AGGR_GRP_REFRELE(grp); 1296 return (err); 1297 } 1298 1299 /* 1300 * Create a new link aggregation group upon request from administrator. 
1301 * Returns 0 on success, an errno on failure. 1302 */ 1303 int 1304 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, 1305 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, 1306 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer, 1307 cred_t *credp) 1308 { 1309 aggr_grp_t *grp = NULL; 1310 aggr_port_t *port; 1311 mac_register_t *mac; 1312 boolean_t link_state_changed; 1313 mac_perim_handle_t mph; 1314 int err; 1315 int i; 1316 kt_did_t tid = 0; 1317 1318 /* need at least one port */ 1319 if (nports == 0) 1320 return (EINVAL); 1321 1322 rw_enter(&aggr_grp_lock, RW_WRITER); 1323 1324 /* does a group with the same linkid already exist? */ 1325 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1326 (mod_hash_val_t *)&grp); 1327 if (err == 0) { 1328 rw_exit(&aggr_grp_lock); 1329 return (EEXIST); 1330 } 1331 1332 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 1333 1334 grp->lg_refs = 1; 1335 grp->lg_closing = B_FALSE; 1336 grp->lg_force = force; 1337 grp->lg_linkid = linkid; 1338 grp->lg_zoneid = crgetzoneid(credp); 1339 grp->lg_ifspeed = 0; 1340 grp->lg_link_state = LINK_STATE_UNKNOWN; 1341 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 1342 grp->lg_started = B_FALSE; 1343 grp->lg_promisc = B_FALSE; 1344 grp->lg_lacp_done = B_FALSE; 1345 grp->lg_tx_notify_done = B_FALSE; 1346 grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 1347 grp->lg_lacp_rx_thread = thread_create(NULL, 0, 1348 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1349 grp->lg_tx_notify_thread = thread_create(NULL, 0, 1350 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri); 1351 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 1352 MAX_RINGS_PER_GROUP), KM_SLEEP); 1353 grp->lg_tx_blocked_cnt = 0; 1354 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); 1355 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); 1356 aggr_lacp_init_grp(grp); 1357 1358 grp->lg_rx_group.arg_untagged = 0; 
1359 list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t), 1360 offsetof(aggr_vlan_t, av_link)); 1361 1362 /* add MAC ports to group */ 1363 grp->lg_ports = NULL; 1364 grp->lg_nports = 0; 1365 grp->lg_nattached_ports = 0; 1366 grp->lg_ntx_ports = 0; 1367 1368 /* 1369 * If key is not specified by the user, allocate the key. 1370 */ 1371 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { 1372 err = ENOMEM; 1373 goto bail; 1374 } 1375 grp->lg_key = key; 1376 1377 for (i = 0; i < nports; i++) { 1378 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port); 1379 if (err != 0) 1380 goto bail; 1381 } 1382 1383 /* 1384 * If no explicit MAC address was specified by the administrator, 1385 * set it to the MAC address of the first port. 1386 */ 1387 grp->lg_addr_fixed = mac_fixed; 1388 if (grp->lg_addr_fixed) { 1389 /* validate specified address */ 1390 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 1391 err = EINVAL; 1392 goto bail; 1393 } 1394 bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 1395 } else { 1396 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1397 grp->lg_mac_addr_port = grp->lg_ports; 1398 } 1399 1400 /* set the initial group capabilities */ 1401 aggr_grp_capab_set(grp); 1402 1403 if ((mac = mac_alloc(MAC_VERSION)) == NULL) { 1404 err = ENOMEM; 1405 goto bail; 1406 } 1407 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1408 mac->m_driver = grp; 1409 mac->m_dip = aggr_dip; 1410 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? 
(uint_t)-1 : grp->lg_key; 1411 mac->m_src_addr = grp->lg_addr; 1412 mac->m_callbacks = &aggr_m_callbacks; 1413 mac->m_min_sdu = 0; 1414 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); 1415 mac->m_margin = aggr_grp_max_margin(grp); 1416 mac->m_v12n = MAC_VIRT_LEVEL1; 1417 err = mac_register(mac, &grp->lg_mh); 1418 mac_free(mac); 1419 if (err != 0) 1420 goto bail; 1421 1422 err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp)); 1423 if (err != 0) { 1424 (void) mac_unregister(grp->lg_mh); 1425 grp->lg_mh = NULL; 1426 goto bail; 1427 } 1428 1429 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1430 1431 /* 1432 * Update the MAC address of the constituent ports. 1433 * None of the port is attached at this time, the link state of the 1434 * aggregation will not change. 1435 */ 1436 link_state_changed = aggr_grp_update_ports_mac(grp); 1437 ASSERT(!link_state_changed); 1438 1439 /* update outbound load balancing policy */ 1440 aggr_send_update_policy(grp, policy); 1441 1442 /* set LACP mode */ 1443 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 1444 1445 /* 1446 * Attach each port if necessary. 1447 */ 1448 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1449 /* 1450 * Create the pseudo ring for each HW ring of the underlying 1451 * port. Note that this is done after the aggr registers the 1452 * mac. 1453 */ 1454 VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); 1455 VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); 1456 if (aggr_port_notify_link(grp, port)) 1457 link_state_changed = B_TRUE; 1458 1459 /* 1460 * Initialize the callback functions for this port. 
1461 */ 1462 aggr_port_init_callbacks(port); 1463 } 1464 1465 if (link_state_changed) 1466 mac_link_update(grp->lg_mh, grp->lg_link_state); 1467 1468 /* add new group to hash table */ 1469 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid), 1470 (mod_hash_val_t)grp); 1471 ASSERT(err == 0); 1472 aggr_grp_cnt++; 1473 1474 mac_perim_exit(mph); 1475 rw_exit(&aggr_grp_lock); 1476 return (0); 1477 1478 bail: 1479 1480 grp->lg_closing = B_TRUE; 1481 1482 port = grp->lg_ports; 1483 while (port != NULL) { 1484 aggr_port_t *cport; 1485 1486 cport = port->lp_next; 1487 aggr_port_delete(port); 1488 port = cport; 1489 } 1490 1491 /* 1492 * Inform the lacp_rx thread to exit. 1493 */ 1494 mutex_enter(&grp->lg_lacp_lock); 1495 grp->lg_lacp_done = B_TRUE; 1496 cv_signal(&grp->lg_lacp_cv); 1497 while (grp->lg_lacp_rx_thread != NULL) 1498 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1499 mutex_exit(&grp->lg_lacp_lock); 1500 /* 1501 * Inform the tx_notify thread to exit. 1502 */ 1503 mutex_enter(&grp->lg_tx_flowctl_lock); 1504 if (grp->lg_tx_notify_thread != NULL) { 1505 tid = grp->lg_tx_notify_thread->t_did; 1506 grp->lg_tx_notify_done = B_TRUE; 1507 cv_signal(&grp->lg_tx_flowctl_cv); 1508 } 1509 mutex_exit(&grp->lg_tx_flowctl_lock); 1510 if (tid != 0) 1511 thread_join(tid); 1512 1513 kmem_free(grp->lg_tx_blocked_rings, 1514 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1515 rw_exit(&aggr_grp_lock); 1516 AGGR_GRP_REFRELE(grp); 1517 return (err); 1518 } 1519 1520 /* 1521 * Return a pointer to the member of a group with specified linkid. 1522 */ 1523 static aggr_port_t * 1524 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid) 1525 { 1526 aggr_port_t *port; 1527 1528 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1529 1530 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1531 if (port->lp_linkid == linkid) 1532 break; 1533 } 1534 1535 return (port); 1536 } 1537 1538 /* 1539 * Stop, detach and remove a port from a link aggregation group. 
1540 */ 1541 static int 1542 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, 1543 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 1544 { 1545 int rc = 0; 1546 aggr_port_t **pport; 1547 boolean_t mac_addr_changed = B_FALSE; 1548 boolean_t link_state_changed = B_FALSE; 1549 mac_perim_handle_t mph; 1550 uint64_t val; 1551 uint_t i; 1552 uint_t stat; 1553 1554 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 1555 ASSERT(grp->lg_nports > 1); 1556 ASSERT(!grp->lg_closing); 1557 1558 /* unlink port */ 1559 for (pport = &grp->lg_ports; *pport != port; 1560 pport = &(*pport)->lp_next) { 1561 if (*pport == NULL) { 1562 rc = ENOENT; 1563 goto done; 1564 } 1565 } 1566 *pport = port->lp_next; 1567 1568 mac_perim_enter_by_mh(port->lp_mh, &mph); 1569 1570 /* 1571 * If the MAC address of the port being removed was assigned 1572 * to the group, update the group MAC address 1573 * using the MAC address of a different port. 1574 */ 1575 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 1576 /* 1577 * Set the MAC address of the group to the 1578 * MAC address of its first port. 1579 */ 1580 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 1581 grp->lg_mac_addr_port = grp->lg_ports; 1582 mac_addr_changed = B_TRUE; 1583 } 1584 1585 link_state_changed = aggr_grp_detach_port(grp, port); 1586 1587 /* 1588 * Add the counter statistics of the ports while it was aggregated 1589 * to the group's residual statistics. This is done by obtaining 1590 * the current counter from the underlying MAC then subtracting the 1591 * value of the counter at the moment it was added to the 1592 * aggregation. 
1593 */ 1594 for (i = 0; i < MAC_NSTAT; i++) { 1595 stat = i + MAC_STAT_MIN; 1596 if (!MAC_STAT_ISACOUNTER(stat)) 1597 continue; 1598 val = aggr_port_stat(port, stat); 1599 val -= port->lp_stat[i]; 1600 mutex_enter(&grp->lg_stat_lock); 1601 grp->lg_stat[i] += val; 1602 mutex_exit(&grp->lg_stat_lock); 1603 } 1604 for (i = 0; i < ETHER_NSTAT; i++) { 1605 stat = i + MACTYPE_STAT_MIN; 1606 if (!ETHER_STAT_ISACOUNTER(stat)) 1607 continue; 1608 val = aggr_port_stat(port, stat); 1609 val -= port->lp_ether_stat[i]; 1610 mutex_enter(&grp->lg_stat_lock); 1611 grp->lg_ether_stat[i] += val; 1612 mutex_exit(&grp->lg_stat_lock); 1613 } 1614 1615 grp->lg_nports--; 1616 mac_perim_exit(mph); 1617 1618 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1619 aggr_port_delete(port); 1620 1621 /* 1622 * If the group MAC address has changed, update the MAC address of 1623 * the remaining constituent ports according to the new MAC 1624 * address of the group. 1625 */ 1626 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 1627 link_state_changed = B_TRUE; 1628 1629 done: 1630 if (mac_addr_changedp != NULL) 1631 *mac_addr_changedp = mac_addr_changed; 1632 if (link_state_changedp != NULL) 1633 *link_state_changedp = link_state_changed; 1634 1635 return (rc); 1636 } 1637 1638 /* 1639 * Remove one or more ports from an existing link aggregation group. 
1640 */ 1641 int 1642 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) 1643 { 1644 int rc = 0, i; 1645 aggr_grp_t *grp = NULL; 1646 aggr_port_t *port; 1647 boolean_t mac_addr_update = B_FALSE, mac_addr_changed; 1648 boolean_t link_state_update = B_FALSE, link_state_changed; 1649 mac_perim_handle_t mph, pmph; 1650 1651 /* get group corresponding to linkid */ 1652 rw_enter(&aggr_grp_lock, RW_READER); 1653 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1654 (mod_hash_val_t *)&grp) != 0) { 1655 rw_exit(&aggr_grp_lock); 1656 return (ENOENT); 1657 } 1658 AGGR_GRP_REFHOLD(grp); 1659 1660 /* 1661 * Hold the perimeter so that the aggregation won't be destroyed. 1662 */ 1663 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1664 rw_exit(&aggr_grp_lock); 1665 1666 /* we need to keep at least one port per group */ 1667 if (nports >= grp->lg_nports) { 1668 rc = EINVAL; 1669 goto bail; 1670 } 1671 1672 /* first verify that all the groups are valid */ 1673 for (i = 0; i < nports; i++) { 1674 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { 1675 /* port not found */ 1676 rc = ENOENT; 1677 goto bail; 1678 } 1679 } 1680 1681 /* clear the promiscous mode for the specified ports */ 1682 for (i = 0; i < nports && rc == 0; i++) { 1683 /* lookup port */ 1684 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1685 ASSERT(port != NULL); 1686 1687 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1688 rc = aggr_port_promisc(port, B_FALSE); 1689 mac_perim_exit(pmph); 1690 } 1691 if (rc != 0) { 1692 for (i = 0; i < nports; i++) { 1693 port = aggr_grp_port_lookup(grp, 1694 ports[i].lp_linkid); 1695 ASSERT(port != NULL); 1696 1697 /* 1698 * Turn the promiscuous mode back on if it is required 1699 * to receive the non-primary address over a port, or 1700 * the promiscous mode is enabled over the aggr. 
1701 */ 1702 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1703 if (port->lp_started && (grp->lg_promisc || 1704 port->lp_prom_addr != NULL)) { 1705 (void) aggr_port_promisc(port, B_TRUE); 1706 } 1707 mac_perim_exit(pmph); 1708 } 1709 goto bail; 1710 } 1711 1712 /* remove the specified ports from group */ 1713 for (i = 0; i < nports; i++) { 1714 /* lookup port */ 1715 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1716 ASSERT(port != NULL); 1717 1718 /* stop port if group has already been started */ 1719 if (grp->lg_started) { 1720 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1721 aggr_port_stop(port); 1722 mac_perim_exit(pmph); 1723 } 1724 1725 /* 1726 * aggr_rem_pseudo_tx_group() is not called here. Instead 1727 * it is called from inside aggr_grp_rem_port() after the 1728 * port has been detached. The reason is that 1729 * aggr_rem_pseudo_tx_group() removes one ring at a time 1730 * and if there is still traffic going on, then there 1731 * is the possibility of aggr_find_tx_ring() returning a 1732 * removed ring for transmission. Once the port has been 1733 * detached, that port will not be used and 1734 * aggr_find_tx_ring() will not return any rings 1735 * belonging to it. 
1736 */ 1737 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1738 1739 /* remove port from group */ 1740 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, 1741 &link_state_changed); 1742 ASSERT(rc == 0); 1743 mac_addr_update = mac_addr_update || mac_addr_changed; 1744 link_state_update = link_state_update || link_state_changed; 1745 } 1746 1747 bail: 1748 if (mac_addr_update) 1749 mac_unicst_update(grp->lg_mh, grp->lg_addr); 1750 if (link_state_update) 1751 mac_link_update(grp->lg_mh, grp->lg_link_state); 1752 1753 mac_perim_exit(mph); 1754 AGGR_GRP_REFRELE(grp); 1755 1756 return (rc); 1757 } 1758 1759 int 1760 aggr_grp_delete(datalink_id_t linkid, cred_t *cred) 1761 { 1762 aggr_grp_t *grp = NULL; 1763 aggr_port_t *port, *cport; 1764 datalink_id_t tmpid; 1765 mod_hash_val_t val; 1766 mac_perim_handle_t mph, pmph; 1767 int err; 1768 kt_did_t tid = 0; 1769 1770 rw_enter(&aggr_grp_lock, RW_WRITER); 1771 1772 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1773 (mod_hash_val_t *)&grp) != 0) { 1774 rw_exit(&aggr_grp_lock); 1775 return (ENOENT); 1776 } 1777 1778 /* 1779 * Note that dls_devnet_destroy() must be called before lg_lock is 1780 * held. Otherwise, it will deadlock if another thread is in 1781 * aggr_m_stat() and thus has a kstat_hold() on the kstats that 1782 * dls_devnet_destroy() needs to delete. 1783 */ 1784 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) { 1785 rw_exit(&aggr_grp_lock); 1786 return (err); 1787 } 1788 ASSERT(linkid == tmpid); 1789 1790 /* 1791 * Unregister from the MAC service module. Since this can 1792 * fail if a client hasn't closed the MAC port, we gracefully 1793 * fail the operation. 
1794 */ 1795 if ((err = mac_disable(grp->lg_mh)) != 0) { 1796 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred)); 1797 rw_exit(&aggr_grp_lock); 1798 return (err); 1799 } 1800 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); 1801 ASSERT(grp == (aggr_grp_t *)val); 1802 1803 ASSERT(aggr_grp_cnt > 0); 1804 aggr_grp_cnt--; 1805 rw_exit(&aggr_grp_lock); 1806 1807 /* 1808 * Inform the lacp_rx thread to exit. 1809 */ 1810 mutex_enter(&grp->lg_lacp_lock); 1811 grp->lg_lacp_done = B_TRUE; 1812 cv_signal(&grp->lg_lacp_cv); 1813 while (grp->lg_lacp_rx_thread != NULL) 1814 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1815 mutex_exit(&grp->lg_lacp_lock); 1816 /* 1817 * Inform the tx_notify_thread to exit. 1818 */ 1819 mutex_enter(&grp->lg_tx_flowctl_lock); 1820 if (grp->lg_tx_notify_thread != NULL) { 1821 tid = grp->lg_tx_notify_thread->t_did; 1822 grp->lg_tx_notify_done = B_TRUE; 1823 cv_signal(&grp->lg_tx_flowctl_cv); 1824 } 1825 mutex_exit(&grp->lg_tx_flowctl_lock); 1826 if (tid != 0) 1827 thread_join(tid); 1828 1829 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1830 1831 grp->lg_closing = B_TRUE; 1832 /* detach and free MAC ports associated with group */ 1833 port = grp->lg_ports; 1834 while (port != NULL) { 1835 cport = port->lp_next; 1836 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1837 if (grp->lg_started) 1838 aggr_port_stop(port); 1839 (void) aggr_grp_detach_port(grp, port); 1840 mac_perim_exit(pmph); 1841 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1842 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1843 aggr_port_delete(port); 1844 port = cport; 1845 } 1846 1847 mac_perim_exit(mph); 1848 1849 kmem_free(grp->lg_tx_blocked_rings, 1850 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1851 /* 1852 * Wait for the port's lacp timer thread and its notification callback 1853 * to exit before calling mac_unregister() since both needs to access 1854 * the mac perimeter of the grp. 
1855 */ 1856 aggr_grp_port_wait(grp); 1857 1858 VERIFY(mac_unregister(grp->lg_mh) == 0); 1859 grp->lg_mh = NULL; 1860 1861 list_destroy(&(grp->lg_rx_group.arg_vlans)); 1862 1863 AGGR_GRP_REFRELE(grp); 1864 return (0); 1865 } 1866 1867 void 1868 aggr_grp_free(aggr_grp_t *grp) 1869 { 1870 ASSERT(grp->lg_refs == 0); 1871 ASSERT(grp->lg_port_ref == 0); 1872 if (grp->lg_key > AGGR_MAX_KEY) { 1873 id_free(key_ids, grp->lg_key); 1874 grp->lg_key = 0; 1875 } 1876 kmem_cache_free(aggr_grp_cache, grp); 1877 } 1878 1879 int 1880 aggr_grp_info(datalink_id_t linkid, void *fn_arg, 1881 aggr_grp_info_new_grp_fn_t new_grp_fn, 1882 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred) 1883 { 1884 aggr_grp_t *grp; 1885 aggr_port_t *port; 1886 mac_perim_handle_t mph, pmph; 1887 int rc = 0; 1888 1889 /* 1890 * Make sure that the aggregation link is visible from the caller's 1891 * zone. 1892 */ 1893 if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred))) 1894 return (ENOENT); 1895 1896 rw_enter(&aggr_grp_lock, RW_READER); 1897 1898 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1899 (mod_hash_val_t *)&grp) != 0) { 1900 rw_exit(&aggr_grp_lock); 1901 return (ENOENT); 1902 } 1903 AGGR_GRP_REFHOLD(grp); 1904 1905 mac_perim_enter_by_mh(grp->lg_mh, &mph); 1906 rw_exit(&aggr_grp_lock); 1907 1908 rc = new_grp_fn(fn_arg, grp->lg_linkid, 1909 (grp->lg_key > AGGR_MAX_KEY) ? 
0 : grp->lg_key, grp->lg_addr, 1910 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, 1911 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 1912 1913 if (rc != 0) 1914 goto bail; 1915 1916 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1917 mac_perim_enter_by_mh(port->lp_mh, &pmph); 1918 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, 1919 port->lp_state, &port->lp_lacp.ActorOperPortState); 1920 mac_perim_exit(pmph); 1921 1922 if (rc != 0) 1923 goto bail; 1924 } 1925 1926 bail: 1927 mac_perim_exit(mph); 1928 AGGR_GRP_REFRELE(grp); 1929 return (rc); 1930 } 1931 1932 /*ARGSUSED*/ 1933 static void 1934 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 1935 { 1936 miocnak(q, mp, 0, ENOTSUP); 1937 } 1938 1939 static int 1940 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) 1941 { 1942 aggr_port_t *port; 1943 uint_t stat_index; 1944 1945 ASSERT(MUTEX_HELD(&grp->lg_stat_lock)); 1946 1947 /* We only aggregate counter statistics. */ 1948 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || 1949 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { 1950 return (ENOTSUP); 1951 } 1952 1953 /* 1954 * Counter statistics for a group are computed by aggregating the 1955 * counters of the members MACs while they were aggregated, plus 1956 * the residual counter of the group itself, which is updated each 1957 * time a MAC is removed from the group. 1958 */ 1959 *val = 0; 1960 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1961 /* actual port statistic */ 1962 *val += aggr_port_stat(port, stat); 1963 /* 1964 * minus the port stat when it was added, plus any residual 1965 * amount for the group. 
1966 */ 1967 if (IS_MAC_STAT(stat)) { 1968 stat_index = stat - MAC_STAT_MIN; 1969 *val -= port->lp_stat[stat_index]; 1970 *val += grp->lg_stat[stat_index]; 1971 } else if (IS_MACTYPE_STAT(stat)) { 1972 stat_index = stat - MACTYPE_STAT_MIN; 1973 *val -= port->lp_ether_stat[stat_index]; 1974 *val += grp->lg_ether_stat[stat_index]; 1975 } 1976 } 1977 return (0); 1978 } 1979 1980 int 1981 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 1982 { 1983 aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; 1984 1985 if (rx_ring->arr_hw_rh != NULL) { 1986 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); 1987 } else { 1988 aggr_port_t *port = rx_ring->arr_port; 1989 1990 *val = mac_stat_get(port->lp_mh, stat); 1991 1992 } 1993 return (0); 1994 } 1995 1996 int 1997 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 1998 { 1999 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; 2000 2001 if (tx_ring->atr_hw_rh != NULL) { 2002 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); 2003 } else { 2004 aggr_port_t *port = tx_ring->atr_port; 2005 2006 *val = mac_stat_get(port->lp_mh, stat); 2007 } 2008 return (0); 2009 } 2010 2011 static int 2012 aggr_m_stat(void *arg, uint_t stat, uint64_t *val) 2013 { 2014 aggr_grp_t *grp = arg; 2015 int rval = 0; 2016 2017 mutex_enter(&grp->lg_stat_lock); 2018 2019 switch (stat) { 2020 case MAC_STAT_IFSPEED: 2021 *val = grp->lg_ifspeed; 2022 break; 2023 2024 case ETHER_STAT_LINK_DUPLEX: 2025 *val = grp->lg_link_duplex; 2026 break; 2027 2028 default: 2029 /* 2030 * For all other statistics, we return the aggregated stat 2031 * from the underlying ports. aggr_grp_stat() will set 2032 * rval appropriately if the statistic isn't a counter. 
2033 */ 2034 rval = aggr_grp_stat(grp, stat, val); 2035 } 2036 2037 mutex_exit(&grp->lg_stat_lock); 2038 return (rval); 2039 } 2040 2041 static int 2042 aggr_m_start(void *arg) 2043 { 2044 aggr_grp_t *grp = arg; 2045 aggr_port_t *port; 2046 mac_perim_handle_t mph, pmph; 2047 2048 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2049 2050 /* 2051 * Attempts to start all configured members of the group. 2052 * Group members will be attached when their link-up notification 2053 * is received. 2054 */ 2055 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2056 mac_perim_enter_by_mh(port->lp_mh, &pmph); 2057 if (aggr_port_start(port) != 0) { 2058 mac_perim_exit(pmph); 2059 continue; 2060 } 2061 2062 /* 2063 * Turn on the promiscuous mode if it is required to receive 2064 * the non-primary address over a port, or the promiscous 2065 * mode is enabled over the aggr. 2066 */ 2067 if (grp->lg_promisc || port->lp_prom_addr != NULL) { 2068 if (aggr_port_promisc(port, B_TRUE) != 0) 2069 aggr_port_stop(port); 2070 } 2071 mac_perim_exit(pmph); 2072 } 2073 2074 grp->lg_started = B_TRUE; 2075 2076 mac_perim_exit(mph); 2077 return (0); 2078 } 2079 2080 static void 2081 aggr_m_stop(void *arg) 2082 { 2083 aggr_grp_t *grp = arg; 2084 aggr_port_t *port; 2085 mac_perim_handle_t mph, pmph; 2086 2087 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2088 2089 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2090 mac_perim_enter_by_mh(port->lp_mh, &pmph); 2091 2092 /* reset port promiscuous mode */ 2093 (void) aggr_port_promisc(port, B_FALSE); 2094 2095 aggr_port_stop(port); 2096 mac_perim_exit(pmph); 2097 } 2098 2099 grp->lg_started = B_FALSE; 2100 mac_perim_exit(mph); 2101 } 2102 2103 static int 2104 aggr_m_promisc(void *arg, boolean_t on) 2105 { 2106 aggr_grp_t *grp = arg; 2107 aggr_port_t *port; 2108 boolean_t link_state_changed = B_FALSE; 2109 mac_perim_handle_t mph, pmph; 2110 2111 AGGR_GRP_REFHOLD(grp); 2112 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2113 2114 
ASSERT(!grp->lg_closing); 2115 2116 if (on == grp->lg_promisc) 2117 goto bail; 2118 2119 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2120 int err = 0; 2121 2122 mac_perim_enter_by_mh(port->lp_mh, &pmph); 2123 AGGR_PORT_REFHOLD(port); 2124 if (!on && (port->lp_prom_addr == NULL)) 2125 err = aggr_port_promisc(port, B_FALSE); 2126 else if (on && port->lp_started) 2127 err = aggr_port_promisc(port, B_TRUE); 2128 2129 if (err != 0) { 2130 if (aggr_grp_detach_port(grp, port)) 2131 link_state_changed = B_TRUE; 2132 } else { 2133 /* 2134 * If a port was detached because of a previous 2135 * failure changing the promiscuity, the port 2136 * is reattached when it successfully changes 2137 * the promiscuity now, and this might cause 2138 * the link state of the aggregation to change. 2139 */ 2140 if (aggr_grp_attach_port(grp, port)) 2141 link_state_changed = B_TRUE; 2142 } 2143 mac_perim_exit(pmph); 2144 AGGR_PORT_REFRELE(port); 2145 } 2146 2147 grp->lg_promisc = on; 2148 2149 if (link_state_changed) 2150 mac_link_update(grp->lg_mh, grp->lg_link_state); 2151 2152 bail: 2153 mac_perim_exit(mph); 2154 AGGR_GRP_REFRELE(grp); 2155 2156 return (0); 2157 } 2158 2159 static void 2160 aggr_grp_port_rename(const char *new_name, void *arg) 2161 { 2162 /* 2163 * aggr port's mac client name is the format of "aggr link name" plus 2164 * AGGR_PORT_NAME_DELIMIT plus "underneath link name". 
2165 */ 2166 int aggr_len, link_len, clnt_name_len, i; 2167 char *str_end, *str_st, *str_del; 2168 char aggr_name[MAXNAMELEN]; 2169 char link_name[MAXNAMELEN]; 2170 char *clnt_name; 2171 aggr_grp_t *aggr_grp = arg; 2172 aggr_port_t *aggr_port = aggr_grp->lg_ports; 2173 2174 for (i = 0; i < aggr_grp->lg_nports; i++) { 2175 clnt_name = mac_client_name(aggr_port->lp_mch); 2176 clnt_name_len = strlen(clnt_name); 2177 str_st = clnt_name; 2178 str_end = &(clnt_name[clnt_name_len]); 2179 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT); 2180 ASSERT(str_del != NULL); 2181 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st); 2182 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del); 2183 bzero(aggr_name, MAXNAMELEN); 2184 bzero(link_name, MAXNAMELEN); 2185 bcopy(clnt_name, aggr_name, aggr_len); 2186 bcopy(str_del, link_name, link_len + 1); 2187 bzero(clnt_name, MAXNAMELEN); 2188 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name, 2189 link_name); 2190 2191 (void) mac_rename_primary(aggr_port->lp_mh, NULL); 2192 aggr_port = aggr_port->lp_next; 2193 } 2194 } 2195 2196 /* 2197 * Initialize the capabilities that are advertised for the group 2198 * according to the capabilities of the constituent ports. 
2199 */ 2200 static boolean_t 2201 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 2202 { 2203 aggr_grp_t *grp = arg; 2204 2205 switch (cap) { 2206 case MAC_CAPAB_HCKSUM: { 2207 uint32_t *hcksum_txflags = cap_data; 2208 *hcksum_txflags = grp->lg_hcksum_txflags; 2209 break; 2210 } 2211 case MAC_CAPAB_LSO: { 2212 mac_capab_lso_t *cap_lso = cap_data; 2213 2214 if (grp->lg_lso) { 2215 *cap_lso = grp->lg_cap_lso; 2216 break; 2217 } else { 2218 return (B_FALSE); 2219 } 2220 } 2221 case MAC_CAPAB_NO_NATIVEVLAN: 2222 return (!grp->lg_vlan); 2223 case MAC_CAPAB_NO_ZCOPY: 2224 return (!grp->lg_zcopy); 2225 case MAC_CAPAB_RINGS: { 2226 mac_capab_rings_t *cap_rings = cap_data; 2227 2228 if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2229 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2230 cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; 2231 2232 /* 2233 * An aggregation advertises only one (pseudo) RX 2234 * group, which virtualizes the main/primary group of 2235 * the underlying devices. 2236 */ 2237 cap_rings->mr_gnum = 1; 2238 cap_rings->mr_gaddring = NULL; 2239 cap_rings->mr_gremring = NULL; 2240 } else { 2241 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2242 cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; 2243 cap_rings->mr_gnum = 0; 2244 } 2245 cap_rings->mr_rget = aggr_fill_ring; 2246 cap_rings->mr_gget = aggr_fill_group; 2247 break; 2248 } 2249 case MAC_CAPAB_AGGR: 2250 { 2251 mac_capab_aggr_t *aggr_cap; 2252 2253 if (cap_data != NULL) { 2254 aggr_cap = cap_data; 2255 aggr_cap->mca_rename_fn = aggr_grp_port_rename; 2256 aggr_cap->mca_unicst = aggr_m_unicst; 2257 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; 2258 aggr_cap->mca_arg = arg; 2259 } 2260 return (B_TRUE); 2261 } 2262 default: 2263 return (B_FALSE); 2264 } 2265 return (B_TRUE); 2266 } 2267 2268 /* 2269 * Callback function for MAC layer to register groups. 
2270 */ 2271 static void 2272 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, 2273 mac_group_info_t *infop, mac_group_handle_t gh) 2274 { 2275 aggr_grp_t *grp = arg; 2276 aggr_pseudo_rx_group_t *rx_group; 2277 aggr_pseudo_tx_group_t *tx_group; 2278 2279 ASSERT(index == 0); 2280 if (rtype == MAC_RING_TYPE_RX) { 2281 rx_group = &grp->lg_rx_group; 2282 rx_group->arg_gh = gh; 2283 rx_group->arg_grp = grp; 2284 2285 infop->mgi_driver = (mac_group_driver_t)rx_group; 2286 infop->mgi_start = NULL; 2287 infop->mgi_stop = NULL; 2288 infop->mgi_addmac = aggr_addmac; 2289 infop->mgi_remmac = aggr_remmac; 2290 infop->mgi_count = rx_group->arg_ring_cnt; 2291 2292 /* 2293 * Always set the HW VLAN callbacks. They are smart 2294 * enough to know when a port has HW VLAN filters to 2295 * program and when it doesn't. 2296 */ 2297 infop->mgi_addvlan = aggr_addvlan; 2298 infop->mgi_remvlan = aggr_remvlan; 2299 } else { 2300 tx_group = &grp->lg_tx_group; 2301 tx_group->atg_gh = gh; 2302 } 2303 } 2304 2305 /* 2306 * Callback funtion for MAC layer to register all rings. 2307 */ 2308 static void 2309 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 2310 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh) 2311 { 2312 aggr_grp_t *grp = arg; 2313 2314 switch (rtype) { 2315 case MAC_RING_TYPE_RX: { 2316 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group; 2317 aggr_pseudo_rx_ring_t *rx_ring; 2318 mac_intr_t aggr_mac_intr; 2319 2320 ASSERT(rg_index == 0); 2321 2322 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt)); 2323 rx_ring = rx_group->arg_rings + index; 2324 rx_ring->arr_rh = rh; 2325 2326 /* 2327 * Entrypoint to enable interrupt (disable poll) and 2328 * disable interrupt (enable poll). 
2329 */ 2330 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; 2331 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; 2332 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; 2333 aggr_mac_intr.mi_ddi_handle = NULL; 2334 2335 infop->mri_driver = (mac_ring_driver_t)rx_ring; 2336 infop->mri_start = aggr_pseudo_start_ring; 2337 infop->mri_stop = NULL; 2338 2339 infop->mri_intr = aggr_mac_intr; 2340 infop->mri_poll = aggr_rx_poll; 2341 2342 infop->mri_stat = aggr_rx_ring_stat; 2343 break; 2344 } 2345 case MAC_RING_TYPE_TX: { 2346 aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; 2347 aggr_pseudo_tx_ring_t *tx_ring; 2348 2349 ASSERT(rg_index == -1); 2350 ASSERT(index < tx_group->atg_ring_cnt); 2351 2352 tx_ring = &tx_group->atg_rings[index]; 2353 tx_ring->atr_rh = rh; 2354 2355 infop->mri_driver = (mac_ring_driver_t)tx_ring; 2356 infop->mri_start = NULL; 2357 infop->mri_stop = NULL; 2358 infop->mri_tx = aggr_ring_tx; 2359 infop->mri_stat = aggr_tx_ring_stat; 2360 /* 2361 * Use the hw TX ring handle to find if the ring needs 2362 * serialization or not. For NICs that do not expose 2363 * Tx rings, atr_hw_rh will be NULL. 
2364 */ 2365 if (tx_ring->atr_hw_rh != NULL) { 2366 infop->mri_flags = 2367 mac_hwring_getinfo(tx_ring->atr_hw_rh); 2368 } 2369 break; 2370 } 2371 default: 2372 break; 2373 } 2374 } 2375 2376 static mblk_t * 2377 aggr_rx_poll(void *arg, int bytes_to_pickup) 2378 { 2379 aggr_pseudo_rx_ring_t *rr_ring = arg; 2380 aggr_port_t *port = rr_ring->arr_port; 2381 aggr_grp_t *grp = port->lp_grp; 2382 mblk_t *mp_chain, *mp, **mpp; 2383 2384 mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup); 2385 2386 if (grp->lg_lacp_mode == AGGR_LACP_OFF) 2387 return (mp_chain); 2388 2389 mpp = &mp_chain; 2390 while ((mp = *mpp) != NULL) { 2391 if (MBLKL(mp) >= sizeof (struct ether_header)) { 2392 struct ether_header *ehp; 2393 2394 ehp = (struct ether_header *)mp->b_rptr; 2395 if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) { 2396 *mpp = mp->b_next; 2397 mp->b_next = NULL; 2398 aggr_recv_lacp(port, 2399 (mac_resource_handle_t)rr_ring, mp); 2400 continue; 2401 } 2402 } 2403 2404 if (!port->lp_collector_enabled) { 2405 *mpp = mp->b_next; 2406 mp->b_next = NULL; 2407 freemsg(mp); 2408 continue; 2409 } 2410 mpp = &mp->b_next; 2411 } 2412 return (mp_chain); 2413 } 2414 2415 static int 2416 aggr_addmac(void *arg, const uint8_t *mac_addr) 2417 { 2418 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2419 aggr_unicst_addr_t *addr, **pprev; 2420 aggr_grp_t *grp = rx_group->arg_grp; 2421 aggr_port_t *port, *p; 2422 mac_perim_handle_t mph; 2423 int err = 0; 2424 2425 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2426 2427 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2428 mac_perim_exit(mph); 2429 return (0); 2430 } 2431 2432 /* 2433 * Insert this mac address into the list of mac addresses owned by 2434 * the aggregation pseudo group. 
2435 */ 2436 pprev = &rx_group->arg_macaddr; 2437 while ((addr = *pprev) != NULL) { 2438 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) { 2439 mac_perim_exit(mph); 2440 return (EEXIST); 2441 } 2442 pprev = &addr->aua_next; 2443 } 2444 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 2445 bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 2446 addr->aua_next = NULL; 2447 *pprev = addr; 2448 2449 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2450 if ((err = aggr_port_addmac(port, mac_addr)) != 0) 2451 break; 2452 2453 if (err != 0) { 2454 for (p = grp->lg_ports; p != port; p = p->lp_next) 2455 aggr_port_remmac(p, mac_addr); 2456 2457 *pprev = NULL; 2458 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2459 } 2460 2461 mac_perim_exit(mph); 2462 return (err); 2463 } 2464 2465 static int 2466 aggr_remmac(void *arg, const uint8_t *mac_addr) 2467 { 2468 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2469 aggr_unicst_addr_t *addr, **pprev; 2470 aggr_grp_t *grp = rx_group->arg_grp; 2471 aggr_port_t *port; 2472 mac_perim_handle_t mph; 2473 int err = 0; 2474 2475 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2476 2477 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2478 mac_perim_exit(mph); 2479 return (0); 2480 } 2481 2482 /* 2483 * Insert this mac address into the list of mac addresses owned by 2484 * the aggregation pseudo group. 
2485 */ 2486 pprev = &rx_group->arg_macaddr; 2487 while ((addr = *pprev) != NULL) { 2488 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) { 2489 pprev = &addr->aua_next; 2490 continue; 2491 } 2492 break; 2493 } 2494 if (addr == NULL) { 2495 mac_perim_exit(mph); 2496 return (EINVAL); 2497 } 2498 2499 for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2500 aggr_port_remmac(port, mac_addr); 2501 2502 *pprev = addr->aua_next; 2503 kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2504 2505 mac_perim_exit(mph); 2506 return (err); 2507 } 2508 2509 /* 2510 * Search for VID in the Rx group's list and return a pointer if 2511 * found. Otherwise return NULL. 2512 */ 2513 static aggr_vlan_t * 2514 aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid) 2515 { 2516 ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh)); 2517 for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL; 2518 avp = list_next(&rx_group->arg_vlans, avp)) { 2519 if (avp->av_vid == vid) 2520 return (avp); 2521 } 2522 2523 return (NULL); 2524 } 2525 2526 /* 2527 * Accept traffic on the specified VID. 2528 * 2529 * Persist VLAN state in the aggr so that ports added later will 2530 * receive the correct filters. In the future it would be nice to 2531 * allow aggr to iterate its clients instead of duplicating state. 2532 */ 2533 static int 2534 aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid) 2535 { 2536 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; 2537 aggr_grp_t *aggr = rx_group->arg_grp; 2538 aggr_port_t *port, *p; 2539 mac_perim_handle_t mph; 2540 int err = 0; 2541 aggr_vlan_t *avp = NULL; 2542 2543 mac_perim_enter_by_mh(aggr->lg_mh, &mph); 2544 2545 if (vid == MAC_VLAN_UNTAGGED) { 2546 /* 2547 * Aggr is both a MAC provider and MAC client. As a 2548 * MAC provider it is passed MAC_VLAN_UNTAGGED by its 2549 * client. As a client itself, it should pass 2550 * VLAN_ID_NONE to its ports. 
2551 */ 2552 vid = VLAN_ID_NONE; 2553 rx_group->arg_untagged++; 2554 goto update_ports; 2555 } 2556 2557 avp = aggr_find_vlan(rx_group, vid); 2558 2559 if (avp != NULL) { 2560 avp->av_refs++; 2561 mac_perim_exit(mph); 2562 return (0); 2563 } 2564 2565 avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP); 2566 avp->av_vid = vid; 2567 avp->av_refs = 1; 2568 2569 update_ports: 2570 for (port = aggr->lg_ports; port != NULL; port = port->lp_next) 2571 if ((err = aggr_port_addvlan(port, vid)) != 0) 2572 break; 2573 2574 if (err != 0) { 2575 /* 2576 * If any of these calls fail then we are in a 2577 * situation where the ports have different HW state. 2578 * There's no reasonable action the MAC client can 2579 * take in this scenario to rectify the situation. 2580 */ 2581 for (p = aggr->lg_ports; p != port; p = p->lp_next) { 2582 int err2; 2583 2584 if ((err2 = aggr_port_remvlan(p, vid)) != 0) { 2585 cmn_err(CE_WARN, "Failed to remove VLAN %u" 2586 " from port %s: errno %d.", vid, 2587 mac_client_name(p->lp_mch), err2); 2588 } 2589 2590 } 2591 2592 if (vid == VLAN_ID_NONE) 2593 rx_group->arg_untagged--; 2594 2595 if (avp != NULL) { 2596 kmem_free(avp, sizeof (aggr_vlan_t)); 2597 avp = NULL; 2598 } 2599 } 2600 2601 if (avp != NULL) 2602 list_insert_tail(&rx_group->arg_vlans, avp); 2603 2604 done: 2605 mac_perim_exit(mph); 2606 return (err); 2607 } 2608 2609 /* 2610 * Stop accepting traffic on this VLAN if it's the last use of this VLAN. 2611 */ 2612 static int 2613 aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid) 2614 { 2615 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver; 2616 aggr_grp_t *aggr = rx_group->arg_grp; 2617 aggr_port_t *port, *p; 2618 mac_perim_handle_t mph; 2619 int err = 0; 2620 aggr_vlan_t *avp = NULL; 2621 2622 mac_perim_enter_by_mh(aggr->lg_mh, &mph); 2623 2624 /* 2625 * See the comment in aggr_addvlan(). 
2626 */ 2627 if (vid == MAC_VLAN_UNTAGGED) { 2628 vid = VLAN_ID_NONE; 2629 rx_group->arg_untagged--; 2630 2631 if (rx_group->arg_untagged > 0) 2632 goto done; 2633 2634 goto update_ports; 2635 } 2636 2637 avp = aggr_find_vlan(rx_group, vid); 2638 2639 if (avp == NULL) { 2640 err = ENOENT; 2641 goto done; 2642 } 2643 2644 avp->av_refs--; 2645 2646 if (avp->av_refs > 0) 2647 goto done; 2648 2649 update_ports: 2650 for (port = aggr->lg_ports; port != NULL; port = port->lp_next) 2651 if ((err = aggr_port_remvlan(port, vid)) != 0) 2652 break; 2653 2654 /* 2655 * See the comment in aggr_addvlan() for justification of the 2656 * use of VERIFY here. 2657 */ 2658 if (err != 0) { 2659 for (p = aggr->lg_ports; p != port; p = p->lp_next) { 2660 int err2; 2661 2662 if ((err2 = aggr_port_addvlan(p, vid)) != 0) { 2663 cmn_err(CE_WARN, "Failed to add VLAN %u" 2664 " to port %s: errno %d.", vid, 2665 mac_client_name(p->lp_mch), err2); 2666 } 2667 } 2668 2669 if (avp != NULL) 2670 avp->av_refs++; 2671 2672 if (vid == VLAN_ID_NONE) 2673 rx_group->arg_untagged++; 2674 2675 goto done; 2676 } 2677 2678 if (err == 0 && avp != NULL) { 2679 VERIFY3U(avp->av_refs, ==, 0); 2680 list_remove(&rx_group->arg_vlans, avp); 2681 kmem_free(avp, sizeof (aggr_vlan_t)); 2682 } 2683 2684 done: 2685 mac_perim_exit(mph); 2686 return (err); 2687 } 2688 2689 /* 2690 * Add or remove the multicast addresses that are defined for the group 2691 * to or from the specified port. 2692 * 2693 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port 2694 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is 2695 * called when the port is either stopped or detached. 
2696 */ 2697 void 2698 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 2699 { 2700 aggr_grp_t *grp = port->lp_grp; 2701 2702 ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2703 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2704 2705 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED) 2706 return; 2707 2708 mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add); 2709 } 2710 2711 static int 2712 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 2713 { 2714 aggr_grp_t *grp = arg; 2715 aggr_port_t *port = NULL, *errport = NULL; 2716 mac_perim_handle_t mph; 2717 int err = 0; 2718 2719 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2720 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2721 if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 2722 !port->lp_started) { 2723 continue; 2724 } 2725 err = aggr_port_multicst(port, add, addrp); 2726 if (err != 0) { 2727 errport = port; 2728 break; 2729 } 2730 } 2731 2732 /* 2733 * At least one port caused error return and this error is returned to 2734 * mac, eventually a NAK would be sent upwards. 2735 * Some ports have this multicast address listed now, and some don't. 2736 * Treat this error as a whole aggr failure not individual port failure. 2737 * Therefore remove this multicast address from other ports. 
2738 */ 2739 if ((err != 0) && add) { 2740 for (port = grp->lg_ports; port != errport; 2741 port = port->lp_next) { 2742 if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 2743 !port->lp_started) { 2744 continue; 2745 } 2746 (void) aggr_port_multicst(port, B_FALSE, addrp); 2747 } 2748 } 2749 mac_perim_exit(mph); 2750 return (err); 2751 } 2752 2753 static int 2754 aggr_m_unicst(void *arg, const uint8_t *macaddr) 2755 { 2756 aggr_grp_t *grp = arg; 2757 mac_perim_handle_t mph; 2758 int err; 2759 2760 mac_perim_enter_by_mh(grp->lg_mh, &mph); 2761 err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 2762 0, 0); 2763 mac_perim_exit(mph); 2764 return (err); 2765 } 2766 2767 /* 2768 * Initialize the capabilities that are advertised for the group 2769 * according to the capabilities of the constituent ports. 2770 */ 2771 static void 2772 aggr_grp_capab_set(aggr_grp_t *grp) 2773 { 2774 uint32_t cksum; 2775 aggr_port_t *port; 2776 mac_capab_lso_t cap_lso; 2777 2778 ASSERT(grp->lg_mh == NULL); 2779 ASSERT(grp->lg_ports != NULL); 2780 2781 grp->lg_hcksum_txflags = (uint32_t)-1; 2782 grp->lg_zcopy = B_TRUE; 2783 grp->lg_vlan = B_TRUE; 2784 2785 grp->lg_lso = B_TRUE; 2786 grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1; 2787 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1; 2788 2789 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2790 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum)) 2791 cksum = 0; 2792 grp->lg_hcksum_txflags &= cksum; 2793 2794 grp->lg_vlan &= 2795 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL); 2796 2797 grp->lg_zcopy &= 2798 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL); 2799 2800 grp->lg_lso &= 2801 mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso); 2802 if (grp->lg_lso) { 2803 grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags; 2804 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2805 cap_lso.lso_basic_tcp_ipv4.lso_max) 2806 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = 2807 
cap_lso.lso_basic_tcp_ipv4.lso_max; 2808 } 2809 } 2810 } 2811 2812 /* 2813 * Checks whether the capabilities of the port being added are compatible 2814 * with the current capabilities of the aggregation. 2815 */ 2816 static boolean_t 2817 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 2818 { 2819 uint32_t hcksum_txflags; 2820 2821 ASSERT(grp->lg_ports != NULL); 2822 2823 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & 2824 grp->lg_vlan) != grp->lg_vlan) { 2825 return (B_FALSE); 2826 } 2827 2828 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & 2829 grp->lg_zcopy) != grp->lg_zcopy) { 2830 return (B_FALSE); 2831 } 2832 2833 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { 2834 if (grp->lg_hcksum_txflags != 0) 2835 return (B_FALSE); 2836 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) != 2837 grp->lg_hcksum_txflags) { 2838 return (B_FALSE); 2839 } 2840 2841 if (grp->lg_lso) { 2842 mac_capab_lso_t cap_lso; 2843 2844 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) { 2845 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) != 2846 grp->lg_cap_lso.lso_flags) 2847 return (B_FALSE); 2848 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 2849 cap_lso.lso_basic_tcp_ipv4.lso_max) 2850 return (B_FALSE); 2851 } else { 2852 return (B_FALSE); 2853 } 2854 } 2855 2856 return (B_TRUE); 2857 } 2858 2859 /* 2860 * Returns the maximum SDU according to the SDU of the constituent ports. 
2861 */ 2862 static uint_t 2863 aggr_grp_max_sdu(aggr_grp_t *grp) 2864 { 2865 uint_t max_sdu = (uint_t)-1; 2866 aggr_port_t *port; 2867 2868 ASSERT(grp->lg_ports != NULL); 2869 2870 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2871 uint_t port_sdu_max; 2872 2873 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2874 if (max_sdu > port_sdu_max) 2875 max_sdu = port_sdu_max; 2876 } 2877 2878 return (max_sdu); 2879 } 2880 2881 /* 2882 * Checks if the maximum SDU of the specified port is compatible 2883 * with the maximum SDU of the specified aggregation group, returns 2884 * B_TRUE if it is, B_FALSE otherwise. 2885 */ 2886 static boolean_t 2887 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) 2888 { 2889 uint_t port_sdu_max; 2890 2891 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2892 return (port_sdu_max >= grp->lg_max_sdu); 2893 } 2894 2895 /* 2896 * Returns the maximum margin according to the margin of the constituent ports. 2897 */ 2898 static uint32_t 2899 aggr_grp_max_margin(aggr_grp_t *grp) 2900 { 2901 uint32_t margin = UINT32_MAX; 2902 aggr_port_t *port; 2903 2904 ASSERT(grp->lg_mh == NULL); 2905 ASSERT(grp->lg_ports != NULL); 2906 2907 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2908 if (margin > port->lp_margin) 2909 margin = port->lp_margin; 2910 } 2911 2912 grp->lg_margin = margin; 2913 return (margin); 2914 } 2915 2916 /* 2917 * Checks if the maximum margin of the specified port is compatible 2918 * with the maximum margin of the specified aggregation group, returns 2919 * B_TRUE if it is, B_FALSE otherwise. 2920 */ 2921 static boolean_t 2922 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) 2923 { 2924 if (port->lp_margin >= grp->lg_margin) 2925 return (B_TRUE); 2926 2927 /* 2928 * See whether the current margin value is allowed to be changed to 2929 * the new value. 
2930 */ 2931 if (!mac_margin_update(grp->lg_mh, port->lp_margin)) 2932 return (B_FALSE); 2933 2934 grp->lg_margin = port->lp_margin; 2935 return (B_TRUE); 2936 } 2937 2938 /* 2939 * Set MTU on individual ports of an aggregation group 2940 */ 2941 static int 2942 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu, 2943 uint32_t *old_mtu) 2944 { 2945 boolean_t removed = B_FALSE; 2946 mac_perim_handle_t mph; 2947 mac_diag_t diag; 2948 int err, rv, retry = 0; 2949 2950 if (port->lp_mah != NULL) { 2951 (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 2952 port->lp_mah = NULL; 2953 removed = B_TRUE; 2954 } 2955 err = mac_set_mtu(port->lp_mh, sdu, old_mtu); 2956 try_again: 2957 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL, 2958 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, 2959 &port->lp_mah, 0, &diag)) != 0) { 2960 /* 2961 * following is a workaround for a bug in 'bge' driver. 2962 * See CR 6794654 for more information and this work around 2963 * will be removed once the CR is fixed. 2964 */ 2965 if (rv == EIO && retry++ < 3) { 2966 delay(2 * hz); 2967 goto try_again; 2968 } 2969 /* 2970 * if mac_unicast_add() failed while setting the MTU, 2971 * detach the port from the group. 2972 */ 2973 mac_perim_enter_by_mh(port->lp_mh, &mph); 2974 (void) aggr_grp_detach_port(grp, port); 2975 mac_perim_exit(mph); 2976 cmn_err(CE_WARN, "Unable to restart the port %s while " 2977 "setting MTU. 
Detaching the port from the aggregation.", 2978 mac_client_name(port->lp_mch)); 2979 } 2980 return (err); 2981 } 2982 2983 static int 2984 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu) 2985 { 2986 int err = 0, i, rv; 2987 aggr_port_t *port; 2988 uint32_t *mtu; 2989 2990 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2991 2992 /* 2993 * If the MTU being set is equal to aggr group's maximum 2994 * allowable value, then there is nothing to change 2995 */ 2996 if (sdu == grp->lg_max_sdu) 2997 return (0); 2998 2999 /* 0 is aggr group's min sdu */ 3000 if (sdu == 0) 3001 return (EINVAL); 3002 3003 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP); 3004 for (port = grp->lg_ports, i = 0; port != NULL && err == 0; 3005 port = port->lp_next, i++) { 3006 err = aggr_set_port_sdu(grp, port, sdu, mtu + i); 3007 } 3008 if (err != 0) { 3009 /* recover from error: reset the mtus of the ports */ 3010 aggr_port_t *tmp; 3011 3012 for (tmp = grp->lg_ports, i = 0; tmp != port; 3013 tmp = tmp->lp_next, i++) { 3014 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL); 3015 } 3016 goto bail; 3017 } 3018 grp->lg_max_sdu = aggr_grp_max_sdu(grp); 3019 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu); 3020 ASSERT(rv == 0); 3021 bail: 3022 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports); 3023 return (err); 3024 } 3025 3026 /* 3027 * Callback functions for set/get of properties 3028 */ 3029 /*ARGSUSED*/ 3030 static int 3031 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 3032 uint_t pr_valsize, const void *pr_val) 3033 { 3034 int err = ENOTSUP; 3035 aggr_grp_t *grp = m_driver; 3036 3037 switch (pr_num) { 3038 case MAC_PROP_MTU: { 3039 uint32_t mtu; 3040 3041 if (pr_valsize < sizeof (mtu)) { 3042 err = EINVAL; 3043 break; 3044 } 3045 bcopy(pr_val, &mtu, sizeof (mtu)); 3046 err = aggr_sdu_update(grp, mtu); 3047 break; 3048 } 3049 default: 3050 break; 3051 } 3052 return (err); 3053 } 3054 3055 typedef struct rboundary { 3056 uint32_t bval; 3057 int btype; 3058 } 
rboundary_t; 3059 3060 /* 3061 * This function finds the intersection of mtu ranges stored in arrays - 3062 * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval. 3063 * Individual arrays are assumed to contain non-overlapping ranges. 3064 * Algorithm: 3065 * A range has two boundaries - min and max. We scan all arrays and store 3066 * each boundary as a separate element in a temporary array. We also store 3067 * the boundary types, min or max, as +1 or -1 respectively in the temporary 3068 * array. Then we sort the temporary array in ascending order. We scan the 3069 * sorted array from lower to higher values and keep a cumulative sum of 3070 * boundary types. Element in the temporary array for which the sum reaches 3071 * mcount is a min boundary of a range in the result and next element will be 3072 * max boundary. 3073 * 3074 * Example for mcount = 3, 3075 * 3076 * ----|_________|-------|_______|----|__|------ mrange[0] 3077 * 3078 * -------|________|--|____________|-----|___|-- mrange[1] 3079 * 3080 * --------|________________|-------|____|------ mrange[2] 3081 * 3082 * 3 2 1 3083 * \|/ 3084 * 1 23 2 1 2 3 2 1 01 2 V 0 <- the sum 3085 * ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array 3086 * 3087 * same min and max 3088 * V 3089 * --------|_____|-------|__|------------|------ intersecting ranges 3090 */ 3091 void 3092 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount, 3093 mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount) 3094 { 3095 mac_propval_uint32_range_t *rval, *ur; 3096 int rmaxcnt, rcount; 3097 size_t sz_range32; 3098 rboundary_t *ta; /* temporary array */ 3099 rboundary_t temp; 3100 boolean_t range_started = B_FALSE; 3101 int i, j, m, sum; 3102 3103 sz_range32 = sizeof (mac_propval_uint32_range_t); 3104 3105 for (i = 0, rmaxcnt = 0; i < mcount; i++) 3106 rmaxcnt += mrange[i]->mpr_count; 3107 3108 /* Allocate enough space to store the results */ 3109 rval = kmem_alloc(rmaxcnt * sz_range32, 
KM_SLEEP); 3110 3111 /* Number of boundaries are twice as many as ranges */ 3112 ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP); 3113 3114 for (i = 0, m = 0; i < mcount; i++) { 3115 ur = &(mrange[i]->mpr_range_uint32[0]); 3116 for (j = 0; j < mrange[i]->mpr_count; j++) { 3117 ta[m].bval = ur[j].mpur_min; 3118 ta[m++].btype = 1; 3119 ta[m].bval = ur[j].mpur_max; 3120 ta[m++].btype = -1; 3121 } 3122 } 3123 3124 /* 3125 * Sort the temporary array in ascending order of bval; 3126 * if boundary values are same then sort on btype. 3127 */ 3128 for (i = 0; i < m-1; i++) { 3129 for (j = i+1; j < m; j++) { 3130 if ((ta[i].bval > ta[j].bval) || 3131 ((ta[i].bval == ta[j].bval) && 3132 (ta[i].btype < ta[j].btype))) { 3133 temp = ta[i]; 3134 ta[i] = ta[j]; 3135 ta[j] = temp; 3136 } 3137 } 3138 } 3139 3140 /* Walk through temporary array to find all ranges in the results */ 3141 for (i = 0, sum = 0, rcount = 0; i < m; i++) { 3142 sum += ta[i].btype; 3143 if (sum == mcount) { 3144 rval[rcount].mpur_min = ta[i].bval; 3145 range_started = B_TRUE; 3146 } else if (sum < mcount && range_started) { 3147 rval[rcount++].mpur_max = ta[i].bval; 3148 range_started = B_FALSE; 3149 } 3150 } 3151 3152 *prval = rval; 3153 *prmaxcnt = rmaxcnt; 3154 *prcount = rcount; 3155 3156 kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t)); 3157 } 3158 3159 /* 3160 * Returns the mtu ranges which could be supported by aggr group. 3161 * prmaxcnt returns the size of the buffer prval, prcount returns 3162 * the number of valid entries in prval. Caller is responsible 3163 * for freeing up prval. 
3164 */ 3165 int 3166 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval, 3167 int *prmaxcnt, int *prcount) 3168 { 3169 mac_propval_range_t **vals; 3170 aggr_port_t *port; 3171 mac_perim_handle_t mph; 3172 uint_t i, numr; 3173 int err = 0; 3174 size_t sz_propval, sz_range32; 3175 size_t size; 3176 3177 sz_propval = sizeof (mac_propval_range_t); 3178 sz_range32 = sizeof (mac_propval_uint32_range_t); 3179 3180 ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 3181 3182 vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports, 3183 KM_SLEEP); 3184 3185 for (port = grp->lg_ports, i = 0; port != NULL; 3186 port = port->lp_next, i++) { 3187 3188 size = sz_propval; 3189 vals[i] = kmem_alloc(size, KM_SLEEP); 3190 vals[i]->mpr_count = 1; 3191 3192 mac_perim_enter_by_mh(port->lp_mh, &mph); 3193 3194 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, 3195 NULL, 0, vals[i], NULL); 3196 if (err == ENOSPC) { 3197 /* 3198 * Not enough space to hold all ranges. 3199 * Allocate extra space as indicated and retry. 
3200 */ 3201 numr = vals[i]->mpr_count; 3202 kmem_free(vals[i], sz_propval); 3203 size = sz_propval + (numr - 1) * sz_range32; 3204 vals[i] = kmem_alloc(size, KM_SLEEP); 3205 vals[i]->mpr_count = numr; 3206 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, 3207 NULL, 0, vals[i], NULL); 3208 ASSERT(err != ENOSPC); 3209 } 3210 mac_perim_exit(mph); 3211 if (err != 0) { 3212 kmem_free(vals[i], size); 3213 vals[i] = NULL; 3214 break; 3215 } 3216 } 3217 3218 /* 3219 * if any of the underlying ports does not support changing MTU then 3220 * just return ENOTSUP 3221 */ 3222 if (port != NULL) { 3223 ASSERT(err != 0); 3224 goto done; 3225 } 3226 3227 aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt, 3228 prcount); 3229 3230 done: 3231 for (i = 0; i < grp->lg_nports; i++) { 3232 if (vals[i] != NULL) { 3233 numr = vals[i]->mpr_count; 3234 size = sz_propval + (numr - 1) * sz_range32; 3235 kmem_free(vals[i], size); 3236 } 3237 } 3238 3239 kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports); 3240 return (err); 3241 } 3242 3243 static void 3244 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 3245 mac_prop_info_handle_t prh) 3246 { 3247 aggr_grp_t *grp = m_driver; 3248 mac_propval_uint32_range_t *rval = NULL; 3249 int i, rcount, rmaxcnt; 3250 int err = 0; 3251 3252 _NOTE(ARGUNUSED(pr_name)); 3253 3254 switch (pr_num) { 3255 case MAC_PROP_MTU: 3256 3257 err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt, 3258 &rcount); 3259 if (err != 0) { 3260 ASSERT(rval == NULL); 3261 return; 3262 } 3263 for (i = 0; i < rcount; i++) { 3264 mac_prop_info_set_range_uint32(prh, 3265 rval[i].mpur_min, rval[i].mpur_max); 3266 } 3267 kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt); 3268 break; 3269 } 3270 }