1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  * Copyright 2017 RackTop Systems.
  26  */
  27 
  28 /*
  29  * - General Introduction:
  30  *
  31  * This file contains the implementation of the MAC client kernel
  32  * API and related code. The MAC client API allows a kernel module
  33  * to gain access to a MAC instance (physical NIC, link aggregation, etc).
  34  * It allows a MAC client to associate itself with a MAC address,
  35  * VLANs, callback functions for data traffic and for promiscuous mode.
  36  * The MAC client API is also used to specify the properties associated
  37  * with a MAC client, such as bandwidth limits, priority, CPUS, etc.
  38  * These properties are further used to determine the hardware resources
  39  * to allocate to the various MAC clients.
  40  *
  41  * - Primary MAC clients:
  42  *
  43  * The MAC client API refers to "primary MAC clients". A primary MAC
  44  * client is a client which "owns" the primary MAC address of
  45  * the underlying MAC instance. The primary MAC address is called out
  46  * since it is associated with specific semantics: the primary MAC
  47  * address is the MAC address which is assigned to the IP interface
  48  * when it is plumbed, and the primary MAC address is assigned
  49  * to VLAN data-links. The primary address of a MAC instance can
  50  * also change dynamically from under the MAC client, for example
  51  * as a result of a change of state of a link aggregation. In that
  52  * case the MAC layer automatically updates all data-structures which
  53  * refer to the current value of the primary MAC address. Typical
  54  * primary MAC clients are dls, aggr, and xnb. A typical non-primary
  55  * MAC client is the vnic driver.
  56  *
  57  * - Virtual Switching:
  58  *
  59  * The MAC layer implements a virtual switch between the MAC clients
  60  * (primary and non-primary) defined on top of the same underlying
  61  * NIC (physical, link aggregation, etc). The virtual switch is
  62  * VLAN-aware, i.e. it allows multiple MAC clients to be member
  63  * of one or more VLANs, and the virtual switch will distribute
  64  * multicast tagged packets only to the member of the corresponding
  65  * VLANs.
  66  *
  67  * - Upper vs Lower MAC:
  68  *
  69  * Creating a VNIC on top of a MAC instance effectively causes
  70  * two MAC instances to be layered on top of each other, one for
  71  * the VNIC(s), one for the underlying MAC instance (physical NIC,
  72  * link aggregation, etc). In the code below we refer to the
  73  * underlying NIC as the "lower MAC", and we refer to VNICs as
  74  * the "upper MAC".
  75  *
  76  * - Pass-through for VNICs:
  77  *
  78  * When VNICs are created on top of an underlying MAC, this causes
  79  * a layering of two MAC instances. Since the lower MAC already
  80  * does the switching and demultiplexing to its MAC clients, the
  81  * upper MAC would simply have to pass packets to the layer below
  82  * or above it, which would introduce overhead. In order to avoid
  83  * this overhead, the MAC layer implements a pass-through mechanism
  84  * for VNICs. When a VNIC opens the lower MAC instance, it saves
 * the MAC client handle it obtains from the MAC layer. When a MAC
  86  * client opens a VNIC (upper MAC), the MAC layer detects that
  87  * the MAC being opened is a VNIC, and gets the MAC client handle
  88  * that the VNIC driver obtained from the lower MAC. This exchange
  89  * is done through a private capability between the MAC layer
  90  * and the VNIC driver. The upper MAC then returns that handle
  91  * directly to its MAC client. Any operation done by the upper
  92  * MAC client is now done on the lower MAC client handle, which
  93  * allows the VNIC driver to be completely bypassed for the
  94  * performance sensitive data-path.
  95  *
  96  * - Secondary MACs for VNICs:
  97  *
  98  * VNICs support multiple upper mac clients to enable support for
  99  * multiple MAC addresses on the VNIC. When the VNIC is created the
 100  * initial mac client is the primary upper mac. Any additional mac
 101  * clients are secondary macs. These are kept in sync with the primary
 102  * (for things such as the rx function and resource control settings)
 103  * using the same private capability interface between the MAC layer
 104  * and the VNIC layer.
 105  *
 106  */
 107 
 108 #include <sys/types.h>
 109 #include <sys/conf.h>
 110 #include <sys/id_space.h>
 111 #include <sys/esunddi.h>
 112 #include <sys/stat.h>
 113 #include <sys/mkdev.h>
 114 #include <sys/stream.h>
 115 #include <sys/strsun.h>
 116 #include <sys/strsubr.h>
 117 #include <sys/dlpi.h>
 118 #include <sys/modhash.h>
 119 #include <sys/mac_impl.h>
 120 #include <sys/mac_client_impl.h>
 121 #include <sys/mac_soft_ring.h>
 122 #include <sys/mac_stat.h>
 123 #include <sys/dls.h>
 124 #include <sys/dld.h>
 125 #include <sys/modctl.h>
 126 #include <sys/fs/dv_node.h>
 127 #include <sys/thread.h>
 128 #include <sys/proc.h>
 129 #include <sys/callb.h>
 130 #include <sys/cpuvar.h>
 131 #include <sys/atomic.h>
 132 #include <sys/sdt.h>
 133 #include <sys/mac_flow.h>
 134 #include <sys/ddi_intr_impl.h>
 135 #include <sys/disp.h>
 136 #include <sys/sdt.h>
 137 #include <sys/vnic.h>
 138 #include <sys/vnic_impl.h>
 139 #include <sys/vlan.h>
 140 #include <inet/ip.h>
 141 #include <inet/ip6.h>
 142 #include <sys/exacct.h>
 143 #include <sys/exacct_impl.h>
 144 #include <inet/nd.h>
 145 #include <sys/ethernet.h>
 146 
/*
 * kmem caches created by mac_client_init() and destroyed by
 * mac_client_fini(): one sized MAC_CLIENT_IMPL_SIZE for MAC client
 * objects, one sized for mac_promisc_impl_t objects.
 */
kmem_cache_t    *mac_client_impl_cache;
kmem_cache_t    *mac_promisc_impl_cache;
 149 
/* Forward declarations of file-local (static) helper functions. */
static boolean_t mac_client_single_rcvr(mac_client_impl_t *);
static flow_entry_t *mac_client_swap_mciflent(mac_client_impl_t *);
static flow_entry_t *mac_client_get_flow(mac_client_impl_t *,
    mac_unicast_impl_t *);
static void mac_client_remove_flow_from_list(mac_client_impl_t *,
    flow_entry_t *);
static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *);
static void mac_rename_flow_names(mac_client_impl_t *, const char *);
static void mac_virtual_link_update(mac_impl_t *);
static int mac_client_datapath_setup(mac_client_impl_t *, uint16_t,
    uint8_t *, mac_resource_props_t *, boolean_t, mac_unicast_impl_t *);
static void mac_client_datapath_teardown(mac_client_handle_t,
    mac_unicast_impl_t *, flow_entry_t *);
static int mac_resource_ctl_set(mac_client_handle_t, mac_resource_props_t *);
 164 
 165 /* ARGSUSED */
 166 static int
 167 i_mac_client_impl_ctor(void *buf, void *arg, int kmflag)
 168 {
 169         int     i;
 170         mac_client_impl_t       *mcip = buf;
 171 
 172         bzero(buf, MAC_CLIENT_IMPL_SIZE);
 173         mutex_init(&mcip->mci_tx_cb_lock, NULL, MUTEX_DRIVER, NULL);
 174         mcip->mci_tx_notify_cb_info.mcbi_lockp = &mcip->mci_tx_cb_lock;
 175 
 176         ASSERT(mac_tx_percpu_cnt >= 0);
 177         for (i = 0; i <= mac_tx_percpu_cnt; i++) {
 178                 mutex_init(&mcip->mci_tx_pcpu[i].pcpu_tx_lock, NULL,
 179                     MUTEX_DRIVER, NULL);
 180         }
 181         cv_init(&mcip->mci_tx_cv, NULL, CV_DRIVER, NULL);
 182 
 183         return (0);
 184 }
 185 
 186 /* ARGSUSED */
 187 static void
 188 i_mac_client_impl_dtor(void *buf, void *arg)
 189 {
 190         int     i;
 191         mac_client_impl_t *mcip = buf;
 192 
 193         ASSERT(mcip->mci_promisc_list == NULL);
 194         ASSERT(mcip->mci_unicast_list == NULL);
 195         ASSERT(mcip->mci_state_flags == 0);
 196         ASSERT(mcip->mci_tx_flag == 0);
 197 
 198         mutex_destroy(&mcip->mci_tx_cb_lock);
 199 
 200         ASSERT(mac_tx_percpu_cnt >= 0);
 201         for (i = 0; i <= mac_tx_percpu_cnt; i++) {
 202                 ASSERT(mcip->mci_tx_pcpu[i].pcpu_tx_refcnt == 0);
 203                 mutex_destroy(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 204         }
 205         cv_destroy(&mcip->mci_tx_cv);
 206 }
 207 
 208 /* ARGSUSED */
 209 static int
 210 i_mac_promisc_impl_ctor(void *buf, void *arg, int kmflag)
 211 {
 212         mac_promisc_impl_t      *mpip = buf;
 213 
 214         bzero(buf, sizeof (mac_promisc_impl_t));
 215         mpip->mpi_mci_link.mcb_objp = buf;
 216         mpip->mpi_mci_link.mcb_objsize = sizeof (mac_promisc_impl_t);
 217         mpip->mpi_mi_link.mcb_objp = buf;
 218         mpip->mpi_mi_link.mcb_objsize = sizeof (mac_promisc_impl_t);
 219         return (0);
 220 }
 221 
 222 /* ARGSUSED */
 223 static void
 224 i_mac_promisc_impl_dtor(void *buf, void *arg)
 225 {
 226         mac_promisc_impl_t      *mpip = buf;
 227 
 228         ASSERT(mpip->mpi_mci_link.mcb_objp != NULL);
 229         ASSERT(mpip->mpi_mci_link.mcb_objsize == sizeof (mac_promisc_impl_t));
 230         ASSERT(mpip->mpi_mi_link.mcb_objp == mpip->mpi_mci_link.mcb_objp);
 231         ASSERT(mpip->mpi_mi_link.mcb_objsize == sizeof (mac_promisc_impl_t));
 232 
 233         mpip->mpi_mci_link.mcb_objp = NULL;
 234         mpip->mpi_mci_link.mcb_objsize = 0;
 235         mpip->mpi_mi_link.mcb_objp = NULL;
 236         mpip->mpi_mi_link.mcb_objsize = 0;
 237 
 238         ASSERT(mpip->mpi_mci_link.mcb_flags == 0);
 239         mpip->mpi_mci_link.mcb_objsize = 0;
 240 }
 241 
 242 void
 243 mac_client_init(void)
 244 {
 245         ASSERT(mac_tx_percpu_cnt >= 0);
 246 
 247         mac_client_impl_cache = kmem_cache_create("mac_client_impl_cache",
 248             MAC_CLIENT_IMPL_SIZE, 0, i_mac_client_impl_ctor,
 249             i_mac_client_impl_dtor, NULL, NULL, NULL, 0);
 250         ASSERT(mac_client_impl_cache != NULL);
 251 
 252         mac_promisc_impl_cache = kmem_cache_create("mac_promisc_impl_cache",
 253             sizeof (mac_promisc_impl_t), 0, i_mac_promisc_impl_ctor,
 254             i_mac_promisc_impl_dtor, NULL, NULL, NULL, 0);
 255         ASSERT(mac_promisc_impl_cache != NULL);
 256 }
 257 
 258 void
 259 mac_client_fini(void)
 260 {
 261         kmem_cache_destroy(mac_client_impl_cache);
 262         kmem_cache_destroy(mac_promisc_impl_cache);
 263 }
 264 
 265 /*
 266  * Return the lower MAC client handle from the VNIC driver for the
 267  * specified VNIC MAC instance.
 268  */
 269 mac_client_impl_t *
 270 mac_vnic_lower(mac_impl_t *mip)
 271 {
 272         mac_capab_vnic_t cap;
 273         mac_client_impl_t *mcip;
 274 
 275         VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
 276         mcip = cap.mcv_mac_client_handle(cap.mcv_arg);
 277 
 278         return (mcip);
 279 }
 280 
 281 /*
 282  * Update the secondary macs
 283  */
 284 void
 285 mac_vnic_secondary_update(mac_impl_t *mip)
 286 {
 287         mac_capab_vnic_t cap;
 288 
 289         VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
 290         cap.mcv_mac_secondary_update(cap.mcv_arg);
 291 }
 292 
 293 /*
 294  * Return the MAC client handle of the primary MAC client for the
 295  * specified MAC instance, or NULL otherwise.
 296  */
 297 mac_client_impl_t *
 298 mac_primary_client_handle(mac_impl_t *mip)
 299 {
 300         mac_client_impl_t *mcip;
 301 
 302         if (mip->mi_state_flags & MIS_IS_VNIC)
 303                 return (mac_vnic_lower(mip));
 304 
 305         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 306 
 307         for (mcip = mip->mi_clients_list; mcip != NULL;
 308             mcip = mcip->mci_client_next) {
 309                 if (MCIP_DATAPATH_SETUP(mcip) && mac_is_primary_client(mcip))
 310                         return (mcip);
 311         }
 312         return (NULL);
 313 }
 314 
 315 /*
 316  * Open a MAC specified by its MAC name.
 317  */
 318 int
 319 mac_open(const char *macname, mac_handle_t *mhp)
 320 {
 321         mac_impl_t      *mip;
 322         int             err;
 323 
 324         /*
 325          * Look up its entry in the global hash table.
 326          */
 327         if ((err = mac_hold(macname, &mip)) != 0)
 328                 return (err);
 329 
 330         /*
 331          * Hold the dip associated to the MAC to prevent it from being
 332          * detached. For a softmac, its underlying dip is held by the
 333          * mi_open() callback.
 334          *
 335          * This is done to be more tolerant with some defective drivers,
 336          * which incorrectly handle mac_unregister() failure in their
 337          * xxx_detach() routine. For example, some drivers ignore the
 338          * failure of mac_unregister() and free all resources that
 339          * that are needed for data transmition.
 340          */
 341         e_ddi_hold_devi(mip->mi_dip);
 342 
 343         if (!(mip->mi_callbacks->mc_callbacks & MC_OPEN)) {
 344                 *mhp = (mac_handle_t)mip;
 345                 return (0);
 346         }
 347 
 348         /*
 349          * The mac perimeter is used in both mac_open and mac_close by the
 350          * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
 351          */
 352         i_mac_perim_enter(mip);
 353         mip->mi_oref++;
 354         if (mip->mi_oref != 1 || ((err = mip->mi_open(mip->mi_driver)) == 0)) {
 355                 *mhp = (mac_handle_t)mip;
 356                 i_mac_perim_exit(mip);
 357                 return (0);
 358         }
 359         mip->mi_oref--;
 360         ddi_release_devi(mip->mi_dip);
 361         mac_rele(mip);
 362         i_mac_perim_exit(mip);
 363         return (err);
 364 }
 365 
 366 /*
 367  * Open a MAC specified by its linkid.
 368  */
 369 int
 370 mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
 371 {
 372         dls_dl_handle_t dlh;
 373         int             err;
 374 
 375         if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
 376                 return (err);
 377 
 378         dls_devnet_prop_task_wait(dlh);
 379 
 380         err = mac_open(dls_devnet_mac(dlh), mhp);
 381 
 382         dls_devnet_rele_tmp(dlh);
 383         return (err);
 384 }
 385 
 386 /*
 387  * Open a MAC specified by its link name.
 388  */
 389 int
 390 mac_open_by_linkname(const char *link, mac_handle_t *mhp)
 391 {
 392         datalink_id_t   linkid;
 393         int             err;
 394 
 395         if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0)
 396                 return (err);
 397         return (mac_open_by_linkid(linkid, mhp));
 398 }
 399 
 400 /*
 401  * Close the specified MAC.
 402  */
 403 void
 404 mac_close(mac_handle_t mh)
 405 {
 406         mac_impl_t      *mip = (mac_impl_t *)mh;
 407 
 408         i_mac_perim_enter(mip);
 409         /*
 410          * The mac perimeter is used in both mac_open and mac_close by the
 411          * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
 412          */
 413         if (mip->mi_callbacks->mc_callbacks & MC_OPEN) {
 414                 ASSERT(mip->mi_oref != 0);
 415                 if (--mip->mi_oref == 0) {
 416                         if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE))
 417                                 mip->mi_close(mip->mi_driver);
 418                 }
 419         }
 420         i_mac_perim_exit(mip);
 421         ddi_release_devi(mip->mi_dip);
 422         mac_rele(mip);
 423 }
 424 
 425 /*
 426  * Misc utility functions to retrieve various information about a MAC
 427  * instance or a MAC client.
 428  */
 429 
 430 const mac_info_t *
 431 mac_info(mac_handle_t mh)
 432 {
 433         return (&((mac_impl_t *)mh)->mi_info);
 434 }
 435 
 436 dev_info_t *
 437 mac_devinfo_get(mac_handle_t mh)
 438 {
 439         return (((mac_impl_t *)mh)->mi_dip);
 440 }
 441 
 442 void *
 443 mac_driver(mac_handle_t mh)
 444 {
 445         return (((mac_impl_t *)mh)->mi_driver);
 446 }
 447 
 448 const char *
 449 mac_name(mac_handle_t mh)
 450 {
 451         return (((mac_impl_t *)mh)->mi_name);
 452 }
 453 
 454 int
 455 mac_type(mac_handle_t mh)
 456 {
 457         return (((mac_impl_t *)mh)->mi_type->mt_type);
 458 }
 459 
 460 int
 461 mac_nativetype(mac_handle_t mh)
 462 {
 463         return (((mac_impl_t *)mh)->mi_type->mt_nativetype);
 464 }
 465 
 466 char *
 467 mac_client_name(mac_client_handle_t mch)
 468 {
 469         return (((mac_client_impl_t *)mch)->mci_name);
 470 }
 471 
 472 minor_t
 473 mac_minor(mac_handle_t mh)
 474 {
 475         return (((mac_impl_t *)mh)->mi_minor);
 476 }
 477 
 478 /*
 479  * Return the VID associated with a MAC client. This function should
 480  * be called for clients which are associated with only one VID.
 481  */
 482 uint16_t
 483 mac_client_vid(mac_client_handle_t mch)
 484 {
 485         uint16_t                vid = VLAN_ID_NONE;
 486         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
 487         flow_desc_t             flow_desc;
 488 
 489         if (mcip->mci_nflents == 0)
 490                 return (vid);
 491 
 492         ASSERT(MCIP_DATAPATH_SETUP(mcip) && mac_client_single_rcvr(mcip));
 493 
 494         mac_flow_get_desc(mcip->mci_flent, &flow_desc);
 495         if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
 496                 vid = flow_desc.fd_vid;
 497 
 498         return (vid);
 499 }
 500 
 501 /*
 502  * Return whether the specified MAC client corresponds to a VLAN VNIC.
 503  */
 504 boolean_t
 505 mac_client_is_vlan_vnic(mac_client_handle_t mch)
 506 {
 507         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
 508 
 509         return (((mcip->mci_state_flags & MCIS_IS_VNIC) != 0) &&
 510             ((mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) != 0));
 511 }
 512 
 513 /*
 514  * Return the link speed associated with the specified MAC client.
 515  *
 516  * The link speed of a MAC client is equal to the smallest value of
 517  * 1) the current link speed of the underlying NIC, or
 518  * 2) the bandwidth limit set for the MAC client.
 519  *
 520  * Note that the bandwidth limit can be higher than the speed
 521  * of the underlying NIC. This is allowed to avoid spurious
 522  * administration action failures or artifically lowering the
 523  * bandwidth limit of a link that may  have temporarily lowered
 524  * its link speed due to hardware problem or administrator action.
 525  */
 526 static uint64_t
 527 mac_client_ifspeed(mac_client_impl_t *mcip)
 528 {
 529         mac_impl_t *mip = mcip->mci_mip;
 530         uint64_t nic_speed;
 531 
 532         nic_speed = mac_stat_get((mac_handle_t)mip, MAC_STAT_IFSPEED);
 533 
 534         if (nic_speed == 0) {
 535                 return (0);
 536         } else {
 537                 uint64_t policy_limit = (uint64_t)-1;
 538 
 539                 if (MCIP_RESOURCE_PROPS_MASK(mcip) & MRP_MAXBW)
 540                         policy_limit = MCIP_RESOURCE_PROPS_MAXBW(mcip);
 541 
 542                 return (MIN(policy_limit, nic_speed));
 543         }
 544 }
 545 
 546 /*
 547  * Return the link state of the specified client. If here are more
 548  * than one clients of the underying mac_impl_t, the link state
 549  * will always be UP regardless of the link state of the underlying
 550  * mac_impl_t. This is needed to allow the MAC clients to continue
 551  * to communicate with each other even when the physical link of
 552  * their mac_impl_t is down.
 553  */
 554 static uint64_t
 555 mac_client_link_state(mac_client_impl_t *mcip)
 556 {
 557         mac_impl_t *mip = mcip->mci_mip;
 558         uint16_t vid;
 559         mac_client_impl_t *mci_list;
 560         mac_unicast_impl_t *mui_list, *oth_mui_list;
 561 
 562         /*
 563          * Returns LINK_STATE_UP if there are other MAC clients defined on
 564          * mac_impl_t which share same VLAN ID as that of mcip. Note that
 565          * if 'mcip' has more than one VID's then we match ANY one of the
 566          * VID's with other MAC client's VID's and return LINK_STATE_UP.
 567          */
 568         rw_enter(&mcip->mci_rw_lock, RW_READER);
 569         for (mui_list = mcip->mci_unicast_list; mui_list != NULL;
 570             mui_list = mui_list->mui_next) {
 571                 vid = mui_list->mui_vid;
 572                 for (mci_list = mip->mi_clients_list; mci_list != NULL;
 573                     mci_list = mci_list->mci_client_next) {
 574                         if (mci_list == mcip)
 575                                 continue;
 576                         for (oth_mui_list = mci_list->mci_unicast_list;
 577                             oth_mui_list != NULL; oth_mui_list = oth_mui_list->
 578                             mui_next) {
 579                                 if (vid == oth_mui_list->mui_vid) {
 580                                         rw_exit(&mcip->mci_rw_lock);
 581                                         return (LINK_STATE_UP);
 582                                 }
 583                         }
 584                 }
 585         }
 586         rw_exit(&mcip->mci_rw_lock);
 587 
 588         return (mac_stat_get((mac_handle_t)mip, MAC_STAT_LINK_STATE));
 589 }
 590 
 591 /*
 592  * These statistics are consumed by dladm show-link -s <vnic>,
 593  * dladm show-vnic -s and netstat. With the introduction of dlstat,
 * dladm show-link -s and dladm show-vnic -s will be EOL'ed while
 595  * netstat will consume from kstats introduced for dlstat. This code
 596  * will be removed at that time.
 597  */
 598 
 599 /*
 600  * Return the statistics of a MAC client. These statistics are different
 601  * then the statistics of the underlying MAC which are returned by
 602  * mac_stat_get().
 603  *
 604  * Note that for things based on the tx and rx stats, mac will end up clobbering
 605  * those stats when the underlying set of rings in the srs changes. As such, we
 606  * need to source not only the current set, but also the historical set when
 607  * returning to the client, lest our counters appear to go backwards.
 608  */
 609 uint64_t
 610 mac_client_stat_get(mac_client_handle_t mch, uint_t stat)
 611 {
 612         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
 613         mac_impl_t              *mip = mcip->mci_mip;
 614         flow_entry_t            *flent = mcip->mci_flent;
 615         mac_soft_ring_set_t     *mac_srs;
 616         mac_rx_stats_t          *mac_rx_stat, *old_rx_stat;
 617         mac_tx_stats_t          *mac_tx_stat, *old_tx_stat;
 618         int i;
 619         uint64_t val = 0;
 620 
 621         mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
 622         mac_tx_stat = &mac_srs->srs_tx.st_stat;
 623         old_rx_stat = &mcip->mci_misc_stat.mms_defunctrxlanestats;
 624         old_tx_stat = &mcip->mci_misc_stat.mms_defuncttxlanestats;
 625 
 626         switch (stat) {
 627         case MAC_STAT_LINK_STATE:
 628                 val = mac_client_link_state(mcip);
 629                 break;
 630         case MAC_STAT_LINK_UP:
 631                 val = (mac_client_link_state(mcip) == LINK_STATE_UP);
 632                 break;
 633         case MAC_STAT_PROMISC:
 634                 val = mac_stat_get((mac_handle_t)mip, MAC_STAT_PROMISC);
 635                 break;
 636         case MAC_STAT_LOWLINK_STATE:
 637                 val = mac_stat_get((mac_handle_t)mip, MAC_STAT_LOWLINK_STATE);
 638                 break;
 639         case MAC_STAT_IFSPEED:
 640                 val = mac_client_ifspeed(mcip);
 641                 break;
 642         case MAC_STAT_MULTIRCV:
 643                 val = mcip->mci_misc_stat.mms_multircv;
 644                 break;
 645         case MAC_STAT_BRDCSTRCV:
 646                 val = mcip->mci_misc_stat.mms_brdcstrcv;
 647                 break;
 648         case MAC_STAT_MULTIXMT:
 649                 val = mcip->mci_misc_stat.mms_multixmt;
 650                 break;
 651         case MAC_STAT_BRDCSTXMT:
 652                 val = mcip->mci_misc_stat.mms_brdcstxmt;
 653                 break;
 654         case MAC_STAT_OBYTES:
 655                 val = mac_tx_stat->mts_obytes;
 656                 val += old_tx_stat->mts_obytes;
 657                 break;
 658         case MAC_STAT_OPACKETS:
 659                 val = mac_tx_stat->mts_opackets;
 660                 val += old_tx_stat->mts_opackets;
 661                 break;
 662         case MAC_STAT_OERRORS:
 663                 val = mac_tx_stat->mts_oerrors;
 664                 val += old_tx_stat->mts_oerrors;
 665                 break;
 666         case MAC_STAT_IPACKETS:
 667                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 668                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 669                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 670                         val += mac_rx_stat->mrs_intrcnt +
 671                             mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
 672                 }
 673                 val += old_rx_stat->mrs_intrcnt + old_rx_stat->mrs_pollcnt +
 674                     old_rx_stat->mrs_lclcnt;
 675                 break;
 676         case MAC_STAT_RBYTES:
 677                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 678                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 679                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 680                         val += mac_rx_stat->mrs_intrbytes +
 681                             mac_rx_stat->mrs_pollbytes +
 682                             mac_rx_stat->mrs_lclbytes;
 683                 }
 684                 val += old_rx_stat->mrs_intrbytes + old_rx_stat->mrs_pollbytes +
 685                     old_rx_stat->mrs_lclbytes;
 686                 break;
 687         case MAC_STAT_IERRORS:
 688                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 689                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 690                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 691                         val += mac_rx_stat->mrs_ierrors;
 692                 }
 693                 val += old_rx_stat->mrs_ierrors;
 694                 break;
 695         default:
 696                 val = mac_driver_stat_default(mip, stat);
 697                 break;
 698         }
 699 
 700         return (val);
 701 }
 702 
 703 /*
 704  * Return the statistics of the specified MAC instance.
 705  */
 706 uint64_t
 707 mac_stat_get(mac_handle_t mh, uint_t stat)
 708 {
 709         mac_impl_t      *mip = (mac_impl_t *)mh;
 710         uint64_t        val;
 711         int             ret;
 712 
 713         /*
 714          * The range of stat determines where it is maintained.  Stat
 715          * values from 0 up to (but not including) MAC_STAT_MIN are
 716          * mainteined by the mac module itself.  Everything else is
 717          * maintained by the driver.
 718          *
 719          * If the mac_impl_t being queried corresponds to a VNIC,
 720          * the stats need to be queried from the lower MAC client
 721          * corresponding to the VNIC. (The mac_link_update()
 722          * invoked by the driver to the lower MAC causes the *lower
 723          * MAC* to update its mi_linkstate, and send a notification
 724          * to its MAC clients. Due to the VNIC passthrough,
 725          * these notifications are sent to the upper MAC clients
 726          * of the VNIC directly, and the upper mac_impl_t of the VNIC
 727          * does not have a valid mi_linkstate.
 728          */
 729         if (stat < MAC_STAT_MIN && !(mip->mi_state_flags & MIS_IS_VNIC)) {
 730                 /* these stats are maintained by the mac module itself */
 731                 switch (stat) {
 732                 case MAC_STAT_LINK_STATE:
 733                         return (mip->mi_linkstate);
 734                 case MAC_STAT_LINK_UP:
 735                         return (mip->mi_linkstate == LINK_STATE_UP);
 736                 case MAC_STAT_PROMISC:
 737                         return (mip->mi_devpromisc != 0);
 738                 case MAC_STAT_LOWLINK_STATE:
 739                         return (mip->mi_lowlinkstate);
 740                 default:
 741                         ASSERT(B_FALSE);
 742                 }
 743         }
 744 
 745         /*
 746          * Call the driver to get the given statistic.
 747          */
 748         ret = mip->mi_getstat(mip->mi_driver, stat, &val);
 749         if (ret != 0) {
 750                 /*
 751                  * The driver doesn't support this statistic.  Get the
 752                  * statistic's default value.
 753                  */
 754                 val = mac_driver_stat_default(mip, stat);
 755         }
 756         return (val);
 757 }
 758 
 759 /*
 760  * Query hardware rx ring corresponding to the pseudo ring.
 761  */
 762 uint64_t
 763 mac_pseudo_rx_ring_stat_get(mac_ring_handle_t handle, uint_t stat)
 764 {
 765         return (mac_rx_ring_stat_get(handle, stat));
 766 }
 767 
 768 /*
 769  * Query hardware tx ring corresponding to the pseudo ring.
 770  */
 771 uint64_t
 772 mac_pseudo_tx_ring_stat_get(mac_ring_handle_t handle, uint_t stat)
 773 {
 774         return (mac_tx_ring_stat_get(handle, stat));
 775 }
 776 
 777 /*
 778  * Utility function which returns the VID associated with a flow entry.
 779  */
 780 uint16_t
 781 i_mac_flow_vid(flow_entry_t *flent)
 782 {
 783         flow_desc_t     flow_desc;
 784 
 785         mac_flow_get_desc(flent, &flow_desc);
 786 
 787         if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
 788                 return (flow_desc.fd_vid);
 789         return (VLAN_ID_NONE);
 790 }
 791 
 792 /*
 793  * Verify the validity of the specified unicast MAC address. Returns B_TRUE
 794  * if the address is valid, B_FALSE otherwise (multicast address, or incorrect
 795  * length.
 796  */
 797 boolean_t
 798 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
 799 {
 800         mac_impl_t      *mip = (mac_impl_t *)mh;
 801 
 802         /*
 803          * Verify the address. No lock is needed since mi_type and plugin
 804          * details don't change after mac_register().
 805          */
 806         if ((len != mip->mi_type->mt_addr_length) ||
 807             (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
 808             mip->mi_pdata)) != 0) {
 809                 return (B_FALSE);
 810         } else {
 811                 return (B_TRUE);
 812         }
 813 }
 814 
 815 void
 816 mac_sdu_get(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu)
 817 {
 818         mac_impl_t      *mip = (mac_impl_t *)mh;
 819 
 820         if (min_sdu != NULL)
 821                 *min_sdu = mip->mi_sdu_min;
 822         if (max_sdu != NULL)
 823                 *max_sdu = mip->mi_sdu_max;
 824 }
 825 
 826 void
 827 mac_sdu_get2(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu,
 828     uint_t *multicast_sdu)
 829 {
 830         mac_impl_t      *mip = (mac_impl_t *)mh;
 831 
 832         if (min_sdu != NULL)
 833                 *min_sdu = mip->mi_sdu_min;
 834         if (max_sdu != NULL)
 835                 *max_sdu = mip->mi_sdu_max;
 836         if (multicast_sdu != NULL)
 837                 *multicast_sdu = mip->mi_sdu_multicast;
 838 }
 839 
 840 /*
 841  * Update the MAC unicast address of the specified client's flows. Currently
 842  * only one unicast MAC unicast address is allowed per client.
 843  */
 844 static void
 845 mac_unicast_update_client_flow(mac_client_impl_t *mcip)
 846 {
 847         mac_impl_t *mip = mcip->mci_mip;
 848         flow_entry_t *flent = mcip->mci_flent;
 849         mac_address_t *map = mcip->mci_unicast;
 850         flow_desc_t flow_desc;
 851 
 852         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 853         ASSERT(flent != NULL);
 854 
 855         mac_flow_get_desc(flent, &flow_desc);
 856         ASSERT(flow_desc.fd_mask & FLOW_LINK_DST);
 857 
 858         bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
 859         mac_flow_set_desc(flent, &flow_desc);
 860 
 861         /*
 862          * The v6 local and SLAAC addrs (used by mac protection) need to be
 863          * regenerated because our mac address has changed.
 864          */
 865         mac_protect_update_mac_token(mcip);
 866 
 867         /*
 868          * When there are multiple VLANs sharing the same MAC address,
 869          * each gets its own MAC client, except when running on sun4v
 870          * vsw. In that case the mci_flent_list is used to place
 871          * multiple VLAN flows on one MAC client. If we ever get rid
 872          * of vsw then this code can go, but until then we need to
 873          * update all flow entries.
 874          */
 875         for (flent = mcip->mci_flent_list; flent != NULL;
 876             flent = flent->fe_client_next) {
 877                 mac_flow_get_desc(flent, &flow_desc);
 878                 if (!(flent->fe_type & FLOW_PRIMARY_MAC ||
 879                     flent->fe_type & FLOW_VNIC_MAC))
 880                         continue;
 881 
 882                 bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
 883                 mac_flow_set_desc(flent, &flow_desc);
 884         }
 885 }
 886 
 887 /*
 888  * Update all clients that share the same unicast address.
 889  */
 890 void
 891 mac_unicast_update_clients(mac_impl_t *mip, mac_address_t *map)
 892 {
 893         mac_client_impl_t *mcip;
 894 
 895         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 896 
 897         /*
 898          * Find all clients that share the same unicast MAC address and update
 899          * them appropriately.
 900          */
 901         for (mcip = mip->mi_clients_list; mcip != NULL;
 902             mcip = mcip->mci_client_next) {
 903                 /*
 904                  * Ignore clients that don't share this MAC address.
 905                  */
 906                 if (map != mcip->mci_unicast)
 907                         continue;
 908 
 909                 /*
 910                  * Update those clients with same old unicast MAC address.
 911                  */
 912                 mac_unicast_update_client_flow(mcip);
 913         }
 914 }
 915 
 916 /*
 917  * Update the unicast MAC address of the specified VNIC MAC client.
 918  *
 919  * Check whether the operation is valid. Any of following cases should fail:
 920  *
 921  * 1. It's a VLAN type of VNIC.
 922  * 2. The new value is current "primary" MAC address.
 923  * 3. The current MAC address is shared with other clients.
 924  * 4. The new MAC address has been used. This case will be valid when
 925  *    client migration is fully supported.
 926  */
 927 int
 928 mac_vnic_unicast_set(mac_client_handle_t mch, const uint8_t *addr)
 929 {
 930         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
 931         mac_impl_t *mip = mcip->mci_mip;
 932         mac_address_t *map = mcip->mci_unicast;
 933         int err;
 934 
 935         ASSERT(!(mip->mi_state_flags & MIS_IS_VNIC));
 936         ASSERT(mcip->mci_state_flags & MCIS_IS_VNIC);
 937         ASSERT(mcip->mci_flags != MAC_CLIENT_FLAGS_PRIMARY);
 938 
 939         i_mac_perim_enter(mip);
 940 
 941         /*
 942          * If this is a VLAN type of VNIC, it's using "primary" MAC address
 943          * of the underlying interface. Must fail here. Refer to case 1 above.
 944          */
 945         if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0) {
 946                 i_mac_perim_exit(mip);
 947                 return (ENOTSUP);
 948         }
 949 
 950         /*
 951          * If the new address is the "primary" one, must fail. Refer to
 952          * case 2 above.
 953          */
 954         if (bcmp(addr, mip->mi_addr, map->ma_len) == 0) {
 955                 i_mac_perim_exit(mip);
 956                 return (EACCES);
 957         }
 958 
 959         /*
 960          * If the address is shared by multiple clients, must fail. Refer
 961          * to case 3 above.
 962          */
 963         if (mac_check_macaddr_shared(map)) {
 964                 i_mac_perim_exit(mip);
 965                 return (EBUSY);
 966         }
 967 
 968         /*
 969          * If the new address has been used, must fail for now. Refer to
 970          * case 4 above.
 971          */
 972         if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
 973                 i_mac_perim_exit(mip);
 974                 return (ENOTSUP);
 975         }
 976 
 977         /*
 978          * Update the MAC address.
 979          */
 980         err = mac_update_macaddr(map, (uint8_t *)addr);
 981 
 982         if (err != 0) {
 983                 i_mac_perim_exit(mip);
 984                 return (err);
 985         }
 986 
 987         /*
 988          * Update all flows of this MAC client.
 989          */
 990         mac_unicast_update_client_flow(mcip);
 991 
 992         i_mac_perim_exit(mip);
 993         return (0);
 994 }
 995 
 996 /*
 997  * Program the new primary unicast address of the specified MAC.
 998  *
 999  * Function mac_update_macaddr() takes care different types of underlying
1000  * MAC. If the underlying MAC is VNIC, the VNIC driver must have registerd
1001  * mi_unicst() entry point, that indirectly calls mac_vnic_unicast_set()
1002  * which will take care of updating the MAC address of the corresponding
1003  * MAC client.
1004  *
1005  * This is the only interface that allow the client to update the "primary"
1006  * MAC address of the underlying MAC. The new value must have not been
1007  * used by other clients.
1008  */
1009 int
1010 mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr)
1011 {
1012         mac_impl_t *mip = (mac_impl_t *)mh;
1013         mac_address_t *map;
1014         int err;
1015 
1016         /* verify the address validity */
1017         if (!mac_unicst_verify(mh, addr, mip->mi_type->mt_addr_length))
1018                 return (EINVAL);
1019 
1020         i_mac_perim_enter(mip);
1021 
1022         /*
1023          * If the new value is the same as the current primary address value,
1024          * there's nothing to do.
1025          */
1026         if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
1027                 i_mac_perim_exit(mip);
1028                 return (0);
1029         }
1030 
1031         if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
1032                 i_mac_perim_exit(mip);
1033                 return (EBUSY);
1034         }
1035 
1036         map = mac_find_macaddr(mip, mip->mi_addr);
1037         ASSERT(map != NULL);
1038 
1039         /*
1040          * Update the MAC address.
1041          */
1042         if (mip->mi_state_flags & MIS_IS_AGGR) {
1043                 mac_capab_aggr_t aggr_cap;
1044 
1045                 /*
1046                  * If the MAC is an aggregation, other than the unicast
1047                  * addresses programming, aggr must be informed about this
1048                  * primary unicst address change to change its MAC address
1049                  * policy to be user-specified.
1050                  */
1051                 ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED);
1052                 VERIFY(i_mac_capab_get(mh, MAC_CAPAB_AGGR, &aggr_cap));
1053                 err = aggr_cap.mca_unicst(mip->mi_driver, addr);
1054                 if (err == 0)
1055                         bcopy(addr, map->ma_addr, map->ma_len);
1056         } else {
1057                 err = mac_update_macaddr(map, (uint8_t *)addr);
1058         }
1059 
1060         if (err != 0) {
1061                 i_mac_perim_exit(mip);
1062                 return (err);
1063         }
1064 
1065         mac_unicast_update_clients(mip, map);
1066 
1067         /*
1068          * Save the new primary MAC address in mac_impl_t.
1069          */
1070         bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
1071 
1072         i_mac_perim_exit(mip);
1073 
1074         if (err == 0)
1075                 i_mac_notify(mip, MAC_NOTE_UNICST);
1076 
1077         return (err);
1078 }
1079 
1080 /*
1081  * Return the current primary MAC address of the specified MAC.
1082  */
1083 void
1084 mac_unicast_primary_get(mac_handle_t mh, uint8_t *addr)
1085 {
1086         mac_impl_t *mip = (mac_impl_t *)mh;
1087 
1088         rw_enter(&mip->mi_rw_lock, RW_READER);
1089         bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
1090         rw_exit(&mip->mi_rw_lock);
1091 }
1092 
1093 /*
1094  * Return the secondary MAC address for the specified handle
1095  */
1096 void
1097 mac_unicast_secondary_get(mac_client_handle_t mh, uint8_t *addr)
1098 {
1099         mac_client_impl_t *mcip = (mac_client_impl_t *)mh;
1100 
1101         ASSERT(mcip->mci_unicast != NULL);
1102         bcopy(mcip->mci_unicast->ma_addr, addr, mcip->mci_unicast->ma_len);
1103 }
1104 
1105 /*
1106  * Return information about the use of the primary MAC address of the
1107  * specified MAC instance:
1108  *
1109  * - if client_name is non-NULL, it must point to a string of at
1110  *   least MAXNAMELEN bytes, and will be set to the name of the MAC
1111  *   client which uses the primary MAC address.
1112  *
1113  * - if in_use is non-NULL, used to return whether the primary MAC
1114  *   address is currently in use.
1115  */
1116 void
1117 mac_unicast_primary_info(mac_handle_t mh, char *client_name, boolean_t *in_use)
1118 {
1119         mac_impl_t *mip = (mac_impl_t *)mh;
1120         mac_client_impl_t *cur_client;
1121 
1122         if (in_use != NULL)
1123                 *in_use = B_FALSE;
1124         if (client_name != NULL)
1125                 bzero(client_name, MAXNAMELEN);
1126 
1127         /*
1128          * The mi_rw_lock is used to protect threads that don't hold the
1129          * mac perimeter to get a consistent view of the mi_clients_list.
1130          * Threads that modify the list must hold both the mac perimeter and
1131          * mi_rw_lock(RW_WRITER)
1132          */
1133         rw_enter(&mip->mi_rw_lock, RW_READER);
1134         for (cur_client = mip->mi_clients_list; cur_client != NULL;
1135             cur_client = cur_client->mci_client_next) {
1136                 if (mac_is_primary_client(cur_client) ||
1137                     (mip->mi_state_flags & MIS_IS_VNIC)) {
1138                         rw_exit(&mip->mi_rw_lock);
1139                         if (in_use != NULL)
1140                                 *in_use = B_TRUE;
1141                         if (client_name != NULL) {
1142                                 bcopy(cur_client->mci_name, client_name,
1143                                     MAXNAMELEN);
1144                         }
1145                         return;
1146                 }
1147         }
1148         rw_exit(&mip->mi_rw_lock);
1149 }
1150 
1151 /*
1152  * Return the current destination MAC address of the specified MAC.
1153  */
1154 boolean_t
1155 mac_dst_get(mac_handle_t mh, uint8_t *addr)
1156 {
1157         mac_impl_t *mip = (mac_impl_t *)mh;
1158 
1159         rw_enter(&mip->mi_rw_lock, RW_READER);
1160         if (mip->mi_dstaddr_set)
1161                 bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
1162         rw_exit(&mip->mi_rw_lock);
1163         return (mip->mi_dstaddr_set);
1164 }
1165 
1166 /*
1167  * Add the specified MAC client to the list of clients which opened
1168  * the specified MAC.
1169  */
1170 static void
1171 mac_client_add(mac_client_impl_t *mcip)
1172 {
1173         mac_impl_t *mip = mcip->mci_mip;
1174 
1175         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1176 
1177         /* add VNIC to the front of the list */
1178         rw_enter(&mip->mi_rw_lock, RW_WRITER);
1179         mcip->mci_client_next = mip->mi_clients_list;
1180         mip->mi_clients_list = mcip;
1181         mip->mi_nclients++;
1182         rw_exit(&mip->mi_rw_lock);
1183 }
1184 
1185 /*
1186  * Remove the specified MAC client from the list of clients which opened
1187  * the specified MAC.
1188  */
1189 static void
1190 mac_client_remove(mac_client_impl_t *mcip)
1191 {
1192         mac_impl_t *mip = mcip->mci_mip;
1193         mac_client_impl_t **prev, *cclient;
1194 
1195         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1196 
1197         rw_enter(&mip->mi_rw_lock, RW_WRITER);
1198         prev = &mip->mi_clients_list;
1199         cclient = *prev;
1200         while (cclient != NULL && cclient != mcip) {
1201                 prev = &cclient->mci_client_next;
1202                 cclient = *prev;
1203         }
1204         ASSERT(cclient != NULL);
1205         *prev = cclient->mci_client_next;
1206         mip->mi_nclients--;
1207         rw_exit(&mip->mi_rw_lock);
1208 }
1209 
1210 static mac_unicast_impl_t *
1211 mac_client_find_vid(mac_client_impl_t *mcip, uint16_t vid)
1212 {
1213         mac_unicast_impl_t *muip = mcip->mci_unicast_list;
1214 
1215         while ((muip != NULL) && (muip->mui_vid != vid))
1216                 muip = muip->mui_next;
1217 
1218         return (muip);
1219 }
1220 
1221 /*
1222  * Return whether the specified (MAC address, VID) tuple is already used by
1223  * one of the MAC clients associated with the specified MAC.
1224  */
1225 static boolean_t
1226 mac_addr_in_use(mac_impl_t *mip, uint8_t *mac_addr, uint16_t vid)
1227 {
1228         mac_client_impl_t *client;
1229         mac_address_t *map;
1230 
1231         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1232 
1233         for (client = mip->mi_clients_list; client != NULL;
1234             client = client->mci_client_next) {
1235 
1236                 /*
1237                  * Ignore clients that don't have unicast address.
1238                  */
1239                 if (client->mci_unicast_list == NULL)
1240                         continue;
1241 
1242                 map = client->mci_unicast;
1243 
1244                 if ((bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) &&
1245                     (mac_client_find_vid(client, vid) != NULL)) {
1246                         return (B_TRUE);
1247                 }
1248         }
1249 
1250         return (B_FALSE);
1251 }
1252 
1253 /*
1254  * Generate a random MAC address. The MAC address prefix is
1255  * stored in the array pointed to by mac_addr, and its length, in bytes,
1256  * is specified by prefix_len. The least significant bits
1257  * after prefix_len bytes are generated, and stored after the prefix
1258  * in the mac_addr array.
1259  */
1260 int
1261 mac_addr_random(mac_client_handle_t mch, uint_t prefix_len,
1262     uint8_t *mac_addr, mac_diag_t *diag)
1263 {
1264         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1265         mac_impl_t *mip = mcip->mci_mip;
1266         size_t addr_len = mip->mi_type->mt_addr_length;
1267 
1268         if (prefix_len >= addr_len) {
1269                 *diag = MAC_DIAG_MACPREFIXLEN_INVALID;
1270                 return (EINVAL);
1271         }
1272 
1273         /* check the prefix value */
1274         if (prefix_len > 0) {
1275                 bzero(mac_addr + prefix_len, addr_len - prefix_len);
1276                 if (!mac_unicst_verify((mac_handle_t)mip, mac_addr,
1277                     addr_len)) {
1278                         *diag = MAC_DIAG_MACPREFIX_INVALID;
1279                         return (EINVAL);
1280                 }
1281         }
1282 
1283         /* generate the MAC address */
1284         if (prefix_len < addr_len) {
1285                 (void) random_get_pseudo_bytes(mac_addr +
1286                     prefix_len, addr_len - prefix_len);
1287         }
1288 
1289         *diag = 0;
1290         return (0);
1291 }
1292 
/*
 * Set the priority range for this MAC client. This will be used to
 * determine the absolute priority for the threads created for this
 * MAC client using the specified "low", "medium" and "high" level.
 * This will also be used for any subflows on this MAC client.
 *
 * mci_min_pri is computed from 'pri' with FLOW_MIN_PRIORITY(), and
 * mci_max_pri is then derived from the freshly stored mci_min_pri with
 * FLOW_MAX_PRIORITY(); both use the MINCLSYSPRI..MAXCLSYSPRI bounds.
 *
 * NOTE(review): the macro expands to a bare brace block rather than a
 * do { } while (0) statement and evaluates 'mcip' more than once, so it
 * should only be used as a stand-alone statement with a side-effect-free
 * 'mcip' argument.
 */
#define MAC_CLIENT_SET_PRIORITY_RANGE(mcip, pri) {                      \
        (mcip)->mci_min_pri = FLOW_MIN_PRIORITY(MINCLSYSPRI, \
            MAXCLSYSPRI, (pri));                                        \
        (mcip)->mci_max_pri = FLOW_MAX_PRIORITY(MINCLSYSPRI, \
            MAXCLSYSPRI, (mcip)->mci_min_pri);                               \
        }
1305 
/*
 * MAC client open entry point. Return a new MAC client handle. Each
 * MAC client is associated with a name, specified through the 'name'
 * argument.
 *
 * mh    - handle of the MAC to open.
 * mchp  - set to the new client handle on success, NULL on failure.
 * name  - client name; must be NULL when MAC_OPEN_FLAGS_USE_DATALINK_NAME
 *         is passed and non-NULL otherwise.
 * flags - MAC_OPEN_FLAGS_* bits controlling how the client is set up.
 *
 * Returns 0 on success or an errno value on failure.
 *
 * When the MAC itself is a VNIC no new client is allocated; the handle of
 * the lower MAC client owned by the VNIC driver is returned instead.
 */
int
mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name,
    uint16_t flags)
{
        mac_impl_t              *mip = (mac_impl_t *)mh;
        mac_client_impl_t       *mcip;
        int                     err = 0;
        boolean_t               share_desired;
        flow_entry_t            *flent = NULL;

        share_desired = (flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0;
        *mchp = NULL;

        i_mac_perim_enter(mip);

        if (mip->mi_state_flags & MIS_IS_VNIC) {
                /*
                 * The underlying MAC is a VNIC. Return the MAC client
                 * handle of the lower MAC which was obtained by
                 * the VNIC driver when it did its mac_client_open().
                 */

                mcip = mac_vnic_lower(mip);

                /*
                 * Note that multiple mac clients share the same mcip in
                 * this case.
                 */
                if (flags & MAC_OPEN_FLAGS_EXCLUSIVE)
                        mcip->mci_state_flags |= MCIS_EXCLUSIVE;

                if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
                        mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;

                /*
                 * The VNIC's client list is pointed directly at the shared
                 * lower client; mac_client_add() is not used on this path,
                 * so mi_nclients is not touched here.
                 */
                mip->mi_clients_list = mcip;
                i_mac_perim_exit(mip);
                *mchp = (mac_client_handle_t)mcip;

                DTRACE_PROBE2(mac__client__open__nonallocated, mac_impl_t *,
                    mcip->mci_mip, mac_client_impl_t *, mcip);

                /* err is still 0 here. */
                return (err);
        }

        /* Regular MAC: allocate and initialize a fresh client. */
        mcip = kmem_cache_alloc(mac_client_impl_cache, KM_SLEEP);

        mcip->mci_mip = mip;
        mcip->mci_upper_mip = NULL;
        /* Until a receive callback is installed, mac_pkt_drop is used. */
        mcip->mci_rx_fn = mac_pkt_drop;
        mcip->mci_rx_arg = NULL;
        mcip->mci_rx_p_fn = NULL;
        mcip->mci_rx_p_arg = NULL;
        mcip->mci_p_unicast_list = NULL;
        mcip->mci_direct_rx_fn = NULL;
        mcip->mci_direct_rx_arg = NULL;
        mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;

        mcip->mci_unicast_list = NULL;

        /*
         * Translate the open flags into client state flags. These are
         * OR-ed in, which presumably relies on mci_state_flags being zero
         * in objects coming out of the cache (both free paths in this file
         * reset it before kmem_cache_free()) -- TODO confirm against the
         * cache constructor.
         */
        if ((flags & MAC_OPEN_FLAGS_IS_VNIC) != 0)
                mcip->mci_state_flags |= MCIS_IS_VNIC;

        if ((flags & MAC_OPEN_FLAGS_EXCLUSIVE) != 0)
                mcip->mci_state_flags |= MCIS_EXCLUSIVE;

        if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0)
                mcip->mci_state_flags |= MCIS_IS_AGGR_PORT;

        if (mip->mi_state_flags & MIS_IS_AGGR)
                mcip->mci_state_flags |= MCIS_IS_AGGR_CLIENT;

        if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) {
                datalink_id_t   linkid;

                /* The client name is derived from the datalink name. */
                ASSERT(name == NULL);
                if ((err = dls_devnet_macname2linkid(mip->mi_name,
                    &linkid)) != 0) {
                        goto done;
                }
                if ((err = dls_mgmt_get_linkinfo(linkid, mcip->mci_name, NULL,
                    NULL, NULL)) != 0) {
                        /*
                         * Use mac name if dlmgmtd is not available.
                         */
                        if (err == EBADF) {
                                (void) strlcpy(mcip->mci_name, mip->mi_name,
                                    sizeof (mcip->mci_name));
                                err = 0;
                        } else {
                                goto done;
                        }
                }
                mcip->mci_state_flags |= MCIS_USE_DATALINK_NAME;
        } else {
                ASSERT(name != NULL);
                /*
                 * NOTE(review): a name of exactly MAXNAMELEN characters
                 * passes this check; whether it then fits into mci_name
                 * without truncation by strlcpy() depends on the array
                 * size, which is not visible here -- confirm.
                 */
                if (strlen(name) > MAXNAMELEN) {
                        err = EINVAL;
                        goto done;
                }
                (void) strlcpy(mcip->mci_name, name, sizeof (mcip->mci_name));
        }

        if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
                mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;

        if (flags & MAC_OPEN_FLAGS_NO_UNICAST_ADDR)
                mcip->mci_state_flags |= MCIS_NO_UNICAST_ADDR;

        mac_protect_init(mcip);

        /* the subflow table will be created dynamically */
        mcip->mci_subflow_tab = NULL;

        mcip->mci_misc_stat.mms_multircv = 0;
        mcip->mci_misc_stat.mms_brdcstrcv = 0;
        mcip->mci_misc_stat.mms_multixmt = 0;
        mcip->mci_misc_stat.mms_brdcstxmt = 0;

        /* Create an initial flow */

        err = mac_flow_create(NULL, NULL, mcip->mci_name, NULL,
            mcip->mci_state_flags & MCIS_IS_VNIC ? FLOW_VNIC_MAC :
            FLOW_PRIMARY_MAC, &flent);
        if (err != 0)
                goto done;
        mcip->mci_flent = flent;
        FLOW_MARK(flent, FE_MC_NO_DATAPATH);
        flent->fe_mcip = mcip;

        /*
         * Place initial creation reference on the flow. This reference
         * is released in the corresponding delete action viz.
         * mac_unicast_remove after waiting for all transient refs to
         * go away. The wait happens in mac_flow_wait.
         */
        FLOW_REFHOLD(flent);

        /*
         * Do this ahead of the mac_bcast_add() below so that the mi_nclients
         * will have the right value for mac_rx_srs_setup().
         */
        mac_client_add(mcip);

        mcip->mci_share = 0;
        if (share_desired)
                i_mac_share_alloc(mcip);

        /*
         * We will do minimal datapath setup to allow a MAC client to
         * transmit or receive non-unicast packets without waiting
         * for mac_unicast_add.
         */
        if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) {
                if ((err = mac_client_datapath_setup(mcip, VLAN_ID_NONE,
                    NULL, NULL, B_TRUE, NULL)) != 0) {
                        goto done;
                }
        }

        DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *,
            mcip->mci_mip, mac_client_impl_t *, mcip);

        *mchp = (mac_client_handle_t)mcip;
        i_mac_perim_exit(mip);
        return (0);

done:
        /*
         * Failure path: reset the fields that are expected to be zero in
         * cached objects and give the client back to the cache.
         *
         * NOTE(review): a failure of mac_client_datapath_setup() arrives
         * here after mac_client_add() has linked mcip onto
         * mi_clients_list and after the flow, share and mac_protect state
         * were set up; none of that is undone below (compare with
         * mac_client_close()), so the list would keep pointing at the
         * freed client. Likewise mac_protect_init() is not paired with
         * mac_protect_fini() when mac_flow_create() fails. Confirm
         * whether the callees clean up, and fix if not.
         */
        i_mac_perim_exit(mip);
        mcip->mci_state_flags = 0;
        mcip->mci_tx_flag = 0;
        kmem_cache_free(mac_client_impl_cache, mcip);
        return (err);
}
1484 
1485 /*
1486  * Close the specified MAC client handle.
1487  */
1488 void
1489 mac_client_close(mac_client_handle_t mch, uint16_t flags)
1490 {
1491         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
1492         mac_impl_t              *mip = mcip->mci_mip;
1493         flow_entry_t            *flent;
1494 
1495         i_mac_perim_enter(mip);
1496 
1497         if (flags & MAC_CLOSE_FLAGS_EXCLUSIVE)
1498                 mcip->mci_state_flags &= ~MCIS_EXCLUSIVE;
1499 
1500         if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
1501             !(flags & MAC_CLOSE_FLAGS_IS_VNIC)) {
1502                 /*
1503                  * This is an upper VNIC client initiated operation.
1504                  * The lower MAC client will be closed by the VNIC driver
1505                  * when the VNIC is deleted.
1506                  */
1507 
1508                 i_mac_perim_exit(mip);
1509                 return;
1510         }
1511 
1512         /* If we have only setup up minimal datapth setup, tear it down */
1513         if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) {
1514                 mac_client_datapath_teardown((mac_client_handle_t)mcip, NULL,
1515                     mcip->mci_flent);
1516                 mcip->mci_state_flags &= ~MCIS_NO_UNICAST_ADDR;
1517         }
1518 
1519         /*
1520          * Remove the flent associated with the MAC client
1521          */
1522         flent = mcip->mci_flent;
1523         mcip->mci_flent = NULL;
1524         FLOW_FINAL_REFRELE(flent);
1525 
1526         /*
1527          * MAC clients must remove the unicast addresses and promisc callbacks
1528          * they added before issuing a mac_client_close().
1529          */
1530         ASSERT(mcip->mci_unicast_list == NULL);
1531         ASSERT(mcip->mci_promisc_list == NULL);
1532         ASSERT(mcip->mci_tx_notify_cb_list == NULL);
1533 
1534         i_mac_share_free(mcip);
1535         mac_protect_fini(mcip);
1536         mac_client_remove(mcip);
1537 
1538         i_mac_perim_exit(mip);
1539         mcip->mci_subflow_tab = NULL;
1540         mcip->mci_state_flags = 0;
1541         mcip->mci_tx_flag = 0;
1542         kmem_cache_free(mac_client_impl_cache, mch);
1543 }
1544 
1545 /*
1546  * Set the Rx bypass receive callback and return B_TRUE. Return
1547  * B_FALSE if it's not possible to enable bypass.
1548  */
1549 boolean_t
1550 mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1)
1551 {
1552         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
1553         mac_impl_t              *mip = mcip->mci_mip;
1554 
1555         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1556 
1557         /*
1558          * If the client has more than one VLAN then process packets
1559          * through DLS. This should happen only when sun4v vsw is on
1560          * the scene.
1561          */
1562         if (mcip->mci_nvids > 1)
1563                 return (B_FALSE);
1564 
1565         /*
1566          * These are not accessed directly in the data path, and hence
1567          * don't need any protection
1568          */
1569         mcip->mci_direct_rx_fn = rx_fn;
1570         mcip->mci_direct_rx_arg = arg1;
1571         return (B_TRUE);
1572 }
1573 
1574 /*
1575  * Enable/Disable rx bypass. By default, bypass is assumed to be enabled.
1576  */
1577 void
1578 mac_rx_bypass_enable(mac_client_handle_t mch)
1579 {
1580         ((mac_client_impl_t *)mch)->mci_state_flags &= ~MCIS_RX_BYPASS_DISABLE;
1581 }
1582 
1583 void
1584 mac_rx_bypass_disable(mac_client_handle_t mch)
1585 {
1586         ((mac_client_impl_t *)mch)->mci_state_flags |= MCIS_RX_BYPASS_DISABLE;
1587 }
1588 
1589 /*
1590  * Set the receive callback for the specified MAC client. There can be
1591  * at most one such callback per MAC client.
1592  */
1593 void
1594 mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg)
1595 {
1596         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1597         mac_impl_t      *mip = mcip->mci_mip;
1598         mac_impl_t      *umip = mcip->mci_upper_mip;
1599 
1600         /*
1601          * Instead of adding an extra set of locks and refcnts in
1602          * the datapath at the mac client boundary, we temporarily quiesce
1603          * the SRS and related entities. We then change the receive function
1604          * without interference from any receive data thread and then reenable
1605          * the data flow subsequently.
1606          */
1607         i_mac_perim_enter(mip);
1608         mac_rx_client_quiesce(mch);
1609 
1610         mcip->mci_rx_fn = rx_fn;
1611         mcip->mci_rx_arg = arg;
1612         mac_rx_client_restart(mch);
1613         i_mac_perim_exit(mip);
1614 
1615         /*
1616          * If we're changing the Rx function on the primary MAC of a VNIC,
1617          * make sure any secondary addresses on the VNIC are updated as well.
1618          */
1619         if (umip != NULL) {
1620                 ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0);
1621                 mac_vnic_secondary_update(umip);
1622         }
1623 }
1624 
/*
 * Reset the receive callback for the specified MAC client.
 *
 * This re-installs mac_pkt_drop with a NULL argument through mac_rx_set(),
 * the same callback a freshly opened client starts with.
 */
void
mac_rx_clear(mac_client_handle_t mch)
{
        mac_rx_set(mch, mac_pkt_drop, NULL);
}
1633 
1634 void
1635 mac_secondary_dup(mac_client_handle_t smch, mac_client_handle_t dmch)
1636 {
1637         mac_client_impl_t *smcip = (mac_client_impl_t *)smch;
1638         mac_client_impl_t *dmcip = (mac_client_impl_t *)dmch;
1639         flow_entry_t *flent = dmcip->mci_flent;
1640 
1641         /* This should only be called to setup secondary macs */
1642         ASSERT((flent->fe_type & FLOW_PRIMARY_MAC) == 0);
1643 
1644         mac_rx_set(dmch, smcip->mci_rx_fn, smcip->mci_rx_arg);
1645         dmcip->mci_promisc_list = smcip->mci_promisc_list;
1646 
1647         /*
1648          * Duplicate the primary mac resources to the secondary.
1649          * Since we already validated the resource controls when setting
1650          * them on the primary, we can ignore errors here.
1651          */
1652         (void) mac_resource_ctl_set(dmch, MCIP_RESOURCE_PROPS(smcip));
1653 }
1654 
1655 /*
1656  * Called when removing a secondary MAC. Currently only clears the promisc_list
1657  * since we share the primary mac's promisc_list.
1658  */
1659 void
1660 mac_secondary_cleanup(mac_client_handle_t mch)
1661 {
1662         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1663         flow_entry_t *flent = mcip->mci_flent;
1664 
1665         /* This should only be called for secondary macs */
1666         ASSERT((flent->fe_type & FLOW_PRIMARY_MAC) == 0);
1667         mcip->mci_promisc_list = NULL;
1668 }
1669 
1670 /*
1671  * Walk the MAC client subflow table and updates their priority values.
1672  */
1673 static int
1674 mac_update_subflow_priority_cb(flow_entry_t *flent, void *arg)
1675 {
1676         mac_flow_update_priority(arg, flent);
1677         return (0);
1678 }
1679 
1680 void
1681 mac_update_subflow_priority(mac_client_impl_t *mcip)
1682 {
1683         (void) mac_flow_walk(mcip->mci_subflow_tab,
1684             mac_update_subflow_priority_cb, mcip);
1685 }
1686 
/*
 * Modify the RX and/or TX ring properties of a MAC client.
 *
 *   mcip - the client being modified.
 *   mrp  - the request. Only the MRP_RX_RINGS/MRP_TX_RINGS bits, the
 *          MRP_RINGS_RESET and MRP_{RX,TX}RINGS_UNSPEC bits, and the
 *          mrp_nrxrings/mrp_ntxrings counts are examined here.
 *   tmrp - the properties the request is compared against in order to
 *          detect "no change" and to decide how to adjust the reservation
 *          accounting. mac_resource_ctl_set() passes a copy of the
 *          client's properties taken before the request was merged in.
 *
 * We could either just move around rings, i.e add/remove rings given to a
 * client. Or this might cause the client to move from hardware based to
 * software or the other way around. If we want to reset this property,
 * then we clear the mask, additionally if the client was given a
 * non-default group we remove all rings except for 1 and give it back to
 * the default group.
 *
 * Returns 0 on success or when there is nothing to do. Otherwise:
 *   EINVAL  - the client has a share, or Rx rings were requested for a
 *             client whose MAC address is shared with other clients;
 *   ENOSPC  - no group could be reserved, or switching Rx groups failed;
 *   ENOTSUP - a non-zero Tx ring count was requested but the Tx group type
 *             is not dynamic;
 *   or the error returned by mac_group_ring_modify().
 *
 * NOTE(review): every early return in the Rx section leaves the function,
 * so when both MRP_RX_RINGS and MRP_TX_RINGS are set in mrp the Tx section
 * only runs if the Rx section falls through to its end -- confirm that
 * this is intended.
 */
int
mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp,
    mac_resource_props_t *tmrp)
{
        mac_impl_t              *mip = mcip->mci_mip;
        flow_entry_t            *flent = mcip->mci_flent;
        uint8_t                 *mac_addr;
        int                     err = 0;
        mac_group_t             *defgrp;
        mac_group_t             *group;
        mac_group_t             *ngrp;
        mac_resource_props_t    *cmrp = MCIP_RESOURCE_PROPS(mcip);
        uint_t                  ringcnt;
        boolean_t               unspec;

        /* Ring properties cannot be changed on a client that has a share. */
        if (mcip->mci_share != 0)
                return (EINVAL);

        if (mrp->mrp_mask & MRP_RX_RINGS) {
                unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
                group = flent->fe_rx_ring_group;
                defgrp = MAC_DEFAULT_RX_GROUP(mip);
                mac_addr = flent->fe_flow_desc.fd_dst_mac;

                /*
                 * No resulting change. If we are resetting on a client on
                 * which there was no rx rings property. For dynamic group
                 * if we are setting the same number of rings already set.
                 * For static group if we are requesting a group again.
                 */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (!(tmrp->mrp_mask & MRP_RX_RINGS))
                                return (0);
                } else {
                        if (unspec) {
                                if (tmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
                                        return (0);
                        } else if (mip->mi_rx_group_type ==
                            MAC_GROUP_TYPE_DYNAMIC) {
                                if ((tmrp->mrp_mask & MRP_RX_RINGS) &&
                                    !(tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) &&
                                    mrp->mrp_nrxrings == tmrp->mrp_nrxrings) {
                                        return (0);
                                }
                        }
                }
                /* Resetting the prop */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        /*
                         * We will just keep one ring and give others back if
                         * we are not the primary. For the primary we give
                         * all the rings in the default group except the
                         * default ring. If it is a static group, then
                         * we don't do anything, but clear the MRP_RX_RINGS
                         * flag.
                         */
                        if (group != defgrp) {
                                if (mip->mi_rx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        /*
                                         * This group has reserved rings
                                         * that need to be released now,
                                         * so does the group.
                                         */
                                        MAC_RX_RING_RELEASED(mip,
                                            group->mrg_cur_count);
                                        MAC_RX_GRP_RELEASED(mip);
                                        if ((flent->fe_type &
                                            FLOW_PRIMARY_MAC) != 0) {
                                                /*
                                                 * A primary that is the only
                                                 * active client simply moves
                                                 * back to the default group.
                                                 */
                                                if (mip->mi_nactiveclients ==
                                                    1) {
                                                        (void)
                                                            mac_rx_switch_group(
                                                            mcip, group,
                                                            defgrp);
                                                        return (0);
                                                } else {
                                                        cmrp->mrp_nrxrings =
                                                            group->
                                                            mrg_cur_count +
                                                            defgrp->
                                                            mrg_cur_count - 1;
                                                }
                                        } else {
                                                cmrp->mrp_nrxrings = 1;
                                        }
                                        /*
                                         * Resize the group to the count just
                                         * stored in the client's properties;
                                         * a failure here is ignored.
                                         */
                                        (void) mac_group_ring_modify(mcip,
                                            group, defgrp);
                                } else {
                                        /*
                                         * If this is a static group, we
                                         * need to release the group. The
                                         * client will remain in the same
                                         * group till some other client
                                         * needs this group.
                                         */
                                        MAC_RX_GRP_RELEASED(mip);
                                }
                        /* Let's check if we can give this an excl group */
                        } else if (group == defgrp) {
                                /*
                                 * If multiple clients share an
                                 * address then they must stay on the
                                 * default group.
                                 */
                                if (mac_check_macaddr_shared(mcip->mci_unicast))
                                        return (0);

                                ngrp = mac_reserve_rx_group(mcip, mac_addr,
                                    B_TRUE);
                                /* Couldn't give it a group, that's fine */
                                if (ngrp == NULL)
                                        return (0);
                                /* Switch to H/W */
                                if (mac_rx_switch_group(mcip, defgrp, ngrp) !=
                                    0) {
                                        mac_stop_group(ngrp);
                                        return (0);
                                }
                        }
                        /*
                         * A reset never reports an error: whether or not
                         * the client could be given an exclusive group
                         * above, we return success.
                         */
                        return (0);
                }

                if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) {
                        /*
                         * We are requesting Rx rings. Try to reserve
                         * a non-default group.
                         *
                         * If multiple clients share an address then
                         * they must stay on the default group.
                         */
                        if (mac_check_macaddr_shared(mcip->mci_unicast))
                                return (EINVAL);

                        ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
                        if (ngrp == NULL)
                                return (ENOSPC);

                        /* Switch to H/W */
                        if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
                                mac_release_rx_group(mcip, ngrp);
                                return (ENOSPC);
                        }
                        /* Account for the newly reserved group and rings. */
                        MAC_RX_GRP_RESERVED(mip);
                        if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC)
                                MAC_RX_RING_RESERVED(mip, ngrp->mrg_cur_count);
                } else if (group != defgrp && !unspec &&
                    mrp->mrp_nrxrings == 0) {
                        /* Switch to S/W */
                        /* Save the ring count before the group is left. */
                        ringcnt = group->mrg_cur_count;
                        if (mac_rx_switch_group(mcip, group, defgrp) != 0)
                                return (ENOSPC);
                        /*
                         * Only undo the reservation accounting if the rings
                         * property had previously been set on this client.
                         */
                        if (tmrp->mrp_mask & MRP_RX_RINGS) {
                                MAC_RX_GRP_RELEASED(mip);
                                if (mip->mi_rx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        MAC_RX_RING_RELEASED(mip, ringcnt);
                                }
                        }
                } else if (group != defgrp && mip->mi_rx_group_type ==
                    MAC_GROUP_TYPE_DYNAMIC) {
                        /* Already in a dynamic H/W group: resize it. */
                        ringcnt = group->mrg_cur_count;
                        err = mac_group_ring_modify(mcip, group, defgrp);
                        if (err != 0)
                                return (err);
                        /*
                         * Update the accounting. If this group
                         * already had explicitly reserved rings,
                         * we need to update the rings based on
                         * the new ring count. If this group
                         * had not explicitly reserved rings,
                         * then we just reserve the rings asked for
                         * and reserve the group.
                         */
                        if (tmrp->mrp_mask & MRP_RX_RINGS) {
                                if (ringcnt > group->mrg_cur_count) {
                                        MAC_RX_RING_RELEASED(mip,
                                            ringcnt - group->mrg_cur_count);
                                } else {
                                        MAC_RX_RING_RESERVED(mip,
                                            group->mrg_cur_count - ringcnt);
                                }
                        } else {
                                MAC_RX_RING_RESERVED(mip, group->mrg_cur_count);
                                MAC_RX_GRP_RESERVED(mip);
                        }
                }
        }
        if (mrp->mrp_mask & MRP_TX_RINGS) {
                unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC;
                group = flent->fe_tx_ring_group;
                defgrp = MAC_DEFAULT_TX_GROUP(mip);

                /*
                 * For static groups we only allow rings=0 or resetting the
                 * rings property.
                 */
                if (mrp->mrp_ntxrings > 0 &&
                    mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) {
                        return (ENOTSUP);
                }
                /*
                 * No resulting change: same checks as for the Rx rings
                 * above, applied to the Tx property.
                 */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (!(tmrp->mrp_mask & MRP_TX_RINGS))
                                return (0);
                } else {
                        if (unspec) {
                                if (tmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
                                        return (0);
                        } else if (mip->mi_tx_group_type ==
                            MAC_GROUP_TYPE_DYNAMIC) {
                                if ((tmrp->mrp_mask & MRP_TX_RINGS) &&
                                    !(tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) &&
                                    mrp->mrp_ntxrings == tmrp->mrp_ntxrings) {
                                        return (0);
                                }
                        }
                }
                /* Resetting the prop */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (group != defgrp) {
                                if (mip->mi_tx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        ringcnt = group->mrg_cur_count;
                                        /*
                                         * The primary goes back to the
                                         * default group; Tx is quiesced
                                         * around the switch.
                                         */
                                        if ((flent->fe_type &
                                            FLOW_PRIMARY_MAC) != 0) {
                                                mac_tx_client_quiesce(
                                                    (mac_client_handle_t)
                                                    mcip);
                                                mac_tx_switch_group(mcip,
                                                    group, defgrp);
                                                mac_tx_client_restart(
                                                    (mac_client_handle_t)
                                                    mcip);
                                                MAC_TX_GRP_RELEASED(mip);
                                                MAC_TX_RING_RELEASED(mip,
                                                    ringcnt);
                                                return (0);
                                        }
                                        /*
                                         * Non-primary clients keep a single
                                         * ring; a failure to resize the
                                         * group is ignored.
                                         */
                                        cmrp->mrp_ntxrings = 1;
                                        (void) mac_group_ring_modify(mcip,
                                            group, defgrp);
                                        /*
                                         * This group has reserved rings
                                         * that need to be released now.
                                         */
                                        MAC_TX_RING_RELEASED(mip, ringcnt);
                                }
                                /*
                                 * If this is a static group, we
                                 * need to release the group. The
                                 * client will remain in the same
                                 * group till some other client
                                 * needs this group.
                                 *
                                 * The dynamic, non-primary case above also
                                 * falls through to this group release.
                                 */
                                MAC_TX_GRP_RELEASED(mip);
                        } else if (group == defgrp &&
                            (flent->fe_type & FLOW_PRIMARY_MAC) == 0) {
                                /*
                                 * A non-primary client in the default group
                                 * is given an exclusive group if one can be
                                 * reserved; otherwise it stays where it is.
                                 */
                                ngrp = mac_reserve_tx_group(mcip, B_TRUE);
                                if (ngrp == NULL)
                                        return (0);
                                mac_tx_client_quiesce(
                                    (mac_client_handle_t)mcip);
                                mac_tx_switch_group(mcip, defgrp, ngrp);
                                mac_tx_client_restart(
                                    (mac_client_handle_t)mcip);
                        }
                        /*
                         * A reset never reports an error. Note that a
                         * primary client that is in the default group is
                         * left as it is rather than being given an
                         * exclusive group.
                         */
                        return (0);
                }

                /* Switch to H/W */
                if (group == defgrp && ((mrp->mrp_ntxrings > 0) || unspec)) {
                        ngrp = mac_reserve_tx_group(mcip, B_TRUE);
                        if (ngrp == NULL)
                                return (ENOSPC);
                        mac_tx_client_quiesce((mac_client_handle_t)mcip);
                        mac_tx_switch_group(mcip, defgrp, ngrp);
                        mac_tx_client_restart((mac_client_handle_t)mcip);
                        /* Account for the newly reserved group and rings. */
                        MAC_TX_GRP_RESERVED(mip);
                        if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)
                                MAC_TX_RING_RESERVED(mip, ngrp->mrg_cur_count);
                /* Switch to S/W */
                } else if (group != defgrp && !unspec &&
                    mrp->mrp_ntxrings == 0) {
                        /* Switch to S/W */
                        /* Save the ring count before the group is left. */
                        ringcnt = group->mrg_cur_count;
                        mac_tx_client_quiesce((mac_client_handle_t)mcip);
                        mac_tx_switch_group(mcip, group, defgrp);
                        mac_tx_client_restart((mac_client_handle_t)mcip);
                        /*
                         * Only undo the reservation accounting if the rings
                         * property had previously been set on this client.
                         */
                        if (tmrp->mrp_mask & MRP_TX_RINGS) {
                                MAC_TX_GRP_RELEASED(mip);
                                if (mip->mi_tx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        MAC_TX_RING_RELEASED(mip, ringcnt);
                                }
                        }
                } else if (group != defgrp && mip->mi_tx_group_type ==
                    MAC_GROUP_TYPE_DYNAMIC) {
                        /* Already in a dynamic H/W group: resize it. */
                        ringcnt = group->mrg_cur_count;
                        err = mac_group_ring_modify(mcip, group, defgrp);
                        if (err != 0)
                                return (err);
                        /*
                         * Update the accounting. If this group
                         * already had explicitly reserved rings,
                         * we need to update the rings based on
                         * the new ring count. If this group
                         * had not explicitly reserved rings,
                         * then we just reserve the rings asked for
                         * and reserve the group.
                         */
                        if (tmrp->mrp_mask & MRP_TX_RINGS) {
                                if (ringcnt > group->mrg_cur_count) {
                                        MAC_TX_RING_RELEASED(mip,
                                            ringcnt - group->mrg_cur_count);
                                } else {
                                        MAC_TX_RING_RESERVED(mip,
                                            group->mrg_cur_count - ringcnt);
                                }
                        } else {
                                MAC_TX_RING_RESERVED(mip, group->mrg_cur_count);
                                MAC_TX_GRP_RESERVED(mip);
                        }
                }
        }
        return (0);
}
2033 
2034 /*
2035  * When the MAC client is being brought up (i.e. we do a unicast_add) we need
2036  * to initialize the cpu and resource control structure in the
2037  * mac_client_impl_t from the mac_impl_t (i.e if there are any cached
2038  * properties before the flow entry for the unicast address was created).
2039  */
2040 static int
2041 mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
2042 {
2043         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2044         mac_impl_t              *mip = (mac_impl_t *)mcip->mci_mip;
2045         mac_impl_t              *umip = mcip->mci_upper_mip;
2046         int                     err = 0;
2047         flow_entry_t            *flent = mcip->mci_flent;
2048         mac_resource_props_t    *omrp, *nmrp = MCIP_RESOURCE_PROPS(mcip);
2049 
2050         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2051 
2052         err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ?
2053             mcip->mci_upper_mip : mip, mrp);
2054         if (err != 0)
2055                 return (err);
2056 
2057         /*
2058          * Copy over the existing properties since mac_update_resources
2059          * will modify the client's mrp. Currently, the saved property
2060          * is used to determine the difference between existing and
2061          * modified rings property.
2062          */
2063         omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP);
2064         bcopy(nmrp, omrp, sizeof (*omrp));
2065         mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
2066         if (MCIP_DATAPATH_SETUP(mcip)) {
2067                 /*
2068                  * We support rings only for primary client when there are
2069                  * multiple clients sharing the same MAC address (e.g. VLAN).
2070                  */
2071                 if (mrp->mrp_mask & MRP_RX_RINGS ||
2072                     mrp->mrp_mask & MRP_TX_RINGS) {
2073 
2074                         if ((err = mac_client_set_rings_prop(mcip, mrp,
2075                             omrp)) != 0) {
2076                                 if (omrp->mrp_mask & MRP_RX_RINGS) {
2077                                         nmrp->mrp_mask |= MRP_RX_RINGS;
2078                                         nmrp->mrp_nrxrings = omrp->mrp_nrxrings;
2079                                 } else {
2080                                         nmrp->mrp_mask &= ~MRP_RX_RINGS;
2081                                         nmrp->mrp_nrxrings = 0;
2082                                 }
2083                                 if (omrp->mrp_mask & MRP_TX_RINGS) {
2084                                         nmrp->mrp_mask |= MRP_TX_RINGS;
2085                                         nmrp->mrp_ntxrings = omrp->mrp_ntxrings;
2086                                 } else {
2087                                         nmrp->mrp_mask &= ~MRP_TX_RINGS;
2088                                         nmrp->mrp_ntxrings = 0;
2089                                 }
2090                                 if (omrp->mrp_mask & MRP_RXRINGS_UNSPEC)
2091                                         omrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
2092                                 else
2093                                         omrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
2094 
2095                                 if (omrp->mrp_mask & MRP_TXRINGS_UNSPEC)
2096                                         omrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
2097                                 else
2098                                         omrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
2099                                 kmem_free(omrp, sizeof (*omrp));
2100                                 return (err);
2101                         }
2102 
2103                         /*
2104                          * If we modified the rings property of the primary
2105                          * we need to update the property fields of its
2106                          * VLANs as they inherit the primary's properites.
2107                          */
2108                         if (mac_is_primary_client(mcip)) {
2109                                 mac_set_prim_vlan_rings(mip,
2110                                     MCIP_RESOURCE_PROPS(mcip));
2111                         }
2112                 }
2113                 /*
2114                  * We have to set this prior to calling mac_flow_modify.
2115                  */
2116                 if (mrp->mrp_mask & MRP_PRIORITY) {
2117                         if (mrp->mrp_priority == MPL_RESET) {
2118                                 MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
2119                                     MPL_LINK_DEFAULT);
2120                         } else {
2121                                 MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
2122                                     mrp->mrp_priority);
2123                         }
2124                 }
2125 
2126                 mac_flow_modify(mip->mi_flow_tab, flent, mrp);
2127                 if (mrp->mrp_mask & MRP_PRIORITY)
2128                         mac_update_subflow_priority(mcip);
2129 
2130                 /* Apply these resource settings to any secondary macs */
2131                 if (umip != NULL) {
2132                         ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0);
2133                         mac_vnic_secondary_update(umip);
2134                 }
2135         }
2136         kmem_free(omrp, sizeof (*omrp));
2137         return (0);
2138 }
2139 
2140 static int
2141 mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr,
2142     uint16_t vid, boolean_t is_primary, boolean_t first_flow,
2143     flow_entry_t **flent, mac_resource_props_t *mrp)
2144 {
2145         mac_impl_t      *mip = (mac_impl_t *)mcip->mci_mip;
2146         flow_desc_t     flow_desc;
2147         char            flowname[MAXFLOWNAMELEN];
2148         int             err;
2149         uint_t          flent_flags;
2150 
2151         /*
2152          * First unicast address being added, create a new flow
2153          * for that MAC client.
2154          */
2155         bzero(&flow_desc, sizeof (flow_desc));
2156 
2157         ASSERT(mac_addr != NULL ||
2158             (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR));
2159         if (mac_addr != NULL) {
2160                 flow_desc.fd_mac_len = mip->mi_type->mt_addr_length;
2161                 bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len);
2162         }
2163         flow_desc.fd_mask = FLOW_LINK_DST;
2164         if (vid != 0) {
2165                 flow_desc.fd_vid = vid;
2166                 flow_desc.fd_mask |= FLOW_LINK_VID;
2167         }
2168 
2169         /*
2170          * XXX-nicolas. For now I'm keeping the FLOW_PRIMARY_MAC
2171          * and FLOW_VNIC. Even though they're a hack inherited
2172          * from the SRS code, we'll keep them for now. They're currently
2173          * consumed by mac_datapath_setup() to create the SRS.
2174          * That code should be eventually moved out of
2175          * mac_datapath_setup() and moved to a mac_srs_create()
2176          * function of some sort to keep things clean.
2177          *
2178          * Also, there's no reason why the SRS for the primary MAC
2179          * client should be different than any other MAC client. Until
2180          * this is cleaned-up, we support only one MAC unicast address
2181          * per client.
2182          *
2183          * We set FLOW_PRIMARY_MAC for the primary MAC address,
2184          * FLOW_VNIC for everything else.
2185          */
2186         if (is_primary)
2187                 flent_flags = FLOW_PRIMARY_MAC;
2188         else
2189                 flent_flags = FLOW_VNIC_MAC;
2190 
2191         /*
2192          * For the first flow we use the MAC client's name - mci_name, for
2193          * subsequent ones we just create a name with the VID. This is
2194          * so that we can add these flows to the same flow table. This is
2195          * fine as the flow name (except for the one with the MAC client's
2196          * name) is not visible. When the first flow is removed, we just replace
2197          * its fdesc with another from the list, so we will still retain the
2198          * flent with the MAC client's flow name.
2199          */
2200         if (first_flow) {
2201                 bcopy(mcip->mci_name, flowname, MAXFLOWNAMELEN);
2202         } else {
2203                 (void) sprintf(flowname, "%s%u", mcip->mci_name, vid);
2204                 flent_flags = FLOW_NO_STATS;
2205         }
2206 
2207         if ((err = mac_flow_create(&flow_desc, mrp, flowname, NULL,
2208             flent_flags, flent)) != 0)
2209                 return (err);
2210 
2211         mac_misc_stat_create(*flent);
2212         FLOW_MARK(*flent, FE_INCIPIENT);
2213         (*flent)->fe_mcip = mcip;
2214 
2215         /*
2216          * Place initial creation reference on the flow. This reference
2217          * is released in the corresponding delete action viz.
2218          * mac_unicast_remove after waiting for all transient refs to
2219          * to go away. The wait happens in mac_flow_wait.
2220          * We have already held the reference in mac_client_open().
2221          */
2222         if (!first_flow)
2223                 FLOW_REFHOLD(*flent);
2224         return (0);
2225 }
2226 
2227 /* Refresh the multicast grouping for this VID. */
2228 int
2229 mac_client_update_mcast(void *arg, boolean_t add, const uint8_t *addrp)
2230 {
2231         flow_entry_t            *flent = arg;
2232         mac_client_impl_t       *mcip = flent->fe_mcip;
2233         uint16_t                vid;
2234         flow_desc_t             flow_desc;
2235 
2236         mac_flow_get_desc(flent, &flow_desc);
2237         vid = (flow_desc.fd_mask & FLOW_LINK_VID) != 0 ?
2238             flow_desc.fd_vid : VLAN_ID_NONE;
2239 
2240         /*
2241          * We don't call mac_multicast_add()/mac_multicast_remove() as
2242          * we want to add/remove for this specific vid.
2243          */
2244         if (add) {
2245                 return (mac_bcast_add(mcip, addrp, vid,
2246                     MAC_ADDRTYPE_MULTICAST));
2247         } else {
2248                 mac_bcast_delete(mcip, addrp, vid);
2249                 return (0);
2250         }
2251 }
2252 
2253 static void
2254 mac_update_single_active_client(mac_impl_t *mip)
2255 {
2256         mac_client_impl_t *client = NULL;
2257 
2258         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2259 
2260         rw_enter(&mip->mi_rw_lock, RW_WRITER);
2261         if (mip->mi_nactiveclients == 1) {
2262                 /*
2263                  * Find the one active MAC client from the list of MAC
2264                  * clients. The active MAC client has at least one
2265                  * unicast address.
2266                  */
2267                 for (client = mip->mi_clients_list; client != NULL;
2268                     client = client->mci_client_next) {
2269                         if (client->mci_unicast_list != NULL)
2270                                 break;
2271                 }
2272                 ASSERT(client != NULL);
2273         }
2274 
2275         /*
2276          * mi_single_active_client is protected by the MAC impl's read/writer
2277          * lock, which allows mac_rx() to check the value of that pointer
2278          * as a reader.
2279          */
2280         mip->mi_single_active_client = client;
2281         rw_exit(&mip->mi_rw_lock);
2282 }
2283 
2284 /*
2285  * Set up the data path. Called from i_mac_unicast_add after having
2286  * done all the validations including making sure this is an active
2287  * client (i.e. one that is ready to process packets).
      *
      * Arguments:
      *   mcip      - the MAC client whose data path is being set up; the
      *               underlying MAC is taken from mcip->mci_mip.
      *   vid       - VLAN ID used for the broadcast group membership and for
      *               the unicast flow.
      *   mac_addr  - unicast address for the flow. If the client already has
      *               a data path, it must match the address held in
      *               mcip->mci_unicast, otherwise EINVAL is returned.
      *   mrp       - resource properties. When non-NULL, its priority (or
      *               MPL_LINK_DEFAULT if MRP_PRIORITY is not set in the mask)
      *               seeds the client's priority range. It is handed to
      *               mac_unicast_flow_create() for the first flow only;
      *               additional VIDs pass NULL instead.
      *   isprimary - passed through to mac_unicast_flow_create().
      *   muip      - unicast handle that is linked onto
      *               mcip->mci_unicast_list. It is not touched for clients
      *               flagged MCIS_NO_UNICAST_ADDR.
      *
      * Returns 0 on success. Otherwise returns EINVAL or the error reported by
      * mac_start(), mac_bcast_add(), mac_unicast_flow_create(),
      * mac_datapath_setup() or mac_flow_add(), after undoing whichever of the
      * broadcast membership, the mi_nactiveclients increment and the
      * mac_start() were done here.
2288  */
2289 static int
2290 mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid,
2291     uint8_t *mac_addr, mac_resource_props_t *mrp, boolean_t isprimary,
2292     mac_unicast_impl_t *muip)
2293 {
2294         mac_impl_t      *mip = mcip->mci_mip;
             /* The next three flags record which steps the bail path must undo. */
2295         boolean_t       mac_started = B_FALSE;
2296         boolean_t       bcast_added = B_FALSE;
2297         boolean_t       nactiveclients_added = B_FALSE;
2298         flow_entry_t    *flent;
2299         int             err = 0;
2300         boolean_t       no_unicast;
2301
             /*
              * Holds the raw masked flag bits, so it is only meaningful as a
              * zero / non-zero truth value.
              */
2302         no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2303
2304         if ((err = mac_start((mac_handle_t)mip)) != 0)
2305                 goto bail;
2306
2307         mac_started = B_TRUE;
2308
2309         /* add the MAC client to the broadcast address group by default */
2310         if (mip->mi_type->mt_brdcst_addr != NULL) {
2311                 err = mac_bcast_add(mcip, mip->mi_type->mt_brdcst_addr, vid,
2312                     MAC_ADDRTYPE_BROADCAST);
2313                 if (err != 0)
2314                         goto bail;
2315                 bcast_added = B_TRUE;
2316         }
2317
2318         /*
2319          * If this is the first unicast address addition for this
2320          * client, reuse the pre-allocated larval flow entry associated with
2321          * the MAC client. Otherwise flent starts out NULL and is filled in
              * by mac_unicast_flow_create() below.
2322          */
2323         flent = (mcip->mci_nflents == 0) ? mcip->mci_flent : NULL;
2324
2325         /* We are configuring the unicast flow now */
2326         if (!MCIP_DATAPATH_SETUP(mcip)) {
2327
2328                 if (mrp != NULL) {
2329                         MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
2330                             (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority :
2331                             MPL_LINK_DEFAULT);
2332                 }
2333                 if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
2334                     isprimary, B_TRUE, &flent, mrp)) != 0)
2335                         goto bail;
2336
                     /*
                      * Counted before mac_datapath_setup() so that a failure
                      * there is rolled back through nactiveclients_added.
                      */
2337                 mip->mi_nactiveclients++;
2338                 nactiveclients_added = B_TRUE;
2339
2340                 /*
2341                  * This will allocate the RX ring group if possible for the
2342                  * flow and program the software classifier as needed.
2343                  */
2344                 if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0)
2345                         goto bail;
2346
                     /*
                      * Clients without a unicast address skip the sub-flow push
                      * and the unicast handle linkage further down.
                      */
2347                 if (no_unicast)
2348                         goto done_setup;
2349                 /*
2350                  * The unicast MAC address must have been added successfully.
2351                  */
2352                 ASSERT(mcip->mci_unicast != NULL);
2353
2354                 /*
2355                  * Push down the sub-flows that were defined on this link
2356                  * hitherto. The flows are added to the active flow table
2357                  * and SRS, softrings etc. are created as needed.
2358                  */
2359                 mac_link_init_flows((mac_client_handle_t)mcip);
2360         } else {
2361                 mac_address_t *map = mcip->mci_unicast;
2362
2363                 ASSERT(!no_unicast);
2364                 /*
2365                  * A unicast flow already exists for that MAC client
2366                  * so this flow must be the same MAC address but with
2367                  * a different VID. It has been checked by
2368                  * mac_addr_in_use().
2369                  *
2370                  * We will use the SRS etc. from the initial
2371                  * mci_flent. We don't need to create a kstat for
2372                  * this, as except for the fdesc, everything will be
2373                  * used from the first flent.
2374                  *
2375                  * The only time we should see multiple flents on the
2376                  * same MAC client is on the sun4v vsw. If we removed
2377                  * that code we should be able to remove the entire
2378                  * notion of multiple flents on a MAC client (this
2379                  * doesn't affect sub/user flows because they have
2380                  * their own list unrelated to mci_flent_list).
2381                  */
2382                 if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) {
2383                         err = EINVAL;
2384                         goto bail;
2385                 }
2386
2387                 if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
2388                     isprimary, B_FALSE, &flent, NULL)) != 0) {
2389                         goto bail;
2390                 }
                     /* Drop the newly created flow entry if it cannot be added. */
2391                 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) {
2392                         FLOW_FINAL_REFRELE(flent);
2393                         goto bail;
2394                 }
2395
2396                 /* update the multicast group for this vid */
2397                 mac_client_bcast_refresh(mcip, mac_client_update_mcast,
2398                     (void *)flent, B_TRUE);
2399
2400         }
2401
2402         /* populate the shared MAC address */
2403         muip->mui_map = mcip->mci_unicast;
2404
             /* Push muip onto the head of the client's unicast handle list. */
2405         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
2406         muip->mui_next = mcip->mci_unicast_list;
2407         mcip->mci_unicast_list = muip;
2408         rw_exit(&mcip->mci_rw_lock);
2409
2410 done_setup:
2411         /*
2412          * First add the flent to the flow list of this mcip. Then set
2413          * the mip's mi_single_active_client if needed. The Rx path assumes
2414          * that mip->mi_single_active_client will always have an associated
2415          * flent.
2416          */
2417         mac_client_add_to_flow_list(mcip, flent);
2418         if (nactiveclients_added)
2419                 mac_update_single_active_client(mip);
2420         /*
2421          * Trigger a renegotiation of the capabilities when the number of
2422          * active clients changes from 1 to 2, since some of the capabilities
2423          * might have to be disabled. Also send a MAC_NOTE_LINK notification
2424          * to all the MAC clients whenever physical link is DOWN.
              *
              * NOTE(review): the test below is on the current count only, so
              * it also fires when a further VID is added while the count is
              * already 2 - confirm that this is intended.
2425          */
2426         if (mip->mi_nactiveclients == 2) {
2427                 mac_capab_update((mac_handle_t)mip);
2428                 mac_virtual_link_update(mip);
2429         }
2430         /*
2431          * Now that the setup is complete, clear the INCIPIENT flag.
2432          * The flag was set to avoid incoming packets seeing inconsistent
2433          * structures while the setup was in progress. Clear the mci_tx_flag
2434          * by calling mac_tx_client_unblock. It is possible that
2435          * mac_unicast_remove was called prior to this mac_unicast_add which
2436          * could have set the MCI_TX_QUIESCE flag.
2437          */
2438         if (flent->fe_rx_ring_group != NULL)
2439                 mac_rx_group_unmark(flent->fe_rx_ring_group, MR_INCIPIENT);
2440         FLOW_UNMARK(flent, FE_INCIPIENT);
2441
2442         /*
2443          * If this is an aggr port client, don't enable the flow's
2444          * datapath at this stage. Otherwise, bcast traffic could
2445          * arrive while the aggr port is in the process of
2446          * initializing. Instead, the flow's datapath is started later
2447          * when mac_client_set_flow_cb() is called.
2448          */
2449         if ((mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0)
2450                 FLOW_UNMARK(flent, FE_MC_NO_DATAPATH);
2451
2452         mac_tx_client_unblock(mcip);
2453         return (0);
2454 bail:
             /* Undo only the steps that completed, as recorded by the flags. */
2455         if (bcast_added)
2456                 mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, vid);
2457
2458         if (nactiveclients_added)
2459                 mip->mi_nactiveclients--;
2460
2461         if (mac_started)
2462                 mac_stop((mac_handle_t)mip);
2463
2464         return (err);
2465 }
2466 
2467 /*
2468  * Return the passive primary MAC client, if present. The passive client is
2469  * a stand-by client that has the same unicast address as another that is
2470  * currenly active. Once the active client goes away, the passive client
2471  * becomes active.
2472  */
2473 static mac_client_impl_t *
2474 mac_get_passive_primary_client(mac_impl_t *mip)
2475 {
2476         mac_client_impl_t       *mcip;
2477 
2478         for (mcip = mip->mi_clients_list; mcip != NULL;
2479             mcip = mcip->mci_client_next) {
2480                 if (mac_is_primary_client(mcip) &&
2481                     (mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
2482                         return (mcip);
2483                 }
2484         }
2485         return (NULL);
2486 }
2487 
2488 /*
2489  * Add a new unicast address to the MAC client.
2490  *
2491  * The MAC address can be specified either by value, or the MAC client
2492  * can specify that it wants to use the primary MAC address of the
2493  * underlying MAC. See the introductory comments at the beginning
2494  * of this file for more information on primary MAC addresses.
2495  *
2496  * Note also the tuple (MAC address, VID) must be unique
2497  * for the MAC clients defined on top of the same underlying MAC
2498  * instance, unless the MAC_UNICAST_NODUPCHECK is specified.
2499  *
2500  * In no case can a client use the PVID for the MAC, if the MAC has one set.
      *
      * Arguments:
      *   mch      - handle of the MAC client.
      *   mac_addr - address to add. Replaced by the underlying MAC's mi_addr
      *              when MAC_UNICAST_PRIMARY or MAC_UNICAST_VNIC_PRIMARY is
      *              set in flags.
      *   flags    - MAC_UNICAST_* bits (NODUPCHECK, PRIMARY, HW, VNIC_PRIMARY,
      *              TAG_DISABLE, STRIP_DISABLE, DISABLE_TX_VID_CHECK).
      *   mah      - out: the newly allocated unicast handle. Written on
      *              success and also when EAGAIN is returned.
      *   vid      - VLAN ID for this address.
      *   diag     - out: set to MAC_DIAG_MACADDR_INVALID, MAC_DIAG_MACADDR_NIC
      *              or MAC_DIAG_MACADDR_INUSE on the corresponding address
      *              failures. Dereferenced without a NULL check.
      *
      * Return values:
      *   0        - address added, or the client was recorded as the passive
      *              primary (handle stashed in mci_p_unicast_list, data path
      *              not set up here).
      *   EAGAIN   - upper client of a VNIC that became a passive client; the
      *              caller uses this to defer setting the rx functions.
      *   ENOTSUP  - client has MCIS_NO_UNICAST_ADDR, or a VLAN VNIC sharing
      *              the primary address has ring properties set.
      *   EBUSY    - vid is the MAC's PVID, the VNIC primary address was
      *              already added, the MAC is MIS_EXCLUSIVE or MIS_NO_ACTIVE,
      *              or an exclusive client cannot get exclusive access.
      *   ENXIO    - primary client requested on top of an anchor VNIC.
      *   EINVAL   - invalid address, address equal to the NIC's own (with
      *              duplicate checking on), or inconsistent MAC_UNICAST_HW.
      *   EEXIST   - address already in use on this MAC.
      *   other    - error from mac_fastpath_disable() or
      *              mac_client_datapath_setup().
      *
      * mac_unicast_add() calls this function between i_mac_perim_enter() and
      * i_mac_perim_exit() on the underlying MAC.
2501  */
2502 int
2503 i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
2504     mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
2505 {
2506         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2507         mac_impl_t              *mip = mcip->mci_mip;
2508         int                     err;
2509         uint_t                  mac_len = mip->mi_type->mt_addr_length;
             /*
              * The boolean_t locals initialised from a bare mask hold the raw
              * masked bits and are only used as zero / non-zero truth values.
              */
2510         boolean_t               check_dups = !(flags & MAC_UNICAST_NODUPCHECK);
2511         boolean_t               fastpath_disabled = B_FALSE;
2512         boolean_t               is_primary = (flags & MAC_UNICAST_PRIMARY);
2513         boolean_t               is_unicast_hw = (flags & MAC_UNICAST_HW);
2514         mac_resource_props_t    *mrp;
2515         boolean_t               passive_client = B_FALSE;
2516         mac_unicast_impl_t      *muip;
2517         boolean_t               is_vnic_primary =
2518             (flags & MAC_UNICAST_VNIC_PRIMARY);
2519
2520         /*
2521          * When the VID is non-zero the underlying MAC cannot be a
2522          * VNIC. I.e., dladm create-vlan cannot take a VNIC as
2523          * argument, only the primary MAC client.
2524          */
2525         ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE)));
2526
2527         /*
2528          * Can't unicast add if the client asked only for minimal datapath
2529          * setup.
2530          */
2531         if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR)
2532                 return (ENOTSUP);
2533
2534         /*
2535          * Check for an attempted use of the current Port VLAN ID, if enabled.
2536          * No client may use it.
2537          */
2538         if (mip->mi_pvid != VLAN_ID_NONE && vid == mip->mi_pvid)
2539                 return (EBUSY);
2540
2541         /*
2542          * Check whether it's the primary client and flag it.
              *
              * NOTE(review): none of the failure paths below in this function
              * clear MAC_CLIENT_FLAGS_PRIMARY again - confirm that this is
              * handled elsewhere or is harmless.
2543          */
2544         if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
2545             vid == VLAN_ID_NONE)
2546                 mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY;
2547
2548         /*
2549          * is_vnic_primary is true when we come here as a VLAN VNIC
2550          * which uses the primary MAC client's address but with a non-zero
2551          * VID. In this case the MAC address is not specified by an upper
2552          * MAC client.
2553          */
2554         if ((mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
2555             !is_vnic_primary) {
2556                 /*
2557                  * The address is being set by the upper MAC client
2558                  * of a VNIC. The MAC address was already set by the
2559                  * VNIC driver during VNIC creation.
2560                  *
2561                  * Note: a VNIC has only one MAC address. We return
2562                  * the MAC unicast address handle of the lower MAC client
2563                  * corresponding to the VNIC. We allocate a new entry
2564                  * which is flagged appropriately, so that mac_unicast_remove()
2565                  * doesn't attempt to free the original entry that
2566                  * was allocated by the VNIC driver.
2567                  */
2568                 ASSERT(mcip->mci_unicast != NULL);
2569
2570                 /* Check for VLAN flags, if present */
2571                 if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
2572                         mcip->mci_state_flags |= MCIS_TAG_DISABLE;
2573
2574                 if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
2575                         mcip->mci_state_flags |= MCIS_STRIP_DISABLE;
2576
2577                 if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
2578                         mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;
2579
2580                 /*
2581                  * Ensure that the primary unicast address of the VNIC
2582                  * is added only once unless we have the
2583                  * MAC_CLIENT_FLAGS_MULTI_PRIMARY set (and this is not
2584                  * a passive MAC client).
2585                  */
2586                 if ((mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) != 0) {
2587                         if ((mcip->mci_flags &
2588                             MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
2589                             (mcip->mci_flags &
2590                             MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
2591                                 return (EBUSY);
2592                         }
2593                         mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
2594                         passive_client = B_TRUE;
2595                 }
2596
2597                 mcip->mci_flags |= MAC_CLIENT_FLAGS_VNIC_PRIMARY;
2598
2599                 /*
2600                  * Create a handle for vid 0.
2601                  */
2602                 ASSERT(vid == VLAN_ID_NONE);
2603                 muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
2604                 muip->mui_vid = vid;
2605                 *mah = (mac_unicast_handle_t)muip;
2606                 /*
2607                  * This will be used by the caller to defer setting the
2608                  * rx functions.
2609                  */
2610                 if (passive_client)
2611                         return (EAGAIN);
2612                 return (0);
2613         }
2614
2615         /* primary MAC clients cannot be opened on top of anchor VNICs */
2616         if ((is_vnic_primary || is_primary) &&
2617             i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
2618                 return (ENXIO);
2619         }
2620
2621         /*
2622          * If this is a VNIC/VLAN, disable softmac fast-path. This is
2623          * only relevant to legacy devices which use softmac to
2624          * interface with GLDv3. fastpath_disabled records that later
              * failure paths must re-enable it.
2625          */
2626         if (mcip->mci_state_flags & MCIS_IS_VNIC) {
2627                 err = mac_fastpath_disable((mac_handle_t)mip);
2628                 if (err != 0)
2629                         return (err);
2630                 fastpath_disabled = B_TRUE;
2631         }
2632
2633         /*
2634          * Return EBUSY if:
2635          *  - an exclusively active mac client exists.
2636          *  - this is an exclusive active mac client but
2637          *      a. active mac clients already exist, or
2638          *      b. fastpath streams are already plumbed on this legacy device
2639          *  - the mac creator has disallowed active mac clients.
2640          */
2641         if (mip->mi_state_flags & (MIS_EXCLUSIVE|MIS_NO_ACTIVE)) {
2642                 if (fastpath_disabled)
2643                         mac_fastpath_enable((mac_handle_t)mip);
2644                 return (EBUSY);
2645         }
2646
2647         if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
                     /*
                      * An exclusive client is not expected to be a VNIC, so
                      * the plain returns below have no fastpath state to undo.
                      */
2648                 ASSERT(!fastpath_disabled);
2649                 if (mip->mi_nactiveclients != 0)
2650                         return (EBUSY);
2651
2652                 if ((mip->mi_state_flags & MIS_LEGACY) &&
2653                     !(mip->mi_capab_legacy.ml_active_set(mip->mi_driver))) {
2654                         return (EBUSY);
2655                 }
2656                 mip->mi_state_flags |= MIS_EXCLUSIVE;
2657         }
2658
             /*
              * mrp is a scratch copy of the resource properties handed to
              * mac_client_datapath_setup(); every exit path from here on
              * frees it.
              */
2659         mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
2660         if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC |
2661             MCIS_IS_AGGR_PORT))) {
2662                 /*
2663                  * Apply the property cached in the mac_impl_t to the primary
2664                  * mac client. If the mac client is a VNIC or an aggregation
2665                  * port, its property should be set in the mcip when the
2666                  * VNIC/aggr was created.
2667                  */
2668                 mac_get_resources((mac_handle_t)mip, mrp);
2669                 (void) mac_client_set_resources(mch, mrp);
2670         } else if (mcip->mci_state_flags & MCIS_IS_VNIC) {
2671                 /*
2672                  * This is a VLAN client sharing the address of the
2673                  * primary MAC client; i.e., one created via dladm
2674                  * create-vlan. We don't support specifying ring
2675                  * properties for this type of client as it inherits
2676                  * these from the primary MAC client.
2677                  */
2678                 if (is_vnic_primary) {
2679                         mac_resource_props_t    *vmrp;
2680
2681                         vmrp = MCIP_RESOURCE_PROPS(mcip);
2682                         if (vmrp->mrp_mask & MRP_RX_RINGS ||
2683                             vmrp->mrp_mask & MRP_TX_RINGS) {
2684                                 if (fastpath_disabled)
2685                                         mac_fastpath_enable((mac_handle_t)mip);
2686                                 kmem_free(mrp, sizeof (*mrp));
2687                                 return (ENOTSUP);
2688                         }
2689                         /*
2690                          * Additionally we also need to inherit any
2691                          * rings property from the MAC.
2692                          */
2693                         mac_get_resources((mac_handle_t)mip, mrp);
2694                         if (mrp->mrp_mask & MRP_RX_RINGS) {
2695                                 vmrp->mrp_mask |= MRP_RX_RINGS;
2696                                 vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
2697                         }
2698                         if (mrp->mrp_mask & MRP_TX_RINGS) {
2699                                 vmrp->mrp_mask |= MRP_TX_RINGS;
2700                                 vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
2701                         }
2702                 }
                     /* The client's own properties replace the scratch copy. */
2703                 bcopy(MCIP_RESOURCE_PROPS(mcip), mrp, sizeof (*mrp));
2704         }
2705
             /*
              * The unicast handle: freed at bail_out on failure, otherwise
              * returned to the caller through *mah.
              */
2706         muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
2707         muip->mui_vid = vid;
2708
2709         if (is_primary || is_vnic_primary) {
                     /* Primary requests always use the underlying MAC's address. */
2710                 mac_addr = mip->mi_addr;
2711         } else {
2712
2713                 /*
2714                  * Verify the validity of the specified MAC addresses value.
2715                  */
2716                 if (!mac_unicst_verify((mac_handle_t)mip, mac_addr, mac_len)) {
2717                         *diag = MAC_DIAG_MACADDR_INVALID;
2718                         err = EINVAL;
2719                         goto bail_out;
2720                 }
2721
2722                 /*
2723                  * Make sure that the specified MAC address is different
2724                  * than the unicast MAC address of the underlying NIC.
2725                  */
2726                 if (check_dups && bcmp(mip->mi_addr, mac_addr, mac_len) == 0) {
2727                         *diag = MAC_DIAG_MACADDR_NIC;
2728                         err = EINVAL;
2729                         goto bail_out;
2730                 }
2731         }
2732
2733         /*
2734          * Set the flags here so that if this is a passive client, we
2735          * can return and set it when we call mac_client_datapath_setup
2736          * when this becomes the active client. If we defer to using these
2737          * flags to mac_client_datapath_setup, then for a passive client,
2738          * we'd have to store the flags somewhere (probably fe_flags)
2739          * and then use it.
2740          */
2741         if (!MCIP_DATAPATH_SETUP(mcip)) {
2742                 if (is_unicast_hw) {
2743                         /*
2744                          * The client requires a hardware MAC address slot
2745                          * for that unicast address. Since we support only
2746                          * one unicast MAC address per client, flag the
2747                          * MAC client itself.
2748                          */
2749                         mcip->mci_state_flags |= MCIS_UNICAST_HW;
2750                 }
2751
2752                 /* Check for VLAN flags, if present */
2753                 if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
2754                         mcip->mci_state_flags |= MCIS_TAG_DISABLE;
2755
2756                 if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
2757                         mcip->mci_state_flags |= MCIS_STRIP_DISABLE;
2758
2759                 if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
2760                         mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;
2761         } else {
2762                 /*
2763                  * Assert that the specified flags are consistent with the
2764                  * flags specified by previous calls to mac_unicast_add().
2765                  */
2766                 ASSERT(((flags & MAC_UNICAST_TAG_DISABLE) != 0 &&
2767                     (mcip->mci_state_flags & MCIS_TAG_DISABLE) != 0) ||
2768                     ((flags & MAC_UNICAST_TAG_DISABLE) == 0 &&
2769                     (mcip->mci_state_flags & MCIS_TAG_DISABLE) == 0));
2770
2771                 ASSERT(((flags & MAC_UNICAST_STRIP_DISABLE) != 0 &&
2772                     (mcip->mci_state_flags & MCIS_STRIP_DISABLE) != 0) ||
2773                     ((flags & MAC_UNICAST_STRIP_DISABLE) == 0 &&
2774                     (mcip->mci_state_flags & MCIS_STRIP_DISABLE) == 0));
2775
2776                 ASSERT(((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0 &&
2777                     (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) != 0) ||
2778                     ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) == 0 &&
2779                     (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) == 0));
2780
2781                 /*
2782                  * Make sure the client is consistent about its requests
2783                  * for MAC addresses. I.e. all requests from the clients
2784                  * must have the MAC_UNICAST_HW flag set or clear.
2785                  */
2786                 if (((mcip->mci_state_flags & MCIS_UNICAST_HW) != 0 &&
2787                     !is_unicast_hw) ||
2788                     ((mcip->mci_state_flags & MCIS_UNICAST_HW) == 0 &&
2789                     is_unicast_hw)) {
2790                         err = EINVAL;
2791                         goto bail_out;
2792                 }
2793         }
2794         /*
2795          * Make sure the MAC address is not already used by
2796          * another MAC client defined on top of the same
2797          * underlying NIC. Unless we have MAC_CLIENT_FLAGS_MULTI_PRIMARY
2798          * set when we allow a passive client to be present which will
2799          * be activated when the currently active client goes away - this
2800          * works only with primary addresses.
2801          */
2802         if ((check_dups || is_primary || is_vnic_primary) &&
2803             mac_addr_in_use(mip, mac_addr, vid)) {
2804                 /*
2805                  * Must have set the multiple primary address flag when
2806                  * we did a mac_client_open AND this should be a primary
2807                  * MAC client AND there should not already be a passive
2808                  * primary. If all is true then we let this succeed
2809                  * even if the address is a dup.
2810                  */
2811                 if ((mcip->mci_flags & MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
2812                     (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) == 0 ||
2813                     mac_get_passive_primary_client(mip) != NULL) {
2814                         *diag = MAC_DIAG_MACADDR_INUSE;
2815                         err = EEXIST;
2816                         goto bail_out;
2817                 }
2818                 ASSERT((mcip->mci_flags &
2819                     MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0);
2820                 mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
2821                 kmem_free(mrp, sizeof (*mrp));
2822
2823                 /*
2824                  * Stash the unicast address handle, we will use it when
2825                  * we set up the passive client. No data path is set up
                      * on this path; the call still reports success.
2826                  */
2827                 mcip->mci_p_unicast_list = muip;
2828                 *mah = (mac_unicast_handle_t)muip;
2829                 return (0);
2830         }
2831
2832         err = mac_client_datapath_setup(mcip, vid, mac_addr, mrp,
2833             is_primary || is_vnic_primary, muip);
2834         if (err != 0)
2835                 goto bail_out;
2836
2837         kmem_free(mrp, sizeof (*mrp));
2838         *mah = (mac_unicast_handle_t)muip;
2839         return (0);
2840
2841 bail_out:
             /*
              * Common failure exit once mrp and muip exist: re-enable the
              * fastpath if it was disabled here, drop the exclusive marking
              * of the MAC for an exclusive client, and free both allocations.
              */
2842         if (fastpath_disabled)
2843                 mac_fastpath_enable((mac_handle_t)mip);
2844         if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
2845                 mip->mi_state_flags &= ~MIS_EXCLUSIVE;
2846                 if (mip->mi_state_flags & MIS_LEGACY) {
2847                         mip->mi_capab_legacy.ml_active_clear(
2848                             mip->mi_driver);
2849                 }
2850         }
2851         kmem_free(mrp, sizeof (*mrp));
2852         kmem_free(muip, sizeof (mac_unicast_impl_t));
2853         return (err);
2854 }
2855 
2856 /*
2857  * Wrapper function to mac_unicast_add when we want to have the same mac
2858  * client open for two instances, one that is currently active and another
2859  * that will become active when the current one is removed. In this case
2860  * mac_unicast_add will return EGAIN and we will save the rx function and
2861  * arg which will be used when we activate the passive client in
2862  * mac_unicast_remove.
2863  */
2864 int
2865 mac_unicast_add_set_rx(mac_client_handle_t mch, uint8_t *mac_addr,
2866     uint16_t flags, mac_unicast_handle_t *mah,  uint16_t vid, mac_diag_t *diag,
2867     mac_rx_t rx_fn, void *arg)
2868 {
2869         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2870         uint_t                  err;
2871 
2872         err = mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
2873         if (err != 0 && err != EAGAIN)
2874                 return (err);
2875         if (err == EAGAIN) {
2876                 if (rx_fn != NULL) {
2877                         mcip->mci_rx_p_fn = rx_fn;
2878                         mcip->mci_rx_p_arg = arg;
2879                 }
2880                 return (0);
2881         }
2882         if (rx_fn != NULL)
2883                 mac_rx_set(mch, rx_fn, arg);
2884         return (err);
2885 }
2886 
2887 int
2888 mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
2889     mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
2890 {
2891         mac_impl_t *mip = ((mac_client_impl_t *)mch)->mci_mip;
2892         uint_t err;
2893 
2894         i_mac_perim_enter(mip);
2895         err = i_mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
2896         i_mac_perim_exit(mip);
2897 
2898         return (err);
2899 }
2900 
2901 static void
2902 mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip,
2903     flow_entry_t *flent)
2904 {
2905         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2906         mac_impl_t              *mip = mcip->mci_mip;
2907         boolean_t               no_unicast;
2908 
2909         /*
2910          * If we have not added a unicast address for this MAC client, just
2911          * teardown the datapath.
2912          */
2913         no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2914 
2915         if (!no_unicast) {
2916                 /*
2917                  * We would have initialized subflows etc. only if we brought
2918                  * up the primary client and set the unicast unicast address
2919                  * etc. Deactivate the flows. The flow entry will be removed
2920                  * from the active flow tables, and the associated SRS,
2921                  * softrings etc will be deleted. But the flow entry itself
2922                  * won't be destroyed, instead it will continue to be archived
2923                  * off the  the global flow hash list, for a possible future
2924                  * activation when say IP is plumbed again.
2925                  */
2926                 mac_link_release_flows(mch);
2927         }
2928         mip->mi_nactiveclients--;
2929         mac_update_single_active_client(mip);
2930 
2931         /* Tear down the data path */
2932         mac_datapath_teardown(mcip, mcip->mci_flent, SRST_LINK);
2933 
2934         /*
2935          * Prevent any future access to the flow entry through the mci_flent
2936          * pointer by setting the mci_flent to NULL. Access to mci_flent in
2937          * mac_bcast_send is also under mi_rw_lock.
2938          */
2939         rw_enter(&mip->mi_rw_lock, RW_WRITER);
2940         flent = mcip->mci_flent;
2941         mac_client_remove_flow_from_list(mcip, flent);
2942 
2943         if (mcip->mci_state_flags & MCIS_DESC_LOGGED)
2944                 mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;
2945 
2946         /*
2947          * This is the last unicast address being removed and there shouldn't
2948          * be any outbound data threads at this point coming down from mac
2949          * clients. We have waited for the data threads to finish before
2950          * starting dld_str_detach. Non-data threads must access TX SRS
2951          * under mi_rw_lock.
2952          */
2953         rw_exit(&mip->mi_rw_lock);
2954 
2955         /*
2956          * Don't use FLOW_MARK with FE_MC_NO_DATAPATH, as the flow might
2957          * contain other flags, such as FE_CONDEMNED, which we need to
2958          * cleared. We don't call mac_flow_cleanup() for this unicast
2959          * flow as we have a already cleaned up SRSs etc. (via the teadown
2960          * path). We just clear the stats and reset the initial callback
2961          * function, the rest will be set when we call mac_flow_create,
2962          * if at all.
2963          */
2964         mutex_enter(&flent->fe_lock);
2965         ASSERT(flent->fe_refcnt == 1 && flent->fe_mbg == NULL &&
2966             flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0);
2967         flent->fe_flags = FE_MC_NO_DATAPATH;
2968         flow_stat_destroy(flent);
2969         mac_misc_stat_delete(flent);
2970 
2971         /* Initialize the receiver function to a safe routine */
2972         flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
2973         flent->fe_cb_arg1 = NULL;
2974         flent->fe_cb_arg2 = NULL;
2975 
2976         flent->fe_index = -1;
2977         mutex_exit(&flent->fe_lock);
2978 
2979         if (mip->mi_type->mt_brdcst_addr != NULL) {
2980                 ASSERT(muip != NULL || no_unicast);
2981                 mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
2982                     muip != NULL ? muip->mui_vid : VLAN_ID_NONE);
2983         }
2984 
2985         if (mip->mi_nactiveclients == 1) {
2986                 mac_capab_update((mac_handle_t)mip);
2987                 mac_virtual_link_update(mip);
2988         }
2989 
2990         if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
2991                 mip->mi_state_flags &= ~MIS_EXCLUSIVE;
2992 
2993                 if (mip->mi_state_flags & MIS_LEGACY)
2994                         mip->mi_capab_legacy.ml_active_clear(mip->mi_driver);
2995         }
2996 
2997         mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
2998 
2999         if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
3000                 mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
3001 
3002         if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
3003                 mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
3004 
3005         if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
3006                 mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
3007 
3008         if (muip != NULL)
3009                 kmem_free(muip, sizeof (mac_unicast_impl_t));
3010         mac_protect_cancel_timer(mcip);
3011         mac_protect_flush_dynamic(mcip);
3012 
3013         bzero(&mcip->mci_misc_stat, sizeof (mcip->mci_misc_stat));
        /*
         * Enable fastpath if this is a VNIC or a VLAN.
         */
3017         if (mcip->mci_state_flags & MCIS_IS_VNIC)
3018                 mac_fastpath_enable((mac_handle_t)mip);
3019         mac_stop((mac_handle_t)mip);
3020 }
3021 
3022 /*
3023  * Remove a MAC address which was previously added by mac_unicast_add().
3024  */
3025 int
3026 mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
3027 {
3028         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3029         mac_unicast_impl_t *muip = (mac_unicast_impl_t *)mah;
3030         mac_unicast_impl_t *pre;
3031         mac_impl_t *mip = mcip->mci_mip;
3032         flow_entry_t            *flent;
3033         uint16_t mui_vid;
3034 
3035         i_mac_perim_enter(mip);
3036         if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) {
3037                 /*
3038                  * Call made by the upper MAC client of a VNIC.
3039                  * There's nothing much to do, the unicast address will
3040                  * be removed by the VNIC driver when the VNIC is deleted,
3041                  * but let's ensure that all our transmit is done before
3042                  * the client does a mac_client_stop lest it trigger an
3043                  * assert in the driver.
3044                  */
3045                 ASSERT(muip->mui_vid == VLAN_ID_NONE);
3046 
3047                 mac_tx_client_flush(mcip);
3048 
3049                 if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
3050                         mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3051                         if (mcip->mci_rx_p_fn != NULL) {
3052                                 mac_rx_set(mch, mcip->mci_rx_p_fn,
3053                                     mcip->mci_rx_p_arg);
3054                                 mcip->mci_rx_p_fn = NULL;
3055                                 mcip->mci_rx_p_arg = NULL;
3056                         }
3057                         kmem_free(muip, sizeof (mac_unicast_impl_t));
3058                         i_mac_perim_exit(mip);
3059                         return (0);
3060                 }
3061                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_VNIC_PRIMARY;
3062 
3063                 if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
3064                         mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
3065 
3066                 if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
3067                         mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
3068 
3069                 if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
3070                         mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
3071 
3072                 kmem_free(muip, sizeof (mac_unicast_impl_t));
3073                 i_mac_perim_exit(mip);
3074                 return (0);
3075         }
3076 
3077         ASSERT(muip != NULL);
3078 
3079         /*
3080          * We are removing a passive client, we haven't setup the datapath
3081          * for this yet, so nothing much to do.
3082          */
3083         if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
3084 
3085                 ASSERT((mcip->mci_flent->fe_flags & FE_MC_NO_DATAPATH) != 0);
3086                 ASSERT(mcip->mci_p_unicast_list == muip);
3087 
3088                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3089 
3090                 mcip->mci_p_unicast_list = NULL;
3091                 mcip->mci_rx_p_fn = NULL;
3092                 mcip->mci_rx_p_arg = NULL;
3093 
3094                 mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
3095 
3096                 if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
3097                         mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
3098 
3099                 if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
3100                         mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
3101 
3102                 if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
3103                         mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
3104 
3105                 kmem_free(muip, sizeof (mac_unicast_impl_t));
3106                 i_mac_perim_exit(mip);
3107                 return (0);
3108         }
3109 
3110         /*
3111          * Remove the VID from the list of client's VIDs.
3112          */
3113         pre = mcip->mci_unicast_list;
3114         if (muip == pre) {
3115                 mcip->mci_unicast_list = muip->mui_next;
3116         } else {
3117                 while ((pre->mui_next != NULL) && (pre->mui_next != muip))
3118                         pre = pre->mui_next;
3119                 ASSERT(pre->mui_next == muip);
3120                 rw_enter(&mcip->mci_rw_lock, RW_WRITER);
3121                 pre->mui_next = muip->mui_next;
3122                 rw_exit(&mcip->mci_rw_lock);
3123         }
3124 
3125         if (!mac_client_single_rcvr(mcip)) {
3126                 /*
3127                  * This MAC client is shared by more than one unicast
3128                  * addresses, so we will just remove the flent
3129                  * corresponding to the address being removed. We don't invoke
3130                  * mac_rx_classify_flow_rem() since the additional flow is
3131                  * not associated with its own separate set of SRS and rings,
3132                  * and these constructs are still needed for the remaining
3133                  * flows.
3134                  */
3135                 flent = mac_client_get_flow(mcip, muip);
3136                 VERIFY3P(flent, !=, NULL);
3137 
3138                 /*
3139                  * The first one is disappearing, need to make sure
3140                  * we replace it with another from the list of
3141                  * shared clients.
3142                  */
3143                 if (flent == mcip->mci_flent)
3144                         flent = mac_client_swap_mciflent(mcip);
3145                 mac_client_remove_flow_from_list(mcip, flent);
3146                 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3147                 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3148 
3149                 /*
3150                  * The multicast groups that were added by the client so
3151                  * far must be removed from the brodcast domain corresponding
3152                  * to the VID being removed.
3153                  */
3154                 mac_client_bcast_refresh(mcip, mac_client_update_mcast,
3155                     (void *)flent, B_FALSE);
3156 
3157                 if (mip->mi_type->mt_brdcst_addr != NULL) {
3158                         mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
3159                             muip->mui_vid);
3160                 }
3161 
3162                 FLOW_FINAL_REFRELE(flent);
3163                 ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE));
3164 
3165                 /*
3166                  * Enable fastpath if this is a VNIC or a VLAN.
3167                  */
3168                 if (mcip->mci_state_flags & MCIS_IS_VNIC)
3169                         mac_fastpath_enable((mac_handle_t)mip);
3170                 mac_stop((mac_handle_t)mip);
3171                 i_mac_perim_exit(mip);
3172                 return (0);
3173         }
3174 
3175         mui_vid = muip->mui_vid;
3176         mac_client_datapath_teardown(mch, muip, flent);
3177 
3178         if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) &&
3179             mui_vid == VLAN_ID_NONE) {
3180                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY;
3181         } else {
3182                 i_mac_perim_exit(mip);
3183                 return (0);
3184         }
3185 
3186         /*
3187          * If we are removing the primary, check if we have a passive primary
3188          * client that we need to activate now.
3189          */
3190         mcip = mac_get_passive_primary_client(mip);
3191         if (mcip != NULL) {
3192                 mac_resource_props_t    *mrp;
3193                 mac_unicast_impl_t      *muip;
3194 
3195                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3196                 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
3197 
3198                 /*
3199                  * Apply the property cached in the mac_impl_t to the
3200                  * primary mac client.
3201                  */
3202                 mac_get_resources((mac_handle_t)mip, mrp);
3203                 (void) mac_client_set_resources(mch, mrp);
3204                 ASSERT(mcip->mci_p_unicast_list != NULL);
3205                 muip = mcip->mci_p_unicast_list;
3206                 mcip->mci_p_unicast_list = NULL;
3207                 if (mac_client_datapath_setup(mcip, VLAN_ID_NONE,
3208                     mip->mi_addr, mrp, B_TRUE, muip) == 0) {
3209                         if (mcip->mci_rx_p_fn != NULL) {
3210                                 mac_rx_set(mch, mcip->mci_rx_p_fn,
3211                                     mcip->mci_rx_p_arg);
3212                                 mcip->mci_rx_p_fn = NULL;
3213                                 mcip->mci_rx_p_arg = NULL;
3214                         }
3215                 } else {
3216                         kmem_free(muip, sizeof (mac_unicast_impl_t));
3217                 }
3218                 kmem_free(mrp, sizeof (*mrp));
3219         }
3220         i_mac_perim_exit(mip);
3221         return (0);
3222 }
3223 
3224 /*
3225  * Multicast add function invoked by MAC clients.
3226  */
3227 int
3228 mac_multicast_add(mac_client_handle_t mch, const uint8_t *addr)
3229 {
3230         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3231         mac_impl_t              *mip = mcip->mci_mip;
3232         flow_entry_t            *flent = mcip->mci_flent_list;
3233         flow_entry_t            *prev_fe = NULL;
3234         uint16_t                vid;
3235         int                     err = 0;
3236 
3237         /* Verify the address is a valid multicast address */
3238         if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
3239             mip->mi_pdata)) != 0)
3240                 return (err);
3241 
3242         i_mac_perim_enter(mip);
3243         while (flent != NULL) {
3244                 vid = i_mac_flow_vid(flent);
3245 
3246                 err = mac_bcast_add((mac_client_impl_t *)mch, addr, vid,
3247                     MAC_ADDRTYPE_MULTICAST);
3248                 if (err != 0)
3249                         break;
3250                 prev_fe = flent;
3251                 flent = flent->fe_client_next;
3252         }
3253 
3254         /*
3255          * If we failed adding, then undo all, rather than partial
3256          * success.
3257          */
3258         if (flent != NULL && prev_fe != NULL) {
3259                 flent = mcip->mci_flent_list;
3260                 while (flent != prev_fe->fe_client_next) {
3261                         vid = i_mac_flow_vid(flent);
3262                         mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
3263                         flent = flent->fe_client_next;
3264                 }
3265         }
3266         i_mac_perim_exit(mip);
3267         return (err);
3268 }
3269 
3270 /*
3271  * Multicast delete function invoked by MAC clients.
3272  */
3273 void
3274 mac_multicast_remove(mac_client_handle_t mch, const uint8_t *addr)
3275 {
3276         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3277         mac_impl_t              *mip = mcip->mci_mip;
3278         flow_entry_t            *flent;
3279         uint16_t                vid;
3280 
3281         i_mac_perim_enter(mip);
3282         for (flent = mcip->mci_flent_list; flent != NULL;
3283             flent = flent->fe_client_next) {
3284                 vid = i_mac_flow_vid(flent);
3285                 mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
3286         }
3287         i_mac_perim_exit(mip);
3288 }
3289 
3290 /*
3291  * When a MAC client desires to capture packets on an interface,
3292  * it registers a promiscuous call back with mac_promisc_add().
3293  * There are three types of promiscuous callbacks:
3294  *
3295  * * MAC_CLIENT_PROMISC_ALL
3296  *   Captures all packets sent and received by the MAC client,
3297  *   the physical interface, as well as all other MAC clients
3298  *   defined on top of the same MAC.
3299  *
3300  * * MAC_CLIENT_PROMISC_FILTERED
3301  *   Captures all packets sent and received by the MAC client,
 *   plus all multicast traffic sent and received by the physical
3303  *   interface and the other MAC clients.
3304  *
3305  * * MAC_CLIENT_PROMISC_MULTI
3306  *   Captures all broadcast and multicast packets sent and
3307  *   received by the MAC clients as well as the physical interface.
3308  *
3309  * In all cases, the underlying MAC is put in promiscuous mode.
3310  */
3311 int
3312 mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
3313     mac_rx_t fn, void *arg, mac_promisc_handle_t *mphp, uint16_t flags)
3314 {
3315         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3316         mac_impl_t *mip = mcip->mci_mip;
3317         mac_promisc_impl_t *mpip;
3318         mac_cb_info_t   *mcbi;
3319         int rc;
3320 
3321         i_mac_perim_enter(mip);
3322 
3323         if ((rc = mac_start((mac_handle_t)mip)) != 0) {
3324                 i_mac_perim_exit(mip);
3325                 return (rc);
3326         }
3327 
3328         if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
3329             type == MAC_CLIENT_PROMISC_ALL &&
3330             (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED)) {
3331                 /*
3332                  * The function is being invoked by the upper MAC client
3333                  * of a VNIC. The VNIC should only see the traffic
3334                  * it is entitled to.
3335                  */
3336                 type = MAC_CLIENT_PROMISC_FILTERED;
3337         }
3338 
3339 
3340         /*
3341          * Turn on promiscuous mode for the underlying NIC.
3342          * This is needed even for filtered callbacks which
3343          * expect to receive all multicast traffic on the wire.
3344          *
3345          * Physical promiscuous mode should not be turned on if
3346          * MAC_PROMISC_FLAGS_NO_PHYS is set.
3347          */
3348         if ((flags & MAC_PROMISC_FLAGS_NO_PHYS) == 0) {
3349                 if ((rc = i_mac_promisc_set(mip, B_TRUE)) != 0) {
3350                         mac_stop((mac_handle_t)mip);
3351                         i_mac_perim_exit(mip);
3352                         return (rc);
3353                 }
3354         }
3355 
3356         mpip = kmem_cache_alloc(mac_promisc_impl_cache, KM_SLEEP);
3357 
3358         mpip->mpi_type = type;
3359         mpip->mpi_fn = fn;
3360         mpip->mpi_arg = arg;
3361         mpip->mpi_mcip = mcip;
3362         mpip->mpi_no_tx_loop = ((flags & MAC_PROMISC_FLAGS_NO_TX_LOOP) != 0);
3363         mpip->mpi_no_phys = ((flags & MAC_PROMISC_FLAGS_NO_PHYS) != 0);
3364         mpip->mpi_strip_vlan_tag =
3365             ((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
3366         mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);
3367 
3368         mcbi = &mip->mi_promisc_cb_info;
3369         mutex_enter(mcbi->mcbi_lockp);
3370 
3371         mac_callback_add(&mip->mi_promisc_cb_info, &mcip->mci_promisc_list,
3372             &mpip->mpi_mci_link);
3373         mac_callback_add(&mip->mi_promisc_cb_info, &mip->mi_promisc_list,
3374             &mpip->mpi_mi_link);
3375 
3376         mutex_exit(mcbi->mcbi_lockp);
3377 
3378         *mphp = (mac_promisc_handle_t)mpip;
3379 
3380         if (mcip->mci_state_flags & MCIS_IS_VNIC) {
3381                 mac_impl_t *umip = mcip->mci_upper_mip;
3382 
3383                 ASSERT(umip != NULL);
3384                 mac_vnic_secondary_update(umip);
3385         }
3386 
3387         i_mac_perim_exit(mip);
3388 
3389         return (0);
3390 }
3391 
3392 /*
3393  * Remove a multicast address previously aded through mac_promisc_add().
3394  */
3395 void
3396 mac_promisc_remove(mac_promisc_handle_t mph)
3397 {
3398         mac_promisc_impl_t *mpip = (mac_promisc_impl_t *)mph;
3399         mac_client_impl_t *mcip = mpip->mpi_mcip;
3400         mac_impl_t *mip = mcip->mci_mip;
3401         mac_cb_info_t *mcbi;
3402         int rv;
3403 
3404         i_mac_perim_enter(mip);
3405 
3406         /*
3407          * Even if the device can't be reset into normal mode, we still
3408          * need to clear the client promisc callbacks. The client may want
3409          * to close the mac end point and we can't have stale callbacks.
3410          */
3411         if (!(mpip->mpi_no_phys)) {
3412                 if ((rv = i_mac_promisc_set(mip, B_FALSE)) != 0) {
3413                         cmn_err(CE_WARN, "%s: failed to switch OFF promiscuous"
3414                             " mode because of error 0x%x", mip->mi_name, rv);
3415                 }
3416         }
3417         mcbi = &mip->mi_promisc_cb_info;
3418         mutex_enter(mcbi->mcbi_lockp);
3419         if (mac_callback_remove(mcbi, &mip->mi_promisc_list,
3420             &mpip->mpi_mi_link)) {
3421                 VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
3422                     &mcip->mci_promisc_list, &mpip->mpi_mci_link));
3423                 kmem_cache_free(mac_promisc_impl_cache, mpip);
3424         } else {
3425                 mac_callback_remove_wait(&mip->mi_promisc_cb_info);
3426         }
3427 
3428         if (mcip->mci_state_flags & MCIS_IS_VNIC) {
3429                 mac_impl_t *umip = mcip->mci_upper_mip;
3430 
3431                 ASSERT(umip != NULL);
3432                 mac_vnic_secondary_update(umip);
3433         }
3434 
3435         mutex_exit(mcbi->mcbi_lockp);
3436         mac_stop((mac_handle_t)mip);
3437 
3438         i_mac_perim_exit(mip);
3439 }
3440 
/*
 * Reference count the number of active Tx threads. MCI_TX_QUIESCE indicates
 * that a control operation wants to quiesce the Tx data flow in which case
 * we return an error. Holding any of the per cpu locks ensures that the
 * mci_tx_flag won't change.
 *
 * 'CPU' must be accessed just once and used to compute the index into the
 * percpu array, and that index must be used for the entire duration of the
 * packet send operation. Note that the thread may be preempted and run on
 * another cpu any time and so we can't use 'CPU' more than once for the
 * operation.
 *
 * On return, 'mytx' points at the per-cpu slot that was chosen and 'error'
 * is 0 if the hold was taken or -1 if the client is being quiesced (no
 * reference is taken in that case). 'mytx' and 'error' must be lvalues.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used in an unbraced if/else.
 */
#define MAC_TX_TRY_HOLD(mcip, mytx, error)                              \
do {                                                                    \
        (error) = 0;                                                    \
        (mytx) = &(mcip)->mci_tx_pcpu[CPU->cpu_seqid & mac_tx_percpu_cnt]; \
        mutex_enter(&(mytx)->pcpu_tx_lock);                             \
        if (!((mcip)->mci_tx_flag & MCI_TX_QUIESCE)) {                  \
                (mytx)->pcpu_tx_refcnt++;                               \
        } else {                                                        \
                (error) = -1;                                           \
        }                                                               \
        mutex_exit(&(mytx)->pcpu_tx_lock);                              \
} while (0)
3465 
/*
 * Release the reference. If needed, signal any control operation waiting
 * for Tx quiescence. The wait and signal are always done using the
 * mci_tx_pcpu[0]'s lock.
 *
 * 'mytx' must be the per-cpu slot that MAC_TX_TRY_HOLD() returned for this
 * send. The signal is only sent when this release drops the slot's count
 * to zero while MCI_TX_QUIESCE is set, and the slot's own lock is dropped
 * before mci_tx_pcpu[0]'s lock is taken.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used in an unbraced if/else.
 */
#define MAC_TX_RELE(mcip, mytx)                                         \
do {                                                                    \
        mutex_enter(&(mytx)->pcpu_tx_lock);                             \
        if (--(mytx)->pcpu_tx_refcnt == 0 &&                            \
            ((mcip)->mci_tx_flag & MCI_TX_QUIESCE)) {                   \
                mutex_exit(&(mytx)->pcpu_tx_lock);                      \
                mutex_enter(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);      \
                cv_signal(&(mcip)->mci_tx_cv);                          \
                mutex_exit(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);       \
        } else {                                                        \
                mutex_exit(&(mytx)->pcpu_tx_lock);                      \
        }                                                               \
} while (0)
3483 
3484 /*
3485  * Send function invoked by MAC clients.
3486  */
3487 mac_tx_cookie_t
3488 mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint,
3489     uint16_t flag, mblk_t **ret_mp)
3490 {
3491         mac_tx_cookie_t         cookie = 0;
3492         int                     error;
3493         mac_tx_percpu_t         *mytx;
3494         mac_soft_ring_set_t     *srs;
3495         flow_entry_t            *flent;
3496         boolean_t               is_subflow = B_FALSE;
3497         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3498         mac_impl_t              *mip = mcip->mci_mip;
3499         mac_srs_tx_t            *srs_tx;
3500 
3501         /*
3502          * Check whether the active Tx threads count is bumped already.
3503          */
3504         if (!(flag & MAC_TX_NO_HOLD)) {
3505                 MAC_TX_TRY_HOLD(mcip, mytx, error);
3506                 if (error != 0) {
3507                         freemsgchain(mp_chain);
3508                         return (0);
3509                 }
3510         }
3511 
3512         /*
3513          * If mac protection is enabled, only the permissible packets will be
3514          * returned by mac_protect_check().
3515          */
3516         if ((mcip->mci_flent->
3517             fe_resource_props.mrp_mask & MRP_PROTECT) != 0 &&
3518             (mp_chain = mac_protect_check(mch, mp_chain)) == NULL)
3519                 goto done;
3520 
3521         if (mcip->mci_subflow_tab != NULL &&
3522             mcip->mci_subflow_tab->ft_flow_count > 0 &&
3523             mac_flow_lookup(mcip->mci_subflow_tab, mp_chain,
3524             FLOW_OUTBOUND, &flent) == 0) {
3525                 /*
3526                  * The main assumption here is that if in the event
3527                  * we get a chain, all the packets will be classified
3528                  * to the same Flow/SRS. If this changes for any
3529                  * reason, the following logic should change as well.
3530                  * I suppose the fanout_hint also assumes this .
3531                  */
3532                 ASSERT(flent != NULL);
3533                 is_subflow = B_TRUE;
3534         } else {
3535                 flent = mcip->mci_flent;
3536         }
3537 
3538         srs = flent->fe_tx_srs;
3539         /*
3540          * This is to avoid panics with PF_PACKET that can call mac_tx()
3541          * against an interface that is not capable of sending. A rewrite
3542          * of the mac datapath is required to remove this limitation.
3543          */
3544         if (srs == NULL) {
3545                 freemsgchain(mp_chain);
3546                 goto done;
3547         }
3548 
3549         srs_tx = &srs->srs_tx;
3550         if (srs_tx->st_mode == SRS_TX_DEFAULT &&
3551             (srs->srs_state & SRS_ENQUEUED) == 0 &&
3552             mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) {
3553                 uint64_t        obytes;
3554 
3555                 /*
3556                  * Since dls always opens the underlying MAC, nclients equals
3557                  * to 1 means that the only active client is dls itself acting
3558                  * as a primary client of the MAC instance. Since dls will not
3559                  * send tagged packets in that case, and dls is trusted to send
3560                  * packets for its allowed VLAN(s), the VLAN tag insertion and
3561                  * check is required only if nclients is greater than 1.
3562                  */
3563                 if (mip->mi_nclients > 1) {
3564                         if (MAC_VID_CHECK_NEEDED(mcip)) {
3565                                 int     err = 0;
3566 
3567                                 MAC_VID_CHECK(mcip, mp_chain, err);
3568                                 if (err != 0) {
3569                                         freemsg(mp_chain);
3570                                         mcip->mci_misc_stat.mms_txerrors++;
3571                                         goto done;
3572                                 }
3573                         }
3574                         if (MAC_TAG_NEEDED(mcip)) {
3575                                 mp_chain = mac_add_vlan_tag(mp_chain, 0,
3576                                     mac_client_vid(mch));
3577                                 if (mp_chain == NULL) {
3578                                         mcip->mci_misc_stat.mms_txerrors++;
3579                                         goto done;
3580                                 }
3581                         }
3582                 }
3583 
3584                 obytes = (mp_chain->b_cont == NULL ? MBLKL(mp_chain) :
3585                     msgdsize(mp_chain));
3586 
3587                 MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip);
3588                 if (mp_chain == NULL) {
3589                         cookie = 0;
3590                         SRS_TX_STAT_UPDATE(srs, opackets, 1);
3591                         SRS_TX_STAT_UPDATE(srs, obytes, obytes);
3592                 } else {
3593                         mutex_enter(&srs->srs_lock);
3594                         cookie = mac_tx_srs_no_desc(srs, mp_chain,
3595                             flag, ret_mp);
3596                         mutex_exit(&srs->srs_lock);
3597                 }
3598         } else {
3599                 cookie = srs_tx->st_func(srs, mp_chain, hint, flag, ret_mp);
3600         }
3601 
3602 done:
3603         if (is_subflow)
3604                 FLOW_REFRELE(flent);
3605 
3606         if (!(flag & MAC_TX_NO_HOLD))
3607                 MAC_TX_RELE(mcip, mytx);
3608 
3609         return (cookie);
3610 }
3611 
3612 /*
3613  * mac_tx_is_blocked
3614  *
3615  * Given a cookie, it returns if the ring identified by the cookie is
3616  * flow-controlled or not. If NULL is passed in place of a cookie,
3617  * then it finds out if any of the underlying rings belonging to the
3618  * SRS is flow controlled or not and returns that status.
3619  */
3620 /* ARGSUSED */
3621 boolean_t
3622 mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie)
3623 {
3624         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3625         mac_soft_ring_set_t *mac_srs;
3626         mac_soft_ring_t *sringp;
3627         boolean_t blocked = B_FALSE;
3628         mac_tx_percpu_t *mytx;
3629         int err;
3630         int i;
3631 
3632         /*
3633          * Bump the reference count so that mac_srs won't be deleted.
3634          * If the client is currently quiesced and we failed to bump
3635          * the reference, return B_TRUE so that flow control stays
3636          * as enabled.
3637          *
3638          * Flow control will then be disabled once the client is no
3639          * longer quiesced.
3640          */
3641         MAC_TX_TRY_HOLD(mcip, mytx, err);
3642         if (err != 0)
3643                 return (B_TRUE);
3644 
3645         if ((mac_srs = MCIP_TX_SRS(mcip)) == NULL) {
3646                 MAC_TX_RELE(mcip, mytx);
3647                 return (B_FALSE);
3648         }
3649 
3650         mutex_enter(&mac_srs->srs_lock);
3651         /*
3652          * Only in the case of TX_FANOUT and TX_AGGR, the underlying
3653          * softring (s_ring_state) will have the HIWAT set. This is
3654          * the multiple Tx ring flow control case. For all other
3655          * case, SRS (srs_state) will store the condition.
3656          */
3657         if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT ||
3658             mac_srs->srs_tx.st_mode == SRS_TX_AGGR) {
3659                 if (cookie != 0) {
3660                         sringp = (mac_soft_ring_t *)cookie;
3661                         mutex_enter(&sringp->s_ring_lock);
3662                         if (sringp->s_ring_state & S_RING_TX_HIWAT)
3663                                 blocked = B_TRUE;
3664                         mutex_exit(&sringp->s_ring_lock);
3665                 } else {
3666                         for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3667                                 sringp = mac_srs->srs_tx_soft_rings[i];
3668                                 mutex_enter(&sringp->s_ring_lock);
3669                                 if (sringp->s_ring_state & S_RING_TX_HIWAT) {
3670                                         blocked = B_TRUE;
3671                                         mutex_exit(&sringp->s_ring_lock);
3672                                         break;
3673                                 }
3674                                 mutex_exit(&sringp->s_ring_lock);
3675                         }
3676                 }
3677         } else {
3678                 blocked = (mac_srs->srs_state & SRS_TX_HIWAT);
3679         }
3680         mutex_exit(&mac_srs->srs_lock);
3681         MAC_TX_RELE(mcip, mytx);
3682         return (blocked);
3683 }
3684 
3685 /*
3686  * Check if the MAC client is the primary MAC client.
3687  */
3688 boolean_t
3689 mac_is_primary_client(mac_client_impl_t *mcip)
3690 {
3691         return (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY);
3692 }
3693 
3694 void
3695 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
3696 {
3697         mac_impl_t      *mip = (mac_impl_t *)mh;
3698         int cmd = ((struct iocblk *)bp->b_rptr)->ioc_cmd;
3699 
3700         if ((cmd == ND_GET && (mip->mi_callbacks->mc_callbacks & MC_GETPROP)) ||
3701             (cmd == ND_SET && (mip->mi_callbacks->mc_callbacks & MC_SETPROP))) {
3702                 /*
3703                  * If ndd props were registered, call them.
3704                  * Note that ndd ioctls are Obsolete
3705                  */
3706                 mac_ndd_ioctl(mip, wq, bp);
3707                 return;
3708         }
3709 
3710         /*
3711          * Call the driver to handle the ioctl.  The driver may not support
3712          * any ioctls, in which case we reply with a NAK on its behalf.
3713          */
3714         if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
3715                 mip->mi_ioctl(mip->mi_driver, wq, bp);
3716         else
3717                 miocnak(wq, bp, 0, EINVAL);
3718 }
3719 
3720 /*
3721  * Return the link state of the specified MAC instance.
3722  */
3723 link_state_t
3724 mac_link_get(mac_handle_t mh)
3725 {
3726         return (((mac_impl_t *)mh)->mi_linkstate);
3727 }
3728 
3729 /*
3730  * Add a mac client specified notification callback. Please see the comments
3731  * above mac_callback_add() for general information about mac callback
3732  * addition/deletion in the presence of mac callback list walkers
3733  */
3734 mac_notify_handle_t
3735 mac_notify_add(mac_handle_t mh, mac_notify_t notify_fn, void *arg)
3736 {
3737         mac_impl_t              *mip = (mac_impl_t *)mh;
3738         mac_notify_cb_t         *mncb;
3739         mac_cb_info_t           *mcbi;
3740 
3741         /*
3742          * Allocate a notify callback structure, fill in the details and
3743          * use the mac callback list manipulation functions to chain into
3744          * the list of callbacks.
3745          */
3746         mncb = kmem_zalloc(sizeof (mac_notify_cb_t), KM_SLEEP);
3747         mncb->mncb_fn = notify_fn;
3748         mncb->mncb_arg = arg;
3749         mncb->mncb_mip = mip;
3750         mncb->mncb_link.mcb_objp = mncb;
3751         mncb->mncb_link.mcb_objsize = sizeof (mac_notify_cb_t);
3752         mncb->mncb_link.mcb_flags = MCB_NOTIFY_CB_T;
3753 
3754         mcbi = &mip->mi_notify_cb_info;
3755 
3756         i_mac_perim_enter(mip);
3757         mutex_enter(mcbi->mcbi_lockp);
3758 
3759         mac_callback_add(&mip->mi_notify_cb_info, &mip->mi_notify_cb_list,
3760             &mncb->mncb_link);
3761 
3762         mutex_exit(mcbi->mcbi_lockp);
3763         i_mac_perim_exit(mip);
3764         return ((mac_notify_handle_t)mncb);
3765 }
3766 
3767 void
3768 mac_notify_remove_wait(mac_handle_t mh)
3769 {
3770         mac_impl_t      *mip = (mac_impl_t *)mh;
3771         mac_cb_info_t   *mcbi = &mip->mi_notify_cb_info;
3772 
3773         mutex_enter(mcbi->mcbi_lockp);
3774         mac_callback_remove_wait(&mip->mi_notify_cb_info);
3775         mutex_exit(mcbi->mcbi_lockp);
3776 }
3777 
3778 /*
3779  * Remove a mac client specified notification callback
3780  */
3781 int
3782 mac_notify_remove(mac_notify_handle_t mnh, boolean_t wait)
3783 {
3784         mac_notify_cb_t *mncb = (mac_notify_cb_t *)mnh;
3785         mac_impl_t      *mip = mncb->mncb_mip;
3786         mac_cb_info_t   *mcbi;
3787         int             err = 0;
3788 
3789         mcbi = &mip->mi_notify_cb_info;
3790 
3791         i_mac_perim_enter(mip);
3792         mutex_enter(mcbi->mcbi_lockp);
3793 
3794         ASSERT(mncb->mncb_link.mcb_objp == mncb);
3795         /*
3796          * If there aren't any list walkers, the remove would succeed
3797          * inline, else we wait for the deferred remove to complete
3798          */
3799         if (mac_callback_remove(&mip->mi_notify_cb_info,
3800             &mip->mi_notify_cb_list, &mncb->mncb_link)) {
3801                 kmem_free(mncb, sizeof (mac_notify_cb_t));
3802         } else {
3803                 err = EBUSY;
3804         }
3805 
3806         mutex_exit(mcbi->mcbi_lockp);
3807         i_mac_perim_exit(mip);
3808 
3809         /*
3810          * If we failed to remove the notification callback and "wait" is set
3811          * to be B_TRUE, wait for the callback to finish after we exit the
3812          * mac perimeter.
3813          */
3814         if (err != 0 && wait) {
3815                 mac_notify_remove_wait((mac_handle_t)mip);
3816                 return (0);
3817         }
3818 
3819         return (err);
3820 }
3821 
3822 /*
3823  * Associate resource management callbacks with the specified MAC
3824  * clients.
3825  */
3826 
3827 void
3828 mac_resource_set_common(mac_client_handle_t mch, mac_resource_add_t add,
3829     mac_resource_remove_t remove, mac_resource_quiesce_t quiesce,
3830     mac_resource_restart_t restart, mac_resource_bind_t bind,
3831     void *arg)
3832 {
3833         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3834 
3835         mcip->mci_resource_add = add;
3836         mcip->mci_resource_remove = remove;
3837         mcip->mci_resource_quiesce = quiesce;
3838         mcip->mci_resource_restart = restart;
3839         mcip->mci_resource_bind = bind;
3840         mcip->mci_resource_arg = arg;
3841 }
3842 
3843 void
3844 mac_resource_set(mac_client_handle_t mch, mac_resource_add_t add, void *arg)
3845 {
3846         /* update the 'resource_add' callback */
3847         mac_resource_set_common(mch, add, NULL, NULL, NULL, NULL, arg);
3848 }
3849 
3850 /*
3851  * Sets up the client resources and enable the polling interface over all the
3852  * SRS's and the soft rings of the client
3853  */
3854 void
3855 mac_client_poll_enable(mac_client_handle_t mch)
3856 {
3857         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3858         mac_soft_ring_set_t     *mac_srs;
3859         flow_entry_t            *flent;
3860         int                     i;
3861 
3862         flent = mcip->mci_flent;
3863         ASSERT(flent != NULL);
3864 
3865         mcip->mci_state_flags |= MCIS_CLIENT_POLL_CAPABLE;
3866         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
3867                 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
3868                 ASSERT(mac_srs->srs_mcip == mcip);
3869                 mac_srs_client_poll_enable(mcip, mac_srs);
3870         }
3871 }
3872 
3873 /*
3874  * Tears down the client resources and disable the polling interface over all
3875  * the SRS's and the soft rings of the client
3876  */
3877 void
3878 mac_client_poll_disable(mac_client_handle_t mch)
3879 {
3880         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3881         mac_soft_ring_set_t     *mac_srs;
3882         flow_entry_t            *flent;
3883         int                     i;
3884 
3885         flent = mcip->mci_flent;
3886         ASSERT(flent != NULL);
3887 
3888         mcip->mci_state_flags &= ~MCIS_CLIENT_POLL_CAPABLE;
3889         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
3890                 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
3891                 ASSERT(mac_srs->srs_mcip == mcip);
3892                 mac_srs_client_poll_disable(mcip, mac_srs);
3893         }
3894 }
3895 
3896 /*
3897  * Associate the CPUs specified by the given property with a MAC client.
3898  */
3899 int
3900 mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
3901 {
3902         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3903         mac_impl_t *mip = mcip->mci_mip;
3904         int err = 0;
3905 
3906         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3907 
3908         if ((err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ?
3909             mcip->mci_upper_mip : mip, mrp)) != 0) {
3910                 return (err);
3911         }
3912         if (MCIP_DATAPATH_SETUP(mcip))
3913                 mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp);
3914 
3915         mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
3916         return (0);
3917 }
3918 
3919 /*
3920  * Apply the specified properties to the specified MAC client.
3921  */
3922 int
3923 mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
3924 {
3925         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3926         mac_impl_t *mip = mcip->mci_mip;
3927         int err = 0;
3928 
3929         i_mac_perim_enter(mip);
3930 
3931         if ((mrp->mrp_mask & MRP_MAXBW) || (mrp->mrp_mask & MRP_PRIORITY)) {
3932                 err = mac_resource_ctl_set(mch, mrp);
3933                 if (err != 0)
3934                         goto done;
3935         }
3936 
3937         if (mrp->mrp_mask & (MRP_CPUS|MRP_POOL)) {
3938                 err = mac_cpu_set(mch, mrp);
3939                 if (err != 0)
3940                         goto done;
3941         }
3942 
3943         if (mrp->mrp_mask & MRP_PROTECT) {
3944                 err = mac_protect_set(mch, mrp);
3945                 if (err != 0)
3946                         goto done;
3947         }
3948 
3949         if ((mrp->mrp_mask & MRP_RX_RINGS) || (mrp->mrp_mask & MRP_TX_RINGS))
3950                 err = mac_resource_ctl_set(mch, mrp);
3951 
3952 done:
3953         i_mac_perim_exit(mip);
3954         return (err);
3955 }
3956 
3957 /*
3958  * Return the properties currently associated with the specified MAC client.
3959  */
3960 void
3961 mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
3962 {
3963         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3964         mac_resource_props_t    *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
3965 
3966         bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
3967 }
3968 
3969 /*
3970  * Return the effective properties currently associated with the specified
3971  * MAC client.
3972  */
3973 void
3974 mac_client_get_effective_resources(mac_client_handle_t mch,
3975     mac_resource_props_t *mrp)
3976 {
3977         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3978         mac_resource_props_t    *mcip_mrp = MCIP_EFFECTIVE_PROPS(mcip);
3979 
3980         bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
3981 }
3982 
3983 /*
3984  * Pass a copy of the specified packet to the promiscuous callbacks
3985  * of the specified MAC.
3986  *
3987  * If sender is NULL, the function is being invoked for a packet chain
3988  * received from the wire. If sender is non-NULL, it points to
3989  * the MAC client from which the packet is being sent.
3990  *
3991  * The packets are distributed to the promiscuous callbacks as follows:
3992  *
3993  * - all packets are sent to the MAC_CLIENT_PROMISC_ALL callbacks
 * - all broadcast and multicast packets are sent to the
 *   MAC_CLIENT_PROMISC_FILTERED and MAC_CLIENT_PROMISC_MULTI callbacks.
 *
 * The unicast packets of MAC_CLIENT_PROMISC_FILTERED callbacks are dispatched
 * after classification by mac_rx_deliver().
3999  */
4000 
4001 static void
4002 mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
4003     boolean_t loopback)
4004 {
4005         mblk_t *mp_copy, *mp_next;
4006 
4007         if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
4008                 mp_copy = copymsg(mp);
4009                 if (mp_copy == NULL)
4010                         return;
4011 
4012                 if (mpip->mpi_strip_vlan_tag) {
4013                         mp_copy = mac_strip_vlan_tag_chain(mp_copy);
4014                         if (mp_copy == NULL)
4015                                 return;
4016                 }
4017                 mp_next = NULL;
4018         } else {
4019                 mp_copy = mp;
4020                 mp_next = mp->b_next;
4021         }
4022         mp_copy->b_next = NULL;
4023 
4024         mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback);
4025         if (mp_copy == mp)
4026                 mp->b_next = mp_next;
4027 }
4028 
4029 /*
4030  * Return the VID of a packet. Zero if the packet is not tagged.
4031  */
4032 static uint16_t
4033 mac_ether_vid(mblk_t *mp)
4034 {
4035         struct ether_header *eth = (struct ether_header *)mp->b_rptr;
4036 
4037         if (ntohs(eth->ether_type) == ETHERTYPE_VLAN) {
4038                 struct ether_vlan_header *t_evhp =
4039                     (struct ether_vlan_header *)mp->b_rptr;
4040                 return (VLAN_ID(ntohs(t_evhp->ether_tci)));
4041         }
4042 
4043         return (0);
4044 }
4045 
4046 /*
4047  * Return whether the specified packet contains a multicast or broadcast
4048  * destination MAC address.
4049  */
4050 static boolean_t
4051 mac_is_mcast(mac_impl_t *mip, mblk_t *mp)
4052 {
4053         mac_header_info_t hdr_info;
4054 
4055         if (mac_header_info((mac_handle_t)mip, mp, &hdr_info) != 0)
4056                 return (B_FALSE);
4057         return ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
4058             (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST));
4059 }
4060 
4061 /*
4062  * Send a copy of an mblk chain to the MAC clients of the specified MAC.
4063  * "sender" points to the sender MAC client for outbound packets, and
4064  * is set to NULL for inbound packets.
4065  */
4066 void
4067 mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
4068     mac_client_impl_t *sender)
4069 {
4070         mac_promisc_impl_t *mpip;
4071         mac_cb_t *mcb;
4072         mblk_t *mp;
4073         boolean_t is_mcast, is_sender;
4074 
4075         MAC_PROMISC_WALKER_INC(mip);
4076         for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
4077                 is_mcast = mac_is_mcast(mip, mp);
4078                 /* send packet to interested callbacks */
4079                 for (mcb = mip->mi_promisc_list; mcb != NULL;
4080                     mcb = mcb->mcb_nextp) {
4081                         mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
4082                         is_sender = (mpip->mpi_mcip == sender);
4083 
4084                         if (is_sender && mpip->mpi_no_tx_loop)
4085                                 /*
4086                                  * The sender doesn't want to receive
4087                                  * copies of the packets it sends.
4088                                  */
4089                                 continue;
4090 
4091                         /* this client doesn't need any packets (bridge) */
4092                         if (mpip->mpi_fn == NULL)
4093                                 continue;
4094 
4095                         /*
4096                          * For an ethernet MAC, don't displatch a multicast
4097                          * packet to a non-PROMISC_ALL callbacks unless the VID
4098                          * of the packet matches the VID of the client.
4099                          */
4100                         if (is_mcast &&
4101                             mpip->mpi_type != MAC_CLIENT_PROMISC_ALL &&
4102                             !mac_client_check_flow_vid(mpip->mpi_mcip,
4103                             mac_ether_vid(mp)))
4104                                 continue;
4105 
4106                         if (is_sender ||
4107                             mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
4108                             is_mcast)
4109                                 mac_promisc_dispatch_one(mpip, mp, is_sender);
4110                 }
4111         }
4112         MAC_PROMISC_WALKER_DCR(mip);
4113 }
4114 
4115 void
4116 mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
4117 {
4118         mac_impl_t              *mip = mcip->mci_mip;
4119         mac_promisc_impl_t      *mpip;
4120         boolean_t               is_mcast;
4121         mblk_t                  *mp;
4122         mac_cb_t                *mcb;
4123 
4124         /*
4125          * The unicast packets for the MAC client still
4126          * need to be delivered to the MAC_CLIENT_PROMISC_FILTERED
4127          * promiscuous callbacks. The broadcast and multicast
4128          * packets were delivered from mac_rx().
4129          */
4130         MAC_PROMISC_WALKER_INC(mip);
4131         for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
4132                 is_mcast = mac_is_mcast(mip, mp);
4133                 for (mcb = mcip->mci_promisc_list; mcb != NULL;
4134                     mcb = mcb->mcb_nextp) {
4135                         mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
4136                         if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
4137                             !is_mcast) {
4138                                 mac_promisc_dispatch_one(mpip, mp, B_FALSE);
4139                         }
4140                 }
4141         }
4142         MAC_PROMISC_WALKER_DCR(mip);
4143 }
4144 
4145 /*
4146  * Return the margin value currently assigned to the specified MAC instance.
4147  */
4148 void
4149 mac_margin_get(mac_handle_t mh, uint32_t *marginp)
4150 {
4151         mac_impl_t *mip = (mac_impl_t *)mh;
4152 
4153         rw_enter(&(mip->mi_rw_lock), RW_READER);
4154         *marginp = mip->mi_margin;
4155         rw_exit(&(mip->mi_rw_lock));
4156 }
4157 
/*
 * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
 * issued before a DL_ATTACH_REQ. We walk the i_mac_impl_hash table and find
 * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
 * to the caller. We do all this with i_mac_impl_lock held so the mac_impl_t
 * cannot disappear while we are accessing it.
 *
 * i_mac_info_state_t carries the search key into the hash walker and the
 * result back out of it.
 */
typedef struct i_mac_info_state_s {
        const char      *mi_name;       /* in: driver name to match */
        mac_info_t      *mi_infop;      /* out: match's mac_info, else NULL */
} i_mac_info_state_t;
4169 
4170 /*ARGSUSED*/
4171 static uint_t
4172 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
4173 {
4174         i_mac_info_state_t *statep = arg;
4175         mac_impl_t *mip = (mac_impl_t *)val;
4176 
4177         if (mip->mi_state_flags & MIS_DISABLED)
4178                 return (MH_WALK_CONTINUE);
4179 
4180         if (strcmp(statep->mi_name,
4181             ddi_driver_name(mip->mi_dip)) != 0)
4182                 return (MH_WALK_CONTINUE);
4183 
4184         statep->mi_infop = &mip->mi_info;
4185         return (MH_WALK_TERMINATE);
4186 }
4187 
4188 boolean_t
4189 mac_info_get(const char *name, mac_info_t *minfop)
4190 {
4191         i_mac_info_state_t state;
4192 
4193         rw_enter(&i_mac_impl_lock, RW_READER);
4194         state.mi_name = name;
4195         state.mi_infop = NULL;
4196         mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
4197         if (state.mi_infop == NULL) {
4198                 rw_exit(&i_mac_impl_lock);
4199                 return (B_FALSE);
4200         }
4201         *minfop = *state.mi_infop;
4202         rw_exit(&i_mac_impl_lock);
4203         return (B_TRUE);
4204 }
4205 
4206 /*
4207  * To get the capabilities that MAC layer cares about, such as rings, factory
4208  * mac address, vnic or not, it should directly invoke this function.  If the
4209  * link is part of a bridge, then the only "capability" it has is the inability
4210  * to do zero copy.
4211  */
4212 boolean_t
4213 i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
4214 {
4215         mac_impl_t *mip = (mac_impl_t *)mh;
4216 
4217         if (mip->mi_bridge_link != NULL)
4218                 return (cap == MAC_CAPAB_NO_ZCOPY);
4219         else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
4220                 return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
4221         else
4222                 return (B_FALSE);
4223 }
4224 
4225 /*
4226  * Capability query function. If number of active mac clients is greater than
4227  * 1, only limited capabilities can be advertised to the caller no matter the
4228  * driver has certain capability or not. Else, we query the driver to get the
4229  * capability.
4230  */
4231 boolean_t
4232 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
4233 {
4234         mac_impl_t *mip = (mac_impl_t *)mh;
4235 
4236         /*
4237          * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM,
4238          * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised.
4239          */
4240         if (mip->mi_nactiveclients > 1) {
4241                 switch (cap) {
4242                 case MAC_CAPAB_NO_ZCOPY:
4243                         return (B_TRUE);
4244                 case MAC_CAPAB_LEGACY:
4245                 case MAC_CAPAB_HCKSUM:
4246                 case MAC_CAPAB_NO_NATIVEVLAN:
4247                         break;
4248                 default:
4249                         return (B_FALSE);
4250                 }
4251         }
4252 
4253         /* else get capab from driver */
4254         return (i_mac_capab_get(mh, cap, cap_data));
4255 }
4256 
4257 boolean_t
4258 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
4259 {
4260         mac_impl_t *mip = (mac_impl_t *)mh;
4261 
4262         return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
4263             mip->mi_pdata));
4264 }
4265 
4266 mblk_t *
4267 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
4268     size_t extra_len)
4269 {
4270         mac_impl_t      *mip = (mac_impl_t *)mh;
4271         const uint8_t   *hdr_daddr;
4272 
4273         /*
4274          * If the MAC is point-to-point with a fixed destination address, then
4275          * we must always use that destination in the MAC header.
4276          */
4277         hdr_daddr = (mip->mi_dstaddr_set ? mip->mi_dstaddr : daddr);
4278         return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, hdr_daddr, sap,
4279             mip->mi_pdata, payload, extra_len));
4280 }
4281 
4282 int
4283 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
4284 {
4285         mac_impl_t *mip = (mac_impl_t *)mh;
4286 
4287         return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
4288             mhip));
4289 }
4290 
4291 int
4292 mac_vlan_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
4293 {
4294         mac_impl_t      *mip = (mac_impl_t *)mh;
4295         boolean_t       is_ethernet = (mip->mi_info.mi_media == DL_ETHER);
4296         int             err = 0;
4297 
4298         /*
4299          * Packets should always be at least 16 bit aligned.
4300          */
4301         ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
4302 
4303         if ((err = mac_header_info(mh, mp, mhip)) != 0)
4304                 return (err);
4305 
4306         /*
4307          * If this is a VLAN-tagged Ethernet packet, then the SAP in the
4308          * mac_header_info_t as returned by mac_header_info() is
4309          * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
4310          */
4311         if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
4312                 struct ether_vlan_header *evhp;
4313                 uint16_t sap;
4314                 mblk_t *tmp = NULL;
4315                 size_t size;
4316 
4317                 size = sizeof (struct ether_vlan_header);
4318                 if (MBLKL(mp) < size) {
4319                         /*
4320                          * Pullup the message in order to get the MAC header
4321                          * infomation. Note that this is a read-only function,
4322                          * we keep the input packet intact.
4323                          */
4324                         if ((tmp = msgpullup(mp, size)) == NULL)
4325                                 return (EINVAL);
4326 
4327                         mp = tmp;
4328                 }
4329                 evhp = (struct ether_vlan_header *)mp->b_rptr;
4330                 sap = ntohs(evhp->ether_type);
4331                 (void) mac_sap_verify(mh, sap, &mhip->mhi_bindsap);
4332                 mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
4333                 mhip->mhi_tci = ntohs(evhp->ether_tci);
4334                 mhip->mhi_istagged = B_TRUE;
4335                 freemsg(tmp);
4336 
4337                 if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
4338                         return (EINVAL);
4339         } else {
4340                 mhip->mhi_istagged = B_FALSE;
4341                 mhip->mhi_tci = 0;
4342         }
4343 
4344         return (0);
4345 }
4346 
4347 mblk_t *
4348 mac_header_cook(mac_handle_t mh, mblk_t *mp)
4349 {
4350         mac_impl_t *mip = (mac_impl_t *)mh;
4351 
4352         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
4353                 if (DB_REF(mp) > 1) {
4354                         mblk_t *newmp = copymsg(mp);
4355                         if (newmp == NULL)
4356                                 return (NULL);
4357                         freemsg(mp);
4358                         mp = newmp;
4359                 }
4360                 return (mip->mi_type->mt_ops.mtops_header_cook(mp,
4361                     mip->mi_pdata));
4362         }
4363         return (mp);
4364 }
4365 
4366 mblk_t *
4367 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
4368 {
4369         mac_impl_t *mip = (mac_impl_t *)mh;
4370 
4371         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
4372                 if (DB_REF(mp) > 1) {
4373                         mblk_t *newmp = copymsg(mp);
4374                         if (newmp == NULL)
4375                                 return (NULL);
4376                         freemsg(mp);
4377                         mp = newmp;
4378                 }
4379                 return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
4380                     mip->mi_pdata));
4381         }
4382         return (mp);
4383 }
4384 
4385 uint_t
4386 mac_addr_len(mac_handle_t mh)
4387 {
4388         mac_impl_t *mip = (mac_impl_t *)mh;
4389 
4390         return (mip->mi_type->mt_addr_length);
4391 }
4392 
4393 /* True if a MAC is a VNIC */
4394 boolean_t
4395 mac_is_vnic(mac_handle_t mh)
4396 {
4397         return (((mac_impl_t *)mh)->mi_state_flags & MIS_IS_VNIC);
4398 }
4399 
4400 mac_handle_t
4401 mac_get_lower_mac_handle(mac_handle_t mh)
4402 {
4403         mac_impl_t *mip = (mac_impl_t *)mh;
4404 
4405         ASSERT(mac_is_vnic(mh));
4406         return (((vnic_t *)mip->mi_driver)->vn_lower_mh);
4407 }
4408 
4409 boolean_t
4410 mac_is_vnic_primary(mac_handle_t mh)
4411 {
4412         mac_impl_t *mip = (mac_impl_t *)mh;
4413 
4414         ASSERT(mac_is_vnic(mh));
4415         return (((vnic_t *)mip->mi_driver)->vn_addr_type ==
4416             VNIC_MAC_ADDR_TYPE_PRIMARY);
4417 }
4418 
4419 void
4420 mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp,
4421     boolean_t is_user_flow)
4422 {
4423         if (nmrp != NULL && cmrp != NULL) {
4424                 if (nmrp->mrp_mask & MRP_PRIORITY) {
4425                         if (nmrp->mrp_priority == MPL_RESET) {
4426                                 cmrp->mrp_mask &= ~MRP_PRIORITY;
4427                                 if (is_user_flow) {
4428                                         cmrp->mrp_priority =
4429                                             MPL_SUBFLOW_DEFAULT;
4430                                 } else {
4431                                         cmrp->mrp_priority = MPL_LINK_DEFAULT;
4432                                 }
4433                         } else {
4434                                 cmrp->mrp_mask |= MRP_PRIORITY;
4435                                 cmrp->mrp_priority = nmrp->mrp_priority;
4436                         }
4437                 }
4438                 if (nmrp->mrp_mask & MRP_MAXBW) {
4439                         if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
4440                                 cmrp->mrp_mask &= ~MRP_MAXBW;
4441                                 cmrp->mrp_maxbw = 0;
4442                         } else {
4443                                 cmrp->mrp_mask |= MRP_MAXBW;
4444                                 cmrp->mrp_maxbw = nmrp->mrp_maxbw;
4445                         }
4446                 }
4447                 if (nmrp->mrp_mask & MRP_CPUS)
4448                         MAC_COPY_CPUS(nmrp, cmrp);
4449 
4450                 if (nmrp->mrp_mask & MRP_POOL) {
4451                         if (strlen(nmrp->mrp_pool) == 0) {
4452                                 cmrp->mrp_mask &= ~MRP_POOL;
4453                                 bzero(cmrp->mrp_pool, sizeof (cmrp->mrp_pool));
4454                         } else {
4455                                 cmrp->mrp_mask |= MRP_POOL;
4456                                 (void) strncpy(cmrp->mrp_pool, nmrp->mrp_pool,
4457                                     sizeof (cmrp->mrp_pool));
4458                         }
4459 
4460                 }
4461 
4462                 if (nmrp->mrp_mask & MRP_PROTECT)
4463                         mac_protect_update(nmrp, cmrp);
4464 
4465                 /*
4466                  * Update the rings specified.
4467                  */
4468                 if (nmrp->mrp_mask & MRP_RX_RINGS) {
4469                         if (nmrp->mrp_mask & MRP_RINGS_RESET) {
4470                                 cmrp->mrp_mask &= ~MRP_RX_RINGS;
4471                                 if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4472                                         cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
4473                                 cmrp->mrp_nrxrings = 0;
4474                         } else {
4475                                 cmrp->mrp_mask |= MRP_RX_RINGS;
4476                                 cmrp->mrp_nrxrings = nmrp->mrp_nrxrings;
4477                         }
4478                 }
4479                 if (nmrp->mrp_mask & MRP_TX_RINGS) {
4480                         if (nmrp->mrp_mask & MRP_RINGS_RESET) {
4481                                 cmrp->mrp_mask &= ~MRP_TX_RINGS;
4482                                 if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4483                                         cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
4484                                 cmrp->mrp_ntxrings = 0;
4485                         } else {
4486                                 cmrp->mrp_mask |= MRP_TX_RINGS;
4487                                 cmrp->mrp_ntxrings = nmrp->mrp_ntxrings;
4488                         }
4489                 }
4490                 if (nmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4491                         cmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
4492                 else if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4493                         cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
4494 
4495                 if (nmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4496                         cmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
4497                 else if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4498                         cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
4499         }
4500 }
4501 
4502 /*
4503  * i_mac_set_resources:
4504  *
4505  * This routine associates properties with the primary MAC client of
4506  * the specified MAC instance.
4507  * - Cache the properties in mac_impl_t
4508  * - Apply the properties to the primary MAC client if exists
4509  */
4510 int
4511 i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4512 {
4513         mac_impl_t              *mip = (mac_impl_t *)mh;
4514         mac_client_impl_t       *mcip;
4515         int                     err = 0;
4516         uint32_t                resmask, newresmask;
4517         mac_resource_props_t    *tmrp, *umrp;
4518 
4519         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
4520 
4521         err = mac_validate_props(mip, mrp);
4522         if (err != 0)
4523                 return (err);
4524 
4525         umrp = kmem_zalloc(sizeof (*umrp), KM_SLEEP);
4526         bcopy(&mip->mi_resource_props, umrp, sizeof (*umrp));
4527         resmask = umrp->mrp_mask;
4528         mac_update_resources(mrp, umrp, B_FALSE);
4529         newresmask = umrp->mrp_mask;
4530 
4531         if (resmask == 0 && newresmask != 0) {
4532                 /*
4533                  * Bandwidth, priority, cpu or pool link properties configured,
4534                  * must disable fastpath.
4535                  */
4536                 if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) {
4537                         kmem_free(umrp, sizeof (*umrp));
4538                         return (err);
4539                 }
4540         }
4541 
4542         /*
4543          * Since bind_cpu may be modified by mac_client_set_resources()
4544          * we use a copy of bind_cpu and finally cache bind_cpu in mip.
4545          * This allows us to cache only user edits in mip.
4546          */
4547         tmrp = kmem_zalloc(sizeof (*tmrp), KM_SLEEP);
4548         bcopy(mrp, tmrp, sizeof (*tmrp));
4549         mcip = mac_primary_client_handle(mip);
4550         if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) {
4551                 err = mac_client_set_resources((mac_client_handle_t)mcip, tmrp);
4552         } else if ((mrp->mrp_mask & MRP_RX_RINGS ||
4553             mrp->mrp_mask & MRP_TX_RINGS)) {
4554                 mac_client_impl_t       *vmcip;
4555 
4556                 /*
4557                  * If the primary is not up, we need to check if there
4558                  * are any VLANs on this primary. If there are then
4559                  * we need to set this property on the VLANs since
4560                  * VLANs follow the primary they are based on. Just
4561                  * look for the first VLAN and change its properties,
4562                  * all the other VLANs should be in the same group.
4563                  */
4564                 for (vmcip = mip->mi_clients_list; vmcip != NULL;
4565                     vmcip = vmcip->mci_client_next) {
4566                         if ((vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) &&
4567                             mac_client_vid((mac_client_handle_t)vmcip) !=
4568                             VLAN_ID_NONE) {
4569                                 break;
4570                         }
4571                 }
4572                 if (vmcip != NULL) {
4573                         mac_resource_props_t    *omrp;
4574                         mac_resource_props_t    *vmrp;
4575 
4576                         omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP);
4577                         bcopy(MCIP_RESOURCE_PROPS(vmcip), omrp, sizeof (*omrp));
4578                         /*
4579                          * We dont' call mac_update_resources since we
4580                          * want to take only the ring properties and
4581                          * not all the properties that may have changed.
4582                          */
4583                         vmrp = MCIP_RESOURCE_PROPS(vmcip);
4584                         if (mrp->mrp_mask & MRP_RX_RINGS) {
4585                                 if (mrp->mrp_mask & MRP_RINGS_RESET) {
4586                                         vmrp->mrp_mask &= ~MRP_RX_RINGS;
4587                                         if (vmrp->mrp_mask &
4588                                             MRP_RXRINGS_UNSPEC) {
4589                                                 vmrp->mrp_mask &=
4590                                                     ~MRP_RXRINGS_UNSPEC;
4591                                         }
4592                                         vmrp->mrp_nrxrings = 0;
4593                                 } else {
4594                                         vmrp->mrp_mask |= MRP_RX_RINGS;
4595                                         vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
4596                                 }
4597                         }
4598                         if (mrp->mrp_mask & MRP_TX_RINGS) {
4599                                 if (mrp->mrp_mask & MRP_RINGS_RESET) {
4600                                         vmrp->mrp_mask &= ~MRP_TX_RINGS;
4601                                         if (vmrp->mrp_mask &
4602                                             MRP_TXRINGS_UNSPEC) {
4603                                                 vmrp->mrp_mask &=
4604                                                     ~MRP_TXRINGS_UNSPEC;
4605                                         }
4606                                         vmrp->mrp_ntxrings = 0;
4607                                 } else {
4608                                         vmrp->mrp_mask |= MRP_TX_RINGS;
4609                                         vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
4610                                 }
4611                         }
4612                         if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4613                                 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
4614 
4615                         if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4616                                 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
4617 
4618                         if ((err = mac_client_set_rings_prop(vmcip, mrp,
4619                             omrp)) != 0) {
4620                                 bcopy(omrp, MCIP_RESOURCE_PROPS(vmcip),
4621                                     sizeof (*omrp));
4622                         } else {
4623                                 mac_set_prim_vlan_rings(mip, vmrp);
4624                         }
4625                         kmem_free(omrp, sizeof (*omrp));
4626                 }
4627         }
4628 
4629         /* Only update the values if mac_client_set_resources succeeded */
4630         if (err == 0) {
4631                 bcopy(umrp, &mip->mi_resource_props, sizeof (*umrp));
4632                 /*
4633                  * If bandwidth, priority or cpu link properties cleared,
4634                  * renable fastpath.
4635                  */
4636                 if (resmask != 0 && newresmask == 0)
4637                         mac_fastpath_enable((mac_handle_t)mip);
4638         } else if (resmask == 0 && newresmask != 0) {
4639                 mac_fastpath_enable((mac_handle_t)mip);
4640         }
4641         kmem_free(tmrp, sizeof (*tmrp));
4642         kmem_free(umrp, sizeof (*umrp));
4643         return (err);
4644 }
4645 
4646 int
4647 mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4648 {
4649         int err;
4650 
4651         i_mac_perim_enter((mac_impl_t *)mh);
4652         err = i_mac_set_resources(mh, mrp);
4653         i_mac_perim_exit((mac_impl_t *)mh);
4654         return (err);
4655 }
4656 
4657 /*
4658  * Get the properties cached for the specified MAC instance.
4659  */
4660 void
4661 mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4662 {
4663         mac_impl_t              *mip = (mac_impl_t *)mh;
4664         mac_client_impl_t       *mcip;
4665 
4666         mcip = mac_primary_client_handle(mip);
4667         if (mcip != NULL) {
4668                 mac_client_get_resources((mac_client_handle_t)mcip, mrp);
4669                 return;
4670         }
4671         bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t));
4672 }
4673 
4674 /*
4675  * Get the effective properties from the primary client of the
4676  * specified MAC instance.
4677  */
4678 void
4679 mac_get_effective_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4680 {
4681         mac_impl_t              *mip = (mac_impl_t *)mh;
4682         mac_client_impl_t       *mcip;
4683 
4684         mcip = mac_primary_client_handle(mip);
4685         if (mcip != NULL) {
4686                 mac_client_get_effective_resources((mac_client_handle_t)mcip,
4687                     mrp);
4688                 return;
4689         }
4690         bzero(mrp, sizeof (mac_resource_props_t));
4691 }
4692 
4693 int
4694 mac_set_pvid(mac_handle_t mh, uint16_t pvid)
4695 {
4696         mac_impl_t *mip = (mac_impl_t *)mh;
4697         mac_client_impl_t *mcip;
4698         mac_unicast_impl_t *muip;
4699 
4700         i_mac_perim_enter(mip);
4701         if (pvid != 0) {
4702                 for (mcip = mip->mi_clients_list; mcip != NULL;
4703                     mcip = mcip->mci_client_next) {
4704                         for (muip = mcip->mci_unicast_list; muip != NULL;
4705                             muip = muip->mui_next) {
4706                                 if (muip->mui_vid == pvid) {
4707                                         i_mac_perim_exit(mip);
4708                                         return (EBUSY);
4709                                 }
4710                         }
4711                 }
4712         }
4713         mip->mi_pvid = pvid;
4714         i_mac_perim_exit(mip);
4715         return (0);
4716 }
4717 
4718 uint16_t
4719 mac_get_pvid(mac_handle_t mh)
4720 {
4721         mac_impl_t *mip = (mac_impl_t *)mh;
4722 
4723         return (mip->mi_pvid);
4724 }
4725 
4726 uint32_t
4727 mac_get_llimit(mac_handle_t mh)
4728 {
4729         mac_impl_t *mip = (mac_impl_t *)mh;
4730 
4731         return (mip->mi_llimit);
4732 }
4733 
4734 uint32_t
4735 mac_get_ldecay(mac_handle_t mh)
4736 {
4737         mac_impl_t *mip = (mac_impl_t *)mh;
4738 
4739         return (mip->mi_ldecay);
4740 }
4741 
4742 /*
4743  * Rename a mac client, its flow, and the kstat.
4744  */
4745 int
4746 mac_rename_primary(mac_handle_t mh, const char *new_name)
4747 {
4748         mac_impl_t              *mip = (mac_impl_t *)mh;
4749         mac_client_impl_t       *cur_clnt = NULL;
4750         flow_entry_t            *fep;
4751 
4752         i_mac_perim_enter(mip);
4753 
4754         /*
4755          * VNICs: we need to change the sys flow name and
4756          * the associated flow kstat.
4757          */
4758         if (mip->mi_state_flags & MIS_IS_VNIC) {
4759                 mac_client_impl_t *mcip = mac_vnic_lower(mip);
4760                 ASSERT(new_name != NULL);
4761                 mac_rename_flow_names(mcip, new_name);
4762                 mac_stat_rename(mcip);
4763                 goto done;
4764         }
4765         /*
4766          * This mac may itself be an aggr link, or it may have some client
4767          * which is an aggr port. For both cases, we need to change the
4768          * aggr port's mac client name, its flow name and the associated flow
4769          * kstat.
4770          */
4771         if (mip->mi_state_flags & MIS_IS_AGGR) {
4772                 mac_capab_aggr_t aggr_cap;
4773                 mac_rename_fn_t rename_fn;
4774                 boolean_t ret;
4775 
4776                 ASSERT(new_name != NULL);
4777                 ret = i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR,
4778                     (void *)(&aggr_cap));
4779                 ASSERT(ret == B_TRUE);
4780                 rename_fn = aggr_cap.mca_rename_fn;
4781                 rename_fn(new_name, mip->mi_driver);
4782                 /*
4783                  * The aggr's client name and kstat flow name will be
4784                  * updated below, i.e. via mac_rename_flow_names.
4785                  */
4786         }
4787 
4788         for (cur_clnt = mip->mi_clients_list; cur_clnt != NULL;
4789             cur_clnt = cur_clnt->mci_client_next) {
4790                 if (cur_clnt->mci_state_flags & MCIS_IS_AGGR_PORT) {
4791                         if (new_name != NULL) {
4792                                 char *str_st = cur_clnt->mci_name;
4793                                 char *str_del = strchr(str_st, '-');
4794 
4795                                 ASSERT(str_del != NULL);
4796                                 bzero(str_del + 1, MAXNAMELEN -
4797                                     (str_del - str_st + 1));
4798                                 bcopy(new_name, str_del + 1,
4799                                     strlen(new_name));
4800                         }
4801                         fep = cur_clnt->mci_flent;
4802                         mac_rename_flow(fep, cur_clnt->mci_name);
4803                         break;
4804                 } else if (new_name != NULL &&
4805                     cur_clnt->mci_state_flags & MCIS_USE_DATALINK_NAME) {
4806                         mac_rename_flow_names(cur_clnt, new_name);
4807                         break;
4808                 }
4809         }
4810 
4811         /* Recreate kstats associated with aggr pseudo rings */
4812         if (mip->mi_state_flags & MIS_IS_AGGR)
4813                 mac_pseudo_ring_stat_rename(mip);
4814 
4815 done:
4816         i_mac_perim_exit(mip);
4817         return (0);
4818 }
4819 
4820 /*
4821  * Rename the MAC client's flow names
4822  */
4823 static void
4824 mac_rename_flow_names(mac_client_impl_t *mcip, const char *new_name)
4825 {
4826         flow_entry_t    *flent;
4827         uint16_t        vid;
4828         char            flowname[MAXFLOWNAMELEN];
4829         mac_impl_t      *mip = mcip->mci_mip;
4830 
4831         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
4832 
4833         /*
4834          * Use mi_rw_lock to ensure that threads not in the mac perimeter
4835          * see a self-consistent value for mci_name
4836          */
4837         rw_enter(&mip->mi_rw_lock, RW_WRITER);
4838         (void) strlcpy(mcip->mci_name, new_name, sizeof (mcip->mci_name));
4839         rw_exit(&mip->mi_rw_lock);
4840 
4841         mac_rename_flow(mcip->mci_flent, new_name);
4842 
4843         if (mcip->mci_nflents == 1)
4844                 return;
4845 
4846         /*
4847          * We have to rename all the others too, no stats to destroy for
4848          * these.
4849          */
4850         for (flent = mcip->mci_flent_list; flent != NULL;
4851             flent = flent->fe_client_next) {
4852                 if (flent != mcip->mci_flent) {
4853                         vid = i_mac_flow_vid(flent);
4854                         (void) sprintf(flowname, "%s%u", new_name, vid);
4855                         mac_flow_set_name(flent, flowname);
4856                 }
4857         }
4858 }
4859 
4860 
4861 /*
4862  * Add a flow to the MAC client's flow list - i.e list of MAC/VID tuples
4863  * defined for the specified MAC client.
4864  */
4865 static void
4866 mac_client_add_to_flow_list(mac_client_impl_t *mcip, flow_entry_t *flent)
4867 {
4868         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4869         /*
4870          * The promisc Rx data path walks the mci_flent_list. Protect by
4871          * using mi_rw_lock
4872          */
4873         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4874 
4875         mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
4876 
4877         /* Add it to the head */
4878         flent->fe_client_next = mcip->mci_flent_list;
4879         mcip->mci_flent_list = flent;
4880         mcip->mci_nflents++;
4881 
4882         /*
4883          * Keep track of the number of non-zero VIDs addresses per MAC
4884          * client to avoid figuring it out in the data-path.
4885          */
4886         if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
4887                 mcip->mci_nvids++;
4888 
4889         rw_exit(&mcip->mci_rw_lock);
4890 }
4891 
4892 /*
4893  * Remove a flow entry from the MAC client's list.
4894  */
4895 static void
4896 mac_client_remove_flow_from_list(mac_client_impl_t *mcip, flow_entry_t *flent)
4897 {
4898         flow_entry_t    *fe = mcip->mci_flent_list;
4899         flow_entry_t    *prev_fe = NULL;
4900 
4901         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4902         /*
4903          * The promisc Rx data path walks the mci_flent_list. Protect by
4904          * using mci_rw_lock
4905          */
4906         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4907         mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
4908 
4909         while ((fe != NULL) && (fe != flent)) {
4910                 prev_fe = fe;
4911                 fe = fe->fe_client_next;
4912         }
4913 
4914         ASSERT(fe != NULL);
4915         if (prev_fe == NULL) {
4916                 /* Deleting the first node */
4917                 mcip->mci_flent_list = fe->fe_client_next;
4918         } else {
4919                 prev_fe->fe_client_next = fe->fe_client_next;
4920         }
4921         mcip->mci_nflents--;
4922 
4923         if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
4924                 mcip->mci_nvids--;
4925 
4926         rw_exit(&mcip->mci_rw_lock);
4927 }
4928 
4929 /*
4930  * Check if the given VID belongs to this MAC client.
4931  */
4932 boolean_t
4933 mac_client_check_flow_vid(mac_client_impl_t *mcip, uint16_t vid)
4934 {
4935         flow_entry_t    *flent;
4936         uint16_t        mci_vid;
4937         uint32_t        cache = mcip->mci_vidcache;
4938 
4939         /*
4940          * In hopes of not having to touch the mci_rw_lock, check to see if
4941          * this vid matches our cached result.
4942          */
4943         if (MCIP_VIDCACHE_ISVALID(cache) && MCIP_VIDCACHE_VID(cache) == vid)
4944                 return (MCIP_VIDCACHE_BOOL(cache) ? B_TRUE : B_FALSE);
4945 
4946         /* The mci_flent_list is protected by mci_rw_lock */
4947         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4948         for (flent = mcip->mci_flent_list; flent != NULL;
4949             flent = flent->fe_client_next) {
4950                 mci_vid = i_mac_flow_vid(flent);
4951                 if (vid == mci_vid) {
4952                         mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_TRUE);
4953                         rw_exit(&mcip->mci_rw_lock);
4954                         return (B_TRUE);
4955                 }
4956         }
4957 
4958         mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_FALSE);
4959         rw_exit(&mcip->mci_rw_lock);
4960         return (B_FALSE);
4961 }
4962 
4963 /*
4964  * Get the flow entry for the specified <MAC addr, VID> tuple.
4965  */
4966 static flow_entry_t *
4967 mac_client_get_flow(mac_client_impl_t *mcip, mac_unicast_impl_t *muip)
4968 {
4969         mac_address_t *map = mcip->mci_unicast;
4970         flow_entry_t *flent;
4971         uint16_t vid;
4972         flow_desc_t flow_desc;
4973 
4974         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4975 
4976         mac_flow_get_desc(mcip->mci_flent, &flow_desc);
4977         if (bcmp(flow_desc.fd_dst_mac, map->ma_addr, map->ma_len) != 0)
4978                 return (NULL);
4979 
4980         for (flent = mcip->mci_flent_list; flent != NULL;
4981             flent = flent->fe_client_next) {
4982                 vid = i_mac_flow_vid(flent);
4983                 if (vid == muip->mui_vid) {
4984                         return (flent);
4985                 }
4986         }
4987 
4988         return (NULL);
4989 }
4990 
/*
 * Since mci_flent has the SRSs, when we want to remove it, we replace
 * the flow_desc_t in mci_flent with that of an existing flent and then
 * remove that flent instead of mci_flent.
 *
 * Concretely, the flow descriptor and flow name of mci_flent are
 * exchanged with those of the second entry on the client's flow list.
 * Both entries are taken out of their flow table for the duration of the
 * exchange and re-added afterwards.
 *
 * The caller must hold the MAC perimeter and the client must have more
 * than one flow entry (both asserted).
 *
 * Returns the other entry, which now carries the descriptor and name
 * that mci_flent had on entry; per the comments below it is the one that
 * is to be freed.
 */
static flow_entry_t *
mac_client_swap_mciflent(mac_client_impl_t *mcip)
{
        flow_entry_t    *flent = mcip->mci_flent;
        flow_tab_t      *ft = flent->fe_flow_tab;
        flow_entry_t    *flent1;
        flow_desc_t     fl_desc;
        char            fl_name[MAXFLOWNAMELEN];
        int             err;

        ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
        ASSERT(mcip->mci_nflents > 1);

        /*
         * get the next flent following the primary flent
         *
         * NOTE(review): this takes the second entry of mci_flent_list,
         * which presumes that entry is not mci_flent itself -- confirm
         * against how the list is ordered by the callers.
         */
        flent1 = mcip->mci_flent_list->fe_client_next;
        /* Both entries must live in the same flow table. */
        ASSERT(flent1 != NULL && flent1->fe_flow_tab == ft);

        /*
         * Remove the flent from the flow table before updating the
         * flow descriptor as the hash depends on the flow descriptor.
         * This also helps incoming packet classification avoid having
         * to grab fe_lock. Access to fe_flow_desc of a flent not in the
         * flow table is done under the fe_lock so that log or stat functions
         * see a self-consistent fe_flow_desc. The name and desc are specific
         * to a flow, the rest are shared by all the clients, including
         * resource control etc.
         */
        mac_flow_remove(ft, flent, B_TRUE);
        mac_flow_remove(ft, flent1, B_TRUE);

        /* Save mci_flent's descriptor and name before overwriting them. */
        bcopy(&flent->fe_flow_desc, &fl_desc, sizeof (flow_desc_t));
        bcopy(flent->fe_flow_name, fl_name, MAXFLOWNAMELEN);

        /* update the primary flow entry */
        mutex_enter(&flent->fe_lock);
        bcopy(&flent1->fe_flow_desc, &flent->fe_flow_desc,
            sizeof (flow_desc_t));
        bcopy(&flent1->fe_flow_name, &flent->fe_flow_name, MAXFLOWNAMELEN);
        mutex_exit(&flent->fe_lock);

        /* update the flow entry that is to be freed */
        mutex_enter(&flent1->fe_lock);
        bcopy(&fl_desc, &flent1->fe_flow_desc, sizeof (flow_desc_t));
        bcopy(fl_name, &flent1->fe_flow_name, MAXFLOWNAMELEN);
        mutex_exit(&flent1->fe_lock);

        /*
         * now reinsert the flow entries in the table
         *
         * A failure of mac_flow_add() is only caught by the ASSERTs; the
         * return value is otherwise not acted upon.
         */
        err = mac_flow_add(ft, flent);
        ASSERT(err == 0);

        err = mac_flow_add(ft, flent1);
        ASSERT(err == 0);

        return (flent1);
}
5051 
5052 /*
5053  * Return whether there is only one flow entry associated with this
5054  * MAC client.
5055  */
5056 static boolean_t
5057 mac_client_single_rcvr(mac_client_impl_t *mcip)
5058 {
5059         return (mcip->mci_nflents == 1);
5060 }
5061 
5062 int
5063 mac_validate_props(mac_impl_t *mip, mac_resource_props_t *mrp)
5064 {
5065         boolean_t               reset;
5066         uint32_t                rings_needed;
5067         uint32_t                rings_avail;
5068         mac_group_type_t        gtype;
5069         mac_resource_props_t    *mip_mrp;
5070 
5071         if (mrp == NULL)
5072                 return (0);
5073 
5074         if (mrp->mrp_mask & MRP_PRIORITY) {
5075                 mac_priority_level_t    pri = mrp->mrp_priority;
5076 
5077                 if (pri < MPL_LOW || pri > MPL_RESET)
5078                         return (EINVAL);
5079         }
5080 
5081         if (mrp->mrp_mask & MRP_MAXBW) {
5082                 uint64_t maxbw = mrp->mrp_maxbw;
5083 
5084                 if (maxbw < MRP_MAXBW_MINVAL && maxbw != 0)
5085                         return (EINVAL);
5086         }
5087         if (mrp->mrp_mask & MRP_CPUS) {
5088                 int i, j;
5089                 mac_cpu_mode_t  fanout;
5090 
5091                 if (mrp->mrp_ncpus > ncpus)
5092                         return (EINVAL);
5093 
5094                 for (i = 0; i < mrp->mrp_ncpus; i++) {
5095                         for (j = 0; j < mrp->mrp_ncpus; j++) {
5096                                 if (i != j &&
5097                                     mrp->mrp_cpu[i] == mrp->mrp_cpu[j]) {
5098                                         return (EINVAL);
5099                                 }
5100                         }
5101                 }
5102 
5103                 for (i = 0; i < mrp->mrp_ncpus; i++) {
5104                         cpu_t *cp;
5105                         int rv;
5106 
5107                         mutex_enter(&cpu_lock);
5108                         cp = cpu_get(mrp->mrp_cpu[i]);
5109                         if (cp != NULL)
5110                                 rv = cpu_is_online(cp);
5111                         else
5112                                 rv = 0;
5113                         mutex_exit(&cpu_lock);
5114                         if (rv == 0)
5115                                 return (EINVAL);
5116                 }
5117 
5118                 fanout = mrp->mrp_fanout_mode;
5119                 if (fanout < 0 || fanout > MCM_CPUS)
5120                         return (EINVAL);
5121         }
5122 
5123         if (mrp->mrp_mask & MRP_PROTECT) {
5124                 int err = mac_protect_validate(mrp);
5125                 if (err != 0)
5126                         return (err);
5127         }
5128 
5129         if (!(mrp->mrp_mask & MRP_RX_RINGS) &&
5130             !(mrp->mrp_mask & MRP_TX_RINGS)) {
5131                 return (0);
5132         }
5133 
5134         /*
5135          * mip will be null when we come from mac_flow_create or
5136          * mac_link_flow_modify. In the latter case it is a user flow,
5137          * for which we don't support rings. In the former we would
5138          * have validated the props beforehand (i_mac_unicast_add ->
5139          * mac_client_set_resources -> validate for the primary and
5140          * vnic_dev_create -> mac_client_set_resources -> validate for
5141          * a vnic.
5142          */
5143         if (mip == NULL)
5144                 return (0);
5145 
5146         /*
5147          * We don't support setting rings property for a VNIC that is using a
5148          * primary address (VLAN)
5149          */
5150         if ((mip->mi_state_flags & MIS_IS_VNIC) &&
5151             mac_is_vnic_primary((mac_handle_t)mip)) {
5152                 return (ENOTSUP);
5153         }
5154 
5155         mip_mrp = &mip->mi_resource_props;
5156         /*
5157          * The rings property should be validated against the NICs
5158          * resources
5159          */
5160         if (mip->mi_state_flags & MIS_IS_VNIC)
5161                 mip = (mac_impl_t *)mac_get_lower_mac_handle((mac_handle_t)mip);
5162 
5163         reset = mrp->mrp_mask & MRP_RINGS_RESET;
5164         /*
5165          * If groups are not supported, return error.
5166          */
5167         if (((mrp->mrp_mask & MRP_RX_RINGS) && mip->mi_rx_groups == NULL) ||
5168             ((mrp->mrp_mask & MRP_TX_RINGS) && mip->mi_tx_groups == NULL)) {
5169                 return (EINVAL);
5170         }
5171         /*
5172          * If we are just resetting, there is no validation needed.
5173          */
5174         if (reset)
5175                 return (0);
5176 
5177         if (mrp->mrp_mask & MRP_RX_RINGS) {
5178                 rings_needed = mrp->mrp_nrxrings;
5179                 /*
5180                  * We just want to check if the number of additional
5181                  * rings requested is available.
5182                  */
5183                 if (mip_mrp->mrp_mask & MRP_RX_RINGS) {
5184                         if (mrp->mrp_nrxrings > mip_mrp->mrp_nrxrings)
5185                                 /* Just check for the additional rings */
5186                                 rings_needed -= mip_mrp->mrp_nrxrings;
5187                         else
5188                                 /* We are not asking for additional rings */
5189                                 rings_needed = 0;
5190                 }
5191                 rings_avail = mip->mi_rxrings_avail;
5192                 gtype = mip->mi_rx_group_type;
5193         } else {
5194                 rings_needed = mrp->mrp_ntxrings;
5195                 /* Similarly for the TX rings */
5196                 if (mip_mrp->mrp_mask & MRP_TX_RINGS) {
5197                         if (mrp->mrp_ntxrings > mip_mrp->mrp_ntxrings)
5198                                 /* Just check for the additional rings */
5199                                 rings_needed -= mip_mrp->mrp_ntxrings;
5200                         else
5201                                 /* We are not asking for additional rings */
5202                                 rings_needed = 0;
5203                 }
5204                 rings_avail = mip->mi_txrings_avail;
5205                 gtype = mip->mi_tx_group_type;
5206         }
5207 
5208         /* Error if the group is dynamic .. */
5209         if (gtype == MAC_GROUP_TYPE_DYNAMIC) {
5210                 /*
5211                  * .. and rings specified are more than available.
5212                  */
5213                 if (rings_needed > rings_avail)
5214                         return (EINVAL);
5215         } else {
5216                 /*
5217                  * OR group is static and we have specified some rings.
5218                  */
5219                 if (rings_needed > 0)
5220                         return (EINVAL);
5221         }
5222         return (0);
5223 }
5224 
5225 /*
5226  * Send a MAC_NOTE_LINK notification to all the MAC clients whenever the
5227  * underlying physical link is down. This is to allow MAC clients to
5228  * communicate with other clients.
5229  */
5230 void
5231 mac_virtual_link_update(mac_impl_t *mip)
5232 {
5233         if (mip->mi_linkstate != LINK_STATE_UP)
5234                 i_mac_notify(mip, MAC_NOTE_LINK);
5235 }
5236 
5237 /*
5238  * For clients that have a pass-thru MAC, e.g. VNIC, we set the VNIC's
5239  * mac handle in the client.
5240  */
5241 void
5242 mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh,
5243     mac_resource_props_t *mrp)
5244 {
5245         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5246         mac_impl_t              *mip = (mac_impl_t *)mh;
5247 
5248         mcip->mci_upper_mip = mip;
5249         /* If there are any properties, copy it over too */
5250         if (mrp != NULL) {
5251                 bcopy(mrp, &mip->mi_resource_props,
5252                     sizeof (mac_resource_props_t));
5253         }
5254 }
5255 
5256 /*
5257  * Mark the mac as being used exclusively by the single mac client that is
5258  * doing some control operation on this mac. No further opens of this mac
5259  * will be allowed until this client calls mac_unmark_exclusive. The mac
5260  * client calling this function must already be in the mac perimeter
5261  */
5262 int
5263 mac_mark_exclusive(mac_handle_t mh)
5264 {
5265         mac_impl_t      *mip = (mac_impl_t *)mh;
5266 
5267         ASSERT(MAC_PERIM_HELD(mh));
5268         /*
5269          * Look up its entry in the global hash table.
5270          */
5271         rw_enter(&i_mac_impl_lock, RW_WRITER);
5272         if (mip->mi_state_flags & MIS_DISABLED) {
5273                 rw_exit(&i_mac_impl_lock);
5274                 return (ENOENT);
5275         }
5276 
5277         /*
5278          * A reference to mac is held even if the link is not plumbed.
5279          * In i_dls_link_create() we open the MAC interface and hold the
5280          * reference. There is an additional reference for the mac_open
5281          * done in acquiring the mac perimeter
5282          */
5283         if (mip->mi_ref != 2) {
5284                 rw_exit(&i_mac_impl_lock);
5285                 return (EBUSY);
5286         }
5287 
5288         ASSERT(!(mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
5289         mip->mi_state_flags |= MIS_EXCLUSIVE_HELD;
5290         rw_exit(&i_mac_impl_lock);
5291         return (0);
5292 }
5293 
5294 void
5295 mac_unmark_exclusive(mac_handle_t mh)
5296 {
5297         mac_impl_t      *mip = (mac_impl_t *)mh;
5298 
5299         ASSERT(MAC_PERIM_HELD(mh));
5300 
5301         rw_enter(&i_mac_impl_lock, RW_WRITER);
5302         /* 1 for the creation and another for the perimeter */
5303         ASSERT(mip->mi_ref == 2 && (mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
5304         mip->mi_state_flags &= ~MIS_EXCLUSIVE_HELD;
5305         rw_exit(&i_mac_impl_lock);
5306 }
5307 
5308 /*
5309  * Set the MTU for the specified MAC.
5310  */
5311 int
5312 mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg)
5313 {
5314         mac_impl_t *mip = (mac_impl_t *)mh;
5315         uint_t old_mtu;
5316         int rv = 0;
5317 
5318         i_mac_perim_enter(mip);
5319 
5320         if (!(mip->mi_callbacks->mc_callbacks & (MC_SETPROP|MC_GETPROP))) {
5321                 rv = ENOTSUP;
5322                 goto bail;
5323         }
5324 
5325         old_mtu = mip->mi_sdu_max;
5326 
5327         if (new_mtu == 0 || new_mtu < mip->mi_sdu_min) {
5328                 rv = EINVAL;
5329                 goto bail;
5330         }
5331 
5332         rw_enter(&mip->mi_rw_lock, RW_READER);
5333         if (mip->mi_mtrp != NULL && new_mtu < mip->mi_mtrp->mtr_mtu) {
5334                 rv = EBUSY;
5335                 rw_exit(&mip->mi_rw_lock);
5336                 goto bail;
5337         }
5338         rw_exit(&mip->mi_rw_lock);
5339 
5340         if (old_mtu != new_mtu) {
5341                 rv = mip->mi_callbacks->mc_setprop(mip->mi_driver,
5342                     "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu);
5343                 if (rv != 0)
5344                         goto bail;
5345                 rv = mac_maxsdu_update(mh, new_mtu);
5346                 ASSERT(rv == 0);
5347         }
5348 
5349 bail:
5350         i_mac_perim_exit(mip);
5351 
5352         if (rv == 0 && old_mtu_arg != NULL)
5353                 *old_mtu_arg = old_mtu;
5354         return (rv);
5355 }
5356 
5357 /*
5358  * Return the RX h/w information for the group indexed by grp_num.
5359  */
5360 void
5361 mac_get_hwrxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num,
5362     uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts,
5363     char *clnts_name)
5364 {
5365         mac_impl_t *mip = (mac_impl_t *)mh;
5366         mac_grp_client_t *mcip;
5367         uint_t i = 0, index = 0;
5368         mac_ring_t      *ring;
5369 
5370         /* Revisit when we implement fully dynamic group allocation */
5371         ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count);
5372 
5373         rw_enter(&mip->mi_rw_lock, RW_READER);
5374         *grp_num = mip->mi_rx_groups[grp_index].mrg_index;
5375         *type = mip->mi_rx_groups[grp_index].mrg_type;
5376         *n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count;
5377         ring = mip->mi_rx_groups[grp_index].mrg_rings;
5378         for (index = 0; index < mip->mi_rx_groups[grp_index].mrg_cur_count;
5379             index++) {
5380                 rings[index] = ring->mr_index;
5381                 ring = ring->mr_next;
5382         }
5383         /* Assuming the 1st is the default group */
5384         index = 0;
5385         if (grp_index == 0) {
5386                 (void) strlcpy(clnts_name, "<default,mcast>,",
5387                     MAXCLIENTNAMELEN);
5388                 index += strlen("<default,mcast>,");
5389         }
5390         for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL;
5391             mcip = mcip->mgc_next) {
5392                 int name_len = strlen(mcip->mgc_client->mci_name);
5393 
5394                 /*
5395                  * MAXCLIENTNAMELEN is the buffer size reserved for client
5396                  * names.
5397                  * XXXX Formating the client name string needs to be moved
5398                  * to user land when fixing the size of dhi_clnts in
5399                  * dld_hwgrpinfo_t. We should use n_clients * client_name for
5400                  * dhi_clntsin instead of MAXCLIENTNAMELEN
5401                  */
5402                 if (index + name_len >= MAXCLIENTNAMELEN) {
5403                         index = MAXCLIENTNAMELEN;
5404                         break;
5405                 }
5406                 bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]),
5407                     name_len);
5408                 index += name_len;
5409                 clnts_name[index++] = ',';
5410                 i++;
5411         }
5412 
5413         /* Get rid of the last , */
5414         if (index > 0)
5415                 clnts_name[index - 1] = '\0';
5416         *n_clnts = i;
5417         rw_exit(&mip->mi_rw_lock);
5418 }
5419 
5420 /*
5421  * Return the TX h/w information for the group indexed by grp_num.
5422  */
5423 void
5424 mac_get_hwtxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num,
5425     uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts,
5426     char *clnts_name)
5427 {
5428         mac_impl_t *mip = (mac_impl_t *)mh;
5429         mac_grp_client_t *mcip;
5430         uint_t i = 0, index = 0;
5431         mac_ring_t      *ring;
5432 
5433         /* Revisit when we implement fully dynamic group allocation */
5434         ASSERT(grp_index >= 0 && grp_index <= mip->mi_tx_group_count);
5435 
5436         rw_enter(&mip->mi_rw_lock, RW_READER);
5437         *grp_num = mip->mi_tx_groups[grp_index].mrg_index > 0 ?
5438             mip->mi_tx_groups[grp_index].mrg_index : grp_index;
5439         *type = mip->mi_tx_groups[grp_index].mrg_type;
5440         *n_rings = mip->mi_tx_groups[grp_index].mrg_cur_count;
5441         ring = mip->mi_tx_groups[grp_index].mrg_rings;
5442         for (index = 0; index < mip->mi_tx_groups[grp_index].mrg_cur_count;
5443             index++) {
5444                 rings[index] = ring->mr_index;
5445                 ring = ring->mr_next;
5446         }
5447         index = 0;
5448         /* Default group has an index of -1 */
5449         if (mip->mi_tx_groups[grp_index].mrg_index < 0) {
5450                 (void) strlcpy(clnts_name, "<default>,",
5451                     MAXCLIENTNAMELEN);
5452                 index += strlen("<default>,");
5453         }
5454         for (mcip = mip->mi_tx_groups[grp_index].mrg_clients; mcip != NULL;
5455             mcip = mcip->mgc_next) {
5456                 int name_len = strlen(mcip->mgc_client->mci_name);
5457 
5458                 /*
5459                  * MAXCLIENTNAMELEN is the buffer size reserved for client
5460                  * names.
5461                  * XXXX Formating the client name string needs to be moved
5462                  * to user land when fixing the size of dhi_clnts in
5463                  * dld_hwgrpinfo_t. We should use n_clients * client_name for
5464                  * dhi_clntsin instead of MAXCLIENTNAMELEN
5465                  */
5466                 if (index + name_len >= MAXCLIENTNAMELEN) {
5467                         index = MAXCLIENTNAMELEN;
5468                         break;
5469                 }
5470                 bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]),
5471                     name_len);
5472                 index += name_len;
5473                 clnts_name[index++] = ',';
5474                 i++;
5475         }
5476 
5477         /* Get rid of the last , */
5478         if (index > 0)
5479                 clnts_name[index - 1] = '\0';
5480         *n_clnts = i;
5481         rw_exit(&mip->mi_rw_lock);
5482 }
5483 
5484 /*
5485  * Return the group count for RX or TX.
5486  */
5487 uint_t
5488 mac_hwgrp_num(mac_handle_t mh, int type)
5489 {
5490         mac_impl_t *mip = (mac_impl_t *)mh;
5491 
5492         /*
5493          * Return the Rx and Tx group count; for the Tx we need to
5494          * include the default too.
5495          */
5496         return (type == MAC_RING_TYPE_RX ? mip->mi_rx_group_count :
5497             mip->mi_tx_groups != NULL ? mip->mi_tx_group_count + 1 : 0);
5498 }
5499 
5500 /*
5501  * The total number of free TX rings for this MAC.
5502  */
5503 uint_t
5504 mac_txavail_get(mac_handle_t mh)
5505 {
5506         mac_impl_t      *mip = (mac_impl_t *)mh;
5507 
5508         return (mip->mi_txrings_avail);
5509 }
5510 
5511 /*
5512  * The total number of free RX rings for this MAC.
5513  */
5514 uint_t
5515 mac_rxavail_get(mac_handle_t mh)
5516 {
5517         mac_impl_t      *mip = (mac_impl_t *)mh;
5518 
5519         return (mip->mi_rxrings_avail);
5520 }
5521 
5522 /*
5523  * The total number of reserved RX rings on this MAC.
5524  */
5525 uint_t
5526 mac_rxrsvd_get(mac_handle_t mh)
5527 {
5528         mac_impl_t      *mip = (mac_impl_t *)mh;
5529 
5530         return (mip->mi_rxrings_rsvd);
5531 }
5532 
5533 /*
5534  * The total number of reserved TX rings on this MAC.
5535  */
5536 uint_t
5537 mac_txrsvd_get(mac_handle_t mh)
5538 {
5539         mac_impl_t      *mip = (mac_impl_t *)mh;
5540 
5541         return (mip->mi_txrings_rsvd);
5542 }
5543 
5544 /*
5545  * Total number of free RX groups on this MAC.
5546  */
5547 uint_t
5548 mac_rxhwlnksavail_get(mac_handle_t mh)
5549 {
5550         mac_impl_t      *mip = (mac_impl_t *)mh;
5551 
5552         return (mip->mi_rxhwclnt_avail);
5553 }
5554 
5555 /*
5556  * Total number of RX groups reserved on this MAC.
5557  */
5558 uint_t
5559 mac_rxhwlnksrsvd_get(mac_handle_t mh)
5560 {
5561         mac_impl_t      *mip = (mac_impl_t *)mh;
5562 
5563         return (mip->mi_rxhwclnt_used);
5564 }
5565 
5566 /*
5567  * Total number of free TX groups on this MAC.
5568  */
5569 uint_t
5570 mac_txhwlnksavail_get(mac_handle_t mh)
5571 {
5572         mac_impl_t      *mip = (mac_impl_t *)mh;
5573 
5574         return (mip->mi_txhwclnt_avail);
5575 }
5576 
5577 /*
5578  * Total number of TX groups reserved on this MAC.
5579  */
5580 uint_t
5581 mac_txhwlnksrsvd_get(mac_handle_t mh)
5582 {
5583         mac_impl_t      *mip = (mac_impl_t *)mh;
5584 
5585         return (mip->mi_txhwclnt_used);
5586 }
5587 
5588 /*
5589  * Initialize the rings property for a mac client. A non-0 value for
5590  * rxring or txring specifies the number of rings required, a value
5591  * of MAC_RXRINGS_NONE/MAC_TXRINGS_NONE specifies that it doesn't need
5592  * any RX/TX rings and a value of MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE
5593  * means the system can decide whether it can give any rings or not.
5594  */
5595 void
5596 mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings)
5597 {
5598         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5599         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
5600 
5601         if (rxrings != MAC_RXRINGS_DONTCARE) {
5602                 mrp->mrp_mask |= MRP_RX_RINGS;
5603                 mrp->mrp_nrxrings = rxrings;
5604         }
5605 
5606         if (txrings != MAC_TXRINGS_DONTCARE) {
5607                 mrp->mrp_mask |= MRP_TX_RINGS;
5608                 mrp->mrp_ntxrings = txrings;
5609         }
5610 }
5611 
5612 boolean_t
5613 mac_get_promisc_filtered(mac_client_handle_t mch)
5614 {
5615         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5616 
5617         return (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED);
5618 }
5619 
5620 void
5621 mac_set_promisc_filtered(mac_client_handle_t mch, boolean_t enable)
5622 {
5623         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5624 
5625         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
5626         if (enable)
5627                 mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
5628         else
5629                 mcip->mci_protect_flags &= ~MPT_FLAG_PROMISC_FILTERED;
5630 }