1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  * Copyright 2017 RackTop Systems.
  26  */
  27 
  28 /*
  29  * - General Introduction:
  30  *
  31  * This file contains the implementation of the MAC client kernel
  32  * API and related code. The MAC client API allows a kernel module
  33  * to gain access to a MAC instance (physical NIC, link aggregation, etc).
  34  * It allows a MAC client to associate itself with a MAC address,
  35  * VLANs, callback functions for data traffic and for promiscuous mode.
  36  * The MAC client API is also used to specify the properties associated
  37  * with a MAC client, such as bandwidth limits, priority, CPUS, etc.
  38  * These properties are further used to determine the hardware resources
  39  * to allocate to the various MAC clients.
  40  *
  41  * - Primary MAC clients:
  42  *
  43  * The MAC client API refers to "primary MAC clients". A primary MAC
  44  * client is a client which "owns" the primary MAC address of
  45  * the underlying MAC instance. The primary MAC address is called out
  46  * since it is associated with specific semantics: the primary MAC
  47  * address is the MAC address which is assigned to the IP interface
  48  * when it is plumbed, and the primary MAC address is assigned
  49  * to VLAN data-links. The primary address of a MAC instance can
  50  * also change dynamically from under the MAC client, for example
  51  * as a result of a change of state of a link aggregation. In that
  52  * case the MAC layer automatically updates all data-structures which
  53  * refer to the current value of the primary MAC address. Typical
  54  * primary MAC clients are dls, aggr, and xnb. A typical non-primary
  55  * MAC client is the vnic driver.
  56  *
  57  * - Virtual Switching:
  58  *
   59  * The MAC layer implements a virtual switch between the MAC clients
   60  * (primary and non-primary) defined on top of the same underlying
   61  * NIC (physical, link aggregation, etc). The virtual switch is
   62  * VLAN-aware, i.e. it allows a MAC client to be a member of one
   63  * or more VLANs, and the virtual switch will distribute tagged
   64  * multicast packets only to the members of the corresponding
   65  * VLANs.
  66  *
  67  * - Upper vs Lower MAC:
  68  *
  69  * Creating a VNIC on top of a MAC instance effectively causes
  70  * two MAC instances to be layered on top of each other, one for
  71  * the VNIC(s), one for the underlying MAC instance (physical NIC,
  72  * link aggregation, etc). In the code below we refer to the
  73  * underlying NIC as the "lower MAC", and we refer to VNICs as
  74  * the "upper MAC".
  75  *
  76  * - Pass-through for VNICs:
  77  *
  78  * When VNICs are created on top of an underlying MAC, this causes
  79  * a layering of two MAC instances. Since the lower MAC already
  80  * does the switching and demultiplexing to its MAC clients, the
  81  * upper MAC would simply have to pass packets to the layer below
  82  * or above it, which would introduce overhead. In order to avoid
  83  * this overhead, the MAC layer implements a pass-through mechanism
   84  * for VNICs. When a VNIC opens the lower MAC instance, it saves
   85  * the MAC client handle it obtains from the MAC layer. When a MAC
  86  * client opens a VNIC (upper MAC), the MAC layer detects that
  87  * the MAC being opened is a VNIC, and gets the MAC client handle
  88  * that the VNIC driver obtained from the lower MAC. This exchange
  89  * is done through a private capability between the MAC layer
  90  * and the VNIC driver. The upper MAC then returns that handle
  91  * directly to its MAC client. Any operation done by the upper
  92  * MAC client is now done on the lower MAC client handle, which
  93  * allows the VNIC driver to be completely bypassed for the
  94  * performance sensitive data-path.
  95  *
  96  * - Secondary MACs for VNICs:
  97  *
  98  * VNICs support multiple upper mac clients to enable support for
  99  * multiple MAC addresses on the VNIC. When the VNIC is created the
 100  * initial mac client is the primary upper mac. Any additional mac
 101  * clients are secondary macs. These are kept in sync with the primary
 102  * (for things such as the rx function and resource control settings)
 103  * using the same private capability interface between the MAC layer
 104  * and the VNIC layer.
 105  *
 106  */
 107 
 108 #include <sys/types.h>
 109 #include <sys/conf.h>
 110 #include <sys/id_space.h>
 111 #include <sys/esunddi.h>
 112 #include <sys/stat.h>
 113 #include <sys/mkdev.h>
 114 #include <sys/stream.h>
 115 #include <sys/strsun.h>
 116 #include <sys/strsubr.h>
 117 #include <sys/dlpi.h>
 118 #include <sys/modhash.h>
 119 #include <sys/mac_impl.h>
 120 #include <sys/mac_client_impl.h>
 121 #include <sys/mac_soft_ring.h>
 122 #include <sys/mac_stat.h>
 123 #include <sys/dls.h>
 124 #include <sys/dld.h>
 125 #include <sys/modctl.h>
 126 #include <sys/fs/dv_node.h>
 127 #include <sys/thread.h>
 128 #include <sys/proc.h>
 129 #include <sys/callb.h>
 130 #include <sys/cpuvar.h>
 131 #include <sys/atomic.h>
 132 #include <sys/sdt.h>
 133 #include <sys/mac_flow.h>
 134 #include <sys/ddi_intr_impl.h>
 135 #include <sys/disp.h>
 136 #include <sys/sdt.h>
 137 #include <sys/vnic.h>
 138 #include <sys/vnic_impl.h>
 139 #include <sys/vlan.h>
 140 #include <inet/ip.h>
 141 #include <inet/ip6.h>
 142 #include <sys/exacct.h>
 143 #include <sys/exacct_impl.h>
 144 #include <inet/nd.h>
 145 #include <sys/ethernet.h>
 146 
/*
 * kmem caches backing mac_client_impl_t and mac_promisc_impl_t allocations.
 * Both are created in mac_client_init() and destroyed in mac_client_fini().
 */
kmem_cache_t    *mac_client_impl_cache;
kmem_cache_t    *mac_promisc_impl_cache;

/* Forward declarations of file-local helpers. */
static boolean_t mac_client_single_rcvr(mac_client_impl_t *);
static flow_entry_t *mac_client_swap_mciflent(mac_client_impl_t *);
static flow_entry_t *mac_client_get_flow(mac_client_impl_t *,
    mac_unicast_impl_t *);
static void mac_client_remove_flow_from_list(mac_client_impl_t *,
    flow_entry_t *);
static void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *);
static void mac_rename_flow_names(mac_client_impl_t *, const char *);
static void mac_virtual_link_update(mac_impl_t *);
static int mac_client_datapath_setup(mac_client_impl_t *, uint16_t,
    uint8_t *, mac_resource_props_t *, boolean_t, mac_unicast_impl_t *);
static void mac_client_datapath_teardown(mac_client_handle_t,
    mac_unicast_impl_t *, flow_entry_t *);
static int mac_resource_ctl_set(mac_client_handle_t, mac_resource_props_t *);
 164 
 165 /* ARGSUSED */
 166 static int
 167 i_mac_client_impl_ctor(void *buf, void *arg, int kmflag)
 168 {
 169         int     i;
 170         mac_client_impl_t       *mcip = buf;
 171 
 172         bzero(buf, MAC_CLIENT_IMPL_SIZE);
 173         mutex_init(&mcip->mci_tx_cb_lock, NULL, MUTEX_DRIVER, NULL);
 174         mcip->mci_tx_notify_cb_info.mcbi_lockp = &mcip->mci_tx_cb_lock;
 175 
 176         ASSERT(mac_tx_percpu_cnt >= 0);
 177         for (i = 0; i <= mac_tx_percpu_cnt; i++) {
 178                 mutex_init(&mcip->mci_tx_pcpu[i].pcpu_tx_lock, NULL,
 179                     MUTEX_DRIVER, NULL);
 180         }
 181         cv_init(&mcip->mci_tx_cv, NULL, CV_DRIVER, NULL);
 182 
 183         return (0);
 184 }
 185 
 186 /* ARGSUSED */
 187 static void
 188 i_mac_client_impl_dtor(void *buf, void *arg)
 189 {
 190         int     i;
 191         mac_client_impl_t *mcip = buf;
 192 
 193         ASSERT(mcip->mci_promisc_list == NULL);
 194         ASSERT(mcip->mci_unicast_list == NULL);
 195         ASSERT(mcip->mci_state_flags == 0);
 196         ASSERT(mcip->mci_tx_flag == 0);
 197 
 198         mutex_destroy(&mcip->mci_tx_cb_lock);
 199 
 200         ASSERT(mac_tx_percpu_cnt >= 0);
 201         for (i = 0; i <= mac_tx_percpu_cnt; i++) {
 202                 ASSERT(mcip->mci_tx_pcpu[i].pcpu_tx_refcnt == 0);
 203                 mutex_destroy(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 204         }
 205         cv_destroy(&mcip->mci_tx_cv);
 206 }
 207 
 208 /* ARGSUSED */
 209 static int
 210 i_mac_promisc_impl_ctor(void *buf, void *arg, int kmflag)
 211 {
 212         mac_promisc_impl_t      *mpip = buf;
 213 
 214         bzero(buf, sizeof (mac_promisc_impl_t));
 215         mpip->mpi_mci_link.mcb_objp = buf;
 216         mpip->mpi_mci_link.mcb_objsize = sizeof (mac_promisc_impl_t);
 217         mpip->mpi_mi_link.mcb_objp = buf;
 218         mpip->mpi_mi_link.mcb_objsize = sizeof (mac_promisc_impl_t);
 219         return (0);
 220 }
 221 
 222 /* ARGSUSED */
 223 static void
 224 i_mac_promisc_impl_dtor(void *buf, void *arg)
 225 {
 226         mac_promisc_impl_t      *mpip = buf;
 227 
 228         ASSERT(mpip->mpi_mci_link.mcb_objp != NULL);
 229         ASSERT(mpip->mpi_mci_link.mcb_objsize == sizeof (mac_promisc_impl_t));
 230         ASSERT(mpip->mpi_mi_link.mcb_objp == mpip->mpi_mci_link.mcb_objp);
 231         ASSERT(mpip->mpi_mi_link.mcb_objsize == sizeof (mac_promisc_impl_t));
 232 
 233         mpip->mpi_mci_link.mcb_objp = NULL;
 234         mpip->mpi_mci_link.mcb_objsize = 0;
 235         mpip->mpi_mi_link.mcb_objp = NULL;
 236         mpip->mpi_mi_link.mcb_objsize = 0;
 237 
 238         ASSERT(mpip->mpi_mci_link.mcb_flags == 0);
 239         mpip->mpi_mci_link.mcb_objsize = 0;
 240 }
 241 
 242 void
 243 mac_client_init(void)
 244 {
 245         ASSERT(mac_tx_percpu_cnt >= 0);
 246 
 247         mac_client_impl_cache = kmem_cache_create("mac_client_impl_cache",
 248             MAC_CLIENT_IMPL_SIZE, 0, i_mac_client_impl_ctor,
 249             i_mac_client_impl_dtor, NULL, NULL, NULL, 0);
 250         ASSERT(mac_client_impl_cache != NULL);
 251 
 252         mac_promisc_impl_cache = kmem_cache_create("mac_promisc_impl_cache",
 253             sizeof (mac_promisc_impl_t), 0, i_mac_promisc_impl_ctor,
 254             i_mac_promisc_impl_dtor, NULL, NULL, NULL, 0);
 255         ASSERT(mac_promisc_impl_cache != NULL);
 256 }
 257 
 258 void
 259 mac_client_fini(void)
 260 {
 261         kmem_cache_destroy(mac_client_impl_cache);
 262         kmem_cache_destroy(mac_promisc_impl_cache);
 263 }
 264 
 265 /*
 266  * Return the lower MAC client handle from the VNIC driver for the
 267  * specified VNIC MAC instance.
 268  */
 269 mac_client_impl_t *
 270 mac_vnic_lower(mac_impl_t *mip)
 271 {
 272         mac_capab_vnic_t cap;
 273         mac_client_impl_t *mcip;
 274 
 275         VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
 276         mcip = cap.mcv_mac_client_handle(cap.mcv_arg);
 277 
 278         return (mcip);
 279 }
 280 
 281 /*
 282  * Update the secondary macs
 283  */
 284 void
 285 mac_vnic_secondary_update(mac_impl_t *mip)
 286 {
 287         mac_capab_vnic_t cap;
 288 
 289         VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
 290         cap.mcv_mac_secondary_update(cap.mcv_arg);
 291 }
 292 
 293 /*
 294  * Return the MAC client handle of the primary MAC client for the
 295  * specified MAC instance, or NULL otherwise.
 296  */
 297 mac_client_impl_t *
 298 mac_primary_client_handle(mac_impl_t *mip)
 299 {
 300         mac_client_impl_t *mcip;
 301 
 302         if (mip->mi_state_flags & MIS_IS_VNIC)
 303                 return (mac_vnic_lower(mip));
 304 
 305         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 306 
 307         for (mcip = mip->mi_clients_list; mcip != NULL;
 308             mcip = mcip->mci_client_next) {
 309                 if (MCIP_DATAPATH_SETUP(mcip) && mac_is_primary_client(mcip))
 310                         return (mcip);
 311         }
 312         return (NULL);
 313 }
 314 
 315 /*
 316  * Open a MAC specified by its MAC name.
 317  */
 318 int
 319 mac_open(const char *macname, mac_handle_t *mhp)
 320 {
 321         mac_impl_t      *mip;
 322         int             err;
 323 
 324         /*
 325          * Look up its entry in the global hash table.
 326          */
 327         if ((err = mac_hold(macname, &mip)) != 0)
 328                 return (err);
 329 
 330         /*
 331          * Hold the dip associated to the MAC to prevent it from being
 332          * detached. For a softmac, its underlying dip is held by the
 333          * mi_open() callback.
 334          *
 335          * This is done to be more tolerant with some defective drivers,
 336          * which incorrectly handle mac_unregister() failure in their
 337          * xxx_detach() routine. For example, some drivers ignore the
 338          * failure of mac_unregister() and free all resources that
 339          * that are needed for data transmition.
 340          */
 341         e_ddi_hold_devi(mip->mi_dip);
 342 
 343         if (!(mip->mi_callbacks->mc_callbacks & MC_OPEN)) {
 344                 *mhp = (mac_handle_t)mip;
 345                 return (0);
 346         }
 347 
 348         /*
 349          * The mac perimeter is used in both mac_open and mac_close by the
 350          * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
 351          */
 352         i_mac_perim_enter(mip);
 353         mip->mi_oref++;
 354         if (mip->mi_oref != 1 || ((err = mip->mi_open(mip->mi_driver)) == 0)) {
 355                 *mhp = (mac_handle_t)mip;
 356                 i_mac_perim_exit(mip);
 357                 return (0);
 358         }
 359         mip->mi_oref--;
 360         ddi_release_devi(mip->mi_dip);
 361         mac_rele(mip);
 362         i_mac_perim_exit(mip);
 363         return (err);
 364 }
 365 
 366 /*
 367  * Open a MAC specified by its linkid.
 368  */
 369 int
 370 mac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
 371 {
 372         dls_dl_handle_t dlh;
 373         int             err;
 374 
 375         if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
 376                 return (err);
 377 
 378         dls_devnet_prop_task_wait(dlh);
 379 
 380         err = mac_open(dls_devnet_mac(dlh), mhp);
 381 
 382         dls_devnet_rele_tmp(dlh);
 383         return (err);
 384 }
 385 
 386 /*
 387  * Open a MAC specified by its link name.
 388  */
 389 int
 390 mac_open_by_linkname(const char *link, mac_handle_t *mhp)
 391 {
 392         datalink_id_t   linkid;
 393         int             err;
 394 
 395         if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0)
 396                 return (err);
 397         return (mac_open_by_linkid(linkid, mhp));
 398 }
 399 
 400 /*
 401  * Close the specified MAC.
 402  */
 403 void
 404 mac_close(mac_handle_t mh)
 405 {
 406         mac_impl_t      *mip = (mac_impl_t *)mh;
 407 
 408         i_mac_perim_enter(mip);
 409         /*
 410          * The mac perimeter is used in both mac_open and mac_close by the
 411          * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
 412          */
 413         if (mip->mi_callbacks->mc_callbacks & MC_OPEN) {
 414                 ASSERT(mip->mi_oref != 0);
 415                 if (--mip->mi_oref == 0) {
 416                         if ((mip->mi_callbacks->mc_callbacks & MC_CLOSE))
 417                                 mip->mi_close(mip->mi_driver);
 418                 }
 419         }
 420         i_mac_perim_exit(mip);
 421         ddi_release_devi(mip->mi_dip);
 422         mac_rele(mip);
 423 }
 424 
 425 /*
 426  * Misc utility functions to retrieve various information about a MAC
 427  * instance or a MAC client.
 428  */
 429 
 430 const mac_info_t *
 431 mac_info(mac_handle_t mh)
 432 {
 433         return (&((mac_impl_t *)mh)->mi_info);
 434 }
 435 
 436 dev_info_t *
 437 mac_devinfo_get(mac_handle_t mh)
 438 {
 439         return (((mac_impl_t *)mh)->mi_dip);
 440 }
 441 
 442 void *
 443 mac_driver(mac_handle_t mh)
 444 {
 445         return (((mac_impl_t *)mh)->mi_driver);
 446 }
 447 
 448 const char *
 449 mac_name(mac_handle_t mh)
 450 {
 451         return (((mac_impl_t *)mh)->mi_name);
 452 }
 453 
 454 int
 455 mac_type(mac_handle_t mh)
 456 {
 457         return (((mac_impl_t *)mh)->mi_type->mt_type);
 458 }
 459 
 460 int
 461 mac_nativetype(mac_handle_t mh)
 462 {
 463         return (((mac_impl_t *)mh)->mi_type->mt_nativetype);
 464 }
 465 
 466 char *
 467 mac_client_name(mac_client_handle_t mch)
 468 {
 469         return (((mac_client_impl_t *)mch)->mci_name);
 470 }
 471 
 472 minor_t
 473 mac_minor(mac_handle_t mh)
 474 {
 475         return (((mac_impl_t *)mh)->mi_minor);
 476 }
 477 
 478 /*
 479  * Return the VID associated with a MAC client. This function should
 480  * be called for clients which are associated with only one VID.
 481  */
 482 uint16_t
 483 mac_client_vid(mac_client_handle_t mch)
 484 {
 485         uint16_t                vid = VLAN_ID_NONE;
 486         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
 487         flow_desc_t             flow_desc;
 488 
 489         if (mcip->mci_nflents == 0)
 490                 return (vid);
 491 
 492         ASSERT(MCIP_DATAPATH_SETUP(mcip) && mac_client_single_rcvr(mcip));
 493 
 494         mac_flow_get_desc(mcip->mci_flent, &flow_desc);
 495         if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
 496                 vid = flow_desc.fd_vid;
 497 
 498         return (vid);
 499 }
 500 
 501 /*
 502  * Return whether the specified MAC client corresponds to a VLAN VNIC.
 503  */
 504 boolean_t
 505 mac_client_is_vlan_vnic(mac_client_handle_t mch)
 506 {
 507         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
 508 
 509         return (((mcip->mci_state_flags & MCIS_IS_VNIC) != 0) &&
 510             ((mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) != 0));
 511 }
 512 
 513 /*
 514  * Return the link speed associated with the specified MAC client.
 515  *
 516  * The link speed of a MAC client is equal to the smallest value of
 517  * 1) the current link speed of the underlying NIC, or
 518  * 2) the bandwidth limit set for the MAC client.
 519  *
 520  * Note that the bandwidth limit can be higher than the speed
 521  * of the underlying NIC. This is allowed to avoid spurious
 522  * administration action failures or artifically lowering the
 523  * bandwidth limit of a link that may  have temporarily lowered
 524  * its link speed due to hardware problem or administrator action.
 525  */
 526 static uint64_t
 527 mac_client_ifspeed(mac_client_impl_t *mcip)
 528 {
 529         mac_impl_t *mip = mcip->mci_mip;
 530         uint64_t nic_speed;
 531 
 532         nic_speed = mac_stat_get((mac_handle_t)mip, MAC_STAT_IFSPEED);
 533 
 534         if (nic_speed == 0) {
 535                 return (0);
 536         } else {
 537                 uint64_t policy_limit = (uint64_t)-1;
 538 
 539                 if (MCIP_RESOURCE_PROPS_MASK(mcip) & MRP_MAXBW)
 540                         policy_limit = MCIP_RESOURCE_PROPS_MAXBW(mcip);
 541 
 542                 return (MIN(policy_limit, nic_speed));
 543         }
 544 }
 545 
 546 /*
 547  * Return the link state of the specified client. If here are more
 548  * than one clients of the underying mac_impl_t, the link state
 549  * will always be UP regardless of the link state of the underlying
 550  * mac_impl_t. This is needed to allow the MAC clients to continue
 551  * to communicate with each other even when the physical link of
 552  * their mac_impl_t is down.
 553  */
 554 static uint64_t
 555 mac_client_link_state(mac_client_impl_t *mcip)
 556 {
 557         mac_impl_t *mip = mcip->mci_mip;
 558         uint16_t vid;
 559         mac_client_impl_t *mci_list;
 560         mac_unicast_impl_t *mui_list, *oth_mui_list;
 561 
 562         /*
 563          * Returns LINK_STATE_UP if there are other MAC clients defined on
 564          * mac_impl_t which share same VLAN ID as that of mcip. Note that
 565          * if 'mcip' has more than one VID's then we match ANY one of the
 566          * VID's with other MAC client's VID's and return LINK_STATE_UP.
 567          */
 568         rw_enter(&mcip->mci_rw_lock, RW_READER);
 569         for (mui_list = mcip->mci_unicast_list; mui_list != NULL;
 570             mui_list = mui_list->mui_next) {
 571                 vid = mui_list->mui_vid;
 572                 for (mci_list = mip->mi_clients_list; mci_list != NULL;
 573                     mci_list = mci_list->mci_client_next) {
 574                         if (mci_list == mcip)
 575                                 continue;
 576                         for (oth_mui_list = mci_list->mci_unicast_list;
 577                             oth_mui_list != NULL; oth_mui_list = oth_mui_list->
 578                             mui_next) {
 579                                 if (vid == oth_mui_list->mui_vid) {
 580                                         rw_exit(&mcip->mci_rw_lock);
 581                                         return (LINK_STATE_UP);
 582                                 }
 583                         }
 584                 }
 585         }
 586         rw_exit(&mcip->mci_rw_lock);
 587 
 588         return (mac_stat_get((mac_handle_t)mip, MAC_STAT_LINK_STATE));
 589 }
 590 
 591 /*
 592  * These statistics are consumed by dladm show-link -s <vnic>,
 593  * dladm show-vnic -s and netstat. With the introduction of dlstat,
  594  * dladm show-link -s and dladm show-vnic -s will be EOL'ed while
 595  * netstat will consume from kstats introduced for dlstat. This code
 596  * will be removed at that time.
 597  */
 598 
 599 /*
 600  * Return the statistics of a MAC client. These statistics are different
 601  * then the statistics of the underlying MAC which are returned by
 602  * mac_stat_get().
 603  *
 604  * Note that for things based on the tx and rx stats, mac will end up clobbering
 605  * those stats when the underlying set of rings in the srs changes. As such, we
 606  * need to source not only the current set, but also the historical set when
 607  * returning to the client, lest our counters appear to go backwards.
 608  */
 609 uint64_t
 610 mac_client_stat_get(mac_client_handle_t mch, uint_t stat)
 611 {
 612         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
 613         mac_impl_t              *mip = mcip->mci_mip;
 614         flow_entry_t            *flent = mcip->mci_flent;
 615         mac_soft_ring_set_t     *mac_srs;
 616         mac_rx_stats_t          *mac_rx_stat, *old_rx_stat;
 617         mac_tx_stats_t          *mac_tx_stat, *old_tx_stat;
 618         int i;
 619         uint64_t val = 0;
 620 
 621         mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
 622         mac_tx_stat = &mac_srs->srs_tx.st_stat;
 623         old_rx_stat = &mcip->mci_misc_stat.mms_defunctrxlanestats;
 624         old_tx_stat = &mcip->mci_misc_stat.mms_defuncttxlanestats;
 625 
 626         switch (stat) {
 627         case MAC_STAT_LINK_STATE:
 628                 val = mac_client_link_state(mcip);
 629                 break;
 630         case MAC_STAT_LINK_UP:
 631                 val = (mac_client_link_state(mcip) == LINK_STATE_UP);
 632                 break;
 633         case MAC_STAT_PROMISC:
 634                 val = mac_stat_get((mac_handle_t)mip, MAC_STAT_PROMISC);
 635                 break;
 636         case MAC_STAT_LOWLINK_STATE:
 637                 val = mac_stat_get((mac_handle_t)mip, MAC_STAT_LOWLINK_STATE);
 638                 break;
 639         case MAC_STAT_IFSPEED:
 640                 val = mac_client_ifspeed(mcip);
 641                 break;
 642         case MAC_STAT_MULTIRCV:
 643                 val = mcip->mci_misc_stat.mms_multircv;
 644                 break;
 645         case MAC_STAT_BRDCSTRCV:
 646                 val = mcip->mci_misc_stat.mms_brdcstrcv;
 647                 break;
 648         case MAC_STAT_MULTIXMT:
 649                 val = mcip->mci_misc_stat.mms_multixmt;
 650                 break;
 651         case MAC_STAT_BRDCSTXMT:
 652                 val = mcip->mci_misc_stat.mms_brdcstxmt;
 653                 break;
 654         case MAC_STAT_OBYTES:
 655                 val = mac_tx_stat->mts_obytes;
 656                 val += old_tx_stat->mts_obytes;
 657                 break;
 658         case MAC_STAT_OPACKETS:
 659                 val = mac_tx_stat->mts_opackets;
 660                 val += old_tx_stat->mts_opackets;
 661                 break;
 662         case MAC_STAT_OERRORS:
 663                 val = mac_tx_stat->mts_oerrors;
 664                 val += old_tx_stat->mts_oerrors;
 665                 break;
 666         case MAC_STAT_IPACKETS:
 667                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 668                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 669                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 670                         val += mac_rx_stat->mrs_intrcnt +
 671                             mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
 672                 }
 673                 val += old_rx_stat->mrs_intrcnt + old_rx_stat->mrs_pollcnt +
 674                     old_rx_stat->mrs_lclcnt;
 675                 break;
 676         case MAC_STAT_RBYTES:
 677                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 678                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 679                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 680                         val += mac_rx_stat->mrs_intrbytes +
 681                             mac_rx_stat->mrs_pollbytes +
 682                             mac_rx_stat->mrs_lclbytes;
 683                 }
 684                 val += old_rx_stat->mrs_intrbytes + old_rx_stat->mrs_pollbytes +
 685                     old_rx_stat->mrs_lclbytes;
 686                 break;
 687         case MAC_STAT_IERRORS:
 688                 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
 689                         mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
 690                         mac_rx_stat = &mac_srs->srs_rx.sr_stat;
 691                         val += mac_rx_stat->mrs_ierrors;
 692                 }
 693                 val += old_rx_stat->mrs_ierrors;
 694                 break;
 695         default:
 696                 val = mac_driver_stat_default(mip, stat);
 697                 break;
 698         }
 699 
 700         return (val);
 701 }
 702 
 703 /*
 704  * Return the statistics of the specified MAC instance.
 705  */
 706 uint64_t
 707 mac_stat_get(mac_handle_t mh, uint_t stat)
 708 {
 709         mac_impl_t      *mip = (mac_impl_t *)mh;
 710         uint64_t        val;
 711         int             ret;
 712 
 713         /*
 714          * The range of stat determines where it is maintained.  Stat
 715          * values from 0 up to (but not including) MAC_STAT_MIN are
 716          * mainteined by the mac module itself.  Everything else is
 717          * maintained by the driver.
 718          *
 719          * If the mac_impl_t being queried corresponds to a VNIC,
 720          * the stats need to be queried from the lower MAC client
 721          * corresponding to the VNIC. (The mac_link_update()
 722          * invoked by the driver to the lower MAC causes the *lower
 723          * MAC* to update its mi_linkstate, and send a notification
 724          * to its MAC clients. Due to the VNIC passthrough,
 725          * these notifications are sent to the upper MAC clients
 726          * of the VNIC directly, and the upper mac_impl_t of the VNIC
 727          * does not have a valid mi_linkstate.
 728          */
 729         if (stat < MAC_STAT_MIN && !(mip->mi_state_flags & MIS_IS_VNIC)) {
 730                 /* these stats are maintained by the mac module itself */
 731                 switch (stat) {
 732                 case MAC_STAT_LINK_STATE:
 733                         return (mip->mi_linkstate);
 734                 case MAC_STAT_LINK_UP:
 735                         return (mip->mi_linkstate == LINK_STATE_UP);
 736                 case MAC_STAT_PROMISC:
 737                         return (mip->mi_devpromisc != 0);
 738                 case MAC_STAT_LOWLINK_STATE:
 739                         return (mip->mi_lowlinkstate);
 740                 default:
 741                         ASSERT(B_FALSE);
 742                 }
 743         }
 744 
 745         /*
 746          * Call the driver to get the given statistic.
 747          */
 748         ret = mip->mi_getstat(mip->mi_driver, stat, &val);
 749         if (ret != 0) {
 750                 /*
 751                  * The driver doesn't support this statistic.  Get the
 752                  * statistic's default value.
 753                  */
 754                 val = mac_driver_stat_default(mip, stat);
 755         }
 756         return (val);
 757 }
 758 
 759 /*
 760  * Query hardware rx ring corresponding to the pseudo ring.
 761  */
 762 uint64_t
 763 mac_pseudo_rx_ring_stat_get(mac_ring_handle_t handle, uint_t stat)
 764 {
 765         return (mac_rx_ring_stat_get(handle, stat));
 766 }
 767 
 768 /*
 769  * Query hardware tx ring corresponding to the pseudo ring.
 770  */
 771 uint64_t
 772 mac_pseudo_tx_ring_stat_get(mac_ring_handle_t handle, uint_t stat)
 773 {
 774         return (mac_tx_ring_stat_get(handle, stat));
 775 }
 776 
 777 /*
 778  * Utility function which returns the VID associated with a flow entry.
 779  */
 780 uint16_t
 781 i_mac_flow_vid(flow_entry_t *flent)
 782 {
 783         flow_desc_t     flow_desc;
 784 
 785         mac_flow_get_desc(flent, &flow_desc);
 786 
 787         if ((flow_desc.fd_mask & FLOW_LINK_VID) != 0)
 788                 return (flow_desc.fd_vid);
 789         return (VLAN_ID_NONE);
 790 }
 791 
 792 /*
 793  * Verify the validity of the specified unicast MAC address. Returns B_TRUE
 794  * if the address is valid, B_FALSE otherwise (multicast address, or incorrect
 795  * length.
 796  */
 797 boolean_t
 798 mac_unicst_verify(mac_handle_t mh, const uint8_t *addr, uint_t len)
 799 {
 800         mac_impl_t      *mip = (mac_impl_t *)mh;
 801 
 802         /*
 803          * Verify the address. No lock is needed since mi_type and plugin
 804          * details don't change after mac_register().
 805          */
 806         if ((len != mip->mi_type->mt_addr_length) ||
 807             (mip->mi_type->mt_ops.mtops_unicst_verify(addr,
 808             mip->mi_pdata)) != 0) {
 809                 return (B_FALSE);
 810         } else {
 811                 return (B_TRUE);
 812         }
 813 }
 814 
 815 void
 816 mac_sdu_get(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu)
 817 {
 818         mac_impl_t      *mip = (mac_impl_t *)mh;
 819 
 820         if (min_sdu != NULL)
 821                 *min_sdu = mip->mi_sdu_min;
 822         if (max_sdu != NULL)
 823                 *max_sdu = mip->mi_sdu_max;
 824 }
 825 
 826 void
 827 mac_sdu_get2(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu,
 828     uint_t *multicast_sdu)
 829 {
 830         mac_impl_t      *mip = (mac_impl_t *)mh;
 831 
 832         if (min_sdu != NULL)
 833                 *min_sdu = mip->mi_sdu_min;
 834         if (max_sdu != NULL)
 835                 *max_sdu = mip->mi_sdu_max;
 836         if (multicast_sdu != NULL)
 837                 *multicast_sdu = mip->mi_sdu_multicast;
 838 }
 839 
 840 /*
 841  * Update the MAC unicast address of the specified client's flows. Currently
 842  * only one unicast MAC unicast address is allowed per client.
 843  */
 844 static void
 845 mac_unicast_update_client_flow(mac_client_impl_t *mcip)
 846 {
 847         mac_impl_t *mip = mcip->mci_mip;
 848         flow_entry_t *flent = mcip->mci_flent;
 849         mac_address_t *map = mcip->mci_unicast;
 850         flow_desc_t flow_desc;
 851 
 852         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 853         ASSERT(flent != NULL);
 854 
 855         mac_flow_get_desc(flent, &flow_desc);
 856         ASSERT(flow_desc.fd_mask & FLOW_LINK_DST);
 857 
 858         bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
 859         mac_flow_set_desc(flent, &flow_desc);
 860 
 861         /*
 862          * The v6 local and SLAAC addrs (used by mac protection) need to be
 863          * regenerated because our mac address has changed.
 864          */
 865         mac_protect_update_mac_token(mcip);
 866 
 867         /*
 868          * When there are multiple VLANs sharing the same MAC address,
 869          * each gets its own MAC client, except when running on sun4v
 870          * vsw. In that case the mci_flent_list is used to place
 871          * multiple VLAN flows on one MAC client. If we ever get rid
 872          * of vsw then this code can go, but until then we need to
 873          * update all flow entries.
 874          */
 875         for (flent = mcip->mci_flent_list; flent != NULL;
 876             flent = flent->fe_client_next) {
 877                 mac_flow_get_desc(flent, &flow_desc);
 878                 if (!(flent->fe_type & FLOW_PRIMARY_MAC ||
 879                     flent->fe_type & FLOW_VNIC_MAC))
 880                         continue;
 881 
 882                 bcopy(map->ma_addr, flow_desc.fd_dst_mac, map->ma_len);
 883                 mac_flow_set_desc(flent, &flow_desc);
 884         }
 885 }
 886 
 887 /*
 888  * Update all clients that share the same unicast address.
 889  */
 890 void
 891 mac_unicast_update_clients(mac_impl_t *mip, mac_address_t *map)
 892 {
 893         mac_client_impl_t *mcip;
 894 
 895         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
 896 
 897         /*
 898          * Find all clients that share the same unicast MAC address and update
 899          * them appropriately.
 900          */
 901         for (mcip = mip->mi_clients_list; mcip != NULL;
 902             mcip = mcip->mci_client_next) {
 903                 /*
 904                  * Ignore clients that don't share this MAC address.
 905                  */
 906                 if (map != mcip->mci_unicast)
 907                         continue;
 908 
 909                 /*
 910                  * Update those clients with same old unicast MAC address.
 911                  */
 912                 mac_unicast_update_client_flow(mcip);
 913         }
 914 }
 915 
 916 /*
 917  * Update the unicast MAC address of the specified VNIC MAC client.
 918  *
 919  * Check whether the operation is valid. Any of following cases should fail:
 920  *
 921  * 1. It's a VLAN type of VNIC.
 922  * 2. The new value is current "primary" MAC address.
 923  * 3. The current MAC address is shared with other clients.
 924  * 4. The new MAC address has been used. This case will be valid when
 925  *    client migration is fully supported.
 926  */
 927 int
 928 mac_vnic_unicast_set(mac_client_handle_t mch, const uint8_t *addr)
 929 {
 930         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
 931         mac_impl_t *mip = mcip->mci_mip;
 932         mac_address_t *map = mcip->mci_unicast;
 933         int err;
 934 
 935         ASSERT(!(mip->mi_state_flags & MIS_IS_VNIC));
 936         ASSERT(mcip->mci_state_flags & MCIS_IS_VNIC);
 937         ASSERT(mcip->mci_flags != MAC_CLIENT_FLAGS_PRIMARY);
 938 
 939         i_mac_perim_enter(mip);
 940 
 941         /*
 942          * If this is a VLAN type of VNIC, it's using "primary" MAC address
 943          * of the underlying interface. Must fail here. Refer to case 1 above.
 944          */
 945         if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0) {
 946                 i_mac_perim_exit(mip);
 947                 return (ENOTSUP);
 948         }
 949 
 950         /*
 951          * If the new address is the "primary" one, must fail. Refer to
 952          * case 2 above.
 953          */
 954         if (bcmp(addr, mip->mi_addr, map->ma_len) == 0) {
 955                 i_mac_perim_exit(mip);
 956                 return (EACCES);
 957         }
 958 
 959         /*
 960          * If the address is shared by multiple clients, must fail. Refer
 961          * to case 3 above.
 962          */
 963         if (mac_check_macaddr_shared(map)) {
 964                 i_mac_perim_exit(mip);
 965                 return (EBUSY);
 966         }
 967 
 968         /*
 969          * If the new address has been used, must fail for now. Refer to
 970          * case 4 above.
 971          */
 972         if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
 973                 i_mac_perim_exit(mip);
 974                 return (ENOTSUP);
 975         }
 976 
 977         /*
 978          * Update the MAC address.
 979          */
 980         err = mac_update_macaddr(map, (uint8_t *)addr);
 981 
 982         if (err != 0) {
 983                 i_mac_perim_exit(mip);
 984                 return (err);
 985         }
 986 
 987         /*
 988          * Update all flows of this MAC client.
 989          */
 990         mac_unicast_update_client_flow(mcip);
 991 
 992         i_mac_perim_exit(mip);
 993         return (0);
 994 }
 995 
 996 /*
 997  * Program the new primary unicast address of the specified MAC.
 998  *
 999  * Function mac_update_macaddr() takes care different types of underlying
1000  * MAC. If the underlying MAC is VNIC, the VNIC driver must have registerd
1001  * mi_unicst() entry point, that indirectly calls mac_vnic_unicast_set()
1002  * which will take care of updating the MAC address of the corresponding
1003  * MAC client.
1004  *
1005  * This is the only interface that allow the client to update the "primary"
1006  * MAC address of the underlying MAC. The new value must have not been
1007  * used by other clients.
1008  */
1009 int
1010 mac_unicast_primary_set(mac_handle_t mh, const uint8_t *addr)
1011 {
1012         mac_impl_t *mip = (mac_impl_t *)mh;
1013         mac_address_t *map;
1014         int err;
1015 
1016         /* verify the address validity */
1017         if (!mac_unicst_verify(mh, addr, mip->mi_type->mt_addr_length))
1018                 return (EINVAL);
1019 
1020         i_mac_perim_enter(mip);
1021 
1022         /*
1023          * If the new value is the same as the current primary address value,
1024          * there's nothing to do.
1025          */
1026         if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) == 0) {
1027                 i_mac_perim_exit(mip);
1028                 return (0);
1029         }
1030 
1031         if (mac_find_macaddr(mip, (uint8_t *)addr) != NULL) {
1032                 i_mac_perim_exit(mip);
1033                 return (EBUSY);
1034         }
1035 
1036         map = mac_find_macaddr(mip, mip->mi_addr);
1037         ASSERT(map != NULL);
1038 
1039         /*
1040          * Update the MAC address.
1041          */
1042         if (mip->mi_state_flags & MIS_IS_AGGR) {
1043                 mac_capab_aggr_t aggr_cap;
1044 
1045                 /*
1046                  * If the MAC is an aggregation, other than the unicast
1047                  * addresses programming, aggr must be informed about this
1048                  * primary unicst address change to change its MAC address
1049                  * policy to be user-specified.
1050                  */
1051                 ASSERT(map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED);
1052                 VERIFY(i_mac_capab_get(mh, MAC_CAPAB_AGGR, &aggr_cap));
1053                 err = aggr_cap.mca_unicst(mip->mi_driver, addr);
1054                 if (err == 0)
1055                         bcopy(addr, map->ma_addr, map->ma_len);
1056         } else {
1057                 err = mac_update_macaddr(map, (uint8_t *)addr);
1058         }
1059 
1060         if (err != 0) {
1061                 i_mac_perim_exit(mip);
1062                 return (err);
1063         }
1064 
1065         mac_unicast_update_clients(mip, map);
1066 
1067         /*
1068          * Save the new primary MAC address in mac_impl_t.
1069          */
1070         bcopy(addr, mip->mi_addr, mip->mi_type->mt_addr_length);
1071 
1072         i_mac_perim_exit(mip);
1073 
1074         if (err == 0)
1075                 i_mac_notify(mip, MAC_NOTE_UNICST);
1076 
1077         return (err);
1078 }
1079 
1080 /*
1081  * Return the current primary MAC address of the specified MAC.
1082  */
1083 void
1084 mac_unicast_primary_get(mac_handle_t mh, uint8_t *addr)
1085 {
1086         mac_impl_t *mip = (mac_impl_t *)mh;
1087 
1088         rw_enter(&mip->mi_rw_lock, RW_READER);
1089         bcopy(mip->mi_addr, addr, mip->mi_type->mt_addr_length);
1090         rw_exit(&mip->mi_rw_lock);
1091 }
1092 
1093 /*
1094  * Return the secondary MAC address for the specified handle
1095  */
1096 void
1097 mac_unicast_secondary_get(mac_client_handle_t mh, uint8_t *addr)
1098 {
1099         mac_client_impl_t *mcip = (mac_client_impl_t *)mh;
1100 
1101         ASSERT(mcip->mci_unicast != NULL);
1102         bcopy(mcip->mci_unicast->ma_addr, addr, mcip->mci_unicast->ma_len);
1103 }
1104 
1105 /*
1106  * Return information about the use of the primary MAC address of the
1107  * specified MAC instance:
1108  *
1109  * - if client_name is non-NULL, it must point to a string of at
1110  *   least MAXNAMELEN bytes, and will be set to the name of the MAC
1111  *   client which uses the primary MAC address.
1112  *
1113  * - if in_use is non-NULL, used to return whether the primary MAC
1114  *   address is currently in use.
1115  */
1116 void
1117 mac_unicast_primary_info(mac_handle_t mh, char *client_name, boolean_t *in_use)
1118 {
1119         mac_impl_t *mip = (mac_impl_t *)mh;
1120         mac_client_impl_t *cur_client;
1121 
1122         if (in_use != NULL)
1123                 *in_use = B_FALSE;
1124         if (client_name != NULL)
1125                 bzero(client_name, MAXNAMELEN);
1126 
1127         /*
1128          * The mi_rw_lock is used to protect threads that don't hold the
1129          * mac perimeter to get a consistent view of the mi_clients_list.
1130          * Threads that modify the list must hold both the mac perimeter and
1131          * mi_rw_lock(RW_WRITER)
1132          */
1133         rw_enter(&mip->mi_rw_lock, RW_READER);
1134         for (cur_client = mip->mi_clients_list; cur_client != NULL;
1135             cur_client = cur_client->mci_client_next) {
1136                 if (mac_is_primary_client(cur_client) ||
1137                     (mip->mi_state_flags & MIS_IS_VNIC)) {
1138                         rw_exit(&mip->mi_rw_lock);
1139                         if (in_use != NULL)
1140                                 *in_use = B_TRUE;
1141                         if (client_name != NULL) {
1142                                 bcopy(cur_client->mci_name, client_name,
1143                                     MAXNAMELEN);
1144                         }
1145                         return;
1146                 }
1147         }
1148         rw_exit(&mip->mi_rw_lock);
1149 }
1150 
1151 /*
1152  * Return the current destination MAC address of the specified MAC.
1153  */
1154 boolean_t
1155 mac_dst_get(mac_handle_t mh, uint8_t *addr)
1156 {
1157         mac_impl_t *mip = (mac_impl_t *)mh;
1158 
1159         rw_enter(&mip->mi_rw_lock, RW_READER);
1160         if (mip->mi_dstaddr_set)
1161                 bcopy(mip->mi_dstaddr, addr, mip->mi_type->mt_addr_length);
1162         rw_exit(&mip->mi_rw_lock);
1163         return (mip->mi_dstaddr_set);
1164 }
1165 
1166 /*
1167  * Add the specified MAC client to the list of clients which opened
1168  * the specified MAC.
1169  */
1170 static void
1171 mac_client_add(mac_client_impl_t *mcip)
1172 {
1173         mac_impl_t *mip = mcip->mci_mip;
1174 
1175         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1176 
1177         /* add VNIC to the front of the list */
1178         rw_enter(&mip->mi_rw_lock, RW_WRITER);
1179         mcip->mci_client_next = mip->mi_clients_list;
1180         mip->mi_clients_list = mcip;
1181         mip->mi_nclients++;
1182         rw_exit(&mip->mi_rw_lock);
1183 }
1184 
1185 /*
1186  * Remove the specified MAC client from the list of clients which opened
1187  * the specified MAC.
1188  */
1189 static void
1190 mac_client_remove(mac_client_impl_t *mcip)
1191 {
1192         mac_impl_t *mip = mcip->mci_mip;
1193         mac_client_impl_t **prev, *cclient;
1194 
1195         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1196 
1197         rw_enter(&mip->mi_rw_lock, RW_WRITER);
1198         prev = &mip->mi_clients_list;
1199         cclient = *prev;
1200         while (cclient != NULL && cclient != mcip) {
1201                 prev = &cclient->mci_client_next;
1202                 cclient = *prev;
1203         }
1204         ASSERT(cclient != NULL);
1205         *prev = cclient->mci_client_next;
1206         mip->mi_nclients--;
1207         rw_exit(&mip->mi_rw_lock);
1208 }
1209 
1210 static mac_unicast_impl_t *
1211 mac_client_find_vid(mac_client_impl_t *mcip, uint16_t vid)
1212 {
1213         mac_unicast_impl_t *muip = mcip->mci_unicast_list;
1214 
1215         while ((muip != NULL) && (muip->mui_vid != vid))
1216                 muip = muip->mui_next;
1217 
1218         return (muip);
1219 }
1220 
1221 /*
1222  * Return whether the specified (MAC address, VID) tuple is already used by
1223  * one of the MAC clients associated with the specified MAC.
1224  */
1225 static boolean_t
1226 mac_addr_in_use(mac_impl_t *mip, uint8_t *mac_addr, uint16_t vid)
1227 {
1228         mac_client_impl_t *client;
1229         mac_address_t *map;
1230 
1231         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1232 
1233         for (client = mip->mi_clients_list; client != NULL;
1234             client = client->mci_client_next) {
1235 
1236                 /*
1237                  * Ignore clients that don't have unicast address.
1238                  */
1239                 if (client->mci_unicast_list == NULL)
1240                         continue;
1241 
1242                 map = client->mci_unicast;
1243 
1244                 if ((bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) &&
1245                     (mac_client_find_vid(client, vid) != NULL)) {
1246                         return (B_TRUE);
1247                 }
1248         }
1249 
1250         return (B_FALSE);
1251 }
1252 
1253 /*
1254  * Generate a random MAC address. The MAC address prefix is
1255  * stored in the array pointed to by mac_addr, and its length, in bytes,
1256  * is specified by prefix_len. The least significant bits
1257  * after prefix_len bytes are generated, and stored after the prefix
1258  * in the mac_addr array.
1259  */
1260 int
1261 mac_addr_random(mac_client_handle_t mch, uint_t prefix_len,
1262     uint8_t *mac_addr, mac_diag_t *diag)
1263 {
1264         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1265         mac_impl_t *mip = mcip->mci_mip;
1266         size_t addr_len = mip->mi_type->mt_addr_length;
1267 
1268         if (prefix_len >= addr_len) {
1269                 *diag = MAC_DIAG_MACPREFIXLEN_INVALID;
1270                 return (EINVAL);
1271         }
1272 
1273         /* check the prefix value */
1274         if (prefix_len > 0) {
1275                 bzero(mac_addr + prefix_len, addr_len - prefix_len);
1276                 if (!mac_unicst_verify((mac_handle_t)mip, mac_addr,
1277                     addr_len)) {
1278                         *diag = MAC_DIAG_MACPREFIX_INVALID;
1279                         return (EINVAL);
1280                 }
1281         }
1282 
1283         /* generate the MAC address */
1284         if (prefix_len < addr_len) {
1285                 (void) random_get_pseudo_bytes(mac_addr +
1286                     prefix_len, addr_len - prefix_len);
1287         }
1288 
1289         *diag = 0;
1290         return (0);
1291 }
1292 
/*
 * Set the priority range for this MAC client. This will be used to
 * determine the absolute priority for the threads created for this
 * MAC client using the specified "low", "medium" and "high" level.
 * This will also be used for any subflows on this MAC client.
 *
 * mci_min_pri is computed first, from 'pri' and the
 * [MINCLSYSPRI, MAXCLSYSPRI] bounds via FLOW_MIN_PRIORITY(); mci_max_pri
 * is then derived from the freshly stored mci_min_pri via
 * FLOW_MAX_PRIORITY().
 *
 * Note that the macro expands to a brace-enclosed block (not a
 * do { } while (0) statement) and evaluates 'mcip' more than once, so the
 * argument should be free of side effects.
 */
#define MAC_CLIENT_SET_PRIORITY_RANGE(mcip, pri) {                      \
        (mcip)->mci_min_pri = FLOW_MIN_PRIORITY(MINCLSYSPRI, \
            MAXCLSYSPRI, (pri));                                        \
        (mcip)->mci_max_pri = FLOW_MAX_PRIORITY(MINCLSYSPRI, \
            MAXCLSYSPRI, (mcip)->mci_min_pri);                               \
        }
1305 
1306 /*
1307  * MAC client open entry point. Return a new MAC client handle. Each
1308  * MAC client is associated with a name, specified through the 'name'
1309  * argument.
1310  */
1311 int
1312 mac_client_open(mac_handle_t mh, mac_client_handle_t *mchp, char *name,
1313     uint16_t flags)
1314 {
1315         mac_impl_t              *mip = (mac_impl_t *)mh;
1316         mac_client_impl_t       *mcip;
1317         int                     err = 0;
1318         boolean_t               share_desired;
1319         flow_entry_t            *flent = NULL;
1320 
1321         share_desired = (flags & MAC_OPEN_FLAGS_SHARES_DESIRED) != 0;
1322         *mchp = NULL;
1323 
1324         i_mac_perim_enter(mip);
1325 
1326         if (mip->mi_state_flags & MIS_IS_VNIC) {
1327                 /*
1328                  * The underlying MAC is a VNIC. Return the MAC client
1329                  * handle of the lower MAC which was obtained by
1330                  * the VNIC driver when it did its mac_client_open().
1331                  */
1332 
1333                 mcip = mac_vnic_lower(mip);
1334 
1335                 /*
1336                  * Note that multiple mac clients share the same mcip in
1337                  * this case.
1338                  */
1339                 if (flags & MAC_OPEN_FLAGS_EXCLUSIVE)
1340                         mcip->mci_state_flags |= MCIS_EXCLUSIVE;
1341 
1342                 if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
1343                         mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;
1344 
1345                 mip->mi_clients_list = mcip;
1346                 i_mac_perim_exit(mip);
1347                 *mchp = (mac_client_handle_t)mcip;
1348 
1349                 DTRACE_PROBE2(mac__client__open__nonallocated, mac_impl_t *,
1350                     mcip->mci_mip, mac_client_impl_t *, mcip);
1351 
1352                 return (err);
1353         }
1354 
1355         mcip = kmem_cache_alloc(mac_client_impl_cache, KM_SLEEP);
1356 
1357         mcip->mci_mip = mip;
1358         mcip->mci_upper_mip = NULL;
1359         mcip->mci_rx_fn = mac_pkt_drop;
1360         mcip->mci_rx_arg = NULL;
1361         mcip->mci_rx_p_fn = NULL;
1362         mcip->mci_rx_p_arg = NULL;
1363         mcip->mci_p_unicast_list = NULL;
1364         mcip->mci_direct_rx_fn = NULL;
1365         mcip->mci_direct_rx_arg = NULL;
1366         mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
1367 
1368         mcip->mci_unicast_list = NULL;
1369 
1370         if ((flags & MAC_OPEN_FLAGS_IS_VNIC) != 0)
1371                 mcip->mci_state_flags |= MCIS_IS_VNIC;
1372 
1373         if ((flags & MAC_OPEN_FLAGS_EXCLUSIVE) != 0)
1374                 mcip->mci_state_flags |= MCIS_EXCLUSIVE;
1375 
1376         if ((flags & MAC_OPEN_FLAGS_IS_AGGR_PORT) != 0)
1377                 mcip->mci_state_flags |= MCIS_IS_AGGR_PORT;
1378 
1379         if (mip->mi_state_flags & MIS_IS_AGGR)
1380                 mcip->mci_state_flags |= MCIS_IS_AGGR_CLIENT;
1381 
1382         if ((flags & MAC_OPEN_FLAGS_USE_DATALINK_NAME) != 0) {
1383                 datalink_id_t   linkid;
1384 
1385                 ASSERT(name == NULL);
1386                 if ((err = dls_devnet_macname2linkid(mip->mi_name,
1387                     &linkid)) != 0) {
1388                         goto done;
1389                 }
1390                 if ((err = dls_mgmt_get_linkinfo(linkid, mcip->mci_name, NULL,
1391                     NULL, NULL)) != 0) {
1392                         /*
1393                          * Use mac name if dlmgmtd is not available.
1394                          */
1395                         if (err == EBADF) {
1396                                 (void) strlcpy(mcip->mci_name, mip->mi_name,
1397                                     sizeof (mcip->mci_name));
1398                                 err = 0;
1399                         } else {
1400                                 goto done;
1401                         }
1402                 }
1403                 mcip->mci_state_flags |= MCIS_USE_DATALINK_NAME;
1404         } else {
1405                 ASSERT(name != NULL);
1406                 if (strlen(name) > MAXNAMELEN) {
1407                         err = EINVAL;
1408                         goto done;
1409                 }
1410                 (void) strlcpy(mcip->mci_name, name, sizeof (mcip->mci_name));
1411         }
1412 
1413         if (flags & MAC_OPEN_FLAGS_MULTI_PRIMARY)
1414                 mcip->mci_flags |= MAC_CLIENT_FLAGS_MULTI_PRIMARY;
1415 
1416         if (flags & MAC_OPEN_FLAGS_NO_UNICAST_ADDR)
1417                 mcip->mci_state_flags |= MCIS_NO_UNICAST_ADDR;
1418 
1419         mac_protect_init(mcip);
1420 
1421         /* the subflow table will be created dynamically */
1422         mcip->mci_subflow_tab = NULL;
1423 
1424         mcip->mci_misc_stat.mms_multircv = 0;
1425         mcip->mci_misc_stat.mms_brdcstrcv = 0;
1426         mcip->mci_misc_stat.mms_multixmt = 0;
1427         mcip->mci_misc_stat.mms_brdcstxmt = 0;
1428 
1429         /* Create an initial flow */
1430 
1431         err = mac_flow_create(NULL, NULL, mcip->mci_name, NULL,
1432             mcip->mci_state_flags & MCIS_IS_VNIC ? FLOW_VNIC_MAC :
1433             FLOW_PRIMARY_MAC, &flent);
1434         if (err != 0)
1435                 goto done;
1436         mcip->mci_flent = flent;
1437         FLOW_MARK(flent, FE_MC_NO_DATAPATH);
1438         flent->fe_mcip = mcip;
1439         /*
1440          * Place initial creation reference on the flow. This reference
1441          * is released in the corresponding delete action viz.
1442          * mac_unicast_remove after waiting for all transient refs to
1443          * to go away. The wait happens in mac_flow_wait.
1444          */
1445         FLOW_REFHOLD(flent);
1446 
1447         /*
1448          * Do this ahead of the mac_bcast_add() below so that the mi_nclients
1449          * will have the right value for mac_rx_srs_setup().
1450          */
1451         mac_client_add(mcip);
1452 
1453         mcip->mci_share = 0;
1454         if (share_desired)
1455                 i_mac_share_alloc(mcip);
1456 
1457         /*
1458          * We will do mimimal datapath setup to allow a MAC client to
1459          * transmit or receive non-unicast packets without waiting
1460          * for mac_unicast_add.
1461          */
1462         if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) {
1463                 if ((err = mac_client_datapath_setup(mcip, VLAN_ID_NONE,
1464                     NULL, NULL, B_TRUE, NULL)) != 0) {
1465                         goto done;
1466                 }
1467         }
1468 
1469         DTRACE_PROBE2(mac__client__open__allocated, mac_impl_t *,
1470             mcip->mci_mip, mac_client_impl_t *, mcip);
1471 
1472         *mchp = (mac_client_handle_t)mcip;
1473         i_mac_perim_exit(mip);
1474         return (0);
1475 
1476 done:
1477         i_mac_perim_exit(mip);
1478         mcip->mci_state_flags = 0;
1479         mcip->mci_tx_flag = 0;
1480         kmem_cache_free(mac_client_impl_cache, mcip);
1481         return (err);
1482 }
1483 
1484 /*
1485  * Close the specified MAC client handle.
1486  */
1487 void
1488 mac_client_close(mac_client_handle_t mch, uint16_t flags)
1489 {
1490         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
1491         mac_impl_t              *mip = mcip->mci_mip;
1492         flow_entry_t            *flent;
1493 
1494         i_mac_perim_enter(mip);
1495 
1496         if (flags & MAC_CLOSE_FLAGS_EXCLUSIVE)
1497                 mcip->mci_state_flags &= ~MCIS_EXCLUSIVE;
1498 
1499         if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
1500             !(flags & MAC_CLOSE_FLAGS_IS_VNIC)) {
1501                 /*
1502                  * This is an upper VNIC client initiated operation.
1503                  * The lower MAC client will be closed by the VNIC driver
1504                  * when the VNIC is deleted.
1505                  */
1506 
1507                 i_mac_perim_exit(mip);
1508                 return;
1509         }
1510 
1511         /* If we have only setup up minimal datapth setup, tear it down */
1512         if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) {
1513                 mac_client_datapath_teardown((mac_client_handle_t)mcip, NULL,
1514                     mcip->mci_flent);
1515                 mcip->mci_state_flags &= ~MCIS_NO_UNICAST_ADDR;
1516         }
1517 
1518         /*
1519          * Remove the flent associated with the MAC client
1520          */
1521         flent = mcip->mci_flent;
1522         mcip->mci_flent = NULL;
1523         FLOW_FINAL_REFRELE(flent);
1524 
1525         /*
1526          * MAC clients must remove the unicast addresses and promisc callbacks
1527          * they added before issuing a mac_client_close().
1528          */
1529         ASSERT(mcip->mci_unicast_list == NULL);
1530         ASSERT(mcip->mci_promisc_list == NULL);
1531         ASSERT(mcip->mci_tx_notify_cb_list == NULL);
1532 
1533         i_mac_share_free(mcip);
1534         mac_protect_fini(mcip);
1535         mac_client_remove(mcip);
1536 
1537         i_mac_perim_exit(mip);
1538         mcip->mci_subflow_tab = NULL;
1539         mcip->mci_state_flags = 0;
1540         mcip->mci_tx_flag = 0;
1541         kmem_cache_free(mac_client_impl_cache, mch);
1542 }
1543 
1544 /*
1545  * Set the Rx bypass receive callback and return B_TRUE. Return
1546  * B_FALSE if it's not possible to enable bypass.
1547  */
1548 boolean_t
1549 mac_rx_bypass_set(mac_client_handle_t mch, mac_direct_rx_t rx_fn, void *arg1)
1550 {
1551         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
1552         mac_impl_t              *mip = mcip->mci_mip;
1553 
1554         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1555 
1556         /*
1557          * If the client has more than one VLAN then process packets
1558          * through DLS. This should happen only when sun4v vsw is on
1559          * the scene.
1560          */
1561         if (mcip->mci_nvids > 1)
1562                 return (B_FALSE);
1563 
1564         /*
1565          * These are not accessed directly in the data path, and hence
1566          * don't need any protection
1567          */
1568         mcip->mci_direct_rx_fn = rx_fn;
1569         mcip->mci_direct_rx_arg = arg1;
1570         return (B_TRUE);
1571 }
1572 
1573 /*
1574  * Enable/Disable rx bypass. By default, bypass is assumed to be enabled.
1575  */
1576 void
1577 mac_rx_bypass_enable(mac_client_handle_t mch)
1578 {
1579         ((mac_client_impl_t *)mch)->mci_state_flags &= ~MCIS_RX_BYPASS_DISABLE;
1580 }
1581 
1582 void
1583 mac_rx_bypass_disable(mac_client_handle_t mch)
1584 {
1585         ((mac_client_impl_t *)mch)->mci_state_flags |= MCIS_RX_BYPASS_DISABLE;
1586 }
1587 
1588 /*
1589  * Set the receive callback for the specified MAC client. There can be
1590  * at most one such callback per MAC client.
1591  */
1592 void
1593 mac_rx_set(mac_client_handle_t mch, mac_rx_t rx_fn, void *arg)
1594 {
1595         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1596         mac_impl_t      *mip = mcip->mci_mip;
1597         mac_impl_t      *umip = mcip->mci_upper_mip;
1598 
1599         /*
1600          * Instead of adding an extra set of locks and refcnts in
1601          * the datapath at the mac client boundary, we temporarily quiesce
1602          * the SRS and related entities. We then change the receive function
1603          * without interference from any receive data thread and then reenable
1604          * the data flow subsequently.
1605          */
1606         i_mac_perim_enter(mip);
1607         mac_rx_client_quiesce(mch);
1608 
1609         mcip->mci_rx_fn = rx_fn;
1610         mcip->mci_rx_arg = arg;
1611         mac_rx_client_restart(mch);
1612         i_mac_perim_exit(mip);
1613 
1614         /*
1615          * If we're changing the Rx function on the primary MAC of a VNIC,
1616          * make sure any secondary addresses on the VNIC are updated as well.
1617          */
1618         if (umip != NULL) {
1619                 ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0);
1620                 mac_vnic_secondary_update(umip);
1621         }
1622 }
1623 
/*
 * Reset the receive callback for the specified MAC client.
 *
 * This is done through mac_rx_set(), installing mac_pkt_drop with a NULL
 * argument -- the same callback mac_client_open() gives a newly allocated
 * client.
 */
void
mac_rx_clear(mac_client_handle_t mch)
{
        mac_rx_set(mch, mac_pkt_drop, NULL);
}
1632 
1633 void
1634 mac_secondary_dup(mac_client_handle_t smch, mac_client_handle_t dmch)
1635 {
1636         mac_client_impl_t *smcip = (mac_client_impl_t *)smch;
1637         mac_client_impl_t *dmcip = (mac_client_impl_t *)dmch;
1638         flow_entry_t *flent = dmcip->mci_flent;
1639 
1640         /* This should only be called to setup secondary macs */
1641         ASSERT((flent->fe_type & FLOW_PRIMARY_MAC) == 0);
1642 
1643         mac_rx_set(dmch, smcip->mci_rx_fn, smcip->mci_rx_arg);
1644         dmcip->mci_promisc_list = smcip->mci_promisc_list;
1645 
1646         /*
1647          * Duplicate the primary mac resources to the secondary.
1648          * Since we already validated the resource controls when setting
1649          * them on the primary, we can ignore errors here.
1650          */
1651         (void) mac_resource_ctl_set(dmch, MCIP_RESOURCE_PROPS(smcip));
1652 }
1653 
1654 /*
1655  * Called when removing a secondary MAC. Currently only clears the promisc_list
1656  * since we share the primary mac's promisc_list.
1657  */
1658 void
1659 mac_secondary_cleanup(mac_client_handle_t mch)
1660 {
1661         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1662         flow_entry_t *flent = mcip->mci_flent;
1663 
1664         /* This should only be called for secondary macs */
1665         ASSERT((flent->fe_type & FLOW_PRIMARY_MAC) == 0);
1666         mcip->mci_promisc_list = NULL;
1667 }
1668 
1669 /*
1670  * Walk the MAC client subflow table and updates their priority values.
1671  */
1672 static int
1673 mac_update_subflow_priority_cb(flow_entry_t *flent, void *arg)
1674 {
1675         mac_flow_update_priority(arg, flent);
1676         return (0);
1677 }
1678 
/*
 * Walk the subflow table of the MAC client (mci_subflow_tab) and update the
 * priority of each entry, by running mac_update_subflow_priority_cb() on
 * it with the client as argument. The result of the walk is ignored.
 */
void
mac_update_subflow_priority(mac_client_impl_t *mcip)
{
        (void) mac_flow_walk(mcip->mci_subflow_tab,
            mac_update_subflow_priority_cb, mcip);
}
1685 
/*
 * Modify the TX or RX ring properties. We could either just move around
 * rings, i.e add/remove rings given to a client. Or this might cause the
 * client to move from hardware based to software or the other way around.
 * If we want to reset this property, then we clear the mask, additionally
 * if the client was given a non-default group we remove all rings except
 * for 1 and give it back to the default group.
 *
 * Arguments:
 *   mcip - the MAC client whose rings are being changed.
 *   mrp  - the requested properties; only MRP_RX_RINGS, MRP_TX_RINGS, the
 *          corresponding *_UNSPEC bits, MRP_RINGS_RESET and the ring
 *          counts are examined here.
 *   tmrp - the properties to compare against when deciding whether the
 *          request changes anything and how to adjust the reservation
 *          accounting. The caller visible in this file passes a copy of
 *          the client's properties taken before they were updated.
 *
 * Returns 0 on success (including "nothing to do"), EINVAL if the client
 * has a share or if Rx rings are requested while the MAC address is
 * shared, ENOTSUP if a positive Tx ring count is requested on a
 * non-dynamic Tx group type, ENOSPC if no group could be reserved or
 * switched to, or the error returned by mac_group_ring_modify().
 *
 * NOTE(review): the RX section is processed first and several of its paths
 * return directly (the "no resulting change" checks and the whole reset
 * path). When a single request carries both MRP_RX_RINGS and MRP_TX_RINGS
 * those returns skip the TX section entirely - confirm that callers never
 * rely on both being handled in one call.
 */
int
mac_client_set_rings_prop(mac_client_impl_t *mcip, mac_resource_props_t *mrp,
    mac_resource_props_t *tmrp)
{
        mac_impl_t              *mip = mcip->mci_mip;
        flow_entry_t            *flent = mcip->mci_flent;
        uint8_t                 *mac_addr;
        int                     err = 0;
        mac_group_t             *defgrp;
        mac_group_t             *group;
        mac_group_t             *ngrp;
        /* The client's own (live) properties; updated on the reset paths. */
        mac_resource_props_t    *cmrp = MCIP_RESOURCE_PROPS(mcip);
        uint_t                  ringcnt;
        boolean_t               unspec;

        /* Ring properties cannot be changed on a client that has a share. */
        if (mcip->mci_share != 0)
                return (EINVAL);

        /*
         * RX side.
         */
        if (mrp->mrp_mask & MRP_RX_RINGS) {
                unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
                group = flent->fe_rx_ring_group;
                defgrp = MAC_DEFAULT_RX_GROUP(mip);
                mac_addr = flent->fe_flow_desc.fd_dst_mac;

                /*
                 * No resulting change. If we are resetting on a client on
                 * which there was no rx rings property. For dynamic group
                 * if we are setting the same number of rings already set.
                 * For static group if we are requesting a group again.
                 */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (!(tmrp->mrp_mask & MRP_RX_RINGS))
                                return (0);
                } else {
                        if (unspec) {
                                if (tmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
                                        return (0);
                        } else if (mip->mi_rx_group_type ==
                            MAC_GROUP_TYPE_DYNAMIC) {
                                if ((tmrp->mrp_mask & MRP_RX_RINGS) &&
                                    !(tmrp->mrp_mask & MRP_RXRINGS_UNSPEC) &&
                                    mrp->mrp_nrxrings == tmrp->mrp_nrxrings) {
                                        return (0);
                                }
                        }
                }
                /* Resetting the prop */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        /*
                         * We will just keep one ring and give others back if
                         * we are not the primary. For the primary we give
                         * all the rings in the default group except the
                         * default ring. If it is a static group, then
                         * we don't do anything, but clear the MRP_RX_RINGS
                         * flag.
                         */
                        if (group != defgrp) {
                                if (mip->mi_rx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        /*
                                         * This group has reserved rings
                                         * that need to be released now,
                                         * so does the group.
                                         */
                                        MAC_RX_RING_RELEASED(mip,
                                            group->mrg_cur_count);
                                        MAC_RX_GRP_RELEASED(mip);
                                        if ((flent->fe_type &
                                            FLOW_PRIMARY_MAC) != 0) {
                                                /*
                                                 * A primary that is the only
                                                 * active client simply moves
                                                 * back to the default group.
                                                 */
                                                if (mip->mi_nactiveclients ==
                                                    1) {
                                                        (void)
                                                            mac_rx_switch_group(
                                                            mcip, group,
                                                            defgrp);
                                                        return (0);
                                                } else {
                                                        cmrp->mrp_nrxrings =
                                                            group->
                                                            mrg_cur_count +
                                                            defgrp->
                                                            mrg_cur_count - 1;
                                                }
                                        } else {
                                                cmrp->mrp_nrxrings = 1;
                                        }
                                        /*
                                         * Resize the group to the ring count
                                         * just stored in the client's
                                         * properties; failure is ignored.
                                         */
                                        (void) mac_group_ring_modify(mcip,
                                            group, defgrp);
                                } else {
                                        /*
                                         * If this is a static group, we
                                         * need to release the group. The
                                         * client will remain in the same
                                         * group till some other client
                                         * needs this group.
                                         */
                                        MAC_RX_GRP_RELEASED(mip);
                                }
                        /* Let check if we can give this an excl group */
                        } else if (group == defgrp) {
                                /*
                                 * If multiple clients share an
                                 * address then they must stay on the
                                 * default group.
                                 */
                                if (mac_check_macaddr_shared(mcip->mci_unicast))
                                        return (0);

                                ngrp = mac_reserve_rx_group(mcip, mac_addr,
                                    B_TRUE);
                                /* Couldn't give it a group, that's fine */
                                if (ngrp == NULL)
                                        return (0);
                                /* Switch to H/W */
                                if (mac_rx_switch_group(mcip, defgrp, ngrp) !=
                                    0) {
                                        mac_stop_group(ngrp);
                                        return (0);
                                }
                        }
                        /*
                         * If the client is in the default group, we will
                         * just clear the MRP_RX_RINGS and leave it as
                         * it rather than look for an exclusive group
                         * for it.
                         *
                         * NOTE(review): the branch above does try to
                         * reserve an exclusive group for a client in the
                         * default group, which appears to contradict this
                         * comment - confirm which is intended.
                         */
                        return (0);
                }

                if (group == defgrp && ((mrp->mrp_nrxrings > 0) || unspec)) {
                        /*
                         * We are requesting Rx rings. Try to reserve
                         * a non-default group.
                         *
                         * If multiple clients share an address then
                         * they must stay on the default group.
                         */
                        if (mac_check_macaddr_shared(mcip->mci_unicast))
                                return (EINVAL);

                        ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
                        if (ngrp == NULL)
                                return (ENOSPC);

                        /* Switch to H/W */
                        if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
                                mac_release_rx_group(mcip, ngrp);
                                return (ENOSPC);
                        }
                        /* Account for the newly reserved group (and rings). */
                        MAC_RX_GRP_RESERVED(mip);
                        if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC)
                                MAC_RX_RING_RESERVED(mip, ngrp->mrg_cur_count);
                } else if (group != defgrp && !unspec &&
                    mrp->mrp_nrxrings == 0) {
                        /* Switch to S/W */
                        /* Sample the ring count before leaving the group. */
                        ringcnt = group->mrg_cur_count;
                        if (mac_rx_switch_group(mcip, group, defgrp) != 0)
                                return (ENOSPC);
                        /*
                         * Only undo the reservation accounting if the
                         * client had the rx rings property set before.
                         */
                        if (tmrp->mrp_mask & MRP_RX_RINGS) {
                                MAC_RX_GRP_RELEASED(mip);
                                if (mip->mi_rx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        MAC_RX_RING_RELEASED(mip, ringcnt);
                                }
                        }
                } else if (group != defgrp && mip->mi_rx_group_type ==
                    MAC_GROUP_TYPE_DYNAMIC) {
                        /* Already in a non-default dynamic group: resize it. */
                        ringcnt = group->mrg_cur_count;
                        err = mac_group_ring_modify(mcip, group, defgrp);
                        if (err != 0)
                                return (err);
                        /*
                         * Update the accounting. If this group
                         * already had explicitly reserved rings,
                         * we need to update the rings based on
                         * the new ring count. If this group
                         * had not explicitly reserved rings,
                         * then we just reserve the rings asked for
                         * and reserve the group.
                         */
                        if (tmrp->mrp_mask & MRP_RX_RINGS) {
                                if (ringcnt > group->mrg_cur_count) {
                                        MAC_RX_RING_RELEASED(mip,
                                            ringcnt - group->mrg_cur_count);
                                } else {
                                        MAC_RX_RING_RESERVED(mip,
                                            group->mrg_cur_count - ringcnt);
                                }
                        } else {
                                MAC_RX_RING_RESERVED(mip, group->mrg_cur_count);
                                MAC_RX_GRP_RESERVED(mip);
                        }
                }
        }
        /*
         * TX side. Mirrors the RX handling above, except that group
         * switches are bracketed by a quiesce/restart of the Tx client and
         * mac_tx_switch_group() has no return value that is checked.
         */
        if (mrp->mrp_mask & MRP_TX_RINGS) {
                unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC;
                group = flent->fe_tx_ring_group;
                defgrp = MAC_DEFAULT_TX_GROUP(mip);

                /*
                 * For static groups we only allow rings=0 or resetting the
                 * rings property.
                 */
                if (mrp->mrp_ntxrings > 0 &&
                    mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) {
                        return (ENOTSUP);
                }
                /* No resulting change; same checks as for the RX side. */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (!(tmrp->mrp_mask & MRP_TX_RINGS))
                                return (0);
                } else {
                        if (unspec) {
                                if (tmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
                                        return (0);
                        } else if (mip->mi_tx_group_type ==
                            MAC_GROUP_TYPE_DYNAMIC) {
                                if ((tmrp->mrp_mask & MRP_TX_RINGS) &&
                                    !(tmrp->mrp_mask & MRP_TXRINGS_UNSPEC) &&
                                    mrp->mrp_ntxrings == tmrp->mrp_ntxrings) {
                                        return (0);
                                }
                        }
                }
                /* Resetting the prop */
                if (mrp->mrp_mask & MRP_RINGS_RESET) {
                        if (group != defgrp) {
                                if (mip->mi_tx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        ringcnt = group->mrg_cur_count;
                                        /*
                                         * The primary goes straight back to
                                         * the default group and releases
                                         * both the group and its rings.
                                         */
                                        if ((flent->fe_type &
                                            FLOW_PRIMARY_MAC) != 0) {
                                                mac_tx_client_quiesce(
                                                    (mac_client_handle_t)
                                                    mcip);
                                                mac_tx_switch_group(mcip,
                                                    group, defgrp);
                                                mac_tx_client_restart(
                                                    (mac_client_handle_t)
                                                    mcip);
                                                MAC_TX_GRP_RELEASED(mip);
                                                MAC_TX_RING_RELEASED(mip,
                                                    ringcnt);
                                                return (0);
                                        }
                                        /*
                                         * Non-primary: record a ring count
                                         * of 1 and resize the group; failure
                                         * of the resize is ignored.
                                         */
                                        cmrp->mrp_ntxrings = 1;
                                        (void) mac_group_ring_modify(mcip,
                                            group, defgrp);
                                        /*
                                         * This group has reserved rings
                                         * that need to be released now.
                                         */
                                        MAC_TX_RING_RELEASED(mip, ringcnt);
                                }
                                /*
                                 * If this is a static group, we
                                 * need to release the group. The
                                 * client will remain in the same
                                 * group till some other client
                                 * needs this group.
                                 */
                                /*
                                 * Note that this release is also reached
                                 * after the dynamic, non-primary case above.
                                 */
                                MAC_TX_GRP_RELEASED(mip);
                        } else if (group == defgrp &&
                            (flent->fe_type & FLOW_PRIMARY_MAC) == 0) {
                                /*
                                 * Non-primary client on the default group:
                                 * try to move it to its own group. Not
                                 * getting one is not an error.
                                 */
                                ngrp = mac_reserve_tx_group(mcip, B_TRUE);
                                if (ngrp == NULL)
                                        return (0);
                                mac_tx_client_quiesce(
                                    (mac_client_handle_t)mcip);
                                mac_tx_switch_group(mcip, defgrp, ngrp);
                                mac_tx_client_restart(
                                    (mac_client_handle_t)mcip);
                        }
                        /*
                         * If the client is in the default group, we will
                         * just clear the MRP_TX_RINGS and leave it as
                         * it rather than look for an exclusive group
                         * for it.
                         */
                        return (0);
                }

                /* Switch to H/W */
                if (group == defgrp && ((mrp->mrp_ntxrings > 0) || unspec)) {
                        ngrp = mac_reserve_tx_group(mcip, B_TRUE);
                        if (ngrp == NULL)
                                return (ENOSPC);
                        mac_tx_client_quiesce((mac_client_handle_t)mcip);
                        mac_tx_switch_group(mcip, defgrp, ngrp);
                        mac_tx_client_restart((mac_client_handle_t)mcip);
                        /* Account for the newly reserved group (and rings). */
                        MAC_TX_GRP_RESERVED(mip);
                        if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)
                                MAC_TX_RING_RESERVED(mip, ngrp->mrg_cur_count);
                /* Switch to S/W */
                } else if (group != defgrp && !unspec &&
                    mrp->mrp_ntxrings == 0) {
                        /* Switch to S/W */
                        /* Sample the ring count before leaving the group. */
                        ringcnt = group->mrg_cur_count;
                        mac_tx_client_quiesce((mac_client_handle_t)mcip);
                        mac_tx_switch_group(mcip, group, defgrp);
                        mac_tx_client_restart((mac_client_handle_t)mcip);
                        /*
                         * Only undo the reservation accounting if the
                         * client had the tx rings property set before.
                         */
                        if (tmrp->mrp_mask & MRP_TX_RINGS) {
                                MAC_TX_GRP_RELEASED(mip);
                                if (mip->mi_tx_group_type ==
                                    MAC_GROUP_TYPE_DYNAMIC) {
                                        MAC_TX_RING_RELEASED(mip, ringcnt);
                                }
                        }
                } else if (group != defgrp && mip->mi_tx_group_type ==
                    MAC_GROUP_TYPE_DYNAMIC) {
                        /* Already in a non-default dynamic group: resize it. */
                        ringcnt = group->mrg_cur_count;
                        err = mac_group_ring_modify(mcip, group, defgrp);
                        if (err != 0)
                                return (err);
                        /*
                         * Update the accounting. If this group
                         * already had explicitly reserved rings,
                         * we need to update the rings based on
                         * the new ring count. If this group
                         * had not explicitly reserved rings,
                         * then we just reserve the rings asked for
                         * and reserve the group.
                         */
                        if (tmrp->mrp_mask & MRP_TX_RINGS) {
                                if (ringcnt > group->mrg_cur_count) {
                                        MAC_TX_RING_RELEASED(mip,
                                            ringcnt - group->mrg_cur_count);
                                } else {
                                        MAC_TX_RING_RESERVED(mip,
                                            group->mrg_cur_count - ringcnt);
                                }
                        } else {
                                MAC_TX_RING_RESERVED(mip, group->mrg_cur_count);
                                MAC_TX_GRP_RESERVED(mip);
                        }
                }
        }
        return (0);
}
2032 
2033 /*
2034  * When the MAC client is being brought up (i.e. we do a unicast_add) we need
2035  * to initialize the cpu and resource control structure in the
2036  * mac_client_impl_t from the mac_impl_t (i.e if there are any cached
2037  * properties before the flow entry for the unicast address was created).
2038  */
2039 static int
2040 mac_resource_ctl_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
2041 {
2042         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2043         mac_impl_t              *mip = (mac_impl_t *)mcip->mci_mip;
2044         mac_impl_t              *umip = mcip->mci_upper_mip;
2045         int                     err = 0;
2046         flow_entry_t            *flent = mcip->mci_flent;
2047         mac_resource_props_t    *omrp, *nmrp = MCIP_RESOURCE_PROPS(mcip);
2048 
2049         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2050 
2051         err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ?
2052             mcip->mci_upper_mip : mip, mrp);
2053         if (err != 0)
2054                 return (err);
2055 
2056         /*
2057          * Copy over the existing properties since mac_update_resources
2058          * will modify the client's mrp. Currently, the saved property
2059          * is used to determine the difference between existing and
2060          * modified rings property.
2061          */
2062         omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP);
2063         bcopy(nmrp, omrp, sizeof (*omrp));
2064         mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
2065         if (MCIP_DATAPATH_SETUP(mcip)) {
2066                 /*
2067                  * We support rings only for primary client when there are
2068                  * multiple clients sharing the same MAC address (e.g. VLAN).
2069                  */
2070                 if (mrp->mrp_mask & MRP_RX_RINGS ||
2071                     mrp->mrp_mask & MRP_TX_RINGS) {
2072 
2073                         if ((err = mac_client_set_rings_prop(mcip, mrp,
2074                             omrp)) != 0) {
2075                                 if (omrp->mrp_mask & MRP_RX_RINGS) {
2076                                         nmrp->mrp_mask |= MRP_RX_RINGS;
2077                                         nmrp->mrp_nrxrings = omrp->mrp_nrxrings;
2078                                 } else {
2079                                         nmrp->mrp_mask &= ~MRP_RX_RINGS;
2080                                         nmrp->mrp_nrxrings = 0;
2081                                 }
2082                                 if (omrp->mrp_mask & MRP_TX_RINGS) {
2083                                         nmrp->mrp_mask |= MRP_TX_RINGS;
2084                                         nmrp->mrp_ntxrings = omrp->mrp_ntxrings;
2085                                 } else {
2086                                         nmrp->mrp_mask &= ~MRP_TX_RINGS;
2087                                         nmrp->mrp_ntxrings = 0;
2088                                 }
2089                                 if (omrp->mrp_mask & MRP_RXRINGS_UNSPEC)
2090                                         omrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
2091                                 else
2092                                         omrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
2093 
2094                                 if (omrp->mrp_mask & MRP_TXRINGS_UNSPEC)
2095                                         omrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
2096                                 else
2097                                         omrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
2098                                 kmem_free(omrp, sizeof (*omrp));
2099                                 return (err);
2100                         }
2101 
2102                         /*
2103                          * If we modified the rings property of the primary
2104                          * we need to update the property fields of its
2105                          * VLANs as they inherit the primary's properites.
2106                          */
2107                         if (mac_is_primary_client(mcip)) {
2108                                 mac_set_prim_vlan_rings(mip,
2109                                     MCIP_RESOURCE_PROPS(mcip));
2110                         }
2111                 }
2112                 /*
2113                  * We have to set this prior to calling mac_flow_modify.
2114                  */
2115                 if (mrp->mrp_mask & MRP_PRIORITY) {
2116                         if (mrp->mrp_priority == MPL_RESET) {
2117                                 MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
2118                                     MPL_LINK_DEFAULT);
2119                         } else {
2120                                 MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
2121                                     mrp->mrp_priority);
2122                         }
2123                 }
2124 
2125                 mac_flow_modify(mip->mi_flow_tab, flent, mrp);
2126                 if (mrp->mrp_mask & MRP_PRIORITY)
2127                         mac_update_subflow_priority(mcip);
2128 
2129                 /* Apply these resource settings to any secondary macs */
2130                 if (umip != NULL) {
2131                         ASSERT((umip->mi_state_flags & MIS_IS_VNIC) != 0);
2132                         mac_vnic_secondary_update(umip);
2133                 }
2134         }
2135         kmem_free(omrp, sizeof (*omrp));
2136         return (0);
2137 }
2138 
2139 static int
2140 mac_unicast_flow_create(mac_client_impl_t *mcip, uint8_t *mac_addr,
2141     uint16_t vid, boolean_t is_primary, boolean_t first_flow,
2142     flow_entry_t **flent, mac_resource_props_t *mrp)
2143 {
2144         mac_impl_t      *mip = (mac_impl_t *)mcip->mci_mip;
2145         flow_desc_t     flow_desc;
2146         char            flowname[MAXFLOWNAMELEN];
2147         int             err;
2148         uint_t          flent_flags;
2149 
2150         /*
2151          * First unicast address being added, create a new flow
2152          * for that MAC client.
2153          */
2154         bzero(&flow_desc, sizeof (flow_desc));
2155 
2156         ASSERT(mac_addr != NULL ||
2157             (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR));
2158         if (mac_addr != NULL) {
2159                 flow_desc.fd_mac_len = mip->mi_type->mt_addr_length;
2160                 bcopy(mac_addr, flow_desc.fd_dst_mac, flow_desc.fd_mac_len);
2161         }
2162         flow_desc.fd_mask = FLOW_LINK_DST;
2163         if (vid != 0) {
2164                 flow_desc.fd_vid = vid;
2165                 flow_desc.fd_mask |= FLOW_LINK_VID;
2166         }
2167 
2168         /*
2169          * XXX-nicolas. For now I'm keeping the FLOW_PRIMARY_MAC
2170          * and FLOW_VNIC. Even though they're a hack inherited
2171          * from the SRS code, we'll keep them for now. They're currently
2172          * consumed by mac_datapath_setup() to create the SRS.
2173          * That code should be eventually moved out of
2174          * mac_datapath_setup() and moved to a mac_srs_create()
2175          * function of some sort to keep things clean.
2176          *
2177          * Also, there's no reason why the SRS for the primary MAC
2178          * client should be different than any other MAC client. Until
2179          * this is cleaned-up, we support only one MAC unicast address
2180          * per client.
2181          *
2182          * We set FLOW_PRIMARY_MAC for the primary MAC address,
2183          * FLOW_VNIC for everything else.
2184          */
2185         if (is_primary)
2186                 flent_flags = FLOW_PRIMARY_MAC;
2187         else
2188                 flent_flags = FLOW_VNIC_MAC;
2189 
2190         /*
2191          * For the first flow we use the MAC client's name - mci_name, for
2192          * subsequent ones we just create a name with the VID. This is
2193          * so that we can add these flows to the same flow table. This is
2194          * fine as the flow name (except for the one with the MAC client's
2195          * name) is not visible. When the first flow is removed, we just replace
2196          * its fdesc with another from the list, so we will still retain the
2197          * flent with the MAC client's flow name.
2198          */
2199         if (first_flow) {
2200                 bcopy(mcip->mci_name, flowname, MAXFLOWNAMELEN);
2201         } else {
2202                 (void) sprintf(flowname, "%s%u", mcip->mci_name, vid);
2203                 flent_flags = FLOW_NO_STATS;
2204         }
2205 
2206         if ((err = mac_flow_create(&flow_desc, mrp, flowname, NULL,
2207             flent_flags, flent)) != 0)
2208                 return (err);
2209 
2210         mac_misc_stat_create(*flent);
2211         FLOW_MARK(*flent, FE_INCIPIENT);
2212         (*flent)->fe_mcip = mcip;
2213 
2214         /*
2215          * Place initial creation reference on the flow. This reference
2216          * is released in the corresponding delete action viz.
2217          * mac_unicast_remove after waiting for all transient refs to
2218          * to go away. The wait happens in mac_flow_wait.
2219          * We have already held the reference in mac_client_open().
2220          */
2221         if (!first_flow)
2222                 FLOW_REFHOLD(*flent);
2223         return (0);
2224 }
2225 
2226 /* Refresh the multicast grouping for this VID. */
2227 int
2228 mac_client_update_mcast(void *arg, boolean_t add, const uint8_t *addrp)
2229 {
2230         flow_entry_t            *flent = arg;
2231         mac_client_impl_t       *mcip = flent->fe_mcip;
2232         uint16_t                vid;
2233         flow_desc_t             flow_desc;
2234 
2235         mac_flow_get_desc(flent, &flow_desc);
2236         vid = (flow_desc.fd_mask & FLOW_LINK_VID) != 0 ?
2237             flow_desc.fd_vid : VLAN_ID_NONE;
2238 
2239         /*
2240          * We don't call mac_multicast_add()/mac_multicast_remove() as
2241          * we want to add/remove for this specific vid.
2242          */
2243         if (add) {
2244                 return (mac_bcast_add(mcip, addrp, vid,
2245                     MAC_ADDRTYPE_MULTICAST));
2246         } else {
2247                 mac_bcast_delete(mcip, addrp, vid);
2248                 return (0);
2249         }
2250 }
2251 
2252 static void
2253 mac_update_single_active_client(mac_impl_t *mip)
2254 {
2255         mac_client_impl_t *client = NULL;
2256 
2257         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2258 
2259         rw_enter(&mip->mi_rw_lock, RW_WRITER);
2260         if (mip->mi_nactiveclients == 1) {
2261                 /*
2262                  * Find the one active MAC client from the list of MAC
2263                  * clients. The active MAC client has at least one
2264                  * unicast address.
2265                  */
2266                 for (client = mip->mi_clients_list; client != NULL;
2267                     client = client->mci_client_next) {
2268                         if (client->mci_unicast_list != NULL)
2269                                 break;
2270                 }
2271                 ASSERT(client != NULL);
2272         }
2273 
2274         /*
2275          * mi_single_active_client is protected by the MAC impl's read/writer
2276          * lock, which allows mac_rx() to check the value of that pointer
2277          * as a reader.
2278          */
2279         mip->mi_single_active_client = client;
2280         rw_exit(&mip->mi_rw_lock);
2281 }
2282 
/*
 * Set up the data path. Called from i_mac_unicast_add after having
 * done all the validations including making sure this is an active
 * client (i.e that is ready to process packets.)
 *
 * Arguments:
 *   mcip      - MAC client whose data path is being configured.
 *   vid       - VLAN ID used for the broadcast group membership and for
 *               the unicast flow that is created.
 *   mac_addr  - unicast address handed to mac_unicast_flow_create().
 *   mrp       - resource properties handed to mac_unicast_flow_create()
 *               when the first flow of the client is created. May be
 *               NULL, in which case the priority range is not set here.
 *   isprimary - forwarded unchanged to mac_unicast_flow_create().
 *   muip      - unicast handle; on success it is pushed on the head of
 *               mcip->mci_unicast_list, except for MCIS_NO_UNICAST_ADDR
 *               clients, for which it is not touched.
 *
 * Returns 0 on success, otherwise the error of the step that failed. On
 * failure the steps taken by this function that are tracked by the
 * mac_started, bcast_added and nactiveclients_added flags are undone at
 * the bail label.
 */
static int
mac_client_datapath_setup(mac_client_impl_t *mcip, uint16_t vid,
    uint8_t *mac_addr, mac_resource_props_t *mrp, boolean_t isprimary,
    mac_unicast_impl_t *muip)
{
        mac_impl_t      *mip = mcip->mci_mip;
        /* The three flags below record what the bail path has to undo. */
        boolean_t       mac_started = B_FALSE;
        boolean_t       bcast_added = B_FALSE;
        boolean_t       nactiveclients_added = B_FALSE;
        flow_entry_t    *flent;
        int             err = 0;
        boolean_t       no_unicast;

        no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;

        if ((err = mac_start((mac_handle_t)mip)) != 0)
                goto bail;

        mac_started = B_TRUE;

        /* add the MAC client to the broadcast address group by default */
        if (mip->mi_type->mt_brdcst_addr != NULL) {
                err = mac_bcast_add(mcip, mip->mi_type->mt_brdcst_addr, vid,
                    MAC_ADDRTYPE_BROADCAST);
                if (err != 0)
                        goto bail;
                bcast_added = B_TRUE;
        }

        /*
         * If this is the first unicast address addition for this
         * client, reuse the pre-allocated larval flow entry associated with
         * the MAC client. Otherwise start from NULL; flent is passed by
         * reference to mac_unicast_flow_create() below.
         */
        flent = (mcip->mci_nflents == 0) ? mcip->mci_flent : NULL;

        /* We are configuring the unicast flow now */
        if (!MCIP_DATAPATH_SETUP(mcip)) {

                /*
                 * Use the priority from the resource properties when one
                 * was specified (MRP_PRIORITY), MPL_LINK_DEFAULT otherwise.
                 */
                if (mrp != NULL) {
                        MAC_CLIENT_SET_PRIORITY_RANGE(mcip,
                            (mrp->mrp_mask & MRP_PRIORITY) ? mrp->mrp_priority :
                            MPL_LINK_DEFAULT);
                }
                if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
                    isprimary, B_TRUE, &flent, mrp)) != 0)
                        goto bail;

                /*
                 * Count this client as active. The increment is undone at
                 * bail if a later step fails.
                 */
                mip->mi_nactiveclients++;
                nactiveclients_added = B_TRUE;

                /*
                 * This will allocate the RX ring group if possible for the
                 * flow and program the software classifier as needed.
                 */
                if ((err = mac_datapath_setup(mcip, flent, SRST_LINK)) != 0)
                        goto bail;

                /*
                 * Clients without a unicast address skip the unicast
                 * bookkeeping (sub-flows, muip linkage) entirely.
                 */
                if (no_unicast)
                        goto done_setup;
                /*
                 * The unicast MAC address must have been added successfully.
                 */
                ASSERT(mcip->mci_unicast != NULL);

                /*
                 * Push down the sub-flows that were defined on this link
                 * hitherto. The flows are added to the active flow table
                 * and SRS, softrings etc. are created as needed.
                 */
                mac_link_init_flows((mac_client_handle_t)mcip);
        } else {
                mac_address_t *map = mcip->mci_unicast;

                ASSERT(!no_unicast);
                /*
                 * A unicast flow already exists for that MAC client
                 * so this flow must be the same MAC address but with
                 * a different VID. It has been checked by
                 * mac_addr_in_use().
                 *
                 * We will use the SRS etc. from the initial
                 * mci_flent. We don't need to create a kstat for
                 * this, as except for the fdesc, everything will be
                 * used from the first flent.
                 *
                 * The only time we should see multiple flents on the
                 * same MAC client is on the sun4v vsw. If we removed
                 * that code we should be able to remove the entire
                 * notion of multiple flents on a MAC client (this
                 * doesn't affect sub/user flows because they have
                 * their own list unrelated to mci_flent_list).
                 */
                if (bcmp(mac_addr, map->ma_addr, map->ma_len) != 0) {
                        err = EINVAL;
                        goto bail;
                }

                if ((err = mac_unicast_flow_create(mcip, mac_addr, vid,
                    isprimary, B_FALSE, &flent, NULL)) != 0) {
                        goto bail;
                }
                /*
                 * The new flent is not yet on any list of the client, so
                 * drop it here if it cannot be added to the flow table.
                 */
                if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0) {
                        FLOW_FINAL_REFRELE(flent);
                        goto bail;
                }

                /* update the multicast group for this vid */
                mac_client_bcast_refresh(mcip, mac_client_update_mcast,
                    (void *)flent, B_TRUE);

        }

        /* populate the shared MAC address */
        muip->mui_map = mcip->mci_unicast;

        /* Link the handle at the head of the client's unicast list. */
        rw_enter(&mcip->mci_rw_lock, RW_WRITER);
        muip->mui_next = mcip->mci_unicast_list;
        mcip->mci_unicast_list = muip;
        rw_exit(&mcip->mci_rw_lock);

done_setup:
        /*
         * First add the flent to the flow list of this mcip. Then set
         * the mip's mi_single_active_client if needed. The Rx path assumes
         * that mip->mi_single_active_client will always have an associated
         * flent.
         */
        mac_client_add_to_flow_list(mcip, flent);
        if (nactiveclients_added)
                mac_update_single_active_client(mip);
        /*
         * Trigger a renegotiation of the capabilities when the number of
         * active clients changes from 1 to 2, since some of the capabilities
         * might have to be disabled. Also send a MAC_NOTE_LINK notification
         * to all the MAC clients whenever physical link is DOWN.
         */
        if (mip->mi_nactiveclients == 2) {
                mac_capab_update((mac_handle_t)mip);
                mac_virtual_link_update(mip);
        }
        /*
         * Now that the setup is complete, clear the INCIPIENT flag.
         * The flag was set to avoid incoming packets seeing inconsistent
         * structures while the setup was in progress. Clear the mci_tx_flag
         * by calling mac_tx_client_unblock. It is possible that
         * mac_unicast_remove was called prior to this mac_unicast_add which
         * could have set the MCI_TX_QUIESCE flag.
         */
        if (flent->fe_rx_ring_group != NULL)
                mac_rx_group_unmark(flent->fe_rx_ring_group, MR_INCIPIENT);
        FLOW_UNMARK(flent, FE_INCIPIENT);
        FLOW_UNMARK(flent, FE_MC_NO_DATAPATH);
        mac_tx_client_unblock(mcip);
        return (0);
bail:
        /*
         * Undo, in reverse order, whatever was recorded as done above:
         * broadcast membership, active client count, mac_start().
         */
        if (bcast_added)
                mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr, vid);

        if (nactiveclients_added)
                mip->mi_nactiveclients--;

        if (mac_started)
                mac_stop((mac_handle_t)mip);

        return (err);
}
2455 
2456 /*
2457  * Return the passive primary MAC client, if present. The passive client is
2458  * a stand-by client that has the same unicast address as another that is
2459  * currenly active. Once the active client goes away, the passive client
2460  * becomes active.
2461  */
2462 static mac_client_impl_t *
2463 mac_get_passive_primary_client(mac_impl_t *mip)
2464 {
2465         mac_client_impl_t       *mcip;
2466 
2467         for (mcip = mip->mi_clients_list; mcip != NULL;
2468             mcip = mcip->mci_client_next) {
2469                 if (mac_is_primary_client(mcip) &&
2470                     (mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
2471                         return (mcip);
2472                 }
2473         }
2474         return (NULL);
2475 }
2476 
/*
 * Add a new unicast address to the MAC client.
 *
 * The MAC address can be specified either by value, or the MAC client
 * can specify that it wants to use the primary MAC address of the
 * underlying MAC. See the introductory comments at the beginning
 * of this file for more information on primary MAC addresses.
 *
 * Note also the tuple (MAC address, VID) must be unique
 * for the MAC clients defined on top of the same underlying MAC
 * instance, unless the MAC_UNICAST_NODUPCHECK is specified.
 *
 * In no case can a client use the PVID for the MAC, if the MAC has one set.
 *
 * mac_unicast_add() calls this function with the MAC perimeter held.
 *
 * Arguments:
 *   mch      - handle of the MAC client the address is added to.
 *   mac_addr - address to add. It is replaced by the primary address of
 *              the underlying MAC (mip->mi_addr) when MAC_UNICAST_PRIMARY
 *              or MAC_UNICAST_VNIC_PRIMARY is set in flags.
 *   flags    - MAC_UNICAST_* flags.
 *   mah      - set to the newly allocated unicast handle on success, and
 *              also when EAGAIN is returned.
 *   vid      - VLAN ID associated with the address.
 *   diag     - set to a MAC_DIAG_* code when the address is invalid,
 *              equal to the NIC address, or already in use.
 *
 * Return values:
 *   0        - success (including the case where the client was recorded
 *              as a passive primary client and no data path was set up).
 *   EAGAIN   - upper MAC client of a VNIC added as a passive client; the
 *              handle was allocated and the caller is expected to defer
 *              setting its rx functions.
 *   ENOTSUP  - client has MCIS_NO_UNICAST_ADDR set, or a VLAN client
 *              sharing the primary address has ring properties set.
 *   EBUSY    - vid is the PVID of the MAC, the VNIC primary address was
 *              already added, or exclusive/no-active restrictions apply.
 *   ENXIO    - primary client requested on top of an anchor VNIC.
 *   EINVAL   - invalid address, address equal to the NIC address, or
 *              inconsistent MAC_UNICAST_HW usage.
 *   EEXIST   - the (address, vid) tuple is already in use.
 *   Any error returned by mac_fastpath_disable() or
 *   mac_client_datapath_setup() is passed through.
 */
int
i_mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
    mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
{
        mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
        mac_impl_t              *mip = mcip->mci_mip;
        int                     err;
        uint_t                  mac_len = mip->mi_type->mt_addr_length;
        boolean_t               check_dups = !(flags & MAC_UNICAST_NODUPCHECK);
        boolean_t               fastpath_disabled = B_FALSE;
        boolean_t               is_primary = (flags & MAC_UNICAST_PRIMARY);
        boolean_t               is_unicast_hw = (flags & MAC_UNICAST_HW);
        mac_resource_props_t    *mrp;
        boolean_t               passive_client = B_FALSE;
        mac_unicast_impl_t      *muip;
        boolean_t               is_vnic_primary =
            (flags & MAC_UNICAST_VNIC_PRIMARY);

        /*
         * When the VID is non-zero the underlying MAC cannot be a
         * VNIC. I.e., dladm create-vlan cannot take a VNIC as
         * argument, only the primary MAC client.
         */
        ASSERT(!((mip->mi_state_flags & MIS_IS_VNIC) && (vid != VLAN_ID_NONE)));

        /*
         * Can't unicast add if the client asked only for minimal datapath
         * setup.
         */
        if (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR)
                return (ENOTSUP);

        /*
         * Check for an attempted use of the current Port VLAN ID, if enabled.
         * No client may use it.
         */
        if (mip->mi_pvid != VLAN_ID_NONE && vid == mip->mi_pvid)
                return (EBUSY);

        /*
         * Check whether it's the primary client and flag it.
         */
        if (!(mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
            vid == VLAN_ID_NONE)
                mcip->mci_flags |= MAC_CLIENT_FLAGS_PRIMARY;

        /*
         * is_vnic_primary is true when we come here as a VLAN VNIC
         * which uses the primary MAC client's address but with a non-zero
         * VID. In this case the MAC address is not specified by an upper
         * MAC client.
         */
        if ((mcip->mci_state_flags & MCIS_IS_VNIC) && is_primary &&
            !is_vnic_primary) {
                /*
                 * The address is being set by the upper MAC client
                 * of a VNIC. The MAC address was already set by the
                 * VNIC driver during VNIC creation.
                 *
                 * Note: a VNIC has only one MAC address. We return
                 * the MAC unicast address handle of the lower MAC client
                 * corresponding to the VNIC. We allocate a new entry
                 * which is flagged appropriately, so that mac_unicast_remove()
                 * doesn't attempt to free the original entry that
                 * was allocated by the VNIC driver.
                 */
                ASSERT(mcip->mci_unicast != NULL);

                /* Check for VLAN flags, if present */
                if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
                        mcip->mci_state_flags |= MCIS_TAG_DISABLE;

                if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
                        mcip->mci_state_flags |= MCIS_STRIP_DISABLE;

                if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
                        mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;

                /*
                 * Ensure that the primary unicast address of the VNIC
                 * is added only once unless we have the
                 * MAC_CLIENT_FLAGS_MULTI_PRIMARY set (and this is not
                 * a passive MAC client).
                 */
                if ((mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) != 0) {
                        if ((mcip->mci_flags &
                            MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
                            (mcip->mci_flags &
                            MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
                                return (EBUSY);
                        }
                        mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
                        passive_client = B_TRUE;
                }

                mcip->mci_flags |= MAC_CLIENT_FLAGS_VNIC_PRIMARY;

                /*
                 * Create a handle for vid 0.
                 */
                ASSERT(vid == VLAN_ID_NONE);
                muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
                muip->mui_vid = vid;
                *mah = (mac_unicast_handle_t)muip;
                /*
                 * This will be used by the caller to defer setting the
                 * rx functions.
                 */
                if (passive_client)
                        return (EAGAIN);
                return (0);
        }

        /* primary MAC clients cannot be opened on top of anchor VNICs */
        if ((is_vnic_primary || is_primary) &&
            i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
                return (ENXIO);
        }

        /*
         * If this is a VNIC/VLAN, disable softmac fast-path. This is
         * only relevant to legacy devices which use softmac to
         * interface with GLDv3.
         *
         * From here on, every failure return has to re-enable the
         * fast-path when fastpath_disabled is set.
         */
        if (mcip->mci_state_flags & MCIS_IS_VNIC) {
                err = mac_fastpath_disable((mac_handle_t)mip);
                if (err != 0)
                        return (err);
                fastpath_disabled = B_TRUE;
        }

        /*
         * Return EBUSY if:
         *  - an exclusively active mac client already exists.
         *  - this is an exclusive active mac client but
         *      a. active mac clients already exist, or
         *      b. fastpath streams are already plumbed on this legacy device
         *  - the mac creator has disallowed active mac clients.
         */
        if (mip->mi_state_flags & (MIS_EXCLUSIVE|MIS_NO_ACTIVE)) {
                if (fastpath_disabled)
                        mac_fastpath_enable((mac_handle_t)mip);
                return (EBUSY);
        }

        if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
                ASSERT(!fastpath_disabled);
                if (mip->mi_nactiveclients != 0)
                        return (EBUSY);

                if ((mip->mi_state_flags & MIS_LEGACY) &&
                    !(mip->mi_capab_legacy.ml_active_set(mip->mi_driver))) {
                        return (EBUSY);
                }
                /* Undone at bail_out if a later step fails. */
                mip->mi_state_flags |= MIS_EXCLUSIVE;
        }

        /*
         * mrp is a scratch copy of the resource properties handed to
         * mac_client_datapath_setup(); it is freed on every path below.
         */
        mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
        if (is_primary && !(mcip->mci_state_flags & (MCIS_IS_VNIC |
            MCIS_IS_AGGR_PORT))) {
                /*
                 * Apply the property cached in the mac_impl_t to the primary
                 * mac client. If the mac client is a VNIC or an aggregation
                 * port, its property should be set in the mcip when the
                 * VNIC/aggr was created.
                 */
                mac_get_resources((mac_handle_t)mip, mrp);
                (void) mac_client_set_resources(mch, mrp);
        } else if (mcip->mci_state_flags & MCIS_IS_VNIC) {
                /*
                 * This is a VLAN client sharing the address of the
                 * primary MAC client; i.e., one created via dladm
                 * create-vlan. We don't support specifying ring
                 * properties for this type of client as it inherits
                 * these from the primary MAC client.
                 */
                if (is_vnic_primary) {
                        mac_resource_props_t    *vmrp;

                        vmrp = MCIP_RESOURCE_PROPS(mcip);
                        if (vmrp->mrp_mask & MRP_RX_RINGS ||
                            vmrp->mrp_mask & MRP_TX_RINGS) {
                                if (fastpath_disabled)
                                        mac_fastpath_enable((mac_handle_t)mip);
                                kmem_free(mrp, sizeof (*mrp));
                                return (ENOTSUP);
                        }
                        /*
                         * Additionally we also need to inherit any
                         * rings property from the MAC.
                         */
                        mac_get_resources((mac_handle_t)mip, mrp);
                        if (mrp->mrp_mask & MRP_RX_RINGS) {
                                vmrp->mrp_mask |= MRP_RX_RINGS;
                                vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
                        }
                        if (mrp->mrp_mask & MRP_TX_RINGS) {
                                vmrp->mrp_mask |= MRP_TX_RINGS;
                                vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
                        }
                }
                /* Use the client's own properties for the data path setup. */
                bcopy(MCIP_RESOURCE_PROPS(mcip), mrp, sizeof (*mrp));
        }

        /* From here on failures go through bail_out, which frees muip. */
        muip = kmem_zalloc(sizeof (mac_unicast_impl_t), KM_SLEEP);
        muip->mui_vid = vid;

        if (is_primary || is_vnic_primary) {
                /* The caller-supplied address is not used in this case. */
                mac_addr = mip->mi_addr;
        } else {

                /*
                 * Verify the validity of the specified MAC addresses value.
                 */
                if (!mac_unicst_verify((mac_handle_t)mip, mac_addr, mac_len)) {
                        *diag = MAC_DIAG_MACADDR_INVALID;
                        err = EINVAL;
                        goto bail_out;
                }

                /*
                 * Make sure that the specified MAC address is different
                 * than the unicast MAC address of the underlying NIC.
                 */
                if (check_dups && bcmp(mip->mi_addr, mac_addr, mac_len) == 0) {
                        *diag = MAC_DIAG_MACADDR_NIC;
                        err = EINVAL;
                        goto bail_out;
                }
        }

        /*
         * Set the flags here so that if this is a passive client, we
         * can return and set it when we call mac_client_datapath_setup
         * when this becomes the active client. If we defer to using these
         * flags to mac_client_datapath_setup, then for a passive client,
         * we'd have to store the flags somewhere (probably fe_flags)
         * and then use it.
         */
        if (!MCIP_DATAPATH_SETUP(mcip)) {
                if (is_unicast_hw) {
                        /*
                         * The client requires a hardware MAC address slot
                         * for that unicast address. Since we support only
                         * one unicast MAC address per client, flag the
                         * MAC client itself.
                         */
                        mcip->mci_state_flags |= MCIS_UNICAST_HW;
                }

                /* Check for VLAN flags, if present */
                if ((flags & MAC_UNICAST_TAG_DISABLE) != 0)
                        mcip->mci_state_flags |= MCIS_TAG_DISABLE;

                if ((flags & MAC_UNICAST_STRIP_DISABLE) != 0)
                        mcip->mci_state_flags |= MCIS_STRIP_DISABLE;

                if ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0)
                        mcip->mci_state_flags |= MCIS_DISABLE_TX_VID_CHECK;
        } else {
                /*
                 * Assert that the specified flags are consistent with the
                 * flags specified by previous calls to mac_unicast_add().
                 * Each assertion requires the MAC_UNICAST_* flag and the
                 * matching MCIS_* state flag to be both set or both clear.
                 */
                ASSERT(((flags & MAC_UNICAST_TAG_DISABLE) != 0 &&
                    (mcip->mci_state_flags & MCIS_TAG_DISABLE) != 0) ||
                    ((flags & MAC_UNICAST_TAG_DISABLE) == 0 &&
                    (mcip->mci_state_flags & MCIS_TAG_DISABLE) == 0));

                ASSERT(((flags & MAC_UNICAST_STRIP_DISABLE) != 0 &&
                    (mcip->mci_state_flags & MCIS_STRIP_DISABLE) != 0) ||
                    ((flags & MAC_UNICAST_STRIP_DISABLE) == 0 &&
                    (mcip->mci_state_flags & MCIS_STRIP_DISABLE) == 0));

                ASSERT(((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) != 0 &&
                    (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) != 0) ||
                    ((flags & MAC_UNICAST_DISABLE_TX_VID_CHECK) == 0 &&
                    (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK) == 0));

                /*
                 * Make sure the client is consistent about its requests
                 * for MAC addresses. I.e. all requests from the clients
                 * must have the MAC_UNICAST_HW flag set or clear.
                 */
                if (((mcip->mci_state_flags & MCIS_UNICAST_HW) != 0 &&
                    !is_unicast_hw) ||
                    ((mcip->mci_state_flags & MCIS_UNICAST_HW) == 0 &&
                    is_unicast_hw)) {
                        err = EINVAL;
                        goto bail_out;
                }
        }
        /*
         * Make sure the MAC address is not already used by
         * another MAC client defined on top of the same
         * underlying NIC. Unless we have MAC_CLIENT_FLAGS_MULTI_PRIMARY
         * set when we allow a passive client to be present which will
         * be activated when the currently active client goes away - this
         * works only with primary addresses.
         */
        if ((check_dups || is_primary || is_vnic_primary) &&
            mac_addr_in_use(mip, mac_addr, vid)) {
                /*
                 * Must have set the multiple primary address flag when
                 * we did a mac_client_open AND this should be a primary
                 * MAC client AND there should not already be a passive
                 * primary. If all is true then we let this succeed
                 * even if the address is a dup.
                 */
                if ((mcip->mci_flags & MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
                    (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) == 0 ||
                    mac_get_passive_primary_client(mip) != NULL) {
                        *diag = MAC_DIAG_MACADDR_INUSE;
                        err = EEXIST;
                        goto bail_out;
                }
                ASSERT((mcip->mci_flags &
                    MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0);
                mcip->mci_flags |= MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
                /* No data path is set up now, so mrp is not needed. */
                kmem_free(mrp, sizeof (*mrp));

                /*
                 * Stash the unicast address handle, we will use it when
                 * we set up the passive client.
                 */
                mcip->mci_p_unicast_list = muip;
                *mah = (mac_unicast_handle_t)muip;
                return (0);
        }

        err = mac_client_datapath_setup(mcip, vid, mac_addr, mrp,
            is_primary || is_vnic_primary, muip);
        if (err != 0)
                goto bail_out;

        kmem_free(mrp, sizeof (*mrp));
        *mah = (mac_unicast_handle_t)muip;
        return (0);

bail_out:
        /*
         * Common failure path once mrp and muip have both been allocated:
         * re-enable the fast-path if it was disabled above, drop the
         * exclusive state taken on behalf of an MCIS_EXCLUSIVE client,
         * and free both allocations.
         */
        if (fastpath_disabled)
                mac_fastpath_enable((mac_handle_t)mip);
        if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
                mip->mi_state_flags &= ~MIS_EXCLUSIVE;
                if (mip->mi_state_flags & MIS_LEGACY) {
                        mip->mi_capab_legacy.ml_active_clear(
                            mip->mi_driver);
                }
        }
        kmem_free(mrp, sizeof (*mrp));
        kmem_free(muip, sizeof (mac_unicast_impl_t));
        return (err);
}
2844 
2845 /*
2846  * Wrapper function to mac_unicast_add when we want to have the same mac
2847  * client open for two instances, one that is currently active and another
2848  * that will become active when the current one is removed. In this case
2849  * mac_unicast_add will return EGAIN and we will save the rx function and
2850  * arg which will be used when we activate the passive client in
2851  * mac_unicast_remove.
2852  */
2853 int
2854 mac_unicast_add_set_rx(mac_client_handle_t mch, uint8_t *mac_addr,
2855     uint16_t flags, mac_unicast_handle_t *mah,  uint16_t vid, mac_diag_t *diag,
2856     mac_rx_t rx_fn, void *arg)
2857 {
2858         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2859         uint_t                  err;
2860 
2861         err = mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
2862         if (err != 0 && err != EAGAIN)
2863                 return (err);
2864         if (err == EAGAIN) {
2865                 if (rx_fn != NULL) {
2866                         mcip->mci_rx_p_fn = rx_fn;
2867                         mcip->mci_rx_p_arg = arg;
2868                 }
2869                 return (0);
2870         }
2871         if (rx_fn != NULL)
2872                 mac_rx_set(mch, rx_fn, arg);
2873         return (err);
2874 }
2875 
2876 int
2877 mac_unicast_add(mac_client_handle_t mch, uint8_t *mac_addr, uint16_t flags,
2878     mac_unicast_handle_t *mah, uint16_t vid, mac_diag_t *diag)
2879 {
2880         mac_impl_t *mip = ((mac_client_impl_t *)mch)->mci_mip;
2881         uint_t err;
2882 
2883         i_mac_perim_enter(mip);
2884         err = i_mac_unicast_add(mch, mac_addr, flags, mah, vid, diag);
2885         i_mac_perim_exit(mip);
2886 
2887         return (err);
2888 }
2889 
2890 static void
2891 mac_client_datapath_teardown(mac_client_handle_t mch, mac_unicast_impl_t *muip,
2892     flow_entry_t *flent)
2893 {
2894         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
2895         mac_impl_t              *mip = mcip->mci_mip;
2896         boolean_t               no_unicast;
2897 
2898         /*
2899          * If we have not added a unicast address for this MAC client, just
2900          * teardown the datapath.
2901          */
2902         no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2903 
2904         if (!no_unicast) {
2905                 /*
2906                  * We would have initialized subflows etc. only if we brought
2907                  * up the primary client and set the unicast unicast address
2908                  * etc. Deactivate the flows. The flow entry will be removed
2909                  * from the active flow tables, and the associated SRS,
2910                  * softrings etc will be deleted. But the flow entry itself
2911                  * won't be destroyed, instead it will continue to be archived
2912                  * off the  the global flow hash list, for a possible future
2913                  * activation when say IP is plumbed again.
2914                  */
2915                 mac_link_release_flows(mch);
2916         }
2917         mip->mi_nactiveclients--;
2918         mac_update_single_active_client(mip);
2919 
2920         /* Tear down the data path */
2921         mac_datapath_teardown(mcip, mcip->mci_flent, SRST_LINK);
2922 
2923         /*
2924          * Prevent any future access to the flow entry through the mci_flent
2925          * pointer by setting the mci_flent to NULL. Access to mci_flent in
2926          * mac_bcast_send is also under mi_rw_lock.
2927          */
2928         rw_enter(&mip->mi_rw_lock, RW_WRITER);
2929         flent = mcip->mci_flent;
2930         mac_client_remove_flow_from_list(mcip, flent);
2931 
2932         if (mcip->mci_state_flags & MCIS_DESC_LOGGED)
2933                 mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;
2934 
2935         /*
2936          * This is the last unicast address being removed and there shouldn't
2937          * be any outbound data threads at this point coming down from mac
2938          * clients. We have waited for the data threads to finish before
2939          * starting dld_str_detach. Non-data threads must access TX SRS
2940          * under mi_rw_lock.
2941          */
2942         rw_exit(&mip->mi_rw_lock);
2943 
2944         /*
2945          * Don't use FLOW_MARK with FE_MC_NO_DATAPATH, as the flow might
2946          * contain other flags, such as FE_CONDEMNED, which we need to
2947          * cleared. We don't call mac_flow_cleanup() for this unicast
2948          * flow as we have a already cleaned up SRSs etc. (via the teadown
2949          * path). We just clear the stats and reset the initial callback
2950          * function, the rest will be set when we call mac_flow_create,
2951          * if at all.
2952          */
2953         mutex_enter(&flent->fe_lock);
2954         ASSERT(flent->fe_refcnt == 1 && flent->fe_mbg == NULL &&
2955             flent->fe_tx_srs == NULL && flent->fe_rx_srs_cnt == 0);
2956         flent->fe_flags = FE_MC_NO_DATAPATH;
2957         flow_stat_destroy(flent);
2958         mac_misc_stat_delete(flent);
2959 
2960         /* Initialize the receiver function to a safe routine */
2961         flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
2962         flent->fe_cb_arg1 = NULL;
2963         flent->fe_cb_arg2 = NULL;
2964 
2965         flent->fe_index = -1;
2966         mutex_exit(&flent->fe_lock);
2967 
2968         if (mip->mi_type->mt_brdcst_addr != NULL) {
2969                 ASSERT(muip != NULL || no_unicast);
2970                 mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
2971                     muip != NULL ? muip->mui_vid : VLAN_ID_NONE);
2972         }
2973 
2974         if (mip->mi_nactiveclients == 1) {
2975                 mac_capab_update((mac_handle_t)mip);
2976                 mac_virtual_link_update(mip);
2977         }
2978 
2979         if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
2980                 mip->mi_state_flags &= ~MIS_EXCLUSIVE;
2981 
2982                 if (mip->mi_state_flags & MIS_LEGACY)
2983                         mip->mi_capab_legacy.ml_active_clear(mip->mi_driver);
2984         }
2985 
2986         mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
2987 
2988         if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
2989                 mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
2990 
2991         if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
2992                 mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
2993 
2994         if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
2995                 mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
2996 
2997         if (muip != NULL)
2998                 kmem_free(muip, sizeof (mac_unicast_impl_t));
2999         mac_protect_cancel_timer(mcip);
3000         mac_protect_flush_dynamic(mcip);
3001 
3002         bzero(&mcip->mci_misc_stat, sizeof (mcip->mci_misc_stat));
        /*
         * Enable fastpath if this is a VNIC or a VLAN.
         */
3006         if (mcip->mci_state_flags & MCIS_IS_VNIC)
3007                 mac_fastpath_enable((mac_handle_t)mip);
3008         mac_stop((mac_handle_t)mip);
3009 }
3010 
3011 /*
3012  * Remove a MAC address which was previously added by mac_unicast_add().
3013  */
3014 int
3015 mac_unicast_remove(mac_client_handle_t mch, mac_unicast_handle_t mah)
3016 {
3017         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3018         mac_unicast_impl_t *muip = (mac_unicast_impl_t *)mah;
3019         mac_unicast_impl_t *pre;
3020         mac_impl_t *mip = mcip->mci_mip;
3021         flow_entry_t            *flent;
3022         uint16_t mui_vid;
3023 
3024         i_mac_perim_enter(mip);
3025         if (mcip->mci_flags & MAC_CLIENT_FLAGS_VNIC_PRIMARY) {
3026                 /*
3027                  * Call made by the upper MAC client of a VNIC.
3028                  * There's nothing much to do, the unicast address will
3029                  * be removed by the VNIC driver when the VNIC is deleted,
3030                  * but let's ensure that all our transmit is done before
3031                  * the client does a mac_client_stop lest it trigger an
3032                  * assert in the driver.
3033                  */
3034                 ASSERT(muip->mui_vid == VLAN_ID_NONE);
3035 
3036                 mac_tx_client_flush(mcip);
3037 
3038                 if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
3039                         mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3040                         if (mcip->mci_rx_p_fn != NULL) {
3041                                 mac_rx_set(mch, mcip->mci_rx_p_fn,
3042                                     mcip->mci_rx_p_arg);
3043                                 mcip->mci_rx_p_fn = NULL;
3044                                 mcip->mci_rx_p_arg = NULL;
3045                         }
3046                         kmem_free(muip, sizeof (mac_unicast_impl_t));
3047                         i_mac_perim_exit(mip);
3048                         return (0);
3049                 }
3050                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_VNIC_PRIMARY;
3051 
3052                 if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
3053                         mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
3054 
3055                 if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
3056                         mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
3057 
3058                 if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
3059                         mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
3060 
3061                 kmem_free(muip, sizeof (mac_unicast_impl_t));
3062                 i_mac_perim_exit(mip);
3063                 return (0);
3064         }
3065 
3066         ASSERT(muip != NULL);
3067 
3068         /*
3069          * We are removing a passive client, we haven't setup the datapath
3070          * for this yet, so nothing much to do.
3071          */
3072         if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
3073 
3074                 ASSERT((mcip->mci_flent->fe_flags & FE_MC_NO_DATAPATH) != 0);
3075                 ASSERT(mcip->mci_p_unicast_list == muip);
3076 
3077                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3078 
3079                 mcip->mci_p_unicast_list = NULL;
3080                 mcip->mci_rx_p_fn = NULL;
3081                 mcip->mci_rx_p_arg = NULL;
3082 
3083                 mcip->mci_state_flags &= ~MCIS_UNICAST_HW;
3084 
3085                 if (mcip->mci_state_flags & MCIS_TAG_DISABLE)
3086                         mcip->mci_state_flags &= ~MCIS_TAG_DISABLE;
3087 
3088                 if (mcip->mci_state_flags & MCIS_STRIP_DISABLE)
3089                         mcip->mci_state_flags &= ~MCIS_STRIP_DISABLE;
3090 
3091                 if (mcip->mci_state_flags & MCIS_DISABLE_TX_VID_CHECK)
3092                         mcip->mci_state_flags &= ~MCIS_DISABLE_TX_VID_CHECK;
3093 
3094                 kmem_free(muip, sizeof (mac_unicast_impl_t));
3095                 i_mac_perim_exit(mip);
3096                 return (0);
3097         }
3098 
3099         /*
3100          * Remove the VID from the list of client's VIDs.
3101          */
3102         pre = mcip->mci_unicast_list;
3103         if (muip == pre) {
3104                 mcip->mci_unicast_list = muip->mui_next;
3105         } else {
3106                 while ((pre->mui_next != NULL) && (pre->mui_next != muip))
3107                         pre = pre->mui_next;
3108                 ASSERT(pre->mui_next == muip);
3109                 rw_enter(&mcip->mci_rw_lock, RW_WRITER);
3110                 pre->mui_next = muip->mui_next;
3111                 rw_exit(&mcip->mci_rw_lock);
3112         }
3113 
3114         if (!mac_client_single_rcvr(mcip)) {
3115                 /*
3116                  * This MAC client is shared by more than one unicast
3117                  * addresses, so we will just remove the flent
3118                  * corresponding to the address being removed. We don't invoke
3119                  * mac_rx_classify_flow_rem() since the additional flow is
3120                  * not associated with its own separate set of SRS and rings,
3121                  * and these constructs are still needed for the remaining
3122                  * flows.
3123                  */
3124                 flent = mac_client_get_flow(mcip, muip);
3125                 VERIFY3P(flent, !=, NULL);
3126 
3127                 /*
3128                  * The first one is disappearing, need to make sure
3129                  * we replace it with another from the list of
3130                  * shared clients.
3131                  */
3132                 if (flent == mcip->mci_flent)
3133                         flent = mac_client_swap_mciflent(mcip);
3134                 mac_client_remove_flow_from_list(mcip, flent);
3135                 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3136                 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3137 
3138                 /*
3139                  * The multicast groups that were added by the client so
3140                  * far must be removed from the brodcast domain corresponding
3141                  * to the VID being removed.
3142                  */
3143                 mac_client_bcast_refresh(mcip, mac_client_update_mcast,
3144                     (void *)flent, B_FALSE);
3145 
3146                 if (mip->mi_type->mt_brdcst_addr != NULL) {
3147                         mac_bcast_delete(mcip, mip->mi_type->mt_brdcst_addr,
3148                             muip->mui_vid);
3149                 }
3150 
3151                 FLOW_FINAL_REFRELE(flent);
3152                 ASSERT(!(mcip->mci_state_flags & MCIS_EXCLUSIVE));
3153 
3154                 /*
3155                  * Enable fastpath if this is a VNIC or a VLAN.
3156                  */
3157                 if (mcip->mci_state_flags & MCIS_IS_VNIC)
3158                         mac_fastpath_enable((mac_handle_t)mip);
3159                 mac_stop((mac_handle_t)mip);
3160                 i_mac_perim_exit(mip);
3161                 return (0);
3162         }
3163 
3164         mui_vid = muip->mui_vid;
3165         mac_client_datapath_teardown(mch, muip, flent);
3166 
3167         if ((mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY) &&
3168             mui_vid == VLAN_ID_NONE) {
3169                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PRIMARY;
3170         } else {
3171                 i_mac_perim_exit(mip);
3172                 return (0);
3173         }
3174 
3175         /*
3176          * If we are removing the primary, check if we have a passive primary
3177          * client that we need to activate now.
3178          */
3179         mcip = mac_get_passive_primary_client(mip);
3180         if (mcip != NULL) {
3181                 mac_resource_props_t    *mrp;
3182                 mac_unicast_impl_t      *muip;
3183 
3184                 mcip->mci_flags &= ~MAC_CLIENT_FLAGS_PASSIVE_PRIMARY;
3185                 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
3186 
3187                 /*
3188                  * Apply the property cached in the mac_impl_t to the
3189                  * primary mac client.
3190                  */
3191                 mac_get_resources((mac_handle_t)mip, mrp);
3192                 (void) mac_client_set_resources(mch, mrp);
3193                 ASSERT(mcip->mci_p_unicast_list != NULL);
3194                 muip = mcip->mci_p_unicast_list;
3195                 mcip->mci_p_unicast_list = NULL;
3196                 if (mac_client_datapath_setup(mcip, VLAN_ID_NONE,
3197                     mip->mi_addr, mrp, B_TRUE, muip) == 0) {
3198                         if (mcip->mci_rx_p_fn != NULL) {
3199                                 mac_rx_set(mch, mcip->mci_rx_p_fn,
3200                                     mcip->mci_rx_p_arg);
3201                                 mcip->mci_rx_p_fn = NULL;
3202                                 mcip->mci_rx_p_arg = NULL;
3203                         }
3204                 } else {
3205                         kmem_free(muip, sizeof (mac_unicast_impl_t));
3206                 }
3207                 kmem_free(mrp, sizeof (*mrp));
3208         }
3209         i_mac_perim_exit(mip);
3210         return (0);
3211 }
3212 
3213 /*
3214  * Multicast add function invoked by MAC clients.
3215  */
3216 int
3217 mac_multicast_add(mac_client_handle_t mch, const uint8_t *addr)
3218 {
3219         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3220         mac_impl_t              *mip = mcip->mci_mip;
3221         flow_entry_t            *flent = mcip->mci_flent_list;
3222         flow_entry_t            *prev_fe = NULL;
3223         uint16_t                vid;
3224         int                     err = 0;
3225 
3226         /* Verify the address is a valid multicast address */
3227         if ((err = mip->mi_type->mt_ops.mtops_multicst_verify(addr,
3228             mip->mi_pdata)) != 0)
3229                 return (err);
3230 
3231         i_mac_perim_enter(mip);
3232         while (flent != NULL) {
3233                 vid = i_mac_flow_vid(flent);
3234 
3235                 err = mac_bcast_add((mac_client_impl_t *)mch, addr, vid,
3236                     MAC_ADDRTYPE_MULTICAST);
3237                 if (err != 0)
3238                         break;
3239                 prev_fe = flent;
3240                 flent = flent->fe_client_next;
3241         }
3242 
3243         /*
3244          * If we failed adding, then undo all, rather than partial
3245          * success.
3246          */
3247         if (flent != NULL && prev_fe != NULL) {
3248                 flent = mcip->mci_flent_list;
3249                 while (flent != prev_fe->fe_client_next) {
3250                         vid = i_mac_flow_vid(flent);
3251                         mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
3252                         flent = flent->fe_client_next;
3253                 }
3254         }
3255         i_mac_perim_exit(mip);
3256         return (err);
3257 }
3258 
3259 /*
3260  * Multicast delete function invoked by MAC clients.
3261  */
3262 void
3263 mac_multicast_remove(mac_client_handle_t mch, const uint8_t *addr)
3264 {
3265         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3266         mac_impl_t              *mip = mcip->mci_mip;
3267         flow_entry_t            *flent;
3268         uint16_t                vid;
3269 
3270         i_mac_perim_enter(mip);
3271         for (flent = mcip->mci_flent_list; flent != NULL;
3272             flent = flent->fe_client_next) {
3273                 vid = i_mac_flow_vid(flent);
3274                 mac_bcast_delete((mac_client_impl_t *)mch, addr, vid);
3275         }
3276         i_mac_perim_exit(mip);
3277 }
3278 
3279 /*
3280  * When a MAC client desires to capture packets on an interface,
3281  * it registers a promiscuous call back with mac_promisc_add().
3282  * There are three types of promiscuous callbacks:
3283  *
3284  * * MAC_CLIENT_PROMISC_ALL
3285  *   Captures all packets sent and received by the MAC client,
3286  *   the physical interface, as well as all other MAC clients
3287  *   defined on top of the same MAC.
3288  *
3289  * * MAC_CLIENT_PROMISC_FILTERED
3290  *   Captures all packets sent and received by the MAC client,
3291  *   plus all multicast traffic sent and received by the phyisical
3292  *   interface and the other MAC clients.
3293  *
3294  * * MAC_CLIENT_PROMISC_MULTI
3295  *   Captures all broadcast and multicast packets sent and
3296  *   received by the MAC clients as well as the physical interface.
3297  *
3298  * In all cases, the underlying MAC is put in promiscuous mode.
3299  */
3300 int
3301 mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
3302     mac_rx_t fn, void *arg, mac_promisc_handle_t *mphp, uint16_t flags)
3303 {
3304         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3305         mac_impl_t *mip = mcip->mci_mip;
3306         mac_promisc_impl_t *mpip;
3307         mac_cb_info_t   *mcbi;
3308         int rc;
3309 
3310         i_mac_perim_enter(mip);
3311 
3312         if ((rc = mac_start((mac_handle_t)mip)) != 0) {
3313                 i_mac_perim_exit(mip);
3314                 return (rc);
3315         }
3316 
3317         if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
3318             type == MAC_CLIENT_PROMISC_ALL &&
3319             (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED)) {
3320                 /*
3321                  * The function is being invoked by the upper MAC client
3322                  * of a VNIC. The VNIC should only see the traffic
3323                  * it is entitled to.
3324                  */
3325                 type = MAC_CLIENT_PROMISC_FILTERED;
3326         }
3327 
3328 
3329         /*
3330          * Turn on promiscuous mode for the underlying NIC.
3331          * This is needed even for filtered callbacks which
3332          * expect to receive all multicast traffic on the wire.
3333          *
3334          * Physical promiscuous mode should not be turned on if
3335          * MAC_PROMISC_FLAGS_NO_PHYS is set.
3336          */
3337         if ((flags & MAC_PROMISC_FLAGS_NO_PHYS) == 0) {
3338                 if ((rc = i_mac_promisc_set(mip, B_TRUE)) != 0) {
3339                         mac_stop((mac_handle_t)mip);
3340                         i_mac_perim_exit(mip);
3341                         return (rc);
3342                 }
3343         }
3344 
3345         mpip = kmem_cache_alloc(mac_promisc_impl_cache, KM_SLEEP);
3346 
3347         mpip->mpi_type = type;
3348         mpip->mpi_fn = fn;
3349         mpip->mpi_arg = arg;
3350         mpip->mpi_mcip = mcip;
3351         mpip->mpi_no_tx_loop = ((flags & MAC_PROMISC_FLAGS_NO_TX_LOOP) != 0);
3352         mpip->mpi_no_phys = ((flags & MAC_PROMISC_FLAGS_NO_PHYS) != 0);
3353         mpip->mpi_strip_vlan_tag =
3354             ((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
3355         mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);
3356 
3357         mcbi = &mip->mi_promisc_cb_info;
3358         mutex_enter(mcbi->mcbi_lockp);
3359 
3360         mac_callback_add(&mip->mi_promisc_cb_info, &mcip->mci_promisc_list,
3361             &mpip->mpi_mci_link);
3362         mac_callback_add(&mip->mi_promisc_cb_info, &mip->mi_promisc_list,
3363             &mpip->mpi_mi_link);
3364 
3365         mutex_exit(mcbi->mcbi_lockp);
3366 
3367         *mphp = (mac_promisc_handle_t)mpip;
3368 
3369         if (mcip->mci_state_flags & MCIS_IS_VNIC) {
3370                 mac_impl_t *umip = mcip->mci_upper_mip;
3371 
3372                 ASSERT(umip != NULL);
3373                 mac_vnic_secondary_update(umip);
3374         }
3375 
3376         i_mac_perim_exit(mip);
3377 
3378         return (0);
3379 }
3380 
3381 /*
3382  * Remove a multicast address previously aded through mac_promisc_add().
3383  */
3384 void
3385 mac_promisc_remove(mac_promisc_handle_t mph)
3386 {
3387         mac_promisc_impl_t *mpip = (mac_promisc_impl_t *)mph;
3388         mac_client_impl_t *mcip = mpip->mpi_mcip;
3389         mac_impl_t *mip = mcip->mci_mip;
3390         mac_cb_info_t *mcbi;
3391         int rv;
3392 
3393         i_mac_perim_enter(mip);
3394 
3395         /*
3396          * Even if the device can't be reset into normal mode, we still
3397          * need to clear the client promisc callbacks. The client may want
3398          * to close the mac end point and we can't have stale callbacks.
3399          */
3400         if (!(mpip->mpi_no_phys)) {
3401                 if ((rv = i_mac_promisc_set(mip, B_FALSE)) != 0) {
3402                         cmn_err(CE_WARN, "%s: failed to switch OFF promiscuous"
3403                             " mode because of error 0x%x", mip->mi_name, rv);
3404                 }
3405         }
3406         mcbi = &mip->mi_promisc_cb_info;
3407         mutex_enter(mcbi->mcbi_lockp);
3408         if (mac_callback_remove(mcbi, &mip->mi_promisc_list,
3409             &mpip->mpi_mi_link)) {
3410                 VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
3411                     &mcip->mci_promisc_list, &mpip->mpi_mci_link));
3412                 kmem_cache_free(mac_promisc_impl_cache, mpip);
3413         } else {
3414                 mac_callback_remove_wait(&mip->mi_promisc_cb_info);
3415         }
3416 
3417         if (mcip->mci_state_flags & MCIS_IS_VNIC) {
3418                 mac_impl_t *umip = mcip->mci_upper_mip;
3419 
3420                 ASSERT(umip != NULL);
3421                 mac_vnic_secondary_update(umip);
3422         }
3423 
3424         mutex_exit(mcbi->mcbi_lockp);
3425         mac_stop((mac_handle_t)mip);
3426 
3427         i_mac_perim_exit(mip);
3428 }
3429 
/*
 * Try to take a reference on behalf of an active Tx thread.
 *
 * 'mytx' is set to the per-cpu Tx entry selected by masking CPU->cpu_seqid
 * with mac_tx_percpu_cnt. Under that entry's lock, if MCI_TX_QUIESCE is set
 * in mci_tx_flag (a control operation wants to quiesce the Tx data flow)
 * 'error' is set to -1 and no reference is taken; otherwise the entry's
 * reference count is bumped and 'error' is set to 0. Holding any of the per
 * cpu locks ensures that the mci_tx_flag won't change.
 *
 * 'CPU' must be accessed just once and used to compute the index into the
 * percpu array, and that index must be used for the entire duration of the
 * packet send operation. Note that the thread may be preempted and run on
 * another cpu any time and so we can't use 'CPU' more than once for the
 * operation. Callers therefore pass the same 'mytx' to MAC_TX_RELE().
 */
#define MAC_TX_TRY_HOLD(mcip, mytx, error)                              \
{                                                                       \
        (mytx) = &(mcip)->mci_tx_pcpu[CPU->cpu_seqid & mac_tx_percpu_cnt]; \
        mutex_enter(&(mytx)->pcpu_tx_lock);                             \
        if ((mcip)->mci_tx_flag & MCI_TX_QUIESCE) {                     \
                (error) = -1;                                           \
        } else {                                                        \
                (error) = 0;                                            \
                (mytx)->pcpu_tx_refcnt++;                               \
        }                                                               \
        mutex_exit(&(mytx)->pcpu_tx_lock);                              \
}
3454 
/*
 * Drop a reference taken with MAC_TX_TRY_HOLD() on the per-cpu entry
 * 'mytx'. When this was the entry's last reference and MCI_TX_QUIESCE is
 * set, signal mci_tx_cv for any control operation waiting for Tx
 * quiescence. The wait and signal are always done using the
 * mci_tx_pcpu[0]'s lock, so the entry's own lock is dropped before that
 * lock is taken.
 */
#define MAC_TX_RELE(mcip, mytx) {                                       \
        mutex_enter(&(mytx)->pcpu_tx_lock);                             \
        if (--(mytx)->pcpu_tx_refcnt != 0 ||                            \
            !((mcip)->mci_tx_flag & MCI_TX_QUIESCE)) {                  \
                mutex_exit(&(mytx)->pcpu_tx_lock);                      \
        } else {                                                        \
                mutex_exit(&(mytx)->pcpu_tx_lock);                      \
                mutex_enter(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);      \
                cv_signal(&(mcip)->mci_tx_cv);                          \
                mutex_exit(&(mcip)->mci_tx_pcpu[0].pcpu_tx_lock);       \
        }                                                               \
}
3472 
/*
 * Send function invoked by MAC clients.
 *
 * mch      - handle of the sending MAC client.
 * mp_chain - packet(s) to send; the single-packet fast path below is taken
 *            only when b_next is NULL.
 * hint     - passed through unchanged to the Tx SRS send function.
 * flag     - MAC_TX_* flags. MAC_TX_NO_HOLD means the caller has already
 *            bumped the active Tx thread count, so no hold/release is done
 *            here. The flags are also passed on to the SRS routines.
 * ret_mp   - passed through to the SRS routines.
 *
 * Returns the cookie produced by the SRS send function or by
 * mac_tx_srs_no_desc(), and 0 in every other case (including when the
 * packets were dropped).
 */
mac_tx_cookie_t
mac_tx(mac_client_handle_t mch, mblk_t *mp_chain, uintptr_t hint,
    uint16_t flag, mblk_t **ret_mp)
{
        mac_tx_cookie_t         cookie = 0;
        int                     error;
        mac_tx_percpu_t         *mytx;
        mac_soft_ring_set_t     *srs;
        flow_entry_t            *flent;
        boolean_t               is_subflow = B_FALSE;
        mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
        mac_impl_t              *mip = mcip->mci_mip;
        mac_srs_tx_t            *srs_tx;

        /*
         * Check whether the active Tx threads count is bumped already.
         * If not, take the hold here; when the client is being quiesced
         * the hold fails and the whole chain is dropped.
         */
        if (!(flag & MAC_TX_NO_HOLD)) {
                MAC_TX_TRY_HOLD(mcip, mytx, error);
                if (error != 0) {
                        freemsgchain(mp_chain);
                        return (0);
                }
        }

        /*
         * If mac protection is enabled, only the permissible packets will be
         * returned by mac_protect_check(). A NULL return means nothing is
         * left to send.
         */
        if ((mcip->mci_flent->
            fe_resource_props.mrp_mask & MRP_PROTECT) != 0 &&
            (mp_chain = mac_protect_check(mch, mp_chain)) == NULL)
                goto done;

        /*
         * Pick the flow to transmit on: a matching subflow if the client
         * has any, otherwise the client's own flow entry.
         */
        if (mcip->mci_subflow_tab != NULL &&
            mcip->mci_subflow_tab->ft_flow_count > 0 &&
            mac_flow_lookup(mcip->mci_subflow_tab, mp_chain,
            FLOW_OUTBOUND, &flent) == 0) {
                /*
                 * The main assumption here is that if in the event
                 * we get a chain, all the packets will be classified
                 * to the same Flow/SRS. If this changes for any
                 * reason, the following logic should change as well.
                 * I suppose the fanout_hint also assumes this .
                 */
                ASSERT(flent != NULL);
                is_subflow = B_TRUE;
        } else {
                flent = mcip->mci_flent;
        }

        srs = flent->fe_tx_srs;
        /*
         * This is to avoid panics with PF_PACKET that can call mac_tx()
         * against an interface that is not capable of sending. A rewrite
         * of the mac datapath is required to remove this limitation.
         */
        if (srs == NULL) {
                freemsgchain(mp_chain);
                goto done;
        }

        srs_tx = &srs->srs_tx;
        /*
         * Fast path: default Tx mode, nothing already queued on the SRS,
         * a single active client on the MAC and a single packet. Anything
         * else goes through the SRS send function.
         */
        if (srs_tx->st_mode == SRS_TX_DEFAULT &&
            (srs->srs_state & SRS_ENQUEUED) == 0 &&
            mip->mi_nactiveclients == 1 && mp_chain->b_next == NULL) {
                uint64_t        obytes;

                /*
                 * Since dls always opens the underlying MAC, nclients equals
                 * to 1 means that the only active client is dls itself acting
                 * as a primary client of the MAC instance. Since dls will not
                 * send tagged packets in that case, and dls is trusted to send
                 * packets for its allowed VLAN(s), the VLAN tag insertion and
                 * check is required only if nclients is greater than 1.
                 */
                if (mip->mi_nclients > 1) {
                        if (MAC_VID_CHECK_NEEDED(mcip)) {
                                int     err = 0;

                                MAC_VID_CHECK(mcip, mp_chain, err);
                                if (err != 0) {
                                        /* Failed the VID check: drop it. */
                                        freemsg(mp_chain);
                                        mcip->mci_misc_stat.mms_txerrors++;
                                        goto done;
                                }
                        }
                        if (MAC_TAG_NEEDED(mcip)) {
                                mp_chain = mac_add_vlan_tag(mp_chain, 0,
                                    mac_client_vid(mch));
                                if (mp_chain == NULL) {
                                        mcip->mci_misc_stat.mms_txerrors++;
                                        goto done;
                                }
                        }
                }

                /*
                 * Compute the byte count before sending; mp_chain is not
                 * usable for that once MAC_TX() has run.
                 */
                obytes = (mp_chain->b_cont == NULL ? MBLKL(mp_chain) :
                    msgdsize(mp_chain));

                /*
                 * MAC_TX() updates mp_chain. NULL afterwards is counted as
                 * a sent packet; a non-NULL remainder is handed to
                 * mac_tx_srs_no_desc() under the SRS lock.
                 */
                MAC_TX(mip, srs_tx->st_arg2, mp_chain, mcip);
                if (mp_chain == NULL) {
                        cookie = 0;
                        SRS_TX_STAT_UPDATE(srs, opackets, 1);
                        SRS_TX_STAT_UPDATE(srs, obytes, obytes);
                } else {
                        mutex_enter(&srs->srs_lock);
                        cookie = mac_tx_srs_no_desc(srs, mp_chain,
                            flag, ret_mp);
                        mutex_exit(&srs->srs_lock);
                }
        } else {
                cookie = srs_tx->st_func(srs, mp_chain, hint, flag, ret_mp);
        }

done:
        /*
         * Drop the subflow reference (presumably taken by the successful
         * mac_flow_lookup() above - confirm) and the Tx hold taken on
         * entry, if any.
         */
        if (is_subflow)
                FLOW_REFRELE(flent);

        if (!(flag & MAC_TX_NO_HOLD))
                MAC_TX_RELE(mcip, mytx);

        return (cookie);
}
3600 
3601 /*
3602  * mac_tx_is_blocked
3603  *
3604  * Given a cookie, it returns if the ring identified by the cookie is
3605  * flow-controlled or not. If NULL is passed in place of a cookie,
3606  * then it finds out if any of the underlying rings belonging to the
3607  * SRS is flow controlled or not and returns that status.
3608  */
3609 /* ARGSUSED */
3610 boolean_t
3611 mac_tx_is_flow_blocked(mac_client_handle_t mch, mac_tx_cookie_t cookie)
3612 {
3613         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3614         mac_soft_ring_set_t *mac_srs;
3615         mac_soft_ring_t *sringp;
3616         boolean_t blocked = B_FALSE;
3617         mac_tx_percpu_t *mytx;
3618         int err;
3619         int i;
3620 
3621         /*
3622          * Bump the reference count so that mac_srs won't be deleted.
3623          * If the client is currently quiesced and we failed to bump
3624          * the reference, return B_TRUE so that flow control stays
3625          * as enabled.
3626          *
3627          * Flow control will then be disabled once the client is no
3628          * longer quiesced.
3629          */
3630         MAC_TX_TRY_HOLD(mcip, mytx, err);
3631         if (err != 0)
3632                 return (B_TRUE);
3633 
3634         if ((mac_srs = MCIP_TX_SRS(mcip)) == NULL) {
3635                 MAC_TX_RELE(mcip, mytx);
3636                 return (B_FALSE);
3637         }
3638 
3639         mutex_enter(&mac_srs->srs_lock);
3640         /*
3641          * Only in the case of TX_FANOUT and TX_AGGR, the underlying
3642          * softring (s_ring_state) will have the HIWAT set. This is
3643          * the multiple Tx ring flow control case. For all other
3644          * case, SRS (srs_state) will store the condition.
3645          */
3646         if (mac_srs->srs_tx.st_mode == SRS_TX_FANOUT ||
3647             mac_srs->srs_tx.st_mode == SRS_TX_AGGR) {
3648                 if (cookie != 0) {
3649                         sringp = (mac_soft_ring_t *)cookie;
3650                         mutex_enter(&sringp->s_ring_lock);
3651                         if (sringp->s_ring_state & S_RING_TX_HIWAT)
3652                                 blocked = B_TRUE;
3653                         mutex_exit(&sringp->s_ring_lock);
3654                 } else {
3655                         for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3656                                 sringp = mac_srs->srs_tx_soft_rings[i];
3657                                 mutex_enter(&sringp->s_ring_lock);
3658                                 if (sringp->s_ring_state & S_RING_TX_HIWAT) {
3659                                         blocked = B_TRUE;
3660                                         mutex_exit(&sringp->s_ring_lock);
3661                                         break;
3662                                 }
3663                                 mutex_exit(&sringp->s_ring_lock);
3664                         }
3665                 }
3666         } else {
3667                 blocked = (mac_srs->srs_state & SRS_TX_HIWAT);
3668         }
3669         mutex_exit(&mac_srs->srs_lock);
3670         MAC_TX_RELE(mcip, mytx);
3671         return (blocked);
3672 }
3673 
3674 /*
3675  * Check if the MAC client is the primary MAC client.
3676  */
3677 boolean_t
3678 mac_is_primary_client(mac_client_impl_t *mcip)
3679 {
3680         return (mcip->mci_flags & MAC_CLIENT_FLAGS_PRIMARY);
3681 }
3682 
3683 void
3684 mac_ioctl(mac_handle_t mh, queue_t *wq, mblk_t *bp)
3685 {
3686         mac_impl_t      *mip = (mac_impl_t *)mh;
3687         int cmd = ((struct iocblk *)bp->b_rptr)->ioc_cmd;
3688 
3689         if ((cmd == ND_GET && (mip->mi_callbacks->mc_callbacks & MC_GETPROP)) ||
3690             (cmd == ND_SET && (mip->mi_callbacks->mc_callbacks & MC_SETPROP))) {
3691                 /*
3692                  * If ndd props were registered, call them.
3693                  * Note that ndd ioctls are Obsolete
3694                  */
3695                 mac_ndd_ioctl(mip, wq, bp);
3696                 return;
3697         }
3698 
3699         /*
3700          * Call the driver to handle the ioctl.  The driver may not support
3701          * any ioctls, in which case we reply with a NAK on its behalf.
3702          */
3703         if (mip->mi_callbacks->mc_callbacks & MC_IOCTL)
3704                 mip->mi_ioctl(mip->mi_driver, wq, bp);
3705         else
3706                 miocnak(wq, bp, 0, EINVAL);
3707 }
3708 
3709 /*
3710  * Return the link state of the specified MAC instance.
3711  */
3712 link_state_t
3713 mac_link_get(mac_handle_t mh)
3714 {
3715         return (((mac_impl_t *)mh)->mi_linkstate);
3716 }
3717 
3718 /*
3719  * Add a mac client specified notification callback. Please see the comments
3720  * above mac_callback_add() for general information about mac callback
3721  * addition/deletion in the presence of mac callback list walkers
3722  */
3723 mac_notify_handle_t
3724 mac_notify_add(mac_handle_t mh, mac_notify_t notify_fn, void *arg)
3725 {
3726         mac_impl_t              *mip = (mac_impl_t *)mh;
3727         mac_notify_cb_t         *mncb;
3728         mac_cb_info_t           *mcbi;
3729 
3730         /*
3731          * Allocate a notify callback structure, fill in the details and
3732          * use the mac callback list manipulation functions to chain into
3733          * the list of callbacks.
3734          */
3735         mncb = kmem_zalloc(sizeof (mac_notify_cb_t), KM_SLEEP);
3736         mncb->mncb_fn = notify_fn;
3737         mncb->mncb_arg = arg;
3738         mncb->mncb_mip = mip;
3739         mncb->mncb_link.mcb_objp = mncb;
3740         mncb->mncb_link.mcb_objsize = sizeof (mac_notify_cb_t);
3741         mncb->mncb_link.mcb_flags = MCB_NOTIFY_CB_T;
3742 
3743         mcbi = &mip->mi_notify_cb_info;
3744 
3745         i_mac_perim_enter(mip);
3746         mutex_enter(mcbi->mcbi_lockp);
3747 
3748         mac_callback_add(&mip->mi_notify_cb_info, &mip->mi_notify_cb_list,
3749             &mncb->mncb_link);
3750 
3751         mutex_exit(mcbi->mcbi_lockp);
3752         i_mac_perim_exit(mip);
3753         return ((mac_notify_handle_t)mncb);
3754 }
3755 
3756 void
3757 mac_notify_remove_wait(mac_handle_t mh)
3758 {
3759         mac_impl_t      *mip = (mac_impl_t *)mh;
3760         mac_cb_info_t   *mcbi = &mip->mi_notify_cb_info;
3761 
3762         mutex_enter(mcbi->mcbi_lockp);
3763         mac_callback_remove_wait(&mip->mi_notify_cb_info);
3764         mutex_exit(mcbi->mcbi_lockp);
3765 }
3766 
3767 /*
3768  * Remove a mac client specified notification callback
3769  */
3770 int
3771 mac_notify_remove(mac_notify_handle_t mnh, boolean_t wait)
3772 {
3773         mac_notify_cb_t *mncb = (mac_notify_cb_t *)mnh;
3774         mac_impl_t      *mip = mncb->mncb_mip;
3775         mac_cb_info_t   *mcbi;
3776         int             err = 0;
3777 
3778         mcbi = &mip->mi_notify_cb_info;
3779 
3780         i_mac_perim_enter(mip);
3781         mutex_enter(mcbi->mcbi_lockp);
3782 
3783         ASSERT(mncb->mncb_link.mcb_objp == mncb);
3784         /*
3785          * If there aren't any list walkers, the remove would succeed
3786          * inline, else we wait for the deferred remove to complete
3787          */
3788         if (mac_callback_remove(&mip->mi_notify_cb_info,
3789             &mip->mi_notify_cb_list, &mncb->mncb_link)) {
3790                 kmem_free(mncb, sizeof (mac_notify_cb_t));
3791         } else {
3792                 err = EBUSY;
3793         }
3794 
3795         mutex_exit(mcbi->mcbi_lockp);
3796         i_mac_perim_exit(mip);
3797 
3798         /*
3799          * If we failed to remove the notification callback and "wait" is set
3800          * to be B_TRUE, wait for the callback to finish after we exit the
3801          * mac perimeter.
3802          */
3803         if (err != 0 && wait) {
3804                 mac_notify_remove_wait((mac_handle_t)mip);
3805                 return (0);
3806         }
3807 
3808         return (err);
3809 }
3810 
3811 /*
3812  * Associate resource management callbacks with the specified MAC
3813  * clients.
3814  */
3815 
3816 void
3817 mac_resource_set_common(mac_client_handle_t mch, mac_resource_add_t add,
3818     mac_resource_remove_t remove, mac_resource_quiesce_t quiesce,
3819     mac_resource_restart_t restart, mac_resource_bind_t bind,
3820     void *arg)
3821 {
3822         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3823 
3824         mcip->mci_resource_add = add;
3825         mcip->mci_resource_remove = remove;
3826         mcip->mci_resource_quiesce = quiesce;
3827         mcip->mci_resource_restart = restart;
3828         mcip->mci_resource_bind = bind;
3829         mcip->mci_resource_arg = arg;
3830 }
3831 
3832 void
3833 mac_resource_set(mac_client_handle_t mch, mac_resource_add_t add, void *arg)
3834 {
3835         /* update the 'resource_add' callback */
3836         mac_resource_set_common(mch, add, NULL, NULL, NULL, NULL, arg);
3837 }
3838 
3839 /*
3840  * Sets up the client resources and enable the polling interface over all the
3841  * SRS's and the soft rings of the client
3842  */
3843 void
3844 mac_client_poll_enable(mac_client_handle_t mch)
3845 {
3846         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3847         mac_soft_ring_set_t     *mac_srs;
3848         flow_entry_t            *flent;
3849         int                     i;
3850 
3851         flent = mcip->mci_flent;
3852         ASSERT(flent != NULL);
3853 
3854         mcip->mci_state_flags |= MCIS_CLIENT_POLL_CAPABLE;
3855         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
3856                 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
3857                 ASSERT(mac_srs->srs_mcip == mcip);
3858                 mac_srs_client_poll_enable(mcip, mac_srs);
3859         }
3860 }
3861 
3862 /*
3863  * Tears down the client resources and disable the polling interface over all
3864  * the SRS's and the soft rings of the client
3865  */
3866 void
3867 mac_client_poll_disable(mac_client_handle_t mch)
3868 {
3869         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3870         mac_soft_ring_set_t     *mac_srs;
3871         flow_entry_t            *flent;
3872         int                     i;
3873 
3874         flent = mcip->mci_flent;
3875         ASSERT(flent != NULL);
3876 
3877         mcip->mci_state_flags &= ~MCIS_CLIENT_POLL_CAPABLE;
3878         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
3879                 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
3880                 ASSERT(mac_srs->srs_mcip == mcip);
3881                 mac_srs_client_poll_disable(mcip, mac_srs);
3882         }
3883 }
3884 
3885 /*
3886  * Associate the CPUs specified by the given property with a MAC client.
3887  */
3888 int
3889 mac_cpu_set(mac_client_handle_t mch, mac_resource_props_t *mrp)
3890 {
3891         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3892         mac_impl_t *mip = mcip->mci_mip;
3893         int err = 0;
3894 
3895         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3896 
3897         if ((err = mac_validate_props(mcip->mci_state_flags & MCIS_IS_VNIC ?
3898             mcip->mci_upper_mip : mip, mrp)) != 0) {
3899                 return (err);
3900         }
3901         if (MCIP_DATAPATH_SETUP(mcip))
3902                 mac_flow_modify(mip->mi_flow_tab, mcip->mci_flent, mrp);
3903 
3904         mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), B_FALSE);
3905         return (0);
3906 }
3907 
3908 /*
3909  * Apply the specified properties to the specified MAC client.
3910  */
3911 int
3912 mac_client_set_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
3913 {
3914         mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
3915         mac_impl_t *mip = mcip->mci_mip;
3916         int err = 0;
3917 
3918         i_mac_perim_enter(mip);
3919 
3920         if ((mrp->mrp_mask & MRP_MAXBW) || (mrp->mrp_mask & MRP_PRIORITY)) {
3921                 err = mac_resource_ctl_set(mch, mrp);
3922                 if (err != 0)
3923                         goto done;
3924         }
3925 
3926         if (mrp->mrp_mask & (MRP_CPUS|MRP_POOL)) {
3927                 err = mac_cpu_set(mch, mrp);
3928                 if (err != 0)
3929                         goto done;
3930         }
3931 
3932         if (mrp->mrp_mask & MRP_PROTECT) {
3933                 err = mac_protect_set(mch, mrp);
3934                 if (err != 0)
3935                         goto done;
3936         }
3937 
3938         if ((mrp->mrp_mask & MRP_RX_RINGS) || (mrp->mrp_mask & MRP_TX_RINGS))
3939                 err = mac_resource_ctl_set(mch, mrp);
3940 
3941 done:
3942         i_mac_perim_exit(mip);
3943         return (err);
3944 }
3945 
3946 /*
3947  * Return the properties currently associated with the specified MAC client.
3948  */
3949 void
3950 mac_client_get_resources(mac_client_handle_t mch, mac_resource_props_t *mrp)
3951 {
3952         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3953         mac_resource_props_t    *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
3954 
3955         bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
3956 }
3957 
3958 /*
3959  * Return the effective properties currently associated with the specified
3960  * MAC client.
3961  */
3962 void
3963 mac_client_get_effective_resources(mac_client_handle_t mch,
3964     mac_resource_props_t *mrp)
3965 {
3966         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
3967         mac_resource_props_t    *mcip_mrp = MCIP_EFFECTIVE_PROPS(mcip);
3968 
3969         bcopy(mcip_mrp, mrp, sizeof (mac_resource_props_t));
3970 }
3971 
3972 /*
3973  * Pass a copy of the specified packet to the promiscuous callbacks
3974  * of the specified MAC.
3975  *
3976  * If sender is NULL, the function is being invoked for a packet chain
3977  * received from the wire. If sender is non-NULL, it points to
3978  * the MAC client from which the packet is being sent.
3979  *
3980  * The packets are distributed to the promiscuous callbacks as follows:
3981  *
3982  * - all packets are sent to the MAC_CLIENT_PROMISC_ALL callbacks
 * - all broadcast and multicast packets are sent to the
 *   MAC_CLIENT_PROMISC_FILTERED and MAC_CLIENT_PROMISC_MULTI callbacks.
 *
 * The unicast packets of MAC_CLIENT_PROMISC_FILTERED callbacks are dispatched
 * after classification by mac_rx_deliver().
3988  */
3989 
3990 static void
3991 mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
3992     boolean_t loopback)
3993 {
3994         mblk_t *mp_copy, *mp_next;
3995 
3996         if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
3997                 mp_copy = copymsg(mp);
3998                 if (mp_copy == NULL)
3999                         return;
4000 
4001                 if (mpip->mpi_strip_vlan_tag) {
4002                         mp_copy = mac_strip_vlan_tag_chain(mp_copy);
4003                         if (mp_copy == NULL)
4004                                 return;
4005                 }
4006                 mp_next = NULL;
4007         } else {
4008                 mp_copy = mp;
4009                 mp_next = mp->b_next;
4010         }
4011         mp_copy->b_next = NULL;
4012 
4013         mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback);
4014         if (mp_copy == mp)
4015                 mp->b_next = mp_next;
4016 }
4017 
4018 /*
4019  * Return the VID of a packet. Zero if the packet is not tagged.
4020  */
4021 static uint16_t
4022 mac_ether_vid(mblk_t *mp)
4023 {
4024         struct ether_header *eth = (struct ether_header *)mp->b_rptr;
4025 
4026         if (ntohs(eth->ether_type) == ETHERTYPE_VLAN) {
4027                 struct ether_vlan_header *t_evhp =
4028                     (struct ether_vlan_header *)mp->b_rptr;
4029                 return (VLAN_ID(ntohs(t_evhp->ether_tci)));
4030         }
4031 
4032         return (0);
4033 }
4034 
4035 /*
4036  * Return whether the specified packet contains a multicast or broadcast
4037  * destination MAC address.
4038  */
4039 static boolean_t
4040 mac_is_mcast(mac_impl_t *mip, mblk_t *mp)
4041 {
4042         mac_header_info_t hdr_info;
4043 
4044         if (mac_header_info((mac_handle_t)mip, mp, &hdr_info) != 0)
4045                 return (B_FALSE);
4046         return ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
4047             (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST));
4048 }
4049 
4050 /*
4051  * Send a copy of an mblk chain to the MAC clients of the specified MAC.
4052  * "sender" points to the sender MAC client for outbound packets, and
4053  * is set to NULL for inbound packets.
4054  */
4055 void
4056 mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
4057     mac_client_impl_t *sender)
4058 {
4059         mac_promisc_impl_t *mpip;
4060         mac_cb_t *mcb;
4061         mblk_t *mp;
4062         boolean_t is_mcast, is_sender;
4063 
4064         MAC_PROMISC_WALKER_INC(mip);
4065         for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
4066                 is_mcast = mac_is_mcast(mip, mp);
4067                 /* send packet to interested callbacks */
4068                 for (mcb = mip->mi_promisc_list; mcb != NULL;
4069                     mcb = mcb->mcb_nextp) {
4070                         mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
4071                         is_sender = (mpip->mpi_mcip == sender);
4072 
4073                         if (is_sender && mpip->mpi_no_tx_loop)
4074                                 /*
4075                                  * The sender doesn't want to receive
4076                                  * copies of the packets it sends.
4077                                  */
4078                                 continue;
4079 
4080                         /* this client doesn't need any packets (bridge) */
4081                         if (mpip->mpi_fn == NULL)
4082                                 continue;
4083 
4084                         /*
4085                          * For an ethernet MAC, don't displatch a multicast
4086                          * packet to a non-PROMISC_ALL callbacks unless the VID
4087                          * of the packet matches the VID of the client.
4088                          */
4089                         if (is_mcast &&
4090                             mpip->mpi_type != MAC_CLIENT_PROMISC_ALL &&
4091                             !mac_client_check_flow_vid(mpip->mpi_mcip,
4092                             mac_ether_vid(mp)))
4093                                 continue;
4094 
4095                         if (is_sender ||
4096                             mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
4097                             is_mcast)
4098                                 mac_promisc_dispatch_one(mpip, mp, is_sender);
4099                 }
4100         }
4101         MAC_PROMISC_WALKER_DCR(mip);
4102 }
4103 
4104 void
4105 mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
4106 {
4107         mac_impl_t              *mip = mcip->mci_mip;
4108         mac_promisc_impl_t      *mpip;
4109         boolean_t               is_mcast;
4110         mblk_t                  *mp;
4111         mac_cb_t                *mcb;
4112 
4113         /*
4114          * The unicast packets for the MAC client still
4115          * need to be delivered to the MAC_CLIENT_PROMISC_FILTERED
4116          * promiscuous callbacks. The broadcast and multicast
4117          * packets were delivered from mac_rx().
4118          */
4119         MAC_PROMISC_WALKER_INC(mip);
4120         for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
4121                 is_mcast = mac_is_mcast(mip, mp);
4122                 for (mcb = mcip->mci_promisc_list; mcb != NULL;
4123                     mcb = mcb->mcb_nextp) {
4124                         mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
4125                         if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
4126                             !is_mcast) {
4127                                 mac_promisc_dispatch_one(mpip, mp, B_FALSE);
4128                         }
4129                 }
4130         }
4131         MAC_PROMISC_WALKER_DCR(mip);
4132 }
4133 
4134 /*
4135  * Return the margin value currently assigned to the specified MAC instance.
4136  */
4137 void
4138 mac_margin_get(mac_handle_t mh, uint32_t *marginp)
4139 {
4140         mac_impl_t *mip = (mac_impl_t *)mh;
4141 
4142         rw_enter(&(mip->mi_rw_lock), RW_READER);
4143         *marginp = mip->mi_margin;
4144         rw_exit(&(mip->mi_rw_lock));
4145 }
4146 
/*
 * mac_info_get() is used for retrieving the mac_info when a DL_INFO_REQ is
 * issued before a DL_ATTACH_REQ. We walk the i_mac_impl_hash table and find
 * the first mac_impl_t with a matching driver name; then we copy its mac_info_t
 * to the caller. We do all this with i_mac_impl_lock held so the mac_impl_t
 * cannot disappear while we are accessing it.
 *
 * i_mac_info_state_t is the walker argument: it carries the driver name to
 * search for into the walk and the matching mac_info_t (if any) back out.
 */
typedef struct i_mac_info_state_s {
        const char      *mi_name;       /* driver name being searched for */
        mac_info_t      *mi_infop;      /* match found by the walker, or NULL */
} i_mac_info_state_t;
4158 
4159 /*ARGSUSED*/
4160 static uint_t
4161 i_mac_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
4162 {
4163         i_mac_info_state_t *statep = arg;
4164         mac_impl_t *mip = (mac_impl_t *)val;
4165 
4166         if (mip->mi_state_flags & MIS_DISABLED)
4167                 return (MH_WALK_CONTINUE);
4168 
4169         if (strcmp(statep->mi_name,
4170             ddi_driver_name(mip->mi_dip)) != 0)
4171                 return (MH_WALK_CONTINUE);
4172 
4173         statep->mi_infop = &mip->mi_info;
4174         return (MH_WALK_TERMINATE);
4175 }
4176 
4177 boolean_t
4178 mac_info_get(const char *name, mac_info_t *minfop)
4179 {
4180         i_mac_info_state_t state;
4181 
4182         rw_enter(&i_mac_impl_lock, RW_READER);
4183         state.mi_name = name;
4184         state.mi_infop = NULL;
4185         mod_hash_walk(i_mac_impl_hash, i_mac_info_walker, &state);
4186         if (state.mi_infop == NULL) {
4187                 rw_exit(&i_mac_impl_lock);
4188                 return (B_FALSE);
4189         }
4190         *minfop = *state.mi_infop;
4191         rw_exit(&i_mac_impl_lock);
4192         return (B_TRUE);
4193 }
4194 
4195 /*
4196  * To get the capabilities that MAC layer cares about, such as rings, factory
4197  * mac address, vnic or not, it should directly invoke this function.  If the
4198  * link is part of a bridge, then the only "capability" it has is the inability
4199  * to do zero copy.
4200  */
4201 boolean_t
4202 i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
4203 {
4204         mac_impl_t *mip = (mac_impl_t *)mh;
4205 
4206         if (mip->mi_bridge_link != NULL)
4207                 return (cap == MAC_CAPAB_NO_ZCOPY);
4208         else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
4209                 return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
4210         else
4211                 return (B_FALSE);
4212 }
4213 
4214 /*
4215  * Capability query function. If number of active mac clients is greater than
4216  * 1, only limited capabilities can be advertised to the caller no matter the
4217  * driver has certain capability or not. Else, we query the driver to get the
4218  * capability.
4219  */
4220 boolean_t
4221 mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
4222 {
4223         mac_impl_t *mip = (mac_impl_t *)mh;
4224 
4225         /*
4226          * if mi_nactiveclients > 1, only MAC_CAPAB_LEGACY, MAC_CAPAB_HCKSUM,
4227          * MAC_CAPAB_NO_NATIVEVLAN and MAC_CAPAB_NO_ZCOPY can be advertised.
4228          */
4229         if (mip->mi_nactiveclients > 1) {
4230                 switch (cap) {
4231                 case MAC_CAPAB_NO_ZCOPY:
4232                         return (B_TRUE);
4233                 case MAC_CAPAB_LEGACY:
4234                 case MAC_CAPAB_HCKSUM:
4235                 case MAC_CAPAB_NO_NATIVEVLAN:
4236                         break;
4237                 default:
4238                         return (B_FALSE);
4239                 }
4240         }
4241 
4242         /* else get capab from driver */
4243         return (i_mac_capab_get(mh, cap, cap_data));
4244 }
4245 
4246 boolean_t
4247 mac_sap_verify(mac_handle_t mh, uint32_t sap, uint32_t *bind_sap)
4248 {
4249         mac_impl_t *mip = (mac_impl_t *)mh;
4250 
4251         return (mip->mi_type->mt_ops.mtops_sap_verify(sap, bind_sap,
4252             mip->mi_pdata));
4253 }
4254 
4255 mblk_t *
4256 mac_header(mac_handle_t mh, const uint8_t *daddr, uint32_t sap, mblk_t *payload,
4257     size_t extra_len)
4258 {
4259         mac_impl_t      *mip = (mac_impl_t *)mh;
4260         const uint8_t   *hdr_daddr;
4261 
4262         /*
4263          * If the MAC is point-to-point with a fixed destination address, then
4264          * we must always use that destination in the MAC header.
4265          */
4266         hdr_daddr = (mip->mi_dstaddr_set ? mip->mi_dstaddr : daddr);
4267         return (mip->mi_type->mt_ops.mtops_header(mip->mi_addr, hdr_daddr, sap,
4268             mip->mi_pdata, payload, extra_len));
4269 }
4270 
4271 int
4272 mac_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
4273 {
4274         mac_impl_t *mip = (mac_impl_t *)mh;
4275 
4276         return (mip->mi_type->mt_ops.mtops_header_info(mp, mip->mi_pdata,
4277             mhip));
4278 }
4279 
4280 int
4281 mac_vlan_header_info(mac_handle_t mh, mblk_t *mp, mac_header_info_t *mhip)
4282 {
4283         mac_impl_t      *mip = (mac_impl_t *)mh;
4284         boolean_t       is_ethernet = (mip->mi_info.mi_media == DL_ETHER);
4285         int             err = 0;
4286 
4287         /*
4288          * Packets should always be at least 16 bit aligned.
4289          */
4290         ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
4291 
4292         if ((err = mac_header_info(mh, mp, mhip)) != 0)
4293                 return (err);
4294 
4295         /*
4296          * If this is a VLAN-tagged Ethernet packet, then the SAP in the
4297          * mac_header_info_t as returned by mac_header_info() is
4298          * ETHERTYPE_VLAN. We need to grab the ethertype from the VLAN header.
4299          */
4300         if (is_ethernet && (mhip->mhi_bindsap == ETHERTYPE_VLAN)) {
4301                 struct ether_vlan_header *evhp;
4302                 uint16_t sap;
4303                 mblk_t *tmp = NULL;
4304                 size_t size;
4305 
4306                 size = sizeof (struct ether_vlan_header);
4307                 if (MBLKL(mp) < size) {
4308                         /*
4309                          * Pullup the message in order to get the MAC header
4310                          * infomation. Note that this is a read-only function,
4311                          * we keep the input packet intact.
4312                          */
4313                         if ((tmp = msgpullup(mp, size)) == NULL)
4314                                 return (EINVAL);
4315 
4316                         mp = tmp;
4317                 }
4318                 evhp = (struct ether_vlan_header *)mp->b_rptr;
4319                 sap = ntohs(evhp->ether_type);
4320                 (void) mac_sap_verify(mh, sap, &mhip->mhi_bindsap);
4321                 mhip->mhi_hdrsize = sizeof (struct ether_vlan_header);
4322                 mhip->mhi_tci = ntohs(evhp->ether_tci);
4323                 mhip->mhi_istagged = B_TRUE;
4324                 freemsg(tmp);
4325 
4326                 if (VLAN_CFI(mhip->mhi_tci) != ETHER_CFI)
4327                         return (EINVAL);
4328         } else {
4329                 mhip->mhi_istagged = B_FALSE;
4330                 mhip->mhi_tci = 0;
4331         }
4332 
4333         return (0);
4334 }
4335 
4336 mblk_t *
4337 mac_header_cook(mac_handle_t mh, mblk_t *mp)
4338 {
4339         mac_impl_t *mip = (mac_impl_t *)mh;
4340 
4341         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_COOK) {
4342                 if (DB_REF(mp) > 1) {
4343                         mblk_t *newmp = copymsg(mp);
4344                         if (newmp == NULL)
4345                                 return (NULL);
4346                         freemsg(mp);
4347                         mp = newmp;
4348                 }
4349                 return (mip->mi_type->mt_ops.mtops_header_cook(mp,
4350                     mip->mi_pdata));
4351         }
4352         return (mp);
4353 }
4354 
4355 mblk_t *
4356 mac_header_uncook(mac_handle_t mh, mblk_t *mp)
4357 {
4358         mac_impl_t *mip = (mac_impl_t *)mh;
4359 
4360         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_HEADER_UNCOOK) {
4361                 if (DB_REF(mp) > 1) {
4362                         mblk_t *newmp = copymsg(mp);
4363                         if (newmp == NULL)
4364                                 return (NULL);
4365                         freemsg(mp);
4366                         mp = newmp;
4367                 }
4368                 return (mip->mi_type->mt_ops.mtops_header_uncook(mp,
4369                     mip->mi_pdata));
4370         }
4371         return (mp);
4372 }
4373 
4374 uint_t
4375 mac_addr_len(mac_handle_t mh)
4376 {
4377         mac_impl_t *mip = (mac_impl_t *)mh;
4378 
4379         return (mip->mi_type->mt_addr_length);
4380 }
4381 
4382 /* True if a MAC is a VNIC */
4383 boolean_t
4384 mac_is_vnic(mac_handle_t mh)
4385 {
4386         return (((mac_impl_t *)mh)->mi_state_flags & MIS_IS_VNIC);
4387 }
4388 
4389 mac_handle_t
4390 mac_get_lower_mac_handle(mac_handle_t mh)
4391 {
4392         mac_impl_t *mip = (mac_impl_t *)mh;
4393 
4394         ASSERT(mac_is_vnic(mh));
4395         return (((vnic_t *)mip->mi_driver)->vn_lower_mh);
4396 }
4397 
4398 boolean_t
4399 mac_is_vnic_primary(mac_handle_t mh)
4400 {
4401         mac_impl_t *mip = (mac_impl_t *)mh;
4402 
4403         ASSERT(mac_is_vnic(mh));
4404         return (((vnic_t *)mip->mi_driver)->vn_addr_type ==
4405             VNIC_MAC_ADDR_TYPE_PRIMARY);
4406 }
4407 
4408 void
4409 mac_update_resources(mac_resource_props_t *nmrp, mac_resource_props_t *cmrp,
4410     boolean_t is_user_flow)
4411 {
4412         if (nmrp != NULL && cmrp != NULL) {
4413                 if (nmrp->mrp_mask & MRP_PRIORITY) {
4414                         if (nmrp->mrp_priority == MPL_RESET) {
4415                                 cmrp->mrp_mask &= ~MRP_PRIORITY;
4416                                 if (is_user_flow) {
4417                                         cmrp->mrp_priority =
4418                                             MPL_SUBFLOW_DEFAULT;
4419                                 } else {
4420                                         cmrp->mrp_priority = MPL_LINK_DEFAULT;
4421                                 }
4422                         } else {
4423                                 cmrp->mrp_mask |= MRP_PRIORITY;
4424                                 cmrp->mrp_priority = nmrp->mrp_priority;
4425                         }
4426                 }
4427                 if (nmrp->mrp_mask & MRP_MAXBW) {
4428                         if (nmrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
4429                                 cmrp->mrp_mask &= ~MRP_MAXBW;
4430                                 cmrp->mrp_maxbw = 0;
4431                         } else {
4432                                 cmrp->mrp_mask |= MRP_MAXBW;
4433                                 cmrp->mrp_maxbw = nmrp->mrp_maxbw;
4434                         }
4435                 }
4436                 if (nmrp->mrp_mask & MRP_CPUS)
4437                         MAC_COPY_CPUS(nmrp, cmrp);
4438 
4439                 if (nmrp->mrp_mask & MRP_POOL) {
4440                         if (strlen(nmrp->mrp_pool) == 0) {
4441                                 cmrp->mrp_mask &= ~MRP_POOL;
4442                                 bzero(cmrp->mrp_pool, sizeof (cmrp->mrp_pool));
4443                         } else {
4444                                 cmrp->mrp_mask |= MRP_POOL;
4445                                 (void) strncpy(cmrp->mrp_pool, nmrp->mrp_pool,
4446                                     sizeof (cmrp->mrp_pool));
4447                         }
4448 
4449                 }
4450 
4451                 if (nmrp->mrp_mask & MRP_PROTECT)
4452                         mac_protect_update(nmrp, cmrp);
4453 
4454                 /*
4455                  * Update the rings specified.
4456                  */
4457                 if (nmrp->mrp_mask & MRP_RX_RINGS) {
4458                         if (nmrp->mrp_mask & MRP_RINGS_RESET) {
4459                                 cmrp->mrp_mask &= ~MRP_RX_RINGS;
4460                                 if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4461                                         cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
4462                                 cmrp->mrp_nrxrings = 0;
4463                         } else {
4464                                 cmrp->mrp_mask |= MRP_RX_RINGS;
4465                                 cmrp->mrp_nrxrings = nmrp->mrp_nrxrings;
4466                         }
4467                 }
4468                 if (nmrp->mrp_mask & MRP_TX_RINGS) {
4469                         if (nmrp->mrp_mask & MRP_RINGS_RESET) {
4470                                 cmrp->mrp_mask &= ~MRP_TX_RINGS;
4471                                 if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4472                                         cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
4473                                 cmrp->mrp_ntxrings = 0;
4474                         } else {
4475                                 cmrp->mrp_mask |= MRP_TX_RINGS;
4476                                 cmrp->mrp_ntxrings = nmrp->mrp_ntxrings;
4477                         }
4478                 }
4479                 if (nmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4480                         cmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
4481                 else if (cmrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4482                         cmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;
4483 
4484                 if (nmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4485                         cmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
4486                 else if (cmrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4487                         cmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
4488         }
4489 }
4490 
4491 /*
4492  * i_mac_set_resources:
4493  *
4494  * This routine associates properties with the primary MAC client of
4495  * the specified MAC instance.
4496  * - Cache the properties in mac_impl_t
4497  * - Apply the properties to the primary MAC client if exists
4498  */
4499 int
4500 i_mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4501 {
4502         mac_impl_t              *mip = (mac_impl_t *)mh;
4503         mac_client_impl_t       *mcip;
4504         int                     err = 0;
4505         uint32_t                resmask, newresmask;
4506         mac_resource_props_t    *tmrp, *umrp;
4507 
4508         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
4509 
4510         err = mac_validate_props(mip, mrp);
4511         if (err != 0)
4512                 return (err);
4513 
4514         umrp = kmem_zalloc(sizeof (*umrp), KM_SLEEP);
4515         bcopy(&mip->mi_resource_props, umrp, sizeof (*umrp));
4516         resmask = umrp->mrp_mask;
4517         mac_update_resources(mrp, umrp, B_FALSE);
4518         newresmask = umrp->mrp_mask;
4519 
4520         if (resmask == 0 && newresmask != 0) {
4521                 /*
4522                  * Bandwidth, priority, cpu or pool link properties configured,
4523                  * must disable fastpath.
4524                  */
4525                 if ((err = mac_fastpath_disable((mac_handle_t)mip)) != 0) {
4526                         kmem_free(umrp, sizeof (*umrp));
4527                         return (err);
4528                 }
4529         }
4530 
4531         /*
4532          * Since bind_cpu may be modified by mac_client_set_resources()
4533          * we use a copy of bind_cpu and finally cache bind_cpu in mip.
4534          * This allows us to cache only user edits in mip.
4535          */
4536         tmrp = kmem_zalloc(sizeof (*tmrp), KM_SLEEP);
4537         bcopy(mrp, tmrp, sizeof (*tmrp));
4538         mcip = mac_primary_client_handle(mip);
4539         if (mcip != NULL && (mcip->mci_state_flags & MCIS_IS_AGGR_PORT) == 0) {
4540                 err = mac_client_set_resources((mac_client_handle_t)mcip, tmrp);
4541         } else if ((mrp->mrp_mask & MRP_RX_RINGS ||
4542             mrp->mrp_mask & MRP_TX_RINGS)) {
4543                 mac_client_impl_t       *vmcip;
4544 
4545                 /*
4546                  * If the primary is not up, we need to check if there
4547                  * are any VLANs on this primary. If there are then
4548                  * we need to set this property on the VLANs since
4549                  * VLANs follow the primary they are based on. Just
4550                  * look for the first VLAN and change its properties,
4551                  * all the other VLANs should be in the same group.
4552                  */
4553                 for (vmcip = mip->mi_clients_list; vmcip != NULL;
4554                     vmcip = vmcip->mci_client_next) {
4555                         if ((vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) &&
4556                             mac_client_vid((mac_client_handle_t)vmcip) !=
4557                             VLAN_ID_NONE) {
4558                                 break;
4559                         }
4560                 }
4561                 if (vmcip != NULL) {
4562                         mac_resource_props_t    *omrp;
4563                         mac_resource_props_t    *vmrp;
4564 
4565                         omrp = kmem_zalloc(sizeof (*omrp), KM_SLEEP);
4566                         bcopy(MCIP_RESOURCE_PROPS(vmcip), omrp, sizeof (*omrp));
4567                         /*
4568                          * We dont' call mac_update_resources since we
4569                          * want to take only the ring properties and
4570                          * not all the properties that may have changed.
4571                          */
4572                         vmrp = MCIP_RESOURCE_PROPS(vmcip);
4573                         if (mrp->mrp_mask & MRP_RX_RINGS) {
4574                                 if (mrp->mrp_mask & MRP_RINGS_RESET) {
4575                                         vmrp->mrp_mask &= ~MRP_RX_RINGS;
4576                                         if (vmrp->mrp_mask &
4577                                             MRP_RXRINGS_UNSPEC) {
4578                                                 vmrp->mrp_mask &=
4579                                                     ~MRP_RXRINGS_UNSPEC;
4580                                         }
4581                                         vmrp->mrp_nrxrings = 0;
4582                                 } else {
4583                                         vmrp->mrp_mask |= MRP_RX_RINGS;
4584                                         vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
4585                                 }
4586                         }
4587                         if (mrp->mrp_mask & MRP_TX_RINGS) {
4588                                 if (mrp->mrp_mask & MRP_RINGS_RESET) {
4589                                         vmrp->mrp_mask &= ~MRP_TX_RINGS;
4590                                         if (vmrp->mrp_mask &
4591                                             MRP_TXRINGS_UNSPEC) {
4592                                                 vmrp->mrp_mask &=
4593                                                     ~MRP_TXRINGS_UNSPEC;
4594                                         }
4595                                         vmrp->mrp_ntxrings = 0;
4596                                 } else {
4597                                         vmrp->mrp_mask |= MRP_TX_RINGS;
4598                                         vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
4599                                 }
4600                         }
4601                         if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)
4602                                 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
4603 
4604                         if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)
4605                                 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
4606 
4607                         if ((err = mac_client_set_rings_prop(vmcip, mrp,
4608                             omrp)) != 0) {
4609                                 bcopy(omrp, MCIP_RESOURCE_PROPS(vmcip),
4610                                     sizeof (*omrp));
4611                         } else {
4612                                 mac_set_prim_vlan_rings(mip, vmrp);
4613                         }
4614                         kmem_free(omrp, sizeof (*omrp));
4615                 }
4616         }
4617 
4618         /* Only update the values if mac_client_set_resources succeeded */
4619         if (err == 0) {
4620                 bcopy(umrp, &mip->mi_resource_props, sizeof (*umrp));
4621                 /*
4622                  * If bandwidth, priority or cpu link properties cleared,
4623                  * renable fastpath.
4624                  */
4625                 if (resmask != 0 && newresmask == 0)
4626                         mac_fastpath_enable((mac_handle_t)mip);
4627         } else if (resmask == 0 && newresmask != 0) {
4628                 mac_fastpath_enable((mac_handle_t)mip);
4629         }
4630         kmem_free(tmrp, sizeof (*tmrp));
4631         kmem_free(umrp, sizeof (*umrp));
4632         return (err);
4633 }
4634 
4635 int
4636 mac_set_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4637 {
4638         int err;
4639 
4640         i_mac_perim_enter((mac_impl_t *)mh);
4641         err = i_mac_set_resources(mh, mrp);
4642         i_mac_perim_exit((mac_impl_t *)mh);
4643         return (err);
4644 }
4645 
4646 /*
4647  * Get the properties cached for the specified MAC instance.
4648  */
4649 void
4650 mac_get_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4651 {
4652         mac_impl_t              *mip = (mac_impl_t *)mh;
4653         mac_client_impl_t       *mcip;
4654 
4655         mcip = mac_primary_client_handle(mip);
4656         if (mcip != NULL) {
4657                 mac_client_get_resources((mac_client_handle_t)mcip, mrp);
4658                 return;
4659         }
4660         bcopy(&mip->mi_resource_props, mrp, sizeof (mac_resource_props_t));
4661 }
4662 
4663 /*
4664  * Get the effective properties from the primary client of the
4665  * specified MAC instance.
4666  */
4667 void
4668 mac_get_effective_resources(mac_handle_t mh, mac_resource_props_t *mrp)
4669 {
4670         mac_impl_t              *mip = (mac_impl_t *)mh;
4671         mac_client_impl_t       *mcip;
4672 
4673         mcip = mac_primary_client_handle(mip);
4674         if (mcip != NULL) {
4675                 mac_client_get_effective_resources((mac_client_handle_t)mcip,
4676                     mrp);
4677                 return;
4678         }
4679         bzero(mrp, sizeof (mac_resource_props_t));
4680 }
4681 
4682 int
4683 mac_set_pvid(mac_handle_t mh, uint16_t pvid)
4684 {
4685         mac_impl_t *mip = (mac_impl_t *)mh;
4686         mac_client_impl_t *mcip;
4687         mac_unicast_impl_t *muip;
4688 
4689         i_mac_perim_enter(mip);
4690         if (pvid != 0) {
4691                 for (mcip = mip->mi_clients_list; mcip != NULL;
4692                     mcip = mcip->mci_client_next) {
4693                         for (muip = mcip->mci_unicast_list; muip != NULL;
4694                             muip = muip->mui_next) {
4695                                 if (muip->mui_vid == pvid) {
4696                                         i_mac_perim_exit(mip);
4697                                         return (EBUSY);
4698                                 }
4699                         }
4700                 }
4701         }
4702         mip->mi_pvid = pvid;
4703         i_mac_perim_exit(mip);
4704         return (0);
4705 }
4706 
4707 uint16_t
4708 mac_get_pvid(mac_handle_t mh)
4709 {
4710         mac_impl_t *mip = (mac_impl_t *)mh;
4711 
4712         return (mip->mi_pvid);
4713 }
4714 
4715 uint32_t
4716 mac_get_llimit(mac_handle_t mh)
4717 {
4718         mac_impl_t *mip = (mac_impl_t *)mh;
4719 
4720         return (mip->mi_llimit);
4721 }
4722 
4723 uint32_t
4724 mac_get_ldecay(mac_handle_t mh)
4725 {
4726         mac_impl_t *mip = (mac_impl_t *)mh;
4727 
4728         return (mip->mi_ldecay);
4729 }
4730 
4731 /*
4732  * Rename a mac client, its flow, and the kstat.
4733  */
4734 int
4735 mac_rename_primary(mac_handle_t mh, const char *new_name)
4736 {
4737         mac_impl_t              *mip = (mac_impl_t *)mh;
4738         mac_client_impl_t       *cur_clnt = NULL;
4739         flow_entry_t            *fep;
4740 
4741         i_mac_perim_enter(mip);
4742 
4743         /*
4744          * VNICs: we need to change the sys flow name and
4745          * the associated flow kstat.
4746          */
4747         if (mip->mi_state_flags & MIS_IS_VNIC) {
4748                 mac_client_impl_t *mcip = mac_vnic_lower(mip);
4749                 ASSERT(new_name != NULL);
4750                 mac_rename_flow_names(mcip, new_name);
4751                 mac_stat_rename(mcip);
4752                 goto done;
4753         }
4754         /*
4755          * This mac may itself be an aggr link, or it may have some client
4756          * which is an aggr port. For both cases, we need to change the
4757          * aggr port's mac client name, its flow name and the associated flow
4758          * kstat.
4759          */
4760         if (mip->mi_state_flags & MIS_IS_AGGR) {
4761                 mac_capab_aggr_t aggr_cap;
4762                 mac_rename_fn_t rename_fn;
4763                 boolean_t ret;
4764 
4765                 ASSERT(new_name != NULL);
4766                 ret = i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR,
4767                     (void *)(&aggr_cap));
4768                 ASSERT(ret == B_TRUE);
4769                 rename_fn = aggr_cap.mca_rename_fn;
4770                 rename_fn(new_name, mip->mi_driver);
4771                 /*
4772                  * The aggr's client name and kstat flow name will be
4773                  * updated below, i.e. via mac_rename_flow_names.
4774                  */
4775         }
4776 
4777         for (cur_clnt = mip->mi_clients_list; cur_clnt != NULL;
4778             cur_clnt = cur_clnt->mci_client_next) {
4779                 if (cur_clnt->mci_state_flags & MCIS_IS_AGGR_PORT) {
4780                         if (new_name != NULL) {
4781                                 char *str_st = cur_clnt->mci_name;
4782                                 char *str_del = strchr(str_st, '-');
4783 
4784                                 ASSERT(str_del != NULL);
4785                                 bzero(str_del + 1, MAXNAMELEN -
4786                                     (str_del - str_st + 1));
4787                                 bcopy(new_name, str_del + 1,
4788                                     strlen(new_name));
4789                         }
4790                         fep = cur_clnt->mci_flent;
4791                         mac_rename_flow(fep, cur_clnt->mci_name);
4792                         break;
4793                 } else if (new_name != NULL &&
4794                     cur_clnt->mci_state_flags & MCIS_USE_DATALINK_NAME) {
4795                         mac_rename_flow_names(cur_clnt, new_name);
4796                         break;
4797                 }
4798         }
4799 
4800         /* Recreate kstats associated with aggr pseudo rings */
4801         if (mip->mi_state_flags & MIS_IS_AGGR)
4802                 mac_pseudo_ring_stat_rename(mip);
4803 
4804 done:
4805         i_mac_perim_exit(mip);
4806         return (0);
4807 }
4808 
4809 /*
4810  * Rename the MAC client's flow names
4811  */
4812 static void
4813 mac_rename_flow_names(mac_client_impl_t *mcip, const char *new_name)
4814 {
4815         flow_entry_t    *flent;
4816         uint16_t        vid;
4817         char            flowname[MAXFLOWNAMELEN];
4818         mac_impl_t      *mip = mcip->mci_mip;
4819 
4820         ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
4821 
4822         /*
4823          * Use mi_rw_lock to ensure that threads not in the mac perimeter
4824          * see a self-consistent value for mci_name
4825          */
4826         rw_enter(&mip->mi_rw_lock, RW_WRITER);
4827         (void) strlcpy(mcip->mci_name, new_name, sizeof (mcip->mci_name));
4828         rw_exit(&mip->mi_rw_lock);
4829 
4830         mac_rename_flow(mcip->mci_flent, new_name);
4831 
4832         if (mcip->mci_nflents == 1)
4833                 return;
4834 
4835         /*
4836          * We have to rename all the others too, no stats to destroy for
4837          * these.
4838          */
4839         for (flent = mcip->mci_flent_list; flent != NULL;
4840             flent = flent->fe_client_next) {
4841                 if (flent != mcip->mci_flent) {
4842                         vid = i_mac_flow_vid(flent);
4843                         (void) sprintf(flowname, "%s%u", new_name, vid);
4844                         mac_flow_set_name(flent, flowname);
4845                 }
4846         }
4847 }
4848 
4849 
4850 /*
4851  * Add a flow to the MAC client's flow list - i.e list of MAC/VID tuples
4852  * defined for the specified MAC client.
4853  */
4854 static void
4855 mac_client_add_to_flow_list(mac_client_impl_t *mcip, flow_entry_t *flent)
4856 {
4857         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4858         /*
4859          * The promisc Rx data path walks the mci_flent_list. Protect by
4860          * using mi_rw_lock
4861          */
4862         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4863 
4864         mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
4865 
4866         /* Add it to the head */
4867         flent->fe_client_next = mcip->mci_flent_list;
4868         mcip->mci_flent_list = flent;
4869         mcip->mci_nflents++;
4870 
4871         /*
4872          * Keep track of the number of non-zero VIDs addresses per MAC
4873          * client to avoid figuring it out in the data-path.
4874          */
4875         if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
4876                 mcip->mci_nvids++;
4877 
4878         rw_exit(&mcip->mci_rw_lock);
4879 }
4880 
4881 /*
4882  * Remove a flow entry from the MAC client's list.
4883  */
4884 static void
4885 mac_client_remove_flow_from_list(mac_client_impl_t *mcip, flow_entry_t *flent)
4886 {
4887         flow_entry_t    *fe = mcip->mci_flent_list;
4888         flow_entry_t    *prev_fe = NULL;
4889 
4890         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4891         /*
4892          * The promisc Rx data path walks the mci_flent_list. Protect by
4893          * using mci_rw_lock
4894          */
4895         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4896         mcip->mci_vidcache = MCIP_VIDCACHE_INVALID;
4897 
4898         while ((fe != NULL) && (fe != flent)) {
4899                 prev_fe = fe;
4900                 fe = fe->fe_client_next;
4901         }
4902 
4903         ASSERT(fe != NULL);
4904         if (prev_fe == NULL) {
4905                 /* Deleting the first node */
4906                 mcip->mci_flent_list = fe->fe_client_next;
4907         } else {
4908                 prev_fe->fe_client_next = fe->fe_client_next;
4909         }
4910         mcip->mci_nflents--;
4911 
4912         if (i_mac_flow_vid(flent) != VLAN_ID_NONE)
4913                 mcip->mci_nvids--;
4914 
4915         rw_exit(&mcip->mci_rw_lock);
4916 }
4917 
4918 /*
4919  * Check if the given VID belongs to this MAC client.
4920  */
4921 boolean_t
4922 mac_client_check_flow_vid(mac_client_impl_t *mcip, uint16_t vid)
4923 {
4924         flow_entry_t    *flent;
4925         uint16_t        mci_vid;
4926         uint32_t        cache = mcip->mci_vidcache;
4927 
4928         /*
4929          * In hopes of not having to touch the mci_rw_lock, check to see if
4930          * this vid matches our cached result.
4931          */
4932         if (MCIP_VIDCACHE_ISVALID(cache) && MCIP_VIDCACHE_VID(cache) == vid)
4933                 return (MCIP_VIDCACHE_BOOL(cache) ? B_TRUE : B_FALSE);
4934 
4935         /* The mci_flent_list is protected by mci_rw_lock */
4936         rw_enter(&mcip->mci_rw_lock, RW_WRITER);
4937         for (flent = mcip->mci_flent_list; flent != NULL;
4938             flent = flent->fe_client_next) {
4939                 mci_vid = i_mac_flow_vid(flent);
4940                 if (vid == mci_vid) {
4941                         mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_TRUE);
4942                         rw_exit(&mcip->mci_rw_lock);
4943                         return (B_TRUE);
4944                 }
4945         }
4946 
4947         mcip->mci_vidcache = MCIP_VIDCACHE_CACHE(vid, B_FALSE);
4948         rw_exit(&mcip->mci_rw_lock);
4949         return (B_FALSE);
4950 }
4951 
4952 /*
4953  * Get the flow entry for the specified <MAC addr, VID> tuple.
4954  */
4955 static flow_entry_t *
4956 mac_client_get_flow(mac_client_impl_t *mcip, mac_unicast_impl_t *muip)
4957 {
4958         mac_address_t *map = mcip->mci_unicast;
4959         flow_entry_t *flent;
4960         uint16_t vid;
4961         flow_desc_t flow_desc;
4962 
4963         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4964 
4965         mac_flow_get_desc(mcip->mci_flent, &flow_desc);
4966         if (bcmp(flow_desc.fd_dst_mac, map->ma_addr, map->ma_len) != 0)
4967                 return (NULL);
4968 
4969         for (flent = mcip->mci_flent_list; flent != NULL;
4970             flent = flent->fe_client_next) {
4971                 vid = i_mac_flow_vid(flent);
4972                 if (vid == muip->mui_vid) {
4973                         return (flent);
4974                 }
4975         }
4976 
4977         return (NULL);
4978 }
4979 
4980 /*
4981  * Since mci_flent has the SRSs, when we want to remove it, we replace
4982  * the flow_desc_t in mci_flent with that of an existing flent and then
4983  * remove that flent instead of mci_flent.
4984  */
4985 static flow_entry_t *
4986 mac_client_swap_mciflent(mac_client_impl_t *mcip)
4987 {
4988         flow_entry_t    *flent = mcip->mci_flent;
4989         flow_tab_t      *ft = flent->fe_flow_tab;
4990         flow_entry_t    *flent1;
4991         flow_desc_t     fl_desc;
4992         char            fl_name[MAXFLOWNAMELEN];
4993         int             err;
4994 
4995         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
4996         ASSERT(mcip->mci_nflents > 1);
4997 
4998         /* get the next flent following the primary flent  */
4999         flent1 = mcip->mci_flent_list->fe_client_next;
5000         ASSERT(flent1 != NULL && flent1->fe_flow_tab == ft);
5001 
5002         /*
5003          * Remove the flent from the flow table before updating the
5004          * flow descriptor as the hash depends on the flow descriptor.
5005          * This also helps incoming packet classification avoid having
5006          * to grab fe_lock. Access to fe_flow_desc of a flent not in the
5007          * flow table is done under the fe_lock so that log or stat functions
5008          * see a self-consistent fe_flow_desc. The name and desc are specific
5009          * to a flow, the rest are shared by all the clients, including
5010          * resource control etc.
5011          */
5012         mac_flow_remove(ft, flent, B_TRUE);
5013         mac_flow_remove(ft, flent1, B_TRUE);
5014 
5015         bcopy(&flent->fe_flow_desc, &fl_desc, sizeof (flow_desc_t));
5016         bcopy(flent->fe_flow_name, fl_name, MAXFLOWNAMELEN);
5017 
5018         /* update the primary flow entry */
5019         mutex_enter(&flent->fe_lock);
5020         bcopy(&flent1->fe_flow_desc, &flent->fe_flow_desc,
5021             sizeof (flow_desc_t));
5022         bcopy(&flent1->fe_flow_name, &flent->fe_flow_name, MAXFLOWNAMELEN);
5023         mutex_exit(&flent->fe_lock);
5024 
5025         /* update the flow entry that is to be freed */
5026         mutex_enter(&flent1->fe_lock);
5027         bcopy(&fl_desc, &flent1->fe_flow_desc, sizeof (flow_desc_t));
5028         bcopy(fl_name, &flent1->fe_flow_name, MAXFLOWNAMELEN);
5029         mutex_exit(&flent1->fe_lock);
5030 
5031         /* now reinsert the flow entries in the table */
5032         err = mac_flow_add(ft, flent);
5033         ASSERT(err == 0);
5034 
5035         err = mac_flow_add(ft, flent1);
5036         ASSERT(err == 0);
5037 
5038         return (flent1);
5039 }
5040 
5041 /*
5042  * Return whether there is only one flow entry associated with this
5043  * MAC client.
5044  */
5045 static boolean_t
5046 mac_client_single_rcvr(mac_client_impl_t *mcip)
5047 {
5048         return (mcip->mci_nflents == 1);
5049 }
5050 
5051 int
5052 mac_validate_props(mac_impl_t *mip, mac_resource_props_t *mrp)
5053 {
5054         boolean_t               reset;
5055         uint32_t                rings_needed;
5056         uint32_t                rings_avail;
5057         mac_group_type_t        gtype;
5058         mac_resource_props_t    *mip_mrp;
5059 
5060         if (mrp == NULL)
5061                 return (0);
5062 
5063         if (mrp->mrp_mask & MRP_PRIORITY) {
5064                 mac_priority_level_t    pri = mrp->mrp_priority;
5065 
5066                 if (pri < MPL_LOW || pri > MPL_RESET)
5067                         return (EINVAL);
5068         }
5069 
5070         if (mrp->mrp_mask & MRP_MAXBW) {
5071                 uint64_t maxbw = mrp->mrp_maxbw;
5072 
5073                 if (maxbw < MRP_MAXBW_MINVAL && maxbw != 0)
5074                         return (EINVAL);
5075         }
5076         if (mrp->mrp_mask & MRP_CPUS) {
5077                 int i, j;
5078                 mac_cpu_mode_t  fanout;
5079 
5080                 if (mrp->mrp_ncpus > ncpus)
5081                         return (EINVAL);
5082 
5083                 for (i = 0; i < mrp->mrp_ncpus; i++) {
5084                         for (j = 0; j < mrp->mrp_ncpus; j++) {
5085                                 if (i != j &&
5086                                     mrp->mrp_cpu[i] == mrp->mrp_cpu[j]) {
5087                                         return (EINVAL);
5088                                 }
5089                         }
5090                 }
5091 
5092                 for (i = 0; i < mrp->mrp_ncpus; i++) {
5093                         cpu_t *cp;
5094                         int rv;
5095 
5096                         mutex_enter(&cpu_lock);
5097                         cp = cpu_get(mrp->mrp_cpu[i]);
5098                         if (cp != NULL)
5099                                 rv = cpu_is_online(cp);
5100                         else
5101                                 rv = 0;
5102                         mutex_exit(&cpu_lock);
5103                         if (rv == 0)
5104                                 return (EINVAL);
5105                 }
5106 
5107                 fanout = mrp->mrp_fanout_mode;
5108                 if (fanout < 0 || fanout > MCM_CPUS)
5109                         return (EINVAL);
5110         }
5111 
5112         if (mrp->mrp_mask & MRP_PROTECT) {
5113                 int err = mac_protect_validate(mrp);
5114                 if (err != 0)
5115                         return (err);
5116         }
5117 
5118         if (!(mrp->mrp_mask & MRP_RX_RINGS) &&
5119             !(mrp->mrp_mask & MRP_TX_RINGS)) {
5120                 return (0);
5121         }
5122 
5123         /*
5124          * mip will be null when we come from mac_flow_create or
5125          * mac_link_flow_modify. In the latter case it is a user flow,
5126          * for which we don't support rings. In the former we would
5127          * have validated the props beforehand (i_mac_unicast_add ->
5128          * mac_client_set_resources -> validate for the primary and
5129          * vnic_dev_create -> mac_client_set_resources -> validate for
5130          * a vnic.
5131          */
5132         if (mip == NULL)
5133                 return (0);
5134 
5135         /*
5136          * We don't support setting rings property for a VNIC that is using a
5137          * primary address (VLAN)
5138          */
5139         if ((mip->mi_state_flags & MIS_IS_VNIC) &&
5140             mac_is_vnic_primary((mac_handle_t)mip)) {
5141                 return (ENOTSUP);
5142         }
5143 
5144         mip_mrp = &mip->mi_resource_props;
5145         /*
5146          * The rings property should be validated against the NICs
5147          * resources
5148          */
5149         if (mip->mi_state_flags & MIS_IS_VNIC)
5150                 mip = (mac_impl_t *)mac_get_lower_mac_handle((mac_handle_t)mip);
5151 
5152         reset = mrp->mrp_mask & MRP_RINGS_RESET;
5153         /*
5154          * If groups are not supported, return error.
5155          */
5156         if (((mrp->mrp_mask & MRP_RX_RINGS) && mip->mi_rx_groups == NULL) ||
5157             ((mrp->mrp_mask & MRP_TX_RINGS) && mip->mi_tx_groups == NULL)) {
5158                 return (EINVAL);
5159         }
5160         /*
5161          * If we are just resetting, there is no validation needed.
5162          */
5163         if (reset)
5164                 return (0);
5165 
5166         if (mrp->mrp_mask & MRP_RX_RINGS) {
5167                 rings_needed = mrp->mrp_nrxrings;
5168                 /*
5169                  * We just want to check if the number of additional
5170                  * rings requested is available.
5171                  */
5172                 if (mip_mrp->mrp_mask & MRP_RX_RINGS) {
5173                         if (mrp->mrp_nrxrings > mip_mrp->mrp_nrxrings)
5174                                 /* Just check for the additional rings */
5175                                 rings_needed -= mip_mrp->mrp_nrxrings;
5176                         else
5177                                 /* We are not asking for additional rings */
5178                                 rings_needed = 0;
5179                 }
5180                 rings_avail = mip->mi_rxrings_avail;
5181                 gtype = mip->mi_rx_group_type;
5182         } else {
5183                 rings_needed = mrp->mrp_ntxrings;
5184                 /* Similarly for the TX rings */
5185                 if (mip_mrp->mrp_mask & MRP_TX_RINGS) {
5186                         if (mrp->mrp_ntxrings > mip_mrp->mrp_ntxrings)
5187                                 /* Just check for the additional rings */
5188                                 rings_needed -= mip_mrp->mrp_ntxrings;
5189                         else
5190                                 /* We are not asking for additional rings */
5191                                 rings_needed = 0;
5192                 }
5193                 rings_avail = mip->mi_txrings_avail;
5194                 gtype = mip->mi_tx_group_type;
5195         }
5196 
5197         /* Error if the group is dynamic .. */
5198         if (gtype == MAC_GROUP_TYPE_DYNAMIC) {
5199                 /*
5200                  * .. and rings specified are more than available.
5201                  */
5202                 if (rings_needed > rings_avail)
5203                         return (EINVAL);
5204         } else {
5205                 /*
5206                  * OR group is static and we have specified some rings.
5207                  */
5208                 if (rings_needed > 0)
5209                         return (EINVAL);
5210         }
5211         return (0);
5212 }
5213 
5214 /*
5215  * Send a MAC_NOTE_LINK notification to all the MAC clients whenever the
5216  * underlying physical link is down. This is to allow MAC clients to
5217  * communicate with other clients.
5218  */
5219 void
5220 mac_virtual_link_update(mac_impl_t *mip)
5221 {
5222         if (mip->mi_linkstate != LINK_STATE_UP)
5223                 i_mac_notify(mip, MAC_NOTE_LINK);
5224 }
5225 
5226 /*
5227  * For clients that have a pass-thru MAC, e.g. VNIC, we set the VNIC's
5228  * mac handle in the client.
5229  */
5230 void
5231 mac_set_upper_mac(mac_client_handle_t mch, mac_handle_t mh,
5232     mac_resource_props_t *mrp)
5233 {
5234         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5235         mac_impl_t              *mip = (mac_impl_t *)mh;
5236 
5237         mcip->mci_upper_mip = mip;
5238         /* If there are any properties, copy it over too */
5239         if (mrp != NULL) {
5240                 bcopy(mrp, &mip->mi_resource_props,
5241                     sizeof (mac_resource_props_t));
5242         }
5243 }
5244 
5245 /*
5246  * Mark the mac as being used exclusively by the single mac client that is
5247  * doing some control operation on this mac. No further opens of this mac
5248  * will be allowed until this client calls mac_unmark_exclusive. The mac
5249  * client calling this function must already be in the mac perimeter
5250  */
5251 int
5252 mac_mark_exclusive(mac_handle_t mh)
5253 {
5254         mac_impl_t      *mip = (mac_impl_t *)mh;
5255 
5256         ASSERT(MAC_PERIM_HELD(mh));
5257         /*
5258          * Look up its entry in the global hash table.
5259          */
5260         rw_enter(&i_mac_impl_lock, RW_WRITER);
5261         if (mip->mi_state_flags & MIS_DISABLED) {
5262                 rw_exit(&i_mac_impl_lock);
5263                 return (ENOENT);
5264         }
5265 
5266         /*
5267          * A reference to mac is held even if the link is not plumbed.
5268          * In i_dls_link_create() we open the MAC interface and hold the
5269          * reference. There is an additional reference for the mac_open
5270          * done in acquiring the mac perimeter
5271          */
5272         if (mip->mi_ref != 2) {
5273                 rw_exit(&i_mac_impl_lock);
5274                 return (EBUSY);
5275         }
5276 
5277         ASSERT(!(mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
5278         mip->mi_state_flags |= MIS_EXCLUSIVE_HELD;
5279         rw_exit(&i_mac_impl_lock);
5280         return (0);
5281 }
5282 
5283 void
5284 mac_unmark_exclusive(mac_handle_t mh)
5285 {
5286         mac_impl_t      *mip = (mac_impl_t *)mh;
5287 
5288         ASSERT(MAC_PERIM_HELD(mh));
5289 
5290         rw_enter(&i_mac_impl_lock, RW_WRITER);
5291         /* 1 for the creation and another for the perimeter */
5292         ASSERT(mip->mi_ref == 2 && (mip->mi_state_flags & MIS_EXCLUSIVE_HELD));
5293         mip->mi_state_flags &= ~MIS_EXCLUSIVE_HELD;
5294         rw_exit(&i_mac_impl_lock);
5295 }
5296 
5297 /*
5298  * Set the MTU for the specified MAC.
5299  */
5300 int
5301 mac_set_mtu(mac_handle_t mh, uint_t new_mtu, uint_t *old_mtu_arg)
5302 {
5303         mac_impl_t *mip = (mac_impl_t *)mh;
5304         uint_t old_mtu;
5305         int rv = 0;
5306 
5307         i_mac_perim_enter(mip);
5308 
5309         if (!(mip->mi_callbacks->mc_callbacks & (MC_SETPROP|MC_GETPROP))) {
5310                 rv = ENOTSUP;
5311                 goto bail;
5312         }
5313 
5314         old_mtu = mip->mi_sdu_max;
5315 
5316         if (new_mtu == 0 || new_mtu < mip->mi_sdu_min) {
5317                 rv = EINVAL;
5318                 goto bail;
5319         }
5320 
5321         rw_enter(&mip->mi_rw_lock, RW_READER);
5322         if (mip->mi_mtrp != NULL && new_mtu < mip->mi_mtrp->mtr_mtu) {
5323                 rv = EBUSY;
5324                 rw_exit(&mip->mi_rw_lock);
5325                 goto bail;
5326         }
5327         rw_exit(&mip->mi_rw_lock);
5328 
5329         if (old_mtu != new_mtu) {
5330                 rv = mip->mi_callbacks->mc_setprop(mip->mi_driver,
5331                     "mtu", MAC_PROP_MTU, sizeof (uint_t), &new_mtu);
5332                 if (rv != 0)
5333                         goto bail;
5334                 rv = mac_maxsdu_update(mh, new_mtu);
5335                 ASSERT(rv == 0);
5336         }
5337 
5338 bail:
5339         i_mac_perim_exit(mip);
5340 
5341         if (rv == 0 && old_mtu_arg != NULL)
5342                 *old_mtu_arg = old_mtu;
5343         return (rv);
5344 }
5345 
5346 /*
5347  * Return the RX h/w information for the group indexed by grp_num.
5348  */
5349 void
5350 mac_get_hwrxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num,
5351     uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts,
5352     char *clnts_name)
5353 {
5354         mac_impl_t *mip = (mac_impl_t *)mh;
5355         mac_grp_client_t *mcip;
5356         uint_t i = 0, index = 0;
5357         mac_ring_t      *ring;
5358 
5359         /* Revisit when we implement fully dynamic group allocation */
5360         ASSERT(grp_index >= 0 && grp_index < mip->mi_rx_group_count);
5361 
5362         rw_enter(&mip->mi_rw_lock, RW_READER);
5363         *grp_num = mip->mi_rx_groups[grp_index].mrg_index;
5364         *type = mip->mi_rx_groups[grp_index].mrg_type;
5365         *n_rings = mip->mi_rx_groups[grp_index].mrg_cur_count;
5366         ring = mip->mi_rx_groups[grp_index].mrg_rings;
5367         for (index = 0; index < mip->mi_rx_groups[grp_index].mrg_cur_count;
5368             index++) {
5369                 rings[index] = ring->mr_index;
5370                 ring = ring->mr_next;
5371         }
5372         /* Assuming the 1st is the default group */
5373         index = 0;
5374         if (grp_index == 0) {
5375                 (void) strlcpy(clnts_name, "<default,mcast>,",
5376                     MAXCLIENTNAMELEN);
5377                 index += strlen("<default,mcast>,");
5378         }
5379         for (mcip = mip->mi_rx_groups[grp_index].mrg_clients; mcip != NULL;
5380             mcip = mcip->mgc_next) {
5381                 int name_len = strlen(mcip->mgc_client->mci_name);
5382 
5383                 /*
5384                  * MAXCLIENTNAMELEN is the buffer size reserved for client
5385                  * names.
5386                  * XXXX Formating the client name string needs to be moved
5387                  * to user land when fixing the size of dhi_clnts in
5388                  * dld_hwgrpinfo_t. We should use n_clients * client_name for
5389                  * dhi_clntsin instead of MAXCLIENTNAMELEN
5390                  */
5391                 if (index + name_len >= MAXCLIENTNAMELEN) {
5392                         index = MAXCLIENTNAMELEN;
5393                         break;
5394                 }
5395                 bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]),
5396                     name_len);
5397                 index += name_len;
5398                 clnts_name[index++] = ',';
5399                 i++;
5400         }
5401 
5402         /* Get rid of the last , */
5403         if (index > 0)
5404                 clnts_name[index - 1] = '\0';
5405         *n_clnts = i;
5406         rw_exit(&mip->mi_rw_lock);
5407 }
5408 
5409 /*
5410  * Return the TX h/w information for the group indexed by grp_num.
5411  */
5412 void
5413 mac_get_hwtxgrp_info(mac_handle_t mh, int grp_index, uint_t *grp_num,
5414     uint_t *n_rings, uint_t *rings, uint_t *type, uint_t *n_clnts,
5415     char *clnts_name)
5416 {
5417         mac_impl_t *mip = (mac_impl_t *)mh;
5418         mac_grp_client_t *mcip;
5419         uint_t i = 0, index = 0;
5420         mac_ring_t      *ring;
5421 
5422         /* Revisit when we implement fully dynamic group allocation */
5423         ASSERT(grp_index >= 0 && grp_index <= mip->mi_tx_group_count);
5424 
5425         rw_enter(&mip->mi_rw_lock, RW_READER);
5426         *grp_num = mip->mi_tx_groups[grp_index].mrg_index > 0 ?
5427             mip->mi_tx_groups[grp_index].mrg_index : grp_index;
5428         *type = mip->mi_tx_groups[grp_index].mrg_type;
5429         *n_rings = mip->mi_tx_groups[grp_index].mrg_cur_count;
5430         ring = mip->mi_tx_groups[grp_index].mrg_rings;
5431         for (index = 0; index < mip->mi_tx_groups[grp_index].mrg_cur_count;
5432             index++) {
5433                 rings[index] = ring->mr_index;
5434                 ring = ring->mr_next;
5435         }
5436         index = 0;
5437         /* Default group has an index of -1 */
5438         if (mip->mi_tx_groups[grp_index].mrg_index < 0) {
5439                 (void) strlcpy(clnts_name, "<default>,",
5440                     MAXCLIENTNAMELEN);
5441                 index += strlen("<default>,");
5442         }
5443         for (mcip = mip->mi_tx_groups[grp_index].mrg_clients; mcip != NULL;
5444             mcip = mcip->mgc_next) {
5445                 int name_len = strlen(mcip->mgc_client->mci_name);
5446 
5447                 /*
5448                  * MAXCLIENTNAMELEN is the buffer size reserved for client
5449                  * names.
5450                  * XXXX Formating the client name string needs to be moved
5451                  * to user land when fixing the size of dhi_clnts in
5452                  * dld_hwgrpinfo_t. We should use n_clients * client_name for
5453                  * dhi_clntsin instead of MAXCLIENTNAMELEN
5454                  */
5455                 if (index + name_len >= MAXCLIENTNAMELEN) {
5456                         index = MAXCLIENTNAMELEN;
5457                         break;
5458                 }
5459                 bcopy(mcip->mgc_client->mci_name, &(clnts_name[index]),
5460                     name_len);
5461                 index += name_len;
5462                 clnts_name[index++] = ',';
5463                 i++;
5464         }
5465 
5466         /* Get rid of the last , */
5467         if (index > 0)
5468                 clnts_name[index - 1] = '\0';
5469         *n_clnts = i;
5470         rw_exit(&mip->mi_rw_lock);
5471 }
5472 
5473 /*
5474  * Return the group count for RX or TX.
5475  */
5476 uint_t
5477 mac_hwgrp_num(mac_handle_t mh, int type)
5478 {
5479         mac_impl_t *mip = (mac_impl_t *)mh;
5480 
5481         /*
5482          * Return the Rx and Tx group count; for the Tx we need to
5483          * include the default too.
5484          */
5485         return (type == MAC_RING_TYPE_RX ? mip->mi_rx_group_count :
5486             mip->mi_tx_groups != NULL ? mip->mi_tx_group_count + 1 : 0);
5487 }
5488 
5489 /*
5490  * The total number of free TX rings for this MAC.
5491  */
5492 uint_t
5493 mac_txavail_get(mac_handle_t mh)
5494 {
5495         mac_impl_t      *mip = (mac_impl_t *)mh;
5496 
5497         return (mip->mi_txrings_avail);
5498 }
5499 
5500 /*
5501  * The total number of free RX rings for this MAC.
5502  */
5503 uint_t
5504 mac_rxavail_get(mac_handle_t mh)
5505 {
5506         mac_impl_t      *mip = (mac_impl_t *)mh;
5507 
5508         return (mip->mi_rxrings_avail);
5509 }
5510 
5511 /*
5512  * The total number of reserved RX rings on this MAC.
5513  */
5514 uint_t
5515 mac_rxrsvd_get(mac_handle_t mh)
5516 {
5517         mac_impl_t      *mip = (mac_impl_t *)mh;
5518 
5519         return (mip->mi_rxrings_rsvd);
5520 }
5521 
5522 /*
5523  * The total number of reserved TX rings on this MAC.
5524  */
5525 uint_t
5526 mac_txrsvd_get(mac_handle_t mh)
5527 {
5528         mac_impl_t      *mip = (mac_impl_t *)mh;
5529 
5530         return (mip->mi_txrings_rsvd);
5531 }
5532 
5533 /*
5534  * Total number of free RX groups on this MAC.
5535  */
5536 uint_t
5537 mac_rxhwlnksavail_get(mac_handle_t mh)
5538 {
5539         mac_impl_t      *mip = (mac_impl_t *)mh;
5540 
5541         return (mip->mi_rxhwclnt_avail);
5542 }
5543 
5544 /*
5545  * Total number of RX groups reserved on this MAC.
5546  */
5547 uint_t
5548 mac_rxhwlnksrsvd_get(mac_handle_t mh)
5549 {
5550         mac_impl_t      *mip = (mac_impl_t *)mh;
5551 
5552         return (mip->mi_rxhwclnt_used);
5553 }
5554 
5555 /*
5556  * Total number of free TX groups on this MAC.
5557  */
5558 uint_t
5559 mac_txhwlnksavail_get(mac_handle_t mh)
5560 {
5561         mac_impl_t      *mip = (mac_impl_t *)mh;
5562 
5563         return (mip->mi_txhwclnt_avail);
5564 }
5565 
5566 /*
5567  * Total number of TX groups reserved on this MAC.
5568  */
5569 uint_t
5570 mac_txhwlnksrsvd_get(mac_handle_t mh)
5571 {
5572         mac_impl_t      *mip = (mac_impl_t *)mh;
5573 
5574         return (mip->mi_txhwclnt_used);
5575 }
5576 
5577 /*
5578  * Initialize the rings property for a mac client. A non-0 value for
5579  * rxring or txring specifies the number of rings required, a value
5580  * of MAC_RXRINGS_NONE/MAC_TXRINGS_NONE specifies that it doesn't need
5581  * any RX/TX rings and a value of MAC_RXRINGS_DONTCARE/MAC_TXRINGS_DONTCARE
5582  * means the system can decide whether it can give any rings or not.
5583  */
5584 void
5585 mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings)
5586 {
5587         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5588         mac_resource_props_t    *mrp = MCIP_RESOURCE_PROPS(mcip);
5589 
5590         if (rxrings != MAC_RXRINGS_DONTCARE) {
5591                 mrp->mrp_mask |= MRP_RX_RINGS;
5592                 mrp->mrp_nrxrings = rxrings;
5593         }
5594 
5595         if (txrings != MAC_TXRINGS_DONTCARE) {
5596                 mrp->mrp_mask |= MRP_TX_RINGS;
5597                 mrp->mrp_ntxrings = txrings;
5598         }
5599 }
5600 
5601 boolean_t
5602 mac_get_promisc_filtered(mac_client_handle_t mch)
5603 {
5604         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5605 
5606         return (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED);
5607 }
5608 
5609 void
5610 mac_set_promisc_filtered(mac_client_handle_t mch, boolean_t enable)
5611 {
5612         mac_client_impl_t       *mcip = (mac_client_impl_t *)mch;
5613 
5614         ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
5615         if (enable)
5616                 mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
5617         else
5618                 mcip->mci_protect_flags &= ~MPT_FLAG_PROMISC_FILTERED;
5619 }