1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2018 Joyent, Inc.
  25  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/conf.h>
  30 #include <sys/id_space.h>
  31 #include <sys/esunddi.h>
  32 #include <sys/stat.h>
  33 #include <sys/mkdev.h>
  34 #include <sys/stream.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/dlpi.h>
  37 #include <sys/modhash.h>
  38 #include <sys/mac.h>
  39 #include <sys/mac_provider.h>
  40 #include <sys/mac_impl.h>
  41 #include <sys/mac_client_impl.h>
  42 #include <sys/mac_client_priv.h>
  43 #include <sys/mac_soft_ring.h>
  44 #include <sys/mac_stat.h>
  45 #include <sys/dld.h>
  46 #include <sys/modctl.h>
  47 #include <sys/fs/dv_node.h>
  48 #include <sys/thread.h>
  49 #include <sys/proc.h>
  50 #include <sys/callb.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/atomic.h>
  53 #include <sys/sdt.h>
  54 #include <sys/mac_flow.h>
  55 #include <sys/ddi_intr_impl.h>
  56 #include <sys/disp.h>
  57 #include <sys/sdt.h>
  58 #include <sys/pattr.h>
  59 #include <sys/strsun.h>
  60 #include <sys/vlan.h>
  61 
  62 /*
  63  * MAC Provider Interface.
  64  *
  65  * Interface for GLDv3 compatible NIC drivers.
  66  */
  67 
  68 static void i_mac_notify_thread(void *);
  69 
  70 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
  71 
  72 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
  73         mac_fanout_recompute,   /* MAC_NOTE_LINK */
  74         NULL,           /* MAC_NOTE_UNICST */
  75         NULL,           /* MAC_NOTE_TX */
  76         NULL,           /* MAC_NOTE_DEVPROMISC */
  77         NULL,           /* MAC_NOTE_FASTPATH_FLUSH */
  78         NULL,           /* MAC_NOTE_SDU_SIZE */
  79         NULL,           /* MAC_NOTE_MARGIN */
  80         NULL,           /* MAC_NOTE_CAPAB_CHG */
  81         NULL            /* MAC_NOTE_LOWLINK */
  82 };
  83 
  84 /*
  85  * Driver support functions.
  86  */
  87 
  88 /* REGISTRATION */
  89 
  90 mac_register_t *
  91 mac_alloc(uint_t mac_version)
  92 {
  93         mac_register_t *mregp;
  94 
  95         /*
  96          * Make sure there isn't a version mismatch between the driver and
  97          * the framework.  In the future, if multiple versions are
  98          * supported, this check could become more sophisticated.
  99          */
 100         if (mac_version != MAC_VERSION)
 101                 return (NULL);
 102 
 103         mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
 104         mregp->m_version = mac_version;
 105         return (mregp);
 106 }
 107 
 108 void
 109 mac_free(mac_register_t *mregp)
 110 {
 111         kmem_free(mregp, sizeof (mac_register_t));
 112 }
 113 
 114 /*
 115  * mac_register() is how drivers register new MACs with the GLDv3
 116  * framework.  The mregp argument is allocated by drivers using the
 117  * mac_alloc() function, and can be freed using mac_free() immediately upon
 118  * return from mac_register().  Upon success (0 return value), the mhp
 119  * opaque pointer becomes the driver's handle to its MAC interface, and is
 120  * the argument to all other mac module entry points.
 121  */
 122 /* ARGSUSED */
 123 int
 124 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
 125 {
 126         mac_impl_t              *mip;
 127         mactype_t               *mtype;
 128         int                     err = EINVAL;
 129         struct devnames         *dnp = NULL;
 130         uint_t                  instance;
 131         boolean_t               style1_created = B_FALSE;
 132         boolean_t               style2_created = B_FALSE;
 133         char                    *driver;
 134         minor_t                 minor = 0;
 135 
 136         /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
 137         if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
 138                 return (EINVAL);
 139 
 140         /* Find the required MAC-Type plugin. */
 141         if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
 142                 return (EINVAL);
 143 
 144         /* Create a mac_impl_t to represent this MAC. */
 145         mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
 146 
 147         /*
 148          * The mac is not ready for open yet.
 149          */
 150         mip->mi_state_flags |= MIS_DISABLED;
 151 
 152         /*
 153          * When a mac is registered, the m_instance field can be set to:
 154          *
 155          *  0:  Get the mac's instance number from m_dip.
 156          *      This is usually used for physical device dips.
 157          *
 158          *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
 159          *      For example, when an aggregation is created with the key option,
 160          *      "key" will be used as the instance number.
 161          *
 162          *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
 163          *      This is often used when a MAC of a virtual link is registered
 164          *      (e.g., aggregation when "key" is not specified, or vnic).
 165          *
 166          * Note that the instance number is used to derive the mi_minor field
 167          * of mac_impl_t, which will then be used to derive the name of kstats
 168          * and the devfs nodes.  The first 2 cases are needed to preserve
 169          * backward compatibility.
 170          */
 171         switch (mregp->m_instance) {
 172         case 0:
 173                 instance = ddi_get_instance(mregp->m_dip);
 174                 break;
 175         case ((uint_t)-1):
 176                 minor = mac_minor_hold(B_TRUE);
 177                 if (minor == 0) {
 178                         err = ENOSPC;
 179                         goto fail;
 180                 }
 181                 instance = minor - 1;
 182                 break;
 183         default:
 184                 instance = mregp->m_instance;
 185                 if (instance >= MAC_MAX_MINOR) {
 186                         err = EINVAL;
 187                         goto fail;
 188                 }
 189                 break;
 190         }
 191 
 192         mip->mi_minor = (minor_t)(instance + 1);
 193         mip->mi_dip = mregp->m_dip;
 194         mip->mi_clients_list = NULL;
 195         mip->mi_nclients = 0;
 196 
 197         /* Set the default IEEE Port VLAN Identifier */
 198         mip->mi_pvid = 1;
 199 
 200         /* Default bridge link learning protection values */
 201         mip->mi_llimit = 1000;
 202         mip->mi_ldecay = 200;
 203 
 204         driver = (char *)ddi_driver_name(mip->mi_dip);
 205 
 206         /* Construct the MAC name as <drvname><instance> */
 207         (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
 208             driver, instance);
 209 
 210         mip->mi_driver = mregp->m_driver;
 211 
 212         mip->mi_type = mtype;
 213         mip->mi_margin = mregp->m_margin;
 214         mip->mi_info.mi_media = mtype->mt_type;
 215         mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
 216         if (mregp->m_max_sdu <= mregp->m_min_sdu)
 217                 goto fail;
 218         if (mregp->m_multicast_sdu == 0)
 219                 mregp->m_multicast_sdu = mregp->m_max_sdu;
 220         if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
 221             mregp->m_multicast_sdu > mregp->m_max_sdu)
 222                 goto fail;
 223         mip->mi_sdu_min = mregp->m_min_sdu;
 224         mip->mi_sdu_max = mregp->m_max_sdu;
 225         mip->mi_sdu_multicast = mregp->m_multicast_sdu;
 226         mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
 227         /*
 228          * If the media supports a broadcast address, cache a pointer to it
 229          * in the mac_info_t so that upper layers can use it.
 230          */
 231         mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
 232 
 233         mip->mi_v12n_level = mregp->m_v12n;
 234 
 235         /*
 236          * Copy the unicast source address into the mac_info_t, but only if
 237          * the MAC-Type defines a non-zero address length.  We need to
 238          * handle MAC-Types that have an address length of 0
 239          * (point-to-point protocol MACs for example).
 240          */
 241         if (mip->mi_type->mt_addr_length > 0) {
 242                 if (mregp->m_src_addr == NULL)
 243                         goto fail;
 244                 mip->mi_info.mi_unicst_addr =
 245                     kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
 246                 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
 247                     mip->mi_type->mt_addr_length);
 248 
 249                 /*
 250                  * Copy the fixed 'factory' MAC address from the immutable
 251                  * info.  This is taken to be the MAC address currently in
 252                  * use.
 253                  */
 254                 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
 255                     mip->mi_type->mt_addr_length);
 256 
 257                 /*
 258                  * At this point, we should set up the classification
 259                  * rules etc but we delay it till mac_open() so that
 260                  * the resource discovery has taken place and we
 261                  * know someone wants to use the device. Otherwise
 262                  * memory gets allocated for Rx ring structures even
 263                  * during probe.
 264                  */
 265 
 266                 /* Copy the destination address if one is provided. */
 267                 if (mregp->m_dst_addr != NULL) {
 268                         bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
 269                             mip->mi_type->mt_addr_length);
 270                         mip->mi_dstaddr_set = B_TRUE;
 271                 }
 272         } else if (mregp->m_src_addr != NULL) {
 273                 goto fail;
 274         }
 275 
 276         /*
 277          * The format of the m_pdata is specific to the plugin.  It is
 278          * passed in as an argument to all of the plugin callbacks.  The
 279          * driver can update this information by calling
 280          * mac_pdata_update().
 281          */
 282         if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
 283                 /*
 284                  * Verify if the supplied plugin data is valid.  Note that
 285                  * even if the caller passed in a NULL pointer as plugin data,
 286                  * we still need to verify if that's valid as the plugin may
 287                  * require plugin data to function.
 288                  */
 289                 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
 290                     mregp->m_pdata_size)) {
 291                         goto fail;
 292                 }
 293                 if (mregp->m_pdata != NULL) {
 294                         mip->mi_pdata =
 295                             kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
 296                         bcopy(mregp->m_pdata, mip->mi_pdata,
 297                             mregp->m_pdata_size);
 298                         mip->mi_pdata_size = mregp->m_pdata_size;
 299                 }
 300         } else if (mregp->m_pdata != NULL) {
 301                 /*
 302                  * The caller supplied non-NULL plugin data, but the plugin
 303                  * does not recognize plugin data.
 304                  */
 305                 err = EINVAL;
 306                 goto fail;
 307         }
 308 
 309         /*
 310          * Register the private properties.
 311          */
 312         mac_register_priv_prop(mip, mregp->m_priv_props);
 313 
 314         /*
 315          * Stash the driver callbacks into the mac_impl_t, but first sanity
 316          * check to make sure all mandatory callbacks are set.
 317          */
 318         if (mregp->m_callbacks->mc_getstat == NULL ||
 319             mregp->m_callbacks->mc_start == NULL ||
 320             mregp->m_callbacks->mc_stop == NULL ||
 321             mregp->m_callbacks->mc_setpromisc == NULL ||
 322             mregp->m_callbacks->mc_multicst == NULL) {
 323                 goto fail;
 324         }
 325         mip->mi_callbacks = mregp->m_callbacks;
 326 
 327         if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
 328             &mip->mi_capab_legacy)) {
 329                 mip->mi_state_flags |= MIS_LEGACY;
 330                 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
 331         } else {
 332                 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
 333                     mip->mi_minor);
 334         }
 335 
 336         /*
 337          * Allocate a notification thread. thread_create blocks for memory
 338          * if needed, it never fails.
 339          */
 340         mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
 341             mip, 0, &p0, TS_RUN, minclsyspri);
 342 
 343         /*
 344          * Initialize the capabilities
 345          */
 346 
 347         bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
 348         bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
 349 
 350         if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
 351                 mip->mi_state_flags |= MIS_IS_VNIC;
 352 
 353         if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
 354                 mip->mi_state_flags |= MIS_IS_AGGR;
 355 
 356         mac_addr_factory_init(mip);
 357 
 358         mac_transceiver_init(mip);
 359 
 360         mac_led_init(mip);
 361 
 362         /*
 363          * Enforce the virtrualization level registered.
 364          */
 365         if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
 366                 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
 367                     mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
 368                         goto fail;
 369 
 370                 /*
 371                  * The driver needs to register at least rx rings for this
 372                  * virtualization level.
 373                  */
 374                 if (mip->mi_rx_groups == NULL)
 375                         goto fail;
 376         }
 377 
 378         /*
 379          * The driver must set mc_unicst entry point to NULL when it advertises
 380          * CAP_RINGS for rx groups.
 381          */
 382         if (mip->mi_rx_groups != NULL) {
 383                 if (mregp->m_callbacks->mc_unicst != NULL)
 384                         goto fail;
 385         } else {
 386                 if (mregp->m_callbacks->mc_unicst == NULL)
 387                         goto fail;
 388         }
 389 
 390         /*
 391          * Initialize MAC addresses. Must be called after mac_init_rings().
 392          */
 393         mac_init_macaddr(mip);
 394 
 395         mip->mi_share_capab.ms_snum = 0;
 396         if (mip->mi_v12n_level & MAC_VIRT_HIO) {
 397                 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
 398                     &mip->mi_share_capab);
 399         }
 400 
 401         /*
 402          * Initialize the kstats for this device.
 403          */
 404         mac_driver_stat_create(mip);
 405 
 406         /* Zero out any properties. */
 407         bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
 408 
 409         if (mip->mi_minor <= MAC_MAX_MINOR) {
 410                 /* Create a style-2 DLPI device */
 411                 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
 412                     DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
 413                         goto fail;
 414                 style2_created = B_TRUE;
 415 
 416                 /* Create a style-1 DLPI device */
 417                 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
 418                     mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
 419                         goto fail;
 420                 style1_created = B_TRUE;
 421         }
 422 
 423         mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
 424 
 425         rw_enter(&i_mac_impl_lock, RW_WRITER);
 426         if (mod_hash_insert(i_mac_impl_hash,
 427             (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
 428                 rw_exit(&i_mac_impl_lock);
 429                 err = EEXIST;
 430                 goto fail;
 431         }
 432 
 433         DTRACE_PROBE2(mac__register, struct devnames *, dnp,
 434             (mac_impl_t *), mip);
 435 
 436         /*
 437          * Mark the MAC to be ready for open.
 438          */
 439         mip->mi_state_flags &= ~MIS_DISABLED;
 440         rw_exit(&i_mac_impl_lock);
 441 
 442         atomic_inc_32(&i_mac_impl_count);
 443 
 444         cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
 445         *mhp = (mac_handle_t)mip;
 446         return (0);
 447 
 448 fail:
 449         if (style1_created)
 450                 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 451 
 452         if (style2_created)
 453                 ddi_remove_minor_node(mip->mi_dip, driver);
 454 
 455         mac_addr_factory_fini(mip);
 456 
 457         /* Clean up registered MAC addresses */
 458         mac_fini_macaddr(mip);
 459 
 460         /* Clean up registered rings */
 461         mac_free_rings(mip, MAC_RING_TYPE_RX);
 462         mac_free_rings(mip, MAC_RING_TYPE_TX);
 463 
 464         /* Clean up notification thread */
 465         if (mip->mi_notify_thread != NULL)
 466                 i_mac_notify_exit(mip);
 467 
 468         if (mip->mi_info.mi_unicst_addr != NULL) {
 469                 kmem_free(mip->mi_info.mi_unicst_addr,
 470                     mip->mi_type->mt_addr_length);
 471                 mip->mi_info.mi_unicst_addr = NULL;
 472         }
 473 
 474         mac_driver_stat_delete(mip);
 475 
 476         if (mip->mi_type != NULL) {
 477                 atomic_dec_32(&mip->mi_type->mt_ref);
 478                 mip->mi_type = NULL;
 479         }
 480 
 481         if (mip->mi_pdata != NULL) {
 482                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 483                 mip->mi_pdata = NULL;
 484                 mip->mi_pdata_size = 0;
 485         }
 486 
 487         if (minor != 0) {
 488                 ASSERT(minor > MAC_MAX_MINOR);
 489                 mac_minor_rele(minor);
 490         }
 491 
 492         mip->mi_state_flags = 0;
 493         mac_unregister_priv_prop(mip);
 494 
 495         /*
 496          * Clear the state before destroying the mac_impl_t
 497          */
 498         mip->mi_state_flags = 0;
 499 
 500         kmem_cache_free(i_mac_impl_cachep, mip);
 501         return (err);
 502 }
 503 
 504 /*
 505  * Unregister from the GLDv3 framework
 506  */
 507 int
 508 mac_unregister(mac_handle_t mh)
 509 {
 510         int                     err;
 511         mac_impl_t              *mip = (mac_impl_t *)mh;
 512         mod_hash_val_t          val;
 513         mac_margin_req_t        *mmr, *nextmmr;
 514 
 515         /* Fail the unregister if there are any open references to this mac. */
 516         if ((err = mac_disable_nowait(mh)) != 0)
 517                 return (err);
 518 
 519         /*
 520          * Clean up notification thread and wait for it to exit.
 521          */
 522         i_mac_notify_exit(mip);
 523 
 524         /*
 525          * Prior to acquiring the MAC perimeter, remove the MAC instance from
 526          * the internal hash table. Such removal means table-walkers that
 527          * acquire the perimeter will not do so on behalf of what we are
 528          * unregistering, which prevents a deadlock.
 529          */
 530         rw_enter(&i_mac_impl_lock, RW_WRITER);
 531         (void) mod_hash_remove(i_mac_impl_hash,
 532             (mod_hash_key_t)mip->mi_name, &val);
 533         rw_exit(&i_mac_impl_lock);
 534         ASSERT(mip == (mac_impl_t *)val);
 535 
 536         i_mac_perim_enter(mip);
 537 
 538         /*
 539          * There is still resource properties configured over this mac.
 540          */
 541         if (mip->mi_resource_props.mrp_mask != 0)
 542                 mac_fastpath_enable((mac_handle_t)mip);
 543 
 544         if (mip->mi_minor < MAC_MAX_MINOR + 1) {
 545                 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
 546                 ddi_remove_minor_node(mip->mi_dip,
 547                     (char *)ddi_driver_name(mip->mi_dip));
 548         }
 549 
 550         ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
 551             MIS_EXCLUSIVE));
 552 
 553         mac_driver_stat_delete(mip);
 554 
 555         ASSERT(i_mac_impl_count > 0);
 556         atomic_dec_32(&i_mac_impl_count);
 557 
 558         if (mip->mi_pdata != NULL)
 559                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 560         mip->mi_pdata = NULL;
 561         mip->mi_pdata_size = 0;
 562 
 563         /*
 564          * Free the list of margin request.
 565          */
 566         for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
 567                 nextmmr = mmr->mmr_nextp;
 568                 kmem_free(mmr, sizeof (mac_margin_req_t));
 569         }
 570         mip->mi_mmrp = NULL;
 571 
 572         mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
 573         kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
 574         mip->mi_info.mi_unicst_addr = NULL;
 575 
 576         atomic_dec_32(&mip->mi_type->mt_ref);
 577         mip->mi_type = NULL;
 578 
 579         /*
 580          * Free the primary MAC address.
 581          */
 582         mac_fini_macaddr(mip);
 583 
 584         /*
 585          * free all rings
 586          */
 587         mac_free_rings(mip, MAC_RING_TYPE_RX);
 588         mac_free_rings(mip, MAC_RING_TYPE_TX);
 589 
 590         mac_addr_factory_fini(mip);
 591 
 592         bzero(mip->mi_addr, MAXMACADDRLEN);
 593         bzero(mip->mi_dstaddr, MAXMACADDRLEN);
 594         mip->mi_dstaddr_set = B_FALSE;
 595 
 596         /* and the flows */
 597         mac_flow_tab_destroy(mip->mi_flow_tab);
 598         mip->mi_flow_tab = NULL;
 599 
 600         if (mip->mi_minor > MAC_MAX_MINOR)
 601                 mac_minor_rele(mip->mi_minor);
 602 
 603         cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
 604 
 605         /*
 606          * Reset the perim related fields to default values before
 607          * kmem_cache_free
 608          */
 609         i_mac_perim_exit(mip);
 610         mip->mi_state_flags = 0;
 611 
 612         mac_unregister_priv_prop(mip);
 613 
 614         ASSERT(mip->mi_bridge_link == NULL);
 615         kmem_cache_free(i_mac_impl_cachep, mip);
 616 
 617         return (0);
 618 }
 619 
 620 /* DATA RECEPTION */
 621 
 622 /*
 623  * This function is invoked for packets received by the MAC driver in
 624  * interrupt context. The ring generation number provided by the driver
 625  * is matched with the ring generation number held in MAC. If they do not
 626  * match, received packets are considered stale packets coming from an older
 627  * assignment of the ring. Drop them.
 628  */
 629 void
 630 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
 631     uint64_t mr_gen_num)
 632 {
 633         mac_ring_t              *mr = (mac_ring_t *)mrh;
 634 
 635         if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
 636                 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
 637                     mr->mr_gen_num, uint64_t, mr_gen_num);
 638                 freemsgchain(mp_chain);
 639                 return;
 640         }
 641         mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
 642 }
 643 
 644 /*
 645  * This function is invoked for each packet received by the underlying driver.
 646  */
 647 void
 648 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 649 {
 650         mac_impl_t *mip = (mac_impl_t *)mh;
 651 
 652         /*
 653          * Check if the link is part of a bridge.  If not, then we don't need
 654          * to take the lock to remain consistent.  Make this common case
 655          * lock-free and tail-call optimized.
 656          */
 657         if (mip->mi_bridge_link == NULL) {
 658                 mac_rx_common(mh, mrh, mp_chain);
 659         } else {
 660                 /*
 661                  * Once we take a reference on the bridge link, the bridge
 662                  * module itself can't unload, so the callback pointers are
 663                  * stable.
 664                  */
 665                 mutex_enter(&mip->mi_bridge_lock);
 666                 if ((mh = mip->mi_bridge_link) != NULL)
 667                         mac_bridge_ref_cb(mh, B_TRUE);
 668                 mutex_exit(&mip->mi_bridge_lock);
 669                 if (mh == NULL) {
 670                         mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
 671                 } else {
 672                         mac_bridge_rx_cb(mh, mrh, mp_chain);
 673                         mac_bridge_ref_cb(mh, B_FALSE);
 674                 }
 675         }
 676 }
 677 
 678 /*
 679  * Special case function: this allows snooping of packets transmitted and
 680  * received by TRILL. By design, they go directly into the TRILL module.
 681  */
 682 void
 683 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
 684 {
 685         mac_impl_t *mip = (mac_impl_t *)mh;
 686 
 687         if (mip->mi_promisc_list != NULL)
 688                 mac_promisc_dispatch(mip, mp, NULL);
 689 }
 690 
 691 /*
 692  * This is the upward reentry point for packets arriving from the bridging
 693  * module and from mac_rx for links not part of a bridge.
 694  */
 695 void
 696 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
 697 {
 698         mac_impl_t              *mip = (mac_impl_t *)mh;
 699         mac_ring_t              *mr = (mac_ring_t *)mrh;
 700         mac_soft_ring_set_t     *mac_srs;
 701         mblk_t                  *bp = mp_chain;
 702 
 703         /*
 704          * If there are any promiscuous mode callbacks defined for
 705          * this MAC, pass them a copy if appropriate.
 706          */
 707         if (mip->mi_promisc_list != NULL)
 708                 mac_promisc_dispatch(mip, mp_chain, NULL);
 709 
 710         if (mr != NULL) {
 711                 /*
 712                  * If the SRS teardown has started, just return. The 'mr'
 713                  * continues to be valid until the driver unregisters the MAC.
 714                  * Hardware classified packets will not make their way up
 715                  * beyond this point once the teardown has started. The driver
 716                  * is never passed a pointer to a flow entry or SRS or any
 717                  * structure that can be freed much before mac_unregister.
 718                  */
 719                 mutex_enter(&mr->mr_lock);
 720                 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
 721                     (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
 722                         mutex_exit(&mr->mr_lock);
 723                         freemsgchain(mp_chain);
 724                         return;
 725                 }
 726 
 727                 /*
 728                  * The ring is in passthru mode; pass the chain up to
 729                  * the pseudo ring.
 730                  */
 731                 if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
 732                         MR_REFHOLD_LOCKED(mr);
 733                         mutex_exit(&mr->mr_lock);
 734                         mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
 735                             B_FALSE);
 736                         MR_REFRELE(mr);
 737                         return;
 738                 }
 739 
 740                 /*
 741                  * The passthru callback should only be set when in
 742                  * MAC_PASSTHRU_CLASSIFIER mode.
 743                  */
 744                 ASSERT3P(mr->mr_pt_fn, ==, NULL);
 745 
 746                 /*
 747                  * We check if an SRS is controlling this ring.
 748                  * If so, we can directly call the srs_lower_proc
 749                  * routine otherwise we need to go through mac_rx_classify
 750                  * to reach the right place.
 751                  */
 752                 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
 753                         MR_REFHOLD_LOCKED(mr);
 754                         mutex_exit(&mr->mr_lock);
 755                         ASSERT3P(mr->mr_srs, !=, NULL);
 756                         mac_srs = mr->mr_srs;
 757 
 758                         /*
 759                          * This is the fast path. All packets received
 760                          * on this ring are hardware classified and
 761                          * share the same MAC header info.
 762                          */
 763                         mac_srs->srs_rx.sr_lower_proc(mh,
 764                             (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
 765                         MR_REFRELE(mr);
 766                         return;
 767                 }
 768 
 769                 mutex_exit(&mr->mr_lock);
 770                 /* We'll fall through to software classification */
 771         } else {
 772                 flow_entry_t *flent;
 773                 int err;
 774 
 775                 rw_enter(&mip->mi_rw_lock, RW_READER);
 776                 if (mip->mi_single_active_client != NULL) {
 777                         flent = mip->mi_single_active_client->mci_flent_list;
 778                         FLOW_TRY_REFHOLD(flent, err);
 779                         rw_exit(&mip->mi_rw_lock);
 780                         if (err == 0) {
 781                                 (flent->fe_cb_fn)(flent->fe_cb_arg1,
 782                                     flent->fe_cb_arg2, mp_chain, B_FALSE);
 783                                 FLOW_REFRELE(flent);
 784                                 return;
 785                         }
 786                 } else {
 787                         rw_exit(&mip->mi_rw_lock);
 788                 }
 789         }
 790 
 791         if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
 792                 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
 793                         return;
 794         }
 795 
 796         freemsgchain(bp);
 797 }
 798 
 799 /* DATA TRANSMISSION */
 800 
 801 /*
 802  * A driver's notification to resume transmission, in case of a provider
 803  * without TX rings.
 804  */
 805 void
 806 mac_tx_update(mac_handle_t mh)
 807 {
 808         mac_tx_ring_update(mh, NULL);
 809 }
 810 
 811 /*
 812  * A driver's notification to resume transmission on the specified TX ring.
 813  */
 814 void
 815 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
 816 {
 817         i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
 818 }
 819 
 820 /* LINK STATE */
/*
 * Notify the MAC layer about a link state change.
 *
 * The reported state is stored in mi_lowlinkstate and a MAC_NOTE_LOWLINK
 * notification is posted through i_mac_notify().
 */
void
mac_link_update(mac_handle_t mh, link_state_t link)
{
        mac_impl_t      *mip = (mac_impl_t *)mh;

        /*
         * Save the link state.
         */
        mip->mi_lowlinkstate = link;

        /*
         * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
         * thread to deliver both lower and upper notifications.
         */
        i_mac_notify(mip, MAC_NOTE_LOWLINK);
}
 840 
/*
 * Notify the MAC layer about a link state change due to bridging.
 *
 * Unlike mac_link_update(), this writes mi_linkstate directly (not
 * mi_lowlinkstate) and posts MAC_NOTE_LINK rather than MAC_NOTE_LOWLINK.
 */
void
mac_link_redo(mac_handle_t mh, link_state_t link)
{
        mac_impl_t      *mip = (mac_impl_t *)mh;

        /*
         * Save the link state.
         */
        mip->mi_linkstate = link;

        /*
         * Send a MAC_NOTE_LINK notification.  Only upper notifications are
         * made.
         */
        i_mac_notify(mip, MAC_NOTE_LINK);
}
 860 
 861 /* MINOR NODE HANDLING */
 862 
 863 /*
 864  * Given a dev_t, return the instance number (PPA) associated with it.
 865  * Drivers can use this in their getinfo(9e) implementation to lookup
 866  * the instance number (i.e. PPA) of the device, to use as an index to
 867  * their own array of soft state structures.
 868  *
 869  * Returns -1 on error.
 870  */
 871 int
 872 mac_devt_to_instance(dev_t devt)
 873 {
 874         return (dld_devt_to_instance(devt));
 875 }
 876 
/*
 * This function returns the first minor number that is available for
 * driver private use.  All minor numbers smaller than this are
 * reserved for GLDv3 use.  The value returned is the constant
 * MAC_PRIVATE_MINOR.
 */
minor_t
mac_private_minor(void)
{
        return (MAC_PRIVATE_MINOR);
}
 887 
 888 /* OTHER CONTROL INFORMATION */
 889 
 890 /*
 891  * A driver notified us that its primary MAC address has changed.
 892  */
 893 void
 894 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
 895 {
 896         mac_impl_t      *mip = (mac_impl_t *)mh;
 897 
 898         if (mip->mi_type->mt_addr_length == 0)
 899                 return;
 900 
 901         i_mac_perim_enter(mip);
 902 
 903         /*
 904          * If address changes, freshen the MAC address value and update
 905          * all MAC clients that share this MAC address.
 906          */
 907         if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
 908                 mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
 909                     (uint8_t *)addr);
 910         }
 911 
 912         i_mac_perim_exit(mip);
 913 
 914         /*
 915          * Send a MAC_NOTE_UNICST notification.
 916          */
 917         i_mac_notify(mip, MAC_NOTE_UNICST);
 918 }
 919 
 920 void
 921 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
 922 {
 923         mac_impl_t      *mip = (mac_impl_t *)mh;
 924 
 925         if (mip->mi_type->mt_addr_length == 0)
 926                 return;
 927 
 928         i_mac_perim_enter(mip);
 929         bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
 930         i_mac_perim_exit(mip);
 931         i_mac_notify(mip, MAC_NOTE_DEST);
 932 }
 933 
 934 /*
 935  * MAC plugin information changed.
 936  */
 937 int
 938 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
 939 {
 940         mac_impl_t      *mip = (mac_impl_t *)mh;
 941 
 942         /*
 943          * Verify that the plugin supports MAC plugin data and that the
 944          * supplied data is valid.
 945          */
 946         if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
 947                 return (EINVAL);
 948         if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
 949                 return (EINVAL);
 950 
 951         if (mip->mi_pdata != NULL)
 952                 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
 953 
 954         mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
 955         bcopy(mac_pdata, mip->mi_pdata, dsize);
 956         mip->mi_pdata_size = dsize;
 957 
 958         /*
 959          * Since the MAC plugin data is used to construct MAC headers that
 960          * were cached in fast-path headers, we need to flush fast-path
 961          * information for links associated with this mac.
 962          */
 963         i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
 964         return (0);
 965 }
 966 
 967 /*
 968  * Invoked by driver as well as the framework to notify its capability change.
 969  */
 970 void
 971 mac_capab_update(mac_handle_t mh)
 972 {
 973         /* Send MAC_NOTE_CAPAB_CHG notification */
 974         i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
 975 }
 976 
 977 /*
 978  * Used by normal drivers to update the max sdu size.
 979  * We need to handle the case of a smaller mi_sdu_multicast
 980  * since this is called by mac_set_mtu() even for drivers that
 981  * have differing unicast and multicast mtu and we don't want to
 982  * increase the multicast mtu by accident in that case.
 983  */
 984 int
 985 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
 986 {
 987         mac_impl_t      *mip = (mac_impl_t *)mh;
 988 
 989         if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
 990                 return (EINVAL);
 991         mip->mi_sdu_max = sdu_max;
 992         if (mip->mi_sdu_multicast > mip->mi_sdu_max)
 993                 mip->mi_sdu_multicast = mip->mi_sdu_max;
 994 
 995         /* Send a MAC_NOTE_SDU_SIZE notification. */
 996         i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
 997         return (0);
 998 }
 999 
1000 /*
1001  * Version of the above function that is used by drivers that have a different
1002  * max sdu size for multicast/broadcast vs. unicast.
1003  */
1004 int
1005 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
1006 {
1007         mac_impl_t      *mip = (mac_impl_t *)mh;
1008 
1009         if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1010                 return (EINVAL);
1011         if (sdu_multicast == 0)
1012                 sdu_multicast = sdu_max;
1013         if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
1014                 return (EINVAL);
1015         mip->mi_sdu_max = sdu_max;
1016         mip->mi_sdu_multicast = sdu_multicast;
1017 
1018         /* Send a MAC_NOTE_SDU_SIZE notification. */
1019         i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1020         return (0);
1021 }
1022 
1023 static void
1024 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1025 {
1026         mac_client_impl_t *mcip;
1027         flow_entry_t *flent;
1028         mac_soft_ring_set_t *mac_rx_srs;
1029         mac_cpus_t *srs_cpu;
1030         int i;
1031 
1032         if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1033             (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1034                 /* interrupt can be re-targeted */
1035                 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1036                 flent = mcip->mci_flent;
1037                 if (ring->mr_type == MAC_RING_TYPE_RX) {
1038                         for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1039                                 mac_rx_srs = flent->fe_rx_srs[i];
1040                                 if (mac_rx_srs->srs_ring != ring)
1041                                         continue;
1042                                 srs_cpu = &mac_rx_srs->srs_cpu;
1043                                 mutex_enter(&cpu_lock);
1044                                 mac_rx_srs_retarget_intr(mac_rx_srs,
1045                                     srs_cpu->mc_rx_intr_cpu);
1046                                 mutex_exit(&cpu_lock);
1047                                 break;
1048                         }
1049                 } else {
1050                         if (flent->fe_tx_srs != NULL) {
1051                                 mutex_enter(&cpu_lock);
1052                                 mac_tx_srs_retarget_intr(
1053                                     flent->fe_tx_srs);
1054                                 mutex_exit(&cpu_lock);
1055                         }
1056                 }
1057         }
1058 }
1059 
/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping pseudo ring and the hardware
 * ring. ddi interrupt handles are exported from the hardware ring to
 * the pseudo ring. Thus when the interrupt handle changes, clients of
 * aggr that are using the handle need to use the new handle and
 * re-target their interrupts.
 *
 * The caller must hold mip's perimeter and ring->mr_prh must be non-NULL
 * on entry.  This function drops mip's perimeter on every return path,
 * so the caller must not exit it again.
 */
static void
mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
    ddi_intr_handle_t ddh)
{
        mac_ring_t *pring;
        mac_group_t *pgroup;
        mac_impl_t *pmip;
        char macname[MAXNAMELEN];
        mac_perim_handle_t p_mph;
        uint64_t saved_gen_num;

again:
        /*
         * Snapshot the pseudo ring, its group and owning MAC, plus the
         * ring's generation number, while mip's perimeter is still held.
         */
        pring = (mac_ring_t *)ring->mr_prh;
        pgroup = (mac_group_t *)pring->mr_gh;
        pmip = (mac_impl_t *)pgroup->mrg_mh;
        saved_gen_num = ring->mr_gen_num;
        (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
        /*
         * We need to enter aggr's perimeter. The locking hierarchy
         * dictates that aggr's perimeter should be entered first
         * and then the port's perimeter. So drop the port's
         * perimeter, enter aggr's and then re-enter port's
         * perimeter.
         */
        i_mac_perim_exit(mip);
        /*
         * While we know pmip is the aggr's mip, there is a
         * possibility that aggr could have unregistered by
         * the time we exit port's perimeter (mip) and
         * enter aggr's perimeter (pmip). To avoid that
         * scenario, enter aggr's perimeter using its name.
         */
        if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
                return;
        i_mac_perim_enter(mip);
        /*
         * Check if the ring got assigned to another aggregation before
         * we could enter aggr's and the port's perimeter. When a ring
         * gets deleted from an aggregation, it calls mac_stop_ring()
         * which increments the generation number. So checking
         * generation number will be enough.
         */
        if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
                /*
                 * Release both perimeters, then retake only the port's
                 * so that the retry starts from the same state as the
                 * initial entry.
                 */
                i_mac_perim_exit(mip);
                mac_perim_exit(p_mph);
                i_mac_perim_enter(mip);
                goto again;
        }

        /* Check if pseudo ring is still present */
        if (ring->mr_prh != NULL) {
                /* Mirror the hardware ring's interrupt info on the pseudo ring */
                pring->mr_info.mri_intr.mi_ddi_handle = ddh;
                pring->mr_info.mri_intr.mi_ddi_shared =
                    ring->mr_info.mri_intr.mi_ddi_shared;
                /* A NULL handle means there is nothing to re-target to */
                if (ddh != NULL)
                        mac_ring_intr_retarget(pgroup, pring);
        }
        i_mac_perim_exit(mip);
        mac_perim_exit(p_mph);
}
1128 /*
1129  * API called by driver to provide new interrupt handle for TX/RX rings.
1130  * This usually happens when IRM (Interrupt Resource Manangement)
1131  * framework either gives the driver more MSI-x interrupts or takes
1132  * away MSI-x interrupts from the driver.
1133  */
1134 void
1135 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1136 {
1137         mac_ring_t      *ring = (mac_ring_t *)mrh;
1138         mac_group_t     *group = (mac_group_t *)ring->mr_gh;
1139         mac_impl_t      *mip = (mac_impl_t *)group->mrg_mh;
1140 
1141         i_mac_perim_enter(mip);
1142         ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1143         if (ddh == NULL) {
1144                 /* Interrupts being reset */
1145                 ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1146                 if (ring->mr_prh != NULL) {
1147                         mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1148                         return;
1149                 }
1150         } else {
1151                 /* New interrupt handle */
1152                 mac_compare_ddi_handle(mip->mi_rx_groups,
1153                     mip->mi_rx_group_count, ring);
1154                 if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1155                         mac_compare_ddi_handle(mip->mi_tx_groups,
1156                             mip->mi_tx_group_count, ring);
1157                 }
1158                 if (ring->mr_prh != NULL) {
1159                         mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1160                         return;
1161                 } else {
1162                         mac_ring_intr_retarget(group, ring);
1163                 }
1164         }
1165         i_mac_perim_exit(mip);
1166 }
1167 
1168 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1169 
1170 /*
1171  * Updates the mac_impl structure with the current state of the link
1172  */
1173 static void
1174 i_mac_log_link_state(mac_impl_t *mip)
1175 {
1176         /*
1177          * If no change, then it is not interesting.
1178          */
1179         if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1180                 return;
1181 
1182         switch (mip->mi_lowlinkstate) {
1183         case LINK_STATE_UP:
1184                 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1185                         char det[200];
1186 
1187                         mip->mi_type->mt_ops.mtops_link_details(det,
1188                             sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1189 
1190                         cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1191                 } else {
1192                         cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1193                 }
1194                 break;
1195 
1196         case LINK_STATE_DOWN:
1197                 /*
1198                  * Only transitions from UP to DOWN are interesting
1199                  */
1200                 if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1201                         cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1202                 break;
1203 
1204         case LINK_STATE_UNKNOWN:
1205                 /*
1206                  * This case is normally not interesting.
1207                  */
1208                 break;
1209         }
1210         mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1211 }
1212 
/*
 * Main routine for the callbacks notifications thread
 *
 * The thread waits on mcbi_cv until mi_notify_bits is non-zero, then
 * snapshots and clears the pending bits under mcbi_lockp and delivers
 * them with the lock dropped.  Bit MAC_NNOTE is the request to quit: on
 * seeing it the thread sets MIS_NOTIFY_DONE, broadcasts on mcbi_cv and
 * exits.
 */
static void
i_mac_notify_thread(void *arg)
{
        mac_impl_t      *mip = arg;
        callb_cpr_t     cprinfo;
        mac_cb_t        *mcb;
        mac_cb_info_t   *mcbi;
        mac_notify_cb_t *mncb;

        mcbi = &mip->mi_notify_cb_info;
        CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
            "i_mac_notify_thread");

        mutex_enter(mcbi->mcbi_lockp);

        for (;;) {
                uint32_t        bits;
                uint32_t        type;

                /* The lock is held at the top of every iteration. */
                bits = mip->mi_notify_bits;
                if (bits == 0) {
                        /* Nothing pending; sleep until signalled. */
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
                        CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
                        continue;
                }
                /* Claim everything that is pending in one go. */
                mip->mi_notify_bits = 0;
                if ((bits & (1 << MAC_NNOTE)) != 0) {
                        /* request to quit */
                        ASSERT(mip->mi_state_flags & MIS_DISABLED);
                        break;
                }

                /* Callbacks below run without mcbi_lockp held. */
                mutex_exit(mcbi->mcbi_lockp);

                /*
                 * Log link changes on the actual link, but then do reports on
                 * synthetic state (if part of a bridge).
                 */
                if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
                        link_state_t newstate;
                        mac_handle_t mh;

                        i_mac_log_link_state(mip);
                        newstate = mip->mi_lowlinkstate;
                        if (mip->mi_bridge_link != NULL) {
                                /*
                                 * Re-check mi_bridge_link under
                                 * mi_bridge_lock before calling into the
                                 * bridge callback.
                                 */
                                mutex_enter(&mip->mi_bridge_lock);
                                if ((mh = mip->mi_bridge_link) != NULL) {
                                        newstate = mac_bridge_ls_cb(mh,
                                            newstate);
                                }
                                mutex_exit(&mip->mi_bridge_lock);
                        }
                        /*
                         * Only when the resulting state differs from
                         * mi_linkstate is MAC_NOTE_LINK added to this
                         * round of notifications.
                         */
                        if (newstate != mip->mi_linkstate) {
                                mip->mi_linkstate = newstate;
                                bits |= 1 << MAC_NOTE_LINK;
                        }
                }

                /*
                 * Do notification callbacks for each notification type.
                 */
                for (type = 0; type < MAC_NNOTE; type++) {
                        if ((bits & (1 << type)) == 0) {
                                continue;
                        }

                        /* Per-type handler from mac_notify_cb_list, if set */
                        if (mac_notify_cb_list[type] != NULL)
                                (*mac_notify_cb_list[type])(mip);

                        /*
                         * Walk the list of notifications.
                         */
                        MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
                        for (mcb = mip->mi_notify_cb_list; mcb != NULL;
                            mcb = mcb->mcb_nextp) {
                                mncb = (mac_notify_cb_t *)mcb->mcb_objp;
                                mncb->mncb_fn(mncb->mncb_arg, type);
                        }
                        MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
                            &mip->mi_notify_cb_list);
                }

                mutex_enter(mcbi->mcbi_lockp);
        }

        /* Reached only via the quit request, with mcbi_lockp held. */
        mip->mi_state_flags |= MIS_NOTIFY_DONE;
        cv_broadcast(&mcbi->mcbi_cv);

        /* CALLB_CPR_EXIT drops the lock */
        CALLB_CPR_EXIT(&cprinfo);
        thread_exit();
}
1309 
1310 /*
1311  * Signal the i_mac_notify_thread asking it to quit.
1312  * Then wait till it is done.
1313  */
1314 void
1315 i_mac_notify_exit(mac_impl_t *mip)
1316 {
1317         mac_cb_info_t   *mcbi;
1318 
1319         mcbi = &mip->mi_notify_cb_info;
1320 
1321         mutex_enter(mcbi->mcbi_lockp);
1322         mip->mi_notify_bits = (1 << MAC_NNOTE);
1323         cv_broadcast(&mcbi->mcbi_cv);
1324 
1325 
1326         while ((mip->mi_notify_thread != NULL) &&
1327             !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1328                 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1329         }
1330 
1331         /* Necessary clean up before doing kmem_cache_free */
1332         mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1333         mip->mi_notify_bits = 0;
1334         mip->mi_notify_thread = NULL;
1335         mutex_exit(mcbi->mcbi_lockp);
1336 }
1337 
1338 /*
1339  * Entry point invoked by drivers to dynamically add a ring to an
1340  * existing group.
1341  */
1342 int
1343 mac_group_add_ring(mac_group_handle_t gh, int index)
1344 {
1345         mac_group_t *group = (mac_group_t *)gh;
1346         mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1347         int ret;
1348 
1349         i_mac_perim_enter(mip);
1350         ret = i_mac_group_add_ring(group, NULL, index);
1351         i_mac_perim_exit(mip);
1352         return (ret);
1353 }
1354 
1355 /*
1356  * Entry point invoked by drivers to dynamically remove a ring
1357  * from an existing group. The specified ring handle must no longer
1358  * be used by the driver after a call to this function.
1359  */
1360 void
1361 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1362 {
1363         mac_group_t *group = (mac_group_t *)gh;
1364         mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1365 
1366         i_mac_perim_enter(mip);
1367         i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1368         i_mac_perim_exit(mip);
1369 }
1370 
1371 /*
1372  * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1373  * entry points.
1374  */
1375 
1376 void
1377 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1378 {
1379         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1380 
1381         /* nothing to do if the caller doesn't want the default value */
1382         if (pr->pr_default == NULL)
1383                 return;
1384 
1385         ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1386 
1387         *(uint8_t *)(pr->pr_default) = val;
1388         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1389 }
1390 
1391 void
1392 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1393 {
1394         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1395 
1396         /* nothing to do if the caller doesn't want the default value */
1397         if (pr->pr_default == NULL)
1398                 return;
1399 
1400         ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1401 
1402         bcopy(&val, pr->pr_default, sizeof (val));
1403 
1404         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1405 }
1406 
1407 void
1408 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1409 {
1410         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1411 
1412         /* nothing to do if the caller doesn't want the default value */
1413         if (pr->pr_default == NULL)
1414                 return;
1415 
1416         ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1417 
1418         bcopy(&val, pr->pr_default, sizeof (val));
1419 
1420         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1421 }
1422 
1423 void
1424 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1425 {
1426         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1427 
1428         /* nothing to do if the caller doesn't want the default value */
1429         if (pr->pr_default == NULL)
1430                 return;
1431 
1432         if (strlen(str) >= pr->pr_default_size)
1433                 pr->pr_errno = ENOBUFS;
1434         else
1435                 (void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1436         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1437 }
1438 
1439 void
1440 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1441     link_flowctrl_t val)
1442 {
1443         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1444 
1445         /* nothing to do if the caller doesn't want the default value */
1446         if (pr->pr_default == NULL)
1447                 return;
1448 
1449         ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1450 
1451         bcopy(&val, pr->pr_default, sizeof (val));
1452 
1453         pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1454 }
1455 
1456 void
1457 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1458     uint32_t max)
1459 {
1460         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1461         mac_propval_range_t *range = pr->pr_range;
1462         mac_propval_uint32_range_t *range32;
1463 
1464         /* nothing to do if the caller doesn't want the range info */
1465         if (range == NULL)
1466                 return;
1467 
1468         if (pr->pr_range_cur_count++ == 0) {
1469                 /* first range */
1470                 pr->pr_flags |= MAC_PROP_INFO_RANGE;
1471                 range->mpr_type = MAC_PROPVAL_UINT32;
1472         } else {
1473                 /* all ranges of a property should be of the same type */
1474                 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1475                 if (pr->pr_range_cur_count > range->mpr_count) {
1476                         pr->pr_errno = ENOSPC;
1477                         return;
1478                 }
1479         }
1480 
1481         range32 = range->mpr_range_uint32;
1482         range32[pr->pr_range_cur_count - 1].mpur_min = min;
1483         range32[pr->pr_range_cur_count - 1].mpur_max = max;
1484 }
1485 
1486 void
1487 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1488 {
1489         mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1490 
1491         pr->pr_perm = perm;
1492         pr->pr_flags |= MAC_PROP_INFO_PERM;
1493 }
1494 
1495 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff,
1496     uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1497 {
1498         uint32_t flags;
1499 
1500         ASSERT(DB_TYPE(mp) == M_DATA);
1501 
1502         flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1503         if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1504                 if (value != NULL)
1505                         *value = (uint32_t)DB_CKSUM16(mp);
1506                 if ((flags & HCK_PARTIALCKSUM) != 0) {
1507                         if (start != NULL)
1508                                 *start = (uint32_t)DB_CKSUMSTART(mp);
1509                         if (stuff != NULL)
1510                                 *stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1511                         if (end != NULL)
1512                                 *end = (uint32_t)DB_CKSUMEND(mp);
1513                 }
1514         }
1515 
1516         if (flags_ptr != NULL)
1517                 *flags_ptr = flags;
1518 }
1519 
1520 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff,
1521     uint32_t end, uint32_t value, uint32_t flags)
1522 {
1523         ASSERT(DB_TYPE(mp) == M_DATA);
1524 
1525         DB_CKSUMSTART(mp) = (intptr_t)start;
1526         DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1527         DB_CKSUMEND(mp) = (intptr_t)end;
1528         DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1529         DB_CKSUM16(mp) = (uint16_t)value;
1530 }
1531 
1532 void
1533 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1534 {
1535         ASSERT(DB_TYPE(mp) == M_DATA);
1536 
1537         if (flags != NULL) {
1538                 *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1539                 if ((*flags != 0) && (mss != NULL))
1540                         *mss = (uint32_t)DB_LSOMSS(mp);
1541         }
1542 }
1543 
/*
 * Store the caller-supplied `present' indication in the transceiver
 * information structure (mti_present).
 */
void
mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
    boolean_t present)
{
        infop->mti_present = present;
}
1550 
/*
 * Store the caller-supplied `usable' indication in the transceiver
 * information structure (mti_usable).
 */
void
mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
    boolean_t usable)
{
        infop->mti_usable = usable;
}