1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2018 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * Data-Link Services Module
  29  */
  30 
  31 #include        <sys/sysmacros.h>
  32 #include        <sys/strsubr.h>
  33 #include        <sys/strsun.h>
  34 #include        <sys/vlan.h>
  35 #include        <sys/dld_impl.h>
  36 #include        <sys/sdt.h>
  37 #include        <sys/atomic.h>
  38 #include        <sys/sysevent.h>
  39 #include        <sys/sysevent/eventdefs.h>
  40 #include        <sys/sysevent/datalink.h>
  41 
  42 static kmem_cache_t     *i_dls_link_cachep;
  43 mod_hash_t              *i_dls_link_hash;
  44 static uint_t           i_dls_link_count;
  45 
  46 #define         LINK_HASHSZ     67      /* prime */
  47 #define         IMPL_HASHSZ     67      /* prime */
  48 
  49 /*
  50  * Construct a hash key from the DLSAP value.
  51  */
  52 #define MAKE_KEY(_sap)                                          \
  53         ((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
  54 
  55 #define DLS_STRIP_PADDING(pktsize, p) {                 \
  56         if (pktsize != 0) {                             \
  57                 ssize_t delta = pktsize - msgdsize(p);  \
  58                                                         \
  59                 if (delta < 0)                               \
  60                         (void) adjmsg(p, delta);        \
  61         }                                               \
  62 }
  63 
  64 /*
  65  * Private functions.
  66  */
  67 
  68 /*ARGSUSED*/
  69 static int
  70 i_dls_link_constructor(void *buf, void *arg, int kmflag)
  71 {
  72         dls_link_t      *dlp = buf;
  73         char            name[MAXNAMELEN];
  74 
  75         bzero(buf, sizeof (dls_link_t));
  76 
  77         (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
  78         dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
  79             mod_hash_null_valdtor);
  80 
  81         return (0);
  82 }
  83 
  84 /*ARGSUSED*/
  85 static void
  86 i_dls_link_destructor(void *buf, void *arg)
  87 {
  88         dls_link_t      *dlp = buf;
  89 
  90         ASSERT(dlp->dl_ref == 0);
  91         ASSERT(dlp->dl_mh == NULL);
  92         ASSERT(dlp->dl_mah == NULL);
  93         ASSERT(dlp->dl_unknowns == 0);
  94 
  95         mod_hash_destroy_idhash(dlp->dl_str_hash);
  96         dlp->dl_str_hash = NULL;
  97 
  98 }
  99 
 100 /*
 101  * - Parse the mac header information of the given packet.
 102  * - Strip the padding and skip over the header. Note that because some
 103  *   DLS consumers only check the db_ref count of the first mblk, we
 104  *   pullup the message into a single mblk. Because the original message
 105  *   is freed as the result of message pulling up, mac_vlan_header_info()
 106  *   is called again to update the mhi_saddr and mhi_daddr pointers in the
 107  *   mhip. Further, the mac_vlan_header_info() function ensures that the
 108  *   size of the pulled message is greater than the MAC header size,
 109  *   therefore we can directly advance b_rptr to point at the payload.
 110  *
 111  * We choose to use a macro for performance reasons.
 112  */
 113 #define DLS_PREPARE_PKT(mh, mp, mhip, err) {                            \
 114         mblk_t *nextp = (mp)->b_next;                                        \
 115         if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) {  \
 116                 DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp));                \
 117                 if (MBLKL((mp)) < (mhip)->mhi_hdrsize) {          \
 118                         mblk_t *newmp;                                  \
 119                         if ((newmp = msgpullup((mp), -1)) == NULL) {    \
 120                                 (err) = EINVAL;                         \
 121                         } else {                                        \
 122                                 (mp)->b_next = NULL;                 \
 123                                 freemsg((mp));                          \
 124                                 (mp) = newmp;                           \
 125                                 VERIFY(mac_vlan_header_info((mh),       \
 126                                     (mp), (mhip)) == 0);                \
 127                                 (mp)->b_next = nextp;                        \
 128                                 (mp)->b_rptr += (mhip)->mhi_hdrsize;      \
 129                         }                                               \
 130                 } else {                                                \
 131                         (mp)->b_rptr += (mhip)->mhi_hdrsize;              \
 132                 }                                                       \
 133         }                                                               \
 134 }
 135 
 136 /*
 137  * Truncate the chain starting at mp such that all packets in the chain
 138  * have identical source and destination addresses, saps, and tag types
 139  * (see below).  It returns a pointer to the mblk following the chain,
 140  * NULL if there is no further packet following the processed chain.
 141  * The countp argument is set to the number of valid packets in the chain.
 142  * Note that the whole MAC header (including the VLAN tag if any) in each
 143  * packet will be stripped.
 144  */
 145 static mblk_t *
 146 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
 147     uint_t *countp)
 148 {
 149         mblk_t          *prevp;
 150         uint_t          npacket = 1;
 151         size_t          addr_size = dlp->dl_mip->mi_addr_length;
 152         uint16_t        vid = VLAN_ID(mhip->mhi_tci);
 153         uint16_t        pri = VLAN_PRI(mhip->mhi_tci);
 154 
 155         /*
 156          * Compare with subsequent headers until we find one that has
 157          * differing header information. After checking each packet
 158          * strip padding and skip over the header.
 159          */
 160         for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
 161                 mac_header_info_t cmhi;
 162                 uint16_t cvid, cpri;
 163                 int err;
 164 
 165                 DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
 166                 if (err != 0)
 167                         break;
 168 
 169                 prevp->b_next = mp;
 170 
 171                 /*
 172                  * The source, destination, sap, vlan tag must all match in
 173                  * a given subchain.
 174                  */
 175                 if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
 176                     memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
 177                     memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
 178                     mhip->mhi_bindsap != cmhi.mhi_bindsap) {
 179                         /*
 180                          * Note that we don't need to restore the padding.
 181                          */
 182                         mp->b_rptr -= cmhi.mhi_hdrsize;
 183                         break;
 184                 }
 185 
 186                 cvid = VLAN_ID(cmhi.mhi_tci);
 187                 cpri = VLAN_PRI(cmhi.mhi_tci);
 188 
 189                 /*
 190                  * There are several types of packets. Packets don't match
 191                  * if they are classified to different type or if they are
 192                  * VLAN packets but belong to different VLANs:
 193                  *
 194                  * packet type          tagged          vid             pri
 195                  * ---------------------------------------------------------
 196                  * untagged             No              zero            zero
 197                  * VLAN packets         Yes             non-zero        -
 198                  * priority tagged      Yes             zero            non-zero
 199                  * 0 tagged             Yes             zero            zero
 200                  */
 201                 if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
 202                     (vid != cvid) || ((vid == VLAN_ID_NONE) &&
 203                     (((pri == 0) && (cpri != 0)) ||
 204                     ((pri != 0) && (cpri == 0))))) {
 205                         mp->b_rptr -= cmhi.mhi_hdrsize;
 206                         break;
 207                 }
 208 
 209                 npacket++;
 210         }
 211 
 212         /*
 213          * Break the chain at this point and return a pointer to the next
 214          * sub-chain.
 215          */
 216         prevp->b_next = NULL;
 217         *countp = npacket;
 218         return (mp);
 219 }
 220 
 221 /* ARGSUSED */
 222 static int
 223 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
 224 {
 225         dls_head_t *dhp = (dls_head_t *)val;
 226 
 227         /*
 228          * The lock order is  mod_hash's internal lock -> dh_lock as in the
 229          * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
 230          */
 231         mutex_enter(&dhp->dh_lock);
 232         if (dhp->dh_removing) {
 233                 mutex_exit(&dhp->dh_lock);
 234                 return (-1);
 235         }
 236         dhp->dh_ref++;
 237         mutex_exit(&dhp->dh_lock);
 238         return (0);
 239 }
 240 
 241 void
 242 i_dls_head_rele(dls_head_t *dhp)
 243 {
 244         mutex_enter(&dhp->dh_lock);
 245         dhp->dh_ref--;
 246         if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
 247                 cv_broadcast(&dhp->dh_cv);
 248         mutex_exit(&dhp->dh_lock);
 249 }
 250 
 251 static dls_head_t *
 252 i_dls_head_alloc(mod_hash_key_t key)
 253 {
 254         dls_head_t      *dhp;
 255 
 256         dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
 257         dhp->dh_key = key;
 258         return (dhp);
 259 }
 260 
 261 static void
 262 i_dls_head_free(dls_head_t *dhp)
 263 {
 264         ASSERT(dhp->dh_ref == 0);
 265         kmem_free(dhp, sizeof (dls_head_t));
 266 }
 267 
 268 /*
 269  * Try to send mp up to the streams of the given sap. Return the
 270  * number of streams which accepted this message, or 0 if no streams
 271  * accepted the message.
 272  *
 273  * Note that this function copies the message chain and the original
 274  * mp remains valid after this function returns.
 275  */
 276 static uint_t
 277 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
 278     mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
 279     boolean_t (*acceptfunc)())
 280 {
 281         mod_hash_t      *hash = dlp->dl_str_hash;
 282         mod_hash_key_t  key;
 283         dls_head_t      *dhp;
 284         dld_str_t       *dsp;
 285         mblk_t          *nmp;
 286         dls_rx_t        ds_rx;
 287         void            *ds_rx_arg;
 288         uint_t          naccepted = 0;
 289         int             rval;
 290 
 291         /*
 292          * Construct a hash key from the DLSAP.
 293          */
 294         key = MAKE_KEY(sap);
 295 
 296         /*
 297          * Search the hash table for a dld_str_t eligible to receive a
 298          * packet chain for this DLSAP. The mod hash's internal lock
 299          * serializes find/insert/remove from the mod hash list.
 300          * Incrementing the dh_ref (while holding the mod hash lock)
 301          * ensures dls_link_remove will wait for the upcall to finish.
 302          */
 303         if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
 304             i_dls_head_hold, &rval) != 0 || (rval != 0)) {
 305                 return (0);
 306         }
 307 
 308         /*
 309          * Find all dld_str_t that will accept the sub-chain.
 310          */
 311         for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
 312                 if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
 313                         continue;
 314 
 315                 /*
 316                  * We have at least one acceptor.
 317                  */
 318                 naccepted++;
 319 
 320                 /*
 321                  * There will normally be at least one more dld_str_t
 322                  * (since we've yet to check for non-promiscuous
 323                  * dld_str_t) so dup the sub-chain.
 324                  */
 325                 if ((nmp = copymsgchain(mp)) != NULL)
 326                         ds_rx(ds_rx_arg, mrh, nmp, mhip);
 327         }
 328 
 329         /*
 330          * Release the hold on the dld_str_t chain now that we have
 331          * finished walking it.
 332          */
 333         i_dls_head_rele(dhp);
 334         return (naccepted);
 335 }
 336 
 337 /* ARGSUSED */
 338 void
 339 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
 340     boolean_t loopback)
 341 {
 342         dls_link_t                      *dlp = arg;
 343         mod_hash_t                      *hash = dlp->dl_str_hash;
 344         mblk_t                          *nextp;
 345         mac_header_info_t               mhi;
 346         dls_head_t                      *dhp;
 347         dld_str_t                       *dsp;
 348         dld_str_t                       *ndsp;
 349         mblk_t                          *nmp;
 350         mod_hash_key_t                  key;
 351         uint_t                          npacket;
 352         boolean_t                       accepted;
 353         dls_rx_t                        ds_rx, nds_rx;
 354         void                            *ds_rx_arg, *nds_rx_arg;
 355         uint16_t                        vid;
 356         int                             err, rval;
 357 
 358         /*
 359          * Walk the packet chain.
 360          */
 361         for (; mp != NULL; mp = nextp) {
 362                 /*
 363                  * Wipe the accepted state.
 364                  */
 365                 accepted = B_FALSE;
 366 
 367                 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
 368                 if (err != 0) {
 369                         atomic_inc_32(&(dlp->dl_unknowns));
 370                         nextp = mp->b_next;
 371                         mp->b_next = NULL;
 372                         freemsg(mp);
 373                         continue;
 374                 }
 375 
 376                 /*
 377                  * Grab the longest sub-chain we can process as a single
 378                  * unit.
 379                  */
 380                 nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
 381                 ASSERT(npacket != 0);
 382 
 383                 vid = VLAN_ID(mhi.mhi_tci);
 384 
 385                 /*
 386                  * This condition is true only when a sun4v vsw client
 387                  * is on the scene; as it is the only type of client
 388                  * that multiplexes VLANs on a single client instance.
 389                  * All other types of clients have one VLAN per client
 390                  * instance. In that case, MAC strips the VLAN tag
 391                  * before delivering it to DLS (see mac_rx_deliver()).
 392                  */
 393                 if (mhi.mhi_istagged) {
 394 
 395                         /*
 396                          * If it is tagged traffic, send it upstream to
 397                          * all dld_str_t which are attached to the physical
 398                          * link and bound to SAP 0x8100.
 399                          */
 400                         if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
 401                             ETHERTYPE_VLAN, dls_accept) > 0) {
 402                                 accepted = B_TRUE;
 403                         }
 404 
 405                         /*
 406                          * Don't pass the packets up if they are tagged
 407                          * packets and:
 408                          *  - their VID and priority are both zero and the
 409                          *    original packet isn't using the PVID (invalid
 410                          *    packets).
 411                          *  - their sap is ETHERTYPE_VLAN and their VID is
 412                          *    zero as they have already been sent upstreams.
 413                          */
 414                         if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
 415                             VLAN_PRI(mhi.mhi_tci) == 0) ||
 416                             (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
 417                             vid == VLAN_ID_NONE)) {
 418                                 freemsgchain(mp);
 419                                 goto loop;
 420                         }
 421                 }
 422 
 423                 /*
 424                  * Construct a hash key from the DLSAP.
 425                  */
 426                 key = MAKE_KEY(mhi.mhi_bindsap);
 427 
 428                 /*
 429                  * Search the hash table for dld_str_t eligible to receive
 430                  * a packet chain for this DLSAP.
 431                  */
 432                 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
 433                     i_dls_head_hold, &rval) != 0 || (rval != 0)) {
 434                         freemsgchain(mp);
 435                         goto loop;
 436                 }
 437 
 438                 /*
 439                  * Find the first dld_str_t that will accept the sub-chain.
 440                  */
 441                 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
 442                         if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
 443                                 break;
 444 
 445                 /*
 446                  * If we did not find any dld_str_t willing to accept the
 447                  * sub-chain then throw it away.
 448                  */
 449                 if (dsp == NULL) {
 450                         i_dls_head_rele(dhp);
 451                         freemsgchain(mp);
 452                         goto loop;
 453                 }
 454 
 455                 /*
 456                  * We have at least one acceptor.
 457                  */
 458                 accepted = B_TRUE;
 459                 for (;;) {
 460                         /*
 461                          * Find the next dld_str_t that will accept the
 462                          * sub-chain.
 463                          */
 464                         for (ndsp = dsp->ds_next; ndsp != NULL;
 465                             ndsp = ndsp->ds_next)
 466                                 if (dls_accept(ndsp, &mhi, &nds_rx,
 467                                     &nds_rx_arg))
 468                                         break;
 469 
 470                         /*
 471                          * If there are no more dld_str_t that are willing
 472                          * to accept the sub-chain then we don't need to dup
 473                          * it before handing it to the current one.
 474                          */
 475                         if (ndsp == NULL) {
 476                                 ds_rx(ds_rx_arg, mrh, mp, &mhi);
 477 
 478                                 /*
 479                                  * Since there are no more dld_str_t, we're
 480                                  * done.
 481                                  */
 482                                 break;
 483                         }
 484 
 485                         /*
 486                          * There are more dld_str_t so dup the sub-chain.
 487                          */
 488                         if ((nmp = copymsgchain(mp)) != NULL)
 489                                 ds_rx(ds_rx_arg, mrh, nmp, &mhi);
 490 
 491                         dsp = ndsp;
 492                         ds_rx = nds_rx;
 493                         ds_rx_arg = nds_rx_arg;
 494                 }
 495 
 496                 /*
 497                  * Release the hold on the dld_str_t chain now that we have
 498                  * finished walking it.
 499                  */
 500                 i_dls_head_rele(dhp);
 501 
 502 loop:
 503                 /*
 504                  * If there were no acceptors then add the packet count to the
 505                  * 'unknown' count.
 506                  */
 507                 if (!accepted)
 508                         atomic_add_32(&(dlp->dl_unknowns), npacket);
 509         }
 510 }
 511 
 512 /* ARGSUSED */
 513 void
 514 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
 515     boolean_t loopback)
 516 {
 517         dld_str_t                       *dsp = arg;
 518         dls_link_t                      *dlp = dsp->ds_dlp;
 519         mac_header_info_t               mhi;
 520         dls_rx_t                        ds_rx;
 521         void                            *ds_rx_arg;
 522         int                             err;
 523 
 524         DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
 525         if (err != 0)
 526                 goto drop;
 527 
 528         /*
 529          * If there is promiscuous handle for vlan, we filter out the untagged
 530          * pkts and pkts that are not for the primary unicast address.
 531          */
 532         if (dsp->ds_vlan_mph != NULL) {
 533                 uint8_t prim_addr[MAXMACADDRLEN];
 534                 size_t  addr_length = dsp->ds_mip->mi_addr_length;
 535 
 536                 if (!(mhi.mhi_istagged))
 537                         goto drop;
 538                 ASSERT(dsp->ds_mh != NULL);
 539                 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
 540                 if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
 541                         goto drop;
 542 
 543                 if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
 544                         goto drop;
 545 
 546                 ds_rx(ds_rx_arg, NULL, mp, &mhi);
 547                 return;
 548         }
 549 
 550 drop:
 551         atomic_inc_32(&dlp->dl_unknowns);
 552         freemsg(mp);
 553 }
 554 
 555 /* ARGSUSED */
 556 void
 557 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
 558     boolean_t loopback)
 559 {
 560         dld_str_t                       *dsp = arg;
 561         dls_link_t                      *dlp = dsp->ds_dlp;
 562         mac_header_info_t               mhi;
 563         dls_rx_t                        ds_rx;
 564         void                            *ds_rx_arg;
 565         int                             err;
 566         dls_head_t                      *dhp;
 567         mod_hash_key_t                  key;
 568 
 569         DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
 570         if (err != 0)
 571                 goto drop;
 572 
 573         /*
 574          * In order to filter out sap pkt that no dls channel listens, search
 575          * the hash table trying to find a dld_str_t eligible to receive the pkt
 576          */
 577         if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
 578                 key = MAKE_KEY(mhi.mhi_bindsap);
 579                 if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
 580                     (mod_hash_val_t *)&dhp) != 0)
 581                         goto drop;
 582         }
 583 
 584         if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
 585                 goto drop;
 586 
 587         ds_rx(ds_rx_arg, NULL, mp, &mhi);
 588         return;
 589 
 590 drop:
 591         atomic_inc_32(&dlp->dl_unknowns);
 592         freemsg(mp);
 593 }
 594 
 595 /*
 596  * We'd like to notify via sysevents that a link state change has occurred.
 597  * There are a couple of challenges associated with this. The first is that if
 598  * the link is flapping a lot, we may not see an accurate state when we launch
 599  * the notification, we're told it changed, not what it changed to.
 600  *
 601  * The next problem is that all of the information that a user has associated
 602  * with this device is the exact opposite of what we have on the dls_link_t. We
 603  * have the name of the mac device, which has no bearing on what users see.
 604  * Likewise, we don't have the datalink id either. So we're going to have to get
 605  * this from dls.
 606  *
 607  * This is all further complicated by the fact that this could be going on in
 608  * another thread at the same time as someone is tearing down the dls_link_t
 609  * that we're associated with. We need to be careful not to grab the mac
 610  * perimeter, otherwise we stand a good chance of deadlock.
 611  */
 612 static void
 613 dls_link_notify(void *arg, mac_notify_type_t type)
 614 {
 615         dls_link_t      *dlp = arg;
 616         dls_dl_handle_t dhp;
 617         nvlist_t        *nvp;
 618         sysevent_t      *event;
 619         sysevent_id_t   eid;
 620 
 621         if (type != MAC_NOTE_LINK && type != MAC_NOTE_LOWLINK)
 622                 return;
 623 
 624         /*
 625          * If we can't find a devnet handle for this link, then there is no user
 626          * knowable device for this at the moment and there's nothing we can
 627          * really share with them that will make sense.
 628          */
 629         if (dls_devnet_hold_tmp_by_link(dlp, &dhp) != 0)
 630                 return;
 631 
 632         /*
 633          * Because we're attaching this nvlist_t to the sysevent, it'll get
 634          * cleaned up when we call sysevent_free.
 635          */
 636         VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 637         VERIFY(nvlist_add_int32(nvp, DATALINK_EV_LINK_ID,
 638             dls_devnet_linkid(dhp)) == 0);
 639         VERIFY(nvlist_add_string(nvp, DATALINK_EV_LINK_NAME,
 640             dls_devnet_link(dhp)) == 0);
 641         VERIFY(nvlist_add_int32(nvp, DATALINK_EV_ZONE_ID,
 642             dls_devnet_getzid(dhp)) == 0);
 643 
 644         dls_devnet_rele_tmp(dhp);
 645 
 646         event = sysevent_alloc(EC_DATALINK, ESC_DATALINK_LINK_STATE,
 647             ILLUMOS_KERN_PUB"dls", SE_SLEEP);
 648         VERIFY(event != NULL);
 649         (void) sysevent_attach_attributes(event, (sysevent_attr_list_t *)nvp);
 650 
 651         (void) log_sysevent(event, SE_SLEEP, &eid);
 652         sysevent_free(event);
 653 
 654 }
 655 
 656 static void
 657 i_dls_link_destroy(dls_link_t *dlp)
 658 {
 659         ASSERT(dlp->dl_nactive == 0);
 660         ASSERT(dlp->dl_impl_count == 0);
 661         ASSERT(dlp->dl_zone_ref == 0);
 662 
 663         /*
 664          * Free the structure back to the cache.
 665          */
 666         if (dlp->dl_mnh != NULL)
 667                 mac_notify_remove(dlp->dl_mnh, B_TRUE);
 668 
 669         if (dlp->dl_mch != NULL)
 670                 mac_client_close(dlp->dl_mch, 0);
 671 
 672         if (dlp->dl_mh != NULL) {
 673                 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 674                 mac_close(dlp->dl_mh);
 675         }
 676 
 677         dlp->dl_mh = NULL;
 678         dlp->dl_mch = NULL;
 679         dlp->dl_mip = NULL;
 680         dlp->dl_mnh = NULL;
 681         dlp->dl_unknowns = 0;
 682         dlp->dl_nonip_cnt = 0;
 683         kmem_cache_free(i_dls_link_cachep, dlp);
 684 }
 685 
 686 static int
 687 i_dls_link_create(const char *name, dls_link_t **dlpp)
 688 {
 689         dls_link_t              *dlp;
 690         int                     err;
 691 
 692         /*
 693          * Allocate a new dls_link_t structure.
 694          */
 695         dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
 696 
 697         /*
 698          * Name the dls_link_t after the MAC interface it represents.
 699          */
 700         (void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
 701 
 702         /*
 703          * First reference; hold open the MAC interface.
 704          */
 705         ASSERT(dlp->dl_mh == NULL);
 706         err = mac_open(dlp->dl_name, &dlp->dl_mh);
 707         if (err != 0)
 708                 goto bail;
 709 
 710         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 711         dlp->dl_mip = mac_info(dlp->dl_mh);
 712 
 713         /* DLS is the "primary" MAC client */
 714         ASSERT(dlp->dl_mch == NULL);
 715 
 716         err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
 717             MAC_OPEN_FLAGS_USE_DATALINK_NAME);
 718         if (err != 0)
 719                 goto bail;
 720 
 721         dlp->dl_mnh = mac_notify_add(dlp->dl_mh, dls_link_notify, dlp);
 722 
 723         DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
 724             dlp->dl_mch);
 725 
 726         *dlpp = dlp;
 727         return (0);
 728 
 729 bail:
 730         i_dls_link_destroy(dlp);
 731         return (err);
 732 }
 733 
 734 /*
 735  * Module initialization functions.
 736  */
 737 
 738 void
 739 dls_link_init(void)
 740 {
 741         /*
 742          * Create a kmem_cache of dls_link_t structures.
 743          */
 744         i_dls_link_cachep = kmem_cache_create("dls_link_cache",
 745             sizeof (dls_link_t), 0, i_dls_link_constructor,
 746             i_dls_link_destructor, NULL, NULL, NULL, 0);
 747         ASSERT(i_dls_link_cachep != NULL);
 748 
 749         /*
 750          * Create a dls_link_t hash table and associated lock.
 751          */
 752         i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
 753             IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
 754             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
 755         i_dls_link_count = 0;
 756 }
 757 
 758 int
 759 dls_link_fini(void)
 760 {
 761         if (i_dls_link_count > 0)
 762                 return (EBUSY);
 763 
 764         /*
 765          * Destroy the kmem_cache.
 766          */
 767         kmem_cache_destroy(i_dls_link_cachep);
 768 
 769         /*
 770          * Destroy the hash table and associated lock.
 771          */
 772         mod_hash_destroy_hash(i_dls_link_hash);
 773         return (0);
 774 }
 775 
 776 /*
 777  * Exported functions.
 778  */
 779 
 780 static int
 781 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
 782 {
 783         dls_link_t              *dlp;
 784         int                     err;
 785 
 786         /*
 787          * Look up a dls_link_t corresponding to the given macname in the
 788          * global hash table. The i_dls_link_hash itself is protected by the
 789          * mod_hash package's internal lock which synchronizes
 790          * find/insert/remove into the global mod_hash list. Assumes that
 791          * inserts and removes are single threaded on a per mac end point
 792          * by the mac perimeter.
 793          */
 794         if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
 795             (mod_hash_val_t *)&dlp)) == 0)
 796                 goto done;
 797 
 798         ASSERT(err == MH_ERR_NOTFOUND);
 799         if (!create)
 800                 return (ENOENT);
 801 
 802         /*
 803          * We didn't find anything so we need to create one.
 804          */
 805         if ((err = i_dls_link_create(name, &dlp)) != 0)
 806                 return (err);
 807 
 808         /*
 809          * Insert the dls_link_t.
 810          */
 811         err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
 812             (mod_hash_val_t)dlp);
 813         ASSERT(err == 0);
 814 
 815         atomic_inc_32(&i_dls_link_count);
 816         ASSERT(i_dls_link_count != 0);
 817 
 818 done:
 819         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 820         /*
 821          * Bump the reference count and hand back the reference.
 822          */
 823         dlp->dl_ref++;
 824         *dlpp = dlp;
 825         return (0);
 826 }
 827 
 828 int
 829 dls_link_hold_create(const char *name, dls_link_t **dlpp)
 830 {
 831         return (dls_link_hold_common(name, dlpp, B_TRUE));
 832 }
 833 
 834 int
 835 dls_link_hold(const char *name, dls_link_t **dlpp)
 836 {
 837         return (dls_link_hold_common(name, dlpp, B_FALSE));
 838 }
 839 
 840 dev_info_t *
 841 dls_link_devinfo(dev_t dev)
 842 {
 843         dls_link_t      *dlp;
 844         dev_info_t      *dip;
 845         char    macname[MAXNAMELEN];
 846         char    *drv;
 847         mac_perim_handle_t      mph;
 848 
 849         if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
 850                 return (NULL);
 851         (void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
 852             DLS_MINOR2INST(getminor(dev)));
 853 
 854         /*
 855          * The code below assumes that the name constructed above is the
 856          * macname. This is not the case for legacy devices. Currently this
 857          * is ok because this function is only called in the getinfo(9e) path,
 858          * which for a legacy device would directly end up in the driver's
 859          * getinfo, rather than here
 860          */
 861         if (mac_perim_enter_by_macname(macname, &mph) != 0)
 862                 return (NULL);
 863 
 864         if (dls_link_hold(macname, &dlp) != 0) {
 865                 mac_perim_exit(mph);
 866                 return (NULL);
 867         }
 868 
 869         dip = mac_devinfo_get(dlp->dl_mh);
 870         dls_link_rele(dlp);
 871         mac_perim_exit(mph);
 872 
 873         return (dip);
 874 }
 875 
 876 dev_t
 877 dls_link_dev(dls_link_t *dlp)
 878 {
 879         return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
 880             mac_minor(dlp->dl_mh)));
 881 }
 882 
 883 void
 884 dls_link_rele(dls_link_t *dlp)
 885 {
 886         mod_hash_val_t  val;
 887 
 888         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 889         /*
 890          * Check if there are any more references.
 891          */
 892         if (--dlp->dl_ref == 0) {
 893                 (void) mod_hash_remove(i_dls_link_hash,
 894                     (mod_hash_key_t)dlp->dl_name, &val);
 895                 ASSERT(dlp == (dls_link_t *)val);
 896 
 897                 /*
 898                  * Destroy the dls_link_t.
 899                  */
 900                 i_dls_link_destroy(dlp);
 901                 ASSERT(i_dls_link_count > 0);
 902                 atomic_dec_32(&i_dls_link_count);
 903         }
 904 }
 905 
 906 int
 907 dls_link_rele_by_name(const char *name)
 908 {
 909         dls_link_t              *dlp;
 910 
 911         if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
 912             (mod_hash_val_t *)&dlp) != 0)
 913                 return (ENOENT);
 914 
 915         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 916 
 917         /*
 918          * Must fail detach if mac client is busy.
 919          */
 920         ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
 921         if (mac_link_has_flows(dlp->dl_mch))
 922                 return (ENOTEMPTY);
 923 
 924         dls_link_rele(dlp);
 925         return (0);
 926 }
 927 
 928 int
 929 dls_link_setzid(const char *name, zoneid_t zid)
 930 {
 931         dls_link_t      *dlp;
 932         int             err = 0;
 933         zoneid_t        old_zid;
 934 
 935         if ((err = dls_link_hold_create(name, &dlp)) != 0)
 936                 return (err);
 937 
 938         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 939 
 940         if ((old_zid = dlp->dl_zid) == zid)
 941                 goto done;
 942 
 943         /*
 944          * Check whether this dlp is used by its own zone.  If yes, we cannot
 945          * change its zoneid.
 946          */
 947         if (dlp->dl_zone_ref != 0) {
 948                 err = EBUSY;
 949                 goto done;
 950         }
 951 
 952         dlp->dl_zid = zid;
 953 
 954         if (zid == GLOBAL_ZONEID) {
 955                 /*
 956                  * The link is moving from a non-global zone to the global
 957                  * zone, so we need to release the reference that was held
 958                  * when the link was originally assigned to the non-global
 959                  * zone.
 960                  */
 961                 dls_link_rele(dlp);
 962         }
 963 
 964 done:
 965         /*
 966          * We only keep the reference to this link open if the link has
 967          * successfully moved from the global zone to a non-global zone.
 968          */
 969         if (err != 0 || old_zid != GLOBAL_ZONEID)
 970                 dls_link_rele(dlp);
 971         return (err);
 972 }
 973 
 974 int
 975 dls_link_getzid(const char *name, zoneid_t *zidp)
 976 {
 977         dls_link_t      *dlp;
 978         int             err = 0;
 979 
 980         if ((err = dls_link_hold(name, &dlp)) != 0)
 981                 return (err);
 982 
 983         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
 984 
 985         *zidp = dlp->dl_zid;
 986 
 987         dls_link_rele(dlp);
 988         return (0);
 989 }
 990 
 991 void
 992 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
 993 {
 994         mod_hash_t      *hash = dlp->dl_str_hash;
 995         mod_hash_key_t  key;
 996         dls_head_t      *dhp;
 997         dld_str_t       *p;
 998         int             err;
 999 
1000         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
1001 
1002         /*
1003          * Generate a hash key based on the sap.
1004          */
1005         key = MAKE_KEY(sap);
1006 
1007         /*
1008          * Search the table for a list head with this key.
1009          */
1010         if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1011                 ASSERT(err == MH_ERR_NOTFOUND);
1012 
1013                 dhp = i_dls_head_alloc(key);
1014                 err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1015                 ASSERT(err == 0);
1016         }
1017 
1018         /*
1019          * Add the dld_str_t to the head of the list. List walkers in
1020          * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
1021          * while they walk the list. The membar below ensures that list walkers
1022          * see exactly the old list or the new list.
1023          */
1024         ASSERT(dsp->ds_next == NULL);
1025         p = dhp->dh_list;
1026         dsp->ds_next = p;
1027 
1028         membar_producer();
1029 
1030         dhp->dh_list = dsp;
1031 
1032         /*
1033          * Save a pointer to the list head.
1034          */
1035         dsp->ds_head = dhp;
1036         dlp->dl_impl_count++;
1037 }
1038 
1039 void
1040 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
1041 {
1042         mod_hash_t      *hash = dlp->dl_str_hash;
1043         dld_str_t       **pp;
1044         dld_str_t       *p;
1045         dls_head_t      *dhp;
1046 
1047         ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
1048 
1049         /*
1050          * We set dh_removing here to tell the receive callbacks not to pass
1051          * up packets anymore. Then wait till the current callbacks are done.
1052          * This happens either in the close path or in processing the
1053          * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
1054          * The dh_ref ensures there aren't and there won't be any upcalls
1055          * walking or using the dh_list. The mod hash internal lock ensures
1056          * that the insert/remove of the dls_head_t itself synchronizes with
1057          * any i_dls_link_rx trying to locate it. The perimeter ensures that
1058          * there isn't another simultaneous dls_link_add/remove.
1059          */
1060         dhp = dsp->ds_head;
1061 
1062         mutex_enter(&dhp->dh_lock);
1063         dhp->dh_removing = B_TRUE;
1064         while (dhp->dh_ref != 0)
1065                 cv_wait(&dhp->dh_cv, &dhp->dh_lock);
1066         mutex_exit(&dhp->dh_lock);
1067 
1068         /*
1069          * Walk the list and remove the dld_str_t.
1070          */
1071         for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
1072                 if (p == dsp)
1073                         break;
1074         }
1075         ASSERT(p != NULL);
1076         *pp = p->ds_next;
1077         p->ds_next = NULL;
1078         p->ds_head = NULL;
1079 
1080         ASSERT(dlp->dl_impl_count != 0);
1081         dlp->dl_impl_count--;
1082 
1083         if (dhp->dh_list == NULL) {
1084                 mod_hash_val_t  val = NULL;
1085 
1086                 /*
1087                  * The list is empty so remove the hash table entry.
1088                  */
1089                 (void) mod_hash_remove(hash, dhp->dh_key, &val);
1090                 ASSERT(dhp == (dls_head_t *)val);
1091                 i_dls_head_free(dhp);
1092         } else {
1093                 mutex_enter(&dhp->dh_lock);
1094                 dhp->dh_removing = B_FALSE;
1095                 mutex_exit(&dhp->dh_lock);
1096         }
1097 }