1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2018, Joyent, Inc.
  25  */
  26 
  27 /*
  28  * Data-Link Driver
  29  */
  30 #include <sys/sysmacros.h>
  31 #include <sys/strsubr.h>
  32 #include <sys/strsun.h>
  33 #include <sys/vlan.h>
  34 #include <sys/dld_impl.h>
  35 #include <sys/mac_client.h>
  36 #include <sys/mac_client_impl.h>
  37 #include <sys/mac_client_priv.h>
  38 
  39 typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
  40 
  41 static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
  42     proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
  43     proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
  44     proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
  45     proto_notify_req, proto_passive_req;
  46 
  47 static void proto_capability_advertise(dld_str_t *, mblk_t *);
  48 static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
  49 static boolean_t check_mod_above(queue_t *, const char *);
  50 
  51 #define DL_ACK_PENDING(state) \
  52         ((state) == DL_ATTACH_PENDING || \
  53         (state) == DL_DETACH_PENDING || \
  54         (state) == DL_BIND_PENDING || \
  55         (state) == DL_UNBIND_PENDING)
  56 
  57 /*
  58  * Process a DLPI protocol message.
  59  * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
  60  * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
  61  * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
  62  * as 'passive' and forbids it from being subsequently made 'active'
  63  * by the above primitives.
  64  */
  65 void
  66 dld_proto(dld_str_t *dsp, mblk_t *mp)
  67 {
  68         t_uscalar_t             prim;
  69 
  70         if (MBLKL(mp) < sizeof (t_uscalar_t)) {
  71                 freemsg(mp);
  72                 return;
  73         }
  74         prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
  75 
  76         switch (prim) {
  77         case DL_INFO_REQ:
  78                 proto_info_req(dsp, mp);
  79                 break;
  80         case DL_BIND_REQ:
  81                 proto_bind_req(dsp, mp);
  82                 break;
  83         case DL_UNBIND_REQ:
  84                 proto_unbind_req(dsp, mp);
  85                 break;
  86         case DL_UNITDATA_REQ:
  87                 proto_unitdata_req(dsp, mp);
  88                 break;
  89         case DL_UDQOS_REQ:
  90                 proto_udqos_req(dsp, mp);
  91                 break;
  92         case DL_ATTACH_REQ:
  93                 proto_attach_req(dsp, mp);
  94                 break;
  95         case DL_DETACH_REQ:
  96                 proto_detach_req(dsp, mp);
  97                 break;
  98         case DL_ENABMULTI_REQ:
  99                 proto_enabmulti_req(dsp, mp);
 100                 break;
 101         case DL_DISABMULTI_REQ:
 102                 proto_disabmulti_req(dsp, mp);
 103                 break;
 104         case DL_PROMISCON_REQ:
 105                 proto_promiscon_req(dsp, mp);
 106                 break;
 107         case DL_PROMISCOFF_REQ:
 108                 proto_promiscoff_req(dsp, mp);
 109                 break;
 110         case DL_PHYS_ADDR_REQ:
 111                 proto_physaddr_req(dsp, mp);
 112                 break;
 113         case DL_SET_PHYS_ADDR_REQ:
 114                 proto_setphysaddr_req(dsp, mp);
 115                 break;
 116         case DL_NOTIFY_REQ:
 117                 proto_notify_req(dsp, mp);
 118                 break;
 119         case DL_CAPABILITY_REQ:
 120                 proto_capability_req(dsp, mp);
 121                 break;
 122         case DL_PASSIVE_REQ:
 123                 proto_passive_req(dsp, mp);
 124                 break;
 125         default:
 126                 proto_req(dsp, mp);
 127                 break;
 128         }
 129 }
 130 
 131 #define NEG(x)  -(x)
/*
 * Layout of the DL_INFO_ACK reply assembled by proto_info_req(): the
 * dl_info_ack_t header followed by the areas that the header refers to
 * by offset.
 */
typedef struct dl_info_ack_wrapper {
        /* Ack header; proto_info_req() asserts it sits at the message start. */
        dl_info_ack_t           dl_info;
        /* DLSAP address: MAC address followed by a 16-bit SAP. */
        uint8_t                 dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
        /* Media broadcast address. */
        uint8_t                 dl_brdcst_addr[MAXMACADDRLEN];
        /* Supported QOS range (filled in only when QOS is advertised). */
        dl_qos_cl_range1_t      dl_qos_range1;
        /* Currently selected QOS (filled in only when QOS is advertised). */
        dl_qos_cl_sel1_t        dl_qos_sel1;
} dl_info_ack_wrapper_t;
 139 
 140 /*
 141  * DL_INFO_REQ
 142  */
 143 static void
 144 proto_info_req(dld_str_t *dsp, mblk_t *mp)
 145 {
 146         dl_info_ack_wrapper_t   *dlwp;
 147         dl_info_ack_t           *dlp;
 148         dl_qos_cl_sel1_t        *selp;
 149         dl_qos_cl_range1_t      *rangep;
 150         uint8_t                 *addr;
 151         uint8_t                 *brdcst_addr;
 152         uint_t                  addr_length;
 153         uint_t                  sap_length;
 154         mac_info_t              minfo;
 155         mac_info_t              *minfop;
 156         queue_t                 *q = dsp->ds_wq;
 157 
 158         /*
 159          * Swap the request message for one large enough to contain the
 160          * wrapper structure defined above.
 161          */
 162         if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
 163             M_PCPROTO, 0)) == NULL)
 164                 return;
 165 
 166         bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
 167         dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
 168 
 169         dlp = &(dlwp->dl_info);
 170         ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
 171 
 172         dlp->dl_primitive = DL_INFO_ACK;
 173 
 174         /*
 175          * Set up the sub-structure pointers.
 176          */
 177         addr = dlwp->dl_addr;
 178         brdcst_addr = dlwp->dl_brdcst_addr;
 179         rangep = &(dlwp->dl_qos_range1);
 180         selp = &(dlwp->dl_qos_sel1);
 181 
 182         /*
 183          * This driver supports only version 2 connectionless DLPI provider
 184          * nodes.
 185          */
 186         dlp->dl_service_mode = DL_CLDLS;
 187         dlp->dl_version = DL_VERSION_2;
 188 
 189         /*
 190          * Set the style of the provider
 191          */
 192         dlp->dl_provider_style = dsp->ds_style;
 193         ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
 194             dlp->dl_provider_style == DL_STYLE2);
 195 
 196         /*
 197          * Set the current DLPI state.
 198          */
 199         dlp->dl_current_state = dsp->ds_dlstate;
 200 
 201         /*
 202          * Gratuitously set the media type. This is to deal with modules
 203          * that assume the media type is known prior to DL_ATTACH_REQ
 204          * being completed.
 205          */
 206         dlp->dl_mac_type = DL_ETHER;
 207 
 208         /*
 209          * If the stream is not at least attached we try to retrieve the
 210          * mac_info using mac_info_get()
 211          */
 212         if (dsp->ds_dlstate == DL_UNATTACHED ||
 213             dsp->ds_dlstate == DL_ATTACH_PENDING ||
 214             dsp->ds_dlstate == DL_DETACH_PENDING) {
 215                 if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
 216                         /*
 217                          * Cannot find mac_info. giving up.
 218                          */
 219                         goto done;
 220                 }
 221                 minfop = &minfo;
 222         } else {
 223                 minfop = (mac_info_t *)dsp->ds_mip;
 224                 /* We can only get the sdu if we're attached. */
 225                 mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
 226         }
 227 
 228         /*
 229          * Set the media type (properly this time).
 230          */
 231         if (dsp->ds_native)
 232                 dlp->dl_mac_type = minfop->mi_nativemedia;
 233         else
 234                 dlp->dl_mac_type = minfop->mi_media;
 235 
 236         /*
 237          * Set the DLSAP length. We only support 16 bit values and they
 238          * appear after the MAC address portion of DLSAP addresses.
 239          */
 240         sap_length = sizeof (uint16_t);
 241         dlp->dl_sap_length = NEG(sap_length);
 242 
 243         addr_length = minfop->mi_addr_length;
 244 
 245         /*
 246          * Copy in the media broadcast address.
 247          */
 248         if (minfop->mi_brdcst_addr != NULL) {
 249                 dlp->dl_brdcst_addr_offset =
 250                     (uintptr_t)brdcst_addr - (uintptr_t)dlp;
 251                 bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
 252                 dlp->dl_brdcst_addr_length = addr_length;
 253         }
 254 
 255         /* Only VLAN links and links that have a normal tag mode support QOS. */
 256         if ((dsp->ds_mch != NULL &&
 257             mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
 258             (dsp->ds_dlp != NULL &&
 259             dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
 260                 dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
 261                 dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
 262 
 263                 rangep->dl_qos_type = DL_QOS_CL_RANGE1;
 264                 rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
 265                 rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
 266                 rangep->dl_protection.dl_min = DL_UNKNOWN;
 267                 rangep->dl_protection.dl_max = DL_UNKNOWN;
 268                 rangep->dl_residual_error = DL_UNKNOWN;
 269 
 270                 /*
 271                  * Specify the supported range of priorities.
 272                  */
 273                 rangep->dl_priority.dl_min = 0;
 274                 rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
 275 
 276                 dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
 277                 dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
 278 
 279                 selp->dl_qos_type = DL_QOS_CL_SEL1;
 280                 selp->dl_trans_delay = DL_UNKNOWN;
 281                 selp->dl_protection = DL_UNKNOWN;
 282                 selp->dl_residual_error = DL_UNKNOWN;
 283 
 284                 /*
 285                  * Specify the current priority (which can be changed by
 286                  * the DL_UDQOS_REQ primitive).
 287                  */
 288                 selp->dl_priority = dsp->ds_pri;
 289         }
 290 
 291         dlp->dl_addr_length = addr_length + sizeof (uint16_t);
 292         if (dsp->ds_dlstate == DL_IDLE) {
 293                 /*
 294                  * The stream is bound. Therefore we can formulate a valid
 295                  * DLSAP address.
 296                  */
 297                 dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
 298                 if (addr_length > 0)
 299                         mac_unicast_primary_get(dsp->ds_mh, addr);
 300 
 301                 *(uint16_t *)(addr + addr_length) = dsp->ds_sap;
 302         }
 303 
 304 done:
 305         IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
 306         IMPLY(dlp->dl_qos_range_offset != 0,
 307             dlp->dl_qos_range_length != 0);
 308         IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
 309         IMPLY(dlp->dl_brdcst_addr_offset != 0,
 310             dlp->dl_brdcst_addr_length != 0);
 311 
 312         qreply(q, mp);
 313 }
 314 
 315 /*
 316  * DL_ATTACH_REQ
 317  */
 318 static void
 319 proto_attach_req(dld_str_t *dsp, mblk_t *mp)
 320 {
 321         dl_attach_req_t *dlp = (dl_attach_req_t *)mp->b_rptr;
 322         int             err = 0;
 323         t_uscalar_t     dl_err;
 324         queue_t         *q = dsp->ds_wq;
 325 
 326         if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
 327             dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
 328                 dl_err = DL_BADPRIM;
 329                 goto failed;
 330         }
 331 
 332         if (dsp->ds_dlstate != DL_UNATTACHED) {
 333                 dl_err = DL_OUTSTATE;
 334                 goto failed;
 335         }
 336 
 337         dsp->ds_dlstate = DL_ATTACH_PENDING;
 338 
 339         err = dld_str_attach(dsp, dlp->dl_ppa);
 340         if (err != 0) {
 341                 switch (err) {
 342                 case ENOENT:
 343                         dl_err = DL_BADPPA;
 344                         err = 0;
 345                         break;
 346                 default:
 347                         dl_err = DL_SYSERR;
 348                         break;
 349                 }
 350                 dsp->ds_dlstate = DL_UNATTACHED;
 351                 goto failed;
 352         }
 353         ASSERT(dsp->ds_dlstate == DL_UNBOUND);
 354         dlokack(q, mp, DL_ATTACH_REQ);
 355         return;
 356 
 357 failed:
 358         dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
 359 }
 360 
 361 /*
 362  * DL_DETACH_REQ
 363  */
 364 static void
 365 proto_detach_req(dld_str_t *dsp, mblk_t *mp)
 366 {
 367         queue_t         *q = dsp->ds_wq;
 368         t_uscalar_t     dl_err;
 369 
 370         if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
 371                 dl_err = DL_BADPRIM;
 372                 goto failed;
 373         }
 374 
 375         if (dsp->ds_dlstate != DL_UNBOUND) {
 376                 dl_err = DL_OUTSTATE;
 377                 goto failed;
 378         }
 379 
 380         if (dsp->ds_style == DL_STYLE1) {
 381                 dl_err = DL_BADPRIM;
 382                 goto failed;
 383         }
 384 
 385         ASSERT(dsp->ds_datathr_cnt == 0);
 386         dsp->ds_dlstate = DL_DETACH_PENDING;
 387 
 388         dld_str_detach(dsp);
 389         dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
 390         return;
 391 
 392 failed:
 393         dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
 394 }
 395 
 396 /*
 397  * DL_BIND_REQ
 398  */
 399 static void
 400 proto_bind_req(dld_str_t *dsp, mblk_t *mp)
 401 {
 402         dl_bind_req_t   *dlp = (dl_bind_req_t *)mp->b_rptr;
 403         int             err = 0;
 404         uint8_t         dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
 405         uint_t          dlsap_addr_length;
 406         t_uscalar_t     dl_err;
 407         t_scalar_t      sap;
 408         queue_t         *q = dsp->ds_wq;
 409         mac_perim_handle_t      mph;
 410         void            *mdip;
 411         int32_t         intr_cpu;
 412 
 413         if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
 414                 dl_err = DL_BADPRIM;
 415                 goto failed;
 416         }
 417 
 418         if (dlp->dl_xidtest_flg != 0) {
 419                 dl_err = DL_NOAUTO;
 420                 goto failed;
 421         }
 422 
 423         if (dlp->dl_service_mode != DL_CLDLS) {
 424                 dl_err = DL_UNSUPPORTED;
 425                 goto failed;
 426         }
 427 
 428         if (dsp->ds_dlstate != DL_UNBOUND) {
 429                 dl_err = DL_OUTSTATE;
 430                 goto failed;
 431         }
 432 
 433         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 434 
 435         if ((err = dls_active_set(dsp)) != 0) {
 436                 dl_err = DL_SYSERR;
 437                 goto failed2;
 438         }
 439 
 440         dsp->ds_dlstate = DL_BIND_PENDING;
 441         /*
 442          * Set the receive callback.
 443          */
 444         dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
 445             dld_str_rx_raw : dld_str_rx_unitdata, dsp);
 446 
 447         /*
 448          * Bind the channel such that it can receive packets.
 449          */
 450         sap = dlp->dl_sap;
 451         dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
 452             !check_mod_above(dsp->ds_rq, "arp");
 453 
 454         err = dls_bind(dsp, sap);
 455         if (err != 0) {
 456                 switch (err) {
 457                 case EINVAL:
 458                         dl_err = DL_BADADDR;
 459                         err = 0;
 460                         break;
 461                 default:
 462                         dl_err = DL_SYSERR;
 463                         break;
 464                 }
 465 
 466                 dsp->ds_dlstate = DL_UNBOUND;
 467                 dls_active_clear(dsp, B_FALSE);
 468                 goto failed2;
 469         }
 470 
 471         intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
 472         mdip = mac_get_devinfo(dsp->ds_mh);
 473         mac_perim_exit(mph);
 474 
 475         /*
 476          * We do this after we get out of the perim to avoid deadlocks
 477          * etc. since part of mac_client_retarget_intr is to walk the
 478          * device tree in order to find and retarget the interrupts.
 479          */
 480         if (intr_cpu != -1)
 481                 mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
 482 
 483         /*
 484          * Copy in MAC address.
 485          */
 486         dlsap_addr_length = dsp->ds_mip->mi_addr_length;
 487         mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
 488 
 489         /*
 490          * Copy in the SAP.
 491          */
 492         *(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
 493         dlsap_addr_length += sizeof (uint16_t);
 494 
 495         dsp->ds_dlstate = DL_IDLE;
 496         dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
 497         return;
 498 
 499 failed2:
 500         mac_perim_exit(mph);
 501 failed:
 502         dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
 503 }
 504 
 505 /*
 506  * DL_UNBIND_REQ
 507  */
 508 static void
 509 proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
 510 {
 511         queue_t         *q = dsp->ds_wq;
 512         t_uscalar_t     dl_err;
 513         mac_perim_handle_t      mph;
 514 
 515         if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
 516                 dl_err = DL_BADPRIM;
 517                 goto failed;
 518         }
 519 
 520         if (dsp->ds_dlstate != DL_IDLE) {
 521                 dl_err = DL_OUTSTATE;
 522                 goto failed;
 523         }
 524 
 525         mutex_enter(&dsp->ds_lock);
 526         while (dsp->ds_datathr_cnt != 0)
 527                 cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
 528 
 529         dsp->ds_dlstate = DL_UNBIND_PENDING;
 530         mutex_exit(&dsp->ds_lock);
 531 
 532         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 533         /*
 534          * Unbind the channel to stop packets being received.
 535          */
 536         dls_unbind(dsp);
 537 
 538         /*
 539          * Disable polling mode, if it is enabled.
 540          */
 541         (void) dld_capab_poll_disable(dsp, NULL);
 542 
 543         /*
 544          * Clear LSO flags.
 545          */
 546         dsp->ds_lso = B_FALSE;
 547         dsp->ds_lso_max = 0;
 548 
 549         /*
 550          * Clear the receive callback.
 551          */
 552         dls_rx_set(dsp, NULL, NULL);
 553         dsp->ds_direct = B_FALSE;
 554 
 555         /*
 556          * Set the mode back to the default (unitdata).
 557          */
 558         dsp->ds_mode = DLD_UNITDATA;
 559         dsp->ds_dlstate = DL_UNBOUND;
 560 
 561         dls_active_clear(dsp, B_FALSE);
 562         mac_perim_exit(mph);
 563         dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
 564         return;
 565 failed:
 566         dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
 567 }
 568 
 569 /*
 570  * DL_PROMISCON_REQ
 571  */
 572 static void
 573 proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
 574 {
 575         dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
 576         int             err = 0;
 577         t_uscalar_t     dl_err;
 578         uint32_t        new_flags, promisc_saved;
 579         queue_t         *q = dsp->ds_wq;
 580         mac_perim_handle_t      mph;
 581 
 582         if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
 583                 dl_err = DL_BADPRIM;
 584                 goto failed;
 585         }
 586 
 587         if (dsp->ds_dlstate == DL_UNATTACHED ||
 588             DL_ACK_PENDING(dsp->ds_dlstate)) {
 589                 dl_err = DL_OUTSTATE;
 590                 goto failed;
 591         }
 592 
 593         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 594 
 595         new_flags = promisc_saved = dsp->ds_promisc;
 596         switch (dlp->dl_level) {
 597         case DL_PROMISC_SAP:
 598                 new_flags |= DLS_PROMISC_SAP;
 599                 break;
 600 
 601         case DL_PROMISC_MULTI:
 602                 new_flags |= DLS_PROMISC_MULTI;
 603                 break;
 604 
 605         case DL_PROMISC_PHYS:
 606                 new_flags |= DLS_PROMISC_PHYS;
 607                 break;
 608 
 609         default:
 610                 dl_err = DL_NOTSUPPORTED;
 611                 goto failed2;
 612         }
 613 
 614         if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
 615                 ASSERT(dsp->ds_promisc == promisc_saved);
 616                 dl_err = DL_SYSERR;
 617                 goto failed2;
 618         }
 619 
 620         /*
 621          * Adjust channel promiscuity.
 622          */
 623         err = dls_promisc(dsp, new_flags);
 624 
 625         if (err != 0) {
 626                 dl_err = DL_SYSERR;
 627                 dsp->ds_promisc = promisc_saved;
 628                 if (promisc_saved == 0)
 629                         dls_active_clear(dsp, B_FALSE);
 630                 goto failed2;
 631         }
 632 
 633         mac_perim_exit(mph);
 634 
 635         dlokack(q, mp, DL_PROMISCON_REQ);
 636         return;
 637 
 638 failed2:
 639         mac_perim_exit(mph);
 640 failed:
 641         dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
 642 }
 643 
 644 /*
 645  * DL_PROMISCOFF_REQ
 646  */
 647 static void
 648 proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
 649 {
 650         dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
 651         int             err = 0;
 652         t_uscalar_t     dl_err;
 653         uint32_t        new_flags;
 654         queue_t         *q = dsp->ds_wq;
 655         mac_perim_handle_t      mph;
 656 
 657         if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
 658                 dl_err = DL_BADPRIM;
 659                 goto failed;
 660         }
 661 
 662         if (dsp->ds_dlstate == DL_UNATTACHED ||
 663             DL_ACK_PENDING(dsp->ds_dlstate)) {
 664                 dl_err = DL_OUTSTATE;
 665                 goto failed;
 666         }
 667 
 668         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 669 
 670         new_flags = dsp->ds_promisc;
 671         switch (dlp->dl_level) {
 672         case DL_PROMISC_SAP:
 673                 if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
 674                         dl_err = DL_NOTENAB;
 675                         goto failed2;
 676                 }
 677                 new_flags &= ~DLS_PROMISC_SAP;
 678                 break;
 679 
 680         case DL_PROMISC_MULTI:
 681                 if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
 682                         dl_err = DL_NOTENAB;
 683                         goto failed2;
 684                 }
 685                 new_flags &= ~DLS_PROMISC_MULTI;
 686                 break;
 687 
 688         case DL_PROMISC_PHYS:
 689                 if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
 690                         dl_err = DL_NOTENAB;
 691                         goto failed2;
 692                 }
 693                 new_flags &= ~DLS_PROMISC_PHYS;
 694                 break;
 695 
 696         default:
 697                 dl_err = DL_NOTSUPPORTED;
 698                 goto failed2;
 699         }
 700 
 701         /*
 702          * Adjust channel promiscuity.
 703          */
 704         err = dls_promisc(dsp, new_flags);
 705 
 706         if (err != 0) {
 707                 dl_err = DL_SYSERR;
 708                 goto failed2;
 709         }
 710 
 711         ASSERT(dsp->ds_promisc == new_flags);
 712         if (dsp->ds_promisc == 0)
 713                 dls_active_clear(dsp, B_FALSE);
 714 
 715         mac_perim_exit(mph);
 716 
 717         dlokack(q, mp, DL_PROMISCOFF_REQ);
 718         return;
 719 failed2:
 720         mac_perim_exit(mph);
 721 failed:
 722         dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
 723 }
 724 
 725 /*
 726  * DL_ENABMULTI_REQ
 727  */
 728 static void
 729 proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
 730 {
 731         dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
 732         int             err = 0;
 733         t_uscalar_t     dl_err;
 734         queue_t         *q = dsp->ds_wq;
 735         mac_perim_handle_t      mph;
 736 
 737         if (dsp->ds_dlstate == DL_UNATTACHED ||
 738             DL_ACK_PENDING(dsp->ds_dlstate)) {
 739                 dl_err = DL_OUTSTATE;
 740                 goto failed;
 741         }
 742 
 743         if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
 744             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 745             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 746                 dl_err = DL_BADPRIM;
 747                 goto failed;
 748         }
 749 
 750         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 751 
 752         if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
 753                 dl_err = DL_SYSERR;
 754                 goto failed2;
 755         }
 756 
 757         err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
 758         if (err != 0) {
 759                 switch (err) {
 760                 case EINVAL:
 761                         dl_err = DL_BADADDR;
 762                         err = 0;
 763                         break;
 764                 case ENOSPC:
 765                         dl_err = DL_TOOMANY;
 766                         err = 0;
 767                         break;
 768                 default:
 769                         dl_err = DL_SYSERR;
 770                         break;
 771                 }
 772                 if (dsp->ds_dmap == NULL)
 773                         dls_active_clear(dsp, B_FALSE);
 774                 goto failed2;
 775         }
 776 
 777         mac_perim_exit(mph);
 778 
 779         dlokack(q, mp, DL_ENABMULTI_REQ);
 780         return;
 781 
 782 failed2:
 783         mac_perim_exit(mph);
 784 failed:
 785         dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
 786 }
 787 
 788 /*
 789  * DL_DISABMULTI_REQ
 790  */
 791 static void
 792 proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
 793 {
 794         dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
 795         int             err = 0;
 796         t_uscalar_t     dl_err;
 797         queue_t         *q = dsp->ds_wq;
 798         mac_perim_handle_t      mph;
 799 
 800         if (dsp->ds_dlstate == DL_UNATTACHED ||
 801             DL_ACK_PENDING(dsp->ds_dlstate)) {
 802                 dl_err = DL_OUTSTATE;
 803                 goto failed;
 804         }
 805 
 806         if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
 807             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 808             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 809                 dl_err = DL_BADPRIM;
 810                 goto failed;
 811         }
 812 
 813         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 814         err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
 815         if ((err == 0) && (dsp->ds_dmap == NULL))
 816                 dls_active_clear(dsp, B_FALSE);
 817         mac_perim_exit(mph);
 818 
 819         if (err != 0) {
 820                 switch (err) {
 821                 case EINVAL:
 822                         dl_err = DL_BADADDR;
 823                         err = 0;
 824                         break;
 825 
 826                 case ENOENT:
 827                         dl_err = DL_NOTENAB;
 828                         err = 0;
 829                         break;
 830 
 831                 default:
 832                         dl_err = DL_SYSERR;
 833                         break;
 834                 }
 835                 goto failed;
 836         }
 837         dlokack(q, mp, DL_DISABMULTI_REQ);
 838         return;
 839 failed:
 840         dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
 841 }
 842 
 843 /*
 844  * DL_PHYS_ADDR_REQ
 845  */
 846 static void
 847 proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
 848 {
 849         dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
 850         queue_t         *q = dsp->ds_wq;
 851         t_uscalar_t     dl_err = 0;
 852         char            *addr = NULL;
 853         uint_t          addr_length;
 854 
 855         if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
 856                 dl_err = DL_BADPRIM;
 857                 goto done;
 858         }
 859 
 860         if (dsp->ds_dlstate == DL_UNATTACHED ||
 861             DL_ACK_PENDING(dsp->ds_dlstate)) {
 862                 dl_err = DL_OUTSTATE;
 863                 goto done;
 864         }
 865 
 866         addr_length = dsp->ds_mip->mi_addr_length;
 867         if (addr_length > 0) {
 868                 addr = kmem_alloc(addr_length, KM_SLEEP);
 869                 switch (dlp->dl_addr_type) {
 870                 case DL_CURR_PHYS_ADDR:
 871                         mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
 872                         break;
 873                 case DL_FACT_PHYS_ADDR:
 874                         bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
 875                         break;
 876                 case DL_CURR_DEST_ADDR:
 877                         if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
 878                                 dl_err = DL_NOTSUPPORTED;
 879                         break;
 880                 default:
 881                         dl_err = DL_UNSUPPORTED;
 882                 }
 883         }
 884 done:
 885         if (dl_err == 0)
 886                 dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
 887         else
 888                 dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
 889         if (addr != NULL)
 890                 kmem_free(addr, addr_length);
 891 }
 892 
 893 /*
 894  * DL_SET_PHYS_ADDR_REQ
 895  */
 896 static void
 897 proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
 898 {
 899         dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
 900         int             err = 0;
 901         t_uscalar_t     dl_err;
 902         queue_t         *q = dsp->ds_wq;
 903         mac_perim_handle_t      mph;
 904 
 905         if (dsp->ds_dlstate == DL_UNATTACHED ||
 906             DL_ACK_PENDING(dsp->ds_dlstate)) {
 907                 dl_err = DL_OUTSTATE;
 908                 goto failed;
 909         }
 910 
 911         if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
 912             !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
 913             dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
 914                 dl_err = DL_BADPRIM;
 915                 goto failed;
 916         }
 917 
 918         mac_perim_enter_by_mh(dsp->ds_mh, &mph);
 919 
 920         if ((err = dls_active_set(dsp)) != 0) {
 921                 dl_err = DL_SYSERR;
 922                 goto failed2;
 923         }
 924 
 925         /*
 926          * If mac-nospoof is enabled and the link is owned by a
 927          * non-global zone, changing the mac address is not allowed.
 928          */
 929         if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
 930             mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
 931                 dls_active_clear(dsp, B_FALSE);
 932                 err = EACCES;
 933                 goto failed2;
 934         }
 935 
 936         err = mac_unicast_primary_set(dsp->ds_mh,
 937             mp->b_rptr + dlp->dl_addr_offset);
 938         if (err != 0) {
 939                 switch (err) {
 940                 case EINVAL:
 941                         dl_err = DL_BADADDR;
 942                         err = 0;
 943                         break;
 944 
 945                 default:
 946                         dl_err = DL_SYSERR;
 947                         break;
 948                 }
 949                 dls_active_clear(dsp, B_FALSE);
 950                 goto failed2;
 951 
 952         }
 953 
 954         mac_perim_exit(mph);
 955 
 956         dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
 957         return;
 958 
 959 failed2:
 960         mac_perim_exit(mph);
 961 failed:
 962         dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
 963 }
 964 
 965 /*
 966  * DL_UDQOS_REQ
 967  */
 968 static void
 969 proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
 970 {
 971         dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
 972         dl_qos_cl_sel1_t *selp;
 973         int             off, len;
 974         t_uscalar_t     dl_err;
 975         queue_t         *q = dsp->ds_wq;
 976 
 977         off = dlp->dl_qos_offset;
 978         len = dlp->dl_qos_length;
 979 
 980         if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
 981                 dl_err = DL_BADPRIM;
 982                 goto failed;
 983         }
 984 
 985         selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
 986         if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
 987                 dl_err = DL_BADQOSTYPE;
 988                 goto failed;
 989         }
 990 
 991         if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
 992             selp->dl_priority < 0) {
 993                 dl_err = DL_BADQOSPARAM;
 994                 goto failed;
 995         }
 996 
 997         dsp->ds_pri = selp->dl_priority;
 998         dlokack(q, mp, DL_UDQOS_REQ);
 999         return;
1000 failed:
1001         dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1002 }
1003 
1004 static boolean_t
1005 check_mod_above(queue_t *q, const char *mod)
1006 {
1007         queue_t         *next_q;
1008         boolean_t       ret = B_TRUE;
1009 
1010         claimstr(q);
1011         next_q = q->q_next;
1012         if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1013                 ret = B_FALSE;
1014         releasestr(q);
1015         return (ret);
1016 }
1017 
1018 /*
1019  * DL_CAPABILITY_REQ
1020  */
1021 static void
1022 proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1023 {
1024         dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1025         dl_capability_sub_t *sp;
1026         size_t          size, len;
1027         offset_t        off, end;
1028         t_uscalar_t     dl_err;
1029         queue_t         *q = dsp->ds_wq;
1030 
1031         if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1032                 dl_err = DL_BADPRIM;
1033                 goto failed;
1034         }
1035 
1036         if (dsp->ds_dlstate == DL_UNATTACHED ||
1037             DL_ACK_PENDING(dsp->ds_dlstate)) {
1038                 dl_err = DL_OUTSTATE;
1039                 goto failed;
1040         }
1041 
1042         /*
1043          * This request is overloaded. If there are no requested capabilities
1044          * then we just want to acknowledge with all the capabilities we
1045          * support. Otherwise we enable the set of capabilities requested.
1046          */
1047         if (dlp->dl_sub_length == 0) {
1048                 proto_capability_advertise(dsp, mp);
1049                 return;
1050         }
1051 
1052         if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1053                 dl_err = DL_BADPRIM;
1054                 goto failed;
1055         }
1056 
1057         dlp->dl_primitive = DL_CAPABILITY_ACK;
1058 
1059         off = dlp->dl_sub_offset;
1060         len = dlp->dl_sub_length;
1061 
1062         /*
1063          * Walk the list of capabilities to be enabled.
1064          */
1065         for (end = off + len; off < end; ) {
1066                 sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1067                 size = sizeof (dl_capability_sub_t) + sp->dl_length;
1068 
1069                 if (off + size > end ||
1070                     !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1071                         dl_err = DL_BADPRIM;
1072                         goto failed;
1073                 }
1074 
1075                 switch (sp->dl_cap) {
1076                 /*
1077                  * TCP/IP checksum offload to hardware.
1078                  */
1079                 case DL_CAPAB_HCKSUM: {
1080                         dl_capab_hcksum_t *hcksump;
1081                         dl_capab_hcksum_t hcksum;
1082 
1083                         hcksump = (dl_capab_hcksum_t *)&sp[1];
1084                         /*
1085                          * Copy for alignment.
1086                          */
1087                         bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1088                         dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1089                         bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1090                         break;
1091                 }
1092 
1093                 case DL_CAPAB_DLD: {
1094                         dl_capab_dld_t  *dldp;
1095                         dl_capab_dld_t  dld;
1096 
1097                         dldp = (dl_capab_dld_t *)&sp[1];
1098                         /*
1099                          * Copy for alignment.
1100                          */
1101                         bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1102                         dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1103                         bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1104                         break;
1105                 }
1106                 default:
1107                         break;
1108                 }
1109                 off += size;
1110         }
1111         qreply(q, mp);
1112         return;
1113 failed:
1114         dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1115 }
1116 
1117 /*
1118  * DL_NOTIFY_REQ
1119  */
1120 static void
1121 proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1122 {
1123         dl_notify_req_t *dlp = (dl_notify_req_t *)mp->b_rptr;
1124         t_uscalar_t     dl_err;
1125         queue_t         *q = dsp->ds_wq;
1126         uint_t          note =
1127             DL_NOTE_PROMISC_ON_PHYS |
1128             DL_NOTE_PROMISC_OFF_PHYS |
1129             DL_NOTE_PHYS_ADDR |
1130             DL_NOTE_LINK_UP |
1131             DL_NOTE_LINK_DOWN |
1132             DL_NOTE_CAPAB_RENEG |
1133             DL_NOTE_FASTPATH_FLUSH |
1134             DL_NOTE_SPEED |
1135             DL_NOTE_SDU_SIZE|
1136             DL_NOTE_SDU_SIZE2|
1137             DL_NOTE_ALLOWED_IPS;
1138 
1139         if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1140                 dl_err = DL_BADPRIM;
1141                 goto failed;
1142         }
1143 
1144         if (dsp->ds_dlstate == DL_UNATTACHED ||
1145             DL_ACK_PENDING(dsp->ds_dlstate)) {
1146                 dl_err = DL_OUTSTATE;
1147                 goto failed;
1148         }
1149 
1150         note &= ~(mac_no_notification(dsp->ds_mh));
1151 
1152         /*
1153          * Cache the notifications that are being enabled.
1154          */
1155         dsp->ds_notifications = dlp->dl_notifications & note;
1156         /*
1157          * The ACK carries all notifications regardless of which set is
1158          * being enabled.
1159          */
1160         dlnotifyack(q, mp, note);
1161 
1162         /*
1163          * Generate DL_NOTIFY_IND messages for each enabled notification.
1164          */
1165         if (dsp->ds_notifications != 0) {
1166                 dld_str_notify_ind(dsp);
1167         }
1168         return;
1169 failed:
1170         dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1171 }
1172 
/*
 * DL_UNITDATA_REQ
 *
 * Transmit the M_DATA payload chained behind the M_PROTO request.  The
 * destination in the request is validated, a link-layer header is built
 * with dls_header(), the M_PROTO block is freed and the resulting packet
 * is handed to DLD_TX().
 *
 * Errors are reported as follows:
 *   DL_ERROR_ACK / DL_BADPRIM  - short message, no payload, or destination
 *                                address outside the message or misaligned
 *   DL_ERROR_ACK / DL_OUTSTATE - stream is not in DL_IDLE
 *   DL_ERROR_ACK / DL_BADADDR  - wrong destination length, or dls_header()
 *                                could not build a header
 *   DL_UDERROR_IND / DL_BADDATA - payload contains a non-M_DATA block or
 *                                 is larger than the MAC's maximum SDU
 */
void
proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
{
        queue_t                 *q = dsp->ds_wq;
        dl_unitdata_req_t       *dlp = (dl_unitdata_req_t *)mp->b_rptr;
        off_t                   off;
        size_t                  len, size;
        const uint8_t           *addr;
        uint16_t                sap;
        uint_t                  addr_length;
        mblk_t                  *bp, *payload;
        uint32_t                start, stuff, end, value, flags;
        t_uscalar_t             dl_err;
        uint_t                  max_sdu;

        if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
                dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
                return;
        }

        /*
         * The state check and DLD_DATATHR_INC() are done together under
         * ds_lock.  Every exit path below pairs the increment with a
         * DLD_DATATHR_DCR().
         * NOTE(review): the exact meaning of the counter is defined with
         * the DLD_DATATHR_* macros, outside this function.
         */
        mutex_enter(&dsp->ds_lock);
        if (dsp->ds_dlstate != DL_IDLE) {
                mutex_exit(&dsp->ds_lock);
                dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
                return;
        }
        DLD_DATATHR_INC(dsp);
        mutex_exit(&dsp->ds_lock);

        addr_length = dsp->ds_mip->mi_addr_length;

        off = dlp->dl_dest_addr_offset;
        len = dlp->dl_dest_addr_length;

        if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
                dl_err = DL_BADPRIM;
                goto failed;
        }

        /*
         * The destination is a MAC address immediately followed by a
         * 16-bit SAP, so its length is fixed by the MAC's address length.
         */
        if (len != addr_length + sizeof (uint16_t)) {
                dl_err = DL_BADADDR;
                goto failed;
        }

        addr = mp->b_rptr + off;
        sap = *(uint16_t *)(mp->b_rptr + off + addr_length);

        /*
         * Check the length of the packet and the block types.
         */
        size = 0;
        payload = mp->b_cont;
        for (bp = payload; bp != NULL; bp = bp->b_cont) {
                if (DB_TYPE(bp) != M_DATA)
                        goto baddata;

                size += MBLKL(bp);
        }

        mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
        if (size > max_sdu)
                goto baddata;

        /*
         * Build a packet header.
         */
        if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
            &payload)) == NULL) {
                dl_err = DL_BADADDR;
                goto failed;
        }

        /*
         * We no longer need the M_PROTO header, so free it.
         * Neither dlp nor addr may be used past this point: both point
         * into the block being freed.
         */
        freeb(mp);

        /*
         * Transfer the checksum offload information if it is present.
         */
        hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
            &flags);
        (void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);

        /*
         * Link the payload onto the new header.
         */
        ASSERT(bp->b_cont == NULL);
        bp->b_cont = payload;

        /*
         * No lock can be held across modules and putnext()'s,
         * which can happen here with the call from DLD_TX().
         */
        if (DLD_TX(dsp, bp, 0, 0) != 0) {
                /* flow-controlled */
                DLD_SETQFULL(dsp);
        }
        DLD_DATATHR_DCR(dsp);
        return;

failed:
        /* Malformed request: answer with a DL_ERROR_ACK. */
        dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
        DLD_DATATHR_DCR(dsp);
        return;

baddata:
        /*
         * Unacceptable payload: answer with a DL_UDERROR_IND.  Both
         * jumps to this label come after addr and len have been set.
         */
        dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
        DLD_DATATHR_DCR(dsp);
}
1286 
1287 /*
1288  * DL_PASSIVE_REQ
1289  */
1290 static void
1291 proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1292 {
1293         t_uscalar_t dl_err;
1294 
1295         /*
1296          * If we've already become active by issuing an active primitive,
1297          * then it's too late to try to become passive.
1298          */
1299         if (dsp->ds_passivestate == DLD_ACTIVE) {
1300                 dl_err = DL_OUTSTATE;
1301                 goto failed;
1302         }
1303 
1304         if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1305                 dl_err = DL_BADPRIM;
1306                 goto failed;
1307         }
1308 
1309         dsp->ds_passivestate = DLD_PASSIVE;
1310         dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1311         return;
1312 failed:
1313         dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1314 }
1315 
1316 
1317 /*
1318  * Catch-all handler.
1319  */
1320 static void
1321 proto_req(dld_str_t *dsp, mblk_t *mp)
1322 {
1323         union DL_primitives     *dlp = (union DL_primitives *)mp->b_rptr;
1324 
1325         dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1326 }
1327 
1328 static int
1329 dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1330 {
1331         switch (flags) {
1332         case DLD_ENABLE:
1333                 mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1334                 return (0);
1335 
1336         case DLD_DISABLE:
1337                 mac_perim_exit((mac_perim_handle_t)data);
1338                 return (0);
1339 
1340         case DLD_QUERY:
1341                 return (mac_perim_held(dsp->ds_mh));
1342         }
1343         return (0);
1344 }
1345 
1346 static int
1347 dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1348 {
1349         dld_capab_direct_t      *direct = data;
1350 
1351         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1352 
1353         switch (flags) {
1354         case DLD_ENABLE:
1355                 dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1356                     direct->di_rx_ch);
1357 
1358                 direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1359                 direct->di_tx_dh = dsp;
1360                 direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1361                 direct->di_tx_cb_dh = dsp->ds_mch;
1362                 direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1363                 direct->di_tx_fctl_dh = dsp->ds_mch;
1364 
1365                 dsp->ds_direct = B_TRUE;
1366 
1367                 return (0);
1368 
1369         case DLD_DISABLE:
1370                 dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1371                     dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1372                 dsp->ds_direct = B_FALSE;
1373 
1374                 return (0);
1375         }
1376         return (ENOTSUP);
1377 }
1378 
1379 /*
1380  * dld_capab_poll_enable()
1381  *
1382  * This function is misnamed. All polling  and fanouts are run out of the
1383  * lower mac (in case of VNIC and the only mac in case of NICs). The
1384  * availability of Rx ring and promiscous mode is all taken care between
1385  * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1386  * fanout necessary is done by the soft rings that are part of the
1387  * mac_srs (by default mac_srs sends the packets up via a TCP and
1388  * non TCP soft ring).
1389  *
1390  * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1391  * (the cookie returned when they registered with IP during plumb) as their
1392  * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1393  * function and 1st argument is what the caller registered when they
1394  * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1395  * the function is vnic_rx and argument is vnic_t. For regular NIC
1396  * case, it mac_rx_default and mac_handle_t. As explained above, the
1397  * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1398  * from its stored 2nd argument.
1399  */
1400 static int
1401 dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1402 {
1403         if (dsp->ds_polling)
1404                 return (EINVAL);
1405 
1406         if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1407                 return (ENOTSUP);
1408 
1409         /*
1410          * Enable client polling if and only if DLS bypass is possible.
1411          * Special cases like VLANs need DLS processing in the Rx data path.
1412          * In such a case we can neither allow the client (IP) to directly
1413          * poll the softring (since DLS processing hasn't been done) nor can
1414          * we allow DLS bypass.
1415          */
1416         if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1417                 return (ENOTSUP);
1418 
1419         /*
1420          * Register soft ring resources. This will come in handy later if
1421          * the user decides to modify CPU bindings to use more CPUs for the
1422          * device in which case we will switch to fanout using soft rings.
1423          */
1424         mac_resource_set_common(dsp->ds_mch,
1425             (mac_resource_add_t)poll->poll_ring_add_cf,
1426             (mac_resource_remove_t)poll->poll_ring_remove_cf,
1427             (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1428             (mac_resource_restart_t)poll->poll_ring_restart_cf,
1429             (mac_resource_bind_t)poll->poll_ring_bind_cf,
1430             poll->poll_ring_ch);
1431 
1432         mac_client_poll_enable(dsp->ds_mch);
1433 
1434         dsp->ds_polling = B_TRUE;
1435         return (0);
1436 }
1437 
1438 /* ARGSUSED */
1439 static int
1440 dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1441 {
1442         if (!dsp->ds_polling)
1443                 return (EINVAL);
1444 
1445         mac_client_poll_disable(dsp->ds_mch);
1446         mac_resource_set(dsp->ds_mch, NULL, NULL);
1447 
1448         dsp->ds_polling = B_FALSE;
1449         return (0);
1450 }
1451 
1452 static int
1453 dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1454 {
1455         dld_capab_poll_t        *poll = data;
1456 
1457         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1458 
1459         switch (flags) {
1460         case DLD_ENABLE:
1461                 return (dld_capab_poll_enable(dsp, poll));
1462         case DLD_DISABLE:
1463                 return (dld_capab_poll_disable(dsp, poll));
1464         }
1465         return (ENOTSUP);
1466 }
1467 
1468 static int
1469 dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1470 {
1471         dld_capab_lso_t         *lso = data;
1472 
1473         ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1474 
1475         switch (flags) {
1476         case DLD_ENABLE: {
1477                 mac_capab_lso_t         mac_lso;
1478 
1479                 /*
1480                  * Check if LSO is supported on this MAC & enable LSO
1481                  * accordingly.
1482                  */
1483                 if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1484                         lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1485                         lso->lso_flags = 0;
1486                         /* translate the flag for mac clients */
1487                         if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1488                                 lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1489                         dsp->ds_lso = B_TRUE;
1490                         dsp->ds_lso_max = lso->lso_max;
1491                 } else {
1492                         dsp->ds_lso = B_FALSE;
1493                         dsp->ds_lso_max = 0;
1494                         return (ENOTSUP);
1495                 }
1496                 return (0);
1497         }
1498         case DLD_DISABLE: {
1499                 dsp->ds_lso = B_FALSE;
1500                 dsp->ds_lso_max = 0;
1501                 return (0);
1502         }
1503         }
1504         return (ENOTSUP);
1505 }
1506 
1507 static int
1508 dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1509 {
1510         int     err;
1511 
1512         /*
1513          * Don't enable direct callback capabilities unless the caller is
1514          * the IP client. When a module is inserted in a stream (_I_INSERT)
1515          * the stack initiates capability disable, but due to races, the
1516          * module insertion may complete before the capability disable
1517          * completes. So we limit the check to DLD_ENABLE case.
1518          */
1519         if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1520             (dsp->ds_sap != ETHERTYPE_IP ||
1521             !check_mod_above(dsp->ds_rq, "ip"))) {
1522                 return (ENOTSUP);
1523         }
1524 
1525         switch (type) {
1526         case DLD_CAPAB_DIRECT:
1527                 err = dld_capab_direct(dsp, data, flags);
1528                 break;
1529 
1530         case DLD_CAPAB_POLL:
1531                 err =  dld_capab_poll(dsp, data, flags);
1532                 break;
1533 
1534         case DLD_CAPAB_PERIM:
1535                 err = dld_capab_perim(dsp, data, flags);
1536                 break;
1537 
1538         case DLD_CAPAB_LSO:
1539                 err = dld_capab_lso(dsp, data, flags);
1540                 break;
1541 
1542         default:
1543                 err = ENOTSUP;
1544                 break;
1545         }
1546 
1547         return (err);
1548 }
1549 
/*
 * DL_CAPABILITY_ACK/DL_ERROR_ACK
 *
 * Answer a capability query.  The function works in two passes: it first
 * decides which sub-capabilities apply and adds up the space they need
 * (subsize), then resizes the request message with reallocb() and writes
 * one dl_capability_sub_t header plus payload per advertised capability.
 * The message is sent back with qreply(); if it cannot be resized a
 * DL_ERROR_ACK is sent instead.
 */
static void
proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
{
        dl_capability_ack_t     *dlap;
        dl_capability_sub_t     *dlsp;
        size_t                  subsize;
        dl_capab_dld_t          dld;
        dl_capab_hcksum_t       hcksum;
        dl_capab_zerocopy_t     zcopy;
        dl_capab_vrrp_t         vrrp;
        mac_capab_vrrp_t        vrrp_capab;
        uint8_t                 *ptr;
        queue_t                 *q = dsp->ds_wq;
        mblk_t                  *mp1;
        boolean_t               hcksum_capable = B_FALSE;
        boolean_t               zcopy_capable = B_FALSE;
        boolean_t               dld_capable = B_FALSE;
        boolean_t               vrrp_capable = B_FALSE;

        /*
         * Initially assume no capabilities.
         */
        subsize = 0;

        /*
         * Check if checksum offload is supported on this MAC.
         * It is only advertised when the MAC reports a non-zero set of
         * transmit flags.
         */
        bzero(&hcksum, sizeof (dl_capab_hcksum_t));
        if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
            &hcksum.hcksum_txflags)) {
                if (hcksum.hcksum_txflags != 0) {
                        hcksum_capable = B_TRUE;
                        subsize += sizeof (dl_capability_sub_t) +
                            sizeof (dl_capab_hcksum_t);
                }
        }

        /*
         * Check if zerocopy is supported on this interface.
         * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
         * then reserve space for that capability.
         */
        if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
            !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
                zcopy_capable = B_TRUE;
                subsize += sizeof (dl_capability_sub_t) +
                    sizeof (dl_capab_zerocopy_t);
        }

        /*
         * Direct capability negotiation interface between IP and DLD
         */
        if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
                dld_capable = B_TRUE;
                subsize += sizeof (dl_capability_sub_t) +
                    sizeof (dl_capab_dld_t);
        }

        /*
         * Check if vrrp is supported on this interface. If so, reserve
         * space for that capability.
         */
        if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
                vrrp_capable = B_TRUE;
                subsize += sizeof (dl_capability_sub_t) +
                    sizeof (dl_capab_vrrp_t);
        }

        /*
         * If we can't allocate a response, send a DL_ERROR_ACK.
         * Note that an empty capability set is not treated as an error:
         * with subsize == 0 an ack with an empty sub-capability list is
         * sent.
         */
        if ((mp1 = reallocb(mp,
            sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
                dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
                return;
        }

        /*
         * Turn the (possibly reallocated) message into a zeroed
         * DL_CAPABILITY_ACK of exactly the computed size; ptr walks the
         * sub-capability area that follows the fixed ack header.
         */
        mp = mp1;
        DB_TYPE(mp) = M_PROTO;
        mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
        bzero(mp->b_rptr, MBLKL(mp));
        dlap = (dl_capability_ack_t *)mp->b_rptr;
        dlap->dl_primitive = DL_CAPABILITY_ACK;
        dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
        dlap->dl_sub_length = subsize;
        ptr = (uint8_t *)&dlap[1];

        /*
         * TCP/IP checksum offload.
         */
        if (hcksum_capable) {
                dlsp = (dl_capability_sub_t *)ptr;

                dlsp->dl_cap = DL_CAPAB_HCKSUM;
                dlsp->dl_length = sizeof (dl_capab_hcksum_t);
                ptr += sizeof (dl_capability_sub_t);

                hcksum.hcksum_version = HCKSUM_VERSION_1;
                dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
                bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
                ptr += sizeof (dl_capab_hcksum_t);
        }

        /*
         * Zero copy
         */
        if (zcopy_capable) {
                dlsp = (dl_capability_sub_t *)ptr;

                dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
                dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
                ptr += sizeof (dl_capability_sub_t);

                bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
                zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
                zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;

                dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
                bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
                ptr += sizeof (dl_capab_zerocopy_t);
        }

        /*
         * VRRP capability negotiation
         * (written before the DLD entry, although its space was
         * reserved after it above; only the total has to match).
         */
        if (vrrp_capable) {
                dlsp = (dl_capability_sub_t *)ptr;
                dlsp->dl_cap = DL_CAPAB_VRRP;
                dlsp->dl_length = sizeof (dl_capab_vrrp_t);
                ptr += sizeof (dl_capability_sub_t);

                bzero(&vrrp, sizeof (dl_capab_vrrp_t));
                vrrp.vrrp_af = vrrp_capab.mcv_af;
                bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
                ptr += sizeof (dl_capab_vrrp_t);
        }

        /*
         * Direct capability negotiation interface between IP and DLD.
         * Refer to dld.h for details.
         */
        if (dld_capable) {
                dlsp = (dl_capability_sub_t *)ptr;
                dlsp->dl_cap = DL_CAPAB_DLD;
                dlsp->dl_length = sizeof (dl_capab_dld_t);
                ptr += sizeof (dl_capability_sub_t);

                bzero(&dld, sizeof (dl_capab_dld_t));
                dld.dld_version = DLD_CURRENT_VERSION;
                dld.dld_capab = (uintptr_t)dld_capab;
                dld.dld_capab_handle = (uintptr_t)dsp;

                dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
                bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
                ptr += sizeof (dl_capab_dld_t);
        }

        /* Everything reserved in the first pass must have been written. */
        ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
        qreply(q, mp);
}
1714 
1715 /*
1716  * Disable any enabled capabilities.
1717  */
1718 void
1719 dld_capabilities_disable(dld_str_t *dsp)
1720 {
1721         if (dsp->ds_polling)
1722                 (void) dld_capab_poll_disable(dsp, NULL);
1723 }