1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2018 Joyent, Inc.
  26  */
  27 
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callb.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>
  60 
  61 /*
  62  * Function prototypes.
  63  */
  64 
  65 /* DDI entrypoints */
  66 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
  67 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
  68 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
  69 
  70 /* MAC entrypoints  */
  71 static int vnet_m_stat(void *, uint_t, uint64_t *);
  72 static int vnet_m_start(void *);
  73 static void vnet_m_stop(void *);
  74 static int vnet_m_promisc(void *, boolean_t);
  75 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
  76 static int vnet_m_unicst(void *, const uint8_t *);
  77 mblk_t *vnet_m_tx(void *, mblk_t *);
  78 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
  79 #ifdef  VNET_IOC_DEBUG
  80 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
  81 #endif
  82 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
  83 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
  84         const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
  85 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
  86         mac_group_info_t *infop, mac_group_handle_t handle);
  87 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
  88 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
  89 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
  90         uint64_t *val);
  91 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
  92 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
  93 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
  94         uint64_t *val);
  95 static int vnet_ring_enable_intr(void *arg);
  96 static int vnet_ring_disable_intr(void *arg);
  97 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
  98 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
  99 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
 100 
 101 /* vnet internal functions */
 102 static int vnet_unattach(vnet_t *vnetp);
 103 static void vnet_ring_grp_init(vnet_t *vnetp);
 104 static void vnet_ring_grp_uninit(vnet_t *vnetp);
 105 static int vnet_mac_register(vnet_t *);
 106 static int vnet_read_mac_address(vnet_t *vnetp);
 107 static int vnet_bind_vgenring(vnet_res_t *vresp);
 108 static void vnet_unbind_vgenring(vnet_res_t *vresp);
 109 static int vnet_bind_hwrings(vnet_t *vnetp);
 110 static void vnet_unbind_hwrings(vnet_t *vnetp);
 111 static int vnet_bind_rings(vnet_res_t *vresp);
 112 static void vnet_unbind_rings(vnet_res_t *vresp);
 113 static int vnet_hio_stat(void *, uint_t, uint64_t *);
 114 static int vnet_hio_start(void *);
 115 static void vnet_hio_stop(void *);
 116 mblk_t *vnet_hio_tx(void *, mblk_t *);
 117 
 118 /* Forwarding database (FDB) routines */
 119 static void vnet_fdb_create(vnet_t *vnetp);
 120 static void vnet_fdb_destroy(vnet_t *vnetp);
 121 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
 122 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
 123 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
 124 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
 125 
 126 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
 127 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
 128 static void vnet_tx_update(vio_net_handle_t vrh);
 129 static void vnet_res_start_task(void *arg);
 130 static void vnet_start_resources(vnet_t *vnetp);
 131 static void vnet_stop_resources(vnet_t *vnetp);
 132 static void vnet_dispatch_res_task(vnet_t *vnetp);
 133 static void vnet_res_start_task(void *arg);
 134 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
 135 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
 136 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
 137 static void vnet_tx_notify_thread(void *);
 138 
 139 /* Exported to vnet_gen */
 140 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
 141 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
 142 void vnet_dds_cleanup_hio(vnet_t *vnetp);
 143 
 144 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
 145     vnet_res_t *vresp);
 146 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
 147 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
 148 static void vnet_hio_destroy_kstats(kstat_t *ksp);
 149 
/* Exported to vnet_dds */
 151 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
 152 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
 153 void vnet_hio_mac_cleanup(vnet_t *vnetp);
 154 
 155 /* Externs that are imported from vnet_gen */
 156 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
 157     const uint8_t *macaddr, void **vgenhdl);
 158 extern int vgen_init_mdeg(void *arg);
 159 extern void vgen_uninit(void *arg);
 160 extern int vgen_dds_tx(void *arg, void *dmsg);
 161 extern int vgen_enable_intr(void *arg);
 162 extern int vgen_disable_intr(void *arg);
 163 extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
 164 
 165 /* Externs that are imported from vnet_dds */
 166 extern void vdds_mod_init(void);
 167 extern void vdds_mod_fini(void);
 168 extern int vdds_init(vnet_t *vnetp);
 169 extern void vdds_cleanup(vnet_t *vnetp);
 170 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
 171 extern void vdds_cleanup_hybrid_res(void *arg);
 172 extern void vdds_cleanup_hio(vnet_t *vnetp);
 173 
 174 extern pri_t    minclsyspri;
 175 
 176 #define DRV_NAME        "vnet"
 177 #define VNET_FDBE_REFHOLD(p)                                            \
 178 {                                                                       \
 179         atomic_inc_32(&(p)->refcnt);                                     \
 180         ASSERT((p)->refcnt != 0);                                    \
 181 }
 182 
 183 #define VNET_FDBE_REFRELE(p)                                            \
 184 {                                                                       \
 185         ASSERT((p)->refcnt != 0);                                    \
 186         atomic_dec_32(&(p)->refcnt);                                     \
 187 }
 188 
 189 #ifdef  VNET_IOC_DEBUG
 190 #define VNET_M_CALLBACK_FLAGS   (MC_IOCTL | MC_GETCAPAB)
 191 #else
 192 #define VNET_M_CALLBACK_FLAGS   (MC_GETCAPAB)
 193 #endif
 194 
/*
 * MAC callbacks for the vnet device itself. The unicst and tx entries are
 * deliberately NULL because the driver exposes pseudo rx/tx rings to the
 * mac layer; the ring-level entry points are used instead (see
 * vnet_tx_ring_send() and the ring start/stop/stat routines above).
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};
 209 
/*
 * MAC callbacks used when this vnet opens a Hybrid I/O resource as a mac
 * client; only the stat, start, stop and tx entry points are provided
 * (no optional-callback flags, ioctl or capability support).
 */
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};
 223 
 224 /*
 225  * Linked list of "vnet_t" structures - one per instance.
 226  */
 227 static vnet_t   *vnet_headp = NULL;
 228 static krwlock_t vnet_rw;
 229 
 230 /* Tunables */
 231 uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;
 232 
 233 /*
 234  * Configure tx serialization in mac layer for the vnet device. This tunable
 235  * should be enabled to improve performance only if HybridIO is configured for
 236  * the vnet device.
 237  */
 238 boolean_t vnet_mac_tx_serialize = B_FALSE;
 239 
 240 /* Configure enqueing at Rx soft rings in mac layer for the vnet device */
 241 boolean_t vnet_mac_rx_queuing = B_TRUE;
 242 
 243 /*
 244  * Set this to non-zero to enable additional internal receive buffer pools
 245  * based on the MTU of the device for better performance at the cost of more
 246  * memory consumption. This is turned off by default, to use allocb(9F) for
 247  * receive buffer allocations of sizes > 2K.
 248  */
 249 boolean_t vnet_jumbo_rxpools = B_FALSE;
 250 
 251 /* # of chains in fdb hash table */
 252 uint32_t        vnet_fdb_nchains = VNET_NFDB_HASH;
 253 
 254 /* Internal tunables */
 255 uint32_t        vnet_ethermtu = 1500;   /* mtu of the device */
 256 
 257 /*
 258  * Default vlan id. This is only used internally when the "default-vlan-id"
 259  * property is not present in the MD device node. Therefore, this should not be
 260  * used as a tunable; if this value is changed, the corresponding variable
 261  * should be updated to the same value in vsw and also other vnets connected to
 262  * the same vsw.
 263  */
 264 uint16_t        vnet_default_vlan_id = 1;
 265 
 266 /* delay in usec to wait for all references on a fdb entry to be dropped */
 267 uint32_t vnet_fdbe_refcnt_delay = 10;
 268 
 269 static struct ether_addr etherbroadcastaddr = {
 270         0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 271 };
 272 
 273 /* mac_open() retry delay in usec */
 274 uint32_t vnet_mac_open_delay = 100;     /* 0.1 ms */
 275 
 276 /* max # of mac_open() retries */
 277 uint32_t vnet_mac_open_retries = 100;
 278 
 279 /*
 280  * Property names
 281  */
 282 static char macaddr_propname[] = "local-mac-address";
 283 
 284 /*
 285  * This is the string displayed by modinfo(1m).
 286  */
 287 static char vnet_ident[] = "vnet driver";
 288 extern struct mod_ops mod_driverops;
 289 static struct cb_ops cb_vnetops = {
 290         nulldev,                /* cb_open */
 291         nulldev,                /* cb_close */
 292         nodev,                  /* cb_strategy */
 293         nodev,                  /* cb_print */
 294         nodev,                  /* cb_dump */
 295         nodev,                  /* cb_read */
 296         nodev,                  /* cb_write */
 297         nodev,                  /* cb_ioctl */
 298         nodev,                  /* cb_devmap */
 299         nodev,                  /* cb_mmap */
 300         nodev,                  /* cb_segmap */
 301         nochpoll,               /* cb_chpoll */
 302         ddi_prop_op,            /* cb_prop_op */
 303         NULL,                   /* cb_stream */
 304         (int)(D_MP)             /* cb_flag */
 305 };
 306 
/*
 * Device operations. devo_getinfo is filled in by mac_init_ops() in _init();
 * quiesce is not supported by this driver.
 */
static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};
 321 
/* Module linkage: a single driver module. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};
 331 
 332 #ifdef DEBUG
 333 
 334 #define DEBUG_PRINTF    debug_printf
 335 
 336 /*
 337  * Print debug messages - set to 0xf to enable all msgs
 338  */
 339 int vnet_dbglevel = 0x8;
 340 
 341 static void
 342 debug_printf(const char *fname, void *arg, const char *fmt, ...)
 343 {
 344         char    buf[512];
 345         va_list ap;
 346         vnet_t *vnetp = (vnet_t *)arg;
 347         char    *bufp = buf;
 348 
 349         if (vnetp == NULL) {
 350                 (void) sprintf(bufp, "%s: ", fname);
 351                 bufp += strlen(bufp);
 352         } else {
 353                 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
 354                 bufp += strlen(bufp);
 355         }
 356         va_start(ap, fmt);
 357         (void) vsprintf(bufp, fmt, ap);
 358         va_end(ap);
 359         cmn_err(CE_CONT, "%s\n", buf);
 360 }
 361 
 362 #endif
 363 
 364 /* _init(9E): initialize the loadable module */
 365 int
 366 _init(void)
 367 {
 368         int status;
 369 
 370         DBG1(NULL, "enter\n");
 371 
 372         mac_init_ops(&vnetops, "vnet");
 373         status = mod_install(&modlinkage);
 374         if (status != 0) {
 375                 mac_fini_ops(&vnetops);
 376         }
 377         vdds_mod_init();
 378         DBG1(NULL, "exit(%d)\n", status);
 379         return (status);
 380 }
 381 
 382 /* _fini(9E): prepare the module for unloading. */
 383 int
 384 _fini(void)
 385 {
 386         int             status;
 387 
 388         DBG1(NULL, "enter\n");
 389 
 390         status = mod_remove(&modlinkage);
 391         if (status != 0)
 392                 return (status);
 393         mac_fini_ops(&vnetops);
 394         vdds_mod_fini();
 395 
 396         DBG1(NULL, "exit(%d)\n", status);
 397         return (status);
 398 }
 399 
 400 /* _info(9E): return information about the loadable module */
 401 int
 402 _info(struct modinfo *modinfop)
 403 {
 404         return (mod_info(&modlinkage, modinfop));
 405 }
 406 
 407 /*
 408  * attach(9E): attach a device to the system.
 409  * called once for each instance of the device on the system.
 410  */
 411 static int
 412 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 413 {
 414         vnet_t                  *vnetp;
 415         int                     status;
 416         int                     instance;
 417         uint64_t                reg;
 418         char                    qname[TASKQ_NAMELEN];
 419         vnet_attach_progress_t  attach_progress;
 420 
 421         attach_progress = AST_init;
 422 
 423         switch (cmd) {
 424         case DDI_ATTACH:
 425                 break;
 426         case DDI_RESUME:
 427         case DDI_PM_RESUME:
 428         default:
 429                 goto vnet_attach_fail;
 430         }
 431 
 432         instance = ddi_get_instance(dip);
 433         DBG1(NULL, "instance(%d) enter\n", instance);
 434 
 435         /* allocate vnet_t and mac_t structures */
 436         vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
 437         vnetp->dip = dip;
 438         vnetp->instance = instance;
 439         rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
 440         rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
 441         attach_progress |= AST_vnet_alloc;
 442 
 443         vnet_ring_grp_init(vnetp);
 444         attach_progress |= AST_ring_init;
 445 
 446         status = vdds_init(vnetp);
 447         if (status != 0) {
 448                 goto vnet_attach_fail;
 449         }
 450         attach_progress |= AST_vdds_init;
 451 
 452         /* setup links to vnet_t from both devinfo and mac_t */
 453         ddi_set_driver_private(dip, (caddr_t)vnetp);
 454 
 455         /* read the mac address */
 456         status = vnet_read_mac_address(vnetp);
 457         if (status != DDI_SUCCESS) {
 458                 goto vnet_attach_fail;
 459         }
 460         attach_progress |= AST_read_macaddr;
 461 
 462         reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
 463             DDI_PROP_DONTPASS, "reg", -1);
 464         if (reg == -1) {
 465                 goto vnet_attach_fail;
 466         }
 467         vnetp->reg = reg;
 468 
 469         vnet_fdb_create(vnetp);
 470         attach_progress |= AST_fdbh_alloc;
 471 
 472         (void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
 473         if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
 474             TASKQ_DEFAULTPRI, 0)) == NULL) {
 475                 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
 476                     instance);
 477                 goto vnet_attach_fail;
 478         }
 479         attach_progress |= AST_taskq_create;
 480 
 481         /* add to the list of vnet devices */
 482         WRITE_ENTER(&vnet_rw);
 483         vnetp->nextp = vnet_headp;
 484         vnet_headp = vnetp;
 485         RW_EXIT(&vnet_rw);
 486 
 487         attach_progress |= AST_vnet_list;
 488 
 489         /*
 490          * Initialize the generic vnet plugin which provides communication via
 491          * sun4v LDC (logical domain channel) based resources. This involves 2
 492          * steps; first, vgen_init() is invoked to read the various properties
 493          * of the vnet device from its MD node (including its mtu which is
 494          * needed to mac_register()) and obtain a handle to the vgen layer.
 495          * After mac_register() is done and we have a mac handle, we then
 496          * invoke vgen_init_mdeg() which registers with the the MD event
 497          * generator (mdeg) framework to allow LDC resource notifications.
 498          * Note: this sequence also allows us to report the correct default #
 499          * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
 500          * in the context of mac_register(); and avoids conflicting with
 501          * dynamic pseudo rx rings which get added/removed as a result of mdeg
 502          * events in vgen.
 503          */
 504         status = vgen_init(vnetp, reg, vnetp->dip,
 505             (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
 506         if (status != DDI_SUCCESS) {
 507                 DERR(vnetp, "vgen_init() failed\n");
 508                 goto vnet_attach_fail;
 509         }
 510         attach_progress |= AST_vgen_init;
 511 
 512         status = vnet_mac_register(vnetp);
 513         if (status != DDI_SUCCESS) {
 514                 goto vnet_attach_fail;
 515         }
 516         vnetp->link_state = LINK_STATE_UNKNOWN;
 517         attach_progress |= AST_macreg;
 518 
 519         status = vgen_init_mdeg(vnetp->vgenhdl);
 520         if (status != DDI_SUCCESS) {
 521                 goto vnet_attach_fail;
 522         }
 523         attach_progress |= AST_init_mdeg;
 524 
 525         vnetp->attach_progress = attach_progress;
 526 
 527         DBG1(NULL, "instance(%d) exit\n", instance);
 528         return (DDI_SUCCESS);
 529 
 530 vnet_attach_fail:
 531         vnetp->attach_progress = attach_progress;
 532         status = vnet_unattach(vnetp);
 533         ASSERT(status == 0);
 534         return (DDI_FAILURE);
 535 }
 536 
 537 /*
 538  * detach(9E): detach a device from the system.
 539  */
 540 static int
 541 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 542 {
 543         vnet_t          *vnetp;
 544         int             instance;
 545 
 546         instance = ddi_get_instance(dip);
 547         DBG1(NULL, "instance(%d) enter\n", instance);
 548 
 549         vnetp = ddi_get_driver_private(dip);
 550         if (vnetp == NULL) {
 551                 goto vnet_detach_fail;
 552         }
 553 
 554         switch (cmd) {
 555         case DDI_DETACH:
 556                 break;
 557         case DDI_SUSPEND:
 558         case DDI_PM_SUSPEND:
 559         default:
 560                 goto vnet_detach_fail;
 561         }
 562 
 563         if (vnet_unattach(vnetp) != 0) {
 564                 goto vnet_detach_fail;
 565         }
 566 
 567         return (DDI_SUCCESS);
 568 
 569 vnet_detach_fail:
 570         return (DDI_FAILURE);
 571 }
 572 
 573 /*
 574  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 575  * the only reason this function could fail is if mac_unregister() fails.
 576  * Otherwise, this function must ensure that all resources are freed and return
 577  * success.
 578  */
 579 static int
 580 vnet_unattach(vnet_t *vnetp)
 581 {
 582         vnet_attach_progress_t  attach_progress;
 583 
 584         attach_progress = vnetp->attach_progress;
 585 
 586         /*
 587          * Disable the mac device in the gldv3 subsystem. This can fail, in
 588          * particular if there are still any open references to this mac
 589          * device; in which case we just return failure without continuing to
 590          * detach further.
 591          * If it succeeds, we then invoke vgen_uninit() which should unregister
 592          * any pseudo rings registered with the mac layer. Note we keep the
 593          * AST_macreg flag on, so we can unregister with the mac layer at
 594          * the end of this routine.
 595          */
 596         if (attach_progress & AST_macreg) {
 597                 if (mac_disable(vnetp->mh) != 0) {
 598                         return (1);
 599                 }
 600         }
 601 
 602         /*
 603          * Now that we have disabled the device, we must finish all other steps
 604          * and successfully return from this function; otherwise we will end up
 605          * leaving the device in a broken/unusable state.
 606          *
 607          * First, release any hybrid resources assigned to this vnet device.
 608          */
 609         if (attach_progress & AST_vdds_init) {
 610                 vdds_cleanup(vnetp);
 611                 attach_progress &= ~AST_vdds_init;
 612         }
 613 
 614         /*
 615          * Uninit vgen. This stops further mdeg callbacks to this vnet
 616          * device and/or its ports; and detaches any existing ports.
 617          */
 618         if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
 619                 vgen_uninit(vnetp->vgenhdl);
 620                 attach_progress &= ~AST_vgen_init;
 621                 attach_progress &= ~AST_init_mdeg;
 622         }
 623 
 624         /* Destroy the taskq. */
 625         if (attach_progress & AST_taskq_create) {
 626                 ddi_taskq_destroy(vnetp->taskqp);
 627                 attach_progress &= ~AST_taskq_create;
 628         }
 629 
 630         /* Destroy fdb. */
 631         if (attach_progress & AST_fdbh_alloc) {
 632                 vnet_fdb_destroy(vnetp);
 633                 attach_progress &= ~AST_fdbh_alloc;
 634         }
 635 
 636         /* Remove from the device list */
 637         if (attach_progress & AST_vnet_list) {
 638                 vnet_t          **vnetpp;
 639                 /* unlink from instance(vnet_t) list */
 640                 WRITE_ENTER(&vnet_rw);
 641                 for (vnetpp = &vnet_headp; *vnetpp;
 642                     vnetpp = &(*vnetpp)->nextp) {
 643                         if (*vnetpp == vnetp) {
 644                                 *vnetpp = vnetp->nextp;
 645                                 break;
 646                         }
 647                 }
 648                 RW_EXIT(&vnet_rw);
 649                 attach_progress &= ~AST_vnet_list;
 650         }
 651 
 652         if (attach_progress & AST_ring_init) {
 653                 vnet_ring_grp_uninit(vnetp);
 654                 attach_progress &= ~AST_ring_init;
 655         }
 656 
 657         if (attach_progress & AST_macreg) {
 658                 VERIFY(mac_unregister(vnetp->mh) == 0);
 659                 vnetp->mh = NULL;
 660                 attach_progress &= ~AST_macreg;
 661         }
 662 
 663         if (attach_progress & AST_vnet_alloc) {
 664                 rw_destroy(&vnetp->vrwlock);
 665                 rw_destroy(&vnetp->vsw_fp_rw);
 666                 attach_progress &= ~AST_vnet_list;
 667                 KMEM_FREE(vnetp);
 668         }
 669 
 670         return (0);
 671 }
 672 
 673 /* enable the device for transmit/receive */
 674 static int
 675 vnet_m_start(void *arg)
 676 {
 677         vnet_t          *vnetp = arg;
 678 
 679         DBG1(vnetp, "enter\n");
 680 
 681         WRITE_ENTER(&vnetp->vrwlock);
 682         vnetp->flags |= VNET_STARTED;
 683         vnet_start_resources(vnetp);
 684         RW_EXIT(&vnetp->vrwlock);
 685 
 686         DBG1(vnetp, "exit\n");
 687         return (VNET_SUCCESS);
 688 
 689 }
 690 
 691 /* stop transmit/receive for the device */
 692 static void
 693 vnet_m_stop(void *arg)
 694 {
 695         vnet_t          *vnetp = arg;
 696 
 697         DBG1(vnetp, "enter\n");
 698 
 699         WRITE_ENTER(&vnetp->vrwlock);
 700         if (vnetp->flags & VNET_STARTED) {
 701                 /*
 702                  * Set the flags appropriately; this should prevent starting of
 703                  * any new resources that are added(see vnet_res_start_task()),
 704                  * while we release the vrwlock in vnet_stop_resources() before
 705                  * stopping each resource.
 706                  */
 707                 vnetp->flags &= ~VNET_STARTED;
 708                 vnetp->flags |= VNET_STOPPING;
 709                 vnet_stop_resources(vnetp);
 710                 vnetp->flags &= ~VNET_STOPPING;
 711         }
 712         RW_EXIT(&vnetp->vrwlock);
 713 
 714         DBG1(vnetp, "exit\n");
 715 }
 716 
 717 /* set the unicast mac address of the device */
 718 static int
 719 vnet_m_unicst(void *arg, const uint8_t *macaddr)
 720 {
 721         _NOTE(ARGUNUSED(macaddr))
 722 
 723         vnet_t *vnetp = arg;
 724 
 725         DBG1(vnetp, "enter\n");
 726         /*
 727          * NOTE: setting mac address dynamically is not supported.
 728          */
 729         DBG1(vnetp, "exit\n");
 730 
 731         return (VNET_FAILURE);
 732 }
 733 
 734 /* enable/disable a multicast address */
 735 static int
 736 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
 737 {
 738         _NOTE(ARGUNUSED(add, mca))
 739 
 740         vnet_t          *vnetp = arg;
 741         vnet_res_t      *vresp;
 742         mac_register_t  *macp;
 743         mac_callbacks_t *cbp;
 744         int             rv = VNET_SUCCESS;
 745 
 746         DBG1(vnetp, "enter\n");
 747 
 748         READ_ENTER(&vnetp->vsw_fp_rw);
 749         if (vnetp->vsw_fp == NULL) {
 750                 RW_EXIT(&vnetp->vsw_fp_rw);
 751                 return (EAGAIN);
 752         }
 753         VNET_FDBE_REFHOLD(vnetp->vsw_fp);
 754         RW_EXIT(&vnetp->vsw_fp_rw);
 755 
 756         vresp = vnetp->vsw_fp;
 757         macp = &vresp->macreg;
 758         cbp = macp->m_callbacks;
 759         rv = cbp->mc_multicst(macp->m_driver, add, mca);
 760 
 761         VNET_FDBE_REFRELE(vnetp->vsw_fp);
 762 
 763         DBG1(vnetp, "exit(%d)\n", rv);
 764         return (rv);
 765 }
 766 
 767 /* set or clear promiscuous mode on the device */
 768 static int
 769 vnet_m_promisc(void *arg, boolean_t on)
 770 {
 771         _NOTE(ARGUNUSED(on))
 772 
 773         vnet_t *vnetp = arg;
 774         DBG1(vnetp, "enter\n");
 775         /*
 776          * NOTE: setting promiscuous mode is not supported, just return success.
 777          */
 778         DBG1(vnetp, "exit\n");
 779         return (VNET_SUCCESS);
 780 }
 781 
 782 /*
 783  * Transmit a chain of packets. This function provides switching functionality
 784  * based on the destination mac address to reach other guests (within ldoms) or
 785  * external hosts.
 786  */
 787 mblk_t *
 788 vnet_tx_ring_send(void *arg, mblk_t *mp)
 789 {
 790         vnet_pseudo_tx_ring_t   *tx_ringp;
 791         vnet_tx_ring_stats_t    *statsp;
 792         vnet_t                  *vnetp;
 793         vnet_res_t              *vresp;
 794         mblk_t                  *next;
 795         mblk_t                  *resid_mp;
 796         mac_register_t          *macp;
 797         struct ether_header     *ehp;
 798         boolean_t               is_unicast;
 799         boolean_t               is_pvid;        /* non-default pvid ? */
 800         boolean_t               hres;           /* Hybrid resource ? */
 801         void                    *tx_arg;
 802         size_t                  size;
 803 
 804         tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
 805         statsp = &tx_ringp->tx_ring_stats;
 806         vnetp = (vnet_t *)tx_ringp->vnetp;
 807         DBG1(vnetp, "enter\n");
 808         ASSERT(mp != NULL);
 809 
 810         is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
 811 
 812         while (mp != NULL) {
 813 
 814                 next = mp->b_next;
 815                 mp->b_next = NULL;
 816 
 817                 /* update stats */
 818                 size = msgsize(mp);
 819 
 820                 /*
 821                  * Find fdb entry for the destination
 822                  * and hold a reference to it.
 823                  */
 824                 ehp = (struct ether_header *)mp->b_rptr;
 825                 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
 826                 if (vresp != NULL) {
 827 
 828                         /*
 829                          * Destination found in FDB.
 830                          * The destination is a vnet device within ldoms
 831                          * and directly reachable, invoke the tx function
 832                          * in the fdb entry.
 833                          */
 834                         macp = &vresp->macreg;
 835                         resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
 836 
 837                         /* tx done; now release ref on fdb entry */
 838                         VNET_FDBE_REFRELE(vresp);
 839 
 840                         if (resid_mp != NULL) {
 841                                 /* m_tx failed */
 842                                 mp->b_next = next;
 843                                 break;
 844                         }
 845                 } else {
 846                         is_unicast = !(IS_BROADCAST(ehp) ||
 847                             (IS_MULTICAST(ehp)));
 848                         /*
 849                          * Destination is not in FDB.
 850                          * If the destination is broadcast or multicast,
 851                          * then forward the packet to vswitch.
 852                          * If a Hybrid resource avilable, then send the
 853                          * unicast packet via hybrid resource, otherwise
 854                          * forward it to vswitch.
 855                          */
 856                         READ_ENTER(&vnetp->vsw_fp_rw);
 857 
 858                         if ((is_unicast) && (vnetp->hio_fp != NULL)) {
 859                                 vresp = vnetp->hio_fp;
 860                                 hres = B_TRUE;
 861                         } else {
 862                                 vresp = vnetp->vsw_fp;
 863                                 hres = B_FALSE;
 864                         }
 865                         if (vresp == NULL) {
 866                                 /*
 867                                  * no fdb entry to vsw? drop the packet.
 868                                  */
 869                                 RW_EXIT(&vnetp->vsw_fp_rw);
 870                                 freemsg(mp);
 871                                 mp = next;
 872                                 continue;
 873                         }
 874 
 875                         /* ref hold the fdb entry to vsw */
 876                         VNET_FDBE_REFHOLD(vresp);
 877 
 878                         RW_EXIT(&vnetp->vsw_fp_rw);
 879 
 880                         /*
 881                          * In the case of a hybrid resource we need to insert
 882                          * the tag for the pvid case here; unlike packets that
 883                          * are destined to a vnet/vsw in which case the vgen
 884                          * layer does the tagging before sending it over ldc.
 885                          */
 886                         if (hres == B_TRUE) {
 887                                 /*
 888                                  * Determine if the frame being transmitted
 889                                  * over the hybrid resource is untagged. If so,
 890                                  * insert the tag before transmitting.
 891                                  */
 892                                 if (is_pvid == B_TRUE &&
 893                                     ehp->ether_type != htons(ETHERTYPE_VLAN)) {
 894 
 895                                         mp = vnet_vlan_insert_tag(mp,
 896                                             vnetp->pvid);
 897                                         if (mp == NULL) {
 898                                                 VNET_FDBE_REFRELE(vresp);
 899                                                 mp = next;
 900                                                 continue;
 901                                         }
 902 
 903                                 }
 904 
 905                                 macp = &vresp->macreg;
 906                                 tx_arg = tx_ringp;
 907                         } else {
 908                                 macp = &vresp->macreg;
 909                                 tx_arg = macp->m_driver;
 910                         }
 911                         resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
 912 
 913                         /* tx done; now release ref on fdb entry */
 914                         VNET_FDBE_REFRELE(vresp);
 915 
 916                         if (resid_mp != NULL) {
 917                                 /* m_tx failed */
 918                                 mp->b_next = next;
 919                                 break;
 920                         }
 921                 }
 922 
 923                 statsp->obytes += size;
 924                 statsp->opackets++;
 925                 mp = next;
 926         }
 927 
 928         DBG1(vnetp, "exit\n");
 929         return (mp);
 930 }
 931 
 932 /* get statistics from the device */
 933 int
 934 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
 935 {
 936         vnet_t *vnetp = arg;
 937         vnet_res_t      *vresp;
 938         mac_register_t  *macp;
 939         mac_callbacks_t *cbp;
 940         uint64_t val_total = 0;
 941 
 942         DBG1(vnetp, "enter\n");
 943 
 944         /*
 945          * get the specified statistic from each transport and return the
 946          * aggregate val.  This obviously only works for counters.
 947          */
 948         if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
 949             (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
 950                 return (ENOTSUP);
 951         }
 952 
 953         READ_ENTER(&vnetp->vrwlock);
 954         for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
 955                 macp = &vresp->macreg;
 956                 cbp = macp->m_callbacks;
 957                 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
 958                         val_total += *val;
 959         }
 960         RW_EXIT(&vnetp->vrwlock);
 961 
 962         *val = val_total;
 963 
 964         DBG1(vnetp, "exit\n");
 965         return (0);
 966 }
 967 
 968 static void
 969 vnet_ring_grp_init(vnet_t *vnetp)
 970 {
 971         vnet_pseudo_rx_group_t  *rx_grp;
 972         vnet_pseudo_rx_ring_t   *rx_ringp;
 973         vnet_pseudo_tx_group_t  *tx_grp;
 974         vnet_pseudo_tx_ring_t   *tx_ringp;
 975         int                     i;
 976 
 977         tx_grp = &vnetp->tx_grp[0];
 978         tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
 979             VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
 980         for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
 981                 tx_ringp[i].state |= VNET_TXRING_SHARED;
 982         }
 983         tx_grp->rings = tx_ringp;
 984         tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
 985         mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
 986         cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
 987         tx_grp->flowctl_thread = thread_create(NULL, 0,
 988             vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);
 989 
 990         rx_grp = &vnetp->rx_grp[0];
 991         rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
 992         rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
 993         rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
 994             rx_grp->max_ring_cnt, KM_SLEEP);
 995 
 996         /*
 997          * Setup the first 3 Pseudo RX Rings that are reserved;
 998          * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
 999          */
1000         rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
1001         rx_ringp[0].index = 0;
1002         rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1003         rx_ringp[1].index = 1;
1004         rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1005         rx_ringp[2].index = 2;
1006 
1007         rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1008         rx_grp->rings = rx_ringp;
1009 
1010         for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1011             i < rx_grp->max_ring_cnt; i++) {
1012                 rx_ringp = &rx_grp->rings[i];
1013                 rx_ringp->state = VNET_RXRING_FREE;
1014                 rx_ringp->index = i;
1015         }
1016 }
1017 
/*
 * Tear down the pseudo TX and RX ring groups: stop the tx flow-control
 * notify thread, then free both ring arrays.
 */
static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	kt_did_t		tid = 0;

	tx_grp = &vnetp->tx_grp[0];

	/* Inform tx_notify_thread to exit */
	mutex_enter(&tx_grp->flowctl_lock);
	if (tx_grp->flowctl_thread != NULL) {
		/*
		 * Capture the thread id before signalling; the thread
		 * clears flowctl_thread itself on its way out.
		 */
		tid = tx_grp->flowctl_thread->t_did;
		tx_grp->flowctl_done = B_TRUE;
		cv_signal(&tx_grp->flowctl_cv);
	}
	mutex_exit(&tx_grp->flowctl_lock);
	/* Wait for the thread to exit before freeing the rings it uses. */
	if (tid != 0)
		thread_join(tid);

	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		/* All dynamically allocated rings must have been freed. */
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}
1054 
1055 static vnet_pseudo_rx_ring_t *
1056 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1057 {
1058         vnet_pseudo_rx_group_t  *rx_grp;
1059         vnet_pseudo_rx_ring_t   *rx_ringp;
1060         int                     index;
1061 
1062         rx_grp = &vnetp->rx_grp[0];
1063         WRITE_ENTER(&rx_grp->lock);
1064 
1065         if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1066                 /* no rings available */
1067                 RW_EXIT(&rx_grp->lock);
1068                 return (NULL);
1069         }
1070 
1071         for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1072             index < rx_grp->max_ring_cnt; index++) {
1073                 rx_ringp = &rx_grp->rings[index];
1074                 if (rx_ringp->state == VNET_RXRING_FREE) {
1075                         rx_ringp->state |= VNET_RXRING_INUSE;
1076                         rx_grp->ring_cnt++;
1077                         break;
1078                 }
1079         }
1080 
1081         RW_EXIT(&rx_grp->lock);
1082         return (rx_ringp);
1083 }
1084 
1085 static void
1086 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1087 {
1088         vnet_pseudo_rx_group_t  *rx_grp;
1089 
1090         ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1091         rx_grp = &vnetp->rx_grp[0];
1092         WRITE_ENTER(&rx_grp->lock);
1093 
1094         if (ringp->state != VNET_RXRING_FREE) {
1095                 ringp->state = VNET_RXRING_FREE;
1096                 ringp->handle = NULL;
1097                 rx_grp->ring_cnt--;
1098         }
1099 
1100         RW_EXIT(&rx_grp->lock);
1101 }
1102 
1103 /* wrapper function for mac_register() */
1104 static int
1105 vnet_mac_register(vnet_t *vnetp)
1106 {
1107         mac_register_t  *macp;
1108         int             err;
1109 
1110         if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1111                 return (DDI_FAILURE);
1112         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1113         macp->m_driver = vnetp;
1114         macp->m_dip = vnetp->dip;
1115         macp->m_src_addr = vnetp->curr_macaddr;
1116         macp->m_callbacks = &vnet_m_callbacks;
1117         macp->m_min_sdu = 0;
1118         macp->m_max_sdu = vnetp->mtu;
1119         macp->m_margin = VLAN_TAGSZ;
1120 
1121         macp->m_v12n = MAC_VIRT_LEVEL1;
1122 
1123         /*
1124          * Finally, we're ready to register ourselves with the MAC layer
1125          * interface; if this succeeds, we're all ready to start()
1126          */
1127         err = mac_register(macp, &vnetp->mh);
1128         mac_free(macp);
1129         return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1130 }
1131 
1132 /* read the mac address of the device */
1133 static int
1134 vnet_read_mac_address(vnet_t *vnetp)
1135 {
1136         uchar_t         *macaddr;
1137         uint32_t        size;
1138         int             rv;
1139 
1140         rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1141             DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1142         if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1143                 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1144                     macaddr_propname, rv);
1145                 return (DDI_FAILURE);
1146         }
1147         bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1148         bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1149         ddi_prop_free(macaddr);
1150 
1151         return (DDI_SUCCESS);
1152 }
1153 
1154 static void
1155 vnet_fdb_create(vnet_t *vnetp)
1156 {
1157         char            hashname[MAXNAMELEN];
1158 
1159         (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1160             vnetp->instance);
1161         vnetp->fdb_nchains = vnet_fdb_nchains;
1162         vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1163             mod_hash_null_valdtor, sizeof (void *));
1164 }
1165 
1166 static void
1167 vnet_fdb_destroy(vnet_t *vnetp)
1168 {
1169         /* destroy fdb-hash-table */
1170         if (vnetp->fdb_hashp != NULL) {
1171                 mod_hash_destroy_hash(vnetp->fdb_hashp);
1172                 vnetp->fdb_hashp = NULL;
1173                 vnetp->fdb_nchains = 0;
1174         }
1175 }
1176 
1177 /*
1178  * Add an entry into the fdb.
1179  */
1180 void
1181 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
1182 {
1183         uint64_t        addr = 0;
1184         int             rv;
1185 
1186         KEY_HASH(addr, vresp->rem_macaddr);
1187 
1188         /*
1189          * If the entry being added corresponds to LDC_SERVICE resource,
1190          * that is, vswitch connection, it is added to the hash and also
1191          * the entry is cached, an additional reference count reflects
1192          * this. The HYBRID resource is not added to the hash, but only
1193          * cached, as it is only used for sending out packets for unknown
1194          * unicast destinations.
1195          */
1196         (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1197             (vresp->refcnt = 1) : (vresp->refcnt = 0);
1198 
1199         /*
1200          * Note: duplicate keys will be rejected by mod_hash.
1201          */
1202         if (vresp->type != VIO_NET_RES_HYBRID) {
1203                 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1204                     (mod_hash_val_t)vresp);
1205                 if (rv != 0) {
1206                         DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
1207                         return;
1208                 }
1209         }
1210 
1211         if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1212                 /* Cache the fdb entry to vsw-port */
1213                 WRITE_ENTER(&vnetp->vsw_fp_rw);
1214                 if (vnetp->vsw_fp == NULL)
1215                         vnetp->vsw_fp = vresp;
1216                 RW_EXIT(&vnetp->vsw_fp_rw);
1217         } else if (vresp->type == VIO_NET_RES_HYBRID) {
1218                 /* Cache the fdb entry to hybrid resource */
1219                 WRITE_ENTER(&vnetp->vsw_fp_rw);
1220                 if (vnetp->hio_fp == NULL)
1221                         vnetp->hio_fp = vresp;
1222                 RW_EXIT(&vnetp->vsw_fp_rw);
1223         }
1224 }
1225 
1226 /*
1227  * Remove an entry from fdb.
1228  */
1229 static void
1230 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
1231 {
1232         uint64_t        addr = 0;
1233         int             rv;
1234         uint32_t        refcnt;
1235         vnet_res_t      *tmp;
1236 
1237         KEY_HASH(addr, vresp->rem_macaddr);
1238 
1239         /*
1240          * Remove the entry from fdb hash table.
1241          * This prevents further references to this fdb entry.
1242          */
1243         if (vresp->type != VIO_NET_RES_HYBRID) {
1244                 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1245                     (mod_hash_val_t *)&tmp);
1246                 if (rv != 0) {
1247                         /*
1248                          * As the resources are added to the hash only
1249                          * after they are started, this can occur if
1250                          * a resource unregisters before it is ever started.
1251                          */
1252                         return;
1253                 }
1254         }
1255 
1256         if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1257                 WRITE_ENTER(&vnetp->vsw_fp_rw);
1258 
1259                 ASSERT(tmp == vnetp->vsw_fp);
1260                 vnetp->vsw_fp = NULL;
1261 
1262                 RW_EXIT(&vnetp->vsw_fp_rw);
1263         } else if (vresp->type == VIO_NET_RES_HYBRID) {
1264                 WRITE_ENTER(&vnetp->vsw_fp_rw);
1265 
1266                 vnetp->hio_fp = NULL;
1267 
1268                 RW_EXIT(&vnetp->vsw_fp_rw);
1269         }
1270 
1271         /*
1272          * If there are threads already ref holding before the entry was
1273          * removed from hash table, then wait for ref count to drop to zero.
1274          */
1275         (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1276             (refcnt = 1) : (refcnt = 0);
1277         while (vresp->refcnt > refcnt) {
1278                 delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1279         }
1280 }
1281 
1282 /*
1283  * Search fdb for a given mac address. If an entry is found, hold
1284  * a reference to it and return the entry; else returns NULL.
1285  */
1286 static vnet_res_t *
1287 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1288 {
1289         uint64_t        key = 0;
1290         vnet_res_t      *vresp;
1291         int             rv;
1292 
1293         KEY_HASH(key, addrp->ether_addr_octet);
1294 
1295         rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1296             (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1297 
1298         if (rv != 0)
1299                 return (NULL);
1300 
1301         return (vresp);
1302 }
1303 
1304 /*
1305  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1306  * entry corresponding to the key (macaddr), this callback will be invoked by
1307  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1308  * entry before returning the found entry.
1309  */
1310 static void
1311 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1312 {
1313         _NOTE(ARGUNUSED(key))
1314         VNET_FDBE_REFHOLD((vnet_res_t *)val);
1315 }
1316 
1317 /*
1318  * Frames received that are tagged with the pvid of the vnet device must be
1319  * untagged before sending up the stack. This function walks the chain of rx
1320  * frames, untags any such frames and returns the updated chain.
1321  *
1322  * Arguments:
1323  *    pvid:  pvid of the vnet device for which packets are being received
1324  *    mp:    head of pkt chain to be validated and untagged
1325  *
1326  * Returns:
1327  *    mp:    head of updated chain of packets
1328  */
1329 static void
1330 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1331 {
1332         struct ether_vlan_header        *evhp;
1333         mblk_t                          *bp;
1334         mblk_t                          *bpt;
1335         mblk_t                          *bph;
1336         mblk_t                          *bpn;
1337 
1338         bpn = bph = bpt = NULL;
1339 
1340         for (bp = *mp; bp != NULL; bp = bpn) {
1341 
1342                 bpn = bp->b_next;
1343                 bp->b_next = bp->b_prev = NULL;
1344 
1345                 evhp = (struct ether_vlan_header *)bp->b_rptr;
1346 
1347                 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1348                     VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1349 
1350                         bp = vnet_vlan_remove_tag(bp);
1351                         if (bp == NULL) {
1352                                 continue;
1353                         }
1354 
1355                 }
1356 
1357                 /* build a chain of processed packets */
1358                 if (bph == NULL) {
1359                         bph = bpt = bp;
1360                 } else {
1361                         bpt->b_next = bp;
1362                         bpt = bp;
1363                 }
1364 
1365         }
1366 
1367         *mp = bph;
1368 }
1369 
1370 static void
1371 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1372 {
1373         vnet_res_t              *vresp = (vnet_res_t *)vrh;
1374         vnet_t                  *vnetp = vresp->vnetp;
1375         vnet_pseudo_rx_ring_t   *ringp;
1376 
1377         if ((vnetp == NULL) || (vnetp->mh == 0)) {
1378                 freemsgchain(mp);
1379                 return;
1380         }
1381 
1382         ringp = vresp->rx_ringp;
1383         mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1384 }
1385 
1386 void
1387 vnet_tx_update(vio_net_handle_t vrh)
1388 {
1389         vnet_res_t              *vresp = (vnet_res_t *)vrh;
1390         vnet_t                  *vnetp = vresp->vnetp;
1391         vnet_pseudo_tx_ring_t   *tx_ringp;
1392         vnet_pseudo_tx_group_t  *tx_grp;
1393         int                     i;
1394 
1395         if (vnetp == NULL || vnetp->mh == NULL) {
1396                 return;
1397         }
1398 
1399         /*
1400          * Currently, the tx hwring API (used to access rings that belong to
1401          * a Hybrid IO resource) does not provide us a per ring flow ctrl
1402          * update; also the pseudo rings are shared by the ports/ldcs in the
1403          * vgen layer. Thus we can't figure out which pseudo ring is being
1404          * re-enabled for transmits. To work around this, when we get a tx
1405          * restart notification from below, we simply propagate that to all
1406          * the tx pseudo rings registered with the mac layer above.
1407          *
1408          * There are a couple of side effects with this approach, but they are
1409          * not harmful, as outlined below:
1410          *
1411          * A) We might send an invalid ring_update() for a ring that is not
1412          * really flow controlled. This will not have any effect in the mac
1413          * layer and packets will continue to be transmitted on that ring.
1414          *
1415          * B) We might end up clearing the flow control in the mac layer for
1416          * a ring that is still flow controlled in the underlying resource.
1417          * This will result in the mac layer restarting transmit, only to be
1418          * flow controlled again on that ring.
1419          */
1420         tx_grp = &vnetp->tx_grp[0];
1421         for (i = 0; i < tx_grp->ring_cnt; i++) {
1422                 tx_ringp = &tx_grp->rings[i];
1423                 mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1424         }
1425 }
1426 
1427 /*
1428  * vnet_tx_notify_thread:
1429  *
1430  * vnet_tx_ring_update() callback function wakes up this thread when
1431  * it gets called. This thread will call mac_tx_ring_update() to
1432  * notify upper mac of flow control getting relieved. Note that
1433  * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
1434  * because vnet_tx_ring_update() is called from lower mac with
1435  * mi_rw_lock held and mac_tx_ring_update() would also try to grab
1436  * the same lock.
1437  */
1438 static void
1439 vnet_tx_notify_thread(void *arg)
1440 {
1441         callb_cpr_t             cprinfo;
1442         vnet_pseudo_tx_group_t  *tx_grp = (vnet_pseudo_tx_group_t *)arg;
1443         vnet_pseudo_tx_ring_t   *tx_ringp;
1444         vnet_t                  *vnetp;
1445         int                     i;
1446 
1447         CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
1448             "vnet_tx_notify_thread");
1449 
1450         mutex_enter(&tx_grp->flowctl_lock);
1451         while (!tx_grp->flowctl_done) {
1452                 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1453                 cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
1454                 CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);
1455 
1456                 for (i = 0; i < tx_grp->ring_cnt; i++) {
1457                         tx_ringp = &tx_grp->rings[i];
1458                         if (tx_ringp->woken_up) {
1459                                 tx_ringp->woken_up = B_FALSE;
1460                                 vnetp = tx_ringp->vnetp;
1461                                 mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1462                         }
1463                 }
1464         }
1465         /*
1466          * The tx_grp is being destroyed, exit the thread.
1467          */
1468         tx_grp->flowctl_thread = NULL;
1469         CALLB_CPR_EXIT(&cprinfo);
1470         thread_exit();
1471 }
1472 
1473 void
1474 vnet_tx_ring_update(void *arg1, uintptr_t arg2)
1475 {
1476         vnet_t                  *vnetp = (vnet_t *)arg1;
1477         vnet_pseudo_tx_group_t  *tx_grp;
1478         vnet_pseudo_tx_ring_t   *tx_ringp;
1479         int                     i;
1480 
1481         tx_grp = &vnetp->tx_grp[0];
1482         for (i = 0; i < tx_grp->ring_cnt; i++) {
1483                 tx_ringp = &tx_grp->rings[i];
1484                 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
1485                         mutex_enter(&tx_grp->flowctl_lock);
1486                         tx_ringp->woken_up = B_TRUE;
1487                         cv_signal(&tx_grp->flowctl_cv);
1488                         mutex_exit(&tx_grp->flowctl_lock);
1489                         break;
1490                 }
1491         }
1492 }
1493 
1494 /*
1495  * Update the new mtu of vnet into the mac layer. First check if the device has
1496  * been plumbed and if so fail the mtu update. Returns 0 on success.
1497  */
1498 int
1499 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1500 {
1501         int     rv;
1502 
1503         if (vnetp == NULL || vnetp->mh == NULL) {
1504                 return (EINVAL);
1505         }
1506 
1507         WRITE_ENTER(&vnetp->vrwlock);
1508 
1509         if (vnetp->flags & VNET_STARTED) {
1510                 RW_EXIT(&vnetp->vrwlock);
1511                 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1512                     "update as the device is plumbed\n",
1513                     vnetp->instance);
1514                 return (EBUSY);
1515         }
1516 
1517         /* update mtu in the mac layer */
1518         rv = mac_maxsdu_update(vnetp->mh, mtu);
1519         if (rv != 0) {
1520                 RW_EXIT(&vnetp->vrwlock);
1521                 cmn_err(CE_NOTE,
1522                     "!vnet%d: Unable to update mtu with mac layer\n",
1523                     vnetp->instance);
1524                 return (EIO);
1525         }
1526 
1527         vnetp->mtu = mtu;
1528 
1529         RW_EXIT(&vnetp->vrwlock);
1530 
1531         return (0);
1532 }
1533 
1534 /*
1535  * Update the link state of vnet to the mac layer.
1536  */
1537 void
1538 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1539 {
1540         if (vnetp == NULL || vnetp->mh == NULL) {
1541                 return;
1542         }
1543 
1544         WRITE_ENTER(&vnetp->vrwlock);
1545         if (vnetp->link_state == link_state) {
1546                 RW_EXIT(&vnetp->vrwlock);
1547                 return;
1548         }
1549         vnetp->link_state = link_state;
1550         RW_EXIT(&vnetp->vrwlock);
1551 
1552         mac_link_update(vnetp->mh, link_state);
1553 }
1554 
1555 /*
1556  * vio_net_resource_reg -- An interface called to register a resource
1557  *      with vnet.
1558  *      macp -- a GLDv3 mac_register that has all the details of
1559  *              a resource and its callbacks etc.
1560  *      type -- resource type.
1561  *      local_macaddr -- resource's MAC address. This is used to
1562  *                       associate a resource with a corresponding vnet.
1563  *      remote_macaddr -- remote side MAC address. This is ignored for
1564  *                        the Hybrid resources.
1565  *      vhp -- A handle returned to the caller.
1566  *      vcb -- A set of callbacks provided to the callers.
1567  */
1568 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1569     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1570     vio_net_callbacks_t *vcb)
1571 {
1572         vnet_t          *vnetp;
1573         vnet_res_t      *vresp;
1574 
1575         vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1576         ether_copy(local_macaddr, vresp->local_macaddr);
1577         ether_copy(rem_macaddr, vresp->rem_macaddr);
1578         vresp->type = type;
1579         bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1580 
1581         DBG1(NULL, "Resource Registerig type=0%X\n", type);
1582 
1583         READ_ENTER(&vnet_rw);
1584         vnetp = vnet_headp;
1585         while (vnetp != NULL) {
1586                 if (VNET_MATCH_RES(vresp, vnetp)) {
1587                         vresp->vnetp = vnetp;
1588 
1589                         /* Setup kstats for hio resource */
1590                         if (vresp->type == VIO_NET_RES_HYBRID) {
1591                                 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1592                                     "hio", vresp);
1593                                 if (vresp->ksp == NULL) {
1594                                         cmn_err(CE_NOTE, "!vnet%d: Cannot "
1595                                             "create kstats for hio resource",
1596                                             vnetp->instance);
1597                                 }
1598                         }
1599                         vnet_add_resource(vnetp, vresp);
1600                         break;
1601                 }
1602                 vnetp = vnetp->nextp;
1603         }
1604         RW_EXIT(&vnet_rw);
1605         if (vresp->vnetp == NULL) {
1606                 DWARN(NULL, "No vnet instance");
1607                 kmem_free(vresp, sizeof (vnet_res_t));
1608                 return (ENXIO);
1609         }
1610 
1611         *vhp = vresp;
1612         vcb->vio_net_rx_cb = vnet_rx;
1613         vcb->vio_net_tx_update = vnet_tx_update;
1614         vcb->vio_net_report_err = vnet_handle_res_err;
1615 
1616         /* Bind the resource to pseudo ring(s) */
1617         if (vnet_bind_rings(vresp) != 0) {
1618                 (void) vnet_rem_resource(vnetp, vresp);
1619                 vnet_hio_destroy_kstats(vresp->ksp);
1620                 KMEM_FREE(vresp);
1621                 return (1);
1622         }
1623 
1624         /* Dispatch a task to start resources */
1625         vnet_dispatch_res_task(vnetp);
1626         return (0);
1627 }
1628 
1629 /*
1630  * vio_net_resource_unreg -- An interface to unregister a resource.
1631  */
1632 void
1633 vio_net_resource_unreg(vio_net_handle_t vhp)
1634 {
1635         vnet_res_t      *vresp = (vnet_res_t *)vhp;
1636         vnet_t          *vnetp = vresp->vnetp;
1637 
1638         DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1639 
1640         ASSERT(vnetp != NULL);
1641         /*
1642          * Remove the resource from fdb; this ensures
1643          * there are no references to the resource.
1644          */
1645         vnet_fdbe_del(vnetp, vresp);
1646 
1647         vnet_unbind_rings(vresp);
1648 
1649         /* Now remove the resource from the list */
1650         (void) vnet_rem_resource(vnetp, vresp);
1651 
1652         vnet_hio_destroy_kstats(vresp->ksp);
1653         KMEM_FREE(vresp);
1654 }
1655 
/*
 * Prepend a resource to the vnet's singly-linked resource list.
 */
static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}
1664 
/*
 * vnet_rem_resource -- unlink the given resource from the vnet's resource
 * list and detach it (vnetp/nextp cleared) under the list write lock.
 * Returns the detached resource so the caller can free it.
 *
 * NOTE(review): the else path dereferences the list head without a NULL
 * check, so callers must guarantee vresp was previously added via
 * vnet_add_resource() -- confirm no path can call this twice.
 */
static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	vnet_res_t	*vrp;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vresp == vnetp->vres_list) {
		/* Resource is at the head; unlink it directly */
		vnetp->vres_list = vresp->nextp;
	} else {
		/* Walk the singly-linked list to find and unlink vresp */
		vrp = vnetp->vres_list;
		while (vrp->nextp != NULL) {
			if (vrp->nextp == vresp) {
				vrp->nextp = vresp->nextp;
				break;
			}
			vrp = vrp->nextp;
		}
	}
	/* Detach the resource from the vnet before returning it */
	vresp->vnetp = NULL;
	vresp->nextp = NULL;

	RW_EXIT(&vnetp->vrwlock);

	return (vresp);
}
1690 
1691 /*
1692  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1693  */
1694 void
1695 vnet_dds_rx(void *arg, void *dmsg)
1696 {
1697         vnet_t *vnetp = arg;
1698         vdds_process_dds_msg(vnetp, dmsg);
1699 }
1700 
1701 /*
1702  * vnet_send_dds_msg -- An interface provided to DDS to send
1703  *      DDS messages. This simply sends meessages via vgen.
1704  */
1705 int
1706 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1707 {
1708         int rv;
1709 
1710         if (vnetp->vgenhdl != NULL) {
1711                 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1712         }
1713         return (rv);
1714 }
1715 
1716 /*
1717  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1718  */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	/* Delegate Hybrid I/O resource cleanup to the vdds module */
	vdds_cleanup_hio(vnetp);
}
1724 
1725 /*
1726  * vnet_handle_res_err -- A callback function called by a resource
1727  *      to report an error. For example, vgen can call to report
1728  *      an LDC down/reset event. This will trigger cleanup of associated
1729  *      Hybrid resource.
1730  */
1731 /* ARGSUSED */
1732 static void
1733 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1734 {
1735         vnet_res_t *vresp = (vnet_res_t *)vrh;
1736         vnet_t *vnetp = vresp->vnetp;
1737 
1738         if (vnetp == NULL) {
1739                 return;
1740         }
1741         if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1742             (vresp->type != VIO_NET_RES_HYBRID)) {
1743                 return;
1744         }
1745 
1746         vdds_cleanup_hio(vnetp);
1747 }
1748 
1749 /*
1750  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1751  */
1752 static void
1753 vnet_dispatch_res_task(vnet_t *vnetp)
1754 {
1755         int rv;
1756 
1757         /*
1758          * Dispatch the task. It could be the case that vnetp->flags does
1759          * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1760          * can abort the task when the task is started. See related comments
1761          * in vnet_m_stop() and vnet_stop_resources().
1762          */
1763         rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1764             vnetp, DDI_NOSLEEP);
1765         if (rv != DDI_SUCCESS) {
1766                 cmn_err(CE_WARN,
1767                     "vnet%d:Can't dispatch start resource task",
1768                     vnetp->instance);
1769         }
1770 }
1771 
1772 /*
1773  * vnet_res_start_task -- A taskq callback function that starts a resource.
1774  */
1775 static void
1776 vnet_res_start_task(void *arg)
1777 {
1778         vnet_t *vnetp = arg;
1779 
1780         WRITE_ENTER(&vnetp->vrwlock);
1781         if (vnetp->flags & VNET_STARTED) {
1782                 vnet_start_resources(vnetp);
1783         }
1784         RW_EXIT(&vnetp->vrwlock);
1785 }
1786 
1787 /*
1788  * vnet_start_resources -- starts all resources associated with
1789  *      a vnet.
1790  */
1791 static void
1792 vnet_start_resources(vnet_t *vnetp)
1793 {
1794         mac_register_t  *macp;
1795         mac_callbacks_t *cbp;
1796         vnet_res_t      *vresp;
1797         int rv;
1798 
1799         DBG1(vnetp, "enter\n");
1800 
1801         ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1802 
1803         for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1804                 /* skip if it is already started */
1805                 if (vresp->flags & VNET_STARTED) {
1806                         continue;
1807                 }
1808                 macp = &vresp->macreg;
1809                 cbp = macp->m_callbacks;
1810                 rv = cbp->mc_start(macp->m_driver);
1811                 if (rv == 0) {
1812                         /*
1813                          * Successfully started the resource, so now
1814                          * add it to the fdb.
1815                          */
1816                         vresp->flags |= VNET_STARTED;
1817                         vnet_fdbe_add(vnetp, vresp);
1818                 }
1819         }
1820 
1821         DBG1(vnetp, "exit\n");
1822 
1823 }
1824 
1825 /*
1826  * vnet_stop_resources -- stop all resources associated with a vnet.
1827  */
/*
 * vnet_stop_resources -- stop every started resource on the vnet's list.
 * Entered and exited with vrwlock held as writer; the lock is dropped
 * around each mc_stop() call as described below.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			/*
			 * NOTE(review): VNET_STOPPING presumably signals an
			 * in-progress stop to other threads while the lock is
			 * dropped -- confirm against readers of this flag.
			 */
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}
1873 
1874 /*
1875  * Setup kstats for the HIO statistics.
1876  * NOTE: the synchronization for the statistics is the
1877  * responsibility of the caller.
1878  */
1879 kstat_t *
1880 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1881 {
1882         kstat_t *ksp;
1883         vnet_t *vnetp = vresp->vnetp;
1884         vnet_hio_kstats_t *hiokp;
1885         size_t size;
1886 
1887         ASSERT(vnetp != NULL);
1888         size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1889         ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1890             KSTAT_TYPE_NAMED, size, 0);
1891         if (ksp == NULL) {
1892                 return (NULL);
1893         }
1894 
1895         hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1896         kstat_named_init(&hiokp->ipackets,               "ipackets",
1897             KSTAT_DATA_ULONG);
1898         kstat_named_init(&hiokp->ierrors,                "ierrors",
1899             KSTAT_DATA_ULONG);
1900         kstat_named_init(&hiokp->opackets,               "opackets",
1901             KSTAT_DATA_ULONG);
1902         kstat_named_init(&hiokp->oerrors,                "oerrors",
1903             KSTAT_DATA_ULONG);
1904 
1905 
1906         /* MIB II kstat variables */
1907         kstat_named_init(&hiokp->rbytes,         "rbytes",
1908             KSTAT_DATA_ULONG);
1909         kstat_named_init(&hiokp->obytes,         "obytes",
1910             KSTAT_DATA_ULONG);
1911         kstat_named_init(&hiokp->multircv,               "multircv",
1912             KSTAT_DATA_ULONG);
1913         kstat_named_init(&hiokp->multixmt,               "multixmt",
1914             KSTAT_DATA_ULONG);
1915         kstat_named_init(&hiokp->brdcstrcv,              "brdcstrcv",
1916             KSTAT_DATA_ULONG);
1917         kstat_named_init(&hiokp->brdcstxmt,              "brdcstxmt",
1918             KSTAT_DATA_ULONG);
1919         kstat_named_init(&hiokp->norcvbuf,               "norcvbuf",
1920             KSTAT_DATA_ULONG);
1921         kstat_named_init(&hiokp->noxmtbuf,               "noxmtbuf",
1922             KSTAT_DATA_ULONG);
1923 
1924         ksp->ks_update = vnet_hio_update_kstats;
1925         ksp->ks_private = (void *)vresp;
1926         kstat_install(ksp);
1927         return (ksp);
1928 }
1929 
1930 /*
1931  * Destroy kstats.
1932  */
1933 static void
1934 vnet_hio_destroy_kstats(kstat_t *ksp)
1935 {
1936         if (ksp != NULL)
1937                 kstat_delete(ksp);
1938 }
1939 
1940 /*
1941  * Update the kstats.
1942  */
1943 static int
1944 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1945 {
1946         vnet_t *vnetp;
1947         vnet_res_t *vresp;
1948         vnet_hio_stats_t statsp;
1949         vnet_hio_kstats_t *hiokp;
1950 
1951         vresp = (vnet_res_t *)ksp->ks_private;
1952         vnetp = vresp->vnetp;
1953 
1954         bzero(&statsp, sizeof (vnet_hio_stats_t));
1955 
1956         READ_ENTER(&vnetp->vsw_fp_rw);
1957         if (vnetp->hio_fp == NULL) {
1958                 /* not using hio resources, just return */
1959                 RW_EXIT(&vnetp->vsw_fp_rw);
1960                 return (0);
1961         }
1962         VNET_FDBE_REFHOLD(vnetp->hio_fp);
1963         RW_EXIT(&vnetp->vsw_fp_rw);
1964         vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1965         VNET_FDBE_REFRELE(vnetp->hio_fp);
1966 
1967         hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1968 
1969         if (rw == KSTAT_READ) {
1970                 /* Link Input/Output stats */
1971                 hiokp->ipackets.value.ul     = (uint32_t)statsp.ipackets;
1972                 hiokp->ipackets64.value.ull  = statsp.ipackets;
1973                 hiokp->ierrors.value.ul              = statsp.ierrors;
1974                 hiokp->opackets.value.ul     = (uint32_t)statsp.opackets;
1975                 hiokp->opackets64.value.ull  = statsp.opackets;
1976                 hiokp->oerrors.value.ul              = statsp.oerrors;
1977 
1978                 /* MIB II kstat variables */
1979                 hiokp->rbytes.value.ul               = (uint32_t)statsp.rbytes;
1980                 hiokp->rbytes64.value.ull    = statsp.rbytes;
1981                 hiokp->obytes.value.ul               = (uint32_t)statsp.obytes;
1982                 hiokp->obytes64.value.ull    = statsp.obytes;
1983                 hiokp->multircv.value.ul     = statsp.multircv;
1984                 hiokp->multixmt.value.ul     = statsp.multixmt;
1985                 hiokp->brdcstrcv.value.ul    = statsp.brdcstrcv;
1986                 hiokp->brdcstxmt.value.ul    = statsp.brdcstxmt;
1987                 hiokp->norcvbuf.value.ul     = statsp.norcvbuf;
1988                 hiokp->noxmtbuf.value.ul     = statsp.noxmtbuf;
1989         } else {
1990                 return (EACCES);
1991         }
1992 
1993         return (0);
1994 }
1995 
/*
 * vnet_hio_get_stats -- collect statistics from the underlying Hybrid
 * device by calling its mc_getstat() entry point for every MAC stat in
 * [MAC_STAT_MIN, MAC_STAT_OVERFLOWS) and copying the values of interest
 * into *statsp. Stats the device fails to report (non-zero return) are
 * left as the caller initialized them (the caller bzero()s *statsp).
 */
static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;
	uint64_t		val;
	int			stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * stats we are not interested in; skip.
				 */
				break;
			}
		}
	}
}
2069 
/*
 * vnet_m_capab -- MAC capability callback. Only MAC_CAPAB_RINGS is
 * advertised; every other capability (and a NULL driver handle) returns
 * B_FALSE.
 */
static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		/* 0 == B_FALSE: no capabilities without a driver handle */
		return (0);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * gldv3 mac layer, to improve the performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We also
		 * leverage this for the normal case when no Hybrid resource is
		 * assigned.
		 *
		 * Ring Allocation:
		 * - TX path:
		 * We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings) In the normal case,
		 * transmit traffic that comes down to the driver through the
		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
		 * distributed switching algorithm in vnet and gets transmitted
		 * over a port/LDC in the vgen layer to either the vswitch or a
		 * peer vnet. If and when a Hybrid resource is assigned to the
		 * vnet, we obtain the tx ring information of the Hybrid device
		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
		 * Traffic being sent over the Hybrid resource by the mac layer
		 * gets spread across both hw rings, as they are mapped to the
		 * 2 pseudo tx rings in vnet.
		 *
		 * - RX path:
		 * We expose a pseudo ring group with 3 pseudo rx rings (static
		 * rings) initially. The first (default) pseudo rx ring is
		 * reserved for the resource that connects to the vswitch
		 * service. The next 2 rings are reserved for a Hybrid resource
		 * that may be assigned to the vnet device. If and when a
		 * Hybrid resource is assigned to the vnet, we obtain the rx
		 * ring information of the Hybrid device (nxge) and map these
		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
		 * resource that connects to a peer vnet, we dynamically
		 * allocate a pseudo rx ring and map it to that resource, when
		 * the resource gets added; and the pseudo rx ring is
		 * dynamically registered with the upper mac layer. We do the
		 * reverse and unregister the ring with the mac layer when
		 * the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of ring structure,
		 * specifically ringp->hw_rh, in either the TX or the RX ring,
		 * as explained below.
		 * - TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
		 * we call vnet_bind_rings() first and then call
		 * vnet_start_resources() which adds an entry to fdb. For
		 * traffic going over LDC resources, we don't reference
		 * ring->hw_rh at all.
		 * - RX ring:
		 * For rings mapped to Hybrid resource ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks before
		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
		 * (vio_net_resource_unreg()).
		 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
		 * Note that for rings mapped to Hybrid resource, though the
		 * rings are statically registered with the mac layer, its
		 * hardware ring mapping (ringp->hw_rh) can be torn down in
		 * vnet_unbind_hwrings() while the kstat operation is in
		 * progress. To protect against this, we hold a reference to
		 * the resource in FDB; this ensures that the thread in
		 * vio_net_resource_unreg() waits for the reference to be
		 * dropped before unbinding the ring.
		 *
		 * We don't need to do this for rings mapped to LDC resources.
		 * These rings are registered/unregistered dynamically with
		 * the mac layer and so any attempt to unregister the ring
		 * while kstat operation is in progress will block in
		 * mac_group_rem_ring(). Thus implicitly protects the
		 * resource (ringp->hw_rh) from disappearing.
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are added
			 * or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e, the number of
			 * transmit ring groups advertised should be set to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}
2210 
2211 /*
2212  * Callback funtion for MAC layer to get ring information.
2213  */
/*
 * vnet_get_ring -- fill in mac_ring_info_t for the pseudo ring identified
 * by (rtype, g_index, r_index), wiring up the per-ring start/stop/stat
 * entry points and saving the mac layer's ring handle.
 */
static void
vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
{
	vnet_t	*vnetp = arg;

	switch (rtype) {

	case MAC_RING_TYPE_RX: {

		vnet_pseudo_rx_group_t	*rx_grp;
		vnet_pseudo_rx_ring_t	*rx_ringp;
		mac_intr_t		*mintr;

		/* We advertised only one RX group */
		ASSERT(g_index == 0);
		rx_grp = &vnetp->rx_grp[g_index];

		/* Check the current # of rings in the rx group */
		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));

		/* Get the ring based on the index */
		rx_ringp = &rx_grp->rings[r_index];

		rx_ringp->handle = r_handle;
		/*
		 * Note: we don't need to save the incoming r_index in rx_ring,
		 * as vnet_ring_grp_init() would have initialized the index for
		 * each ring in the array.
		 */
		rx_ringp->grp = rx_grp;
		rx_ringp->vnetp = vnetp;

		/* Pseudo-interrupt control: enable/disable rx callbacks */
		mintr = &infop->mri_intr;
		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;

		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
		infop->mri_start = vnet_rx_ring_start;
		infop->mri_stop = vnet_rx_ring_stop;
		infop->mri_stat = vnet_rx_ring_stat;

		/* Set the poll function, as this is an rx ring */
		infop->mri_poll = vnet_rx_poll;
		/*
		 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge
		 * which was not sending packet chains in interrupt
		 * context. For such drivers, packets are queued in
		 * Rx soft rings so that we get a chance to switch
		 * into a polling mode under backlog. This bug (not
		 * sending packet chains) has now been fixed. Once
		 * the performance impact is measured, this change
		 * will be removed.
		 */
		infop->mri_flags = (vnet_mac_rx_queuing ?
		    MAC_RING_RX_ENQUEUE : 0);
		break;
	}

	case MAC_RING_TYPE_TX: {
		vnet_pseudo_tx_group_t	*tx_grp;
		vnet_pseudo_tx_ring_t	*tx_ringp;

		/*
		 * No need to check grp index; mac layer passes -1 for it.
		 */
		tx_grp = &vnetp->tx_grp[0];

		/* Check the # of rings in the tx group */
		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

		/* Get the ring based on the index */
		tx_ringp = &tx_grp->rings[r_index];

		tx_ringp->handle = r_handle;
		tx_ringp->index = r_index;
		tx_ringp->grp = tx_grp;
		tx_ringp->vnetp = vnetp;

		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
		infop->mri_start = vnet_tx_ring_start;
		infop->mri_stop = vnet_tx_ring_stop;
		infop->mri_stat = vnet_tx_ring_stat;

		/* Set the transmit function, as this is a tx ring */
		infop->mri_tx = vnet_tx_ring_send;
		/*
		 * MAC_RING_TX_SERIALIZE bit needs to be set while
		 * hybridIO is enabled to workaround tx lock
		 * contention issues in nxge.
		 */
		infop->mri_flags = (vnet_mac_tx_serialize ?
		    MAC_RING_TX_SERIALIZE : 0);
		break;
	}

	default:
		break;
	}
}
2315 
2316 /*
2317  * Callback funtion for MAC layer to get group information.
2318  */
2319 static void
2320 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2321     mac_group_info_t *infop, mac_group_handle_t handle)
2322 {
2323         vnet_t  *vnetp = (vnet_t *)arg;
2324 
2325         switch (type) {
2326 
2327         case MAC_RING_TYPE_RX:
2328         {
2329                 vnet_pseudo_rx_group_t  *rx_grp;
2330 
2331                 /* We advertised only one RX group */
2332                 ASSERT(index == 0);
2333 
2334                 rx_grp = &vnetp->rx_grp[index];
2335                 rx_grp->handle = handle;
2336                 rx_grp->index = index;
2337                 rx_grp->vnetp = vnetp;
2338 
2339                 infop->mgi_driver = (mac_group_driver_t)rx_grp;
2340                 infop->mgi_start = NULL;
2341                 infop->mgi_stop = NULL;
2342                 infop->mgi_addmac = vnet_addmac;
2343                 infop->mgi_remmac = vnet_remmac;
2344                 infop->mgi_count = rx_grp->ring_cnt;
2345 
2346                 break;
2347         }
2348 
2349         case MAC_RING_TYPE_TX:
2350         {
2351                 vnet_pseudo_tx_group_t  *tx_grp;
2352 
2353                 /* We advertised only one TX group */
2354                 ASSERT(index == 0);
2355 
2356                 tx_grp = &vnetp->tx_grp[index];
2357                 tx_grp->handle = handle;
2358                 tx_grp->index = index;
2359                 tx_grp->vnetp = vnetp;
2360 
2361                 infop->mgi_driver = (mac_group_driver_t)tx_grp;
2362                 infop->mgi_start = NULL;
2363                 infop->mgi_stop = NULL;
2364                 infop->mgi_addmac = NULL;
2365                 infop->mgi_remmac = NULL;
2366                 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2367 
2368                 break;
2369         }
2370 
2371         default:
2372                 break;
2373 
2374         }
2375 }
2376 
2377 static int
2378 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2379 {
2380         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2381         int                     err;
2382 
2383         /*
2384          * If this ring is mapped to a LDC resource, simply mark the state to
2385          * indicate the ring is started and return.
2386          */
2387         if ((rx_ringp->state &
2388             (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2389                 rx_ringp->gen_num = mr_gen_num;
2390                 rx_ringp->state |= VNET_RXRING_STARTED;
2391                 return (0);
2392         }
2393 
2394         ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2395 
2396         /*
2397          * This must be a ring reserved for a hwring. If the hwring is not
2398          * bound yet, simply mark the state to indicate the ring is started and
2399          * return. If and when a hybrid resource is activated for this vnet
2400          * device, we will bind the hwring and start it then. If a hwring is
2401          * already bound, start it now.
2402          */
2403         if (rx_ringp->hw_rh == NULL) {
2404                 rx_ringp->gen_num = mr_gen_num;
2405                 rx_ringp->state |= VNET_RXRING_STARTED;
2406                 return (0);
2407         }
2408 
2409         err = mac_hwring_activate(rx_ringp->hw_rh);
2410         if (err == 0) {
2411                 rx_ringp->gen_num = mr_gen_num;
2412                 rx_ringp->state |= VNET_RXRING_STARTED;
2413         } else {
2414                 err = ENXIO;
2415         }
2416 
2417         return (err);
2418 }
2419 
2420 static void
2421 vnet_rx_ring_stop(mac_ring_driver_t arg)
2422 {
2423         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2424 
2425         /*
2426          * If this ring is mapped to a LDC resource, simply mark the state to
2427          * indicate the ring is now stopped and return.
2428          */
2429         if ((rx_ringp->state &
2430             (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2431                 rx_ringp->state &= ~VNET_RXRING_STARTED;
2432                 return;
2433         }
2434 
2435         ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2436 
2437         /*
2438          * This must be a ring reserved for a hwring. If the hwring is not
2439          * bound yet, simply mark the state to indicate the ring is stopped and
2440          * return. If a hwring is already bound, stop it now.
2441          */
2442         if (rx_ringp->hw_rh == NULL) {
2443                 rx_ringp->state &= ~VNET_RXRING_STARTED;
2444                 return;
2445         }
2446 
2447         mac_hwring_quiesce(rx_ringp->hw_rh);
2448         rx_ringp->state &= ~VNET_RXRING_STARTED;
2449 }
2450 
/*
 * Retrieve statistic 'stat' for a pseudo rx ring into '*val'. A
 * Hybrid-backed ring forwards the query to its underlying hwring; an
 * LDC-backed ring forwards it to the vgen mac callbacks of the bound
 * resource. Always returns 0.
 */
static int
vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
	vnet_t			*vnetp = (vnet_t *)rx_ringp->vnetp;
	vnet_res_t		*vresp;
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;

	/*
	 * Refer to vnet_m_capab() function for detailed comments on ring
	 * synchronization.
	 */
	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
		READ_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL) {
			/* Hybrid resource not present; leave '*val' alone */
			RW_EXIT(&vnetp->vsw_fp_rw);
			return (0);
		}

		/*
		 * Hold a reference on the hybrid resource so it stays
		 * valid across the getstat call, and drop the rwlock
		 * before calling into the mac layer.
		 */
		VNET_FDBE_REFHOLD(vnetp->hio_fp);
		RW_EXIT(&vnetp->vsw_fp_rw);
		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
		VNET_FDBE_REFRELE(vnetp->hio_fp);
		return (0);
	}

	/* Otherwise the ring must be bound to an LDC resource */
	ASSERT((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
	vresp = (vnet_res_t *)rx_ringp->hw_rh;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;

	/* Delegate the stat query to the resource's own mac callbacks */
	cbp->mc_getstat(macp->m_driver, stat, val);

	return (0);
}
2488 
2489 /* ARGSUSED */
2490 static int
2491 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2492 {
2493         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2494 
2495         tx_ringp->state |= VNET_TXRING_STARTED;
2496         return (0);
2497 }
2498 
2499 static void
2500 vnet_tx_ring_stop(mac_ring_driver_t arg)
2501 {
2502         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2503 
2504         tx_ringp->state &= ~VNET_TXRING_STARTED;
2505 }
2506 
2507 static int
2508 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2509 {
2510         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
2511         vnet_tx_ring_stats_t    *statsp;
2512 
2513         statsp = &tx_ringp->tx_ring_stats;
2514 
2515         switch (stat) {
2516         case MAC_STAT_OPACKETS:
2517                 *val = statsp->opackets;
2518                 break;
2519 
2520         case MAC_STAT_OBYTES:
2521                 *val = statsp->obytes;
2522                 break;
2523 
2524         default:
2525                 *val = 0;
2526                 return (ENOTSUP);
2527         }
2528 
2529         return (0);
2530 }
2531 
2532 /*
2533  * Disable polling for a ring and enable its interrupt.
2534  */
2535 static int
2536 vnet_ring_enable_intr(void *arg)
2537 {
2538         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2539         vnet_res_t              *vresp;
2540 
2541         if (rx_ringp->hw_rh == NULL) {
2542                 /*
2543                  * Ring enable intr func is being invoked, but the ring is
2544                  * not bound to any underlying resource ? This must be a ring
2545                  * reserved for Hybrid resource and no such resource has been
2546                  * assigned to this vnet device yet. We simply return success.
2547                  */
2548                 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2549                 return (0);
2550         }
2551 
2552         /*
2553          * The rx ring has been bound to either a LDC or a Hybrid resource.
2554          * Call the appropriate function to enable interrupts for the ring.
2555          */
2556         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2557                 return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2558         } else {
2559                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2560                 return (vgen_enable_intr(vresp->macreg.m_driver));
2561         }
2562 }
2563 
2564 /*
2565  * Enable polling for a ring and disable its interrupt.
2566  */
2567 static int
2568 vnet_ring_disable_intr(void *arg)
2569 {
2570         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2571         vnet_res_t              *vresp;
2572 
2573         if (rx_ringp->hw_rh == NULL) {
2574                 /*
2575                  * Ring disable intr func is being invoked, but the ring is
2576                  * not bound to any underlying resource ? This must be a ring
2577                  * reserved for Hybrid resource and no such resource has been
2578                  * assigned to this vnet device yet. We simply return success.
2579                  */
2580                 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2581                 return (0);
2582         }
2583 
2584         /*
2585          * The rx ring has been bound to either a LDC or a Hybrid resource.
2586          * Call the appropriate function to disable interrupts for the ring.
2587          */
2588         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2589                 return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2590         } else {
2591                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2592                 return (vgen_disable_intr(vresp->macreg.m_driver));
2593         }
2594 }
2595 
2596 /*
2597  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2598  */
2599 static mblk_t *
2600 vnet_rx_poll(void *arg, int bytes_to_pickup)
2601 {
2602         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2603         mblk_t                  *mp = NULL;
2604         vnet_res_t              *vresp;
2605         vnet_t                  *vnetp = rx_ringp->vnetp;
2606 
2607         if (rx_ringp->hw_rh == NULL) {
2608                 return (NULL);
2609         }
2610 
2611         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2612                 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2613                 /*
2614                  * Packets received over a hybrid resource need additional
2615                  * processing to remove the tag, for the pvid case. The
2616                  * underlying resource is not aware of the vnet's pvid and thus
2617                  * packets are received with the vlan tag in the header; unlike
2618                  * packets that are received over a ldc channel in which case
2619                  * the peer vnet/vsw would have already removed the tag.
2620                  */
2621                 if (vnetp->pvid != vnetp->default_vlan_id) {
2622                         vnet_rx_frames_untag(vnetp->pvid, &mp);
2623                 }
2624         } else {
2625                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2626                 mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
2627         }
2628         return (mp);
2629 }
2630 
2631 /* ARGSUSED */
2632 void
2633 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2634     boolean_t loopback)
2635 {
2636         vnet_t                  *vnetp = (vnet_t *)arg;
2637         vnet_pseudo_rx_ring_t   *ringp = (vnet_pseudo_rx_ring_t *)mrh;
2638 
2639         /*
2640          * Packets received over a hybrid resource need additional processing
2641          * to remove the tag, for the pvid case. The underlying resource is
2642          * not aware of the vnet's pvid and thus packets are received with the
2643          * vlan tag in the header; unlike packets that are received over a ldc
2644          * channel in which case the peer vnet/vsw would have already removed
2645          * the tag.
2646          */
2647         if (vnetp->pvid != vnetp->default_vlan_id) {
2648                 vnet_rx_frames_untag(vnetp->pvid, &mp);
2649                 if (mp == NULL) {
2650                         return;
2651                 }
2652         }
2653         mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2654 }
2655 
2656 static int
2657 vnet_addmac(void *arg, const uint8_t *mac_addr)
2658 {
2659         vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2660         vnet_t                  *vnetp;
2661 
2662         vnetp = rx_grp->vnetp;
2663 
2664         if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2665                 return (0);
2666         }
2667 
2668         cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2669             vnetp->instance, __func__);
2670         return (EINVAL);
2671 }
2672 
2673 static int
2674 vnet_remmac(void *arg, const uint8_t *mac_addr)
2675 {
2676         vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2677         vnet_t                  *vnetp;
2678 
2679         vnetp = rx_grp->vnetp;
2680 
2681         if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2682                 return (0);
2683         }
2684 
2685         cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2686             vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2687         return (EINVAL);
2688 }
2689 
/*
 * Set up the Hybrid I/O datapath over the physical link 'ifname': open
 * the link, create an exclusive mac client on it, program the vnet's
 * current unicast address, register the link as a Hybrid
 * (VIO_NET_RES_HYBRID) resource with the vio framework and install the
 * receive callback.
 *
 * Returns 0 on success; EAGAIN if the mac_register_t can't be allocated;
 * the mac_open_by_linkname() error if the link can't be opened; or 1 if
 * any later step fails (after undoing earlier steps via
 * vnet_hio_mac_cleanup()).
 */
int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	/*
	 * Retry the open as long as the failure is ENOENT (the link may
	 * not exist yet), up to vnet_mac_open_retries attempts with a
	 * vnet_mac_open_delay pause between them. Any other error is
	 * returned to the caller immediately.
	 */
	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	/* Open an exclusive mac client named "vnet<instance>-<ifname>" */
	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	/*
	 * Program the vnet's unicast address. Vlan tagging/stripping are
	 * disabled here; vlan tags are handled by the vnet itself (see
	 * the pvid handling in the rx/poll paths).
	 */
	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	/* Register the link as a Hybrid resource with the vio framework */
	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	return (0);

fail:
	/* Release whatever was set up so far (handles are NULL-checked) */
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
2765 
/*
 * Undo vnet_hio_mac_init(), releasing resources in the reverse order of
 * their setup. Each handle is NULL-checked and cleared after release, so
 * this is safe to call with partial state (e.g. from the
 * vnet_hio_mac_init() failure path) and safe to call more than once.
 */
void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	/* Unregister the Hybrid resource from the vio framework */
	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	/* Remove the unicast address from the mac client */
	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	/* Close the mac client */
	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	/* Finally, close the underlying mac itself */
	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}
2789 
/*
 * Bind the vnet's reserved pseudo rx/tx rings to the rx/tx hwrings of the
 * Hybrid resource, and start any hwring whose pseudo ring is already in
 * the started state. Runs under the mac perimeter of the underlying mac.
 * Returns 0 on success; 1 on failure (after vnet_unbind_hwrings() has
 * undone any partial bindings).
 */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we have
	 * statically exported these pseudo rxrings which are reserved for
	 * rxrings of Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp, NULL);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_activate(rx_ringp->hw_rh);
			if (rv != 0) {
				/* Undo this ring's binding before bailing */
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically exported
	 * these rings. Note that these rings will continue to be used for LDC
	 * resources to peer vnets and vswitch (shared ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}
	/* Be notified when the tx hwrings have room again */
	tx_grp->tx_notify_handle =
	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	/* Tear down any bindings that did succeed */
	vnet_unbind_hwrings(vnetp);
	return (1);
}
2894 
/*
 * Unbind the vnet's pseudo rx/tx rings from the Hybrid resource's
 * hwrings, undoing vnet_bind_hwrings(). Runs under the mac perimeter of
 * the underlying mac. Safe to call after a partial bind: every teardown
 * step checks its state/handle first.
 */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Detach the pseudo tx rings from the hw tx rings */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}
	/* Cancel the tx notify callback installed at bind time */
	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
	    tx_grp->tx_notify_handle);

	/* Stop and tear down each bound rx hwring */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_quiesce(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}
2944 
/*
 * Bind a pseudo rx ring to the given LDC resource. The LDC_SERVICE
 * resource maps to the statically allocated default ring (index 0); an
 * LDC_GUEST resource gets a freshly allocated pseudo ring which is then
 * registered with the upper mac. Returns 0 on success, 1 on failure.
 */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is save a
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need this
	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with upper-mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		/* Registration failed: undo the binding and free the ring */
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}
3011 
/*
 * Unbind the pseudo rx ring from the given LDC resource, undoing
 * vnet_bind_vgenring(). The statically allocated default ring (index 0,
 * LDC_SERVICE) just drops its reference; an LDC_GUEST ring is
 * unregistered from the upper mac and freed. No-op if the resource has
 * no bound ring.
 */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		/* Nothing was bound */
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is remove its
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with upper-mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}
3063 
3064 static void
3065 vnet_unbind_rings(vnet_res_t *vresp)
3066 {
3067         switch (vresp->type) {
3068 
3069         case VIO_NET_RES_LDC_SERVICE:
3070         case VIO_NET_RES_LDC_GUEST:
3071                 vnet_unbind_vgenring(vresp);
3072                 break;
3073 
3074         case VIO_NET_RES_HYBRID:
3075                 vnet_unbind_hwrings(vresp->vnetp);
3076                 break;
3077 
3078         default:
3079                 break;
3080 
3081         }
3082 }
3083 
3084 static int
3085 vnet_bind_rings(vnet_res_t *vresp)
3086 {
3087         int     rv;
3088 
3089         switch (vresp->type) {
3090 
3091         case VIO_NET_RES_LDC_SERVICE:
3092         case VIO_NET_RES_LDC_GUEST:
3093                 rv = vnet_bind_vgenring(vresp);
3094                 break;
3095 
3096         case VIO_NET_RES_HYBRID:
3097                 rv = vnet_bind_hwrings(vresp->vnetp);
3098                 break;
3099 
3100         default:
3101                 rv = 1;
3102                 break;
3103 
3104         }
3105 
3106         return (rv);
3107 }
3108 
3109 /* ARGSUSED */
3110 int
3111 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
3112 {
3113         vnet_t  *vnetp = (vnet_t *)arg;
3114 
3115         *val = mac_stat_get(vnetp->hio_mh, stat);
3116         return (0);
3117 }
3118 
3119 /*
3120  * The start() and stop() routines for the Hybrid resource below, are just
3121  * dummy functions. This is provided to avoid resource type specific code in
3122  * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
3123  * of the Hybrid resource happens in the context of the mac_client interfaces
3124  * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
3125  */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	/* Intentionally a no-op; see the block comment above */
	return (0);
}
3132 
/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
	/* Intentionally a no-op; see the block comment above */
}
3138 
3139 mblk_t *
3140 vnet_hio_tx(void *arg, mblk_t *mp)
3141 {
3142         vnet_pseudo_tx_ring_t   *tx_ringp;
3143         mblk_t                  *nextp;
3144         mblk_t                  *ret_mp;
3145 
3146         tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
3147         for (;;) {
3148                 nextp = mp->b_next;
3149                 mp->b_next = NULL;
3150 
3151                 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
3152                 if (ret_mp != NULL) {
3153                         ret_mp->b_next = nextp;
3154                         mp = ret_mp;
3155                         break;
3156                 }
3157 
3158                 if ((mp = nextp) == NULL)
3159                         break;
3160         }
3161         return (mp);
3162 }
3163 
3164 #ifdef  VNET_IOC_DEBUG
3165 
3166 /*
3167  * The ioctl entry point is used only for debugging for now. The ioctl commands
3168  * can be used to force the link state of the channel connected to vsw.
3169  */
3170 static void
3171 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3172 {
3173         struct iocblk   *iocp;
3174         vnet_t          *vnetp;
3175 
3176         iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3177         iocp->ioc_error = 0;
3178         vnetp = (vnet_t *)arg;
3179 
3180         if (vnetp == NULL) {
3181                 miocnak(q, mp, 0, EINVAL);
3182                 return;
3183         }
3184 
3185         switch (iocp->ioc_cmd) {
3186 
3187         case VNET_FORCE_LINK_DOWN:
3188         case VNET_FORCE_LINK_UP:
3189                 vnet_force_link_state(vnetp, q, mp);
3190                 break;
3191 
3192         default:
3193                 iocp->ioc_error = EINVAL;
3194                 miocnak(q, mp, 0, iocp->ioc_error);
3195                 break;
3196 
3197         }
3198 }
3199 
/*
 * Forward a force-link-state ioctl to the mac callbacks of the LDC
 * resource connected to vsw. The vsw_fp_rw reader lock is held across
 * the callback so the resource cannot be torn down underneath us.
 *
 * NOTE(review): if no vsw resource is present, the function returns
 * without acking or naking 'mp', so the sender gets no reply — confirm
 * this is intended.
 */
static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	/* Delegate the ioctl to the vsw resource's own mac callbacks */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}
3221 
3222 #else
3223 
3224 static void
3225 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3226 {
3227         vnet_t          *vnetp;
3228 
3229         vnetp = (vnet_t *)arg;
3230 
3231         if (vnetp == NULL) {
3232                 miocnak(q, mp, 0, EINVAL);
3233                 return;
3234         }
3235 
3236         /* ioctl support only for debugging */
3237         miocnak(q, mp, 0, ENOTSUP);
3238 }
3239 
3240 #endif