/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/callb.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>

/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef	VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
	mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
	uint64_t *val);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
	uint64_t *val);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_tx_notify_thread(void *);

/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

extern pri_t	minclsyspri;

#define	DRV_NAME	"vnet"
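
/*
 * FDB entry reference counting: a reference is taken either while holding
 * vsw_fp_rw (for the cached vsw/hybrid entries) or atomically from
 * vnet_fdbe_find_cb() during a hash lookup, and is dropped once the
 * transmit is done. vnet_fdbe_del() relies on this protocol: it spins
 * until all references are dropped before letting an entry go away.
 */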
#define	VNET_FDBE_REFHOLD(p)						\
{									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
}

#define	VNET_FDBE_REFRELE(p)						\
{									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
}

#ifdef	VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};

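/*
 * Callbacks for the Hybrid (HybridIO) resource; only the stat, start,
 * stop and tx entrypoints are needed, so the rest are left NULL.
 */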
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};

/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;

/*
 * Configure tx serialization in mac layer for the vnet device. This tunable
 * should be enabled to improve performance only if HybridIO is configured for
 * the vnet device.
 */
boolean_t vnet_mac_tx_serialize = B_FALSE;
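/*
 * Note: this and the other global tunables in this file can be set at boot
 * time via /etc/system, e.g.:
 *	set vnet:vnet_mac_tx_serialize = 1
 */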

/* Configure enqueuing at Rx soft rings in mac layer for the vnet device */
boolean_t vnet_mac_rx_queuing = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default; allocb(9F) is then used
 * for receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t	vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;

/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1M).
 */
static char vnet_ident[] = "vnet driver";
extern struct mod_ops mod_driverops;
static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

#ifdef DEBUG

#define	DEBUG_PRINTF	debug_printf

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;
	vnet_t	*vnetp = (vnet_t *)arg;
	char	*bufp = buf;

	/* use bounded formatting so a long message cannot overrun buf */
	if (vnetp == NULL) {
		(void) snprintf(bufp, sizeof (buf), "%s: ", fname);
		bufp += strlen(bufp);
	} else {
		(void) snprintf(bufp, sizeof (buf), "vnet%d:%s: ",
		    vnetp->instance, fname);
		bufp += strlen(bufp);
	}
	va_start(ap, fmt);
	(void) vsnprintf(bufp, sizeof (buf) - (bufp - buf), fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif

/* _init(9E): initialize the loadable module */
int
_init(void)
{
	int status;

	DBG1(NULL, "enter\n");

	mac_init_ops(&vnetops, "vnet");
	status = mod_install(&modlinkage);
	if (status != 0) {
		mac_fini_ops(&vnetops);
	} else {
		vdds_mod_init();
	}
	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
	int		status;

	DBG1(NULL, "enter\n");

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vnetops);
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vnet_t			*vnetp;
	int			status;
	int			instance;
	uint64_t		reg;
	char			qname[TASKQ_NAMELEN];
	vnet_attach_progress_t	attach_progress;

	attach_progress = AST_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	case DDI_PM_RESUME:
	default:
		goto vnet_attach_fail;
	}

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	/* allocate vnet_t and mac_t structures */
	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
	vnetp->dip = dip;
	vnetp->instance = instance;
	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
	attach_progress |= AST_vnet_alloc;

	vnet_ring_grp_init(vnetp);
	attach_progress |= AST_ring_init;

	status = vdds_init(vnetp);
	if (status != 0) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vdds_init;

	/* setup links to vnet_t from both devinfo and mac_t */
	ddi_set_driver_private(dip, (caddr_t)vnetp);

	/* read the mac address */
	status = vnet_read_mac_address(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_read_macaddr;

	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "reg", -1);
	if (reg == -1) {
		goto vnet_attach_fail;
	}
	vnetp->reg = reg;

	vnet_fdb_create(vnetp);
	attach_progress |= AST_fdbh_alloc;

	(void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
		    instance);
		goto vnet_attach_fail;
	}
	attach_progress |= AST_taskq_create;

	/* add to the list of vnet devices */
	WRITE_ENTER(&vnet_rw);
	vnetp->nextp = vnet_headp;
	vnet_headp = vnetp;
	RW_EXIT(&vnet_rw);

	attach_progress |= AST_vnet_list;

	/*
	 * Initialize the generic vnet plugin which provides communication via
	 * sun4v LDC (logical domain channel) based resources. This involves
	 * two steps; first, vgen_init() is invoked to read the various
	 * properties of the vnet device from its MD node (including its mtu
	 * which is needed for mac_register()) and obtain a handle to the
	 * vgen layer. After mac_register() is done and we have a mac handle,
	 * we then invoke vgen_init_mdeg() which registers with the MD event
	 * generator (mdeg) framework to allow LDC resource notifications.
	 * Note: this sequence also allows us to report the correct default #
	 * of pseudo rings (2 TX and 3 RX) in vnet_m_capab() which gets
	 * invoked in the context of mac_register(); and avoids conflicting
	 * with dynamic pseudo rx rings which get added/removed as a result
	 * of mdeg events in vgen.
	 */
	status = vgen_init(vnetp, reg, vnetp->dip,
	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		DERR(vnetp, "vgen_init() failed\n");
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vgen_init;

	status = vnet_mac_register(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	vnetp->link_state = LINK_STATE_UNKNOWN;
	attach_progress |= AST_macreg;

	status = vgen_init_mdeg(vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_init_mdeg;

	vnetp->attach_progress = attach_progress;

	DBG1(NULL, "instance(%d) exit\n", instance);
	return (DDI_SUCCESS);

vnet_attach_fail:
	vnetp->attach_progress = attach_progress;
	status = vnet_unattach(vnetp);
	ASSERT(status == 0);
	return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vnet_t		*vnetp;
	int		instance;

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	vnetp = ddi_get_driver_private(dip);
	if (vnetp == NULL) {
		goto vnet_detach_fail;
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		goto vnet_detach_fail;
	}

	if (vnet_unattach(vnetp) != 0) {
		goto vnet_detach_fail;
	}

	return (DDI_SUCCESS);

vnet_detach_fail:
	return (DDI_FAILURE);
}

/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_unregister() fails.
 * Otherwise, this function must ensure that all resources are freed and
 * return success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
	vnet_attach_progress_t	attach_progress;

	attach_progress = vnetp->attach_progress;

	/*
	 * Disable the mac device in the gldv3 subsystem. This can fail, in
	 * particular if there are still any open references to this mac
	 * device; in which case we just return failure without continuing to
	 * detach further.
	 * If it succeeds, we then invoke vgen_uninit() which should unregister
	 * any pseudo rings registered with the mac layer. Note we keep the
	 * AST_macreg flag on, so we can unregister with the mac layer at
	 * the end of this routine.
	 */
	if (attach_progress & AST_macreg) {
		if (mac_disable(vnetp->mh) != 0) {
			return (1);
		}
	}

	/*
	 * Now that we have disabled the device, we must finish all other steps
	 * and successfully return from this function; otherwise we will end up
	 * leaving the device in a broken/unusable state.
	 *
	 * First, release any hybrid resources assigned to this vnet device.
	 */
	if (attach_progress & AST_vdds_init) {
		vdds_cleanup(vnetp);
		attach_progress &= ~AST_vdds_init;
	}

	/*
	 * Uninit vgen. This stops further mdeg callbacks to this vnet
	 * device and/or its ports; and detaches any existing ports.
	 */
	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
		vgen_uninit(vnetp->vgenhdl);
		attach_progress &= ~AST_vgen_init;
		attach_progress &= ~AST_init_mdeg;
	}

	/* Destroy the taskq. */
	if (attach_progress & AST_taskq_create) {
		ddi_taskq_destroy(vnetp->taskqp);
		attach_progress &= ~AST_taskq_create;
	}

	/* Destroy fdb. */
	if (attach_progress & AST_fdbh_alloc) {
		vnet_fdb_destroy(vnetp);
		attach_progress &= ~AST_fdbh_alloc;
	}

	/* Remove from the device list */
	if (attach_progress & AST_vnet_list) {
		vnet_t		**vnetpp;
		/* unlink from instance(vnet_t) list */
		WRITE_ENTER(&vnet_rw);
		for (vnetpp = &vnet_headp; *vnetpp;
		    vnetpp = &(*vnetpp)->nextp) {
			if (*vnetpp == vnetp) {
				*vnetpp = vnetp->nextp;
				break;
			}
		}
		RW_EXIT(&vnet_rw);
		attach_progress &= ~AST_vnet_list;
	}

	if (attach_progress & AST_ring_init) {
		vnet_ring_grp_uninit(vnetp);
		attach_progress &= ~AST_ring_init;
	}

	if (attach_progress & AST_macreg) {
		VERIFY(mac_unregister(vnetp->mh) == 0);
		vnetp->mh = NULL;
		attach_progress &= ~AST_macreg;
	}

	if (attach_progress & AST_vnet_alloc) {
		rw_destroy(&vnetp->vrwlock);
		rw_destroy(&vnetp->vsw_fp_rw);
		attach_progress &= ~AST_vnet_alloc;
		KMEM_FREE(vnetp);
	}

	return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting
		 * of any new resources that are added (see
		 * vnet_res_start_task()), while we release the vrwlock in
		 * vnet_stop_resources() before stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}

/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}

/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	int		rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

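	/*
	 * Multicast filtering is not done locally; forward the request to
	 * the vswitch (LDC_SERVICE) resource, holding a reference on its
	 * fdb entry for the duration of the call.
	 */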
	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->vsw_fp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (EAGAIN);
	}
	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	rv = cbp->mc_multicst(macp->m_driver, add, mca);

	VNET_FDBE_REFRELE(vnetp->vsw_fp);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;
	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported, just return
	 * success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_tx_ring_stats_t	*statsp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;
	size_t			size;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	statsp = &tx_ringp->tx_ring_stats;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/* record the size now; mp may be consumed by the tx below */
		size = msgsize(mp);

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {

			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via the hybrid resource; otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		}

		statsp->obytes += size;
		statsp->opackets++;
		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}

/* get statistics from the device */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t *vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val.  This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}

static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
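	/*
	 * The pseudo tx rings are shared by all the ports/LDCs in the vgen
	 * layer (see vnet_tx_update()); mark them accordingly.
	 */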
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
	mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
	tx_grp->flowctl_thread = thread_create(NULL, 0,
	    vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}

static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	kt_did_t		tid = 0;

	tx_grp = &vnetp->tx_grp[0];

	/* Inform tx_notify_thread to exit */
	mutex_enter(&tx_grp->flowctl_lock);
	if (tx_grp->flowctl_thread != NULL) {
		tid = tx_grp->flowctl_thread->t_did;
		tx_grp->flowctl_done = B_TRUE;
		cv_signal(&tx_grp->flowctl_cv);
	}
	mutex_exit(&tx_grp->flowctl_lock);
	if (tid != 0)
		thread_join(tid);

	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	int			index;

	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
		/* no rings available */
		RW_EXIT(&rx_grp->lock);
		return (NULL);
	}

	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    index < rx_grp->max_ring_cnt; index++) {
		rx_ringp = &rx_grp->rings[index];
		if (rx_ringp->state == VNET_RXRING_FREE) {
			rx_ringp->state |= VNET_RXRING_INUSE;
			rx_grp->ring_cnt++;
			break;
		}
	}

	RW_EXIT(&rx_grp->lock);
	return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
	vnet_pseudo_rx_group_t	*rx_grp;

	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (ringp->state != VNET_RXRING_FREE) {
		ringp->state = VNET_RXRING_FREE;
		ringp->handle = NULL;
		rx_grp->ring_cnt--;
	}

	RW_EXIT(&rx_grp->lock);
}

/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
	mac_register_t	*macp;
	int		err;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (DDI_FAILURE);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = vnetp->dip;
	macp->m_src_addr = vnetp->curr_macaddr;
	macp->m_callbacks = &vnet_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vnetp->mtu;
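	/* m_margin: we can accommodate a VLAN tag beyond the max SDU */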
	macp->m_margin = VLAN_TAGSZ;

	macp->m_v12n = MAC_VIRT_LEVEL1;

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &vnetp->mh);
	mac_free(macp);
	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}

/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
	uchar_t		*macaddr;
	uint32_t	size;
	int		rv;

	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
		    macaddr_propname, rv);
		return (DDI_FAILURE);
	}
	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
	ddi_prop_free(macaddr);

	return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
	char		hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
	    vnetp->instance);
	vnetp->fdb_nchains = vnet_fdb_nchains;
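	/*
	 * The hash keys are pointer-sized values built from the MAC address
	 * by KEY_HASH(), so a ptrhash suffices; entries are vnet_res_t
	 * pointers freed elsewhere, hence the null value destructor.
	 */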
	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
	    mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
	/* destroy fdb-hash-table */
	if (vnetp->fdb_hashp != NULL) {
		mod_hash_destroy_hash(vnetp->fdb_hashp);
		vnetp->fdb_hashp = NULL;
		vnetp->fdb_nchains = 0;
	}
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;

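	/*
	 * KEY_HASH() (see vnet.h) folds the 6-byte MAC address into a single
	 * 64-bit value that serves as the mod_hash key for this entry.
	 */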
	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to a LDC_SERVICE resource,
	 * that is, a vswitch connection, it is added to the hash and also
	 * cached; an additional reference count reflects this. The HYBRID
	 * resource is not added to the hash, but only cached, as it is only
	 * used for sending out packets for unknown unicast destinations.
	 */
	vresp->refcnt = (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1 : 0;

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}

/*
 * Remove an entry from fdb.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;
	uint32_t	refcnt;
	vnet_res_t	*tmp;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * Remove the entry from fdb hash table.
	 * This prevents further references to this fdb entry.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t *)&tmp);
		if (rv != 0) {
			/*
			 * As the resources are added to the hash only
			 * after they are started, this can occur if
			 * a resource unregisters before it is ever started.
			 */
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		ASSERT(tmp == vnetp->vsw_fp);
		vnetp->vsw_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		vnetp->hio_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	}

	/*
	 * If there are threads already ref holding before the entry was
	 * removed from hash table, then wait for ref count to drop to zero.
	 */
	refcnt = (vresp->type == VIO_NET_RES_LDC_SERVICE) ? 1 : 0;
	while (vresp->refcnt > refcnt) {
		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
	}
}

/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
	uint64_t	key = 0;
	vnet_res_t	*vresp;
	int		rv;

	KEY_HASH(key, addrp->ether_addr_octet);

	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

	if (rv != 0)
		return (NULL);

	return (vresp);
}

/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}

/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and updates the chain in place.
 *
 * Arguments:
 *    pvid:  pvid of the vnet device for which packets are being received
 *    mp:    head of pkt chain to be validated and untagged
 *
 * Returns:
 *    mp:    head of updated chain of packets, returned via the mp argument
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header	*evhp;
	mblk_t				*bp;
	mblk_t				*bpt;
	mblk_t				*bph;
	mblk_t				*bpn;

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_rx_ring_t	*ringp;

	if ((vnetp == NULL) || (vnetp->mh == NULL)) {
		freemsgchain(mp);
		return;
	}

	ringp = vresp->rx_ringp;
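	/*
	 * Pass the ring's generation number up with the packets; the mac
	 * layer uses it to discard traffic delivered for a stale (since
	 * restarted) ring.
	 */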
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	int			i;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}

/*
 * vnet_tx_notify_thread:
 *
 * vnet_tx_ring_update() callback function wakes up this thread when
 * it gets called. This thread will call mac_tx_ring_update() to
 * notify upper mac of flow control getting relieved. Note that
 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
 * because vnet_tx_ring_update() is called from lower mac with
 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
 * the same lock.
 */
static void
vnet_tx_notify_thread(void *arg)
{
	callb_cpr_t		cprinfo;
	vnet_pseudo_tx_group_t	*tx_grp = (vnet_pseudo_tx_group_t *)arg;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	int			i;

	CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
	    "vnet_tx_notify_thread");

	mutex_enter(&tx_grp->flowctl_lock);
	while (!tx_grp->flowctl_done) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);

		for (i = 0; i < tx_grp->ring_cnt; i++) {
			tx_ringp = &tx_grp->rings[i];
			if (tx_ringp->woken_up) {
				tx_ringp->woken_up = B_FALSE;
				vnetp = tx_ringp->vnetp;
				mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
			}
		}
	}
	/*
	 * The tx_grp is being destroyed, exit the thread.
	 */
	tx_grp->flowctl_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

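/*
 * Ring update callback invoked from the lower mac (with mi_rw_lock held)
 * when tx flow control is relieved on a hw ring of the Hybrid resource;
 * it wakes vnet_tx_notify_thread() to issue the mac_tx_ring_update()
 * upcall (see the comment above vnet_tx_notify_thread()).
 */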
1472 void
1473 vnet_tx_ring_update(void *arg1, uintptr_t arg2)
1474 {
1475         vnet_t                  *vnetp = (vnet_t *)arg1;
1476         vnet_pseudo_tx_group_t  *tx_grp;
1477         vnet_pseudo_tx_ring_t   *tx_ringp;
1478         int                     i;
1479 
1480         tx_grp = &vnetp->tx_grp[0];
1481         for (i = 0; i < tx_grp->ring_cnt; i++) {
1482                 tx_ringp = &tx_grp->rings[i];
1483                 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
1484                         mutex_enter(&tx_grp->flowctl_lock);
1485                         tx_ringp->woken_up = B_TRUE;
1486                         cv_signal(&tx_grp->flowctl_cv);
1487                         mutex_exit(&tx_grp->flowctl_lock);
1488                         break;
1489                 }
1490         }
1491 }
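     /*
      * For reference, the flow-control relief path implemented by the two
      * functions above fits together as follows (all of the pieces are in
      * this file): vnet_tx_ring_update() is registered with the lower mac
      * via mac_client_tx_notify() in vnet_bind_hwrings(), and the handoff
      * looks roughly like this:
      *
      *    lower mac (tx descriptors freed)
      *        -> vnet_tx_ring_update()      called with mi_rw_lock held,
      *           cv_signal(flowctl_cv)      so it only signals the cv
      *        -> vnet_tx_notify_thread()    wakes up; no mac locks held
      *           mac_tx_ring_update()       upper mac restarts the tx ring
      */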
1492 
1493 /*
1494  * Propagate the new mtu of the vnet device to the mac layer. The update is
1495  * rejected if the device has already been plumbed. Returns 0 on success.
1496  */
1497 int
1498 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1499 {
1500         int     rv;
1501 
1502         if (vnetp == NULL || vnetp->mh == NULL) {
1503                 return (EINVAL);
1504         }
1505 
1506         WRITE_ENTER(&vnetp->vrwlock);
1507 
1508         if (vnetp->flags & VNET_STARTED) {
1509                 RW_EXIT(&vnetp->vrwlock);
1510                 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1511                     "update as the device is plumbed\n",
1512                     vnetp->instance);
1513                 return (EBUSY);
1514         }
1515 
1516         /* update mtu in the mac layer */
1517         rv = mac_maxsdu_update(vnetp->mh, mtu);
1518         if (rv != 0) {
1519                 RW_EXIT(&vnetp->vrwlock);
1520                 cmn_err(CE_NOTE,
1521                     "!vnet%d: Unable to update mtu with mac layer\n",
1522                     vnetp->instance);
1523                 return (EIO);
1524         }
1525 
1526         vnetp->mtu = mtu;
1527 
1528         RW_EXIT(&vnetp->vrwlock);
1529 
1530         return (0);
1531 }
1532 
1533 /*
1534  * Propagate the link state of the vnet device to the mac layer.
1535  */
1536 void
1537 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1538 {
1539         if (vnetp == NULL || vnetp->mh == NULL) {
1540                 return;
1541         }
1542 
1543         WRITE_ENTER(&vnetp->vrwlock);
1544         if (vnetp->link_state == link_state) {
1545                 RW_EXIT(&vnetp->vrwlock);
1546                 return;
1547         }
1548         vnetp->link_state = link_state;
1549         RW_EXIT(&vnetp->vrwlock);
1550 
1551         mac_link_update(vnetp->mh, link_state);
1552 }
1553 
1554 /*
1555  * vio_net_resource_reg -- An interface called to register a resource
1556  *      with vnet.
1557  *      macp -- a GLDv3 mac_register_t that carries all the details of
1558  *              a resource and its callbacks.
1559  *      type -- resource type.
1560  *      local_macaddr -- resource's MAC address. This is used to
1561  *                       associate a resource with a corresponding vnet.
1562  *      rem_macaddr -- remote side MAC address. This is ignored for
1563  *                     Hybrid resources.
1564  *      vhp -- A handle returned to the caller.
1565  *      vcb -- A set of callbacks provided to the caller.
1566  */
1567 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1568     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1569     vio_net_callbacks_t *vcb)
1570 {
1571         vnet_t          *vnetp;
1572         vnet_res_t      *vresp;
1573 
1574         vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1575         ether_copy(local_macaddr, vresp->local_macaddr);
1576         ether_copy(rem_macaddr, vresp->rem_macaddr);
1577         vresp->type = type;
1578         bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1579 
1580         DBG1(NULL, "Resource Registering type=0x%X\n", type);
1581 
1582         READ_ENTER(&vnet_rw);
1583         vnetp = vnet_headp;
1584         while (vnetp != NULL) {
1585                 if (VNET_MATCH_RES(vresp, vnetp)) {
1586                         vresp->vnetp = vnetp;
1587 
1588                         /* Setup kstats for hio resource */
1589                         if (vresp->type == VIO_NET_RES_HYBRID) {
1590                                 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1591                                     "hio", vresp);
1592                                 if (vresp->ksp == NULL) {
1593                                         cmn_err(CE_NOTE, "!vnet%d: Cannot "
1594                                             "create kstats for hio resource",
1595                                             vnetp->instance);
1596                                 }
1597                         }
1598                         vnet_add_resource(vnetp, vresp);
1599                         break;
1600                 }
1601                 vnetp = vnetp->nextp;
1602         }
1603         RW_EXIT(&vnet_rw);
1604         if (vresp->vnetp == NULL) {
1605                 DWARN(NULL, "No vnet instance");
1606                 kmem_free(vresp, sizeof (vnet_res_t));
1607                 return (ENXIO);
1608         }
1609 
1610         *vhp = vresp;
1611         vcb->vio_net_rx_cb = vnet_rx;
1612         vcb->vio_net_tx_update = vnet_tx_update;
1613         vcb->vio_net_report_err = vnet_handle_res_err;
1614 
1615         /* Bind the resource to pseudo ring(s) */
1616         if (vnet_bind_rings(vresp) != 0) {
1617                 (void) vnet_rem_resource(vnetp, vresp);
1618                 vnet_hio_destroy_kstats(vresp->ksp);
1619                 KMEM_FREE(vresp);
1620                 return (1);
1621         }
1622 
1623         /* Dispatch a task to start resources */
1624         vnet_dispatch_res_task(vnetp);
1625         return (0);
1626 }
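     /*
      * A minimal usage sketch of the registration interface above, modeled
      * on vnet_hio_mac_init() later in this file. The identifiers my_state,
      * my_callbacks, my_macaddr and peer_macaddr are hypothetical
      * placeholders for the caller's own data:
      *
      *    mac_register_t *macp;
      *    vio_net_handle_t vhp;
      *    vio_net_callbacks_t vcb;
      *    int rv;
      *
      *    if ((macp = mac_alloc(MAC_VERSION)) == NULL)
      *            return (EAGAIN);
      *    macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
      *    macp->m_driver = my_state;
      *    macp->m_callbacks = &my_callbacks;
      *    macp->m_min_sdu = 0;
      *    macp->m_max_sdu = ETHERMTU;
      *
      *    rv = vio_net_resource_reg(macp, VIO_NET_RES_LDC_GUEST,
      *        my_macaddr, peer_macaddr, &vhp, &vcb);
      *    mac_free(macp);
      *
      * On success, the caller keeps vhp for a later vio_net_resource_unreg()
      * and uses the returned callbacks (vio_net_rx_cb, vio_net_tx_update,
      * vio_net_report_err) to pass packets up and to report events.
      */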
1627 
1628 /*
1629  * vio_net_resource_unreg -- An interface to unregister a resource.
1630  */
1631 void
1632 vio_net_resource_unreg(vio_net_handle_t vhp)
1633 {
1634         vnet_res_t      *vresp = (vnet_res_t *)vhp;
1635         vnet_t          *vnetp = vresp->vnetp;
1636 
1637         DBG1(NULL, "Resource Unregistering hdl=0x%p", vhp);
1638 
1639         ASSERT(vnetp != NULL);
1640         /*
1641          * Remove the resource from fdb; this ensures
1642          * there are no references to the resource.
1643          */
1644         vnet_fdbe_del(vnetp, vresp);
1645 
1646         vnet_unbind_rings(vresp);
1647 
1648         /* Now remove the resource from the list */
1649         (void) vnet_rem_resource(vnetp, vresp);
1650 
1651         vnet_hio_destroy_kstats(vresp->ksp);
1652         KMEM_FREE(vresp);
1653 }
1654 
1655 static void
1656 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
1657 {
1658         WRITE_ENTER(&vnetp->vrwlock);
1659         vresp->nextp = vnetp->vres_list;
1660         vnetp->vres_list = vresp;
1661         RW_EXIT(&vnetp->vrwlock);
1662 }
1663 
1664 static vnet_res_t *
1665 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1666 {
1667         vnet_res_t      *vrp;
1668 
1669         WRITE_ENTER(&vnetp->vrwlock);
1670         if (vresp == vnetp->vres_list) {
1671                 vnetp->vres_list = vresp->nextp;
1672         } else {
1673                 vrp = vnetp->vres_list;
1674                 while (vrp->nextp != NULL) {
1675                         if (vrp->nextp == vresp) {
1676                                 vrp->nextp = vresp->nextp;
1677                                 break;
1678                         }
1679                         vrp = vrp->nextp;
1680                 }
1681         }
1682         vresp->vnetp = NULL;
1683         vresp->nextp = NULL;
1684 
1685         RW_EXIT(&vnetp->vrwlock);
1686 
1687         return (vresp);
1688 }
1689 
1690 /*
1691  * vnet_dds_rx -- an interface called by vgen to deliver DDS messages.
1692  */
1693 void
1694 vnet_dds_rx(void *arg, void *dmsg)
1695 {
1696         vnet_t *vnetp = arg;
1697         vdds_process_dds_msg(vnetp, dmsg);
1698 }
1699 
1700 /*
1701  * vnet_send_dds_msg -- An interface provided to DDS to send
1702  *      DDS messages. This simply sends the messages via vgen.
1703  */
1704 int
1705 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1706 {
1707         int rv = EINVAL;        /* fail if no vgen handle has been set up */
1708 
1709         if (vnetp->vgenhdl != NULL) {
1710                 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1711         }
1712         return (rv);
1713 }
1714 
1715 /*
1716  * vnet_dds_cleanup_hio -- an interface called by vgen to clean up hio resources.
1717  */
1718 void
1719 vnet_dds_cleanup_hio(vnet_t *vnetp)
1720 {
1721         vdds_cleanup_hio(vnetp);
1722 }
1723 
1724 /*
1725  * vnet_handle_res_err -- A callback function called by a resource
1726  *      to report an error. For example, vgen can call to report
1727  *      an LDC down/reset event. This will trigger cleanup of associated
1728  *      Hybrid resource.
1729  */
1730 /* ARGSUSED */
1731 static void
1732 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1733 {
1734         vnet_res_t *vresp = (vnet_res_t *)vrh;
1735         vnet_t *vnetp = vresp->vnetp;
1736 
1737         if (vnetp == NULL) {
1738                 return;
1739         }
1740         if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1741             (vresp->type != VIO_NET_RES_HYBRID)) {
1742                 return;
1743         }
1744 
1745         vdds_cleanup_hio(vnetp);
1746 }
1747 
1748 /*
1749  * vnet_dispatch_res_task -- A function to dispatch a task that starts resources.
1750  */
1751 static void
1752 vnet_dispatch_res_task(vnet_t *vnetp)
1753 {
1754         int rv;
1755 
1756         /*
1757          * Dispatch the task. It could be the case that vnetp->flags does
1758          * not have VNET_STARTED set. This is ok as vnet_res_start_task()
1759          * can abort the task when the task is started. See related comments
1760          * in vnet_m_stop() and vnet_stop_resources().
1761          */
1762         rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1763             vnetp, DDI_NOSLEEP);
1764         if (rv != DDI_SUCCESS) {
1765                 cmn_err(CE_WARN,
1766                     "vnet%d: Can't dispatch start resource task",
1767                     vnetp->instance);
1768         }
1769 }
1770 
1771 /*
1772  * vnet_res_start_task -- A taskq callback function that starts a resource.
1773  */
1774 static void
1775 vnet_res_start_task(void *arg)
1776 {
1777         vnet_t *vnetp = arg;
1778 
1779         WRITE_ENTER(&vnetp->vrwlock);
1780         if (vnetp->flags & VNET_STARTED) {
1781                 vnet_start_resources(vnetp);
1782         }
1783         RW_EXIT(&vnetp->vrwlock);
1784 }
1785 
1786 /*
1787  * vnet_start_resources -- starts all resources associated with
1788  *      a vnet.
1789  */
1790 static void
1791 vnet_start_resources(vnet_t *vnetp)
1792 {
1793         mac_register_t  *macp;
1794         mac_callbacks_t *cbp;
1795         vnet_res_t      *vresp;
1796         int rv;
1797 
1798         DBG1(vnetp, "enter\n");
1799 
1800         ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1801 
1802         for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1803                 /* skip if it is already started */
1804                 if (vresp->flags & VNET_STARTED) {
1805                         continue;
1806                 }
1807                 macp = &vresp->macreg;
1808                 cbp = macp->m_callbacks;
1809                 rv = cbp->mc_start(macp->m_driver);
1810                 if (rv == 0) {
1811                         /*
1812                          * Successfully started the resource, so now
1813                          * add it to the fdb.
1814                          */
1815                         vresp->flags |= VNET_STARTED;
1816                         vnet_fdbe_add(vnetp, vresp);
1817                 }
1818         }
1819 
1820         DBG1(vnetp, "exit\n");
1821 
1822 }
1823 
1824 /*
1825  * vnet_stop_resources -- stop all resources associated with a vnet.
1826  */
1827 static void
1828 vnet_stop_resources(vnet_t *vnetp)
1829 {
1830         vnet_res_t      *vresp;
1831         mac_register_t  *macp;
1832         mac_callbacks_t *cbp;
1833 
1834         DBG1(vnetp, "enter\n");
1835 
1836         ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1837 
1838         for (vresp = vnetp->vres_list; vresp != NULL; ) {
1839                 if (vresp->flags & VNET_STARTED) {
1840                         /*
1841                          * Release the lock while invoking mc_stop() of the
1842                          * underlying resource. We hold a reference to this
1843                          * resource to prevent being removed from the list in
1844                          * vio_net_resource_unreg(). Note that new resources
1845                          * can be added to the head of the list while the lock
1846                          * is released, but they won't be started, as
1847                          * VNET_STARTED flag has been cleared for the vnet
1848                          * device in vnet_m_stop(). Also, while the lock is
1849                          * released a resource could be removed from the list
1850                          * in vio_net_resource_unreg(); but that is ok, as we
1851                          * re-acquire the lock and only then access the forward
1852                          * link (vresp->nextp) to continue with the next
1853                          * resource.
1854                          */
1855                         vresp->flags &= ~VNET_STARTED;
1856                         vresp->flags |= VNET_STOPPING;
1857                         macp = &vresp->macreg;
1858                         cbp = macp->m_callbacks;
1859                         VNET_FDBE_REFHOLD(vresp);
1860                         RW_EXIT(&vnetp->vrwlock);
1861 
1862                         cbp->mc_stop(macp->m_driver);
1863 
1864                         WRITE_ENTER(&vnetp->vrwlock);
1865                         vresp->flags &= ~VNET_STOPPING;
1866                         VNET_FDBE_REFRELE(vresp);
1867                 }
1868                 vresp = vresp->nextp;
1869         }
1870         DBG1(vnetp, "exit\n");
1871 }
1872 
1873 /*
1874  * Setup kstats for the HIO statistics.
1875  * NOTE: the synchronization for the statistics is the
1876  * responsibility of the caller.
1877  */
1878 kstat_t *
1879 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1880 {
1881         kstat_t *ksp;
1882         vnet_t *vnetp = vresp->vnetp;
1883         vnet_hio_kstats_t *hiokp;
1884         size_t size;
1885 
1886         ASSERT(vnetp != NULL);
1887         size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1888         ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1889             KSTAT_TYPE_NAMED, size, 0);
1890         if (ksp == NULL) {
1891                 return (NULL);
1892         }
1893 
1894         hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1895         kstat_named_init(&hiokp->ipackets,               "ipackets",
1896             KSTAT_DATA_ULONG);
             kstat_named_init(&hiokp->ipackets64,             "ipackets64",
                 KSTAT_DATA_ULONGLONG);
1897         kstat_named_init(&hiokp->ierrors,                "ierrors",
1898             KSTAT_DATA_ULONG);
1899         kstat_named_init(&hiokp->opackets,               "opackets",
1900             KSTAT_DATA_ULONG);
             kstat_named_init(&hiokp->opackets64,             "opackets64",
                 KSTAT_DATA_ULONGLONG);
1901         kstat_named_init(&hiokp->oerrors,                "oerrors",
1902             KSTAT_DATA_ULONG);
1903 
1904 
1905         /* MIB II kstat variables */
1906         kstat_named_init(&hiokp->rbytes,         "rbytes",
1907             KSTAT_DATA_ULONG);
             kstat_named_init(&hiokp->rbytes64,       "rbytes64",
                 KSTAT_DATA_ULONGLONG);
1908         kstat_named_init(&hiokp->obytes,         "obytes",
1909             KSTAT_DATA_ULONG);
             kstat_named_init(&hiokp->obytes64,       "obytes64",
                 KSTAT_DATA_ULONGLONG);
1910         kstat_named_init(&hiokp->multircv,               "multircv",
1911             KSTAT_DATA_ULONG);
1912         kstat_named_init(&hiokp->multixmt,               "multixmt",
1913             KSTAT_DATA_ULONG);
1914         kstat_named_init(&hiokp->brdcstrcv,              "brdcstrcv",
1915             KSTAT_DATA_ULONG);
1916         kstat_named_init(&hiokp->brdcstxmt,              "brdcstxmt",
1917             KSTAT_DATA_ULONG);
1918         kstat_named_init(&hiokp->norcvbuf,               "norcvbuf",
1919             KSTAT_DATA_ULONG);
1920         kstat_named_init(&hiokp->noxmtbuf,               "noxmtbuf",
1921             KSTAT_DATA_ULONG);
1922 
1923         ksp->ks_update = vnet_hio_update_kstats;
1924         ksp->ks_private = (void *)vresp;
1925         kstat_install(ksp);
1926         return (ksp);
1927 }
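     /*
      * Once installed, these statistics can be read from userland with
      * kstat(1M). For example, assuming DRV_NAME is "vnet" and instance 0
      * (vio_net_resource_reg() creates them with ks_name "hio"):
      *
      *    # kstat -m vnet -i 0 -n hio
      */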
1928 
1929 /*
1930  * Destroy kstats.
1931  */
1932 static void
1933 vnet_hio_destroy_kstats(kstat_t *ksp)
1934 {
1935         if (ksp != NULL)
1936                 kstat_delete(ksp);
1937 }
1938 
1939 /*
1940  * Update the kstats.
1941  */
1942 static int
1943 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1944 {
1945         vnet_t *vnetp;
1946         vnet_res_t *vresp;
1947         vnet_hio_stats_t statsp;
1948         vnet_hio_kstats_t *hiokp;
1949 
1950         vresp = (vnet_res_t *)ksp->ks_private;
1951         vnetp = vresp->vnetp;
1952 
1953         bzero(&statsp, sizeof (vnet_hio_stats_t));
1954 
1955         READ_ENTER(&vnetp->vsw_fp_rw);
1956         if (vnetp->hio_fp == NULL) {
1957                 /* not using hio resources, just return */
1958                 RW_EXIT(&vnetp->vsw_fp_rw);
1959                 return (0);
1960         }
1961         VNET_FDBE_REFHOLD(vnetp->hio_fp);
1962         RW_EXIT(&vnetp->vsw_fp_rw);
1963         vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1964         VNET_FDBE_REFRELE(vnetp->hio_fp);
1965 
1966         hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1967 
1968         if (rw == KSTAT_READ) {
1969                 /* Link Input/Output stats */
1970                 hiokp->ipackets.value.ul     = (uint32_t)statsp.ipackets;
1971                 hiokp->ipackets64.value.ull  = statsp.ipackets;
1972                 hiokp->ierrors.value.ul              = statsp.ierrors;
1973                 hiokp->opackets.value.ul     = (uint32_t)statsp.opackets;
1974                 hiokp->opackets64.value.ull  = statsp.opackets;
1975                 hiokp->oerrors.value.ul              = statsp.oerrors;
1976 
1977                 /* MIB II kstat variables */
1978                 hiokp->rbytes.value.ul               = (uint32_t)statsp.rbytes;
1979                 hiokp->rbytes64.value.ull    = statsp.rbytes;
1980                 hiokp->obytes.value.ul               = (uint32_t)statsp.obytes;
1981                 hiokp->obytes64.value.ull    = statsp.obytes;
1982                 hiokp->multircv.value.ul     = statsp.multircv;
1983                 hiokp->multixmt.value.ul     = statsp.multixmt;
1984                 hiokp->brdcstrcv.value.ul    = statsp.brdcstrcv;
1985                 hiokp->brdcstxmt.value.ul    = statsp.brdcstxmt;
1986                 hiokp->norcvbuf.value.ul     = statsp.norcvbuf;
1987                 hiokp->noxmtbuf.value.ul     = statsp.noxmtbuf;
1988         } else {
1989                 return (EACCES);
1990         }
1991 
1992         return (0);
1993 }
1994 
1995 static void
1996 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1997 {
1998         mac_register_t          *macp;
1999         mac_callbacks_t         *cbp;
2000         uint64_t                val;
2001         int                     stat;
2002 
2003         /*
2004          * Get the specified statistics from the underlying nxge device.
2005          */
2006         macp = &vresp->macreg;
2007         cbp = macp->m_callbacks;
2008         for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
2009                 if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
2010                         switch (stat) {
2011                         case MAC_STAT_IPACKETS:
2012                                 statsp->ipackets = val;
2013                                 break;
2014 
2015                         case MAC_STAT_IERRORS:
2016                                 statsp->ierrors = val;
2017                                 break;
2018 
2019                         case MAC_STAT_OPACKETS:
2020                                 statsp->opackets = val;
2021                                 break;
2022 
2023                         case MAC_STAT_OERRORS:
2024                                 statsp->oerrors = val;
2025                                 break;
2026 
2027                         case MAC_STAT_RBYTES:
2028                                 statsp->rbytes = val;
2029                                 break;
2030 
2031                         case MAC_STAT_OBYTES:
2032                                 statsp->obytes = val;
2033                                 break;
2034 
2035                         case MAC_STAT_MULTIRCV:
2036                                 statsp->multircv = val;
2037                                 break;
2038 
2039                         case MAC_STAT_MULTIXMT:
2040                                 statsp->multixmt = val;
2041                                 break;
2042 
2043                         case MAC_STAT_BRDCSTRCV:
2044                                 statsp->brdcstrcv = val;
2045                                 break;
2046 
2047                         case MAC_STAT_BRDCSTXMT:
2048                                 statsp->brdcstxmt = val;
2049                                 break;
2050 
2051                         case MAC_STAT_NOXMTBUF:
2052                                 statsp->noxmtbuf = val;
2053                                 break;
2054 
2055                         case MAC_STAT_NORCVBUF:
2056                                 statsp->norcvbuf = val;
2057                                 break;
2058 
2059                         default:
2060                                 /*
2061                                  * stats we are not interested in.
2062                                  */
2063                                 break;
2064                         }
2065                 }
2066         }
2067 }
2068 
2069 static boolean_t
2070 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
2071 {
2072         vnet_t  *vnetp = (vnet_t *)arg;
2073 
2074         if (vnetp == NULL) {
2075                 return (B_FALSE);
2076         }
2077 
2078         switch (cap) {
2079 
2080         case MAC_CAPAB_RINGS: {
2081 
2082                 mac_capab_rings_t *cap_rings = cap_data;
2083                 /*
2084                  * Rings Capability Notes:
2085                  * We advertise rings to make use of the rings framework in
2086                  * gldv3 mac layer, to improve the performance. This is
2087                  * specifically needed when a Hybrid resource (with multiple
2088                  * tx/rx hardware rings) is assigned to a vnet device. We also
2089                  * leverage this for the normal case when no Hybrid resource is
2090                  * assigned.
2091                  *
2092                  * Ring Allocation:
2093                  * - TX path:
2094                  * We expose a pseudo ring group with 2 pseudo tx rings (as
2095                  * currently HybridIO exports only 2 rings). In the normal case,
2096                  * transmit traffic that comes down to the driver through the
2097                  * mri_tx (vnet_tx_ring_send()) entry point goes through the
2098                  * distributed switching algorithm in vnet and gets transmitted
2099                  * over a port/LDC in the vgen layer to either the vswitch or a
2100                  * peer vnet. If and when a Hybrid resource is assigned to the
2101                  * vnet, we obtain the tx ring information of the Hybrid device
2102                  * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2103                  * Traffic being sent over the Hybrid resource by the mac layer
2104                  * gets spread across both hw rings, as they are mapped to the
2105                  * 2 pseudo tx rings in vnet.
2106                  *
2107                  * - RX path:
2108                  * We expose a pseudo ring group with 3 pseudo rx rings (static
2109                  * rings) initially. The first (default) pseudo rx ring is
2110                  * reserved for the resource that connects to the vswitch
2111                  * service. The next 2 rings are reserved for a Hybrid resource
2112                  * that may be assigned to the vnet device. If and when a
2113                  * Hybrid resource is assigned to the vnet, we obtain the rx
2114                  * ring information of the Hybrid device (nxge) and map these
2115                  * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2116                  * resource that connects to a peer vnet, we dynamically
2117                  * allocate a pseudo rx ring and map it to that resource, when
2118                  * the resource gets added; and the pseudo rx ring is
2119                  * dynamically registered with the upper mac layer. We do the
2120                  * reverse and unregister the ring with the mac layer when
2121                  * the resource gets removed.
2122                  *
2123                  * Synchronization notes:
2124                  * We don't need any lock to protect members of ring structure,
2125                  * specifically ringp->hw_rh, in either the TX or the RX ring,
2126                  * as explained below.
2127                  * - TX ring:
2128                  * ring->hw_rh is initialized only when a Hybrid resource is
2129                  * associated; and gets referenced only in vnet_hio_tx(). The
2130                  * Hybrid resource itself is available in fdb only after tx
2131                  * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2132                  * we call vnet_bind_rings() first and then call
2133                  * vnet_start_resources() which adds an entry to fdb. For
2134                  * traffic going over LDC resources, we don't reference
2135                  * ring->hw_rh at all.
2136                  * - RX ring:
2137                  * For rings mapped to Hybrid resource ring->hw_rh is
2138                  * initialized and only then do we add the rx callback for
2139                  * the underlying Hybrid resource; we disable callbacks before
2140                  * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2141                  * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2142                  * (vio_net_resource_unreg()).
2143                  * Also, we access ring->hw_rh in vnet_rx_ring_stat().
2144                  * Note that for rings mapped to a Hybrid resource, though the
2145                  * rings are statically registered with the mac layer, their
2146                  * hardware ring mapping (ringp->hw_rh) can be torn down in
2147                  * vnet_unbind_hwrings() while the kstat operation is in
2148                  * progress. To protect against this, we hold a reference to
2149                  * the resource in FDB; this ensures that the thread in
2150                  * vio_net_resource_unreg() waits for the reference to be
2151                  * dropped before unbinding the ring.
2152                  *
2153                  * We don't need to do this for rings mapped to LDC resources.
2154                  * These rings are registered/unregistered dynamically with
2155                  * the mac layer and so any attempt to unregister the ring
2156                  * while a kstat operation is in progress will block in
2157                  * mac_group_rem_ring(), which implicitly protects the
2158                  * resource (ringp->hw_rh) from disappearing.
2159                  */
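                     /*
                      * Layout summary of the above (a sketch; the rx ring
                      * count grows and shrinks as LDC resources come and go):
                      *
                      *    RX group 0: ring 0    - LDC to the vswitch (default)
                      *                rings 1-2 - Hybrid (nxge) hw rx rings,
                      *                            when a Hybrid resource is
                      *                            assigned
                      *                rings 3-N - one per LDC to a peer vnet,
                      *                            added/removed dynamically
                      *    TX group 0: rings 0-1 - shared by all LDC resources;
                      *                            also mapped 1:1 to the Hybrid
                      *                            hw tx rings, when assigned
                      */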
2160 
2161                 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2162                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2163 
2164                         /*
2165                          * The ring_cnt for rx grp is initialized in
2166                          * vnet_ring_grp_init(). Later, the ring_cnt gets
2167                          * updated dynamically whenever LDC resources are added
2168                          * or removed.
2169                          */
2170                         cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2171                         cap_rings->mr_rget = vnet_get_ring;
2172 
2173                         cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2174                         cap_rings->mr_gget = vnet_get_group;
2175                         cap_rings->mr_gaddring = NULL;
2176                         cap_rings->mr_gremring = NULL;
2177                 } else {
2178                         cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2179 
2180                         /*
2181                          * The ring_cnt for tx grp is initialized in
2182                          * vnet_ring_grp_init() and remains constant, as we
2183                  * do not support dynamic tx rings for now.
2184                          */
2185                         cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2186                         cap_rings->mr_rget = vnet_get_ring;
2187 
2188                         /*
2189                          * Transmit rings are not grouped; i.e, the number of
2190                          * transmit ring groups advertised should be set to 0.
2191                          */
2192                         cap_rings->mr_gnum = 0;
2193 
2194                         cap_rings->mr_gget = vnet_get_group;
2195                         cap_rings->mr_gaddring = NULL;
2196                         cap_rings->mr_gremring = NULL;
2197                 }
2198                 return (B_TRUE);
2199 
2200         }
2201 
2202         default:
2203                 break;
2204 
2205         }
2206 
2207         return (B_FALSE);
2208 }
2209 
2210 /*
2211  * Callback function for the MAC layer to get ring information.
2212  */
2213 static void
2214 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2215     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2216 {
2217         vnet_t  *vnetp = arg;
2218 
2219         switch (rtype) {
2220 
2221         case MAC_RING_TYPE_RX: {
2222 
2223                 vnet_pseudo_rx_group_t  *rx_grp;
2224                 vnet_pseudo_rx_ring_t   *rx_ringp;
2225                 mac_intr_t              *mintr;
2226 
2227                 /* We advertised only one RX group */
2228                 ASSERT(g_index == 0);
2229                 rx_grp = &vnetp->rx_grp[g_index];
2230 
2231                 /* Check the current # of rings in the rx group */
2232                 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2233 
2234                 /* Get the ring based on the index */
2235                 rx_ringp = &rx_grp->rings[r_index];
2236 
2237                 rx_ringp->handle = r_handle;
2238                 /*
2239                  * Note: we don't need to save the incoming r_index in rx_ring,
2240                  * as vnet_ring_grp_init() would have initialized the index for
2241                  * each ring in the array.
2242                  */
2243                 rx_ringp->grp = rx_grp;
2244                 rx_ringp->vnetp = vnetp;
2245 
2246                 mintr = &infop->mri_intr;
2247                 mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2248                 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2249                 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2250 
2251                 infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2252                 infop->mri_start = vnet_rx_ring_start;
2253                 infop->mri_stop = vnet_rx_ring_stop;
2254                 infop->mri_stat = vnet_rx_ring_stat;
2255 
2256                 /* Set the poll function, as this is an rx ring */
2257                 infop->mri_poll = vnet_rx_poll;
2258                 /*
2259                  * The MAC_RING_RX_ENQUEUE bit needed to be set for nxge,
2260                  * which was not sending packet chains in interrupt
2261                  * context. For such drivers, packets are queued in
2262                  * Rx soft rings so that we get a chance to switch
2263                  * into polling mode under backlog. That bug (not
2264                  * sending packet chains) has since been fixed. Once
2265                  * the performance impact is measured, this workaround
2266                  * will be removed.
2267                  */
2268                 infop->mri_flags = (vnet_mac_rx_queuing ?
2269                     MAC_RING_RX_ENQUEUE : 0);
2270                 break;
2271         }
2272 
2273         case MAC_RING_TYPE_TX: {
2274                 vnet_pseudo_tx_group_t  *tx_grp;
2275                 vnet_pseudo_tx_ring_t   *tx_ringp;
2276 
2277                 /*
2278                  * No need to check grp index; mac layer passes -1 for it.
2279                  */
2280                 tx_grp = &vnetp->tx_grp[0];
2281 
2282                 /* Check the # of rings in the tx group */
2283                 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2284 
2285                 /* Get the ring based on the index */
2286                 tx_ringp = &tx_grp->rings[r_index];
2287 
2288                 tx_ringp->handle = r_handle;
2289                 tx_ringp->index = r_index;
2290                 tx_ringp->grp = tx_grp;
2291                 tx_ringp->vnetp = vnetp;
2292 
2293                 infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2294                 infop->mri_start = vnet_tx_ring_start;
2295                 infop->mri_stop = vnet_tx_ring_stop;
2296                 infop->mri_stat = vnet_tx_ring_stat;
2297 
2298                 /* Set the transmit function, as this is a tx ring */
2299                 infop->mri_tx = vnet_tx_ring_send;
2300                 /*
2301                  * MAC_RING_TX_SERIALIZE bit needs to be set while
2302                  * hybridIO is enabled, to work around tx lock
2303                  * contention issues in nxge.
2304                  */
2305                 infop->mri_flags = (vnet_mac_tx_serialize ?
2306                     MAC_RING_TX_SERIALIZE : 0);
2307                 break;
2308         }
2309 
2310         default:
2311                 break;
2312         }
2313 }
2314 
2315 /*
2316  * Callback function for the MAC layer to get group information.
2317  */
2318 static void
2319 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2320         mac_group_info_t *infop, mac_group_handle_t handle)
2321 {
2322         vnet_t  *vnetp = (vnet_t *)arg;
2323 
2324         switch (type) {
2325 
2326         case MAC_RING_TYPE_RX:
2327         {
2328                 vnet_pseudo_rx_group_t  *rx_grp;
2329 
2330                 /* We advertised only one RX group */
2331                 ASSERT(index == 0);
2332 
2333                 rx_grp = &vnetp->rx_grp[index];
2334                 rx_grp->handle = handle;
2335                 rx_grp->index = index;
2336                 rx_grp->vnetp = vnetp;
2337 
2338                 infop->mgi_driver = (mac_group_driver_t)rx_grp;
2339                 infop->mgi_start = NULL;
2340                 infop->mgi_stop = NULL;
2341                 infop->mgi_addmac = vnet_addmac;
2342                 infop->mgi_remmac = vnet_remmac;
2343                 infop->mgi_count = rx_grp->ring_cnt;
2344 
2345                 break;
2346         }
2347 
2348         case MAC_RING_TYPE_TX:
2349         {
2350                 vnet_pseudo_tx_group_t  *tx_grp;
2351 
2352                 /* We advertised only one TX group */
2353                 ASSERT(index == 0);
2354 
2355                 tx_grp = &vnetp->tx_grp[index];
2356                 tx_grp->handle = handle;
2357                 tx_grp->index = index;
2358                 tx_grp->vnetp = vnetp;
2359 
2360                 infop->mgi_driver = (mac_group_driver_t)tx_grp;
2361                 infop->mgi_start = NULL;
2362                 infop->mgi_stop = NULL;
2363                 infop->mgi_addmac = NULL;
2364                 infop->mgi_remmac = NULL;
2365                 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2366 
2367                 break;
2368         }
2369 
2370         default:
2371                 break;
2372 
2373         }
2374 }
2375 
2376 static int
2377 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2378 {
2379         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2380         int                     err;
2381 
2382         /*
2383          * If this ring is mapped to a LDC resource, simply mark the state to
2384          * indicate the ring is started and return.
2385          */
2386         if ((rx_ringp->state &
2387             (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2388                 rx_ringp->gen_num = mr_gen_num;
2389                 rx_ringp->state |= VNET_RXRING_STARTED;
2390                 return (0);
2391         }
2392 
2393         ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2394 
2395         /*
2396          * This must be a ring reserved for a hwring. If the hwring is not
2397          * bound yet, simply mark the state to indicate the ring is started and
2398          * return. If and when a hybrid resource is activated for this vnet
2399          * device, we will bind the hwring and start it then. If a hwring is
2400          * already bound, start it now.
2401          */
2402         if (rx_ringp->hw_rh == NULL) {
2403                 rx_ringp->gen_num = mr_gen_num;
2404                 rx_ringp->state |= VNET_RXRING_STARTED;
2405                 return (0);
2406         }
2407 
2408         err = mac_hwring_start(rx_ringp->hw_rh);
2409         if (err == 0) {
2410                 rx_ringp->gen_num = mr_gen_num;
2411                 rx_ringp->state |= VNET_RXRING_STARTED;
2412         } else {
2413                 err = ENXIO;
2414         }
2415 
2416         return (err);
2417 }
2418 
2419 static void
2420 vnet_rx_ring_stop(mac_ring_driver_t arg)
2421 {
2422         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2423 
2424         /*
2425          * If this ring is mapped to a LDC resource, simply mark the state to
2426          * indicate the ring is now stopped and return.
2427          */
2428         if ((rx_ringp->state &
2429             (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2430                 rx_ringp->state &= ~VNET_RXRING_STARTED;
2431                 return;
2432         }
2433 
2434         ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2435 
2436         /*
2437          * This must be a ring reserved for a hwring. If the hwring is not
2438          * bound yet, simply mark the state to indicate the ring is stopped and
2439          * return. If a hwring is already bound, stop it now.
2440          */
2441         if (rx_ringp->hw_rh == NULL) {
2442                 rx_ringp->state &= ~VNET_RXRING_STARTED;
2443                 return;
2444         }
2445 
2446         mac_hwring_stop(rx_ringp->hw_rh);
2447         rx_ringp->state &= ~VNET_RXRING_STARTED;
2448 }
2449 
2450 static int
2451 vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2452 {
2453         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
2454         vnet_t                  *vnetp = (vnet_t *)rx_ringp->vnetp;
2455         vnet_res_t              *vresp;
2456         mac_register_t          *macp;
2457         mac_callbacks_t         *cbp;
2458 
2459         /*
2460          * Refer to vnet_m_capab() function for detailed comments on ring
2461          * synchronization.
2462          */
2463         if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
2464                 READ_ENTER(&vnetp->vsw_fp_rw);
2465                 if (vnetp->hio_fp == NULL) {
2466                         RW_EXIT(&vnetp->vsw_fp_rw);
2467                         return (0);
2468                 }
2469 
2470                 VNET_FDBE_REFHOLD(vnetp->hio_fp);
2471                 RW_EXIT(&vnetp->vsw_fp_rw);
2472                 (void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
2473                 VNET_FDBE_REFRELE(vnetp->hio_fp);
2474                 return (0);
2475         }
2476 
2477         ASSERT((rx_ringp->state &
2478             (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
2479         vresp = (vnet_res_t *)rx_ringp->hw_rh;
2480         macp = &vresp->macreg;
2481         cbp = macp->m_callbacks;
2482 
2483         cbp->mc_getstat(macp->m_driver, stat, val);
2484 
2485         return (0);
2486 }
2487 
2488 /* ARGSUSED */
2489 static int
2490 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2491 {
2492         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2493 
2494         tx_ringp->state |= VNET_TXRING_STARTED;
2495         return (0);
2496 }
2497 
2498 static void
2499 vnet_tx_ring_stop(mac_ring_driver_t arg)
2500 {
2501         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2502 
2503         tx_ringp->state &= ~VNET_TXRING_STARTED;
2504 }
2505 
2506 static int
2507 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2508 {
2509         vnet_pseudo_tx_ring_t   *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
2510         vnet_tx_ring_stats_t    *statsp;
2511 
2512         statsp = &tx_ringp->tx_ring_stats;
2513 
2514         switch (stat) {
2515         case MAC_STAT_OPACKETS:
2516                 *val = statsp->opackets;
2517                 break;
2518 
2519         case MAC_STAT_OBYTES:
2520                 *val = statsp->obytes;
2521                 break;
2522 
2523         default:
2524                 *val = 0;
2525                 return (ENOTSUP);
2526         }
2527 
2528         return (0);
2529 }
2530 
2531 /*
2532  * Disable polling for a ring and enable its interrupt.
2533  */
2534 static int
2535 vnet_ring_enable_intr(void *arg)
2536 {
2537         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2538         vnet_res_t              *vresp;
2539 
2540         if (rx_ringp->hw_rh == NULL) {
2541                 /*
2542                  * The enable-interrupt entry point was invoked, but the
2543                  * ring is not bound to any underlying resource. This must
2544                  * be a ring reserved for a Hybrid resource and no such
2545                  * resource has been assigned yet. Simply return success.
2546                  */
2547                 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2548                 return (0);
2549         }
2550 
2551         /*
2552          * The rx ring has been bound to either a LDC or a Hybrid resource.
2553          * Call the appropriate function to enable interrupts for the ring.
2554          */
2555         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2556                 return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2557         } else {
2558                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2559                 return (vgen_enable_intr(vresp->macreg.m_driver));
2560         }
2561 }
2562 
2563 /*
2564  * Enable polling for a ring and disable its interrupt.
2565  */
2566 static int
2567 vnet_ring_disable_intr(void *arg)
2568 {
2569         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2570         vnet_res_t              *vresp;
2571 
2572         if (rx_ringp->hw_rh == NULL) {
2573                 /*
2574                  * The disable-interrupt entry point was invoked, but the
2575                  * ring is not bound to any underlying resource. This must
2576                  * be a ring reserved for a Hybrid resource and no such
2577                  * resource has been assigned yet. Simply return success.
2578                  */
2579                 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2580                 return (0);
2581         }
2582 
2583         /*
2584          * The rx ring has been bound to either a LDC or a Hybrid resource.
2585          * Call the appropriate function to disable interrupts for the ring.
2586          */
2587         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2588                 return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2589         } else {
2590                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2591                 return (vgen_disable_intr(vresp->macreg.m_driver));
2592         }
2593 }
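     /*
      * For reference, the upper mac drives the two entry points above,
      * together with vnet_rx_poll() below, roughly as follows when it
      * moves a ring between interrupt and polling mode under backlog:
      *
      *    mi_disable (vnet_ring_disable_intr)  - enter polling mode
      *        loop: mri_poll (vnet_rx_poll)    - pull packet chains directly
      *    mi_enable (vnet_ring_enable_intr)    - backlog drained; resume
      *                                           interrupt-driven delivery
      */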
2594 
2595 /*
2596  * Poll up to 'bytes_to_pickup' bytes of packets from the rx ring.
2597  */
2598 static mblk_t *
2599 vnet_rx_poll(void *arg, int bytes_to_pickup)
2600 {
2601         vnet_pseudo_rx_ring_t   *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2602         mblk_t                  *mp = NULL;
2603         vnet_res_t              *vresp;
2604         vnet_t                  *vnetp = rx_ringp->vnetp;
2605 
2606         if (rx_ringp->hw_rh == NULL) {
2607                 return (NULL);
2608         }
2609 
2610         if (rx_ringp->state & VNET_RXRING_HYBRID) {
2611                 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2612                 /*
2613                  * Packets received over a hybrid resource need additional
2614                  * processing to remove the tag, for the pvid case. The
2615                  * underlying resource is not aware of the vnet's pvid and thus
2616                  * packets are received with the vlan tag in the header; unlike
2617                  * packets that are received over an ldc channel, in which case
2618                  * the peer vnet/vsw would have already removed the tag.
2619                  */
2620                 if (vnetp->pvid != vnetp->default_vlan_id) {
2621                         vnet_rx_frames_untag(vnetp->pvid, &mp);
2622                 }
2623         } else {
2624                 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2625                 mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
2626         }
2627         return (mp);
2628 }
2629 
2630 /* ARGSUSED */
2631 void
2632 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2633         boolean_t loopback)
2634 {
2635         vnet_t                  *vnetp = (vnet_t *)arg;
2636         vnet_pseudo_rx_ring_t   *ringp = (vnet_pseudo_rx_ring_t *)mrh;
2637 
2638         /*
2639          * Packets received over a hybrid resource need additional processing
2640          * to remove the tag, for the pvid case. The underlying resource is
2641          * not aware of the vnet's pvid and thus packets are received with the
2642          * vlan tag in the header; unlike packets received over an ldc
2643          * channel, in which case the peer vnet/vsw would have already removed
2644          * the tag.
2645          */
2646         if (vnetp->pvid != vnetp->default_vlan_id) {
2647                 vnet_rx_frames_untag(vnetp->pvid, &mp);
2648                 if (mp == NULL) {
2649                         return;
2650                 }
2651         }
2652         mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2653 }
2654 
2655 static int
2656 vnet_addmac(void *arg, const uint8_t *mac_addr)
2657 {
2658         vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2659         vnet_t                  *vnetp;
2660 
2661         vnetp = rx_grp->vnetp;
2662 
2663         if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2664                 return (0);
2665         }
2666 
2667         cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2668             vnetp->instance, __func__);
2669         return (EINVAL);
2670 }
2671 
2672 static int
2673 vnet_remmac(void *arg, const uint8_t *mac_addr)
2674 {
2675         vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2676         vnet_t                  *vnetp;
2677 
2678         vnetp = rx_grp->vnetp;
2679 
2680         if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2681                 return (0);
2682         }
2683 
2684         cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2685             vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2686         return (EINVAL);
2687 }
2688 
2689 int
2690 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
2691 {
2692         mac_handle_t            mh;
2693         mac_client_handle_t     mch = NULL;
2694         mac_unicast_handle_t    muh = NULL;
2695         mac_diag_t              diag;
2696         mac_register_t          *macp;
2697         char                    client_name[MAXNAMELEN];
2698         int                     rv;
2699         uint16_t                mac_flags = MAC_UNICAST_TAG_DISABLE |
2700             MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
2701         vio_net_callbacks_t     vcb;
2702         ether_addr_t            rem_addr =
2703                 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2704         uint32_t                retries = 0;
2705 
2706         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2707                 return (EAGAIN);
2708         }
2709 
2710         do {
2711                 rv = mac_open_by_linkname(ifname, &mh);
2712                 if (rv == 0) {
2713                         break;
2714                 }
2715                 if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
2716                         mac_free(macp);
2717                         return (rv);
2718                 }
2719                 drv_usecwait(vnet_mac_open_delay);
2720         } while (rv == ENOENT);
2721 
2722         vnetp->hio_mh = mh;
2723 
2724         (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
2725             ifname);
2726         rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
2727         if (rv != 0) {
2728                 goto fail;
2729         }
2730         vnetp->hio_mch = mch;
2731 
2732         rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
2733             &diag);
2734         if (rv != 0) {
2735                 goto fail;
2736         }
2737         vnetp->hio_muh = muh;
2738 
2739         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2740         macp->m_driver = vnetp;
2741         macp->m_dip = NULL;
2742         macp->m_src_addr = NULL;
2743         macp->m_callbacks = &vnet_hio_res_callbacks;
2744         macp->m_min_sdu = 0;
2745         macp->m_max_sdu = ETHERMTU;
2746 
2747         rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
2748             vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
2749         if (rv != 0) {
2750                 goto fail;
2751         }
2752         mac_free(macp);
2753 
2754         /* add the recv callback */
2755         mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
2756 
2757         return (0);
2758 
2759 fail:
2760         mac_free(macp);
2761         vnet_hio_mac_cleanup(vnetp);
2762         return (1);
2763 }
2764 
2765 void
2766 vnet_hio_mac_cleanup(vnet_t *vnetp)
2767 {
2768         if (vnetp->hio_vhp != NULL) {
2769                 vio_net_resource_unreg(vnetp->hio_vhp);
2770                 vnetp->hio_vhp = NULL;
2771         }
2772 
2773         if (vnetp->hio_muh != NULL) {
2774                 (void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
2775                 vnetp->hio_muh = NULL;
2776         }
2777 
2778         if (vnetp->hio_mch != NULL) {
2779                 mac_client_close(vnetp->hio_mch, 0);
2780                 vnetp->hio_mch = NULL;
2781         }
2782 
2783         if (vnetp->hio_mh != NULL) {
2784                 mac_close(vnetp->hio_mh);
2785                 vnetp->hio_mh = NULL;
2786         }
2787 }
2788 
2789 /* Bind pseudo rings to hwrings */
2790 static int
2791 vnet_bind_hwrings(vnet_t *vnetp)
2792 {
2793         mac_ring_handle_t       hw_rh[VNET_NUM_HYBRID_RINGS];
2794         mac_perim_handle_t      mph1;
2795         vnet_pseudo_rx_group_t  *rx_grp;
2796         vnet_pseudo_rx_ring_t   *rx_ringp;
2797         vnet_pseudo_tx_group_t  *tx_grp;
2798         vnet_pseudo_tx_ring_t   *tx_ringp;
2799         int                     hw_ring_cnt;
2800         int                     i;
2801         int                     rv;
2802 
2803         mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2804 
2805         /* Get the list of the underlying RX rings. */
2806         hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
2807             MAC_RING_TYPE_RX);
2808 
2809         /* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
2810         if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2811                 cmn_err(CE_WARN,
2812                     "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
2813                     vnetp->instance, hw_ring_cnt);
2814                 goto fail;
2815         }
2816 
2817         if (vnetp->rx_hwgh != NULL) {
2818                 /*
2819                  * Quiesce the HW ring and the mac srs on the ring. Note
2820                  * that the HW ring will be restarted when the pseudo ring
2821                  * is started. At that time all the packets will be
2822                  * directly passed up to the pseudo RX ring and handled
2823                  * by mac srs created over the pseudo RX ring.
2824                  */
2825                 mac_rx_client_quiesce(vnetp->hio_mch);
2826                 mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
2827         }
2828 
2829         /*
2830          * Bind the pseudo rings to the hwrings and start the hwrings.
2831          * Note we don't need to register these with the upper mac, as we have
2832          * statically exported these pseudo rxrings, which are reserved for
2833          * the rxrings of the Hybrid resource.
2834          */
2835         rx_grp = &vnetp->rx_grp[0];
2836         for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2837                 /* Pick the rxrings reserved for Hybrid resource */
2838                 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2839 
2840                 /* Store the hw ring handle */
2841                 rx_ringp->hw_rh = hw_rh[i];
2842 
2843                 /* Bind the pseudo ring to the underlying hwring */
2844                 mac_hwring_setup(rx_ringp->hw_rh,
2845                     (mac_resource_handle_t)rx_ringp, NULL);
2846 
2847                 /* Start the hwring if needed */
2848                 if (rx_ringp->state & VNET_RXRING_STARTED) {
2849                         rv = mac_hwring_start(rx_ringp->hw_rh);
2850                         if (rv != 0) {
2851                                 mac_hwring_teardown(rx_ringp->hw_rh);
2852                                 rx_ringp->hw_rh = NULL;
2853                                 goto fail;
2854                         }
2855                 }
2856         }
2857 
2858         /* Get the list of the underlying TX rings. */
2859         hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
2860             MAC_RING_TYPE_TX);
2861 
2862         /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
2863         if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2864                 cmn_err(CE_WARN,
2865                     "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
2866                     vnetp->instance, hw_ring_cnt);
2867                 goto fail;
2868         }
2869 
2870         /*
2871          * Now map the pseudo txrings to the hw txrings. Note we don't need
2872          * to register these with the upper mac, as we have statically exported
2873          * these rings. Note that these rings will continue to be used for LDC
2874          * resources to peer vnets and vswitch (shared ring).
2875          */
2876         tx_grp = &vnetp->tx_grp[0];
2877         for (i = 0; i < tx_grp->ring_cnt; i++) {
2878                 tx_ringp = &tx_grp->rings[i];
2879                 tx_ringp->hw_rh = hw_rh[i];
2880                 tx_ringp->state |= VNET_TXRING_HYBRID;
2881         }
2882         tx_grp->tx_notify_handle =
2883             mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);
2884 
2885         mac_perim_exit(mph1);
2886         return (0);
2887 
2888 fail:
2889         mac_perim_exit(mph1);
2890         vnet_unbind_hwrings(vnetp);
2891         return (1);
2892 }
2893 
2894 /* Unbind pseudo rings from hwrings */
2895 static void
2896 vnet_unbind_hwrings(vnet_t *vnetp)
2897 {
2898         mac_perim_handle_t      mph1;
2899         vnet_pseudo_rx_ring_t   *rx_ringp;
2900         vnet_pseudo_rx_group_t  *rx_grp;
2901         vnet_pseudo_tx_group_t  *tx_grp;
2902         vnet_pseudo_tx_ring_t   *tx_ringp;
2903         int                     i;
2904 
2905         mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2906 
2907         tx_grp = &vnetp->tx_grp[0];
2908         for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2909                 tx_ringp = &tx_grp->rings[i];
2910                 if (tx_ringp->state & VNET_TXRING_HYBRID) {
2911                         tx_ringp->state &= ~VNET_TXRING_HYBRID;
2912                         tx_ringp->hw_rh = NULL;
2913                 }
2914         }
2915         (void) mac_client_tx_notify(vnetp->hio_mch, NULL,
2916             tx_grp->tx_notify_handle);
2917 
2918         rx_grp = &vnetp->rx_grp[0];
2919         for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2920                 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2921                 if (rx_ringp->hw_rh != NULL) {
2922                         /* Stop the hwring */
2923                         mac_hwring_stop(rx_ringp->hw_rh);
2924 
2925                         /* Teardown the hwring */
2926                         mac_hwring_teardown(rx_ringp->hw_rh);
2927                         rx_ringp->hw_rh = NULL;
2928                 }
2929         }
2930 
2931         if (vnetp->rx_hwgh != NULL) {
2932                 vnetp->rx_hwgh = NULL;
2933                 /*
2934                  * First clear the permanent-quiesced flag of the RX srs, then
2935                  * restart the HW ring and the mac srs on the ring.
2936                  */
2937                 mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
2938                 mac_rx_client_restart(vnetp->hio_mch);
2939         }
2940 
2941         mac_perim_exit(mph1);
2942 }
2943 
2944 /* Bind a pseudo ring to an LDC resource */
2945 static int
2946 vnet_bind_vgenring(vnet_res_t *vresp)
2947 {
2948         vnet_t                  *vnetp;
2949         vnet_pseudo_rx_group_t  *rx_grp;
2950         vnet_pseudo_rx_ring_t   *rx_ringp;
2951         mac_perim_handle_t      mph1;
2952         int                     rv;
2953         int                     type;
2954 
2955         vnetp = vresp->vnetp;
2956         type = vresp->type;
2957         rx_grp = &vnetp->rx_grp[0];
2958 
2959         if (type == VIO_NET_RES_LDC_SERVICE) {
2960                 /*
2961                  * Ring Index 0 is the default ring in the group and is
2962                  * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
2963                  * is allocated statically and is reported to the mac layer
2964          * in vnet_m_capab(). So all we need to do here is save a
2965          * reference to the associated vresp.
2966                  */
2967                 rx_ringp = &rx_grp->rings[0];
2968                 rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2969                 vresp->rx_ringp = (void *)rx_ringp;
2970                 return (0);
2971         }
2972         ASSERT(type == VIO_NET_RES_LDC_GUEST);
2973 
2974         mac_perim_enter_by_mh(vnetp->mh, &mph1);
2975 
2976         rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
2977         if (rx_ringp == NULL) {
2978                 cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
2979                     vnetp->instance);
2980                 goto fail;
2981         }
2982 
2983         /* Store the LDC resource itself as the ring handle */
2984         rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2985 
2986         /*
2987          * Save a reference to the ring in the resource for lookup during
2988          * unbind. Note that this is done only for LDC resources. We don't need it
2989          * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
2990          * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
2991          */
2992         vresp->rx_ringp = (void *)rx_ringp;
2993         rx_ringp->state |= VNET_RXRING_LDC_GUEST;
2994 
2995         /* Register the pseudo ring with upper-mac */
2996         rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
2997         if (rv != 0) {
2998                 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
2999                 rx_ringp->hw_rh = NULL;
3000                 vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3001                 goto fail;
3002         }
3003 
3004         mac_perim_exit(mph1);
3005         return (0);
3006 fail:
3007         mac_perim_exit(mph1);
3008         return (1);
3009 }
3010 
3011 /* Unbind a pseudo ring from an LDC resource */
3012 static void
3013 vnet_unbind_vgenring(vnet_res_t *vresp)
3014 {
3015         vnet_t                  *vnetp;
3016         vnet_pseudo_rx_group_t  *rx_grp;
3017         vnet_pseudo_rx_ring_t   *rx_ringp;
3018         mac_perim_handle_t      mph1;
3019         int                     type;
3020 
3021         vnetp = vresp->vnetp;
3022         type = vresp->type;
3023         rx_grp = &vnetp->rx_grp[0];
3024 
3025         if (vresp->rx_ringp == NULL) {
3026                 return;
3027         }
3028 
3029         if (type == VIO_NET_RES_LDC_SERVICE) {
3030                 /*
3031                  * Ring Index 0 is the default ring in the group and is
3032                  * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
3033                  * is allocated statically and is reported to the mac layer
3034          * in vnet_m_capab(). So all we need to do here is remove its
3035          * reference to the associated vresp.
3036                  */
3037                 rx_ringp = &rx_grp->rings[0];
3038                 rx_ringp->hw_rh = NULL;
3039                 vresp->rx_ringp = NULL;
3040                 return;
3041         }
3042         ASSERT(type == VIO_NET_RES_LDC_GUEST);
3043 
3044         mac_perim_enter_by_mh(vnetp->mh, &mph1);
3045 
3046         rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
3047         vresp->rx_ringp = NULL;
3048 
3049         if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
3050                 /* Unregister the pseudo ring with upper-mac */
3051                 mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
3052 
3053                 rx_ringp->hw_rh = NULL;
3054                 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
3055 
3056                 /* Free the pseudo rx ring */
3057                 vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3058         }
3059 
3060         mac_perim_exit(mph1);
3061 }
3062 
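/* Unbind the rings of the given resource, based on the resource type */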
3063 static void
3064 vnet_unbind_rings(vnet_res_t *vresp)
3065 {
3066         switch (vresp->type) {
3067 
3068         case VIO_NET_RES_LDC_SERVICE:
3069         case VIO_NET_RES_LDC_GUEST:
3070                 vnet_unbind_vgenring(vresp);
3071                 break;
3072 
3073         case VIO_NET_RES_HYBRID:
3074                 vnet_unbind_hwrings(vresp->vnetp);
3075                 break;
3076 
3077         default:
3078                 break;
3079 
3080         }
3081 }
3082 
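/* Bind the rings of the given resource, based on the resource type */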
3083 static int
3084 vnet_bind_rings(vnet_res_t *vresp)
3085 {
3086         int     rv;
3087 
3088         switch (vresp->type) {
3089 
3090         case VIO_NET_RES_LDC_SERVICE:
3091         case VIO_NET_RES_LDC_GUEST:
3092                 rv = vnet_bind_vgenring(vresp);
3093                 break;
3094 
3095         case VIO_NET_RES_HYBRID:
3096                 rv = vnet_bind_hwrings(vresp->vnetp);
3097                 break;
3098 
3099         default:
3100                 rv = 1;
3101                 break;
3102 
3103         }
3104 
3105         return (rv);
3106 }
3107 
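/* Get a statistic of the underlying Hybrid device, via its mac handle */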
3108 /* ARGSUSED */
3109 int
3110 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
3111 {
3112         vnet_t  *vnetp = (vnet_t *)arg;
3113 
3114         *val = mac_stat_get(vnetp->hio_mh, stat);
3115         return (0);
3116 }
3117 
3118 /*
3119  * The start() and stop() routines for the Hybrid resource below are just
3120  * dummy functions. They are provided to avoid resource-type-specific code in
3121  * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
3122  * of the Hybrid resource happens in the context of the mac_client interfaces
3123  * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
3124  */
3125 /* ARGSUSED */
3126 static int
3127 vnet_hio_start(void *arg)
3128 {
3129         return (0);
3130 }
3131 
3132 /* ARGSUSED */
3133 static void
3134 vnet_hio_stop(void *arg)
3135 {
3136 }
3137 
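/*
 * Transmit a chain of packets over the hw tx ring that is bound to the
 * given pseudo tx ring.
 */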
3138 mblk_t *
3139 vnet_hio_tx(void *arg, mblk_t *mp)
3140 {
3141         vnet_pseudo_tx_ring_t   *tx_ringp;
3142         mblk_t                  *nextp;
3143         mblk_t                  *ret_mp;
3144 
3145         tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
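        /*
         * Send the chain one packet at a time. If the hw ring cannot accept
         * a packet, mac_hwring_tx() returns it; reattach the remainder of
         * the chain to it and return the unsent packets to the caller.
         */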
3146         for (;;) {
3147                 nextp = mp->b_next;
3148                 mp->b_next = NULL;
3149 
3150                 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
3151                 if (ret_mp != NULL) {
3152                         ret_mp->b_next = nextp;
3153                         mp = ret_mp;
3154                         break;
3155                 }
3156 
3157                 if ((mp = nextp) == NULL)
3158                         break;
3159         }
3160         return (mp);
3161 }
3162 
3163 #ifdef  VNET_IOC_DEBUG
3164 
3165 /*
3166  * For now, the ioctl entry point is used only for debugging. The ioctl commands
3167  * can be used to force the link state of the channel connected to vsw.
3168  */
3169 static void
3170 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3171 {
3172         struct iocblk   *iocp;
3173         vnet_t          *vnetp;
3174 
3175         iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3176         iocp->ioc_error = 0;
3177         vnetp = (vnet_t *)arg;
3178 
3179         if (vnetp == NULL) {
3180                 miocnak(q, mp, 0, EINVAL);
3181                 return;
3182         }
3183 
3184         switch (iocp->ioc_cmd) {
3185 
3186         case VNET_FORCE_LINK_DOWN:
3187         case VNET_FORCE_LINK_UP:
3188                 vnet_force_link_state(vnetp, q, mp);
3189                 break;
3190 
3191         default:
3192                 iocp->ioc_error = EINVAL;
3193                 miocnak(q, mp, 0, iocp->ioc_error);
3194                 break;
3195 
3196         }
3197 }
3198 
3199 static void
3200 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
3201 {
3202         mac_register_t  *macp;
3203         mac_callbacks_t *cbp;
3204         vnet_res_t      *vresp;
3205 
3206         READ_ENTER(&vnetp->vsw_fp_rw);
3207 
3208         vresp = vnetp->vsw_fp;
3209         if (vresp == NULL) {
3210                 RW_EXIT(&vnetp->vsw_fp_rw);
3211                 return;
3212         }
3213 
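        /*
         * Forward the ioctl to the underlying vsw-port resource's ioctl
         * handler, via its registered mac callbacks.
         */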
3214         macp = &vresp->macreg;
3215         cbp = macp->m_callbacks;
3216         cbp->mc_ioctl(macp->m_driver, q, mp);
3217 
3218         RW_EXIT(&vnetp->vsw_fp_rw);
3219 }
3220 
3221 #else
3222 
3223 static void
3224 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3225 {
3226         vnet_t          *vnetp;
3227 
3228         vnetp = (vnet_t *)arg;
3229 
3230         if (vnetp == NULL) {
3231                 miocnak(q, mp, 0, EINVAL);
3232                 return;
3233         }
3234 
3235         /* ioctl support only for debugging */
3236         miocnak(q, mp, 0, ENOTSUP);
3237 }
3238 
3239 #endif