11493 aggr needs support for multiple pseudo rx groups
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/sun4v/io/vnet.c
+++ new/usr/src/uts/sun4v/io/vnet.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 + * Copyright 2018 Joyent, Inc.
25 26 */
26 27
27 28 #include <sys/types.h>
28 29 #include <sys/errno.h>
29 30 #include <sys/param.h>
30 31 #include <sys/callb.h>
31 32 #include <sys/stream.h>
32 33 #include <sys/kmem.h>
33 34 #include <sys/conf.h>
34 35 #include <sys/devops.h>
35 36 #include <sys/ksynch.h>
36 37 #include <sys/stat.h>
37 38 #include <sys/modctl.h>
38 39 #include <sys/modhash.h>
39 40 #include <sys/debug.h>
40 41 #include <sys/ethernet.h>
41 42 #include <sys/dlpi.h>
42 43 #include <net/if.h>
43 44 #include <sys/mac_provider.h>
44 45 #include <sys/mac_client.h>
45 46 #include <sys/mac_client_priv.h>
46 47 #include <sys/mac_ether.h>
47 48 #include <sys/ddi.h>
48 49 #include <sys/sunddi.h>
49 50 #include <sys/strsun.h>
50 51 #include <sys/note.h>
51 52 #include <sys/atomic.h>
52 53 #include <sys/vnet.h>
53 54 #include <sys/vlan.h>
54 55 #include <sys/vnet_mailbox.h>
55 56 #include <sys/vnet_common.h>
56 57 #include <sys/dds.h>
57 58 #include <sys/strsubr.h>
58 59 #include <sys/taskq.h>
59 60
60 61 /*
61 62 * Function prototypes.
62 63 */
63 64
64 65 /* DDI entrypoints */
65 66 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
66 67 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
67 68 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
68 69
69 70 /* MAC entrypoints */
70 71 static int vnet_m_stat(void *, uint_t, uint64_t *);
71 72 static int vnet_m_start(void *);
72 73 static void vnet_m_stop(void *);
73 74 static int vnet_m_promisc(void *, boolean_t);
74 75 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
75 76 static int vnet_m_unicst(void *, const uint8_t *);
76 77 mblk_t *vnet_m_tx(void *, mblk_t *);
77 78 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
78 79 #ifdef VNET_IOC_DEBUG
79 80 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
80 81 #endif
81 82 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
82 83 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
83 84 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
84 85 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
85 86 mac_group_info_t *infop, mac_group_handle_t handle);
86 87 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
87 88 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
88 89 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
89 90 uint64_t *val);
90 91 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
91 92 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
92 93 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
93 94 uint64_t *val);
94 95 static int vnet_ring_enable_intr(void *arg);
95 96 static int vnet_ring_disable_intr(void *arg);
96 97 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
97 98 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
98 99 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
99 100
100 101 /* vnet internal functions */
101 102 static int vnet_unattach(vnet_t *vnetp);
102 103 static void vnet_ring_grp_init(vnet_t *vnetp);
103 104 static void vnet_ring_grp_uninit(vnet_t *vnetp);
104 105 static int vnet_mac_register(vnet_t *);
105 106 static int vnet_read_mac_address(vnet_t *vnetp);
106 107 static int vnet_bind_vgenring(vnet_res_t *vresp);
107 108 static void vnet_unbind_vgenring(vnet_res_t *vresp);
108 109 static int vnet_bind_hwrings(vnet_t *vnetp);
109 110 static void vnet_unbind_hwrings(vnet_t *vnetp);
110 111 static int vnet_bind_rings(vnet_res_t *vresp);
111 112 static void vnet_unbind_rings(vnet_res_t *vresp);
112 113 static int vnet_hio_stat(void *, uint_t, uint64_t *);
113 114 static int vnet_hio_start(void *);
114 115 static void vnet_hio_stop(void *);
115 116 mblk_t *vnet_hio_tx(void *, mblk_t *);
116 117
117 118 /* Forwarding database (FDB) routines */
118 119 static void vnet_fdb_create(vnet_t *vnetp);
119 120 static void vnet_fdb_destroy(vnet_t *vnetp);
120 121 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
121 122 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
122 123 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
123 124 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
124 125
125 126 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
126 127 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
127 128 static void vnet_tx_update(vio_net_handle_t vrh);
128 129 static void vnet_res_start_task(void *arg);
129 130 static void vnet_start_resources(vnet_t *vnetp);
130 131 static void vnet_stop_resources(vnet_t *vnetp);
131 132 static void vnet_dispatch_res_task(vnet_t *vnetp);
132 133 static void vnet_res_start_task(void *arg);
133 134 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
134 135 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
135 136 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
136 137 static void vnet_tx_notify_thread(void *);
137 138
138 139 /* Exported to vnet_gen */
139 140 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
140 141 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
141 142 void vnet_dds_cleanup_hio(vnet_t *vnetp);
142 143
143 144 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
144 145 vnet_res_t *vresp);
145 146 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
146 147 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
147 148 static void vnet_hio_destroy_kstats(kstat_t *ksp);
148 149
149 150 /* Exported to vnet_dds */
150 151 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
151 152 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
152 153 void vnet_hio_mac_cleanup(vnet_t *vnetp);
153 154
154 155 /* Externs that are imported from vnet_gen */
155 156 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
156 157 const uint8_t *macaddr, void **vgenhdl);
157 158 extern int vgen_init_mdeg(void *arg);
158 159 extern void vgen_uninit(void *arg);
159 160 extern int vgen_dds_tx(void *arg, void *dmsg);
160 161 extern int vgen_enable_intr(void *arg);
161 162 extern int vgen_disable_intr(void *arg);
162 163 extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
163 164
164 165 /* Externs that are imported from vnet_dds */
165 166 extern void vdds_mod_init(void);
166 167 extern void vdds_mod_fini(void);
167 168 extern int vdds_init(vnet_t *vnetp);
168 169 extern void vdds_cleanup(vnet_t *vnetp);
169 170 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
170 171 extern void vdds_cleanup_hybrid_res(void *arg);
171 172 extern void vdds_cleanup_hio(vnet_t *vnetp);
172 173
173 174 extern pri_t minclsyspri;
174 175
175 176 #define DRV_NAME "vnet"
176 177 #define VNET_FDBE_REFHOLD(p) \
177 178 { \
178 179 atomic_inc_32(&(p)->refcnt); \
179 180 ASSERT((p)->refcnt != 0); \
180 181 }
181 182
182 183 #define VNET_FDBE_REFRELE(p) \
183 184 { \
184 185 ASSERT((p)->refcnt != 0); \
185 186 atomic_dec_32(&(p)->refcnt); \
186 187 }
187 188
188 189 #ifdef VNET_IOC_DEBUG
189 190 #define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB)
190 191 #else
191 192 #define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB)
192 193 #endif
193 194
194 195 static mac_callbacks_t vnet_m_callbacks = {
195 196 VNET_M_CALLBACK_FLAGS,
196 197 vnet_m_stat,
197 198 vnet_m_start,
198 199 vnet_m_stop,
199 200 vnet_m_promisc,
200 201 vnet_m_multicst,
201 202 NULL, /* m_unicst entry must be NULL while rx rings are exposed */
202 203 NULL, /* m_tx entry must be NULL while tx rings are exposed */
203 204 NULL,
204 205 vnet_m_ioctl,
205 206 vnet_m_capab,
206 207 NULL
207 208 };
208 209
209 210 static mac_callbacks_t vnet_hio_res_callbacks = {
210 211 0,
211 212 vnet_hio_stat,
212 213 vnet_hio_start,
213 214 vnet_hio_stop,
214 215 NULL,
215 216 NULL,
216 217 NULL,
217 218 vnet_hio_tx,
218 219 NULL,
219 220 NULL,
220 221 NULL
221 222 };
222 223
223 224 /*
224 225 * Linked list of "vnet_t" structures - one per instance.
225 226 */
226 227 static vnet_t *vnet_headp = NULL;
227 228 static krwlock_t vnet_rw;
228 229
229 230 /* Tunables */
230 231 uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;
231 232
232 233 /*
233 234 * Configure tx serialization in mac layer for the vnet device. This tunable
234 235 * should be enabled to improve performance only if HybridIO is configured for
235 236 * the vnet device.
236 237 */
237 238 boolean_t vnet_mac_tx_serialize = B_FALSE;
238 239
239 240 /* Configure enqueueing at Rx soft rings in mac layer for the vnet device */
240 241 boolean_t vnet_mac_rx_queuing = B_TRUE;
241 242
242 243 /*
243 244 * Set this to non-zero to enable additional internal receive buffer pools
244 245 * based on the MTU of the device for better performance at the cost of more
245 246 * memory consumption. This is turned off by default, to use allocb(9F) for
246 247 * receive buffer allocations of sizes > 2K.
247 248 */
248 249 boolean_t vnet_jumbo_rxpools = B_FALSE;
249 250
250 251 /* # of chains in fdb hash table */
251 252 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;
252 253
253 254 /* Internal tunables */
254 255 uint32_t vnet_ethermtu = 1500; /* mtu of the device */
255 256
256 257 /*
257 258 * Default vlan id. This is only used internally when the "default-vlan-id"
258 259 * property is not present in the MD device node. Therefore, this should not be
259 260 * used as a tunable; if this value is changed, the corresponding variable
260 261 * should be updated to the same value in vsw and also other vnets connected to
261 262 * the same vsw.
262 263 */
263 264 uint16_t vnet_default_vlan_id = 1;
264 265
265 266 /* delay in usec to wait for all references on a fdb entry to be dropped */
266 267 uint32_t vnet_fdbe_refcnt_delay = 10;
267 268
268 269 static struct ether_addr etherbroadcastaddr = {
269 270 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
270 271 };
271 272
272 273 /* mac_open() retry delay in usec */
273 274 uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */
274 275
275 276 /* max # of mac_open() retries */
276 277 uint32_t vnet_mac_open_retries = 100;
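
The tunables above are ordinary kernel variables; a minimal sketch of setting
them persistently from /etc/system, assuming the standard
set <module>:<variable> syntax (the values shown are purely illustrative):

	set vnet:vnet_mac_tx_serialize = 1
	set vnet:vnet_jumbo_rxpools = 1
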
277 278
278 279 /*
279 280 * Property names
280 281 */
281 282 static char macaddr_propname[] = "local-mac-address";
282 283
283 284 /*
284 285 * This is the string displayed by modinfo(1m).
285 286 */
286 287 static char vnet_ident[] = "vnet driver";
287 288 extern struct mod_ops mod_driverops;
288 289 static struct cb_ops cb_vnetops = {
289 290 nulldev, /* cb_open */
290 291 nulldev, /* cb_close */
291 292 nodev, /* cb_strategy */
292 293 nodev, /* cb_print */
293 294 nodev, /* cb_dump */
294 295 nodev, /* cb_read */
295 296 nodev, /* cb_write */
296 297 nodev, /* cb_ioctl */
297 298 nodev, /* cb_devmap */
298 299 nodev, /* cb_mmap */
299 300 nodev, /* cb_segmap */
300 301 nochpoll, /* cb_chpoll */
301 302 ddi_prop_op, /* cb_prop_op */
302 303 NULL, /* cb_stream */
303 304 (int)(D_MP) /* cb_flag */
304 305 };
305 306
306 307 static struct dev_ops vnetops = {
307 308 DEVO_REV, /* devo_rev */
308 309 0, /* devo_refcnt */
309 310 NULL, /* devo_getinfo */
310 311 nulldev, /* devo_identify */
311 312 nulldev, /* devo_probe */
312 313 vnetattach, /* devo_attach */
313 314 vnetdetach, /* devo_detach */
314 315 nodev, /* devo_reset */
315 316 &cb_vnetops, /* devo_cb_ops */
316 317 (struct bus_ops *)NULL, /* devo_bus_ops */
317 318 NULL, /* devo_power */
318 319 ddi_quiesce_not_supported, /* devo_quiesce */
319 320 };
320 321
321 322 static struct modldrv modldrv = {
322 323 &mod_driverops, /* Type of module. This one is a driver */
323 324 vnet_ident, /* ID string */
324 325 &vnetops /* driver specific ops */
325 326 };
326 327
327 328 static struct modlinkage modlinkage = {
328 329 MODREV_1, (void *)&modldrv, NULL
329 330 };
330 331
331 332 #ifdef DEBUG
332 333
333 334 #define DEBUG_PRINTF debug_printf
334 335
335 336 /*
336 337 * Print debug messages - set to 0xf to enable all msgs
337 338 */
338 339 int vnet_dbglevel = 0x8;
339 340
340 341 static void
341 342 debug_printf(const char *fname, void *arg, const char *fmt, ...)
342 343 {
343 344 char buf[512];
344 345 va_list ap;
345 346 vnet_t *vnetp = (vnet_t *)arg;
346 347 char *bufp = buf;
347 348
348 349 if (vnetp == NULL) {
349 350 (void) sprintf(bufp, "%s: ", fname);
350 351 bufp += strlen(bufp);
351 352 } else {
352 353 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
353 354 bufp += strlen(bufp);
354 355 }
355 356 va_start(ap, fmt);
356 357 (void) vsprintf(bufp, fmt, ap);
357 358 va_end(ap);
358 359 cmn_err(CE_CONT, "%s\n", buf);
359 360 }
360 361
361 362 #endif
362 363
363 364 /* _init(9E): initialize the loadable module */
364 365 int
365 366 _init(void)
366 367 {
367 368 int status;
368 369
369 370 DBG1(NULL, "enter\n");
370 371
371 372 mac_init_ops(&vnetops, "vnet");
372 373 status = mod_install(&modlinkage);
373 374 if (status != 0) {
374 375 mac_fini_ops(&vnetops);
375 376 }
376 377 vdds_mod_init();
377 378 DBG1(NULL, "exit(%d)\n", status);
378 379 return (status);
379 380 }
380 381
381 382 /* _fini(9E): prepare the module for unloading. */
382 383 int
383 384 _fini(void)
384 385 {
385 386 int status;
386 387
387 388 DBG1(NULL, "enter\n");
388 389
389 390 status = mod_remove(&modlinkage);
390 391 if (status != 0)
391 392 return (status);
392 393 mac_fini_ops(&vnetops);
393 394 vdds_mod_fini();
394 395
395 396 DBG1(NULL, "exit(%d)\n", status);
396 397 return (status);
397 398 }
398 399
399 400 /* _info(9E): return information about the loadable module */
400 401 int
401 402 _info(struct modinfo *modinfop)
402 403 {
403 404 return (mod_info(&modlinkage, modinfop));
404 405 }
405 406
406 407 /*
407 408 * attach(9E): attach a device to the system.
408 409 * called once for each instance of the device on the system.
409 410 */
410 411 static int
411 412 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
412 413 {
413 414 vnet_t *vnetp;
414 415 int status;
415 416 int instance;
416 417 uint64_t reg;
417 418 char qname[TASKQ_NAMELEN];
418 419 vnet_attach_progress_t attach_progress;
419 420
420 421 attach_progress = AST_init;
421 422
422 423 switch (cmd) {
423 424 case DDI_ATTACH:
424 425 break;
425 426 case DDI_RESUME:
426 427 case DDI_PM_RESUME:
427 428 default:
428 429 goto vnet_attach_fail;
429 430 }
430 431
431 432 instance = ddi_get_instance(dip);
432 433 DBG1(NULL, "instance(%d) enter\n", instance);
433 434
434 435 /* allocate vnet_t and mac_t structures */
435 436 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
436 437 vnetp->dip = dip;
437 438 vnetp->instance = instance;
438 439 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
439 440 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
440 441 attach_progress |= AST_vnet_alloc;
441 442
442 443 vnet_ring_grp_init(vnetp);
443 444 attach_progress |= AST_ring_init;
444 445
445 446 status = vdds_init(vnetp);
446 447 if (status != 0) {
447 448 goto vnet_attach_fail;
448 449 }
449 450 attach_progress |= AST_vdds_init;
450 451
451 452 /* setup links to vnet_t from both devinfo and mac_t */
452 453 ddi_set_driver_private(dip, (caddr_t)vnetp);
453 454
454 455 /* read the mac address */
455 456 status = vnet_read_mac_address(vnetp);
456 457 if (status != DDI_SUCCESS) {
457 458 goto vnet_attach_fail;
458 459 }
459 460 attach_progress |= AST_read_macaddr;
460 461
461 462 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
462 463 DDI_PROP_DONTPASS, "reg", -1);
463 464 if (reg == -1) {
464 465 goto vnet_attach_fail;
465 466 }
466 467 vnetp->reg = reg;
467 468
468 469 vnet_fdb_create(vnetp);
469 470 attach_progress |= AST_fdbh_alloc;
470 471
471 472 (void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
472 473 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
473 474 TASKQ_DEFAULTPRI, 0)) == NULL) {
474 475 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
475 476 instance);
476 477 goto vnet_attach_fail;
477 478 }
478 479 attach_progress |= AST_taskq_create;
479 480
480 481 /* add to the list of vnet devices */
481 482 WRITE_ENTER(&vnet_rw);
482 483 vnetp->nextp = vnet_headp;
483 484 vnet_headp = vnetp;
484 485 RW_EXIT(&vnet_rw);
485 486
486 487 attach_progress |= AST_vnet_list;
487 488
488 489 /*
489 490 * Initialize the generic vnet plugin which provides communication via
490 491 * sun4v LDC (logical domain channel) based resources. This involves 2
491 492 * steps; first, vgen_init() is invoked to read the various properties
492 493 * of the vnet device from its MD node (including its mtu which is
493 494 * needed for mac_register()) and obtain a handle to the vgen layer.
494 495 * After mac_register() is done and we have a mac handle, we then
495 496 * invoke vgen_init_mdeg() which registers with the MD event
496 497 * generator (mdeg) framework to allow LDC resource notifications.
497 498 * Note: this sequence also allows us to report the correct default #
498 499 * of pseudo rings (2 TX and 3 RX) in vnet_m_capab() which gets invoked
499 500 * in the context of mac_register(); and avoids conflicting with
500 501 * dynamic pseudo rx rings which get added/removed as a result of mdeg
501 502 * events in vgen.
502 503 */
503 504 status = vgen_init(vnetp, reg, vnetp->dip,
504 505 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
505 506 if (status != DDI_SUCCESS) {
506 507 DERR(vnetp, "vgen_init() failed\n");
507 508 goto vnet_attach_fail;
508 509 }
509 510 attach_progress |= AST_vgen_init;
510 511
511 512 status = vnet_mac_register(vnetp);
512 513 if (status != DDI_SUCCESS) {
513 514 goto vnet_attach_fail;
514 515 }
515 516 vnetp->link_state = LINK_STATE_UNKNOWN;
516 517 attach_progress |= AST_macreg;
517 518
518 519 status = vgen_init_mdeg(vnetp->vgenhdl);
519 520 if (status != DDI_SUCCESS) {
520 521 goto vnet_attach_fail;
521 522 }
522 523 attach_progress |= AST_init_mdeg;
523 524
524 525 vnetp->attach_progress = attach_progress;
525 526
526 527 DBG1(NULL, "instance(%d) exit\n", instance);
527 528 return (DDI_SUCCESS);
528 529
529 530 vnet_attach_fail:
530 531 vnetp->attach_progress = attach_progress;
531 532 status = vnet_unattach(vnetp);
532 533 ASSERT(status == 0);
533 534 return (DDI_FAILURE);
534 535 }
535 536
536 537 /*
537 538 * detach(9E): detach a device from the system.
538 539 */
539 540 static int
540 541 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
541 542 {
542 543 vnet_t *vnetp;
543 544 int instance;
544 545
545 546 instance = ddi_get_instance(dip);
546 547 DBG1(NULL, "instance(%d) enter\n", instance);
547 548
548 549 vnetp = ddi_get_driver_private(dip);
549 550 if (vnetp == NULL) {
550 551 goto vnet_detach_fail;
551 552 }
552 553
553 554 switch (cmd) {
554 555 case DDI_DETACH:
555 556 break;
556 557 case DDI_SUSPEND:
557 558 case DDI_PM_SUSPEND:
558 559 default:
559 560 goto vnet_detach_fail;
560 561 }
561 562
562 563 if (vnet_unattach(vnetp) != 0) {
563 564 goto vnet_detach_fail;
564 565 }
565 566
566 567 return (DDI_SUCCESS);
567 568
568 569 vnet_detach_fail:
569 570 return (DDI_FAILURE);
570 571 }
571 572
572 573 /*
573 574 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
574 575 * the only reason this function could fail is if mac_disable() fails.
575 576 * Otherwise, this function must ensure that all resources are freed and return
576 577 * success.
577 578 */
578 579 static int
579 580 vnet_unattach(vnet_t *vnetp)
580 581 {
581 582 vnet_attach_progress_t attach_progress;
582 583
583 584 attach_progress = vnetp->attach_progress;
584 585
585 586 /*
586 587 * Disable the mac device in the gldv3 subsystem. This can fail, in
587 588 * particular if there are still any open references to this mac
588 589 * device, in which case we just return failure without continuing to
589 590 * detach further.
590 591 * If it succeeds, we then invoke vgen_uninit() which should unregister
591 592 * any pseudo rings registered with the mac layer. Note we keep the
592 593 * AST_macreg flag on, so we can unregister with the mac layer at
593 594 * the end of this routine.
594 595 */
595 596 if (attach_progress & AST_macreg) {
596 597 if (mac_disable(vnetp->mh) != 0) {
597 598 return (1);
598 599 }
599 600 }
600 601
601 602 /*
602 603 * Now that we have disabled the device, we must finish all other steps
603 604 * and successfully return from this function; otherwise we will end up
604 605 * leaving the device in a broken/unusable state.
605 606 *
606 607 * First, release any hybrid resources assigned to this vnet device.
607 608 */
608 609 if (attach_progress & AST_vdds_init) {
609 610 vdds_cleanup(vnetp);
610 611 attach_progress &= ~AST_vdds_init;
611 612 }
612 613
613 614 /*
614 615 * Uninit vgen. This stops further mdeg callbacks to this vnet
615 616 * device and/or its ports; and detaches any existing ports.
616 617 */
617 618 if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
618 619 vgen_uninit(vnetp->vgenhdl);
619 620 attach_progress &= ~AST_vgen_init;
620 621 attach_progress &= ~AST_init_mdeg;
621 622 }
622 623
623 624 /* Destroy the taskq. */
624 625 if (attach_progress & AST_taskq_create) {
625 626 ddi_taskq_destroy(vnetp->taskqp);
626 627 attach_progress &= ~AST_taskq_create;
627 628 }
628 629
629 630 /* Destroy fdb. */
630 631 if (attach_progress & AST_fdbh_alloc) {
631 632 vnet_fdb_destroy(vnetp);
632 633 attach_progress &= ~AST_fdbh_alloc;
633 634 }
634 635
635 636 /* Remove from the device list */
636 637 if (attach_progress & AST_vnet_list) {
637 638 vnet_t **vnetpp;
638 639 /* unlink from instance(vnet_t) list */
639 640 WRITE_ENTER(&vnet_rw);
640 641 for (vnetpp = &vnet_headp; *vnetpp;
641 642 vnetpp = &(*vnetpp)->nextp) {
642 643 if (*vnetpp == vnetp) {
643 644 *vnetpp = vnetp->nextp;
644 645 break;
645 646 }
646 647 }
647 648 RW_EXIT(&vnet_rw);
648 649 attach_progress &= ~AST_vnet_list;
649 650 }
650 651
651 652 if (attach_progress & AST_ring_init) {
652 653 vnet_ring_grp_uninit(vnetp);
653 654 attach_progress &= ~AST_ring_init;
654 655 }
655 656
656 657 if (attach_progress & AST_macreg) {
657 658 VERIFY(mac_unregister(vnetp->mh) == 0);
658 659 vnetp->mh = NULL;
659 660 attach_progress &= ~AST_macreg;
660 661 }
661 662
662 663 if (attach_progress & AST_vnet_alloc) {
663 664 rw_destroy(&vnetp->vrwlock);
664 665 rw_destroy(&vnetp->vsw_fp_rw);
665 666 attach_progress &= ~AST_vnet_alloc;
666 667 KMEM_FREE(vnetp);
667 668 }
668 669
669 670 return (0);
670 671 }
671 672
672 673 /* enable the device for transmit/receive */
673 674 static int
674 675 vnet_m_start(void *arg)
675 676 {
676 677 vnet_t *vnetp = arg;
677 678
678 679 DBG1(vnetp, "enter\n");
679 680
680 681 WRITE_ENTER(&vnetp->vrwlock);
681 682 vnetp->flags |= VNET_STARTED;
682 683 vnet_start_resources(vnetp);
683 684 RW_EXIT(&vnetp->vrwlock);
684 685
685 686 DBG1(vnetp, "exit\n");
686 687 return (VNET_SUCCESS);
687 688
688 689 }
689 690
690 691 /* stop transmit/receive for the device */
691 692 static void
692 693 vnet_m_stop(void *arg)
693 694 {
694 695 vnet_t *vnetp = arg;
695 696
696 697 DBG1(vnetp, "enter\n");
697 698
698 699 WRITE_ENTER(&vnetp->vrwlock);
699 700 if (vnetp->flags & VNET_STARTED) {
700 701 /*
701 702 * Set the flags appropriately; this should prevent starting of
702 703 * any new resources that are added (see vnet_res_start_task()),
703 704 * while we release the vrwlock in vnet_stop_resources() before
704 705 * stopping each resource.
705 706 */
706 707 vnetp->flags &= ~VNET_STARTED;
707 708 vnetp->flags |= VNET_STOPPING;
708 709 vnet_stop_resources(vnetp);
709 710 vnetp->flags &= ~VNET_STOPPING;
710 711 }
711 712 RW_EXIT(&vnetp->vrwlock);
712 713
713 714 DBG1(vnetp, "exit\n");
714 715 }
715 716
716 717 /* set the unicast mac address of the device */
717 718 static int
718 719 vnet_m_unicst(void *arg, const uint8_t *macaddr)
719 720 {
720 721 _NOTE(ARGUNUSED(macaddr))
721 722
722 723 vnet_t *vnetp = arg;
723 724
724 725 DBG1(vnetp, "enter\n");
725 726 /*
726 727 * NOTE: setting mac address dynamically is not supported.
727 728 */
728 729 DBG1(vnetp, "exit\n");
729 730
730 731 return (VNET_FAILURE);
731 732 }
732 733
733 734 /* enable/disable a multicast address */
734 735 static int
735 736 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
736 737 {
737 738 _NOTE(ARGUNUSED(add, mca))
738 739
739 740 vnet_t *vnetp = arg;
740 741 vnet_res_t *vresp;
741 742 mac_register_t *macp;
742 743 mac_callbacks_t *cbp;
743 744 int rv = VNET_SUCCESS;
744 745
745 746 DBG1(vnetp, "enter\n");
746 747
747 748 READ_ENTER(&vnetp->vsw_fp_rw);
748 749 if (vnetp->vsw_fp == NULL) {
749 750 RW_EXIT(&vnetp->vsw_fp_rw);
750 751 return (EAGAIN);
751 752 }
752 753 VNET_FDBE_REFHOLD(vnetp->vsw_fp);
753 754 RW_EXIT(&vnetp->vsw_fp_rw);
754 755
755 756 vresp = vnetp->vsw_fp;
756 757 macp = &vresp->macreg;
757 758 cbp = macp->m_callbacks;
758 759 rv = cbp->mc_multicst(macp->m_driver, add, mca);
759 760
760 761 VNET_FDBE_REFRELE(vnetp->vsw_fp);
761 762
762 763 DBG1(vnetp, "exit(%d)\n", rv);
763 764 return (rv);
764 765 }
765 766
766 767 /* set or clear promiscuous mode on the device */
767 768 static int
768 769 vnet_m_promisc(void *arg, boolean_t on)
769 770 {
770 771 _NOTE(ARGUNUSED(on))
771 772
772 773 vnet_t *vnetp = arg;
773 774 DBG1(vnetp, "enter\n");
774 775 /*
775 776 * NOTE: setting promiscuous mode is not supported; just return success.
776 777 */
777 778 DBG1(vnetp, "exit\n");
778 779 return (VNET_SUCCESS);
779 780 }
780 781
781 782 /*
782 783 * Transmit a chain of packets. This function provides switching functionality
783 784 * based on the destination mac address to reach other guests (within ldoms) or
784 785 * external hosts.
785 786 */
786 787 mblk_t *
787 788 vnet_tx_ring_send(void *arg, mblk_t *mp)
788 789 {
789 790 vnet_pseudo_tx_ring_t *tx_ringp;
790 791 vnet_tx_ring_stats_t *statsp;
791 792 vnet_t *vnetp;
792 793 vnet_res_t *vresp;
793 794 mblk_t *next;
794 795 mblk_t *resid_mp;
795 796 mac_register_t *macp;
796 797 struct ether_header *ehp;
797 798 boolean_t is_unicast;
798 799 boolean_t is_pvid; /* non-default pvid ? */
799 800 boolean_t hres; /* Hybrid resource ? */
800 801 void *tx_arg;
801 802 size_t size;
802 803
803 804 tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
804 805 statsp = &tx_ringp->tx_ring_stats;
805 806 vnetp = (vnet_t *)tx_ringp->vnetp;
806 807 DBG1(vnetp, "enter\n");
807 808 ASSERT(mp != NULL);
808 809
809 810 is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;
810 811
811 812 while (mp != NULL) {
812 813
813 814 next = mp->b_next;
814 815 mp->b_next = NULL;
815 816
816 817 /* update stats */
817 818 size = msgsize(mp);
818 819
819 820 /*
820 821 * Find fdb entry for the destination
821 822 * and hold a reference to it.
822 823 */
823 824 ehp = (struct ether_header *)mp->b_rptr;
824 825 vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
825 826 if (vresp != NULL) {
826 827
827 828 /*
828 829 * Destination found in FDB.
829 830 * The destination is a vnet device within ldoms
830 831 * and directly reachable, invoke the tx function
831 832 * in the fdb entry.
832 833 */
833 834 macp = &vresp->macreg;
834 835 resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
835 836
836 837 /* tx done; now release ref on fdb entry */
837 838 VNET_FDBE_REFRELE(vresp);
838 839
839 840 if (resid_mp != NULL) {
840 841 /* m_tx failed */
841 842 mp->b_next = next;
842 843 break;
843 844 }
844 845 } else {
845 846 is_unicast = !(IS_BROADCAST(ehp) ||
846 847 (IS_MULTICAST(ehp)));
847 848 /*
848 849 * Destination is not in FDB.
849 850 * If the destination is broadcast or multicast,
850 851 * then forward the packet to vswitch.
851 852 * If a Hybrid resource is available, then send the
852 853 * unicast packet via hybrid resource, otherwise
853 854 * forward it to vswitch.
854 855 */
855 856 READ_ENTER(&vnetp->vsw_fp_rw);
856 857
857 858 if ((is_unicast) && (vnetp->hio_fp != NULL)) {
858 859 vresp = vnetp->hio_fp;
859 860 hres = B_TRUE;
860 861 } else {
861 862 vresp = vnetp->vsw_fp;
862 863 hres = B_FALSE;
863 864 }
864 865 if (vresp == NULL) {
865 866 /*
866 867 * no fdb entry to vsw? drop the packet.
867 868 */
868 869 RW_EXIT(&vnetp->vsw_fp_rw);
869 870 freemsg(mp);
870 871 mp = next;
871 872 continue;
872 873 }
873 874
874 875 /* ref hold the fdb entry to vsw */
875 876 VNET_FDBE_REFHOLD(vresp);
876 877
877 878 RW_EXIT(&vnetp->vsw_fp_rw);
878 879
879 880 /*
880 881 * In the case of a hybrid resource we need to insert
881 882 * the tag for the pvid case here, unlike packets that
882 883 * are destined to a vnet/vsw, in which case the vgen
883 884 * layer does the tagging before sending them over ldc.
884 885 */
885 886 if (hres == B_TRUE) {
886 887 /*
887 888 * Determine if the frame being transmitted
888 889 * over the hybrid resource is untagged. If so,
889 890 * insert the tag before transmitting.
890 891 */
891 892 if (is_pvid == B_TRUE &&
892 893 ehp->ether_type != htons(ETHERTYPE_VLAN)) {
893 894
894 895 mp = vnet_vlan_insert_tag(mp,
895 896 vnetp->pvid);
896 897 if (mp == NULL) {
897 898 VNET_FDBE_REFRELE(vresp);
898 899 mp = next;
899 900 continue;
900 901 }
901 902
902 903 }
903 904
904 905 macp = &vresp->macreg;
905 906 tx_arg = tx_ringp;
906 907 } else {
907 908 macp = &vresp->macreg;
908 909 tx_arg = macp->m_driver;
909 910 }
910 911 resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);
911 912
912 913 /* tx done; now release ref on fdb entry */
913 914 VNET_FDBE_REFRELE(vresp);
914 915
915 916 if (resid_mp != NULL) {
916 917 /* m_tx failed */
917 918 mp->b_next = next;
918 919 break;
919 920 }
920 921 }
921 922
922 923 statsp->obytes += size;
923 924 statsp->opackets++;
924 925 mp = next;
925 926 }
926 927
927 928 DBG1(vnetp, "exit\n");
928 929 return (mp);
929 930 }
930 931
931 932 /* get statistics from the device */
932 933 int
933 934 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
934 935 {
935 936 vnet_t *vnetp = arg;
936 937 vnet_res_t *vresp;
937 938 mac_register_t *macp;
938 939 mac_callbacks_t *cbp;
939 940 uint64_t val_total = 0;
940 941
941 942 DBG1(vnetp, "enter\n");
942 943
943 944 /*
944 945 * get the specified statistic from each transport and return the
945 946 * aggregate value. This obviously only works for counters.
946 947 */
947 948 if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
948 949 (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
949 950 return (ENOTSUP);
950 951 }
951 952
952 953 READ_ENTER(&vnetp->vrwlock);
953 954 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
954 955 macp = &vresp->macreg;
955 956 cbp = macp->m_callbacks;
956 957 if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
957 958 val_total += *val;
958 959 }
959 960 RW_EXIT(&vnetp->vrwlock);
960 961
961 962 *val = val_total;
962 963
963 964 DBG1(vnetp, "exit\n");
964 965 return (0);
965 966 }
966 967
967 968 static void
968 969 vnet_ring_grp_init(vnet_t *vnetp)
969 970 {
970 971 vnet_pseudo_rx_group_t *rx_grp;
971 972 vnet_pseudo_rx_ring_t *rx_ringp;
972 973 vnet_pseudo_tx_group_t *tx_grp;
973 974 vnet_pseudo_tx_ring_t *tx_ringp;
974 975 int i;
975 976
976 977 tx_grp = &vnetp->tx_grp[0];
977 978 tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
978 979 VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
979 980 for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
980 981 tx_ringp[i].state |= VNET_TXRING_SHARED;
981 982 }
982 983 tx_grp->rings = tx_ringp;
983 984 tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
984 985 mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
985 986 cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
986 987 tx_grp->flowctl_thread = thread_create(NULL, 0,
987 988 vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);
988 989
989 990 rx_grp = &vnetp->rx_grp[0];
990 991 rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
991 992 rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
992 993 rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
993 994 rx_grp->max_ring_cnt, KM_SLEEP);
994 995
995 996 /*
996 997 * Set up the first 3 pseudo RX rings, which are reserved:
997 998 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
998 999 */
999 1000 rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
1000 1001 rx_ringp[0].index = 0;
1001 1002 rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1002 1003 rx_ringp[1].index = 1;
1003 1004 rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
1004 1005 rx_ringp[2].index = 2;
1005 1006
1006 1007 rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1007 1008 rx_grp->rings = rx_ringp;
1008 1009
1009 1010 for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1010 1011 i < rx_grp->max_ring_cnt; i++) {
1011 1012 rx_ringp = &rx_grp->rings[i];
1012 1013 rx_ringp->state = VNET_RXRING_FREE;
1013 1014 rx_ringp->index = i;
1014 1015 }
1015 1016 }
1016 1017
1017 1018 static void
1018 1019 vnet_ring_grp_uninit(vnet_t *vnetp)
1019 1020 {
1020 1021 vnet_pseudo_rx_group_t *rx_grp;
1021 1022 vnet_pseudo_tx_group_t *tx_grp;
1022 1023 kt_did_t tid = 0;
1023 1024
1024 1025 tx_grp = &vnetp->tx_grp[0];
1025 1026
1026 1027 /* Inform tx_notify_thread to exit */
1027 1028 mutex_enter(&tx_grp->flowctl_lock);
1028 1029 if (tx_grp->flowctl_thread != NULL) {
1029 1030 tid = tx_grp->flowctl_thread->t_did;
1030 1031 tx_grp->flowctl_done = B_TRUE;
1031 1032 cv_signal(&tx_grp->flowctl_cv);
1032 1033 }
1033 1034 mutex_exit(&tx_grp->flowctl_lock);
1034 1035 if (tid != 0)
1035 1036 thread_join(tid);
1036 1037
1037 1038 if (tx_grp->rings != NULL) {
1038 1039 ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
1039 1040 kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
1040 1041 tx_grp->ring_cnt);
1041 1042 tx_grp->rings = NULL;
1042 1043 }
1043 1044
1044 1045 rx_grp = &vnetp->rx_grp[0];
1045 1046 if (rx_grp->rings != NULL) {
1046 1047 ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
1047 1048 ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1048 1049 kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
1049 1050 rx_grp->max_ring_cnt);
1050 1051 rx_grp->rings = NULL;
1051 1052 }
1052 1053 }
1053 1054
1054 1055 static vnet_pseudo_rx_ring_t *
1055 1056 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1056 1057 {
1057 1058 vnet_pseudo_rx_group_t *rx_grp;
1058 1059 vnet_pseudo_rx_ring_t *rx_ringp;
1059 1060 int index;
1060 1061
1061 1062 rx_grp = &vnetp->rx_grp[0];
1062 1063 WRITE_ENTER(&rx_grp->lock);
1063 1064
1064 1065 if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1065 1066 /* no rings available */
1066 1067 RW_EXIT(&rx_grp->lock);
1067 1068 return (NULL);
1068 1069 }
1069 1070
1070 1071 for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1071 1072 index < rx_grp->max_ring_cnt; index++) {
1072 1073 rx_ringp = &rx_grp->rings[index];
1073 1074 if (rx_ringp->state == VNET_RXRING_FREE) {
1074 1075 rx_ringp->state |= VNET_RXRING_INUSE;
1075 1076 rx_grp->ring_cnt++;
1076 1077 break;
1077 1078 }
1078 1079 }
1079 1080
1080 1081 RW_EXIT(&rx_grp->lock);
1081 1082 return (rx_ringp);
1082 1083 }
1083 1084
1084 1085 static void
1085 1086 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1086 1087 {
1087 1088 vnet_pseudo_rx_group_t *rx_grp;
1088 1089
1089 1090 ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1090 1091 rx_grp = &vnetp->rx_grp[0];
1091 1092 WRITE_ENTER(&rx_grp->lock);
1092 1093
1093 1094 if (ringp->state != VNET_RXRING_FREE) {
1094 1095 ringp->state = VNET_RXRING_FREE;
1095 1096 ringp->handle = NULL;
1096 1097 rx_grp->ring_cnt--;
1097 1098 }
1098 1099
1099 1100 RW_EXIT(&rx_grp->lock);
1100 1101 }
1101 1102
1102 1103 /* wrapper function for mac_register() */
1103 1104 static int
1104 1105 vnet_mac_register(vnet_t *vnetp)
1105 1106 {
1106 1107 mac_register_t *macp;
1107 1108 int err;
1108 1109
1109 1110 if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1110 1111 return (DDI_FAILURE);
1111 1112 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1112 1113 macp->m_driver = vnetp;
1113 1114 macp->m_dip = vnetp->dip;
1114 1115 macp->m_src_addr = vnetp->curr_macaddr;
1115 1116 macp->m_callbacks = &vnet_m_callbacks;
1116 1117 macp->m_min_sdu = 0;
1117 1118 macp->m_max_sdu = vnetp->mtu;
1118 1119 macp->m_margin = VLAN_TAGSZ;
1119 1120
1120 1121 macp->m_v12n = MAC_VIRT_LEVEL1;
1121 1122
1122 1123 /*
1123 1124 * Finally, we're ready to register ourselves with the MAC layer
1124 1125 * interface; if this succeeds, we're all ready to start()
1125 1126 */
1126 1127 err = mac_register(macp, &vnetp->mh);
1127 1128 mac_free(macp);
1128 1129 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1129 1130 }
1130 1131
1131 1132 /* read the mac address of the device */
1132 1133 static int
1133 1134 vnet_read_mac_address(vnet_t *vnetp)
1134 1135 {
1135 - uchar_t *macaddr;
1136 - uint32_t size;
1137 - int rv;
1136 + uchar_t *macaddr;
1137 + uint32_t size;
1138 + int rv;
1138 1139
1139 1140 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1140 1141 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1141 1142 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1142 1143 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1143 1144 macaddr_propname, rv);
1144 1145 return (DDI_FAILURE);
1145 1146 }
1146 1147 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1147 1148 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1148 1149 ddi_prop_free(macaddr);
1149 1150
1150 1151 return (DDI_SUCCESS);
1151 1152 }
1152 1153
1153 1154 static void
1154 1155 vnet_fdb_create(vnet_t *vnetp)
1155 1156 {
1156 1157 char hashname[MAXNAMELEN];
1157 1158
1158 1159 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1159 1160 vnetp->instance);
1160 1161 vnetp->fdb_nchains = vnet_fdb_nchains;
1161 1162 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1162 1163 mod_hash_null_valdtor, sizeof (void *));
1163 1164 }
1164 1165
1165 1166 static void
1166 1167 vnet_fdb_destroy(vnet_t *vnetp)
1167 1168 {
1168 1169 /* destroy fdb-hash-table */
1169 1170 if (vnetp->fdb_hashp != NULL) {
1170 1171 mod_hash_destroy_hash(vnetp->fdb_hashp);
1171 1172 vnetp->fdb_hashp = NULL;
1172 1173 vnetp->fdb_nchains = 0;
1173 1174 }
1174 1175 }
1175 1176
1176 1177 /*
1177 1178 * Add an entry into the fdb.
1178 1179 */
1179 1180 void
1180 1181 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
1181 1182 {
1182 1183 uint64_t addr = 0;
1183 1184 int rv;
1184 1185
1185 1186 KEY_HASH(addr, vresp->rem_macaddr);
1186 1187
1187 1188 /*
1188 1189 * If the entry being added corresponds to an LDC_SERVICE resource,
1189 1190 * that is, the vswitch connection, it is added to the hash and the
1190 1191 * entry is also cached; an additional reference count reflects
1191 1192 * this. The HYBRID resource is not added to the hash, but only
1192 1193 * cached, as it is only used for sending out packets for unknown
1193 1194 * unicast destinations.
1194 1195 */
1195 1196 (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1196 1197 (vresp->refcnt = 1) : (vresp->refcnt = 0);
1197 1198
1198 1199 /*
1199 1200 * Note: duplicate keys will be rejected by mod_hash.
1200 1201 */
1201 1202 if (vresp->type != VIO_NET_RES_HYBRID) {
1202 1203 rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1203 1204 (mod_hash_val_t)vresp);
1204 1205 if (rv != 0) {
1205 1206 DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
1206 1207 return;
1207 1208 }
1208 1209 }
1209 1210
1210 1211 if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1211 1212 /* Cache the fdb entry to vsw-port */
1212 1213 WRITE_ENTER(&vnetp->vsw_fp_rw);
1213 1214 if (vnetp->vsw_fp == NULL)
1214 1215 vnetp->vsw_fp = vresp;
1215 1216 RW_EXIT(&vnetp->vsw_fp_rw);
1216 1217 } else if (vresp->type == VIO_NET_RES_HYBRID) {
1217 1218 /* Cache the fdb entry to hybrid resource */
1218 1219 WRITE_ENTER(&vnetp->vsw_fp_rw);
1219 1220 if (vnetp->hio_fp == NULL)
1220 1221 vnetp->hio_fp = vresp;
1221 1222 RW_EXIT(&vnetp->vsw_fp_rw);
1222 1223 }
1223 1224 }
1224 1225
1225 1226 /*
1226 1227 * Remove an entry from fdb.
1227 1228 */
1228 1229 static void
1229 1230 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
1230 1231 {
1231 1232 uint64_t addr = 0;
1232 1233 int rv;
1233 1234 uint32_t refcnt;
1234 1235 vnet_res_t *tmp;
1235 1236
1236 1237 KEY_HASH(addr, vresp->rem_macaddr);
1237 1238
1238 1239 /*
1239 1240 * Remove the entry from fdb hash table.
1240 1241 * This prevents further references to this fdb entry.
1241 1242 */
1242 1243 if (vresp->type != VIO_NET_RES_HYBRID) {
1243 1244 rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1244 1245 (mod_hash_val_t *)&tmp);
1245 1246 if (rv != 0) {
1246 1247 /*
1247 1248 * As the resources are added to the hash only
1248 1249 * after they are started, this can occur if
1249 1250 * a resource unregisters before it is ever started.
1250 1251 */
1251 1252 return;
1252 1253 }
1253 1254 }
1254 1255
1255 1256 if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1256 1257 WRITE_ENTER(&vnetp->vsw_fp_rw);
1257 1258
1258 1259 ASSERT(tmp == vnetp->vsw_fp);
1259 1260 vnetp->vsw_fp = NULL;
1260 1261
1261 1262 RW_EXIT(&vnetp->vsw_fp_rw);
1262 1263 } else if (vresp->type == VIO_NET_RES_HYBRID) {
1263 1264 WRITE_ENTER(&vnetp->vsw_fp_rw);
1264 1265
1265 1266 vnetp->hio_fp = NULL;
1266 1267
1267 1268 RW_EXIT(&vnetp->vsw_fp_rw);
1268 1269 }
1269 1270
1270 1271 /*
1271 1272 * If there are threads already ref holding before the entry was
1272 1273 * removed from hash table, then wait for ref count to drop to zero.
1273 1274 */
1274 1275 (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1275 1276 (refcnt = 1) : (refcnt = 0);
1276 1277 while (vresp->refcnt > refcnt) {
1277 1278 delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1278 1279 }
1279 1280 }
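
The drain loop above relies on every lookup pairing VNET_FDBE_REFHOLD (taken
in vnet_fdbe_find_cb() below) with VNET_FDBE_REFRELE. A condensed sketch of
the consumer side of that contract, mirroring vnet_tx_ring_send() and
vnet_m_multicst() (illustrative only, not part of this patch):

	vnet_res_t *vresp;

	/* the find callback takes a reference while the hash is locked */
	vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
	if (vresp != NULL) {
		mac_register_t *macp = &vresp->macreg;

		(void) macp->m_callbacks->mc_tx(macp->m_driver, mp);

		/* drop the reference so vnet_fdbe_del() can drain */
		VNET_FDBE_REFRELE(vresp);
	}
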
1280 1281
1281 1282 /*
1282 1283 * Search fdb for a given mac address. If an entry is found, hold
1283 1284 * a reference to it and return the entry; else returns NULL.
1284 1285 */
1285 1286 static vnet_res_t *
1286 1287 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1287 1288 {
1288 1289 uint64_t key = 0;
1289 1290 vnet_res_t *vresp;
1290 1291 int rv;
1291 1292
1292 1293 KEY_HASH(key, addrp->ether_addr_octet);
1293 1294
1294 1295 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1295 1296 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1296 1297
1297 1298 if (rv != 0)
1298 1299 return (NULL);
1299 1300
1300 1301 return (vresp);
1301 1302 }
1302 1303
1303 1304 /*
1304 1305 * Callback function provided to mod_hash_find_cb(). After finding the fdb
1305 1306 * entry corresponding to the key (macaddr), this callback will be invoked by
1306 1307 * mod_hash_find_cb() to atomically increment the reference count on the fdb
1307 1308 * entry before returning the found entry.
1308 1309 */
1309 1310 static void
1310 1311 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1311 1312 {
1312 1313 _NOTE(ARGUNUSED(key))
1313 1314 VNET_FDBE_REFHOLD((vnet_res_t *)val);
1314 1315 }
1315 1316
1316 1317 /*
1317 1318 * Frames received that are tagged with the pvid of the vnet device must be
1318 1319 * untagged before sending up the stack. This function walks the chain of rx
1319 1320 * frames, untags any such frames and returns the updated chain.
1320 1321 *
1321 1322 * Arguments:
1322 1323 * pvid: pvid of the vnet device for which packets are being received
1323 1324 * mp: head of pkt chain to be validated and untagged
1324 1325 *
1325 1326 * Returns:
1326 1327 * mp: head of updated chain of packets
1327 1328 */
1328 1329 static void
1329 1330 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1330 1331 {
1331 1332 struct ether_vlan_header *evhp;
1332 1333 mblk_t *bp;
1333 1334 mblk_t *bpt;
1334 1335 mblk_t *bph;
1335 1336 mblk_t *bpn;
1336 1337
1337 1338 bpn = bph = bpt = NULL;
1338 1339
1339 1340 for (bp = *mp; bp != NULL; bp = bpn) {
1340 1341
1341 1342 bpn = bp->b_next;
1342 1343 bp->b_next = bp->b_prev = NULL;
1343 1344
1344 1345 evhp = (struct ether_vlan_header *)bp->b_rptr;
1345 1346
1346 1347 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1347 1348 VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1348 1349
1349 1350 bp = vnet_vlan_remove_tag(bp);
1350 1351 if (bp == NULL) {
1351 1352 continue;
1352 1353 }
1353 1354
1354 1355 }
1355 1356
1356 1357 /* build a chain of processed packets */
1357 1358 if (bph == NULL) {
1358 1359 bph = bpt = bp;
1359 1360 } else {
1360 1361 bpt->b_next = bp;
1361 1362 bpt = bp;
1362 1363 }
1363 1364
1364 1365 }
1365 1366
1366 1367 *mp = bph;
1367 1368 }
1368 1369
1369 1370 static void
1370 1371 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1371 1372 {
1372 1373 vnet_res_t *vresp = (vnet_res_t *)vrh;
1373 1374 vnet_t *vnetp = vresp->vnetp;
1374 1375 vnet_pseudo_rx_ring_t *ringp;
1375 1376
1376 1377 if ((vnetp == NULL) || (vnetp->mh == 0)) {
1377 1378 freemsgchain(mp);
1378 1379 return;
1379 1380 }
1380 1381
1381 1382 ringp = vresp->rx_ringp;
1382 1383 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1383 1384 }
1384 1385
1385 1386 void
1386 1387 vnet_tx_update(vio_net_handle_t vrh)
1387 1388 {
1388 1389 vnet_res_t *vresp = (vnet_res_t *)vrh;
1389 1390 vnet_t *vnetp = vresp->vnetp;
1390 1391 vnet_pseudo_tx_ring_t *tx_ringp;
1391 1392 vnet_pseudo_tx_group_t *tx_grp;
1392 1393 int i;
1393 1394
1394 1395 if (vnetp == NULL || vnetp->mh == NULL) {
1395 1396 return;
1396 1397 }
1397 1398
1398 1399 /*
1399 1400 * Currently, the tx hwring API (used to access rings that belong to
1400 1401 * a Hybrid IO resource) does not provide us a per ring flow ctrl
1401 1402 * update; also the pseudo rings are shared by the ports/ldcs in the
1402 1403 * vgen layer. Thus we can't figure out which pseudo ring is being
1403 1404 * re-enabled for transmits. To work around this, when we get a tx
1404 1405 * restart notification from below, we simply propagate that to all
1405 1406 * the tx pseudo rings registered with the mac layer above.
1406 1407 *
1407 1408 * There are a couple of side effects with this approach, but they are
1408 1409 * not harmful, as outlined below:
1409 1410 *
1410 1411 * A) We might send an invalid ring_update() for a ring that is not
1411 1412 * really flow controlled. This will not have any effect in the mac
1412 1413 * layer and packets will continue to be transmitted on that ring.
1413 1414 *
1414 1415 * B) We might end up clearing the flow control in the mac layer for
1415 1416 * a ring that is still flow controlled in the underlying resource.
1416 1417 * This will result in the mac layer restarting transmit, only to be
1417 1418 * flow controlled again on that ring.
1418 1419 */
1419 1420 tx_grp = &vnetp->tx_grp[0];
1420 1421 for (i = 0; i < tx_grp->ring_cnt; i++) {
1421 1422 tx_ringp = &tx_grp->rings[i];
1422 1423 mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1423 1424 }
1424 1425 }
1425 1426
1426 1427 /*
1427 1428 * vnet_tx_notify_thread:
1428 1429 *
1429 1430 * vnet_tx_ring_update() callback function wakes up this thread when
1430 1431 * it gets called. This thread will call mac_tx_ring_update() to
1431 1432 * notify upper mac of flow control getting relieved. Note that
1432 1433 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
1433 1434 * because vnet_tx_ring_update() is called from lower mac with
1434 1435 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
1435 1436 * the same lock.
1436 1437 */
1437 1438 static void
1438 1439 vnet_tx_notify_thread(void *arg)
1439 1440 {
1440 1441 callb_cpr_t cprinfo;
1441 1442 vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg;
1442 1443 vnet_pseudo_tx_ring_t *tx_ringp;
1443 1444 vnet_t *vnetp;
1444 1445 int i;
1445 1446
1446 1447 CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
1447 1448 "vnet_tx_notify_thread");
1448 1449
1449 1450 mutex_enter(&tx_grp->flowctl_lock);
1450 1451 while (!tx_grp->flowctl_done) {
1451 1452 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1452 1453 cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
1453 1454 CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);
1454 1455
1455 1456 for (i = 0; i < tx_grp->ring_cnt; i++) {
1456 1457 tx_ringp = &tx_grp->rings[i];
1457 1458 if (tx_ringp->woken_up) {
1458 1459 tx_ringp->woken_up = B_FALSE;
1459 1460 vnetp = tx_ringp->vnetp;
1460 1461 mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1461 1462 }
1462 1463 }
1463 1464 }
1464 1465 /*
1465 1466 * The tx_grp is being destroyed, exit the thread.
1466 1467 */
1467 1468 tx_grp->flowctl_thread = NULL;
1468 1469 CALLB_CPR_EXIT(&cprinfo);
1469 1470 thread_exit();
1470 1471 }
1471 1472
1472 1473 void
1473 1474 vnet_tx_ring_update(void *arg1, uintptr_t arg2)
1474 1475 {
1475 1476 vnet_t *vnetp = (vnet_t *)arg1;
1476 1477 vnet_pseudo_tx_group_t *tx_grp;
1477 1478 vnet_pseudo_tx_ring_t *tx_ringp;
1478 1479 int i;
1479 1480
1480 1481 tx_grp = &vnetp->tx_grp[0];
1481 1482 for (i = 0; i < tx_grp->ring_cnt; i++) {
1482 1483 tx_ringp = &tx_grp->rings[i];
1483 1484 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
1484 1485 mutex_enter(&tx_grp->flowctl_lock);
1485 1486 tx_ringp->woken_up = B_TRUE;
1486 1487 cv_signal(&tx_grp->flowctl_cv);
1487 1488 mutex_exit(&tx_grp->flowctl_lock);
1488 1489 break;
1489 1490 }
1490 1491 }
1491 1492 }
1492 1493
1493 1494 /*
1494 1495 * Update the new mtu of vnet into the mac layer. First check if the device has
1495 1496 * been plumbed and if so fail the mtu update. Returns 0 on success.
1496 1497 */
1497 1498 int
1498 1499 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1499 1500 {
1500 1501 int rv;
1501 1502
1502 1503 if (vnetp == NULL || vnetp->mh == NULL) {
1503 1504 return (EINVAL);
1504 1505 }
1505 1506
1506 1507 WRITE_ENTER(&vnetp->vrwlock);
1507 1508
1508 1509 if (vnetp->flags & VNET_STARTED) {
1509 1510 RW_EXIT(&vnetp->vrwlock);
1510 1511 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1511 1512 "update as the device is plumbed\n",
1512 1513 vnetp->instance);
1513 1514 return (EBUSY);
1514 1515 }
1515 1516
1516 1517 /* update mtu in the mac layer */
1517 1518 rv = mac_maxsdu_update(vnetp->mh, mtu);
1518 1519 if (rv != 0) {
1519 1520 RW_EXIT(&vnetp->vrwlock);
1520 1521 cmn_err(CE_NOTE,
1521 1522 "!vnet%d: Unable to update mtu with mac layer\n",
1522 1523 vnetp->instance);
1523 1524 return (EIO);
1524 1525 }
1525 1526
1526 1527 vnetp->mtu = mtu;
1527 1528
1528 1529 RW_EXIT(&vnetp->vrwlock);
1529 1530
1530 1531 return (0);
1531 1532 }
1532 1533
1533 1534 /*
1534 1535 * Update the link state of vnet to the mac layer.
1535 1536 */
1536 1537 void
1537 1538 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1538 1539 {
1539 1540 if (vnetp == NULL || vnetp->mh == NULL) {
1540 1541 return;
1541 1542 }
1542 1543
1543 1544 WRITE_ENTER(&vnetp->vrwlock);
1544 1545 if (vnetp->link_state == link_state) {
1545 1546 RW_EXIT(&vnetp->vrwlock);
1546 1547 return;
1547 1548 }
1548 1549 vnetp->link_state = link_state;
1549 1550 RW_EXIT(&vnetp->vrwlock);
1550 1551
1551 1552 mac_link_update(vnetp->mh, link_state);
1552 1553 }
1553 1554
1554 1555 /*
1555 1556 * vio_net_resource_reg -- An interface called to register a resource
1556 1557 * with vnet.
1557 1558 * macp -- a GLDv3 mac_register that has all the details of
1558 1559 * a resource and its callbacks etc.
1559 1560 * type -- resource type.
1560 1561 * local_macaddr -- resource's MAC address. This is used to
1561 1562 * associate a resource with a corresponding vnet.
1562 1563 * remote_macaddr -- remote side MAC address. This is ignored for
1563 1564 * the Hybrid resources.
1564 1565 * vhp -- A handle returned to the caller.
1565 1566 * vcb -- A set of callbacks provided to the callers.
1566 1567 */
1567 1568 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1568 1569 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1569 1570 vio_net_callbacks_t *vcb)
1570 1571 {
1571 1572 vnet_t *vnetp;
1572 1573 vnet_res_t *vresp;
1573 1574
1574 1575 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1575 1576 ether_copy(local_macaddr, vresp->local_macaddr);
1576 1577 ether_copy(rem_macaddr, vresp->rem_macaddr);
1577 1578 vresp->type = type;
1578 1579 bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1579 1580
1580 1581 DBG1(NULL, "Resource Registerig type=0%X\n", type);
1581 1582
1582 1583 READ_ENTER(&vnet_rw);
1583 1584 vnetp = vnet_headp;
1584 1585 while (vnetp != NULL) {
1585 1586 if (VNET_MATCH_RES(vresp, vnetp)) {
1586 1587 vresp->vnetp = vnetp;
1587 1588
1588 1589 /* Setup kstats for hio resource */
1589 1590 if (vresp->type == VIO_NET_RES_HYBRID) {
1590 1591 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1591 1592 "hio", vresp);
1592 1593 if (vresp->ksp == NULL) {
1593 1594 cmn_err(CE_NOTE, "!vnet%d: Cannot "
1594 1595 "create kstats for hio resource",
1595 1596 vnetp->instance);
1596 1597 }
1597 1598 }
1598 1599 vnet_add_resource(vnetp, vresp);
1599 1600 break;
1600 1601 }
1601 1602 vnetp = vnetp->nextp;
1602 1603 }
1603 1604 RW_EXIT(&vnet_rw);
1604 1605 if (vresp->vnetp == NULL) {
1605 1606 DWARN(NULL, "No vnet instance");
1606 1607 kmem_free(vresp, sizeof (vnet_res_t));
1607 1608 return (ENXIO);
1608 1609 }
1609 1610
1610 1611 *vhp = vresp;
1611 1612 vcb->vio_net_rx_cb = vnet_rx;
1612 1613 vcb->vio_net_tx_update = vnet_tx_update;
1613 1614 vcb->vio_net_report_err = vnet_handle_res_err;
1614 1615
1615 1616 /* Bind the resource to pseudo ring(s) */
1616 1617 if (vnet_bind_rings(vresp) != 0) {
1617 1618 (void) vnet_rem_resource(vnetp, vresp);
1618 1619 vnet_hio_destroy_kstats(vresp->ksp);
1619 1620 KMEM_FREE(vresp);
1620 1621 return (1);
1621 1622 }
1622 1623
1623 1624 /* Dispatch a task to start resources */
1624 1625 vnet_dispatch_res_task(vnetp);
1625 1626 return (0);
1626 1627 }
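
A minimal sketch of the caller side of this interface, roughly what a
transport such as vgen would do; the function name and soft-state argument
are hypothetical, and error handling is trimmed:

	/* Hypothetical caller -- illustrative only. */
	static int
	example_res_register(void *drv_arg, mac_callbacks_t *cbs,
	    ether_addr_t local, ether_addr_t remote,
	    vio_net_handle_t *vhp, vio_net_callbacks_t *vcb)
	{
		mac_register_t	*macp;
		int		rv;

		if ((macp = mac_alloc(MAC_VERSION)) == NULL)
			return (ENOMEM);
		macp->m_driver = drv_arg;	/* resource soft state */
		macp->m_callbacks = cbs;	/* mc_start/mc_stop/mc_tx */

		/* vio_net_resource_reg() bcopy()s *macp, so free our copy */
		rv = vio_net_resource_reg(macp, VIO_NET_RES_LDC_SERVICE,
		    local, remote, vhp, vcb);
		mac_free(macp);
		return (rv);
	}

On success the transport keeps the handle for vio_net_resource_unreg() and
delivers received frames through vcb->vio_net_rx_cb(*vhp, mp).
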
1627 1628
1628 1629 /*
1629 1630 * vio_net_resource_unreg -- An interface to unregister a resource.
1630 1631 */
1631 1632 void
1632 1633 vio_net_resource_unreg(vio_net_handle_t vhp)
1633 1634 {
1634 1635 vnet_res_t *vresp = (vnet_res_t *)vhp;
1635 1636 vnet_t *vnetp = vresp->vnetp;
1636 1637
1637 1638 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1638 1639
1639 1640 ASSERT(vnetp != NULL);
1640 1641 /*
1641 1642 * Remove the resource from fdb; this ensures
1642 1643 * there are no references to the resource.
1643 1644 */
1644 1645 vnet_fdbe_del(vnetp, vresp);
1645 1646
1646 1647 vnet_unbind_rings(vresp);
1647 1648
1648 1649 /* Now remove the resource from the list */
1649 1650 (void) vnet_rem_resource(vnetp, vresp);
1650 1651
1651 1652 vnet_hio_destroy_kstats(vresp->ksp);
1652 1653 KMEM_FREE(vresp);
1653 1654 }
1654 1655
1655 1656 static void
1656 1657 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
1657 1658 {
1658 1659 WRITE_ENTER(&vnetp->vrwlock);
1659 1660 vresp->nextp = vnetp->vres_list;
1660 1661 vnetp->vres_list = vresp;
1661 1662 RW_EXIT(&vnetp->vrwlock);
1662 1663 }
1663 1664
1664 1665 static vnet_res_t *
1665 1666 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1666 1667 {
1667 1668 vnet_res_t *vrp;
1668 1669
1669 1670 WRITE_ENTER(&vnetp->vrwlock);
1670 1671 if (vresp == vnetp->vres_list) {
1671 1672 vnetp->vres_list = vresp->nextp;
1672 1673 } else {
1673 1674 vrp = vnetp->vres_list;
1674 1675 while (vrp->nextp != NULL) {
1675 1676 if (vrp->nextp == vresp) {
1676 1677 vrp->nextp = vresp->nextp;
1677 1678 break;
1678 1679 }
1679 1680 vrp = vrp->nextp;
1680 1681 }
1681 1682 }
1682 1683 vresp->vnetp = NULL;
1683 1684 vresp->nextp = NULL;
1684 1685
1685 1686 RW_EXIT(&vnetp->vrwlock);
1686 1687
1687 1688 return (vresp);
1688 1689 }
1689 1690
1690 1691 /*
1691 1692 * vnet_dds_rx -- an interface called by vgen to deliver DDS messages.
1692 1693 */
1693 1694 void
1694 1695 vnet_dds_rx(void *arg, void *dmsg)
1695 1696 {
1696 1697 vnet_t *vnetp = arg;
1697 1698 vdds_process_dds_msg(vnetp, dmsg);
1698 1699 }
1699 1700
1700 1701 /*
1701 1702 * vnet_send_dds_msg -- An interface provided to DDS to send
1702 1703  * DDS messages. This simply sends the messages via vgen.
1703 1704 */
1704 1705 int
1705 1706 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1706 1707 {
1707 1708 	int rv = EINVAL;	/* fail if the vgen handle is not set */
1708 1709
1709 1710 if (vnetp->vgenhdl != NULL) {
1710 1711 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1711 1712 }
1712 1713 return (rv);
1713 1714 }
1714 1715
1715 1716 /*
1716 1717  * vnet_dds_cleanup_hio -- an interface called by vgen to clean up hio resources.
1717 1718 */
1718 1719 void
1719 1720 vnet_dds_cleanup_hio(vnet_t *vnetp)
1720 1721 {
1721 1722 vdds_cleanup_hio(vnetp);
1722 1723 }
1723 1724
1724 1725 /*
1725 1726 * vnet_handle_res_err -- A callback function called by a resource
1726 1727  * to report an error. For example, vgen can call it to report
1727 1728  * an LDC down/reset event. This triggers cleanup of the associated
1728 1729  * Hybrid resource.
1729 1730 */
1730 1731 /* ARGSUSED */
1731 1732 static void
1732 1733 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1733 1734 {
1734 1735 vnet_res_t *vresp = (vnet_res_t *)vrh;
1735 1736 vnet_t *vnetp = vresp->vnetp;
1736 1737
1737 1738 if (vnetp == NULL) {
1738 1739 return;
1739 1740 }
1740 1741 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1741 1742 (vresp->type != VIO_NET_RES_HYBRID)) {
1742 1743 return;
1743 1744 }
1744 1745
1745 1746 vdds_cleanup_hio(vnetp);
1746 1747 }
1747 1748
1748 1749 /*
1749 1750  * vnet_dispatch_res_task -- A function to dispatch a task that starts resources.
1750 1751 */
1751 1752 static void
1752 1753 vnet_dispatch_res_task(vnet_t *vnetp)
1753 1754 {
1754 1755 int rv;
1755 1756
1756 1757 /*
1757 1758 * Dispatch the task. It could be the case that vnetp->flags does
1758 1759 	 * not have VNET_STARTED set. This is ok as vnet_res_start_task()
1759 1760 	 * re-checks the flag and aborts when the task runs. See related comments
1760 1761 * in vnet_m_stop() and vnet_stop_resources().
1761 1762 */
1762 1763 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1763 1764 vnetp, DDI_NOSLEEP);
1764 1765 if (rv != DDI_SUCCESS) {
1765 1766 cmn_err(CE_WARN,
1766 1767 "vnet%d:Can't dispatch start resource task",
1767 1768 vnetp->instance);
1768 1769 }
1769 1770 }
1770 1771
1771 1772 /*
1772 1773  * vnet_res_start_task -- A taskq callback function that starts the vnet's resources.
1773 1774 */
1774 1775 static void
1775 1776 vnet_res_start_task(void *arg)
1776 1777 {
1777 1778 vnet_t *vnetp = arg;
1778 1779
1779 1780 WRITE_ENTER(&vnetp->vrwlock);
1780 1781 if (vnetp->flags & VNET_STARTED) {
1781 1782 vnet_start_resources(vnetp);
1782 1783 }
1783 1784 RW_EXIT(&vnetp->vrwlock);
1784 1785 }
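
The dispatch/start pair above forms a small handshake: vnet_dispatch_res_task() queues the task unconditionally, and the task re-checks VNET_STARTED under the write lock before doing any work, so a stop that races with the dispatch simply turns the task into a no-op. A sketch of that re-check-under-lock shape (hypothetical names, userland locking):

```c
#include <pthread.h>

#define	DEV_STARTED	0x1

typedef struct dev_state {
	pthread_rwlock_t	lock;
	unsigned		flags;
} dev_state_t;

void start_all_resources(dev_state_t *);	/* defined elsewhere */

/* Task body: re-validate state under the lock before doing work. */
static void
start_task(void *arg)
{
	dev_state_t *dp = arg;

	(void) pthread_rwlock_wrlock(&dp->lock);
	if (dp->flags & DEV_STARTED)
		start_all_resources(dp);	/* no-op if already stopped */
	(void) pthread_rwlock_unlock(&dp->lock);
}
```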
1785 1786
1786 1787 /*
1787 1788 * vnet_start_resources -- starts all resources associated with
1788 1789 * a vnet.
1789 1790 */
1790 1791 static void
1791 1792 vnet_start_resources(vnet_t *vnetp)
1792 1793 {
1793 1794 mac_register_t *macp;
1794 1795 mac_callbacks_t *cbp;
1795 1796 vnet_res_t *vresp;
1796 1797 int rv;
1797 1798
1798 1799 DBG1(vnetp, "enter\n");
1799 1800
1800 1801 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1801 1802
1802 1803 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1803 1804 /* skip if it is already started */
1804 1805 if (vresp->flags & VNET_STARTED) {
1805 1806 continue;
1806 1807 }
1807 1808 macp = &vresp->macreg;
1808 1809 cbp = macp->m_callbacks;
1809 1810 rv = cbp->mc_start(macp->m_driver);
1810 1811 if (rv == 0) {
1811 1812 /*
1812 1813 * Successfully started the resource, so now
1813 1814 * add it to the fdb.
1814 1815 */
1815 1816 vresp->flags |= VNET_STARTED;
1816 1817 vnet_fdbe_add(vnetp, vresp);
1817 1818 }
1818 1819 }
1819 1820
1820 1821 DBG1(vnetp, "exit\n");
1821 1822
1822 1823 }
1823 1824
1824 1825 /*
1825 1826 * vnet_stop_resources -- stop all resources associated with a vnet.
1826 1827 */
1827 1828 static void
1828 1829 vnet_stop_resources(vnet_t *vnetp)
1829 1830 {
1830 1831 vnet_res_t *vresp;
1831 1832 mac_register_t *macp;
1832 1833 mac_callbacks_t *cbp;
1833 1834
1834 1835 DBG1(vnetp, "enter\n");
1835 1836
1836 1837 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1837 1838
1838 1839 for (vresp = vnetp->vres_list; vresp != NULL; ) {
1839 1840 if (vresp->flags & VNET_STARTED) {
1840 1841 /*
1841 1842 * Release the lock while invoking mc_stop() of the
1842 1843 * underlying resource. We hold a reference to this
1843 1844 * resource to prevent being removed from the list in
1844 1845 * vio_net_resource_unreg(). Note that new resources
1845 1846 * can be added to the head of the list while the lock
1846 1847 * is released, but they won't be started, as
1847 1848 * VNET_STARTED flag has been cleared for the vnet
1848 1849 * device in vnet_m_stop(). Also, while the lock is
1849 1850 * released a resource could be removed from the list
1850 1851 * in vio_net_resource_unreg(); but that is ok, as we
1851 1852 * re-acquire the lock and only then access the forward
1852 1853 * link (vresp->nextp) to continue with the next
1853 1854 * resource.
1854 1855 */
1855 1856 vresp->flags &= ~VNET_STARTED;
1856 1857 vresp->flags |= VNET_STOPPING;
1857 1858 macp = &vresp->macreg;
1858 1859 cbp = macp->m_callbacks;
1859 1860 VNET_FDBE_REFHOLD(vresp);
1860 1861 RW_EXIT(&vnetp->vrwlock);
1861 1862
1862 1863 cbp->mc_stop(macp->m_driver);
1863 1864
1864 1865 WRITE_ENTER(&vnetp->vrwlock);
1865 1866 vresp->flags &= ~VNET_STOPPING;
1866 1867 VNET_FDBE_REFRELE(vresp);
1867 1868 }
1868 1869 vresp = vresp->nextp;
1869 1870 }
1870 1871 DBG1(vnetp, "exit\n");
1871 1872 }
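
The comment block above describes the central idiom here: pin the current node with a reference, drop the lock for the blocking mc_stop() call, then re-acquire the lock before following nextp. Compressed into a standalone sketch (hypothetical refcount helpers; a real driver would use its own):

```c
#include <pthread.h>
#include <stddef.h>

typedef struct res {
	struct res	*nextp;
	unsigned	refcnt;
} res_t;

typedef struct devstate {
	pthread_rwlock_t	lock;
	res_t			*list;
} devstate_t;

static void res_hold(res_t *rp) { rp->refcnt++; }	/* lock held */
static void res_rele(res_t *rp) { rp->refcnt--; }	/* lock held */
void res_stop_blocking(res_t *);			/* may sleep */

static void
stop_all(devstate_t *dp)
{
	(void) pthread_rwlock_wrlock(&dp->lock);
	for (res_t *rp = dp->list; rp != NULL; ) {
		res_hold(rp);		/* keeps rp (and rp->nextp) valid */
		(void) pthread_rwlock_unlock(&dp->lock);

		res_stop_blocking(rp);	/* blocking call, lock dropped */

		(void) pthread_rwlock_wrlock(&dp->lock);
		res_t *nextp = rp->nextp;	/* safe: rp still held */
		res_rele(rp);
		rp = nextp;
	}
	(void) pthread_rwlock_unlock(&dp->lock);
}
```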
1872 1873
1873 1874 /*
1874 1875 * Setup kstats for the HIO statistics.
1875 1876 * NOTE: the synchronization for the statistics is the
1876 1877 * responsibility of the caller.
1877 1878 */
1878 1879 kstat_t *
1879 1880 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1880 1881 {
1881 1882 kstat_t *ksp;
1882 1883 vnet_t *vnetp = vresp->vnetp;
1883 1884 vnet_hio_kstats_t *hiokp;
1884 1885 size_t size;
1885 1886
1886 1887 ASSERT(vnetp != NULL);
1887 1888 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1888 1889 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1889 1890 KSTAT_TYPE_NAMED, size, 0);
1890 1891 if (ksp == NULL) {
1891 1892 return (NULL);
1892 1893 }
1893 1894
1894 1895 hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1895 1896 kstat_named_init(&hiokp->ipackets, "ipackets",
1896 1897 KSTAT_DATA_ULONG);
1897 1898 kstat_named_init(&hiokp->ierrors, "ierrors",
1898 1899 KSTAT_DATA_ULONG);
1899 1900 kstat_named_init(&hiokp->opackets, "opackets",
1900 1901 KSTAT_DATA_ULONG);
1901 1902 kstat_named_init(&hiokp->oerrors, "oerrors",
1902 1903 KSTAT_DATA_ULONG);
1903 1904
1904 1905
1905 1906 /* MIB II kstat variables */
1906 1907 kstat_named_init(&hiokp->rbytes, "rbytes",
1907 1908 KSTAT_DATA_ULONG);
1908 1909 kstat_named_init(&hiokp->obytes, "obytes",
1909 1910 KSTAT_DATA_ULONG);
1910 1911 kstat_named_init(&hiokp->multircv, "multircv",
1911 1912 KSTAT_DATA_ULONG);
1912 1913 kstat_named_init(&hiokp->multixmt, "multixmt",
1913 1914 KSTAT_DATA_ULONG);
1914 1915 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv",
1915 1916 KSTAT_DATA_ULONG);
1916 1917 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt",
1917 1918 KSTAT_DATA_ULONG);
1918 1919 kstat_named_init(&hiokp->norcvbuf, "norcvbuf",
1919 1920 KSTAT_DATA_ULONG);
1920 1921 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf",
1921 1922 KSTAT_DATA_ULONG);
1922 1923
1923 1924 ksp->ks_update = vnet_hio_update_kstats;
1924 1925 ksp->ks_private = (void *)vresp;
1925 1926 kstat_install(ksp);
1926 1927 return (ksp);
1927 1928 }
1928 1929
1929 1930 /*
1930 1931 * Destroy kstats.
1931 1932 */
1932 1933 static void
1933 1934 vnet_hio_destroy_kstats(kstat_t *ksp)
1934 1935 {
1935 1936 if (ksp != NULL)
1936 1937 kstat_delete(ksp);
1937 1938 }
1938 1939
1939 1940 /*
1940 1941 * Update the kstats.
1941 1942 */
1942 1943 static int
1943 1944 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1944 1945 {
1945 1946 vnet_t *vnetp;
1946 1947 vnet_res_t *vresp;
1947 1948 vnet_hio_stats_t statsp;
1948 1949 vnet_hio_kstats_t *hiokp;
1949 1950
1950 1951 vresp = (vnet_res_t *)ksp->ks_private;
1951 1952 vnetp = vresp->vnetp;
1952 1953
1953 1954 bzero(&statsp, sizeof (vnet_hio_stats_t));
1954 1955
1955 1956 READ_ENTER(&vnetp->vsw_fp_rw);
1956 1957 if (vnetp->hio_fp == NULL) {
1957 1958 /* not using hio resources, just return */
1958 1959 RW_EXIT(&vnetp->vsw_fp_rw);
1959 1960 return (0);
1960 1961 }
1961 1962 VNET_FDBE_REFHOLD(vnetp->hio_fp);
1962 1963 RW_EXIT(&vnetp->vsw_fp_rw);
1963 1964 vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1964 1965 VNET_FDBE_REFRELE(vnetp->hio_fp);
1965 1966
1966 1967 hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1967 1968
1968 1969 if (rw == KSTAT_READ) {
1969 1970 /* Link Input/Output stats */
1970 1971 hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets;
1971 1972 hiokp->ipackets64.value.ull = statsp.ipackets;
1972 1973 hiokp->ierrors.value.ul = statsp.ierrors;
1973 1974 hiokp->opackets.value.ul = (uint32_t)statsp.opackets;
1974 1975 hiokp->opackets64.value.ull = statsp.opackets;
1975 1976 hiokp->oerrors.value.ul = statsp.oerrors;
1976 1977
1977 1978 /* MIB II kstat variables */
1978 1979 hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes;
1979 1980 hiokp->rbytes64.value.ull = statsp.rbytes;
1980 1981 hiokp->obytes.value.ul = (uint32_t)statsp.obytes;
1981 1982 hiokp->obytes64.value.ull = statsp.obytes;
1982 1983 hiokp->multircv.value.ul = statsp.multircv;
1983 1984 hiokp->multixmt.value.ul = statsp.multixmt;
1984 1985 hiokp->brdcstrcv.value.ul = statsp.brdcstrcv;
1985 1986 hiokp->brdcstxmt.value.ul = statsp.brdcstxmt;
1986 1987 hiokp->norcvbuf.value.ul = statsp.norcvbuf;
1987 1988 hiokp->noxmtbuf.value.ul = statsp.noxmtbuf;
1988 1989 } else {
1989 1990 return (EACCES);
1990 1991 }
1991 1992
1992 1993 return (0);
1993 1994 }
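
The ks_update handler above runs when a consumer reads the kstat; writes are rejected with EACCES. For reference, the same counters would be read from userland through libkstat roughly like this (hypothetical consumer; assumes the module name is "vnet" and the kstat was created with name "hio" as above; link with -lkstat):

```c
#include <kstat.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *kn;

	if ((kc = kstat_open()) == NULL)
		return (1);

	/* module "vnet", any instance (-1), kstat name "hio" */
	ksp = kstat_lookup(kc, "vnet", -1, "hio");
	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
		kn = kstat_data_lookup(ksp, "ipackets");
		if (kn != NULL)
			(void) printf("ipackets: %lu\n", kn->value.ul);
	}
	(void) kstat_close(kc);
	return (0);
}
```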
1994 1995
1995 1996 static void
1996 1997 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1997 1998 {
1998 1999 mac_register_t *macp;
1999 2000 mac_callbacks_t *cbp;
2000 2001 uint64_t val;
2001 2002 int stat;
2002 2003
2003 2004 /*
2004 2005 * get the specified statistics from the underlying nxge.
2005 2006 */
2006 2007 macp = &vresp->macreg;
2007 2008 cbp = macp->m_callbacks;
2008 2009 for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
2009 2010 if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
2010 2011 switch (stat) {
2011 2012 case MAC_STAT_IPACKETS:
2012 2013 statsp->ipackets = val;
2013 2014 break;
2014 2015
2015 2016 case MAC_STAT_IERRORS:
2016 2017 statsp->ierrors = val;
2017 2018 break;
2018 2019
2019 2020 case MAC_STAT_OPACKETS:
2020 2021 statsp->opackets = val;
2021 2022 break;
2022 2023
2023 2024 case MAC_STAT_OERRORS:
2024 2025 statsp->oerrors = val;
2025 2026 break;
2026 2027
2027 2028 case MAC_STAT_RBYTES:
2028 2029 statsp->rbytes = val;
2029 2030 break;
2030 2031
2031 2032 case MAC_STAT_OBYTES:
2032 2033 statsp->obytes = val;
2033 2034 break;
2034 2035
2035 2036 case MAC_STAT_MULTIRCV:
2036 2037 statsp->multircv = val;
2037 2038 break;
2038 2039
2039 2040 case MAC_STAT_MULTIXMT:
2040 2041 statsp->multixmt = val;
2041 2042 break;
2042 2043
2043 2044 case MAC_STAT_BRDCSTRCV:
2044 2045 statsp->brdcstrcv = val;
2045 2046 break;
2046 2047
2047 2048 case MAC_STAT_BRDCSTXMT:
2048 2049 statsp->brdcstxmt = val;
2049 2050 break;
2050 2051
2051 2052 case MAC_STAT_NOXMTBUF:
2052 2053 statsp->noxmtbuf = val;
2053 2054 break;
2054 2055
2055 2056 case MAC_STAT_NORCVBUF:
2056 2057 statsp->norcvbuf = val;
2057 2058 break;
2058 2059
2059 2060 default:
2060 2061 /*
2061 2062 				 * stats we are not interested in.
2062 2063 */
2063 2064 break;
2064 2065 }
2065 2066 }
2066 2067 }
2067 2068 }
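
The switch above maps each MAC stat id onto a struct field one case at a time. The same mapping can also be written table-driven, which keeps future additions to one line each; a sketch under the assumption that stat ids are small integers (the enum values below are placeholders, not the real MAC_STAT_* ids):

```c
#include <stddef.h>
#include <stdint.h>

typedef struct hio_stats {
	uint64_t	ipackets;
	uint64_t	opackets;
	uint64_t	rbytes;
	uint64_t	obytes;
} hio_stats_t;

/* Placeholder ids; a real driver would use the MAC_STAT_* values. */
enum { STAT_IPACKETS, STAT_OPACKETS, STAT_RBYTES, STAT_OBYTES };

static const struct {
	int	stat;	/* stat id to query */
	size_t	off;	/* destination field offset */
} stat_map[] = {
	{ STAT_IPACKETS, offsetof(hio_stats_t, ipackets) },
	{ STAT_OPACKETS, offsetof(hio_stats_t, opackets) },
	{ STAT_RBYTES,	 offsetof(hio_stats_t, rbytes) },
	{ STAT_OBYTES,	 offsetof(hio_stats_t, obytes) },
};

static void
fill_stats(hio_stats_t *sp, int (*getstat)(int, uint64_t *))
{
	for (size_t i = 0; i < sizeof (stat_map) / sizeof (stat_map[0]); i++) {
		uint64_t val;

		if (getstat(stat_map[i].stat, &val) == 0)
			*(uint64_t *)((char *)sp + stat_map[i].off) = val;
	}
}
```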
2068 2069
2069 2070 static boolean_t
2070 2071 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
2071 2072 {
2072 2073 vnet_t *vnetp = (vnet_t *)arg;
2073 2074
2074 2075 if (vnetp == NULL) {
2075 2076 return (0);
2076 2077 }
2077 2078
2078 2079 switch (cap) {
2079 2080
2080 2081 case MAC_CAPAB_RINGS: {
2081 2082
2082 2083 mac_capab_rings_t *cap_rings = cap_data;
2083 2084 /*
2084 2085 * Rings Capability Notes:
2085 2086 * We advertise rings to make use of the rings framework in
2086 2087 	 * the gldv3 mac layer, to improve performance. This is
2087 2088 * specifically needed when a Hybrid resource (with multiple
2088 2089 * tx/rx hardware rings) is assigned to a vnet device. We also
2089 2090 * leverage this for the normal case when no Hybrid resource is
2090 2091 * assigned.
2091 2092 *
2092 2093 * Ring Allocation:
2093 2094 * - TX path:
2094 2095 * We expose a pseudo ring group with 2 pseudo tx rings (as
2095 2096 	 * currently HybridIO exports only 2 rings). In the normal case,
2096 2097 * transmit traffic that comes down to the driver through the
2097 2098 * mri_tx (vnet_tx_ring_send()) entry point goes through the
2098 2099 * distributed switching algorithm in vnet and gets transmitted
2099 2100 * over a port/LDC in the vgen layer to either the vswitch or a
2100 2101 * peer vnet. If and when a Hybrid resource is assigned to the
2101 2102 * vnet, we obtain the tx ring information of the Hybrid device
2102 2103 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2103 2104 * Traffic being sent over the Hybrid resource by the mac layer
2104 2105 * gets spread across both hw rings, as they are mapped to the
2105 2106 * 2 pseudo tx rings in vnet.
2106 2107 *
2107 2108 * - RX path:
2108 2109 * We expose a pseudo ring group with 3 pseudo rx rings (static
2109 2110 * rings) initially. The first (default) pseudo rx ring is
2110 2111 * reserved for the resource that connects to the vswitch
2111 2112 * service. The next 2 rings are reserved for a Hybrid resource
2112 2113 * that may be assigned to the vnet device. If and when a
2113 2114 * Hybrid resource is assigned to the vnet, we obtain the rx
2114 2115 * ring information of the Hybrid device (nxge) and map these
2115 2116 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2116 2117 * resource that connects to a peer vnet, we dynamically
2117 2118 * allocate a pseudo rx ring and map it to that resource, when
2118 2119 * the resource gets added; and the pseudo rx ring is
2119 2120 * dynamically registered with the upper mac layer. We do the
2120 2121 * reverse and unregister the ring with the mac layer when
2121 2122 * the resource gets removed.
2122 2123 *
2123 2124 * Synchronization notes:
2124 2125 	 * We don't need any lock to protect members of the ring structure,
2125 2126 * specifically ringp->hw_rh, in either the TX or the RX ring,
2126 2127 * as explained below.
2127 2128 * - TX ring:
2128 2129 * ring->hw_rh is initialized only when a Hybrid resource is
2129 2130 * associated; and gets referenced only in vnet_hio_tx(). The
2130 2131 * Hybrid resource itself is available in fdb only after tx
2131 2132 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2132 2133 * we call vnet_bind_rings() first and then call
2133 2134 * vnet_start_resources() which adds an entry to fdb. For
2134 2135 * traffic going over LDC resources, we don't reference
2135 2136 * ring->hw_rh at all.
2136 2137 * - RX ring:
2137 2138 * For rings mapped to Hybrid resource ring->hw_rh is
2138 2139 * initialized and only then do we add the rx callback for
2139 2140 * the underlying Hybrid resource; we disable callbacks before
2140 2141 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2141 2142 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2142 2143 * (vio_net_resource_unreg()).
2143 2144 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
2144 2145 * Note that for rings mapped to Hybrid resource, though the
2145 2146 	 * rings are statically registered with the mac layer, their
2146 2147 * hardware ring mapping (ringp->hw_rh) can be torn down in
2147 2148 * vnet_unbind_hwrings() while the kstat operation is in
2148 2149 * progress. To protect against this, we hold a reference to
2149 2150 * the resource in FDB; this ensures that the thread in
2150 2151 * vio_net_resource_unreg() waits for the reference to be
2151 2152 * dropped before unbinding the ring.
2152 2153 *
2153 2154 * We don't need to do this for rings mapped to LDC resources.
2154 2155 * These rings are registered/unregistered dynamically with
2155 2156 * the mac layer and so any attempt to unregister the ring
2156 2157 	 * while a kstat operation is in progress will block in
2157 2158 	 * mac_group_rem_ring(). This implicitly protects the
2158 2159 	 * resource (ringp->hw_rh) from disappearing.
2159 2160 */
2160 2161
2161 2162 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2162 2163 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2163 2164
2164 2165 /*
2165 2166 * The ring_cnt for rx grp is initialized in
2166 2167 * vnet_ring_grp_init(). Later, the ring_cnt gets
2167 2168 * updated dynamically whenever LDC resources are added
2168 2169 * or removed.
2169 2170 */
2170 2171 cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2171 2172 cap_rings->mr_rget = vnet_get_ring;
2172 2173
2173 2174 cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2174 2175 cap_rings->mr_gget = vnet_get_group;
2175 2176 cap_rings->mr_gaddring = NULL;
2176 2177 cap_rings->mr_gremring = NULL;
2177 2178 } else {
2178 2179 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2179 2180
2180 2181 /*
2181 2182 * The ring_cnt for tx grp is initialized in
2182 2183 * vnet_ring_grp_init() and remains constant, as we
2183 2184 			 * do not support dynamic tx rings for now.
2184 2185 */
2185 2186 cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2186 2187 cap_rings->mr_rget = vnet_get_ring;
2187 2188
2188 2189 /*
2189 2190 * Transmit rings are not grouped; i.e, the number of
2190 2191 * transmit ring groups advertised should be set to 0.
2191 2192 */
2192 2193 cap_rings->mr_gnum = 0;
2193 2194
2194 2195 cap_rings->mr_gget = vnet_get_group;
2195 2196 cap_rings->mr_gaddring = NULL;
2196 2197 cap_rings->mr_gremring = NULL;
2197 2198 }
2198 2199 return (B_TRUE);
2199 2200
2200 2201 }
2201 2202
2202 2203 default:
2203 2204 break;
2204 2205
2205 2206 }
2206 2207
2207 2208 return (B_FALSE);
2208 2209 }
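
The ring/group layout described in the notes above, in compact form (assuming the 2-ring HybridIO export and one LDC guest peer for illustration):

```
RX pseudo group 0 (advertised, MAC_GROUP_TYPE_STATIC):
  ring 0   -> LDC_SERVICE resource (vswitch)   static, default ring
  ring 1   -> Hybrid hw rx ring 0 (nxge)       static, bound when HIO added
  ring 2   -> Hybrid hw rx ring 1 (nxge)       static, bound when HIO added
  ring 3.. -> one per LDC_GUEST peer vnet      added/removed dynamically

TX pseudo rings (no tx groups advertised, mr_gnum = 0):
  ring 0   -> hw tx ring 0 when HIO bound; LDC tx path otherwise
  ring 1   -> hw tx ring 1 when HIO bound; LDC tx path otherwise
```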
2209 2210
2210 2211 /*
2211 2212  * Callback function for the MAC layer to get ring information.
2212 2213 */
2213 2214 static void
2214 2215 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2215 2216 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2216 2217 {
2217 2218 vnet_t *vnetp = arg;
2218 2219
2219 2220 switch (rtype) {
2220 2221
2221 2222 case MAC_RING_TYPE_RX: {
2222 2223
2223 2224 vnet_pseudo_rx_group_t *rx_grp;
2224 2225 vnet_pseudo_rx_ring_t *rx_ringp;
2225 2226 mac_intr_t *mintr;
2226 2227
2227 2228 /* We advertised only one RX group */
2228 2229 ASSERT(g_index == 0);
2229 2230 rx_grp = &vnetp->rx_grp[g_index];
2230 2231
2231 2232 /* Check the current # of rings in the rx group */
2232 2233 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2233 2234
2234 2235 /* Get the ring based on the index */
2235 2236 rx_ringp = &rx_grp->rings[r_index];
2236 2237
2237 2238 rx_ringp->handle = r_handle;
2238 2239 /*
2239 2240 * Note: we don't need to save the incoming r_index in rx_ring,
2240 2241 * as vnet_ring_grp_init() would have initialized the index for
2241 2242 * each ring in the array.
2242 2243 */
2243 2244 rx_ringp->grp = rx_grp;
2244 2245 rx_ringp->vnetp = vnetp;
2245 2246
2246 2247 mintr = &infop->mri_intr;
2247 2248 mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2248 2249 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2249 2250 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2250 2251
2251 2252 infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2252 2253 infop->mri_start = vnet_rx_ring_start;
2253 2254 infop->mri_stop = vnet_rx_ring_stop;
2254 2255 infop->mri_stat = vnet_rx_ring_stat;
2255 2256
2256 2257 /* Set the poll function, as this is an rx ring */
2257 2258 infop->mri_poll = vnet_rx_poll;
2258 2259 /*
2259 2260 			 * The MAC_RING_RX_ENQUEUE bit needed to be set for
2260 2261 			 * nxge, which was not sending packet chains in interrupt
2261 2262 * context. For such drivers, packets are queued in
2262 2263 * Rx soft rings so that we get a chance to switch
2263 2264 * into a polling mode under backlog. This bug (not
2264 2265 * sending packet chains) has now been fixed. Once
2265 2266 * the performance impact is measured, this change
2266 2267 * will be removed.
2267 2268 */
2268 2269 infop->mri_flags = (vnet_mac_rx_queuing ?
2269 2270 MAC_RING_RX_ENQUEUE : 0);
2270 2271 break;
2271 2272 }
2272 2273
2273 2274 case MAC_RING_TYPE_TX: {
2274 2275 vnet_pseudo_tx_group_t *tx_grp;
2275 2276 vnet_pseudo_tx_ring_t *tx_ringp;
2276 2277
2277 2278 /*
2278 2279 * No need to check grp index; mac layer passes -1 for it.
2279 2280 */
2280 2281 tx_grp = &vnetp->tx_grp[0];
2281 2282
2282 2283 /* Check the # of rings in the tx group */
2283 2284 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2284 2285
2285 2286 /* Get the ring based on the index */
2286 2287 tx_ringp = &tx_grp->rings[r_index];
2287 2288
2288 2289 tx_ringp->handle = r_handle;
2289 2290 tx_ringp->index = r_index;
2290 2291 tx_ringp->grp = tx_grp;
2291 2292 tx_ringp->vnetp = vnetp;
2292 2293
2293 2294 infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2294 2295 infop->mri_start = vnet_tx_ring_start;
2295 2296 infop->mri_stop = vnet_tx_ring_stop;
2296 2297 infop->mri_stat = vnet_tx_ring_stat;
2297 2298
2298 2299 /* Set the transmit function, as this is a tx ring */
2299 2300 infop->mri_tx = vnet_tx_ring_send;
2300 2301 /*
2301 2302 * MAC_RING_TX_SERIALIZE bit needs to be set while
2302 2303 			 * hybridIO is enabled to work around tx lock
2303 2304 * contention issues in nxge.
2304 2305 */
2305 2306 infop->mri_flags = (vnet_mac_tx_serialize ?
2306 2307 MAC_RING_TX_SERIALIZE : 0);
2307 2308 break;
2308 2309 }
2309 2310
2310 2311 default:
2311 2312 break;
2312 2313 }
2313 2314 }
2314 2315
2315 2316 /*
2316 2317  * Callback function for the MAC layer to get group information.
2317 2318 */
2318 2319 static void
2319 2320 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2320 - mac_group_info_t *infop, mac_group_handle_t handle)
2321 + mac_group_info_t *infop, mac_group_handle_t handle)
2321 2322 {
2322 2323 vnet_t *vnetp = (vnet_t *)arg;
2323 2324
2324 2325 switch (type) {
2325 2326
2326 2327 case MAC_RING_TYPE_RX:
2327 2328 {
2328 2329 vnet_pseudo_rx_group_t *rx_grp;
2329 2330
2330 2331 /* We advertised only one RX group */
2331 2332 ASSERT(index == 0);
2332 2333
2333 2334 rx_grp = &vnetp->rx_grp[index];
2334 2335 rx_grp->handle = handle;
2335 2336 rx_grp->index = index;
2336 2337 rx_grp->vnetp = vnetp;
2337 2338
2338 2339 infop->mgi_driver = (mac_group_driver_t)rx_grp;
2339 2340 infop->mgi_start = NULL;
2340 2341 infop->mgi_stop = NULL;
2341 2342 infop->mgi_addmac = vnet_addmac;
2342 2343 infop->mgi_remmac = vnet_remmac;
2343 2344 infop->mgi_count = rx_grp->ring_cnt;
2344 2345
2345 2346 break;
2346 2347 }
2347 2348
2348 2349 case MAC_RING_TYPE_TX:
2349 2350 {
2350 2351 vnet_pseudo_tx_group_t *tx_grp;
2351 2352
2352 2353 /* We advertised only one TX group */
2353 2354 ASSERT(index == 0);
2354 2355
2355 2356 tx_grp = &vnetp->tx_grp[index];
2356 2357 tx_grp->handle = handle;
2357 2358 tx_grp->index = index;
2358 2359 tx_grp->vnetp = vnetp;
2359 2360
2360 2361 infop->mgi_driver = (mac_group_driver_t)tx_grp;
2361 2362 infop->mgi_start = NULL;
2362 2363 infop->mgi_stop = NULL;
2363 2364 infop->mgi_addmac = NULL;
2364 2365 infop->mgi_remmac = NULL;
2365 2366 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2366 2367
2367 2368 break;
2368 2369 }
2369 2370
2370 2371 default:
2371 2372 break;
2372 2373
2373 2374 }
2374 2375 }
2375 2376
2376 2377 static int
2377 2378 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2378 2379 {
2379 2380 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2380 2381 int err;
2381 2382
2382 2383 /*
2383 2384 * If this ring is mapped to a LDC resource, simply mark the state to
2384 2385 * indicate the ring is started and return.
2385 2386 */
2386 2387 if ((rx_ringp->state &
2387 2388 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2388 2389 rx_ringp->gen_num = mr_gen_num;
2389 2390 rx_ringp->state |= VNET_RXRING_STARTED;
2390 2391 return (0);
2391 2392 }
2392 2393
2393 2394 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2394 2395
2395 2396 /*
2396 2397 * This must be a ring reserved for a hwring. If the hwring is not
2397 2398 * bound yet, simply mark the state to indicate the ring is started and
2398 2399 * return. If and when a hybrid resource is activated for this vnet
2399 2400 * device, we will bind the hwring and start it then. If a hwring is
2400 2401 * already bound, start it now.
2401 2402 */
2402 2403 if (rx_ringp->hw_rh == NULL) {
2403 2404 rx_ringp->gen_num = mr_gen_num;
2404 2405 rx_ringp->state |= VNET_RXRING_STARTED;
2405 2406 return (0);
2406 2407 }
2407 2408
2408 - err = mac_hwring_start(rx_ringp->hw_rh);
2409 + err = mac_hwring_activate(rx_ringp->hw_rh);
2409 2410 if (err == 0) {
2410 2411 rx_ringp->gen_num = mr_gen_num;
2411 2412 rx_ringp->state |= VNET_RXRING_STARTED;
2412 2413 } else {
2413 2414 err = ENXIO;
2414 2415 }
2415 2416
2416 2417 return (err);
2417 2418 }
2418 2419
2419 2420 static void
2420 2421 vnet_rx_ring_stop(mac_ring_driver_t arg)
2421 2422 {
2422 2423 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2423 2424
2424 2425 /*
2425 2426 * If this ring is mapped to a LDC resource, simply mark the state to
2426 2427 * indicate the ring is now stopped and return.
2427 2428 */
2428 2429 if ((rx_ringp->state &
2429 2430 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2430 2431 rx_ringp->state &= ~VNET_RXRING_STARTED;
2431 2432 return;
2432 2433 }
2433 2434
2434 2435 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2435 2436
2436 2437 /*
2437 2438 * This must be a ring reserved for a hwring. If the hwring is not
2438 2439 * bound yet, simply mark the state to indicate the ring is stopped and
2439 2440 * return. If a hwring is already bound, stop it now.
2440 2441 */
2441 2442 if (rx_ringp->hw_rh == NULL) {
2442 2443 rx_ringp->state &= ~VNET_RXRING_STARTED;
2443 2444 return;
2444 2445 }
2445 2446
2446 - mac_hwring_stop(rx_ringp->hw_rh);
2447 + mac_hwring_quiesce(rx_ringp->hw_rh);
2447 2448 rx_ringp->state &= ~VNET_RXRING_STARTED;
2448 2449 }
2449 2450
2450 2451 static int
2451 2452 vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2452 2453 {
2453 2454 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
2454 2455 vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp;
2455 2456 vnet_res_t *vresp;
2456 2457 mac_register_t *macp;
2457 2458 mac_callbacks_t *cbp;
2458 2459
2459 2460 /*
2460 2461 * Refer to vnet_m_capab() function for detailed comments on ring
2461 2462 * synchronization.
2462 2463 */
2463 2464 if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
2464 2465 READ_ENTER(&vnetp->vsw_fp_rw);
2465 2466 if (vnetp->hio_fp == NULL) {
2466 2467 RW_EXIT(&vnetp->vsw_fp_rw);
2467 2468 return (0);
2468 2469 }
2469 2470
2470 2471 VNET_FDBE_REFHOLD(vnetp->hio_fp);
2471 2472 RW_EXIT(&vnetp->vsw_fp_rw);
2472 2473 (void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
2473 2474 VNET_FDBE_REFRELE(vnetp->hio_fp);
2474 2475 return (0);
2475 2476 }
2476 2477
2477 2478 ASSERT((rx_ringp->state &
2478 2479 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
2479 2480 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2480 2481 macp = &vresp->macreg;
2481 2482 cbp = macp->m_callbacks;
2482 2483
2483 2484 cbp->mc_getstat(macp->m_driver, stat, val);
2484 2485
2485 2486 return (0);
2486 2487 }
2487 2488
2488 2489 /* ARGSUSED */
2489 2490 static int
2490 2491 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2491 2492 {
2492 2493 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2493 2494
2494 2495 tx_ringp->state |= VNET_TXRING_STARTED;
2495 2496 return (0);
2496 2497 }
2497 2498
2498 2499 static void
2499 2500 vnet_tx_ring_stop(mac_ring_driver_t arg)
2500 2501 {
2501 2502 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2502 2503
2503 2504 tx_ringp->state &= ~VNET_TXRING_STARTED;
2504 2505 }
2505 2506
2506 2507 static int
2507 2508 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2508 2509 {
2509 2510 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
2510 2511 vnet_tx_ring_stats_t *statsp;
2511 2512
2512 2513 statsp = &tx_ringp->tx_ring_stats;
2513 2514
2514 2515 switch (stat) {
2515 2516 case MAC_STAT_OPACKETS:
2516 2517 *val = statsp->opackets;
2517 2518 break;
2518 2519
2519 2520 case MAC_STAT_OBYTES:
2520 2521 *val = statsp->obytes;
2521 2522 break;
2522 2523
2523 2524 default:
2524 2525 *val = 0;
2525 2526 return (ENOTSUP);
2526 2527 }
2527 2528
2528 2529 return (0);
2529 2530 }
2530 2531
2531 2532 /*
2532 2533 * Disable polling for a ring and enable its interrupt.
2533 2534 */
2534 2535 static int
2535 2536 vnet_ring_enable_intr(void *arg)
2536 2537 {
2537 2538 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2538 2539 vnet_res_t *vresp;
2539 2540
2540 2541 if (rx_ringp->hw_rh == NULL) {
2541 2542 /*
2542 2543 		 * The ring enable intr func is being invoked, but the ring is
2543 2544 		 * not bound to any underlying resource. This must be a ring
2544 2545 		 * reserved for a Hybrid resource and no such resource has been
2545 2546 		 * assigned to this vnet device yet. We simply return success.
2546 2547 */
2547 2548 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2548 2549 return (0);
2549 2550 }
2550 2551
2551 2552 /*
2552 2553 * The rx ring has been bound to either a LDC or a Hybrid resource.
2553 2554 * Call the appropriate function to enable interrupts for the ring.
2554 2555 */
2555 2556 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2556 2557 return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2557 2558 } else {
2558 2559 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2559 2560 return (vgen_enable_intr(vresp->macreg.m_driver));
2560 2561 }
2561 2562 }
2562 2563
2563 2564 /*
2564 2565 * Enable polling for a ring and disable its interrupt.
2565 2566 */
2566 2567 static int
2567 2568 vnet_ring_disable_intr(void *arg)
2568 2569 {
2569 2570 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2570 2571 vnet_res_t *vresp;
2571 2572
2572 2573 if (rx_ringp->hw_rh == NULL) {
2573 2574 /*
2574 2575 		 * The ring disable intr func is being invoked, but the ring is
2575 2576 		 * not bound to any underlying resource. This must be a ring
2576 2577 		 * reserved for a Hybrid resource and no such resource has been
2577 2578 		 * assigned to this vnet device yet. We simply return success.
2578 2579 */
2579 2580 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2580 2581 return (0);
2581 2582 }
2582 2583
2583 2584 /*
2584 2585 * The rx ring has been bound to either a LDC or a Hybrid resource.
2585 2586 * Call the appropriate function to disable interrupts for the ring.
2586 2587 */
2587 2588 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2588 2589 return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2589 2590 } else {
2590 2591 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2591 2592 return (vgen_disable_intr(vresp->macreg.m_driver));
2592 2593 }
2593 2594 }
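
Together with the mri_poll entry point, this enable/disable pair gives the upper layer a standard polling handshake: disable the ring's interrupt, drain through the poll entry while there is backlog, then re-enable the interrupt. A userland-flavored sketch of that loop (hypothetical ring ops; not the mac layer's actual code):

```c
#include <stddef.h>

typedef struct ring_ops {
	int	(*disable_intr)(void *ring);
	int	(*enable_intr)(void *ring);
	void	*(*poll)(void *ring, int budget);	/* NULL when empty */
} ring_ops_t;

void deliver(void *chain);	/* consume a packet chain; elsewhere */

/* Switch a ring to poll mode under backlog, then back to interrupts. */
static void
drain_ring(const ring_ops_t *ops, void *ring, int budget)
{
	void *chain;

	(void) ops->disable_intr(ring);
	while ((chain = ops->poll(ring, budget)) != NULL)
		deliver(chain);
	(void) ops->enable_intr(ring);
}
```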
2594 2595
2595 2596 /*
2596 2597 * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2597 2598 */
2598 2599 static mblk_t *
2599 2600 vnet_rx_poll(void *arg, int bytes_to_pickup)
2600 2601 {
2601 2602 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2602 2603 mblk_t *mp = NULL;
2603 2604 vnet_res_t *vresp;
2604 2605 vnet_t *vnetp = rx_ringp->vnetp;
2605 2606
2606 2607 if (rx_ringp->hw_rh == NULL) {
2607 2608 return (NULL);
2608 2609 }
2609 2610
2610 2611 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2611 2612 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2612 2613 /*
2613 2614 * Packets received over a hybrid resource need additional
2614 2615 * processing to remove the tag, for the pvid case. The
2615 2616 * underlying resource is not aware of the vnet's pvid and thus
2616 2617 * packets are received with the vlan tag in the header; unlike
2617 2618 * packets that are received over a ldc channel in which case
2618 2619 * the peer vnet/vsw would have already removed the tag.
2619 2620 */
2620 2621 if (vnetp->pvid != vnetp->default_vlan_id) {
2621 2622 vnet_rx_frames_untag(vnetp->pvid, &mp);
2622 2623 }
2623 2624 } else {
2624 2625 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2625 2626 mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
2626 2627 }
2627 2628 return (mp);
2628 2629 }
2629 2630
2630 2631 /* ARGSUSED */
2631 2632 void
2632 2633 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2633 - boolean_t loopback)
2634 + boolean_t loopback)
2634 2635 {
2635 2636 vnet_t *vnetp = (vnet_t *)arg;
2636 2637 vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;
2637 2638
2638 2639 /*
2639 2640 * Packets received over a hybrid resource need additional processing
2640 2641 * to remove the tag, for the pvid case. The underlying resource is
2641 2642 * not aware of the vnet's pvid and thus packets are received with the
2642 2643 * vlan tag in the header; unlike packets that are received over a ldc
2643 2644 * channel in which case the peer vnet/vsw would have already removed
2644 2645 * the tag.
2645 2646 */
2646 2647 if (vnetp->pvid != vnetp->default_vlan_id) {
2647 2648 vnet_rx_frames_untag(vnetp->pvid, &mp);
2648 2649 if (mp == NULL) {
2649 2650 return;
2650 2651 }
2651 2652 }
2652 2653 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2653 2654 }
2654 2655
2655 2656 static int
2656 2657 vnet_addmac(void *arg, const uint8_t *mac_addr)
2657 2658 {
2658 2659 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2659 2660 vnet_t *vnetp;
2660 2661
2661 2662 vnetp = rx_grp->vnetp;
2662 2663
2663 2664 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2664 2665 return (0);
2665 2666 }
2666 2667
2667 2668 cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2668 2669 vnetp->instance, __func__);
2669 2670 return (EINVAL);
2670 2671 }
2671 2672
2672 2673 static int
2673 2674 vnet_remmac(void *arg, const uint8_t *mac_addr)
2674 2675 {
2675 2676 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2676 2677 vnet_t *vnetp;
2677 2678
2678 2679 vnetp = rx_grp->vnetp;
2679 2680
2680 2681 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2681 2682 return (0);
2682 2683 }
2683 2684
2684 2685 cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2685 2686 vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2686 2687 return (EINVAL);
2687 2688 }
2688 2689
2689 2690 int
2690 2691 vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
2691 2692 {
2692 2693 mac_handle_t mh;
2693 2694 mac_client_handle_t mch = NULL;
2694 2695 mac_unicast_handle_t muh = NULL;
2695 2696 mac_diag_t diag;
2696 2697 mac_register_t *macp;
2697 2698 char client_name[MAXNAMELEN];
2698 2699 int rv;
2699 2700 uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE |
2700 2701 MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
2701 2702 vio_net_callbacks_t vcb;
2702 2703 ether_addr_t rem_addr =
2703 2704 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2704 2705 uint32_t retries = 0;
2705 2706
2706 2707 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2707 2708 return (EAGAIN);
2708 2709 }
2709 2710
2710 2711 do {
2711 2712 rv = mac_open_by_linkname(ifname, &mh);
2712 2713 if (rv == 0) {
2713 2714 break;
2714 2715 }
2715 2716 if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
2716 2717 mac_free(macp);
2717 2718 return (rv);
2718 2719 }
2719 2720 drv_usecwait(vnet_mac_open_delay);
2720 2721 } while (rv == ENOENT);
2721 2722
2722 2723 vnetp->hio_mh = mh;
2723 2724
2724 2725 (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
2725 2726 ifname);
2726 2727 rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
2727 2728 if (rv != 0) {
2728 2729 goto fail;
2729 2730 }
2730 2731 vnetp->hio_mch = mch;
2731 2732
2732 2733 rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
2733 2734 &diag);
2734 2735 if (rv != 0) {
2735 2736 goto fail;
2736 2737 }
2737 2738 vnetp->hio_muh = muh;
2738 2739
2739 2740 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2740 2741 macp->m_driver = vnetp;
2741 2742 macp->m_dip = NULL;
2742 2743 macp->m_src_addr = NULL;
2743 2744 macp->m_callbacks = &vnet_hio_res_callbacks;
2744 2745 macp->m_min_sdu = 0;
2745 2746 macp->m_max_sdu = ETHERMTU;
2746 2747
2747 2748 rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
2748 2749 vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
2749 2750 if (rv != 0) {
2750 2751 goto fail;
2751 2752 }
2752 2753 mac_free(macp);
2753 2754
2754 2755 /* add the recv callback */
2755 2756 mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);
2756 2757
2757 2758 return (0);
2758 2759
2759 2760 fail:
2760 2761 mac_free(macp);
2761 2762 vnet_hio_mac_cleanup(vnetp);
2762 2763 return (1);
2763 2764 }
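
The open loop above retries only on ENOENT, covering the window where the Hybrid device (e.g. nxge) has not finished attaching when the resource-add message arrives. The same bounded retry-with-delay shape, standalone (hypothetical limits):

```c
#include <errno.h>
#include <unistd.h>

#define	OPEN_RETRIES	100		/* hypothetical cap */
#define	OPEN_DELAY_US	10000		/* hypothetical 10ms backoff */

/* Retry open_fn() while it reports "not there yet" (ENOENT). */
static int
open_with_retry(int (*open_fn)(const char *, void **), const char *name,
    void **hdlp)
{
	unsigned retries = 0;
	int rv;

	for (;;) {
		rv = open_fn(name, hdlp);
		if (rv != ENOENT || retries++ >= OPEN_RETRIES)
			return (rv);
		(void) usleep(OPEN_DELAY_US);
	}
}
```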
2764 2765
2765 2766 void
2766 2767 vnet_hio_mac_cleanup(vnet_t *vnetp)
2767 2768 {
2768 2769 if (vnetp->hio_vhp != NULL) {
2769 2770 vio_net_resource_unreg(vnetp->hio_vhp);
2770 2771 vnetp->hio_vhp = NULL;
2771 2772 }
2772 2773
2773 2774 if (vnetp->hio_muh != NULL) {
2774 2775 (void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
2775 2776 vnetp->hio_muh = NULL;
2776 2777 }
2777 2778
2778 2779 if (vnetp->hio_mch != NULL) {
2779 2780 mac_client_close(vnetp->hio_mch, 0);
2780 2781 vnetp->hio_mch = NULL;
2781 2782 }
2782 2783
2783 2784 if (vnetp->hio_mh != NULL) {
2784 2785 mac_close(vnetp->hio_mh);
2785 2786 vnetp->hio_mh = NULL;
2786 2787 }
2787 2788 }
2788 2789
2789 2790 /* Bind pseudo rings to hwrings */
2790 2791 static int
2791 2792 vnet_bind_hwrings(vnet_t *vnetp)
2792 2793 {
2793 2794 mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS];
2794 2795 mac_perim_handle_t mph1;
2795 2796 vnet_pseudo_rx_group_t *rx_grp;
2796 2797 vnet_pseudo_rx_ring_t *rx_ringp;
2797 2798 vnet_pseudo_tx_group_t *tx_grp;
2798 2799 vnet_pseudo_tx_ring_t *tx_ringp;
2799 2800 int hw_ring_cnt;
2800 2801 int i;
2801 2802 int rv;
2802 2803
2803 2804 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2804 2805
2805 2806 /* Get the list of the underlying RX rings. */
2806 2807 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
2807 2808 MAC_RING_TYPE_RX);
2808 2809
2809 2810 	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
2810 2811 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2811 2812 cmn_err(CE_WARN,
2812 2813 "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
2813 2814 vnetp->instance, hw_ring_cnt);
2814 2815 goto fail;
2815 2816 }
2816 2817
2817 2818 if (vnetp->rx_hwgh != NULL) {
2818 2819 /*
2819 2820 * Quiesce the HW ring and the mac srs on the ring. Note
2820 2821 * that the HW ring will be restarted when the pseudo ring
2821 2822 * is started. At that time all the packets will be
2822 2823 * directly passed up to the pseudo RX ring and handled
2823 2824 * by mac srs created over the pseudo RX ring.
2824 2825 */
2825 2826 mac_rx_client_quiesce(vnetp->hio_mch);
2826 2827 mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
2827 2828 }
2828 2829
2829 2830 /*
2830 2831 * Bind the pseudo rings to the hwrings and start the hwrings.
2831 2832 * Note we don't need to register these with the upper mac, as we have
2832 2833 	 * statically exported these pseudo rxrings, which are reserved for
2833 2834 	 * the rxrings of the Hybrid resource.
2834 2835 */
2835 2836 rx_grp = &vnetp->rx_grp[0];
2836 2837 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2837 2838 /* Pick the rxrings reserved for Hybrid resource */
2838 2839 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2839 2840
2840 2841 /* Store the hw ring handle */
2841 2842 rx_ringp->hw_rh = hw_rh[i];
2842 2843
2843 2844 /* Bind the pseudo ring to the underlying hwring */
2844 2845 mac_hwring_setup(rx_ringp->hw_rh,
2845 2846 (mac_resource_handle_t)rx_ringp, NULL);
2846 2847
2847 2848 /* Start the hwring if needed */
2848 2849 if (rx_ringp->state & VNET_RXRING_STARTED) {
2849 - rv = mac_hwring_start(rx_ringp->hw_rh);
2850 + rv = mac_hwring_activate(rx_ringp->hw_rh);
2850 2851 if (rv != 0) {
2851 2852 mac_hwring_teardown(rx_ringp->hw_rh);
2852 2853 rx_ringp->hw_rh = NULL;
2853 2854 goto fail;
2854 2855 }
2855 2856 }
2856 2857 }
2857 2858
2858 2859 /* Get the list of the underlying TX rings. */
2859 2860 hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
2860 2861 MAC_RING_TYPE_TX);
2861 2862
2862 2863 /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
2863 2864 if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
2864 2865 cmn_err(CE_WARN,
2865 2866 "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
2866 2867 vnetp->instance, hw_ring_cnt);
2867 2868 goto fail;
2868 2869 }
2869 2870
2870 2871 /*
2871 2872 * Now map the pseudo txrings to the hw txrings. Note we don't need
2872 2873 * to register these with the upper mac, as we have statically exported
2873 2874 * these rings. Note that these rings will continue to be used for LDC
2874 2875 * resources to peer vnets and vswitch (shared ring).
2875 2876 */
2876 2877 tx_grp = &vnetp->tx_grp[0];
2877 2878 for (i = 0; i < tx_grp->ring_cnt; i++) {
2878 2879 tx_ringp = &tx_grp->rings[i];
2879 2880 tx_ringp->hw_rh = hw_rh[i];
2880 2881 tx_ringp->state |= VNET_TXRING_HYBRID;
2881 2882 }
2882 2883 tx_grp->tx_notify_handle =
2883 2884 mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);
2884 2885
2885 2886 mac_perim_exit(mph1);
2886 2887 return (0);
2887 2888
2888 2889 fail:
2889 2890 mac_perim_exit(mph1);
2890 2891 vnet_unbind_hwrings(vnetp);
2891 2892 return (1);
2892 2893 }
2893 2894
2894 2895 /* Unbind pseudo rings from hwrings */
2895 2896 static void
2896 2897 vnet_unbind_hwrings(vnet_t *vnetp)
2897 2898 {
2898 2899 mac_perim_handle_t mph1;
2899 2900 vnet_pseudo_rx_ring_t *rx_ringp;
2900 2901 vnet_pseudo_rx_group_t *rx_grp;
2901 2902 vnet_pseudo_tx_group_t *tx_grp;
2902 2903 vnet_pseudo_tx_ring_t *tx_ringp;
2903 2904 int i;
2904 2905
2905 2906 mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);
2906 2907
2907 2908 tx_grp = &vnetp->tx_grp[0];
2908 2909 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2909 2910 tx_ringp = &tx_grp->rings[i];
2910 2911 if (tx_ringp->state & VNET_TXRING_HYBRID) {
2911 2912 tx_ringp->state &= ~VNET_TXRING_HYBRID;
2912 2913 tx_ringp->hw_rh = NULL;
2913 2914 }
2914 2915 }
2915 2916 (void) mac_client_tx_notify(vnetp->hio_mch, NULL,
2916 2917 tx_grp->tx_notify_handle);
2917 2918
2918 2919 rx_grp = &vnetp->rx_grp[0];
2919 2920 for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
2920 2921 rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
2921 2922 if (rx_ringp->hw_rh != NULL) {
2922 2923 /* Stop the hwring */
2923 - mac_hwring_stop(rx_ringp->hw_rh);
2924 + mac_hwring_quiesce(rx_ringp->hw_rh);
2924 2925
2925 2926 /* Teardown the hwring */
2926 2927 mac_hwring_teardown(rx_ringp->hw_rh);
2927 2928 rx_ringp->hw_rh = NULL;
2928 2929 }
2929 2930 }
2930 2931
2931 2932 if (vnetp->rx_hwgh != NULL) {
2932 2933 vnetp->rx_hwgh = NULL;
2933 2934 /*
2934 2935 * First clear the permanent-quiesced flag of the RX srs then
2935 2936 * restart the HW ring and the mac srs on the ring.
2936 2937 */
2937 2938 mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
2938 2939 mac_rx_client_restart(vnetp->hio_mch);
2939 2940 }
2940 2941
2941 2942 mac_perim_exit(mph1);
2942 2943 }
2943 2944
2944 2945 /* Bind pseudo ring to a LDC resource */
2945 2946 static int
2946 2947 vnet_bind_vgenring(vnet_res_t *vresp)
2947 2948 {
2948 2949 vnet_t *vnetp;
2949 2950 vnet_pseudo_rx_group_t *rx_grp;
2950 2951 vnet_pseudo_rx_ring_t *rx_ringp;
2951 2952 mac_perim_handle_t mph1;
2952 2953 int rv;
2953 2954 int type;
2954 2955
2955 2956 vnetp = vresp->vnetp;
2956 2957 type = vresp->type;
2957 2958 rx_grp = &vnetp->rx_grp[0];
2958 2959
2959 2960 if (type == VIO_NET_RES_LDC_SERVICE) {
2960 2961 /*
2961 2962 * Ring Index 0 is the default ring in the group and is
2962 2963 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
2963 2964 * is allocated statically and is reported to the mac layer
2964 2965 * in vnet_m_capab(). So, all we need to do here, is save a
2965 2966 * reference to the associated vresp.
2966 2967 */
2967 2968 rx_ringp = &rx_grp->rings[0];
2968 2969 rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2969 2970 vresp->rx_ringp = (void *)rx_ringp;
2970 2971 return (0);
2971 2972 }
2972 2973 ASSERT(type == VIO_NET_RES_LDC_GUEST);
2973 2974
2974 2975 mac_perim_enter_by_mh(vnetp->mh, &mph1);
2975 2976
2976 2977 rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
2977 2978 if (rx_ringp == NULL) {
2978 2979 cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
2979 2980 vnetp->instance);
2980 2981 goto fail;
2981 2982 }
2982 2983
2983 2984 /* Store the LDC resource itself as the ring handle */
2984 2985 rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
2985 2986
2986 2987 /*
2987 2988 * Save a reference to the ring in the resource for lookup during
2988 2989 * unbind. Note this is only done for LDC resources. We don't need this
2989 2990 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
2990 2991 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
2991 2992 */
2992 2993 vresp->rx_ringp = (void *)rx_ringp;
2993 2994 rx_ringp->state |= VNET_RXRING_LDC_GUEST;
2994 2995
2995 2996 /* Register the pseudo ring with upper-mac */
2996 2997 rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
2997 2998 if (rv != 0) {
2998 2999 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
2999 3000 rx_ringp->hw_rh = NULL;
3000 3001 vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3001 3002 goto fail;
3002 3003 }
3003 3004
3004 3005 mac_perim_exit(mph1);
3005 3006 return (0);
3006 3007 fail:
3007 3008 mac_perim_exit(mph1);
3008 3009 return (1);
3009 3010 }
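
Worth noting in the failure path above: the unwind happens in exact reverse order of setup (clear the state flag, drop the stored handle, free the ring) before falling through to the common fail label. The same unwind idiom, reduced to a sketch (hypothetical helpers):

```c
#include <stddef.h>

#define	RING_BOUND	0x1

typedef struct pring {
	unsigned	state;
	void		*hw_rh;
} pring_t;

pring_t *ring_alloc(void);		/* elsewhere */
void ring_free(pring_t *);		/* elsewhere */
int ring_register(pring_t *);		/* may fail */

static int
bind_ring(void *resource)
{
	pring_t *rp;

	if ((rp = ring_alloc()) == NULL)
		return (1);

	rp->hw_rh = resource;
	rp->state |= RING_BOUND;

	if (ring_register(rp) != 0) {
		/* Unwind in reverse order of the setup above. */
		rp->state &= ~RING_BOUND;
		rp->hw_rh = NULL;
		ring_free(rp);
		return (1);
	}
	return (0);
}
```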
3010 3011
3011 3012 /* Unbind pseudo ring from a LDC resource */
3012 3013 static void
3013 3014 vnet_unbind_vgenring(vnet_res_t *vresp)
3014 3015 {
3015 3016 vnet_t *vnetp;
3016 3017 vnet_pseudo_rx_group_t *rx_grp;
3017 3018 vnet_pseudo_rx_ring_t *rx_ringp;
3018 3019 mac_perim_handle_t mph1;
3019 3020 int type;
3020 3021
3021 3022 vnetp = vresp->vnetp;
3022 3023 type = vresp->type;
3023 3024 rx_grp = &vnetp->rx_grp[0];
3024 3025
3025 3026 if (vresp->rx_ringp == NULL) {
3026 3027 return;
3027 3028 }
3028 3029
3029 3030 if (type == VIO_NET_RES_LDC_SERVICE) {
3030 3031 /*
3031 3032 * Ring Index 0 is the default ring in the group and is
3032 3033 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
3033 3034 * is allocated statically and is reported to the mac layer
3034 3035 * in vnet_m_capab(). So, all we need to do here, is remove its
3035 3036 * reference to the associated vresp.
3036 3037 */
3037 3038 rx_ringp = &rx_grp->rings[0];
3038 3039 rx_ringp->hw_rh = NULL;
3039 3040 vresp->rx_ringp = NULL;
3040 3041 return;
3041 3042 }
3042 3043 ASSERT(type == VIO_NET_RES_LDC_GUEST);
3043 3044
3044 3045 mac_perim_enter_by_mh(vnetp->mh, &mph1);
3045 3046
3046 3047 rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
3047 3048 vresp->rx_ringp = NULL;
3048 3049
3049 3050 if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
3050 3051 /* Unregister the pseudo ring with upper-mac */
3051 3052 mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);
3052 3053
3053 3054 rx_ringp->hw_rh = NULL;
3054 3055 rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
3055 3056
3056 3057 /* Free the pseudo rx ring */
3057 3058 vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
3058 3059 }
3059 3060
3060 3061 mac_perim_exit(mph1);
3061 3062 }
3062 3063
3063 3064 static void
3064 3065 vnet_unbind_rings(vnet_res_t *vresp)
3065 3066 {
3066 3067 switch (vresp->type) {
3067 3068
3068 3069 case VIO_NET_RES_LDC_SERVICE:
3069 3070 case VIO_NET_RES_LDC_GUEST:
3070 3071 vnet_unbind_vgenring(vresp);
3071 3072 break;
3072 3073
3073 3074 case VIO_NET_RES_HYBRID:
3074 3075 vnet_unbind_hwrings(vresp->vnetp);
3075 3076 break;
3076 3077
3077 3078 default:
3078 3079 break;
3079 3080
3080 3081 }
3081 3082 }
3082 3083
3083 3084 static int
3084 3085 vnet_bind_rings(vnet_res_t *vresp)
3085 3086 {
3086 3087 int rv;
3087 3088
3088 3089 switch (vresp->type) {
3089 3090
3090 3091 case VIO_NET_RES_LDC_SERVICE:
3091 3092 case VIO_NET_RES_LDC_GUEST:
3092 3093 rv = vnet_bind_vgenring(vresp);
3093 3094 break;
3094 3095
3095 3096 case VIO_NET_RES_HYBRID:
3096 3097 rv = vnet_bind_hwrings(vresp->vnetp);
3097 3098 break;
3098 3099
3099 3100 default:
3100 3101 rv = 1;
3101 3102 break;
3102 3103
3103 3104 }
3104 3105
3105 3106 return (rv);
3106 3107 }
3107 3108
3108 3109 /* ARGSUSED */
3109 3110 int
3110 3111 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
3111 3112 {
3112 3113 vnet_t *vnetp = (vnet_t *)arg;
3113 3114
3114 3115 *val = mac_stat_get(vnetp->hio_mh, stat);
3115 3116 return (0);
3116 3117 }
3117 3118
3118 3119 /*
3119 3120  * The start() and stop() routines for the Hybrid resource below are just
3120 3121 * dummy functions. This is provided to avoid resource type specific code in
3121 3122 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
3122 3123 * of the Hybrid resource happens in the context of the mac_client interfaces
3123 3124 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
3124 3125 */
3125 3126 /* ARGSUSED */
3126 3127 static int
3127 3128 vnet_hio_start(void *arg)
3128 3129 {
3129 3130 return (0);
3130 3131 }
3131 3132
3132 3133 /* ARGSUSED */
3133 3134 static void
3134 3135 vnet_hio_stop(void *arg)
3135 3136 {
3136 3137 }
3137 3138
3138 3139 mblk_t *
3139 3140 vnet_hio_tx(void *arg, mblk_t *mp)
3140 3141 {
3141 3142 vnet_pseudo_tx_ring_t *tx_ringp;
3142 3143 mblk_t *nextp;
3143 3144 mblk_t *ret_mp;
3144 3145
3145 3146 tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
3146 3147 for (;;) {
3147 3148 nextp = mp->b_next;
3148 3149 mp->b_next = NULL;
3149 3150
3150 3151 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
3151 3152 if (ret_mp != NULL) {
3152 3153 ret_mp->b_next = nextp;
3153 3154 mp = ret_mp;
3154 3155 break;
3155 3156 }
3156 3157
3157 3158 if ((mp = nextp) == NULL)
3158 3159 break;
3159 3160 }
3160 3161 return (mp);
3161 3162 }
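
The loop peels one mblk off the chain at a time so a flow-controlled hw ring can hand back the packet it could not take; that packet is re-linked to the untried tail and the whole remainder is returned to the caller for requeue. The same peel-and-relink walk as a standalone sketch (hypothetical send function):

```c
#include <stddef.h>

typedef struct pkt {
	struct pkt	*next;
} pkt_t;

/* Returns NULL on success, or the packet the ring could not accept. */
pkt_t *hw_send_one(pkt_t *);

/*
 * Send a chain one packet at a time; on backpressure, re-link the
 * rejected packet to the untried tail and return the remainder.
 */
static pkt_t *
send_chain(pkt_t *chain)
{
	pkt_t *p = chain;

	for (;;) {
		pkt_t *nextp = p->next;
		p->next = NULL;

		pkt_t *ret = hw_send_one(p);
		if (ret != NULL) {
			ret->next = nextp;	/* remainder starts here */
			return (ret);
		}
		if ((p = nextp) == NULL)
			return (NULL);
	}
}
```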
3162 3163
3163 3164 #ifdef VNET_IOC_DEBUG
3164 3165
3165 3166 /*
3166 3167 * The ioctl entry point is used only for debugging for now. The ioctl commands
3167 3168 * can be used to force the link state of the channel connected to vsw.
3168 3169 */
3169 3170 static void
3170 3171 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3171 3172 {
3172 3173 struct iocblk *iocp;
3173 3174 vnet_t *vnetp;
3174 3175
3175 3176 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3176 3177 iocp->ioc_error = 0;
3177 3178 vnetp = (vnet_t *)arg;
3178 3179
3179 3180 if (vnetp == NULL) {
3180 3181 miocnak(q, mp, 0, EINVAL);
3181 3182 return;
3182 3183 }
3183 3184
3184 3185 switch (iocp->ioc_cmd) {
3185 3186
3186 3187 case VNET_FORCE_LINK_DOWN:
3187 3188 case VNET_FORCE_LINK_UP:
3188 3189 vnet_force_link_state(vnetp, q, mp);
3189 3190 break;
3190 3191
3191 3192 default:
3192 3193 iocp->ioc_error = EINVAL;
3193 3194 miocnak(q, mp, 0, iocp->ioc_error);
3194 3195 break;
3195 3196
3196 3197 }
3197 3198 }
3198 3199
3199 3200 static void
3200 3201 vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
3201 3202 {
3202 3203 mac_register_t *macp;
3203 3204 mac_callbacks_t *cbp;
3204 3205 vnet_res_t *vresp;
3205 3206
3206 3207 READ_ENTER(&vnetp->vsw_fp_rw);
3207 3208
3208 3209 vresp = vnetp->vsw_fp;
3209 3210 if (vresp == NULL) {
3210 3211 RW_EXIT(&vnetp->vsw_fp_rw);
3211 3212 return;
3212 3213 }
3213 3214
3214 3215 macp = &vresp->macreg;
3215 3216 cbp = macp->m_callbacks;
3216 3217 cbp->mc_ioctl(macp->m_driver, q, mp);
3217 3218
3218 3219 RW_EXIT(&vnetp->vsw_fp_rw);
3219 3220 }
3220 3221
3221 3222 #else
3222 3223
3223 3224 static void
3224 3225 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3225 3226 {
3226 3227 vnet_t *vnetp;
3227 3228
3228 3229 vnetp = (vnet_t *)arg;
3229 3230
3230 3231 if (vnetp == NULL) {
3231 3232 miocnak(q, mp, 0, EINVAL);
3232 3233 return;
3233 3234 }
3234 3235
3235 3236 /* ioctl support only for debugging */
3236 3237 miocnak(q, mp, 0, ENOTSUP);
3237 3238 }
3238 3239
3239 3240 #endif