Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/dls/dls_link.c
+++ new/usr/src/uts/common/io/dls/dls_link.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 - * Copyright 2017 Joyent, Inc.
24 + * Copyright 2018 Joyent, Inc.
25 25 */
26 26
27 27 /*
28 28 * Data-Link Services Module
29 29 */
30 30
31 31 #include <sys/sysmacros.h>
32 32 #include <sys/strsubr.h>
33 33 #include <sys/strsun.h>
34 34 #include <sys/vlan.h>
35 35 #include <sys/dld_impl.h>
36 36 #include <sys/sdt.h>
37 37 #include <sys/atomic.h>
38 38 #include <sys/sysevent.h>
39 39 #include <sys/sysevent/eventdefs.h>
40 40 #include <sys/sysevent/datalink.h>
41 41
42 42 static kmem_cache_t *i_dls_link_cachep;
43 43 mod_hash_t *i_dls_link_hash;
44 44 static uint_t i_dls_link_count;
45 45
46 46 #define LINK_HASHSZ 67 /* prime */
47 47 #define IMPL_HASHSZ 67 /* prime */
48 48
49 49 /*
50 50 * Construct a hash key from the DLSAP value.
51 51 */
52 52 #define MAKE_KEY(_sap) \
53 53 ((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE))
54 54
55 55 #define DLS_STRIP_PADDING(pktsize, p) { \
56 56 if (pktsize != 0) { \
57 57 ssize_t delta = pktsize - msgdsize(p); \
58 58 \
59 59 if (delta < 0) \
60 60 (void) adjmsg(p, delta); \
61 61 } \
62 62 }
63 63
64 64 /*
65 65 * Private functions.
66 66 */
67 67
68 68 /*ARGSUSED*/
69 69 static int
70 70 i_dls_link_constructor(void *buf, void *arg, int kmflag)
71 71 {
72 72 dls_link_t *dlp = buf;
73 73 char name[MAXNAMELEN];
74 74
75 75 bzero(buf, sizeof (dls_link_t));
76 76
77 77 (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf);
78 78 dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ,
79 79 mod_hash_null_valdtor);
80 80
81 81 return (0);
82 82 }
83 83
84 84 /*ARGSUSED*/
85 85 static void
86 86 i_dls_link_destructor(void *buf, void *arg)
87 87 {
88 88 dls_link_t *dlp = buf;
89 89
90 90 ASSERT(dlp->dl_ref == 0);
91 91 ASSERT(dlp->dl_mh == NULL);
92 92 ASSERT(dlp->dl_mah == NULL);
93 93 ASSERT(dlp->dl_unknowns == 0);
94 94
95 95 mod_hash_destroy_idhash(dlp->dl_str_hash);
96 96 dlp->dl_str_hash = NULL;
97 97
98 98 }
99 99
100 100 /*
101 101 * - Parse the mac header information of the given packet.
102 102 * - Strip the padding and skip over the header. Note that because some
103 103 * DLS consumers only check the db_ref count of the first mblk, we
104 104 * pullup the message into a single mblk. Because the original message
105 105 * is freed as the result of message pulling up, mac_vlan_header_info()
106 106 * is called again to update the mhi_saddr and mhi_daddr pointers in the
107 107 * mhip. Further, the mac_vlan_header_info() function ensures that the
108 108 * size of the pulled message is greater than the MAC header size,
109 109 * therefore we can directly advance b_rptr to point at the payload.
110 110 *
111 111 * We choose to use a macro for performance reasons.
 *
 * Arguments:
 *   mh   - MAC handle handed to mac_vlan_header_info().
 *   mp   - the packet; reassigned to the pulled-up copy when the first
 *          mblk is shorter than the parsed header size.
 *   mhip - receives the parsed header information.
 *   err  - receives the result of mac_vlan_header_info(), or EINVAL when
 *          msgpullup() fails.
 *
 * When err ends up non-zero b_rptr has not been advanced and mp still
 * refers to the original message.  In the pullup failure case the padding
 * may already have been stripped.  The b_next link of the packet is
 * captured on entry and restored on the pulled-up copy, so the caller
 * still has to relink any predecessor to the (possibly new) mp.
112 112 */
113 113 #define DLS_PREPARE_PKT(mh, mp, mhip, err) { \
114 114 mblk_t *nextp = (mp)->b_next; \
115 115 if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) { \
116 116 DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp)); \
117 117 if (MBLKL((mp)) < (mhip)->mhi_hdrsize) { \
118 118 mblk_t *newmp; \
119 119 if ((newmp = msgpullup((mp), -1)) == NULL) { \
120 120 (err) = EINVAL; \
121 121 } else { \
122 122 (mp)->b_next = NULL; \
123 123 freemsg((mp)); \
124 124 (mp) = newmp; \
125 125 VERIFY(mac_vlan_header_info((mh), \
126 126 (mp), (mhip)) == 0); \
127 127 (mp)->b_next = nextp; \
128 128 (mp)->b_rptr += (mhip)->mhi_hdrsize; \
129 129 } \
130 130 } else { \
131 131 (mp)->b_rptr += (mhip)->mhi_hdrsize; \
132 132 } \
133 133 } \
134 134 }
135 135
136 136 /*
137 137 * Truncate the chain starting at mp such that all packets in the chain
138 138 * have identical source and destination addresses, saps, and tag types
139 139 * (see below). It returns a pointer to the mblk following the chain,
140 140 * NULL if there is no further packet following the processed chain.
141 141 * The countp argument is set to the number of valid packets in the chain.
142 142 * Note that the whole MAC header (including the VLAN tag if any) in each
143 143 * packet will be stripped.
144 144 */
145 145 static mblk_t *
146 146 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip,
147 147 uint_t *countp)
148 148 {
149 149 mblk_t *prevp;
150 150 uint_t npacket = 1;
151 151 size_t addr_size = dlp->dl_mip->mi_addr_length;
152 152 uint16_t vid = VLAN_ID(mhip->mhi_tci);
153 153 uint16_t pri = VLAN_PRI(mhip->mhi_tci);
154 154
155 155 /*
156 156 * Compare with subsequent headers until we find one that has
157 157 * differing header information. After checking each packet
158 158 * strip padding and skip over the header.
159 159 */
160 160 for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) {
161 161 mac_header_info_t cmhi;
162 162 uint16_t cvid, cpri;
163 163 int err;
164 164
165 165 DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err);
166 166 if (err != 0)
167 167 break;
168 168
169 169 prevp->b_next = mp;
170 170
171 171 /*
172 172 * The source, destination, sap, vlan tag must all match in
173 173 * a given subchain.
174 174 */
175 175 if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL ||
176 176 memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 ||
177 177 memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 ||
178 178 mhip->mhi_bindsap != cmhi.mhi_bindsap) {
179 179 /*
180 180 * Note that we don't need to restore the padding.
181 181 */
182 182 mp->b_rptr -= cmhi.mhi_hdrsize;
183 183 break;
184 184 }
185 185
186 186 cvid = VLAN_ID(cmhi.mhi_tci);
187 187 cpri = VLAN_PRI(cmhi.mhi_tci);
188 188
189 189 /*
190 190 * There are several types of packets. Packets don't match
191 191 * if they are classified to different type or if they are
192 192 * VLAN packets but belong to different VLANs:
193 193 *
194 194 * packet type tagged vid pri
195 195 * ---------------------------------------------------------
196 196 * untagged No zero zero
197 197 * VLAN packets Yes non-zero -
198 198 * priority tagged Yes zero non-zero
199 199 * 0 tagged Yes zero zero
200 200 */
201 201 if ((mhip->mhi_istagged != cmhi.mhi_istagged) ||
202 202 (vid != cvid) || ((vid == VLAN_ID_NONE) &&
203 203 (((pri == 0) && (cpri != 0)) ||
204 204 ((pri != 0) && (cpri == 0))))) {
205 205 mp->b_rptr -= cmhi.mhi_hdrsize;
206 206 break;
207 207 }
208 208
209 209 npacket++;
210 210 }
211 211
212 212 /*
213 213 * Break the chain at this point and return a pointer to the next
214 214 * sub-chain.
215 215 */
216 216 prevp->b_next = NULL;
217 217 *countp = npacket;
218 218 return (mp);
219 219 }
220 220
221 221 /* ARGSUSED */
222 222 static int
223 223 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val)
224 224 {
225 225 dls_head_t *dhp = (dls_head_t *)val;
226 226
227 227 /*
228 228 * The lock order is mod_hash's internal lock -> dh_lock as in the
229 229 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold
230 230 */
231 231 mutex_enter(&dhp->dh_lock);
232 232 if (dhp->dh_removing) {
233 233 mutex_exit(&dhp->dh_lock);
234 234 return (-1);
235 235 }
236 236 dhp->dh_ref++;
237 237 mutex_exit(&dhp->dh_lock);
238 238 return (0);
239 239 }
240 240
241 241 void
242 242 i_dls_head_rele(dls_head_t *dhp)
243 243 {
244 244 mutex_enter(&dhp->dh_lock);
245 245 dhp->dh_ref--;
246 246 if (dhp->dh_ref == 0 && dhp->dh_removing != 0)
247 247 cv_broadcast(&dhp->dh_cv);
248 248 mutex_exit(&dhp->dh_lock);
249 249 }
250 250
251 251 static dls_head_t *
252 252 i_dls_head_alloc(mod_hash_key_t key)
253 253 {
254 254 dls_head_t *dhp;
255 255
256 256 dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP);
257 257 dhp->dh_key = key;
258 258 return (dhp);
259 259 }
260 260
261 261 static void
262 262 i_dls_head_free(dls_head_t *dhp)
263 263 {
264 264 ASSERT(dhp->dh_ref == 0);
265 265 kmem_free(dhp, sizeof (dls_head_t));
266 266 }
267 267
268 268 /*
269 269 * Try to send mp up to the streams of the given sap. Return the
270 270 * number of streams which accepted this message, or 0 if no streams
271 271 * accepted the message.
272 272 *
273 273 * Note that this function copies the message chain and the original
274 274 * mp remains valid after this function returns.
275 275 */
276 276 static uint_t
277 277 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh,
278 278 mac_header_info_t *mhip, mblk_t *mp, uint32_t sap,
279 279 boolean_t (*acceptfunc)())
280 280 {
281 281 mod_hash_t *hash = dlp->dl_str_hash;
282 282 mod_hash_key_t key;
283 283 dls_head_t *dhp;
284 284 dld_str_t *dsp;
285 285 mblk_t *nmp;
286 286 dls_rx_t ds_rx;
287 287 void *ds_rx_arg;
288 288 uint_t naccepted = 0;
289 289 int rval;
290 290
291 291 /*
292 292 * Construct a hash key from the DLSAP.
293 293 */
294 294 key = MAKE_KEY(sap);
295 295
296 296 /*
297 297 * Search the hash table for a dld_str_t eligible to receive a
298 298 * packet chain for this DLSAP. The mod hash's internal lock
299 299 * serializes find/insert/remove from the mod hash list.
300 300 * Incrementing the dh_ref (while holding the mod hash lock)
301 301 * ensures dls_link_remove will wait for the upcall to finish.
302 302 */
303 303 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
304 304 i_dls_head_hold, &rval) != 0 || (rval != 0)) {
305 305 return (0);
306 306 }
307 307
308 308 /*
309 309 * Find all dld_str_t that will accept the sub-chain.
310 310 */
311 311 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) {
312 312 if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg))
313 313 continue;
314 314
315 315 /*
316 316 * We have at least one acceptor.
317 317 */
318 318 naccepted++;
319 319
320 320 /*
321 321 * There will normally be at least one more dld_str_t
322 322 * (since we've yet to check for non-promiscuous
323 323 * dld_str_t) so dup the sub-chain.
324 324 */
325 325 if ((nmp = copymsgchain(mp)) != NULL)
326 326 ds_rx(ds_rx_arg, mrh, nmp, mhip);
327 327 }
328 328
329 329 /*
330 330 * Release the hold on the dld_str_t chain now that we have
331 331 * finished walking it.
332 332 */
333 333 i_dls_head_rele(dhp);
334 334 return (naccepted);
335 335 }
336 336
/*
 * Receive entry point for a packet chain arriving on the dls_link_t passed
 * as arg.  The chain is consumed: every packet is either handed to the
 * receive callback of an accepting dld_str_t or freed here.
 *
 * On each iteration the head packet is parsed with DLS_PREPARE_PKT().  A
 * packet that fails to parse is freed and counted in dl_unknowns.  Otherwise
 * i_dls_link_subchain() splits off the run of following packets that carry
 * the same header information and the run is processed as a single unit:
 *   - a tagged run is first offered (as copies) to the streams bound to
 *     ETHERTYPE_VLAN;
 *   - the run is then offered to the streams hashed under mhi_bindsap, a
 *     copy being made for every acceptor except the last one, which
 *     receives the original chain.
 * A run that nobody accepted adds its packet count to dl_unknowns.
 */
337 337 /* ARGSUSED */
338 338 void
339 339 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
340 340 boolean_t loopback)
341 341 {
342 342 dls_link_t *dlp = arg;
343 343 mod_hash_t *hash = dlp->dl_str_hash;
344 344 mblk_t *nextp;
345 345 mac_header_info_t mhi;
346 346 dls_head_t *dhp;
347 347 dld_str_t *dsp;
348 348 dld_str_t *ndsp;
349 349 mblk_t *nmp;
350 350 mod_hash_key_t key;
351 351 uint_t npacket;
352 352 boolean_t accepted;
353 353 dls_rx_t ds_rx, nds_rx;
354 354 void *ds_rx_arg, *nds_rx_arg;
355 355 uint16_t vid;
356 356 int err, rval;
357 357
358 358 /*
359 359 * Walk the packet chain.
360 360 */
361 361 for (; mp != NULL; mp = nextp) {
362 362 /*
363 363 * Wipe the accepted state.
364 364 */
365 365 accepted = B_FALSE;
366 366
367 367 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
368 368 if (err != 0) {
369 369 atomic_inc_32(&(dlp->dl_unknowns));
/*
 * The nextp declared inside DLS_PREPARE_PKT is scoped to the macro,
 * so the successor has to be read from b_next again here.
 */
370 370 nextp = mp->b_next;
371 371 mp->b_next = NULL;
372 372 freemsg(mp);
373 373 continue;
374 374 }
↓ open down ↓ |
340 lines elided |
↑ open up ↑ |
375 375
376 376 /*
377 377 * Grab the longest sub-chain we can process as a single
378 378 * unit.
379 379 */
380 380 nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket);
381 381 ASSERT(npacket != 0);
382 382
383 383 vid = VLAN_ID(mhi.mhi_tci);
384 384
385 + /*
386 + * This condition is true only when a sun4v vsw client
387 + * is on the scene; as it is the only type of client
388 + * that multiplexes VLANs on a single client instance.
389 + * All other types of clients have one VLAN per client
390 + * instance. In that case, MAC strips the VLAN tag
391 + * before delivering it to DLS (see mac_rx_deliver()).
392 + */
385 393 if (mhi.mhi_istagged) {
394 +
386 395 /*
387 396 * If it is tagged traffic, send it upstream to
388 397 * all dld_str_t which are attached to the physical
389 398 * link and bound to SAP 0x8100.
390 399 */
391 400 if (i_dls_link_rx_func(dlp, mrh, &mhi, mp,
392 401 ETHERTYPE_VLAN, dls_accept) > 0) {
393 402 accepted = B_TRUE;
394 403 }
395 404
396 405 /*
397 406 * Don't pass the packets up if they are tagged
398 407 * packets and:
399 408 * - their VID and priority are both zero and the
400 409 * original packet isn't using the PVID (invalid
401 410 * packets).
402 411 * - their sap is ETHERTYPE_VLAN and their VID is
403 412 * zero as they have already been sent upstreams.
404 413 */
405 414 if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid &&
406 415 VLAN_PRI(mhi.mhi_tci) == 0) ||
407 416 (mhi.mhi_bindsap == ETHERTYPE_VLAN &&
408 417 vid == VLAN_ID_NONE)) {
409 418 freemsgchain(mp);
410 419 goto loop;
411 420 }
412 421 }
413 422
414 423 /*
415 424 * Construct a hash key from the DLSAP.
416 425 */
417 426 key = MAKE_KEY(mhi.mhi_bindsap);
418 427
419 428 /*
420 429 * Search the hash table for dld_str_t eligible to receive
421 430 * a packet chain for this DLSAP.
422 431 */
423 432 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp,
424 433 i_dls_head_hold, &rval) != 0 || (rval != 0)) {
/*
 * Either nothing is registered for this key or i_dls_head_hold()
 * declined to take a reference, so there is no hold to release.
 */
425 434 freemsgchain(mp);
426 435 goto loop;
427 436 }
428 437
429 438 /*
430 439 * Find the first dld_str_t that will accept the sub-chain.
431 440 */
432 441 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next)
433 442 if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
434 443 break;
435 444
436 445 /*
437 446 * If we did not find any dld_str_t willing to accept the
438 447 * sub-chain then throw it away.
439 448 */
440 449 if (dsp == NULL) {
441 450 i_dls_head_rele(dhp);
442 451 freemsgchain(mp);
443 452 goto loop;
444 453 }
445 454
446 455 /*
447 456 * We have at least one acceptor.
448 457 */
449 458 accepted = B_TRUE;
450 459 for (;;) {
451 460 /*
452 461 * Find the next dld_str_t that will accept the
453 462 * sub-chain.
454 463 */
455 464 for (ndsp = dsp->ds_next; ndsp != NULL;
456 465 ndsp = ndsp->ds_next)
457 466 if (dls_accept(ndsp, &mhi, &nds_rx,
458 467 &nds_rx_arg))
459 468 break;
460 469
461 470 /*
462 471 * If there are no more dld_str_t that are willing
463 472 * to accept the sub-chain then we don't need to dup
464 473 * it before handing it to the current one.
465 474 */
466 475 if (ndsp == NULL) {
467 476 ds_rx(ds_rx_arg, mrh, mp, &mhi);
468 477
469 478 /*
470 479 * Since there are no more dld_str_t, we're
471 480 * done.
472 481 */
473 482 break;
474 483 }
475 484
476 485 /*
477 486 * There are more dld_str_t so dup the sub-chain.
478 487 */
479 488 if ((nmp = copymsgchain(mp)) != NULL)
480 489 ds_rx(ds_rx_arg, mrh, nmp, &mhi);
481 490
482 491 dsp = ndsp;
483 492 ds_rx = nds_rx;
484 493 ds_rx_arg = nds_rx_arg;
485 494 }
486 495
487 496 /*
488 497 * Release the hold on the dld_str_t chain now that we have
489 498 * finished walking it.
490 499 */
491 500 i_dls_head_rele(dhp);
492 501
493 502 loop:
494 503 /*
495 504 * If there were no acceptors then add the packet count to the
496 505 * 'unknown' count.
497 506 */
498 507 if (!accepted)
499 508 atomic_add_32(&(dlp->dl_unknowns), npacket);
500 509 }
501 510 }
502 511
503 512 /* ARGSUSED */
504 513 void
505 514 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
506 515 boolean_t loopback)
507 516 {
508 517 dld_str_t *dsp = arg;
509 518 dls_link_t *dlp = dsp->ds_dlp;
510 519 mac_header_info_t mhi;
511 520 dls_rx_t ds_rx;
512 521 void *ds_rx_arg;
513 522 int err;
514 523
515 524 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
516 525 if (err != 0)
517 526 goto drop;
518 527
519 528 /*
520 529 * If there is promiscuous handle for vlan, we filter out the untagged
521 530 * pkts and pkts that are not for the primary unicast address.
522 531 */
523 532 if (dsp->ds_vlan_mph != NULL) {
524 533 uint8_t prim_addr[MAXMACADDRLEN];
525 534 size_t addr_length = dsp->ds_mip->mi_addr_length;
526 535
527 536 if (!(mhi.mhi_istagged))
528 537 goto drop;
529 538 ASSERT(dsp->ds_mh != NULL);
530 539 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr);
531 540 if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0)
532 541 goto drop;
533 542
534 543 if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg))
535 544 goto drop;
536 545
537 546 ds_rx(ds_rx_arg, NULL, mp, &mhi);
538 547 return;
539 548 }
540 549
541 550 drop:
542 551 atomic_inc_32(&dlp->dl_unknowns);
543 552 freemsg(mp);
544 553 }
545 554
546 555 /* ARGSUSED */
547 556 void
548 557 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
549 558 boolean_t loopback)
550 559 {
551 560 dld_str_t *dsp = arg;
552 561 dls_link_t *dlp = dsp->ds_dlp;
553 562 mac_header_info_t mhi;
554 563 dls_rx_t ds_rx;
555 564 void *ds_rx_arg;
556 565 int err;
557 566 dls_head_t *dhp;
558 567 mod_hash_key_t key;
559 568
560 569 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err);
561 570 if (err != 0)
562 571 goto drop;
563 572
564 573 /*
565 574 * In order to filter out sap pkt that no dls channel listens, search
566 575 * the hash table trying to find a dld_str_t eligible to receive the pkt
567 576 */
568 577 if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) {
569 578 key = MAKE_KEY(mhi.mhi_bindsap);
570 579 if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key,
571 580 (mod_hash_val_t *)&dhp) != 0)
572 581 goto drop;
573 582 }
574 583
575 584 if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback))
576 585 goto drop;
577 586
578 587 ds_rx(ds_rx_arg, NULL, mp, &mhi);
579 588 return;
580 589
581 590 drop:
582 591 atomic_inc_32(&dlp->dl_unknowns);
583 592 freemsg(mp);
584 593 }
585 594
586 595 /*
587 596 * We'd like to notify via sysevents that a link state change has occurred.
588 597 * There are a couple of challenges associated with this. The first is that if
589 598 * the link is flapping a lot, we may not see an accurate state when we launch
590 599 * the notification, we're told it changed, not what it changed to.
591 600 *
592 601 * The next problem is that all of the information that a user has associated
593 602 * with this device is the exact opposite of what we have on the dls_link_t. We
594 603 * have the name of the mac device, which has no bearing on what users see.
595 604 * Likewise, we don't have the datalink id either. So we're going to have to get
596 605 * this from dls.
597 606 *
598 607 * This is all further complicated by the fact that this could be going on in
599 608 * another thread at the same time as someone is tearing down the dls_link_t
600 609 * that we're associated with. We need to be careful not to grab the mac
601 610 * perimeter, otherwise we stand a good chance of deadlock.
602 611 */
603 612 static void
604 613 dls_link_notify(void *arg, mac_notify_type_t type)
605 614 {
606 615 dls_link_t *dlp = arg;
607 616 dls_dl_handle_t dhp;
608 617 nvlist_t *nvp;
609 618 sysevent_t *event;
610 619 sysevent_id_t eid;
611 620
612 621 if (type != MAC_NOTE_LINK && type != MAC_NOTE_LOWLINK)
613 622 return;
614 623
615 624 /*
616 625 * If we can't find a devnet handle for this link, then there is no user
617 626 * knowable device for this at the moment and there's nothing we can
618 627 * really share with them that will make sense.
619 628 */
620 629 if (dls_devnet_hold_tmp_by_link(dlp, &dhp) != 0)
621 630 return;
622 631
623 632 /*
624 633 * Because we're attaching this nvlist_t to the sysevent, it'll get
625 634 * cleaned up when we call sysevent_free.
626 635 */
627 636 VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
628 637 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_LINK_ID,
629 638 dls_devnet_linkid(dhp)) == 0);
630 639 VERIFY(nvlist_add_string(nvp, DATALINK_EV_LINK_NAME,
631 640 dls_devnet_link(dhp)) == 0);
632 641 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_ZONE_ID,
633 642 dls_devnet_getzid(dhp)) == 0);
634 643
635 644 dls_devnet_rele_tmp(dhp);
636 645
637 646 event = sysevent_alloc(EC_DATALINK, ESC_DATALINK_LINK_STATE,
638 647 ILLUMOS_KERN_PUB"dls", SE_SLEEP);
639 648 VERIFY(event != NULL);
640 649 (void) sysevent_attach_attributes(event, (sysevent_attr_list_t *)nvp);
641 650
642 651 (void) log_sysevent(event, SE_SLEEP, &eid);
643 652 sysevent_free(event);
644 653
645 654 }
646 655
647 656 static void
648 657 i_dls_link_destroy(dls_link_t *dlp)
649 658 {
650 659 ASSERT(dlp->dl_nactive == 0);
651 660 ASSERT(dlp->dl_impl_count == 0);
652 661 ASSERT(dlp->dl_zone_ref == 0);
653 662
654 663 /*
655 664 * Free the structure back to the cache.
656 665 */
657 666 if (dlp->dl_mnh != NULL)
658 667 mac_notify_remove(dlp->dl_mnh, B_TRUE);
659 668
660 669 if (dlp->dl_mch != NULL)
661 670 mac_client_close(dlp->dl_mch, 0);
662 671
663 672 if (dlp->dl_mh != NULL) {
664 673 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
665 674 mac_close(dlp->dl_mh);
666 675 }
667 676
668 677 dlp->dl_mh = NULL;
669 678 dlp->dl_mch = NULL;
670 679 dlp->dl_mip = NULL;
671 680 dlp->dl_mnh = NULL;
672 681 dlp->dl_unknowns = 0;
673 682 dlp->dl_nonip_cnt = 0;
674 683 kmem_cache_free(i_dls_link_cachep, dlp);
675 684 }
676 685
677 686 static int
678 687 i_dls_link_create(const char *name, dls_link_t **dlpp)
679 688 {
680 689 dls_link_t *dlp;
681 690 int err;
682 691
683 692 /*
684 693 * Allocate a new dls_link_t structure.
685 694 */
686 695 dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP);
687 696
688 697 /*
689 698 * Name the dls_link_t after the MAC interface it represents.
690 699 */
691 700 (void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name));
692 701
693 702 /*
694 703 * First reference; hold open the MAC interface.
695 704 */
696 705 ASSERT(dlp->dl_mh == NULL);
697 706 err = mac_open(dlp->dl_name, &dlp->dl_mh);
698 707 if (err != 0)
699 708 goto bail;
700 709
701 710 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
702 711 dlp->dl_mip = mac_info(dlp->dl_mh);
703 712
704 713 /* DLS is the "primary" MAC client */
705 714 ASSERT(dlp->dl_mch == NULL);
706 715
707 716 err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL,
708 717 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
709 718 if (err != 0)
710 719 goto bail;
711 720
712 721 dlp->dl_mnh = mac_notify_add(dlp->dl_mh, dls_link_notify, dlp);
713 722
714 723 DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
715 724 dlp->dl_mch);
716 725
717 726 *dlpp = dlp;
718 727 return (0);
719 728
720 729 bail:
721 730 i_dls_link_destroy(dlp);
722 731 return (err);
723 732 }
724 733
725 734 /*
726 735 * Module initialization functions.
727 736 */
728 737
729 738 void
730 739 dls_link_init(void)
731 740 {
732 741 /*
733 742 * Create a kmem_cache of dls_link_t structures.
734 743 */
735 744 i_dls_link_cachep = kmem_cache_create("dls_link_cache",
736 745 sizeof (dls_link_t), 0, i_dls_link_constructor,
737 746 i_dls_link_destructor, NULL, NULL, NULL, 0);
738 747 ASSERT(i_dls_link_cachep != NULL);
739 748
740 749 /*
741 750 * Create a dls_link_t hash table and associated lock.
742 751 */
743 752 i_dls_link_hash = mod_hash_create_extended("dls_link_hash",
744 753 IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
745 754 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
746 755 i_dls_link_count = 0;
747 756 }
748 757
749 758 int
750 759 dls_link_fini(void)
751 760 {
752 761 if (i_dls_link_count > 0)
753 762 return (EBUSY);
754 763
755 764 /*
756 765 * Destroy the kmem_cache.
757 766 */
758 767 kmem_cache_destroy(i_dls_link_cachep);
759 768
760 769 /*
761 770 * Destroy the hash table and associated lock.
762 771 */
763 772 mod_hash_destroy_hash(i_dls_link_hash);
764 773 return (0);
765 774 }
766 775
767 776 /*
768 777 * Exported functions.
769 778 */
770 779
771 780 static int
772 781 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create)
773 782 {
774 783 dls_link_t *dlp;
775 784 int err;
776 785
777 786 /*
778 787 * Look up a dls_link_t corresponding to the given macname in the
779 788 * global hash table. The i_dls_link_hash itself is protected by the
780 789 * mod_hash package's internal lock which synchronizes
781 790 * find/insert/remove into the global mod_hash list. Assumes that
782 791 * inserts and removes are single threaded on a per mac end point
783 792 * by the mac perimeter.
784 793 */
785 794 if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
786 795 (mod_hash_val_t *)&dlp)) == 0)
787 796 goto done;
788 797
789 798 ASSERT(err == MH_ERR_NOTFOUND);
790 799 if (!create)
791 800 return (ENOENT);
792 801
793 802 /*
794 803 * We didn't find anything so we need to create one.
795 804 */
796 805 if ((err = i_dls_link_create(name, &dlp)) != 0)
797 806 return (err);
798 807
799 808 /*
800 809 * Insert the dls_link_t.
801 810 */
802 811 err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name,
803 812 (mod_hash_val_t)dlp);
804 813 ASSERT(err == 0);
805 814
806 815 atomic_inc_32(&i_dls_link_count);
807 816 ASSERT(i_dls_link_count != 0);
808 817
809 818 done:
810 819 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
811 820 /*
812 821 * Bump the reference count and hand back the reference.
813 822 */
814 823 dlp->dl_ref++;
815 824 *dlpp = dlp;
816 825 return (0);
817 826 }
818 827
819 828 int
820 829 dls_link_hold_create(const char *name, dls_link_t **dlpp)
821 830 {
822 831 return (dls_link_hold_common(name, dlpp, B_TRUE));
823 832 }
824 833
825 834 int
826 835 dls_link_hold(const char *name, dls_link_t **dlpp)
827 836 {
828 837 return (dls_link_hold_common(name, dlpp, B_FALSE));
829 838 }
830 839
831 840 dev_info_t *
832 841 dls_link_devinfo(dev_t dev)
833 842 {
834 843 dls_link_t *dlp;
835 844 dev_info_t *dip;
836 845 char macname[MAXNAMELEN];
837 846 char *drv;
838 847 mac_perim_handle_t mph;
839 848
840 849 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
841 850 return (NULL);
842 851 (void) snprintf(macname, MAXNAMELEN, "%s%d", drv,
843 852 DLS_MINOR2INST(getminor(dev)));
844 853
845 854 /*
846 855 * The code below assumes that the name constructed above is the
847 856 * macname. This is not the case for legacy devices. Currently this
848 857 * is ok because this function is only called in the getinfo(9e) path,
849 858 * which for a legacy device would directly end up in the driver's
850 859 * getinfo, rather than here
851 860 */
852 861 if (mac_perim_enter_by_macname(macname, &mph) != 0)
853 862 return (NULL);
854 863
855 864 if (dls_link_hold(macname, &dlp) != 0) {
856 865 mac_perim_exit(mph);
857 866 return (NULL);
858 867 }
859 868
860 869 dip = mac_devinfo_get(dlp->dl_mh);
861 870 dls_link_rele(dlp);
862 871 mac_perim_exit(mph);
863 872
864 873 return (dip);
865 874 }
866 875
867 876 dev_t
868 877 dls_link_dev(dls_link_t *dlp)
869 878 {
870 879 return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)),
871 880 mac_minor(dlp->dl_mh)));
872 881 }
873 882
874 883 void
875 884 dls_link_rele(dls_link_t *dlp)
876 885 {
877 886 mod_hash_val_t val;
878 887
879 888 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
880 889 /*
881 890 * Check if there are any more references.
882 891 */
883 892 if (--dlp->dl_ref == 0) {
884 893 (void) mod_hash_remove(i_dls_link_hash,
885 894 (mod_hash_key_t)dlp->dl_name, &val);
886 895 ASSERT(dlp == (dls_link_t *)val);
887 896
888 897 /*
889 898 * Destroy the dls_link_t.
890 899 */
891 900 i_dls_link_destroy(dlp);
892 901 ASSERT(i_dls_link_count > 0);
893 902 atomic_dec_32(&i_dls_link_count);
894 903 }
895 904 }
896 905
897 906 int
898 907 dls_link_rele_by_name(const char *name)
899 908 {
900 909 dls_link_t *dlp;
901 910
902 911 if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name,
903 912 (mod_hash_val_t *)&dlp) != 0)
904 913 return (ENOENT);
905 914
906 915 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
907 916
908 917 /*
909 918 * Must fail detach if mac client is busy.
910 919 */
911 920 ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL);
912 921 if (mac_link_has_flows(dlp->dl_mch))
913 922 return (ENOTEMPTY);
914 923
915 924 dls_link_rele(dlp);
916 925 return (0);
917 926 }
918 927
919 928 int
920 929 dls_link_setzid(const char *name, zoneid_t zid)
921 930 {
922 931 dls_link_t *dlp;
923 932 int err = 0;
924 933 zoneid_t old_zid;
925 934
926 935 if ((err = dls_link_hold_create(name, &dlp)) != 0)
927 936 return (err);
928 937
929 938 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
930 939
931 940 if ((old_zid = dlp->dl_zid) == zid)
932 941 goto done;
933 942
934 943 /*
935 944 * Check whether this dlp is used by its own zone. If yes, we cannot
936 945 * change its zoneid.
937 946 */
938 947 if (dlp->dl_zone_ref != 0) {
939 948 err = EBUSY;
940 949 goto done;
941 950 }
942 951
943 952 dlp->dl_zid = zid;
944 953
945 954 if (zid == GLOBAL_ZONEID) {
946 955 /*
947 956 * The link is moving from a non-global zone to the global
948 957 * zone, so we need to release the reference that was held
949 958 * when the link was originally assigned to the non-global
950 959 * zone.
951 960 */
952 961 dls_link_rele(dlp);
953 962 }
954 963
955 964 done:
956 965 /*
957 966 * We only keep the reference to this link open if the link has
958 967 * successfully moved from the global zone to a non-global zone.
959 968 */
960 969 if (err != 0 || old_zid != GLOBAL_ZONEID)
961 970 dls_link_rele(dlp);
962 971 return (err);
963 972 }
964 973
965 974 int
966 975 dls_link_getzid(const char *name, zoneid_t *zidp)
967 976 {
968 977 dls_link_t *dlp;
969 978 int err = 0;
970 979
971 980 if ((err = dls_link_hold(name, &dlp)) != 0)
972 981 return (err);
973 982
974 983 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
975 984
976 985 *zidp = dlp->dl_zid;
977 986
978 987 dls_link_rele(dlp);
979 988 return (0);
980 989 }
981 990
982 991 void
983 992 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
984 993 {
985 994 mod_hash_t *hash = dlp->dl_str_hash;
986 995 mod_hash_key_t key;
987 996 dls_head_t *dhp;
988 997 dld_str_t *p;
989 998 int err;
990 999
991 1000 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
992 1001
993 1002 /*
994 1003 * Generate a hash key based on the sap.
995 1004 */
996 1005 key = MAKE_KEY(sap);
997 1006
998 1007 /*
999 1008 * Search the table for a list head with this key.
1000 1009 */
1001 1010 if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) {
1002 1011 ASSERT(err == MH_ERR_NOTFOUND);
1003 1012
1004 1013 dhp = i_dls_head_alloc(key);
1005 1014 err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp);
1006 1015 ASSERT(err == 0);
1007 1016 }
1008 1017
1009 1018 /*
1010 1019 * Add the dld_str_t to the head of the list. List walkers in
1011 1020 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change
1012 1021 * while they walk the list. The membar below ensures that list walkers
1013 1022 * see exactly the old list or the new list.
1014 1023 */
1015 1024 ASSERT(dsp->ds_next == NULL);
1016 1025 p = dhp->dh_list;
1017 1026 dsp->ds_next = p;
1018 1027
1019 1028 membar_producer();
1020 1029
1021 1030 dhp->dh_list = dsp;
1022 1031
1023 1032 /*
1024 1033 * Save a pointer to the list head.
1025 1034 */
1026 1035 dsp->ds_head = dhp;
1027 1036 dlp->dl_impl_count++;
1028 1037 }
1029 1038
1030 1039 void
1031 1040 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp)
1032 1041 {
1033 1042 mod_hash_t *hash = dlp->dl_str_hash;
1034 1043 dld_str_t **pp;
1035 1044 dld_str_t *p;
1036 1045 dls_head_t *dhp;
1037 1046
1038 1047 ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
1039 1048
1040 1049 /*
1041 1050 * We set dh_removing here to tell the receive callbacks not to pass
1042 1051 * up packets anymore. Then wait till the current callbacks are done.
1043 1052 * This happens either in the close path or in processing the
1044 1053 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either.
1045 1054 * The dh_ref ensures there aren't and there won't be any upcalls
1046 1055 * walking or using the dh_list. The mod hash internal lock ensures
1047 1056 * that the insert/remove of the dls_head_t itself synchronizes with
1048 1057 * any i_dls_link_rx trying to locate it. The perimeter ensures that
1049 1058 * there isn't another simultaneous dls_link_add/remove.
1050 1059 */
1051 1060 dhp = dsp->ds_head;
1052 1061
1053 1062 mutex_enter(&dhp->dh_lock);
1054 1063 dhp->dh_removing = B_TRUE;
1055 1064 while (dhp->dh_ref != 0)
1056 1065 cv_wait(&dhp->dh_cv, &dhp->dh_lock);
1057 1066 mutex_exit(&dhp->dh_lock);
1058 1067
1059 1068 /*
1060 1069 * Walk the list and remove the dld_str_t.
1061 1070 */
1062 1071 for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) {
1063 1072 if (p == dsp)
1064 1073 break;
1065 1074 }
1066 1075 ASSERT(p != NULL);
1067 1076 *pp = p->ds_next;
1068 1077 p->ds_next = NULL;
1069 1078 p->ds_head = NULL;
1070 1079
1071 1080 ASSERT(dlp->dl_impl_count != 0);
1072 1081 dlp->dl_impl_count--;
1073 1082
1074 1083 if (dhp->dh_list == NULL) {
1075 1084 mod_hash_val_t val = NULL;
1076 1085
1077 1086 /*
1078 1087 * The list is empty so remove the hash table entry.
1079 1088 */
1080 1089 (void) mod_hash_remove(hash, dhp->dh_key, &val);
1081 1090 ASSERT(dhp == (dls_head_t *)val);
1082 1091 i_dls_head_free(dhp);
1083 1092 } else {
1084 1093 mutex_enter(&dhp->dh_lock);
1085 1094 dhp->dh_removing = B_FALSE;
1086 1095 mutex_exit(&dhp->dh_lock);
1087 1096 }
1088 1097 }
↓ open down ↓ |
693 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX