Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/aggr/aggr_grp.c
+++ new/usr/src/uts/common/io/aggr/aggr_grp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright (c) 2017, Joyent, Inc.
23 + * Copyright 2018 Joyent, Inc.
24 24 */
25 25
26 26 /*
27 27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28 28 *
29 29 * An instance of the structure aggr_grp_t is allocated for each
30 30 * link aggregation group. When created, aggr_grp_t objects are
31 31 * entered into the aggr_grp_hash hash table maintained by the modhash
32 32 * module. The hash key is the linkid associated with the link
33 33 * aggregation group.
34 34 *
35 35 * A set of MAC ports is associated with each aggregation group.
36 36 *
37 37 * Aggr pseudo TX rings
38 38 * --------------------
39 39 * The underlying ports (NICs) in an aggregation can have TX rings. To
40 40 * enhance aggr's performance, these TX rings are made available to the
41 41 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new.
42 42 * They are already present and implemented on the RX side, where they are
43 43 * called pseudo RX rings. The same concept is extended to the TX side where
44 44 * each TX ring of an underlying port is reflected in aggr as a pseudo
45 45 * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
46 46 * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
47 47 * TX ring is given to the aggregation layer.
48 48 *
49 49 * With this change, the outgoing stack depth looks much better:
50 50 *
51 51 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
52 52 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
53 53 *
54 54 * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
55 55 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
56 56 *
57 57 * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
58 58 * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
59 59 * ring belonging to a port on which the packet has to be sent.
60 60 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
61 61 * policy and then uses the fanout_hint passed to it to pick a TX ring from
62 62 * the selected port.
63 63 *
64 64 * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
65 65 * bandwidth limit is applied first on the outgoing packet and the packets
66 66 * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
67 67 * particular TX ring.
68 68 */
69 69
70 70 #include <sys/types.h>
71 71 #include <sys/sysmacros.h>
72 72 #include <sys/conf.h>
73 73 #include <sys/cmn_err.h>
74 74 #include <sys/disp.h>
75 75 #include <sys/list.h>
76 76 #include <sys/ksynch.h>
77 77 #include <sys/kmem.h>
78 78 #include <sys/stream.h>
79 79 #include <sys/modctl.h>
80 80 #include <sys/ddi.h>
81 81 #include <sys/sunddi.h>
82 82 #include <sys/atomic.h>
83 83 #include <sys/stat.h>
84 84 #include <sys/modhash.h>
85 85 #include <sys/id_space.h>
86 86 #include <sys/strsun.h>
87 87 #include <sys/cred.h>
88 88 #include <sys/dlpi.h>
89 89 #include <sys/zone.h>
90 90 #include <sys/mac_provider.h>
91 91 #include <sys/dls.h>
92 92 #include <sys/vlan.h>
93 93 #include <sys/aggr.h>
94 94 #include <sys/aggr_impl.h>
95 95
96 96 static int aggr_m_start(void *);
97 97 static void aggr_m_stop(void *);
98 98 static int aggr_m_promisc(void *, boolean_t);
99 99 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
100 100 static int aggr_m_unicst(void *, const uint8_t *);
101 101 static int aggr_m_stat(void *, uint_t, uint64_t *);
102 102 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
103 103 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
104 104 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
105 105 const void *);
106 106 static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
107 107 mac_prop_info_handle_t);
108 108
109 109 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
110 110 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
111 111 boolean_t *);
112 112
113 113 static void aggr_grp_capab_set(aggr_grp_t *);
114 114 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
115 115 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
116 116 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
↓ open down ↓ |
83 lines elided |
↑ open up ↑ |
117 117 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
118 118 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
119 119
120 120 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
121 121 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
122 122 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
123 123 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
124 124 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
125 125 static int aggr_addmac(void *, const uint8_t *);
126 126 static int aggr_remmac(void *, const uint8_t *);
127 +static int aggr_addvlan(mac_group_driver_t, uint16_t);
128 +static int aggr_remvlan(mac_group_driver_t, uint16_t);
127 129 static mblk_t *aggr_rx_poll(void *, int);
128 130 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
129 131 const int, mac_ring_info_t *, mac_ring_handle_t);
130 132 static void aggr_fill_group(void *, mac_ring_type_t, const int,
131 133 mac_group_info_t *, mac_group_handle_t);
132 134
133 135 static kmem_cache_t *aggr_grp_cache;
134 136 static mod_hash_t *aggr_grp_hash;
135 137 static krwlock_t aggr_grp_lock;
136 138 static uint_t aggr_grp_cnt;
137 139 static id_space_t *key_ids;
138 140
139 141 #define GRP_HASHSZ 64
140 142 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid)
141 143 #define AGGR_PORT_NAME_DELIMIT '-'
142 144
143 145 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
144 146
145 147 #define AGGR_M_CALLBACK_FLAGS \
146 148 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
147 149
148 150 static mac_callbacks_t aggr_m_callbacks = {
149 151 AGGR_M_CALLBACK_FLAGS,
150 152 aggr_m_stat,
151 153 aggr_m_start,
152 154 aggr_m_stop,
153 155 aggr_m_promisc,
154 156 aggr_m_multicst,
155 157 NULL,
156 158 NULL,
157 159 NULL,
158 160 aggr_m_ioctl,
159 161 aggr_m_capab_get,
160 162 NULL,
161 163 NULL,
162 164 aggr_m_setprop,
163 165 NULL,
164 166 aggr_m_propinfo
165 167 };
166 168
167 169 /*ARGSUSED*/
168 170 static int
169 171 aggr_grp_constructor(void *buf, void *arg, int kmflag)
170 172 {
171 173 aggr_grp_t *grp = buf;
172 174
173 175 bzero(grp, sizeof (*grp));
174 176 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
175 177 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
176 178 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
177 179 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
178 180 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
179 181 mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
180 182 cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
181 183 grp->lg_link_state = LINK_STATE_UNKNOWN;
182 184 return (0);
183 185 }
184 186
185 187 /*ARGSUSED*/
186 188 static void
187 189 aggr_grp_destructor(void *buf, void *arg)
188 190 {
189 191 aggr_grp_t *grp = buf;
190 192
191 193 if (grp->lg_tx_ports != NULL) {
192 194 kmem_free(grp->lg_tx_ports,
193 195 grp->lg_tx_ports_size * sizeof (aggr_port_t *));
194 196 }
195 197
196 198 mutex_destroy(&grp->lg_lacp_lock);
197 199 cv_destroy(&grp->lg_lacp_cv);
198 200 mutex_destroy(&grp->lg_port_lock);
199 201 cv_destroy(&grp->lg_port_cv);
200 202 rw_destroy(&grp->lg_tx_lock);
201 203 mutex_destroy(&grp->lg_tx_flowctl_lock);
202 204 cv_destroy(&grp->lg_tx_flowctl_cv);
203 205 }
204 206
205 207 void
206 208 aggr_grp_init(void)
207 209 {
208 210 aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
209 211 sizeof (aggr_grp_t), 0, aggr_grp_constructor,
210 212 aggr_grp_destructor, NULL, NULL, NULL, 0);
211 213
212 214 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
213 215 GRP_HASHSZ, mod_hash_null_valdtor);
214 216 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
215 217 aggr_grp_cnt = 0;
216 218
217 219 /*
218 220 * Allocate an id space to manage key values (when key is not
219 221 * specified). The range of the id space will be from
220 222 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
221 223 * uses a 16-bit key.
222 224 */
223 225 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
224 226 ASSERT(key_ids != NULL);
225 227 }
226 228
227 229 void
228 230 aggr_grp_fini(void)
229 231 {
230 232 id_space_destroy(key_ids);
231 233 rw_destroy(&aggr_grp_lock);
232 234 mod_hash_destroy_idhash(aggr_grp_hash);
233 235 kmem_cache_destroy(aggr_grp_cache);
234 236 }
235 237
236 238 uint_t
237 239 aggr_grp_count(void)
238 240 {
239 241 uint_t count;
240 242
241 243 rw_enter(&aggr_grp_lock, RW_READER);
242 244 count = aggr_grp_cnt;
243 245 rw_exit(&aggr_grp_lock);
244 246 return (count);
245 247 }
246 248
247 249 /*
248 250 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
249 251 * require the mac perimeter, this function holds a reference of the aggr
250 252 * and aggr won't call mac_unregister() until this reference drops to 0.
251 253 */
252 254 void
253 255 aggr_grp_port_hold(aggr_port_t *port)
254 256 {
255 257 aggr_grp_t *grp = port->lp_grp;
256 258
257 259 AGGR_PORT_REFHOLD(port);
258 260 mutex_enter(&grp->lg_port_lock);
259 261 grp->lg_port_ref++;
260 262 mutex_exit(&grp->lg_port_lock);
261 263 }
262 264
263 265 /*
264 266 * Release the reference of the grp and inform aggr_grp_delete() calling
265 267 * mac_unregister() is now safe.
266 268 */
267 269 void
268 270 aggr_grp_port_rele(aggr_port_t *port)
269 271 {
270 272 aggr_grp_t *grp = port->lp_grp;
271 273
272 274 mutex_enter(&grp->lg_port_lock);
273 275 if (--grp->lg_port_ref == 0)
274 276 cv_signal(&grp->lg_port_cv);
275 277 mutex_exit(&grp->lg_port_lock);
276 278 AGGR_PORT_REFRELE(port);
277 279 }
278 280
279 281 /*
280 282 * Wait for the port's lacp timer thread and the port's notification callback
281 283 * to exit.
282 284 */
283 285 void
284 286 aggr_grp_port_wait(aggr_grp_t *grp)
285 287 {
286 288 mutex_enter(&grp->lg_port_lock);
287 289 if (grp->lg_port_ref != 0)
288 290 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
289 291 mutex_exit(&grp->lg_port_lock);
290 292 }
291 293
292 294 /*
293 295 * Attach a port to a link aggregation group.
294 296 *
295 297 * A port is attached to a link aggregation group once its speed
296 298 * and link state have been verified.
297 299 *
298 300 * Returns B_TRUE if the group link state or speed has changed. If
299 301 * it's the case, the caller must notify the MAC layer via a call
300 302 * to mac_link().
301 303 */
302 304 boolean_t
303 305 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
304 306 {
305 307 boolean_t link_state_changed = B_FALSE;
306 308
307 309 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
308 310 ASSERT(MAC_PERIM_HELD(port->lp_mh));
309 311
310 312 if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
311 313 return (B_FALSE);
312 314
313 315 /*
314 316 * Validate the MAC port link speed and update the group
315 317 * link speed if needed.
316 318 */
↓ open down ↓ |
180 lines elided |
↑ open up ↑ |
317 319 if (port->lp_ifspeed == 0 ||
318 320 port->lp_link_state != LINK_STATE_UP ||
319 321 port->lp_link_duplex != LINK_DUPLEX_FULL) {
320 322 /*
321 323 * Can't attach a MAC port with unknown link speed,
322 324 * down link, or not in full duplex mode.
323 325 */
324 326 return (B_FALSE);
325 327 }
326 328
329 + mutex_enter(&grp->lg_stat_lock);
327 330 if (grp->lg_ifspeed == 0) {
328 331 /*
329 332 * The group inherits the speed of the first link being
330 333 * attached.
331 334 */
332 335 grp->lg_ifspeed = port->lp_ifspeed;
333 336 link_state_changed = B_TRUE;
334 337 } else if (grp->lg_ifspeed != port->lp_ifspeed) {
335 338 /*
336 339 * The link speed of the MAC port must be the same as
337 340 * the group link speed, as per 802.3ad. Since it is
338 341 * not, the attach is cancelled.
339 342 */
343 + mutex_exit(&grp->lg_stat_lock);
340 344 return (B_FALSE);
341 345 }
346 + mutex_exit(&grp->lg_stat_lock);
342 347
343 348 grp->lg_nattached_ports++;
344 349
345 350 /*
346 351 * Update the group link state.
347 352 */
348 353 if (grp->lg_link_state != LINK_STATE_UP) {
349 354 grp->lg_link_state = LINK_STATE_UP;
355 + mutex_enter(&grp->lg_stat_lock);
350 356 grp->lg_link_duplex = LINK_DUPLEX_FULL;
357 + mutex_exit(&grp->lg_stat_lock);
351 358 link_state_changed = B_TRUE;
352 359 }
353 360
354 361 /*
355 362 * Update port's state.
356 363 */
357 364 port->lp_state = AGGR_PORT_STATE_ATTACHED;
358 365
359 366 aggr_grp_multicst_port(port, B_TRUE);
360 367
361 368 /*
362 369 * Set port's receive callback
363 370 */
364 371 mac_rx_set(port->lp_mch, aggr_recv_cb, port);
365 372
366 373 /*
367 374 * If LACP is OFF, the port can be used to send data as soon
368 375 * as its link is up and verified to be compatible with the
369 376 * aggregation.
370 377 *
371 378 * If LACP is active or passive, notify the LACP subsystem, which
372 379 * will enable sending on the port following the LACP protocol.
373 380 */
374 381 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
375 382 aggr_send_port_enable(port);
376 383 else
377 384 aggr_lacp_port_attached(port);
378 385
379 386 return (link_state_changed);
380 387 }
381 388
382 389 boolean_t
383 390 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
384 391 {
385 392 boolean_t link_state_changed = B_FALSE;
386 393
387 394 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
388 395 ASSERT(MAC_PERIM_HELD(port->lp_mh));
389 396
390 397 /* update state */
391 398 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
392 399 return (B_FALSE);
393 400
394 401 mac_rx_clear(port->lp_mch);
395 402
396 403 aggr_grp_multicst_port(port, B_FALSE);
397 404
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
398 405 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
399 406 aggr_send_port_disable(port);
400 407 else
401 408 aggr_lacp_port_detached(port);
402 409
403 410 port->lp_state = AGGR_PORT_STATE_STANDBY;
404 411
405 412 grp->lg_nattached_ports--;
406 413 if (grp->lg_nattached_ports == 0) {
407 414 /* the last attached MAC port of the group is being detached */
408 - grp->lg_ifspeed = 0;
409 415 grp->lg_link_state = LINK_STATE_DOWN;
416 + mutex_enter(&grp->lg_stat_lock);
417 + grp->lg_ifspeed = 0;
410 418 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
419 + mutex_exit(&grp->lg_stat_lock);
411 420 link_state_changed = B_TRUE;
412 421 }
413 422
414 423 return (link_state_changed);
415 424 }
416 425
417 426 /*
418 427 * Update the MAC addresses of the constituent ports of the specified
419 428 * group. This function is invoked:
420 429 * - after creating a new aggregation group.
421 430 * - after adding new ports to an aggregation group.
422 431 * - after removing a port from a group when the MAC address of
423 432 * that port was used for the MAC address of the group.
424 433 * - after the MAC address of a port changed when the MAC address
425 434 * of that port was used for the MAC address of the group.
426 435 *
427 436 * Return true if the link state of the aggregation changed, for example
428 437 * as a result of a failure changing the MAC address of one of the
429 438 * constituent ports.
430 439 */
431 440 boolean_t
432 441 aggr_grp_update_ports_mac(aggr_grp_t *grp)
433 442 {
434 443 aggr_port_t *cport;
435 444 boolean_t link_state_changed = B_FALSE;
436 445 mac_perim_handle_t mph;
437 446
438 447 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
439 448
440 449 for (cport = grp->lg_ports; cport != NULL;
441 450 cport = cport->lp_next) {
442 451 mac_perim_enter_by_mh(cport->lp_mh, &mph);
443 452 if (aggr_port_unicst(cport) != 0) {
444 453 if (aggr_grp_detach_port(grp, cport))
445 454 link_state_changed = B_TRUE;
446 455 } else {
447 456 /*
448 457 * If a port was detached because of a previous
449 458 * failure changing the MAC address, the port is
450 459 * reattached when it successfully changes the MAC
451 460 * address now, and this might cause the link state
452 461 * of the aggregation to change.
453 462 */
454 463 if (aggr_grp_attach_port(grp, cport))
455 464 link_state_changed = B_TRUE;
456 465 }
457 466 mac_perim_exit(mph);
458 467 }
459 468 return (link_state_changed);
460 469 }
461 470
462 471 /*
463 472 * Invoked when the MAC address of a port has changed. If the port's
464 473 * MAC address was used for the group MAC address, set mac_addr_changedp
465 474 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
466 475 * notification. If the link state changes due to detach/attach of
467 476 * the constituent port, set link_state_changedp to B_TRUE to indicate
468 477 * to the caller that it should send a MAC_NOTE_LINK notification. In both
469 478 * cases, it is the responsibility of the caller to invoke notification
470 479 * functions after releasing the port lock.
471 480 */
472 481 void
473 482 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
474 483 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
475 484 {
476 485 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
477 486 ASSERT(MAC_PERIM_HELD(port->lp_mh));
478 487 ASSERT(mac_addr_changedp != NULL);
479 488 ASSERT(link_state_changedp != NULL);
480 489
481 490 *mac_addr_changedp = B_FALSE;
482 491 *link_state_changedp = B_FALSE;
483 492
484 493 if (grp->lg_addr_fixed) {
485 494 /*
486 495 * The group is using a fixed MAC address or an automatic
487 496 * MAC address has not been set.
488 497 */
489 498 return;
490 499 }
491 500
492 501 if (grp->lg_mac_addr_port == port) {
493 502 /*
494 503 * The MAC address of the port was assigned to the group
495 504 * MAC address. Update the group MAC address.
496 505 */
497 506 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
498 507 *mac_addr_changedp = B_TRUE;
499 508 } else {
500 509 /*
501 510 * Update the actual port MAC address to the MAC address
502 511 * of the group.
503 512 */
504 513 if (aggr_port_unicst(port) != 0) {
505 514 *link_state_changedp = aggr_grp_detach_port(grp, port);
506 515 } else {
507 516 /*
508 517 * If a port was detached because of a previous
509 518 * failure changing the MAC address, the port is
510 519 * reattached when it successfully changes the MAC
511 520 * address now, and this might cause the link state
512 521 * of the aggregation to change.
513 522 */
514 523 *link_state_changedp = aggr_grp_attach_port(grp, port);
515 524 }
516 525 }
517 526 }
518 527
519 528 /*
520 529 * Add a port to a link aggregation group.
521 530 */
522 531 static int
523 532 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
524 533 aggr_port_t **pp)
525 534 {
526 535 aggr_port_t *port, **cport;
527 536 mac_perim_handle_t mph;
528 537 zoneid_t port_zoneid = ALL_ZONES;
529 538 int err;
530 539
531 540 /* The port must be in the same zone as the aggregation. */
532 541 if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
533 542 port_zoneid = GLOBAL_ZONEID;
534 543 if (grp->lg_zoneid != port_zoneid)
535 544 return (EBUSY);
536 545
537 546 /*
538 547 * lg_mh could be NULL when the function is called during the creation
539 548 * of the aggregation.
540 549 */
541 550 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
542 551
543 552 /* create new port */
544 553 err = aggr_port_create(grp, port_linkid, force, &port);
545 554 if (err != 0)
546 555 return (err);
547 556
548 557 mac_perim_enter_by_mh(port->lp_mh, &mph);
549 558
550 559 /* add port to list of group constituent ports */
551 560 cport = &grp->lg_ports;
552 561 while (*cport != NULL)
553 562 cport = &((*cport)->lp_next);
554 563 *cport = port;
555 564
556 565 /*
557 566 * Back reference to the group it is member of. A port always
558 567 * holds a reference to its group to ensure that the back
559 568 * reference is always valid.
560 569 */
561 570 port->lp_grp = grp;
562 571 AGGR_GRP_REFHOLD(grp);
563 572 grp->lg_nports++;
564 573
565 574 aggr_lacp_init_port(port);
566 575 mac_perim_exit(mph);
567 576
568 577 if (pp != NULL)
569 578 *pp = port;
570 579
571 580 return (0);
572 581 }
573 582
574 583 /*
575 584 * This is called in response to either our LACP state machine or a MAC
576 585 * notification that the link has gone down via aggr_send_port_disable(). At
577 586 * this point, we may need to update our default ring. To that end, we go
578 587 * through the set of ports (underlying datalinks in an aggregation) that are
579 588 * currently enabled to transmit data. If all our links have been disabled for
580 589 * transmit, then we don't do anything.
581 590 *
582 591 * Note, because we only have a single TX group, we don't have to worry about
583 592 * the rings moving between groups and the chance that mac will reassign it
584 593 * unless someone removes a port, at which point, we play it safe and call this
585 594 * again.
586 595 */
587 596 void
588 597 aggr_grp_update_default(aggr_grp_t *grp)
589 598 {
590 599 aggr_port_t *port;
591 600 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
592 601
593 602 rw_enter(&grp->lg_tx_lock, RW_WRITER);
594 603
595 604 if (grp->lg_ntx_ports == 0) {
596 605 rw_exit(&grp->lg_tx_lock);
597 606 return;
598 607 }
599 608
600 609 port = grp->lg_tx_ports[0];
601 610 ASSERT(port->lp_tx_ring_cnt > 0);
602 611 mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
603 612 rw_exit(&grp->lg_tx_lock);
604 613 }
605 614
606 615 /*
607 616 * Add a pseudo RX ring for the given HW ring handle.
608 617 */
609 618 static int
610 619 aggr_add_pseudo_rx_ring(aggr_port_t *port,
611 620 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
612 621 {
613 622 aggr_pseudo_rx_ring_t *ring;
614 623 int err;
615 624 int j;
616 625
617 626 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
618 627 ring = rx_grp->arg_rings + j;
619 628 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
620 629 break;
621 630 }
622 631
623 632 /*
624 633 * No slot for this new RX ring.
625 634 */
626 635 if (j == MAX_RINGS_PER_GROUP)
627 636 return (EIO);
628 637
629 638 ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
630 639 ring->arr_hw_rh = hw_rh;
631 640 ring->arr_port = port;
632 641 rx_grp->arg_ring_cnt++;
633 642
634 643 /*
635 644 * The group is already registered, dynamically add a new ring to the
636 645 * mac group.
637 646 */
638 647 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
639 648 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
640 649 ring->arr_hw_rh = NULL;
641 650 ring->arr_port = NULL;
642 651 rx_grp->arg_ring_cnt--;
643 652 } else {
644 653 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
645 654 mac_find_ring(rx_grp->arg_gh, j));
646 655 }
647 656 return (err);
648 657 }
649 658
650 659 /*
651 660 * Remove the pseudo RX ring of the given HW ring handle.
652 661 */
653 662 static void
654 663 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
655 664 {
656 665 aggr_pseudo_rx_ring_t *ring;
657 666 int j;
658 667
659 668 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
660 669 ring = rx_grp->arg_rings + j;
661 670 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
662 671 ring->arr_hw_rh != hw_rh) {
663 672 continue;
664 673 }
665 674
666 675 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
667 676
↓ open down ↓ |
247 lines elided |
↑ open up ↑ |
668 677 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
669 678 ring->arr_hw_rh = NULL;
670 679 ring->arr_port = NULL;
671 680 rx_grp->arg_ring_cnt--;
672 681 mac_hwring_teardown(hw_rh);
673 682 break;
674 683 }
675 684 }
676 685
677 686 /*
678 - * This function is called to create pseudo rings over the hardware rings of
679 - * the underlying device. Note that there is a 1:1 mapping between the pseudo
680 - * RX rings of the aggr and the hardware rings of the underlying port.
687 + * Create pseudo rings over the HW rings of the port.
688 + *
689 + * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
690 + *
691 + * o Program existing unicast filters on the pseudo group into the HW group.
692 + *
693 + * o Program existing VLAN filters on the pseudo group into the HW group.
681 694 */
682 695 static int
683 696 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
684 697 {
685 698 aggr_grp_t *grp = port->lp_grp;
686 699 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
687 700 aggr_unicst_addr_t *addr, *a;
688 701 mac_perim_handle_t pmph;
702 + aggr_vlan_t *avp;
689 703 int hw_rh_cnt, i = 0, j;
690 704 int err = 0;
691 705
692 706 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
693 707 mac_perim_enter_by_mh(port->lp_mh, &pmph);
694 708
695 709 /*
696 - * This function must be called after the aggr registers its mac
697 - * and its RX group has been initialized.
710 + * This function must be called after the aggr registers its MAC
711 + * and its Rx group has been initialized.
698 712 */
699 713 ASSERT(rx_grp->arg_gh != NULL);
700 714
701 715 /*
702 - * Get the list the the underlying HW rings.
716 + * Get the list of the underlying HW rings.
703 717 */
704 718 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
705 719 &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
706 720
707 721 if (port->lp_hwgh != NULL) {
708 722 /*
709 - * Quiesce the HW ring and the mac srs on the ring. Note
723 + * Quiesce the HW ring and the MAC SRS on the ring. Note
710 724 * that the HW ring will be restarted when the pseudo ring
711 725 * is started. At that time all the packets will be
712 - * directly passed up to the pseudo RX ring and handled
713 - * by mac srs created over the pseudo RX ring.
726 + * directly passed up to the pseudo Rx ring and handled
727 + * by MAC SRS created over the pseudo Rx ring.
714 728 */
715 729 mac_rx_client_quiesce(port->lp_mch);
716 730 mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
717 731 }
718 732
719 733 /*
720 - * Add all the unicast addresses to the newly added port.
734 + * Add existing VLAN and unicast address filters to the port.
721 735 */
736 + for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
737 + avp = list_next(&rx_grp->arg_vlans, avp)) {
738 + if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0)
739 + goto err;
740 + }
741 +
722 742 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
723 743 if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
724 - break;
744 + goto err;
725 745 }
726 746
727 - for (i = 0; err == 0 && i < hw_rh_cnt; i++)
747 + for (i = 0; i < hw_rh_cnt; i++) {
728 748 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
749 + if (err != 0)
750 + goto err;
751 + }
729 752
730 - if (err != 0) {
731 - for (j = 0; j < i; j++)
732 - aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
753 + port->lp_rx_grp_added = B_TRUE;
754 + mac_perim_exit(pmph);
755 + return (0);
733 756
734 - for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
735 - aggr_port_remmac(port, a->aua_addr);
757 +err:
758 + ASSERT(err != 0);
736 759
737 - if (port->lp_hwgh != NULL) {
738 - mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
739 - mac_rx_client_restart(port->lp_mch);
740 - port->lp_hwgh = NULL;
760 + for (j = 0; j < i; j++)
761 + aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
762 +
763 + for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
764 + aggr_port_remmac(port, a->aua_addr);
765 +
766 + if (avp != NULL)
767 + avp = list_prev(&rx_grp->arg_vlans, avp);
768 +
769 + for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
770 + int err2;
771 +
772 + if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) {
773 + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
774 + ": errno %d.", avp->av_vid,
775 + mac_client_name(port->lp_mch), err2);
741 776 }
742 - } else {
743 - port->lp_rx_grp_added = B_TRUE;
744 777 }
745 -done:
778 +
779 + if (port->lp_hwgh != NULL) {
780 + mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
781 + mac_rx_client_restart(port->lp_mch);
782 + port->lp_hwgh = NULL;
783 + }
784 +
746 785 mac_perim_exit(pmph);
747 786 return (err);
748 787 }
749 788
750 789 /*
751 - * This function is called by aggr to remove pseudo RX rings over the
752 - * HW rings of the underlying port.
790 + * Destroy the pseudo rings mapping to this port and remove all VLAN
791 + * and unicast filters from this port. Even if there are no underlying
792 + * HW rings we must still remove the unicast filters to take the port
793 + * out of promisc mode.
753 794 */
754 795 static void
755 796 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
756 797 {
757 798 aggr_grp_t *grp = port->lp_grp;
758 799 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
759 800 aggr_unicst_addr_t *addr;
760 801 mac_group_handle_t hwgh;
761 802 mac_perim_handle_t pmph;
762 803 int hw_rh_cnt, i;
763 804
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
764 805 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
765 806 mac_perim_enter_by_mh(port->lp_mh, &pmph);
766 807
767 808 if (!port->lp_rx_grp_added)
768 809 goto done;
769 810
770 811 ASSERT(rx_grp->arg_gh != NULL);
771 812 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
772 813 &hwgh, hw_rh, MAC_RING_TYPE_RX);
773 814
774 - /*
775 - * If hw_rh_cnt is 0, it means that the underlying port does not
776 - * support RX rings. Directly return in this case.
777 - */
778 815 for (i = 0; i < hw_rh_cnt; i++)
779 816 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
780 817
781 818 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
782 819 aggr_port_remmac(port, addr->aua_addr);
783 820
821 + for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
822 + avp = list_next(&rx_grp->arg_vlans, avp)) {
823 + int err;
824 +
825 + if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) {
826 + cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
827 + ": errno %d.", avp->av_vid,
828 + mac_client_name(port->lp_mch), err);
829 + }
830 + }
831 +
784 832 if (port->lp_hwgh != NULL) {
785 833 port->lp_hwgh = NULL;
786 834
787 835 /*
788 836 * First clear the permanent-quiesced flag of the RX srs then
789 837 * restart the HW ring and the mac srs on the ring. Note that
790 838 * the HW ring and associated SRS will soon be removed when
791 839 * the port is removed from the aggr.
792 840 */
793 841 mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
794 842 mac_rx_client_restart(port->lp_mch);
795 843 }
796 844
797 845 port->lp_rx_grp_added = B_FALSE;
798 846 done:
799 847 mac_perim_exit(pmph);
800 848 }
801 849
802 850 /*
803 851 * Add a pseudo TX ring for the given HW ring handle.
804 852 */
805 853 static int
806 854 aggr_add_pseudo_tx_ring(aggr_port_t *port,
807 855 aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
808 856 mac_ring_handle_t *pseudo_rh)
809 857 {
810 858 aggr_pseudo_tx_ring_t *ring;
811 859 int err;
812 860 int i;
813 861
814 862 ASSERT(MAC_PERIM_HELD(port->lp_mh));
815 863 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
816 864 ring = tx_grp->atg_rings + i;
817 865 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
818 866 break;
819 867 }
820 868 /*
821 869 * No slot for this new TX ring.
822 870 */
823 871 if (i == MAX_RINGS_PER_GROUP)
824 872 return (EIO);
825 873 /*
826 874 * The following 4 statements needs to be done before
827 875 * calling mac_group_add_ring(). Otherwise it will
828 876 * result in an assertion failure in mac_init_ring().
829 877 */
830 878 ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
831 879 ring->atr_hw_rh = hw_rh;
832 880 ring->atr_port = port;
833 881 tx_grp->atg_ring_cnt++;
834 882
835 883 /*
836 884 * The TX side has no concept of ring groups unlike RX groups.
837 885 * There is just a single group which stores all the TX rings.
838 886 * This group will be used to store aggr's pseudo TX rings.
839 887 */
840 888 if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
841 889 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
842 890 ring->atr_hw_rh = NULL;
843 891 ring->atr_port = NULL;
844 892 tx_grp->atg_ring_cnt--;
845 893 } else {
846 894 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
847 895 if (hw_rh != NULL) {
848 896 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
849 897 mac_find_ring(tx_grp->atg_gh, i));
850 898 }
851 899 }
852 900
853 901 return (err);
854 902 }
855 903
856 904 /*
857 905 * Remove the pseudo TX ring of the given HW ring handle.
858 906 */
859 907 static void
860 908 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
861 909 mac_ring_handle_t pseudo_hw_rh)
862 910 {
863 911 aggr_pseudo_tx_ring_t *ring;
864 912 int i;
865 913
866 914 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
867 915 ring = tx_grp->atg_rings + i;
868 916 if (ring->atr_rh != pseudo_hw_rh)
869 917 continue;
870 918
871 919 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
872 920 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
873 921 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
874 922 mac_hwring_teardown(ring->atr_hw_rh);
875 923 ring->atr_hw_rh = NULL;
876 924 ring->atr_port = NULL;
877 925 tx_grp->atg_ring_cnt--;
878 926 break;
879 927 }
880 928 }
881 929
882 930 /*
883 931 * This function is called to create pseudo rings over hardware rings of
884 932 * the underlying device. There is a 1:1 mapping between the pseudo TX
885 933 * rings of the aggr and the hardware rings of the underlying port.
886 934 */
887 935 static int
888 936 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
889 937 {
890 938 aggr_grp_t *grp = port->lp_grp;
891 939 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
892 940 mac_perim_handle_t pmph;
893 941 int hw_rh_cnt, i = 0, j;
894 942 int err = 0;
895 943
896 944 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
897 945 mac_perim_enter_by_mh(port->lp_mh, &pmph);
898 946
899 947 /*
900 948 * Get the list the the underlying HW rings.
901 949 */
902 950 hw_rh_cnt = mac_hwrings_get(port->lp_mch,
903 951 NULL, hw_rh, MAC_RING_TYPE_TX);
904 952
905 953 /*
906 954 * Even if the underlying NIC does not have TX rings, we
907 955 * still make a psuedo TX ring for that NIC with NULL as
908 956 * the ring handle.
909 957 */
910 958 if (hw_rh_cnt == 0)
911 959 port->lp_tx_ring_cnt = 1;
912 960 else
913 961 port->lp_tx_ring_cnt = hw_rh_cnt;
914 962
915 963 port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
916 964 port->lp_tx_ring_cnt), KM_SLEEP);
917 965 port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
918 966 port->lp_tx_ring_cnt), KM_SLEEP);
919 967
920 968 if (hw_rh_cnt == 0) {
921 969 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
922 970 NULL, &pseudo_rh)) == 0) {
923 971 port->lp_tx_rings[0] = NULL;
924 972 port->lp_pseudo_tx_rings[0] = pseudo_rh;
925 973 }
926 974 } else {
927 975 for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
928 976 err = aggr_add_pseudo_tx_ring(port,
929 977 tx_grp, hw_rh[i], &pseudo_rh);
930 978 if (err != 0)
931 979 break;
932 980 port->lp_tx_rings[i] = hw_rh[i];
933 981 port->lp_pseudo_tx_rings[i] = pseudo_rh;
934 982 }
935 983 }
936 984
937 985 if (err != 0) {
938 986 if (hw_rh_cnt != 0) {
939 987 for (j = 0; j < i; j++) {
940 988 aggr_rem_pseudo_tx_ring(tx_grp,
941 989 port->lp_pseudo_tx_rings[j]);
942 990 }
943 991 }
944 992 kmem_free(port->lp_tx_rings,
945 993 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
946 994 kmem_free(port->lp_pseudo_tx_rings,
947 995 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
948 996 port->lp_tx_ring_cnt = 0;
949 997 } else {
950 998 port->lp_tx_grp_added = B_TRUE;
951 999 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
952 1000 aggr_tx_ring_update, port);
953 1001 }
954 1002 mac_perim_exit(pmph);
955 1003 aggr_grp_update_default(grp);
956 1004 return (err);
957 1005 }
958 1006
959 1007 /*
960 1008 * This function is called by aggr to remove pseudo TX rings over the
961 1009 * HW rings of the underlying port.
962 1010 */
963 1011 static void
964 1012 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
965 1013 {
966 1014 aggr_grp_t *grp = port->lp_grp;
967 1015 mac_perim_handle_t pmph;
968 1016 int i;
969 1017
970 1018 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
971 1019 mac_perim_enter_by_mh(port->lp_mh, &pmph);
972 1020
973 1021 if (!port->lp_tx_grp_added)
974 1022 goto done;
975 1023
976 1024 ASSERT(tx_grp->atg_gh != NULL);
977 1025
978 1026 for (i = 0; i < port->lp_tx_ring_cnt; i++)
979 1027 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
980 1028
981 1029 kmem_free(port->lp_tx_rings,
982 1030 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
983 1031 kmem_free(port->lp_pseudo_tx_rings,
984 1032 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
985 1033
986 1034 port->lp_tx_ring_cnt = 0;
987 1035 (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
988 1036 port->lp_tx_grp_added = B_FALSE;
989 1037 aggr_grp_update_default(grp);
990 1038 done:
991 1039 mac_perim_exit(pmph);
992 1040 }
993 1041
994 1042 static int
995 1043 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
996 1044 {
997 1045 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
998 1046 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
999 1047 }
1000 1048
1001 1049 static int
1002 1050 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1003 1051 {
1004 1052 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1005 1053 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1006 1054 }
1007 1055
1008 1056 /*
1009 1057 * Here we need to start the pseudo-ring. As MAC already ensures that the
1010 1058 * underlying device is set up, all we need to do is save the ring generation.
1011 1059 *
1012 1060 * Note, we don't end up wanting to use the underlying mac_hwring_start/stop
1013 1061 * functions here as those don't actually stop and start the ring, they just
1014 1062 * quiesce the ring. Regardless of whether the aggr is logically up or not, we
1015 1063 * want to make sure that we can receive traffic for LACP.
1016 1064 */
1017 1065 static int
1018 1066 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1019 1067 {
1020 1068 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1021 1069
1022 1070 rr_ring->arr_gen = mr_gen;
1023 1071 return (0);
1024 1072 }
1025 1073
1026 1074 /*
1027 1075 * Add one or more ports to an existing link aggregation group.
1028 1076 */
1029 1077 int
1030 1078 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1031 1079 laioc_port_t *ports)
1032 1080 {
1033 1081 int rc, i, nadded = 0;
1034 1082 aggr_grp_t *grp = NULL;
1035 1083 aggr_port_t *port;
1036 1084 boolean_t link_state_changed = B_FALSE;
1037 1085 mac_perim_handle_t mph, pmph;
1038 1086
1039 1087 /* get group corresponding to linkid */
1040 1088 rw_enter(&aggr_grp_lock, RW_READER);
1041 1089 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1042 1090 (mod_hash_val_t *)&grp) != 0) {
1043 1091 rw_exit(&aggr_grp_lock);
1044 1092 return (ENOENT);
1045 1093 }
1046 1094 AGGR_GRP_REFHOLD(grp);
1047 1095
1048 1096 /*
1049 1097 * Hold the perimeter so that the aggregation won't be destroyed.
1050 1098 */
1051 1099 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1052 1100 rw_exit(&aggr_grp_lock);
1053 1101
1054 1102 /* add the specified ports to group */
1055 1103 for (i = 0; i < nports; i++) {
1056 1104 /* add port to group */
1057 1105 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1058 1106 force, &port)) != 0) {
1059 1107 goto bail;
1060 1108 }
1061 1109 ASSERT(port != NULL);
1062 1110 nadded++;
1063 1111
1064 1112 /* check capabilities */
1065 1113 if (!aggr_grp_capab_check(grp, port) ||
1066 1114 !aggr_grp_sdu_check(grp, port) ||
1067 1115 !aggr_grp_margin_check(grp, port)) {
1068 1116 rc = ENOTSUP;
1069 1117 goto bail;
1070 1118 }
1071 1119
1072 1120 /*
1073 1121 * Create the pseudo ring for each HW ring of the underlying
1074 1122 * port.
1075 1123 */
1076 1124 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
1077 1125 if (rc != 0)
1078 1126 goto bail;
1079 1127 rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1080 1128 if (rc != 0)
1081 1129 goto bail;
1082 1130
1083 1131 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1084 1132
1085 1133 /* set LACP mode */
1086 1134 aggr_port_lacp_set_mode(grp, port);
1087 1135
1088 1136 /* start port if group has already been started */
1089 1137 if (grp->lg_started) {
1090 1138 rc = aggr_port_start(port);
1091 1139 if (rc != 0) {
1092 1140 mac_perim_exit(pmph);
1093 1141 goto bail;
1094 1142 }
1095 1143
1096 1144 /*
1097 1145 * Turn on the promiscuous mode over the port when it
1098 1146 * is requested to be turned on to receive the
1099 1147 * non-primary address over a port, or the promiscous
1100 1148 * mode is enabled over the aggr.
1101 1149 */
1102 1150 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1103 1151 rc = aggr_port_promisc(port, B_TRUE);
1104 1152 if (rc != 0) {
1105 1153 mac_perim_exit(pmph);
1106 1154 goto bail;
1107 1155 }
1108 1156 }
1109 1157 }
1110 1158 mac_perim_exit(pmph);
1111 1159
1112 1160 /*
1113 1161 * Attach each port if necessary.
1114 1162 */
1115 1163 if (aggr_port_notify_link(grp, port))
1116 1164 link_state_changed = B_TRUE;
1117 1165
1118 1166 /*
1119 1167 * Initialize the callback functions for this port.
1120 1168 */
1121 1169 aggr_port_init_callbacks(port);
1122 1170 }
1123 1171
1124 1172 /* update the MAC address of the constituent ports */
1125 1173 if (aggr_grp_update_ports_mac(grp))
1126 1174 link_state_changed = B_TRUE;
1127 1175
1128 1176 if (link_state_changed)
1129 1177 mac_link_update(grp->lg_mh, grp->lg_link_state);
1130 1178
1131 1179 bail:
1132 1180 if (rc != 0) {
1133 1181 /* stop and remove ports that have been added */
1134 1182 for (i = 0; i < nadded; i++) {
1135 1183 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1136 1184 ASSERT(port != NULL);
1137 1185 if (grp->lg_started) {
1138 1186 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1139 1187 (void) aggr_port_promisc(port, B_FALSE);
1140 1188 aggr_port_stop(port);
1141 1189 mac_perim_exit(pmph);
1142 1190 }
1143 1191 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1144 1192 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1145 1193 (void) aggr_grp_rem_port(grp, port, NULL, NULL);
1146 1194 }
1147 1195 }
1148 1196
1149 1197 mac_perim_exit(mph);
1150 1198 AGGR_GRP_REFRELE(grp);
1151 1199 return (rc);
1152 1200 }
1153 1201
1154 1202 static int
1155 1203 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1156 1204 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1157 1205 aggr_lacp_timer_t lacp_timer)
1158 1206 {
1159 1207 boolean_t mac_addr_changed = B_FALSE;
1160 1208 boolean_t link_state_changed = B_FALSE;
1161 1209 mac_perim_handle_t pmph;
1162 1210
1163 1211 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1164 1212
1165 1213 /* validate fixed address if specified */
1166 1214 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
1167 1215 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
1168 1216 (mac_addr[0] & 0x01))) {
1169 1217 return (EINVAL);
1170 1218 }
1171 1219
1172 1220 /* update policy if requested */
1173 1221 if (update_mask & AGGR_MODIFY_POLICY)
1174 1222 aggr_send_update_policy(grp, policy);
1175 1223
1176 1224 /* update unicast MAC address if requested */
1177 1225 if (update_mask & AGGR_MODIFY_MAC) {
1178 1226 if (mac_fixed) {
1179 1227 /* user-supplied MAC address */
1180 1228 grp->lg_mac_addr_port = NULL;
1181 1229 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
1182 1230 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1183 1231 mac_addr_changed = B_TRUE;
1184 1232 }
1185 1233 } else if (grp->lg_addr_fixed) {
1186 1234 /* switch from user-supplied to automatic */
1187 1235 aggr_port_t *port = grp->lg_ports;
1188 1236
1189 1237 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1190 1238 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
1191 1239 grp->lg_mac_addr_port = port;
1192 1240 mac_addr_changed = B_TRUE;
1193 1241 mac_perim_exit(pmph);
1194 1242 }
1195 1243 grp->lg_addr_fixed = mac_fixed;
1196 1244 }
1197 1245
1198 1246 if (mac_addr_changed)
1199 1247 link_state_changed = aggr_grp_update_ports_mac(grp);
1200 1248
1201 1249 if (update_mask & AGGR_MODIFY_LACP_MODE)
1202 1250 aggr_lacp_update_mode(grp, lacp_mode);
1203 1251
1204 1252 if (update_mask & AGGR_MODIFY_LACP_TIMER)
1205 1253 aggr_lacp_update_timer(grp, lacp_timer);
1206 1254
1207 1255 if (link_state_changed)
1208 1256 mac_link_update(grp->lg_mh, grp->lg_link_state);
1209 1257
1210 1258 if (mac_addr_changed)
1211 1259 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1212 1260
1213 1261 return (0);
1214 1262 }
1215 1263
1216 1264 /*
1217 1265 * Update properties of an existing link aggregation group.
1218 1266 */
1219 1267 int
1220 1268 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1221 1269 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1222 1270 aggr_lacp_timer_t lacp_timer)
1223 1271 {
1224 1272 aggr_grp_t *grp = NULL;
1225 1273 mac_perim_handle_t mph;
1226 1274 int err;
1227 1275
1228 1276 /* get group corresponding to linkid */
1229 1277 rw_enter(&aggr_grp_lock, RW_READER);
1230 1278 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1231 1279 (mod_hash_val_t *)&grp) != 0) {
1232 1280 rw_exit(&aggr_grp_lock);
1233 1281 return (ENOENT);
1234 1282 }
1235 1283 AGGR_GRP_REFHOLD(grp);
1236 1284
1237 1285 /*
1238 1286 * Hold the perimeter so that the aggregation won't be destroyed.
1239 1287 */
1240 1288 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1241 1289 rw_exit(&aggr_grp_lock);
1242 1290
1243 1291 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1244 1292 mac_addr, lacp_mode, lacp_timer);
1245 1293
1246 1294 mac_perim_exit(mph);
1247 1295 AGGR_GRP_REFRELE(grp);
1248 1296 return (err);
1249 1297 }
1250 1298
1251 1299 /*
1252 1300 * Create a new link aggregation group upon request from administrator.
1253 1301 * Returns 0 on success, an errno on failure.
1254 1302 */
1255 1303 int
1256 1304 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1257 1305 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
1258 1306 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
1259 1307 cred_t *credp)
1260 1308 {
1261 1309 aggr_grp_t *grp = NULL;
1262 1310 aggr_port_t *port;
1263 1311 mac_register_t *mac;
1264 1312 boolean_t link_state_changed;
1265 1313 mac_perim_handle_t mph;
1266 1314 int err;
1267 1315 int i;
1268 1316 kt_did_t tid = 0;
1269 1317
1270 1318 /* need at least one port */
1271 1319 if (nports == 0)
1272 1320 return (EINVAL);
1273 1321
1274 1322 rw_enter(&aggr_grp_lock, RW_WRITER);
1275 1323
1276 1324 /* does a group with the same linkid already exist? */
1277 1325 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1278 1326 (mod_hash_val_t *)&grp);
1279 1327 if (err == 0) {
1280 1328 rw_exit(&aggr_grp_lock);
1281 1329 return (EEXIST);
1282 1330 }
1283 1331
1284 1332 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
1285 1333
1286 1334 grp->lg_refs = 1;
1287 1335 grp->lg_closing = B_FALSE;
1288 1336 grp->lg_force = force;
1289 1337 grp->lg_linkid = linkid;
1290 1338 grp->lg_zoneid = crgetzoneid(credp);
1291 1339 grp->lg_ifspeed = 0;
1292 1340 grp->lg_link_state = LINK_STATE_UNKNOWN;
1293 1341 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1294 1342 grp->lg_started = B_FALSE;
1295 1343 grp->lg_promisc = B_FALSE;
1296 1344 grp->lg_lacp_done = B_FALSE;
1297 1345 grp->lg_tx_notify_done = B_FALSE;
1298 1346 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1299 1347 grp->lg_lacp_rx_thread = thread_create(NULL, 0,
↓ open down ↓ |
506 lines elided |
↑ open up ↑ |
1300 1348 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1301 1349 grp->lg_tx_notify_thread = thread_create(NULL, 0,
1302 1350 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1303 1351 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1304 1352 MAX_RINGS_PER_GROUP), KM_SLEEP);
1305 1353 grp->lg_tx_blocked_cnt = 0;
1306 1354 bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1307 1355 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1308 1356 aggr_lacp_init_grp(grp);
1309 1357
1358 + grp->lg_rx_group.arg_untagged = 0;
1359 + list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t),
1360 + offsetof(aggr_vlan_t, av_link));
1361 +
1310 1362 /* add MAC ports to group */
1311 1363 grp->lg_ports = NULL;
1312 1364 grp->lg_nports = 0;
1313 1365 grp->lg_nattached_ports = 0;
1314 1366 grp->lg_ntx_ports = 0;
1315 1367
1316 1368 /*
1317 1369 * If key is not specified by the user, allocate the key.
1318 1370 */
1319 1371 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1320 1372 err = ENOMEM;
1321 1373 goto bail;
1322 1374 }
1323 1375 grp->lg_key = key;
1324 1376
1325 1377 for (i = 0; i < nports; i++) {
1326 - err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1378 + err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
1327 1379 if (err != 0)
1328 1380 goto bail;
1329 1381 }
1330 1382
1331 1383 /*
1332 1384 * If no explicit MAC address was specified by the administrator,
1333 1385 * set it to the MAC address of the first port.
1334 1386 */
1335 1387 grp->lg_addr_fixed = mac_fixed;
1336 1388 if (grp->lg_addr_fixed) {
1337 1389 /* validate specified address */
1338 1390 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1339 1391 err = EINVAL;
1340 1392 goto bail;
1341 1393 }
1342 1394 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1343 1395 } else {
1344 1396 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1345 1397 grp->lg_mac_addr_port = grp->lg_ports;
1346 1398 }
1347 1399
1348 1400 /* set the initial group capabilities */
1349 1401 aggr_grp_capab_set(grp);
1350 1402
1351 1403 if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1352 1404 err = ENOMEM;
1353 1405 goto bail;
1354 1406 }
1355 1407 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1356 1408 mac->m_driver = grp;
1357 1409 mac->m_dip = aggr_dip;
1358 1410 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1359 1411 mac->m_src_addr = grp->lg_addr;
1360 1412 mac->m_callbacks = &aggr_m_callbacks;
1361 1413 mac->m_min_sdu = 0;
1362 1414 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1363 1415 mac->m_margin = aggr_grp_max_margin(grp);
1364 1416 mac->m_v12n = MAC_VIRT_LEVEL1;
1365 1417 err = mac_register(mac, &grp->lg_mh);
1366 1418 mac_free(mac);
1367 1419 if (err != 0)
1368 1420 goto bail;
1369 1421
1370 1422 err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
1371 1423 if (err != 0) {
1372 1424 (void) mac_unregister(grp->lg_mh);
1373 1425 grp->lg_mh = NULL;
1374 1426 goto bail;
1375 1427 }
1376 1428
1377 1429 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1378 1430
1379 1431 /*
1380 1432 * Update the MAC address of the constituent ports.
1381 1433 * None of the port is attached at this time, the link state of the
1382 1434 * aggregation will not change.
1383 1435 */
1384 1436 link_state_changed = aggr_grp_update_ports_mac(grp);
1385 1437 ASSERT(!link_state_changed);
1386 1438
1387 1439 /* update outbound load balancing policy */
1388 1440 aggr_send_update_policy(grp, policy);
1389 1441
1390 1442 /* set LACP mode */
1391 1443 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1392 1444
1393 1445 /*
1394 1446 * Attach each port if necessary.
1395 1447 */
1396 1448 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1397 1449 /*
1398 1450 * Create the pseudo ring for each HW ring of the underlying
1399 1451 * port. Note that this is done after the aggr registers the
1400 1452 * mac.
1401 1453 */
1402 1454 VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
1403 1455 VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1404 1456 if (aggr_port_notify_link(grp, port))
1405 1457 link_state_changed = B_TRUE;
1406 1458
1407 1459 /*
1408 1460 * Initialize the callback functions for this port.
1409 1461 */
1410 1462 aggr_port_init_callbacks(port);
1411 1463 }
1412 1464
1413 1465 if (link_state_changed)
1414 1466 mac_link_update(grp->lg_mh, grp->lg_link_state);
1415 1467
1416 1468 /* add new group to hash table */
1417 1469 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1418 1470 (mod_hash_val_t)grp);
1419 1471 ASSERT(err == 0);
1420 1472 aggr_grp_cnt++;
1421 1473
1422 1474 mac_perim_exit(mph);
1423 1475 rw_exit(&aggr_grp_lock);
1424 1476 return (0);
1425 1477
1426 1478 bail:
1427 1479
1428 1480 grp->lg_closing = B_TRUE;
1429 1481
1430 1482 port = grp->lg_ports;
1431 1483 while (port != NULL) {
1432 1484 aggr_port_t *cport;
1433 1485
1434 1486 cport = port->lp_next;
1435 1487 aggr_port_delete(port);
1436 1488 port = cport;
1437 1489 }
1438 1490
1439 1491 /*
1440 1492 * Inform the lacp_rx thread to exit.
1441 1493 */
1442 1494 mutex_enter(&grp->lg_lacp_lock);
1443 1495 grp->lg_lacp_done = B_TRUE;
1444 1496 cv_signal(&grp->lg_lacp_cv);
1445 1497 while (grp->lg_lacp_rx_thread != NULL)
1446 1498 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1447 1499 mutex_exit(&grp->lg_lacp_lock);
1448 1500 /*
1449 1501 * Inform the tx_notify thread to exit.
1450 1502 */
1451 1503 mutex_enter(&grp->lg_tx_flowctl_lock);
1452 1504 if (grp->lg_tx_notify_thread != NULL) {
1453 1505 tid = grp->lg_tx_notify_thread->t_did;
1454 1506 grp->lg_tx_notify_done = B_TRUE;
1455 1507 cv_signal(&grp->lg_tx_flowctl_cv);
1456 1508 }
1457 1509 mutex_exit(&grp->lg_tx_flowctl_lock);
1458 1510 if (tid != 0)
1459 1511 thread_join(tid);
1460 1512
1461 1513 kmem_free(grp->lg_tx_blocked_rings,
1462 1514 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1463 1515 rw_exit(&aggr_grp_lock);
1464 1516 AGGR_GRP_REFRELE(grp);
1465 1517 return (err);
1466 1518 }
1467 1519
1468 1520 /*
1469 1521 * Return a pointer to the member of a group with specified linkid.
1470 1522 */
1471 1523 static aggr_port_t *
1472 1524 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1473 1525 {
1474 1526 aggr_port_t *port;
1475 1527
1476 1528 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1477 1529
1478 1530 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1479 1531 if (port->lp_linkid == linkid)
1480 1532 break;
1481 1533 }
1482 1534
1483 1535 return (port);
1484 1536 }
1485 1537
1486 1538 /*
1487 1539 * Stop, detach and remove a port from a link aggregation group.
1488 1540 */
1489 1541 static int
1490 1542 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1491 1543 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1492 1544 {
1493 1545 int rc = 0;
1494 1546 aggr_port_t **pport;
1495 1547 boolean_t mac_addr_changed = B_FALSE;
1496 1548 boolean_t link_state_changed = B_FALSE;
1497 1549 mac_perim_handle_t mph;
1498 1550 uint64_t val;
1499 1551 uint_t i;
1500 1552 uint_t stat;
1501 1553
1502 1554 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1503 1555 ASSERT(grp->lg_nports > 1);
1504 1556 ASSERT(!grp->lg_closing);
1505 1557
1506 1558 /* unlink port */
1507 1559 for (pport = &grp->lg_ports; *pport != port;
1508 1560 pport = &(*pport)->lp_next) {
1509 1561 if (*pport == NULL) {
1510 1562 rc = ENOENT;
1511 1563 goto done;
1512 1564 }
1513 1565 }
1514 1566 *pport = port->lp_next;
1515 1567
1516 1568 mac_perim_enter_by_mh(port->lp_mh, &mph);
1517 1569
1518 1570 /*
1519 1571 * If the MAC address of the port being removed was assigned
1520 1572 * to the group, update the group MAC address
1521 1573 * using the MAC address of a different port.
1522 1574 */
1523 1575 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1524 1576 /*
1525 1577 * Set the MAC address of the group to the
1526 1578 * MAC address of its first port.
1527 1579 */
1528 1580 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1529 1581 grp->lg_mac_addr_port = grp->lg_ports;
1530 1582 mac_addr_changed = B_TRUE;
1531 1583 }
1532 1584
1533 1585 link_state_changed = aggr_grp_detach_port(grp, port);
1534 1586
1535 1587 /*
1536 1588 * Add the counter statistics of the ports while it was aggregated
1537 1589 * to the group's residual statistics. This is done by obtaining
↓ open down ↓ |
201 lines elided |
↑ open up ↑ |
1538 1590 * the current counter from the underlying MAC then subtracting the
1539 1591 * value of the counter at the moment it was added to the
1540 1592 * aggregation.
1541 1593 */
1542 1594 for (i = 0; i < MAC_NSTAT; i++) {
1543 1595 stat = i + MAC_STAT_MIN;
1544 1596 if (!MAC_STAT_ISACOUNTER(stat))
1545 1597 continue;
1546 1598 val = aggr_port_stat(port, stat);
1547 1599 val -= port->lp_stat[i];
1600 + mutex_enter(&grp->lg_stat_lock);
1548 1601 grp->lg_stat[i] += val;
1602 + mutex_exit(&grp->lg_stat_lock);
1549 1603 }
1550 1604 for (i = 0; i < ETHER_NSTAT; i++) {
1551 1605 stat = i + MACTYPE_STAT_MIN;
1552 1606 if (!ETHER_STAT_ISACOUNTER(stat))
1553 1607 continue;
1554 1608 val = aggr_port_stat(port, stat);
1555 1609 val -= port->lp_ether_stat[i];
1610 + mutex_enter(&grp->lg_stat_lock);
1556 1611 grp->lg_ether_stat[i] += val;
1612 + mutex_exit(&grp->lg_stat_lock);
1557 1613 }
1558 1614
1559 1615 grp->lg_nports--;
1560 1616 mac_perim_exit(mph);
1561 1617
1562 1618 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1563 1619 aggr_port_delete(port);
1564 1620
1565 1621 /*
1566 1622 * If the group MAC address has changed, update the MAC address of
1567 1623 * the remaining constituent ports according to the new MAC
1568 1624 * address of the group.
1569 1625 */
1570 1626 if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1571 1627 link_state_changed = B_TRUE;
1572 1628
1573 1629 done:
1574 1630 if (mac_addr_changedp != NULL)
1575 1631 *mac_addr_changedp = mac_addr_changed;
1576 1632 if (link_state_changedp != NULL)
1577 1633 *link_state_changedp = link_state_changed;
1578 1634
1579 1635 return (rc);
1580 1636 }
1581 1637
1582 1638 /*
1583 1639 * Remove one or more ports from an existing link aggregation group.
1584 1640 */
1585 1641 int
1586 1642 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1587 1643 {
1588 1644 int rc = 0, i;
1589 1645 aggr_grp_t *grp = NULL;
1590 1646 aggr_port_t *port;
1591 1647 boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1592 1648 boolean_t link_state_update = B_FALSE, link_state_changed;
1593 1649 mac_perim_handle_t mph, pmph;
1594 1650
1595 1651 /* get group corresponding to linkid */
1596 1652 rw_enter(&aggr_grp_lock, RW_READER);
1597 1653 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1598 1654 (mod_hash_val_t *)&grp) != 0) {
1599 1655 rw_exit(&aggr_grp_lock);
1600 1656 return (ENOENT);
1601 1657 }
1602 1658 AGGR_GRP_REFHOLD(grp);
1603 1659
1604 1660 /*
1605 1661 * Hold the perimeter so that the aggregation won't be destroyed.
1606 1662 */
1607 1663 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1608 1664 rw_exit(&aggr_grp_lock);
1609 1665
1610 1666 /* we need to keep at least one port per group */
1611 1667 if (nports >= grp->lg_nports) {
1612 1668 rc = EINVAL;
1613 1669 goto bail;
1614 1670 }
1615 1671
1616 1672 /* first verify that all the groups are valid */
1617 1673 for (i = 0; i < nports; i++) {
1618 1674 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1619 1675 /* port not found */
1620 1676 rc = ENOENT;
1621 1677 goto bail;
1622 1678 }
1623 1679 }
1624 1680
1625 1681 /* clear the promiscous mode for the specified ports */
1626 1682 for (i = 0; i < nports && rc == 0; i++) {
1627 1683 /* lookup port */
1628 1684 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1629 1685 ASSERT(port != NULL);
1630 1686
1631 1687 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1632 1688 rc = aggr_port_promisc(port, B_FALSE);
1633 1689 mac_perim_exit(pmph);
1634 1690 }
1635 1691 if (rc != 0) {
1636 1692 for (i = 0; i < nports; i++) {
1637 1693 port = aggr_grp_port_lookup(grp,
1638 1694 ports[i].lp_linkid);
1639 1695 ASSERT(port != NULL);
1640 1696
1641 1697 /*
1642 1698 * Turn the promiscuous mode back on if it is required
1643 1699 * to receive the non-primary address over a port, or
1644 1700 * the promiscous mode is enabled over the aggr.
1645 1701 */
1646 1702 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1647 1703 if (port->lp_started && (grp->lg_promisc ||
1648 1704 port->lp_prom_addr != NULL)) {
1649 1705 (void) aggr_port_promisc(port, B_TRUE);
1650 1706 }
1651 1707 mac_perim_exit(pmph);
1652 1708 }
1653 1709 goto bail;
1654 1710 }
1655 1711
1656 1712 /* remove the specified ports from group */
1657 1713 for (i = 0; i < nports; i++) {
1658 1714 /* lookup port */
1659 1715 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1660 1716 ASSERT(port != NULL);
1661 1717
1662 1718 /* stop port if group has already been started */
1663 1719 if (grp->lg_started) {
1664 1720 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1665 1721 aggr_port_stop(port);
1666 1722 mac_perim_exit(pmph);
1667 1723 }
1668 1724
1669 1725 /*
1670 1726 * aggr_rem_pseudo_tx_group() is not called here. Instead
1671 1727 * it is called from inside aggr_grp_rem_port() after the
1672 1728 * port has been detached. The reason is that
1673 1729 * aggr_rem_pseudo_tx_group() removes one ring at a time
1674 1730 * and if there is still traffic going on, then there
1675 1731 * is the possibility of aggr_find_tx_ring() returning a
1676 1732 * removed ring for transmission. Once the port has been
1677 1733 * detached, that port will not be used and
1678 1734 * aggr_find_tx_ring() will not return any rings
1679 1735 * belonging to it.
1680 1736 */
1681 1737 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1682 1738
1683 1739 /* remove port from group */
1684 1740 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1685 1741 &link_state_changed);
1686 1742 ASSERT(rc == 0);
1687 1743 mac_addr_update = mac_addr_update || mac_addr_changed;
1688 1744 link_state_update = link_state_update || link_state_changed;
1689 1745 }
1690 1746
1691 1747 bail:
1692 1748 if (mac_addr_update)
1693 1749 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1694 1750 if (link_state_update)
1695 1751 mac_link_update(grp->lg_mh, grp->lg_link_state);
1696 1752
1697 1753 mac_perim_exit(mph);
1698 1754 AGGR_GRP_REFRELE(grp);
1699 1755
1700 1756 return (rc);
1701 1757 }
1702 1758
1703 1759 int
1704 1760 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
1705 1761 {
1706 1762 aggr_grp_t *grp = NULL;
1707 1763 aggr_port_t *port, *cport;
1708 1764 datalink_id_t tmpid;
1709 1765 mod_hash_val_t val;
1710 1766 mac_perim_handle_t mph, pmph;
1711 1767 int err;
1712 1768 kt_did_t tid = 0;
1713 1769
1714 1770 rw_enter(&aggr_grp_lock, RW_WRITER);
1715 1771
1716 1772 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1717 1773 (mod_hash_val_t *)&grp) != 0) {
1718 1774 rw_exit(&aggr_grp_lock);
1719 1775 return (ENOENT);
1720 1776 }
1721 1777
1722 1778 /*
1723 1779 * Note that dls_devnet_destroy() must be called before lg_lock is
1724 1780 * held. Otherwise, it will deadlock if another thread is in
1725 1781 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1726 1782 * dls_devnet_destroy() needs to delete.
1727 1783 */
1728 1784 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1729 1785 rw_exit(&aggr_grp_lock);
1730 1786 return (err);
1731 1787 }
1732 1788 ASSERT(linkid == tmpid);
1733 1789
1734 1790 /*
1735 1791 * Unregister from the MAC service module. Since this can
1736 1792 * fail if a client hasn't closed the MAC port, we gracefully
1737 1793 * fail the operation.
1738 1794 */
1739 1795 if ((err = mac_disable(grp->lg_mh)) != 0) {
1740 1796 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1741 1797 rw_exit(&aggr_grp_lock);
1742 1798 return (err);
1743 1799 }
1744 1800 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1745 1801 ASSERT(grp == (aggr_grp_t *)val);
1746 1802
1747 1803 ASSERT(aggr_grp_cnt > 0);
1748 1804 aggr_grp_cnt--;
1749 1805 rw_exit(&aggr_grp_lock);
1750 1806
1751 1807 /*
1752 1808 * Inform the lacp_rx thread to exit.
1753 1809 */
1754 1810 mutex_enter(&grp->lg_lacp_lock);
1755 1811 grp->lg_lacp_done = B_TRUE;
1756 1812 cv_signal(&grp->lg_lacp_cv);
1757 1813 while (grp->lg_lacp_rx_thread != NULL)
1758 1814 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1759 1815 mutex_exit(&grp->lg_lacp_lock);
1760 1816 /*
1761 1817 * Inform the tx_notify_thread to exit.
1762 1818 */
1763 1819 mutex_enter(&grp->lg_tx_flowctl_lock);
1764 1820 if (grp->lg_tx_notify_thread != NULL) {
1765 1821 tid = grp->lg_tx_notify_thread->t_did;
1766 1822 grp->lg_tx_notify_done = B_TRUE;
1767 1823 cv_signal(&grp->lg_tx_flowctl_cv);
1768 1824 }
1769 1825 mutex_exit(&grp->lg_tx_flowctl_lock);
1770 1826 if (tid != 0)
1771 1827 thread_join(tid);
1772 1828
1773 1829 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1774 1830
1775 1831 grp->lg_closing = B_TRUE;
1776 1832 /* detach and free MAC ports associated with group */
1777 1833 port = grp->lg_ports;
1778 1834 while (port != NULL) {
1779 1835 cport = port->lp_next;
1780 1836 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1781 1837 if (grp->lg_started)
1782 1838 aggr_port_stop(port);
1783 1839 (void) aggr_grp_detach_port(grp, port);
1784 1840 mac_perim_exit(pmph);
1785 1841 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1786 1842 aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1787 1843 aggr_port_delete(port);
1788 1844 port = cport;
1789 1845 }
1790 1846
1791 1847 mac_perim_exit(mph);
1792 1848
1793 1849 kmem_free(grp->lg_tx_blocked_rings,
1794 1850 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
↓ open down ↓ |
228 lines elided |
↑ open up ↑ |
1795 1851 /*
1796 1852 * Wait for the port's lacp timer thread and its notification callback
1797 1853 * to exit before calling mac_unregister() since both needs to access
1798 1854 * the mac perimeter of the grp.
1799 1855 */
1800 1856 aggr_grp_port_wait(grp);
1801 1857
1802 1858 VERIFY(mac_unregister(grp->lg_mh) == 0);
1803 1859 grp->lg_mh = NULL;
1804 1860
1861 + list_destroy(&(grp->lg_rx_group.arg_vlans));
1862 +
1805 1863 AGGR_GRP_REFRELE(grp);
1806 1864 return (0);
1807 1865 }
1808 1866
1809 1867 void
1810 1868 aggr_grp_free(aggr_grp_t *grp)
1811 1869 {
1812 1870 ASSERT(grp->lg_refs == 0);
1813 1871 ASSERT(grp->lg_port_ref == 0);
1814 1872 if (grp->lg_key > AGGR_MAX_KEY) {
1815 1873 id_free(key_ids, grp->lg_key);
1816 1874 grp->lg_key = 0;
1817 1875 }
1818 1876 kmem_cache_free(aggr_grp_cache, grp);
1819 1877 }
1820 1878
1821 1879 int
1822 1880 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1823 1881 aggr_grp_info_new_grp_fn_t new_grp_fn,
1824 1882 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1825 1883 {
1826 1884 aggr_grp_t *grp;
1827 1885 aggr_port_t *port;
1828 1886 mac_perim_handle_t mph, pmph;
1829 1887 int rc = 0;
1830 1888
1831 1889 /*
1832 1890 * Make sure that the aggregation link is visible from the caller's
1833 1891 * zone.
1834 1892 */
1835 1893 if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1836 1894 return (ENOENT);
1837 1895
1838 1896 rw_enter(&aggr_grp_lock, RW_READER);
1839 1897
1840 1898 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1841 1899 (mod_hash_val_t *)&grp) != 0) {
1842 1900 rw_exit(&aggr_grp_lock);
1843 1901 return (ENOENT);
1844 1902 }
1845 1903 AGGR_GRP_REFHOLD(grp);
1846 1904
1847 1905 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1848 1906 rw_exit(&aggr_grp_lock);
1849 1907
1850 1908 rc = new_grp_fn(fn_arg, grp->lg_linkid,
1851 1909 (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1852 1910 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1853 1911 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1854 1912
1855 1913 if (rc != 0)
1856 1914 goto bail;
1857 1915
1858 1916 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1859 1917 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1860 1918 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1861 1919 port->lp_state, &port->lp_lacp.ActorOperPortState);
1862 1920 mac_perim_exit(pmph);
1863 1921
1864 1922 if (rc != 0)
1865 1923 goto bail;
1866 1924 }
1867 1925
1868 1926 bail:
1869 1927 mac_perim_exit(mph);
1870 1928 AGGR_GRP_REFRELE(grp);
1871 1929 return (rc);
1872 1930 }
1873 1931
1874 1932 /*ARGSUSED*/
1875 1933 static void
1876 1934 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
↓ open down ↓ |
62 lines elided |
↑ open up ↑ |
1877 1935 {
1878 1936 miocnak(q, mp, 0, ENOTSUP);
1879 1937 }
1880 1938
1881 1939 static int
1882 1940 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1883 1941 {
1884 1942 aggr_port_t *port;
1885 1943 uint_t stat_index;
1886 1944
1945 + ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
1946 +
1887 1947 /* We only aggregate counter statistics. */
1888 1948 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1889 1949 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1890 1950 return (ENOTSUP);
1891 1951 }
1892 1952
1893 1953 /*
1894 1954 * Counter statistics for a group are computed by aggregating the
1895 1955 * counters of the members MACs while they were aggregated, plus
1896 1956 * the residual counter of the group itself, which is updated each
1897 1957 * time a MAC is removed from the group.
1898 1958 */
1899 1959 *val = 0;
1900 1960 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1901 1961 /* actual port statistic */
1902 1962 *val += aggr_port_stat(port, stat);
1903 1963 /*
1904 1964 * minus the port stat when it was added, plus any residual
1905 1965 * amount for the group.
1906 1966 */
1907 1967 if (IS_MAC_STAT(stat)) {
1908 1968 stat_index = stat - MAC_STAT_MIN;
1909 1969 *val -= port->lp_stat[stat_index];
1910 1970 *val += grp->lg_stat[stat_index];
1911 1971 } else if (IS_MACTYPE_STAT(stat)) {
1912 1972 stat_index = stat - MACTYPE_STAT_MIN;
1913 1973 *val -= port->lp_ether_stat[stat_index];
1914 1974 *val += grp->lg_ether_stat[stat_index];
1915 1975 }
1916 1976 }
1917 1977 return (0);
1918 1978 }
1919 1979
1920 1980 int
1921 1981 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1922 1982 {
1923 1983 aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
1924 1984
1925 1985 if (rx_ring->arr_hw_rh != NULL) {
1926 1986 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
1927 1987 } else {
1928 1988 aggr_port_t *port = rx_ring->arr_port;
1929 1989
1930 1990 *val = mac_stat_get(port->lp_mh, stat);
1931 1991
1932 1992 }
1933 1993 return (0);
1934 1994 }
1935 1995
1936 1996 int
1937 1997 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1938 1998 {
1939 1999 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1940 2000
1941 2001 if (tx_ring->atr_hw_rh != NULL) {
1942 2002 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1943 2003 } else {
1944 2004 aggr_port_t *port = tx_ring->atr_port;
↓ open down ↓ |
48 lines elided |
↑ open up ↑ |
1945 2005
1946 2006 *val = mac_stat_get(port->lp_mh, stat);
1947 2007 }
1948 2008 return (0);
1949 2009 }
1950 2010
1951 2011 static int
1952 2012 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1953 2013 {
1954 2014 aggr_grp_t *grp = arg;
1955 - mac_perim_handle_t mph;
1956 2015 int rval = 0;
1957 2016
1958 - mac_perim_enter_by_mh(grp->lg_mh, &mph);
2017 + mutex_enter(&grp->lg_stat_lock);
1959 2018
1960 2019 switch (stat) {
1961 2020 case MAC_STAT_IFSPEED:
1962 2021 *val = grp->lg_ifspeed;
1963 2022 break;
1964 2023
1965 2024 case ETHER_STAT_LINK_DUPLEX:
1966 2025 *val = grp->lg_link_duplex;
1967 2026 break;
1968 2027
1969 2028 default:
1970 2029 /*
1971 2030 * For all other statistics, we return the aggregated stat
1972 2031 * from the underlying ports. aggr_grp_stat() will set
1973 2032 * rval appropriately if the statistic isn't a counter.
1974 2033 */
1975 2034 rval = aggr_grp_stat(grp, stat, val);
1976 2035 }
1977 2036
1978 - mac_perim_exit(mph);
2037 + mutex_exit(&grp->lg_stat_lock);
1979 2038 return (rval);
1980 2039 }
1981 2040
1982 2041 static int
1983 2042 aggr_m_start(void *arg)
1984 2043 {
1985 2044 aggr_grp_t *grp = arg;
1986 2045 aggr_port_t *port;
1987 2046 mac_perim_handle_t mph, pmph;
1988 2047
1989 2048 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1990 2049
1991 2050 /*
1992 2051 * Attempts to start all configured members of the group.
1993 2052 * Group members will be attached when their link-up notification
1994 2053 * is received.
1995 2054 */
1996 2055 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1997 2056 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1998 2057 if (aggr_port_start(port) != 0) {
1999 2058 mac_perim_exit(pmph);
2000 2059 continue;
2001 2060 }
2002 2061
2003 2062 /*
2004 2063 * Turn on the promiscuous mode if it is required to receive
2005 2064 * the non-primary address over a port, or the promiscous
2006 2065 * mode is enabled over the aggr.
2007 2066 */
2008 2067 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2009 2068 if (aggr_port_promisc(port, B_TRUE) != 0)
2010 2069 aggr_port_stop(port);
2011 2070 }
2012 2071 mac_perim_exit(pmph);
2013 2072 }
2014 2073
2015 2074 grp->lg_started = B_TRUE;
2016 2075
2017 2076 mac_perim_exit(mph);
2018 2077 return (0);
2019 2078 }
2020 2079
2021 2080 static void
2022 2081 aggr_m_stop(void *arg)
2023 2082 {
2024 2083 aggr_grp_t *grp = arg;
2025 2084 aggr_port_t *port;
2026 2085 mac_perim_handle_t mph, pmph;
2027 2086
2028 2087 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2029 2088
2030 2089 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2031 2090 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2032 2091
2033 2092 /* reset port promiscuous mode */
2034 2093 (void) aggr_port_promisc(port, B_FALSE);
2035 2094
2036 2095 aggr_port_stop(port);
2037 2096 mac_perim_exit(pmph);
2038 2097 }
2039 2098
2040 2099 grp->lg_started = B_FALSE;
2041 2100 mac_perim_exit(mph);
2042 2101 }
2043 2102
2044 2103 static int
2045 2104 aggr_m_promisc(void *arg, boolean_t on)
2046 2105 {
2047 2106 aggr_grp_t *grp = arg;
2048 2107 aggr_port_t *port;
2049 2108 boolean_t link_state_changed = B_FALSE;
2050 2109 mac_perim_handle_t mph, pmph;
2051 2110
2052 2111 AGGR_GRP_REFHOLD(grp);
2053 2112 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2054 2113
2055 2114 ASSERT(!grp->lg_closing);
2056 2115
2057 2116 if (on == grp->lg_promisc)
2058 2117 goto bail;
2059 2118
2060 2119 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2061 2120 int err = 0;
2062 2121
2063 2122 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2064 2123 AGGR_PORT_REFHOLD(port);
2065 2124 if (!on && (port->lp_prom_addr == NULL))
2066 2125 err = aggr_port_promisc(port, B_FALSE);
2067 2126 else if (on && port->lp_started)
2068 2127 err = aggr_port_promisc(port, B_TRUE);
2069 2128
2070 2129 if (err != 0) {
2071 2130 if (aggr_grp_detach_port(grp, port))
2072 2131 link_state_changed = B_TRUE;
2073 2132 } else {
2074 2133 /*
2075 2134 * If a port was detached because of a previous
2076 2135 * failure changing the promiscuity, the port
2077 2136 * is reattached when it successfully changes
2078 2137 * the promiscuity now, and this might cause
2079 2138 * the link state of the aggregation to change.
2080 2139 */
2081 2140 if (aggr_grp_attach_port(grp, port))
2082 2141 link_state_changed = B_TRUE;
2083 2142 }
2084 2143 mac_perim_exit(pmph);
2085 2144 AGGR_PORT_REFRELE(port);
2086 2145 }
2087 2146
2088 2147 grp->lg_promisc = on;
2089 2148
2090 2149 if (link_state_changed)
2091 2150 mac_link_update(grp->lg_mh, grp->lg_link_state);
2092 2151
2093 2152 bail:
2094 2153 mac_perim_exit(mph);
2095 2154 AGGR_GRP_REFRELE(grp);
2096 2155
2097 2156 return (0);
2098 2157 }
2099 2158
2100 2159 static void
2101 2160 aggr_grp_port_rename(const char *new_name, void *arg)
2102 2161 {
2103 2162 /*
2104 2163 * aggr port's mac client name is the format of "aggr link name" plus
2105 2164 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2106 2165 */
2107 2166 int aggr_len, link_len, clnt_name_len, i;
2108 2167 char *str_end, *str_st, *str_del;
2109 2168 char aggr_name[MAXNAMELEN];
2110 2169 char link_name[MAXNAMELEN];
2111 2170 char *clnt_name;
2112 2171 aggr_grp_t *aggr_grp = arg;
2113 2172 aggr_port_t *aggr_port = aggr_grp->lg_ports;
2114 2173
2115 2174 for (i = 0; i < aggr_grp->lg_nports; i++) {
2116 2175 clnt_name = mac_client_name(aggr_port->lp_mch);
2117 2176 clnt_name_len = strlen(clnt_name);
2118 2177 str_st = clnt_name;
2119 2178 str_end = &(clnt_name[clnt_name_len]);
2120 2179 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2121 2180 ASSERT(str_del != NULL);
2122 2181 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2123 2182 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2124 2183 bzero(aggr_name, MAXNAMELEN);
2125 2184 bzero(link_name, MAXNAMELEN);
2126 2185 bcopy(clnt_name, aggr_name, aggr_len);
2127 2186 bcopy(str_del, link_name, link_len + 1);
2128 2187 bzero(clnt_name, MAXNAMELEN);
2129 2188 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2130 2189 link_name);
2131 2190
2132 2191 (void) mac_rename_primary(aggr_port->lp_mh, NULL);
2133 2192 aggr_port = aggr_port->lp_next;
2134 2193 }
2135 2194 }
2136 2195
2137 2196 /*
2138 2197 * Initialize the capabilities that are advertised for the group
2139 2198 * according to the capabilities of the constituent ports.
2140 2199 */
2141 2200 static boolean_t
2142 2201 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2143 2202 {
2144 2203 aggr_grp_t *grp = arg;
2145 2204
2146 2205 switch (cap) {
2147 2206 case MAC_CAPAB_HCKSUM: {
2148 2207 uint32_t *hcksum_txflags = cap_data;
2149 2208 *hcksum_txflags = grp->lg_hcksum_txflags;
2150 2209 break;
2151 2210 }
2152 2211 case MAC_CAPAB_LSO: {
2153 2212 mac_capab_lso_t *cap_lso = cap_data;
2154 2213
2155 2214 if (grp->lg_lso) {
2156 2215 *cap_lso = grp->lg_cap_lso;
2157 2216 break;
2158 2217 } else {
2159 2218 return (B_FALSE);
2160 2219 }
2161 2220 }
2162 2221 case MAC_CAPAB_NO_NATIVEVLAN:
2163 2222 return (!grp->lg_vlan);
2164 2223 case MAC_CAPAB_NO_ZCOPY:
2165 2224 return (!grp->lg_zcopy);
2166 2225 case MAC_CAPAB_RINGS: {
2167 2226 mac_capab_rings_t *cap_rings = cap_data;
2168 2227
2169 2228 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2170 2229 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2171 2230 cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2172 2231
2173 2232 /*
2174 2233 * An aggregation advertises only one (pseudo) RX
2175 2234 * group, which virtualizes the main/primary group of
2176 2235 * the underlying devices.
2177 2236 */
2178 2237 cap_rings->mr_gnum = 1;
2179 2238 cap_rings->mr_gaddring = NULL;
2180 2239 cap_rings->mr_gremring = NULL;
2181 2240 } else {
2182 2241 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2183 2242 cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
2184 2243 cap_rings->mr_gnum = 0;
2185 2244 }
2186 2245 cap_rings->mr_rget = aggr_fill_ring;
2187 2246 cap_rings->mr_gget = aggr_fill_group;
2188 2247 break;
2189 2248 }
2190 2249 case MAC_CAPAB_AGGR:
2191 2250 {
2192 2251 mac_capab_aggr_t *aggr_cap;
2193 2252
2194 2253 if (cap_data != NULL) {
2195 2254 aggr_cap = cap_data;
2196 2255 aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2197 2256 aggr_cap->mca_unicst = aggr_m_unicst;
2198 2257 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2199 2258 aggr_cap->mca_arg = arg;
↓ open down ↓ |
211 lines elided |
↑ open up ↑ |
2200 2259 }
2201 2260 return (B_TRUE);
2202 2261 }
2203 2262 default:
2204 2263 return (B_FALSE);
2205 2264 }
2206 2265 return (B_TRUE);
2207 2266 }
2208 2267
2209 2268 /*
2210 - * Callback funtion for MAC layer to register groups.
2269 + * Callback function for MAC layer to register groups.
2211 2270 */
2212 2271 static void
2213 2272 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2214 2273 mac_group_info_t *infop, mac_group_handle_t gh)
2215 2274 {
2216 2275 aggr_grp_t *grp = arg;
2217 2276 aggr_pseudo_rx_group_t *rx_group;
2218 2277 aggr_pseudo_tx_group_t *tx_group;
2219 2278
2220 2279 ASSERT(index == 0);
2221 2280 if (rtype == MAC_RING_TYPE_RX) {
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
2222 2281 rx_group = &grp->lg_rx_group;
2223 2282 rx_group->arg_gh = gh;
2224 2283 rx_group->arg_grp = grp;
2225 2284
2226 2285 infop->mgi_driver = (mac_group_driver_t)rx_group;
2227 2286 infop->mgi_start = NULL;
2228 2287 infop->mgi_stop = NULL;
2229 2288 infop->mgi_addmac = aggr_addmac;
2230 2289 infop->mgi_remmac = aggr_remmac;
2231 2290 infop->mgi_count = rx_group->arg_ring_cnt;
2291 +
2292 + /*
2293 + * Always set the HW VLAN callbacks. They are smart
2294 + * enough to know when a port has HW VLAN filters to
2295 + * program and when it doesn't.
2296 + */
2297 + infop->mgi_addvlan = aggr_addvlan;
2298 + infop->mgi_remvlan = aggr_remvlan;
2232 2299 } else {
2233 2300 tx_group = &grp->lg_tx_group;
2234 2301 tx_group->atg_gh = gh;
2235 2302 }
2236 2303 }
2237 2304
2238 2305 /*
2239 2306 * Callback funtion for MAC layer to register all rings.
2240 2307 */
2241 2308 static void
2242 2309 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2243 2310 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2244 2311 {
2245 2312 aggr_grp_t *grp = arg;
2246 2313
2247 2314 switch (rtype) {
2248 2315 case MAC_RING_TYPE_RX: {
2249 2316 aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group;
2250 2317 aggr_pseudo_rx_ring_t *rx_ring;
2251 2318 mac_intr_t aggr_mac_intr;
2252 2319
2253 2320 ASSERT(rg_index == 0);
2254 2321
2255 2322 ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2256 2323 rx_ring = rx_group->arg_rings + index;
2257 2324 rx_ring->arr_rh = rh;
2258 2325
2259 2326 /*
2260 2327 * Entrypoint to enable interrupt (disable poll) and
2261 2328 * disable interrupt (enable poll).
2262 2329 */
2263 2330 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2264 2331 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2265 2332 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
2266 2333 aggr_mac_intr.mi_ddi_handle = NULL;
2267 2334
2268 2335 infop->mri_driver = (mac_ring_driver_t)rx_ring;
2269 2336 infop->mri_start = aggr_pseudo_start_ring;
2270 2337 infop->mri_stop = NULL;
2271 2338
2272 2339 infop->mri_intr = aggr_mac_intr;
2273 2340 infop->mri_poll = aggr_rx_poll;
2274 2341
2275 2342 infop->mri_stat = aggr_rx_ring_stat;
2276 2343 break;
2277 2344 }
2278 2345 case MAC_RING_TYPE_TX: {
2279 2346 aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group;
2280 2347 aggr_pseudo_tx_ring_t *tx_ring;
2281 2348
2282 2349 ASSERT(rg_index == -1);
2283 2350 ASSERT(index < tx_group->atg_ring_cnt);
2284 2351
2285 2352 tx_ring = &tx_group->atg_rings[index];
2286 2353 tx_ring->atr_rh = rh;
2287 2354
2288 2355 infop->mri_driver = (mac_ring_driver_t)tx_ring;
2289 2356 infop->mri_start = NULL;
2290 2357 infop->mri_stop = NULL;
2291 2358 infop->mri_tx = aggr_ring_tx;
2292 2359 infop->mri_stat = aggr_tx_ring_stat;
2293 2360 /*
2294 2361 * Use the hw TX ring handle to find if the ring needs
2295 2362 * serialization or not. For NICs that do not expose
2296 2363 * Tx rings, atr_hw_rh will be NULL.
2297 2364 */
2298 2365 if (tx_ring->atr_hw_rh != NULL) {
2299 2366 infop->mri_flags =
2300 2367 mac_hwring_getinfo(tx_ring->atr_hw_rh);
2301 2368 }
2302 2369 break;
2303 2370 }
2304 2371 default:
2305 2372 break;
2306 2373 }
2307 2374 }
2308 2375
2309 2376 static mblk_t *
2310 2377 aggr_rx_poll(void *arg, int bytes_to_pickup)
2311 2378 {
2312 2379 aggr_pseudo_rx_ring_t *rr_ring = arg;
2313 2380 aggr_port_t *port = rr_ring->arr_port;
2314 2381 aggr_grp_t *grp = port->lp_grp;
2315 2382 mblk_t *mp_chain, *mp, **mpp;
2316 2383
2317 2384 mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2318 2385
2319 2386 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2320 2387 return (mp_chain);
2321 2388
2322 2389 mpp = &mp_chain;
2323 2390 while ((mp = *mpp) != NULL) {
2324 2391 if (MBLKL(mp) >= sizeof (struct ether_header)) {
2325 2392 struct ether_header *ehp;
2326 2393
2327 2394 ehp = (struct ether_header *)mp->b_rptr;
2328 2395 if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2329 2396 *mpp = mp->b_next;
2330 2397 mp->b_next = NULL;
2331 2398 aggr_recv_lacp(port,
2332 2399 (mac_resource_handle_t)rr_ring, mp);
2333 2400 continue;
2334 2401 }
2335 2402 }
2336 2403
2337 2404 if (!port->lp_collector_enabled) {
2338 2405 *mpp = mp->b_next;
2339 2406 mp->b_next = NULL;
2340 2407 freemsg(mp);
2341 2408 continue;
2342 2409 }
2343 2410 mpp = &mp->b_next;
2344 2411 }
2345 2412 return (mp_chain);
2346 2413 }
2347 2414
2348 2415 static int
2349 2416 aggr_addmac(void *arg, const uint8_t *mac_addr)
2350 2417 {
2351 2418 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
2352 2419 aggr_unicst_addr_t *addr, **pprev;
2353 2420 aggr_grp_t *grp = rx_group->arg_grp;
2354 2421 aggr_port_t *port, *p;
2355 2422 mac_perim_handle_t mph;
2356 2423 int err = 0;
2357 2424
2358 2425 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2359 2426
2360 2427 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2361 2428 mac_perim_exit(mph);
2362 2429 return (0);
2363 2430 }
2364 2431
2365 2432 /*
2366 2433 * Insert this mac address into the list of mac addresses owned by
2367 2434 * the aggregation pseudo group.
2368 2435 */
2369 2436 pprev = &rx_group->arg_macaddr;
2370 2437 while ((addr = *pprev) != NULL) {
2371 2438 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2372 2439 mac_perim_exit(mph);
2373 2440 return (EEXIST);
2374 2441 }
2375 2442 pprev = &addr->aua_next;
2376 2443 }
2377 2444 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2378 2445 bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2379 2446 addr->aua_next = NULL;
2380 2447 *pprev = addr;
2381 2448
2382 2449 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2383 2450 if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2384 2451 break;
2385 2452
2386 2453 if (err != 0) {
2387 2454 for (p = grp->lg_ports; p != port; p = p->lp_next)
2388 2455 aggr_port_remmac(p, mac_addr);
2389 2456
2390 2457 *pprev = NULL;
2391 2458 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2392 2459 }
2393 2460
2394 2461 mac_perim_exit(mph);
2395 2462 return (err);
2396 2463 }
2397 2464
2398 2465 static int
2399 2466 aggr_remmac(void *arg, const uint8_t *mac_addr)
2400 2467 {
2401 2468 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
2402 2469 aggr_unicst_addr_t *addr, **pprev;
2403 2470 aggr_grp_t *grp = rx_group->arg_grp;
2404 2471 aggr_port_t *port;
2405 2472 mac_perim_handle_t mph;
2406 2473 int err = 0;
2407 2474
2408 2475 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2409 2476
2410 2477 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2411 2478 mac_perim_exit(mph);
2412 2479 return (0);
2413 2480 }
2414 2481
2415 2482 /*
2416 2483 * Insert this mac address into the list of mac addresses owned by
2417 2484 * the aggregation pseudo group.
2418 2485 */
2419 2486 pprev = &rx_group->arg_macaddr;
2420 2487 while ((addr = *pprev) != NULL) {
2421 2488 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2422 2489 pprev = &addr->aua_next;
2423 2490 continue;
2424 2491 }
2425 2492 break;
2426 2493 }
2427 2494 if (addr == NULL) {
2428 2495 mac_perim_exit(mph);
2429 2496 return (EINVAL);
↓ open down ↓ |
188 lines elided |
↑ open up ↑ |
2430 2497 }
2431 2498
2432 2499 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2433 2500 aggr_port_remmac(port, mac_addr);
2434 2501
2435 2502 *pprev = addr->aua_next;
2436 2503 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2437 2504
2438 2505 mac_perim_exit(mph);
2439 2506 return (err);
2507 +}
2508 +
2509 +/*
2510 + * Search for VID in the Rx group's list and return a pointer if
2511 + * found. Otherwise return NULL.
2512 + */
2513 +static aggr_vlan_t *
2514 +aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
2515 +{
2516 + ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
2517 + for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
2518 + avp = list_next(&rx_group->arg_vlans, avp)) {
2519 + if (avp->av_vid == vid)
2520 + return (avp);
2521 + }
2522 +
2523 + return (NULL);
2524 +}
2525 +
2526 +/*
2527 + * Accept traffic on the specified VID.
2528 + *
2529 + * Persist VLAN state in the aggr so that ports added later will
2530 + * receive the correct filters. In the future it would be nice to
2531 + * allow aggr to iterate its clients instead of duplicating state.
2532 + */
2533 +static int
2534 +aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
2535 +{
2536 + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2537 + aggr_grp_t *aggr = rx_group->arg_grp;
2538 + aggr_port_t *port, *p;
2539 + mac_perim_handle_t mph;
2540 + int err = 0;
2541 + aggr_vlan_t *avp = NULL;
2542 +
2543 + mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2544 +
2545 + if (vid == MAC_VLAN_UNTAGGED) {
2546 + /*
2547 + * Aggr is both a MAC provider and MAC client. As a
2548 + * MAC provider it is passed MAC_VLAN_UNTAGGED by its
2549 + * client. As a client itself, it should pass
2550 + * VLAN_ID_NONE to its ports.
2551 + */
2552 + vid = VLAN_ID_NONE;
2553 + rx_group->arg_untagged++;
2554 + goto update_ports;
2555 + }
2556 +
2557 + avp = aggr_find_vlan(rx_group, vid);
2558 +
2559 + if (avp != NULL) {
2560 + avp->av_refs++;
2561 + mac_perim_exit(mph);
2562 + return (0);
2563 + }
2564 +
2565 + avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
2566 + avp->av_vid = vid;
2567 + avp->av_refs = 1;
2568 +
2569 +update_ports:
2570 + for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2571 + if ((err = aggr_port_addvlan(port, vid)) != 0)
2572 + break;
2573 +
2574 + if (err != 0) {
2575 + /*
2576 + * If any of these calls fail then we are in a
2577 + * situation where the ports have different HW state.
2578 + * There's no reasonable action the MAC client can
2579 + * take in this scenario to rectify the situation.
2580 + */
2581 + for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2582 + int err2;
2583 +
2584 + if ((err2 = aggr_port_remvlan(p, vid)) != 0) {
2585 + cmn_err(CE_WARN, "Failed to remove VLAN %u"
2586 + " from port %s: errno %d.", vid,
2587 + mac_client_name(p->lp_mch), err2);
2588 + }
2589 +
2590 + }
2591 +
2592 + if (vid == VLAN_ID_NONE)
2593 + rx_group->arg_untagged--;
2594 +
2595 + if (avp != NULL) {
2596 + kmem_free(avp, sizeof (aggr_vlan_t));
2597 + avp = NULL;
2598 + }
2599 + }
2600 +
2601 + if (avp != NULL)
2602 + list_insert_tail(&rx_group->arg_vlans, avp);
2603 +
2604 +done:
2605 + mac_perim_exit(mph);
2606 + return (err);
2607 +}
2608 +
2609 +/*
2610 + * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
2611 + */
2612 +static int
2613 +aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
2614 +{
2615 + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2616 + aggr_grp_t *aggr = rx_group->arg_grp;
2617 + aggr_port_t *port, *p;
2618 + mac_perim_handle_t mph;
2619 + int err = 0;
2620 + aggr_vlan_t *avp = NULL;
2621 +
2622 + mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2623 +
2624 + /*
2625 + * See the comment in aggr_addvlan().
2626 + */
2627 + if (vid == MAC_VLAN_UNTAGGED) {
2628 + vid = VLAN_ID_NONE;
2629 + rx_group->arg_untagged--;
2630 +
2631 + if (rx_group->arg_untagged > 0)
2632 + goto done;
2633 +
2634 + goto update_ports;
2635 + }
2636 +
2637 + avp = aggr_find_vlan(rx_group, vid);
2638 +
2639 + if (avp == NULL) {
2640 + err = ENOENT;
2641 + goto done;
2642 + }
2643 +
2644 + avp->av_refs--;
2645 +
2646 + if (avp->av_refs > 0)
2647 + goto done;
2648 +
2649 +update_ports:
2650 + for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2651 + if ((err = aggr_port_remvlan(port, vid)) != 0)
2652 + break;
2653 +
2654 + /*
2655 + * See the comment in aggr_addvlan() for justification of the
2656 + * use of VERIFY here.
2657 + */
2658 + if (err != 0) {
2659 + for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2660 + int err2;
2661 +
2662 + if ((err2 = aggr_port_addvlan(p, vid)) != 0) {
2663 + cmn_err(CE_WARN, "Failed to add VLAN %u"
2664 + " to port %s: errno %d.", vid,
2665 + mac_client_name(p->lp_mch), err2);
2666 + }
2667 + }
2668 +
2669 + if (avp != NULL)
2670 + avp->av_refs++;
2671 +
2672 + if (vid == VLAN_ID_NONE)
2673 + rx_group->arg_untagged++;
2674 +
2675 + goto done;
2676 + }
2677 +
2678 + if (err == 0 && avp != NULL) {
2679 + VERIFY3U(avp->av_refs, ==, 0);
2680 + list_remove(&rx_group->arg_vlans, avp);
2681 + kmem_free(avp, sizeof (aggr_vlan_t));
2682 + }
2683 +
2684 +done:
2685 + mac_perim_exit(mph);
2686 + return (err);
2440 2687 }
2441 2688
2442 2689 /*
2443 2690 * Add or remove the multicast addresses that are defined for the group
2444 2691 * to or from the specified port.
2445 2692 *
2446 2693 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2447 2694 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2448 2695 * called when the port is either stopped or detached.
2449 2696 */
2450 2697 void
2451 2698 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2452 2699 {
2453 2700 aggr_grp_t *grp = port->lp_grp;
2454 2701
2455 2702 ASSERT(MAC_PERIM_HELD(port->lp_mh));
2456 2703 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2457 2704
2458 2705 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2459 2706 return;
2460 2707
2461 2708 mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2462 2709 }
2463 2710
2464 2711 static int
2465 2712 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2466 2713 {
2467 2714 aggr_grp_t *grp = arg;
2468 2715 aggr_port_t *port = NULL, *errport = NULL;
2469 2716 mac_perim_handle_t mph;
2470 2717 int err = 0;
2471 2718
2472 2719 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2473 2720 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2474 2721 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2475 2722 !port->lp_started) {
2476 2723 continue;
2477 2724 }
2478 2725 err = aggr_port_multicst(port, add, addrp);
2479 2726 if (err != 0) {
2480 2727 errport = port;
2481 2728 break;
2482 2729 }
2483 2730 }
2484 2731
2485 2732 /*
2486 2733 * At least one port caused error return and this error is returned to
2487 2734 * mac, eventually a NAK would be sent upwards.
2488 2735 * Some ports have this multicast address listed now, and some don't.
2489 2736 * Treat this error as a whole aggr failure not individual port failure.
2490 2737 * Therefore remove this multicast address from other ports.
2491 2738 */
2492 2739 if ((err != 0) && add) {
2493 2740 for (port = grp->lg_ports; port != errport;
2494 2741 port = port->lp_next) {
2495 2742 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2496 2743 !port->lp_started) {
2497 2744 continue;
2498 2745 }
2499 2746 (void) aggr_port_multicst(port, B_FALSE, addrp);
2500 2747 }
2501 2748 }
2502 2749 mac_perim_exit(mph);
2503 2750 return (err);
2504 2751 }
2505 2752
2506 2753 static int
2507 2754 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2508 2755 {
2509 2756 aggr_grp_t *grp = arg;
2510 2757 mac_perim_handle_t mph;
2511 2758 int err;
2512 2759
2513 2760 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2514 2761 err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2515 2762 0, 0);
2516 2763 mac_perim_exit(mph);
2517 2764 return (err);
2518 2765 }
2519 2766
2520 2767 /*
2521 2768 * Initialize the capabilities that are advertised for the group
2522 2769 * according to the capabilities of the constituent ports.
2523 2770 */
2524 2771 static void
2525 2772 aggr_grp_capab_set(aggr_grp_t *grp)
2526 2773 {
2527 2774 uint32_t cksum;
2528 2775 aggr_port_t *port;
2529 2776 mac_capab_lso_t cap_lso;
2530 2777
2531 2778 ASSERT(grp->lg_mh == NULL);
2532 2779 ASSERT(grp->lg_ports != NULL);
2533 2780
2534 2781 grp->lg_hcksum_txflags = (uint32_t)-1;
2535 2782 grp->lg_zcopy = B_TRUE;
2536 2783 grp->lg_vlan = B_TRUE;
2537 2784
2538 2785 grp->lg_lso = B_TRUE;
2539 2786 grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2540 2787 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2541 2788
2542 2789 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2543 2790 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2544 2791 cksum = 0;
2545 2792 grp->lg_hcksum_txflags &= cksum;
2546 2793
2547 2794 grp->lg_vlan &=
2548 2795 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2549 2796
2550 2797 grp->lg_zcopy &=
2551 2798 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2552 2799
2553 2800 grp->lg_lso &=
2554 2801 mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2555 2802 if (grp->lg_lso) {
2556 2803 grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2557 2804 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2558 2805 cap_lso.lso_basic_tcp_ipv4.lso_max)
2559 2806 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2560 2807 cap_lso.lso_basic_tcp_ipv4.lso_max;
2561 2808 }
2562 2809 }
2563 2810 }
2564 2811
2565 2812 /*
2566 2813 * Checks whether the capabilities of the port being added are compatible
2567 2814 * with the current capabilities of the aggregation.
2568 2815 */
2569 2816 static boolean_t
2570 2817 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2571 2818 {
2572 2819 uint32_t hcksum_txflags;
2573 2820
2574 2821 ASSERT(grp->lg_ports != NULL);
2575 2822
2576 2823 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2577 2824 grp->lg_vlan) != grp->lg_vlan) {
2578 2825 return (B_FALSE);
2579 2826 }
2580 2827
2581 2828 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2582 2829 grp->lg_zcopy) != grp->lg_zcopy) {
2583 2830 return (B_FALSE);
2584 2831 }
2585 2832
2586 2833 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2587 2834 if (grp->lg_hcksum_txflags != 0)
2588 2835 return (B_FALSE);
2589 2836 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2590 2837 grp->lg_hcksum_txflags) {
2591 2838 return (B_FALSE);
2592 2839 }
2593 2840
2594 2841 if (grp->lg_lso) {
2595 2842 mac_capab_lso_t cap_lso;
2596 2843
2597 2844 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2598 2845 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2599 2846 grp->lg_cap_lso.lso_flags)
2600 2847 return (B_FALSE);
2601 2848 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2602 2849 cap_lso.lso_basic_tcp_ipv4.lso_max)
2603 2850 return (B_FALSE);
2604 2851 } else {
2605 2852 return (B_FALSE);
2606 2853 }
2607 2854 }
2608 2855
2609 2856 return (B_TRUE);
2610 2857 }
2611 2858
2612 2859 /*
2613 2860 * Returns the maximum SDU according to the SDU of the constituent ports.
2614 2861 */
2615 2862 static uint_t
2616 2863 aggr_grp_max_sdu(aggr_grp_t *grp)
2617 2864 {
2618 2865 uint_t max_sdu = (uint_t)-1;
2619 2866 aggr_port_t *port;
2620 2867
2621 2868 ASSERT(grp->lg_ports != NULL);
2622 2869
2623 2870 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2624 2871 uint_t port_sdu_max;
2625 2872
2626 2873 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2627 2874 if (max_sdu > port_sdu_max)
2628 2875 max_sdu = port_sdu_max;
2629 2876 }
2630 2877
2631 2878 return (max_sdu);
2632 2879 }
2633 2880
2634 2881 /*
2635 2882 * Checks if the maximum SDU of the specified port is compatible
2636 2883 * with the maximum SDU of the specified aggregation group, returns
2637 2884 * B_TRUE if it is, B_FALSE otherwise.
2638 2885 */
2639 2886 static boolean_t
2640 2887 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2641 2888 {
2642 2889 uint_t port_sdu_max;
2643 2890
2644 2891 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2645 2892 return (port_sdu_max >= grp->lg_max_sdu);
2646 2893 }
2647 2894
2648 2895 /*
2649 2896 * Returns the maximum margin according to the margin of the constituent ports.
2650 2897 */
2651 2898 static uint32_t
2652 2899 aggr_grp_max_margin(aggr_grp_t *grp)
2653 2900 {
2654 2901 uint32_t margin = UINT32_MAX;
2655 2902 aggr_port_t *port;
2656 2903
2657 2904 ASSERT(grp->lg_mh == NULL);
2658 2905 ASSERT(grp->lg_ports != NULL);
2659 2906
2660 2907 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2661 2908 if (margin > port->lp_margin)
2662 2909 margin = port->lp_margin;
2663 2910 }
2664 2911
2665 2912 grp->lg_margin = margin;
2666 2913 return (margin);
2667 2914 }
2668 2915
2669 2916 /*
2670 2917 * Checks if the maximum margin of the specified port is compatible
2671 2918 * with the maximum margin of the specified aggregation group, returns
2672 2919 * B_TRUE if it is, B_FALSE otherwise.
2673 2920 */
2674 2921 static boolean_t
2675 2922 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2676 2923 {
2677 2924 if (port->lp_margin >= grp->lg_margin)
2678 2925 return (B_TRUE);
2679 2926
2680 2927 /*
2681 2928 * See whether the current margin value is allowed to be changed to
2682 2929 * the new value.
2683 2930 */
2684 2931 if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2685 2932 return (B_FALSE);
2686 2933
2687 2934 grp->lg_margin = port->lp_margin;
2688 2935 return (B_TRUE);
2689 2936 }
2690 2937
2691 2938 /*
2692 2939 * Set MTU on individual ports of an aggregation group
2693 2940 */
2694 2941 static int
2695 2942 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2696 2943 uint32_t *old_mtu)
2697 2944 {
2698 2945 boolean_t removed = B_FALSE;
2699 2946 mac_perim_handle_t mph;
2700 2947 mac_diag_t diag;
2701 2948 int err, rv, retry = 0;
2702 2949
2703 2950 if (port->lp_mah != NULL) {
2704 2951 (void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2705 2952 port->lp_mah = NULL;
2706 2953 removed = B_TRUE;
2707 2954 }
2708 2955 err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2709 2956 try_again:
2710 2957 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2711 2958 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2712 2959 &port->lp_mah, 0, &diag)) != 0) {
2713 2960 /*
2714 2961 * following is a workaround for a bug in 'bge' driver.
2715 2962 * See CR 6794654 for more information and this work around
2716 2963 * will be removed once the CR is fixed.
2717 2964 */
2718 2965 if (rv == EIO && retry++ < 3) {
2719 2966 delay(2 * hz);
2720 2967 goto try_again;
2721 2968 }
2722 2969 /*
2723 2970 * if mac_unicast_add() failed while setting the MTU,
2724 2971 * detach the port from the group.
2725 2972 */
2726 2973 mac_perim_enter_by_mh(port->lp_mh, &mph);
2727 2974 (void) aggr_grp_detach_port(grp, port);
2728 2975 mac_perim_exit(mph);
2729 2976 cmn_err(CE_WARN, "Unable to restart the port %s while "
2730 2977 "setting MTU. Detaching the port from the aggregation.",
2731 2978 mac_client_name(port->lp_mch));
2732 2979 }
2733 2980 return (err);
2734 2981 }
2735 2982
2736 2983 static int
2737 2984 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2738 2985 {
2739 2986 int err = 0, i, rv;
2740 2987 aggr_port_t *port;
2741 2988 uint32_t *mtu;
2742 2989
2743 2990 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2744 2991
2745 2992 /*
2746 2993 * If the MTU being set is equal to aggr group's maximum
2747 2994 * allowable value, then there is nothing to change
2748 2995 */
2749 2996 if (sdu == grp->lg_max_sdu)
2750 2997 return (0);
2751 2998
2752 2999 /* 0 is aggr group's min sdu */
2753 3000 if (sdu == 0)
2754 3001 return (EINVAL);
2755 3002
2756 3003 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2757 3004 for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2758 3005 port = port->lp_next, i++) {
2759 3006 err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2760 3007 }
2761 3008 if (err != 0) {
2762 3009 /* recover from error: reset the mtus of the ports */
2763 3010 aggr_port_t *tmp;
2764 3011
2765 3012 for (tmp = grp->lg_ports, i = 0; tmp != port;
2766 3013 tmp = tmp->lp_next, i++) {
2767 3014 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2768 3015 }
2769 3016 goto bail;
2770 3017 }
2771 3018 grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2772 3019 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2773 3020 ASSERT(rv == 0);
2774 3021 bail:
2775 3022 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2776 3023 return (err);
2777 3024 }
2778 3025
2779 3026 /*
2780 3027 * Callback functions for set/get of properties
2781 3028 */
2782 3029 /*ARGSUSED*/
2783 3030 static int
2784 3031 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2785 3032 uint_t pr_valsize, const void *pr_val)
2786 3033 {
2787 3034 int err = ENOTSUP;
2788 3035 aggr_grp_t *grp = m_driver;
2789 3036
2790 3037 switch (pr_num) {
2791 3038 case MAC_PROP_MTU: {
2792 3039 uint32_t mtu;
2793 3040
2794 3041 if (pr_valsize < sizeof (mtu)) {
2795 3042 err = EINVAL;
2796 3043 break;
2797 3044 }
2798 3045 bcopy(pr_val, &mtu, sizeof (mtu));
2799 3046 err = aggr_sdu_update(grp, mtu);
2800 3047 break;
2801 3048 }
2802 3049 default:
2803 3050 break;
2804 3051 }
2805 3052 return (err);
2806 3053 }
2807 3054
2808 3055 typedef struct rboundary {
2809 3056 uint32_t bval;
2810 3057 int btype;
2811 3058 } rboundary_t;
2812 3059
2813 3060 /*
2814 3061 * This function finds the intersection of mtu ranges stored in arrays -
2815 3062 * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
2816 3063 * Individual arrays are assumed to contain non-overlapping ranges.
2817 3064 * Algorithm:
2818 3065 * A range has two boundaries - min and max. We scan all arrays and store
2819 3066 * each boundary as a separate element in a temporary array. We also store
2820 3067 * the boundary types, min or max, as +1 or -1 respectively in the temporary
2821 3068 * array. Then we sort the temporary array in ascending order. We scan the
2822 3069 * sorted array from lower to higher values and keep a cumulative sum of
2823 3070 * boundary types. Element in the temporary array for which the sum reaches
2824 3071 * mcount is a min boundary of a range in the result and next element will be
2825 3072 * max boundary.
2826 3073 *
2827 3074 * Example for mcount = 3,
2828 3075 *
2829 3076 * ----|_________|-------|_______|----|__|------ mrange[0]
2830 3077 *
2831 3078 * -------|________|--|____________|-----|___|-- mrange[1]
2832 3079 *
2833 3080 * --------|________________|-------|____|------ mrange[2]
2834 3081 *
2835 3082 * 3 2 1
2836 3083 * \|/
2837 3084 * 1 23 2 1 2 3 2 1 01 2 V 0 <- the sum
2838 3085 * ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
2839 3086 *
2840 3087 * same min and max
2841 3088 * V
2842 3089 * --------|_____|-------|__|------------|------ intersecting ranges
2843 3090 */
2844 3091 void
2845 3092 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
2846 3093 mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
2847 3094 {
2848 3095 mac_propval_uint32_range_t *rval, *ur;
2849 3096 int rmaxcnt, rcount;
2850 3097 size_t sz_range32;
2851 3098 rboundary_t *ta; /* temporary array */
2852 3099 rboundary_t temp;
2853 3100 boolean_t range_started = B_FALSE;
2854 3101 int i, j, m, sum;
2855 3102
2856 3103 sz_range32 = sizeof (mac_propval_uint32_range_t);
2857 3104
2858 3105 for (i = 0, rmaxcnt = 0; i < mcount; i++)
2859 3106 rmaxcnt += mrange[i]->mpr_count;
2860 3107
2861 3108 /* Allocate enough space to store the results */
2862 3109 rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
2863 3110
2864 3111 /* Number of boundaries are twice as many as ranges */
2865 3112 ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
2866 3113
2867 3114 for (i = 0, m = 0; i < mcount; i++) {
2868 3115 ur = &(mrange[i]->mpr_range_uint32[0]);
2869 3116 for (j = 0; j < mrange[i]->mpr_count; j++) {
2870 3117 ta[m].bval = ur[j].mpur_min;
2871 3118 ta[m++].btype = 1;
2872 3119 ta[m].bval = ur[j].mpur_max;
2873 3120 ta[m++].btype = -1;
2874 3121 }
2875 3122 }
2876 3123
2877 3124 /*
2878 3125 * Sort the temporary array in ascending order of bval;
2879 3126 * if boundary values are same then sort on btype.
2880 3127 */
2881 3128 for (i = 0; i < m-1; i++) {
2882 3129 for (j = i+1; j < m; j++) {
2883 3130 if ((ta[i].bval > ta[j].bval) ||
2884 3131 ((ta[i].bval == ta[j].bval) &&
2885 3132 (ta[i].btype < ta[j].btype))) {
2886 3133 temp = ta[i];
2887 3134 ta[i] = ta[j];
2888 3135 ta[j] = temp;
2889 3136 }
2890 3137 }
2891 3138 }
2892 3139
2893 3140 /* Walk through temporary array to find all ranges in the results */
2894 3141 for (i = 0, sum = 0, rcount = 0; i < m; i++) {
2895 3142 sum += ta[i].btype;
2896 3143 if (sum == mcount) {
2897 3144 rval[rcount].mpur_min = ta[i].bval;
2898 3145 range_started = B_TRUE;
2899 3146 } else if (sum < mcount && range_started) {
2900 3147 rval[rcount++].mpur_max = ta[i].bval;
2901 3148 range_started = B_FALSE;
2902 3149 }
2903 3150 }
2904 3151
2905 3152 *prval = rval;
2906 3153 *prmaxcnt = rmaxcnt;
2907 3154 *prcount = rcount;
2908 3155
2909 3156 kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
2910 3157 }
2911 3158
2912 3159 /*
2913 3160 * Returns the mtu ranges which could be supported by aggr group.
2914 3161 * prmaxcnt returns the size of the buffer prval, prcount returns
2915 3162 * the number of valid entries in prval. Caller is responsible
2916 3163 * for freeing up prval.
2917 3164 */
2918 3165 int
2919 3166 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
2920 3167 int *prmaxcnt, int *prcount)
2921 3168 {
2922 3169 mac_propval_range_t **vals;
2923 3170 aggr_port_t *port;
2924 3171 mac_perim_handle_t mph;
2925 3172 uint_t i, numr;
2926 3173 int err = 0;
2927 3174 size_t sz_propval, sz_range32;
2928 3175 size_t size;
2929 3176
2930 3177 sz_propval = sizeof (mac_propval_range_t);
2931 3178 sz_range32 = sizeof (mac_propval_uint32_range_t);
2932 3179
2933 3180 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2934 3181
2935 3182 vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
2936 3183 KM_SLEEP);
2937 3184
2938 3185 for (port = grp->lg_ports, i = 0; port != NULL;
2939 3186 port = port->lp_next, i++) {
2940 3187
2941 3188 size = sz_propval;
2942 3189 vals[i] = kmem_alloc(size, KM_SLEEP);
2943 3190 vals[i]->mpr_count = 1;
2944 3191
2945 3192 mac_perim_enter_by_mh(port->lp_mh, &mph);
2946 3193
2947 3194 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2948 3195 NULL, 0, vals[i], NULL);
2949 3196 if (err == ENOSPC) {
2950 3197 /*
2951 3198 * Not enough space to hold all ranges.
2952 3199 * Allocate extra space as indicated and retry.
2953 3200 */
2954 3201 numr = vals[i]->mpr_count;
2955 3202 kmem_free(vals[i], sz_propval);
2956 3203 size = sz_propval + (numr - 1) * sz_range32;
2957 3204 vals[i] = kmem_alloc(size, KM_SLEEP);
2958 3205 vals[i]->mpr_count = numr;
2959 3206 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2960 3207 NULL, 0, vals[i], NULL);
2961 3208 ASSERT(err != ENOSPC);
2962 3209 }
2963 3210 mac_perim_exit(mph);
2964 3211 if (err != 0) {
2965 3212 kmem_free(vals[i], size);
2966 3213 vals[i] = NULL;
2967 3214 break;
2968 3215 }
2969 3216 }
2970 3217
2971 3218 /*
2972 3219 * if any of the underlying ports does not support changing MTU then
2973 3220 * just return ENOTSUP
2974 3221 */
2975 3222 if (port != NULL) {
2976 3223 ASSERT(err != 0);
2977 3224 goto done;
2978 3225 }
2979 3226
2980 3227 aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
2981 3228 prcount);
2982 3229
2983 3230 done:
2984 3231 for (i = 0; i < grp->lg_nports; i++) {
2985 3232 if (vals[i] != NULL) {
2986 3233 numr = vals[i]->mpr_count;
2987 3234 size = sz_propval + (numr - 1) * sz_range32;
2988 3235 kmem_free(vals[i], size);
2989 3236 }
2990 3237 }
2991 3238
2992 3239 kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
2993 3240 return (err);
2994 3241 }
2995 3242
2996 3243 static void
2997 3244 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2998 3245 mac_prop_info_handle_t prh)
2999 3246 {
3000 3247 aggr_grp_t *grp = m_driver;
3001 3248 mac_propval_uint32_range_t *rval = NULL;
3002 3249 int i, rcount, rmaxcnt;
3003 3250 int err = 0;
3004 3251
3005 3252 _NOTE(ARGUNUSED(pr_name));
3006 3253
3007 3254 switch (pr_num) {
3008 3255 case MAC_PROP_MTU:
3009 3256
3010 3257 err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
3011 3258 &rcount);
3012 3259 if (err != 0) {
3013 3260 ASSERT(rval == NULL);
3014 3261 return;
3015 3262 }
3016 3263 for (i = 0; i < rcount; i++) {
3017 3264 mac_prop_info_set_range_uint32(prh,
3018 3265 rval[i].mpur_min, rval[i].mpur_max);
3019 3266 }
3020 3267 kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
3021 3268 break;
3022 3269 }
3023 3270 }
↓ open down ↓ |
574 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX