Print this page
11493 aggr needs support for multiple pseudo rx groups
Portions contributed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/aggr/aggr_grp.c
+++ new/usr/src/uts/common/io/aggr/aggr_grp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2018 Joyent, Inc.
24 24 */
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
25 25
26 26 /*
27 27 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28 28 *
29 29 * An instance of the structure aggr_grp_t is allocated for each
30 30 * link aggregation group. When created, aggr_grp_t objects are
31 31 * entered into the aggr_grp_hash hash table maintained by the modhash
32 32 * module. The hash key is the linkid associated with the link
33 33 * aggregation group.
34 34 *
35 - * A set of MAC ports are associated with each association group.
35 + * Each aggregation contains a set of ports. The port is represented
36 + * by the aggr_port_t structure. A port consists of a single MAC
37 + * client which has exclusive (MCIS_EXCLUSIVE) use of the underlying
38 + * MAC. This client is used by the aggr to send and receive LACP
39 + * traffic. Each port client takes on the same MAC unicast address --
40 + * the address of the aggregation itself (taken from the first port by
41 + * default).
36 42 *
37 - * Aggr pseudo TX rings
38 - * --------------------
39 - * The underlying ports (NICs) in an aggregation can have TX rings. To
40 - * enhance aggr's performance, these TX rings are made available to the
41 - * aggr layer as pseudo TX rings. The concept of pseudo rings are not new.
42 - * They are already present and implemented on the RX side. It is called
43 - * as pseudo RX rings. The same concept is extended to the TX side where
44 - * each TX ring of an underlying port is reflected in aggr as a pseudo
45 - * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
46 - * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
47 - * TX ring is given to the aggregation layer.
43 + * The MAC client that hangs off each aggr port is not your typical
44 + * MAC client. Not only does it have exclusive control of the MAC, but
45 + * it also has no Tx or Rx SRSes. An SRS is designed to queue and
46 + * fanout traffic among L4 protocols; but the aggr is an intermediary,
47 + * not a consumer. Instead of using SRSes, the aggr puts the
48 + * underlying hardware rings into passthru mode and ships packets up
49 + * via a direct call to aggr_recv_cb(). This allows aggr to enforce
50 + * LACP while passing all other traffic up to clients of the aggr.
48 51 *
52 + * Pseudo Rx Groups and Rings
53 + * --------------------------
54 + *
55 + * It is imperative for client performance that the aggr provide as
56 + * many MAC groups as possible. In order to use the underlying HW
57 + * resources, aggr creates pseudo groups to aggregate the underlying
58 + * HW groups. Every HW group gets mapped to a pseudo group; and every
59 + * HW ring in that group gets mapped to a pseudo ring. The pseudo
60 + * group at index 0 combines all the HW groups at index 0 from each
61 + * port, etc. The aggr's MAC then creates normal MAC groups and rings
62 + * out of these pseudo groups and rings to present to the aggr's
63 + * clients. To the clients, the aggr's groups and rings are absolutely
64 + * no different than a NIC's groups or rings.
65 + *
66 + * Pseudo Tx Rings
67 + * ---------------
68 + *
69 + * The underlying ports (NICs) in an aggregation can have Tx rings. To
70 + * enhance aggr's performance, these Tx rings are made available to
71 + * the aggr layer as pseudo Tx rings. The concept of pseudo rings is
72 + * not new. They are already present and implemented on the Rx side.
73 + * The same concept is extended to the Tx side where each Tx ring of
74 + * an underlying port is reflected in aggr as a pseudo Tx ring. Thus
75 + * each pseudo Tx ring will map to a specific hardware Tx ring. Even
76 + * in the case of a NIC that does not have a Tx ring, a pseudo Tx ring
77 + * is given to the aggregation layer.
78 + *
49 79 * With this change, the outgoing stack depth looks much better:
50 80 *
51 81 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
52 82 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
53 83 *
54 - * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
84 + * Two new modes are introduced to mac_tx() to handle aggr pseudo Tx rings:
55 85 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
56 86 *
57 87 * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
58 - * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
88 + * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) Tx
59 89 * ring belonging to a port on which the packet has to be sent.
60 90 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
61 - * policy and then uses the fanout_hint passed to it to pick a TX ring from
91 + * policy and then uses the fanout_hint passed to it to pick a Tx ring from
62 92 * the selected port.
63 93 *
64 94 * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
65 95 * bandwidth limit is applied first on the outgoing packet and the packets
66 96 * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
67 - * particular TX ring.
97 + * particular Tx ring.
68 98 */
69 99
70 100 #include <sys/types.h>
71 101 #include <sys/sysmacros.h>
72 102 #include <sys/conf.h>
73 103 #include <sys/cmn_err.h>
74 104 #include <sys/disp.h>
75 105 #include <sys/list.h>
76 106 #include <sys/ksynch.h>
77 107 #include <sys/kmem.h>
78 108 #include <sys/stream.h>
79 109 #include <sys/modctl.h>
80 110 #include <sys/ddi.h>
81 111 #include <sys/sunddi.h>
82 112 #include <sys/atomic.h>
83 113 #include <sys/stat.h>
84 114 #include <sys/modhash.h>
85 115 #include <sys/id_space.h>
86 116 #include <sys/strsun.h>
87 117 #include <sys/cred.h>
88 118 #include <sys/dlpi.h>
89 119 #include <sys/zone.h>
90 120 #include <sys/mac_provider.h>
91 121 #include <sys/dls.h>
92 122 #include <sys/vlan.h>
93 123 #include <sys/aggr.h>
94 124 #include <sys/aggr_impl.h>
95 125
96 126 static int aggr_m_start(void *);
97 127 static void aggr_m_stop(void *);
98 128 static int aggr_m_promisc(void *, boolean_t);
99 129 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
100 130 static int aggr_m_unicst(void *, const uint8_t *);
101 131 static int aggr_m_stat(void *, uint_t, uint64_t *);
102 132 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
103 133 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
104 134 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
105 135 const void *);
106 136 static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
107 137 mac_prop_info_handle_t);
108 138
109 139 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
110 140 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
111 141 boolean_t *);
112 142
113 143 static void aggr_grp_capab_set(aggr_grp_t *);
↓ open down ↓ |
36 lines elided |
↑ open up ↑ |
114 144 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
115 145 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
116 146 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
117 147 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
118 148 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
119 149
120 150 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
121 151 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
122 152 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
123 153 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
124 -static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
154 +static int aggr_pseudo_start_rx_ring(mac_ring_driver_t, uint64_t);
155 +static void aggr_pseudo_stop_rx_ring(mac_ring_driver_t);
125 156 static int aggr_addmac(void *, const uint8_t *);
126 157 static int aggr_remmac(void *, const uint8_t *);
127 158 static int aggr_addvlan(mac_group_driver_t, uint16_t);
128 159 static int aggr_remvlan(mac_group_driver_t, uint16_t);
129 160 static mblk_t *aggr_rx_poll(void *, int);
130 161 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
131 162 const int, mac_ring_info_t *, mac_ring_handle_t);
132 163 static void aggr_fill_group(void *, mac_ring_type_t, const int,
133 164 mac_group_info_t *, mac_group_handle_t);
134 165
135 166 static kmem_cache_t *aggr_grp_cache;
136 167 static mod_hash_t *aggr_grp_hash;
137 168 static krwlock_t aggr_grp_lock;
138 169 static uint_t aggr_grp_cnt;
139 170 static id_space_t *key_ids;
140 171
141 172 #define GRP_HASHSZ 64
142 173 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid)
143 174 #define AGGR_PORT_NAME_DELIMIT '-'
144 175
145 176 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
146 177
147 178 #define AGGR_M_CALLBACK_FLAGS \
148 179 (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
149 180
150 181 static mac_callbacks_t aggr_m_callbacks = {
151 182 AGGR_M_CALLBACK_FLAGS,
152 183 aggr_m_stat,
153 184 aggr_m_start,
154 185 aggr_m_stop,
155 186 aggr_m_promisc,
156 187 aggr_m_multicst,
157 188 NULL,
158 189 NULL,
159 190 NULL,
160 191 aggr_m_ioctl,
161 192 aggr_m_capab_get,
162 193 NULL,
163 194 NULL,
164 195 aggr_m_setprop,
165 196 NULL,
166 197 aggr_m_propinfo
167 198 };
168 199
169 200 /*ARGSUSED*/
170 201 static int
171 202 aggr_grp_constructor(void *buf, void *arg, int kmflag)
172 203 {
173 204 aggr_grp_t *grp = buf;
174 205
175 206 bzero(grp, sizeof (*grp));
176 207 mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
177 208 cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
178 209 rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
179 210 mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
180 211 cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
181 212 mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
182 213 cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
183 214 grp->lg_link_state = LINK_STATE_UNKNOWN;
184 215 return (0);
185 216 }
186 217
187 218 /*ARGSUSED*/
188 219 static void
189 220 aggr_grp_destructor(void *buf, void *arg)
190 221 {
191 222 aggr_grp_t *grp = buf;
192 223
193 224 if (grp->lg_tx_ports != NULL) {
194 225 kmem_free(grp->lg_tx_ports,
195 226 grp->lg_tx_ports_size * sizeof (aggr_port_t *));
196 227 }
197 228
198 229 mutex_destroy(&grp->lg_lacp_lock);
199 230 cv_destroy(&grp->lg_lacp_cv);
200 231 mutex_destroy(&grp->lg_port_lock);
201 232 cv_destroy(&grp->lg_port_cv);
202 233 rw_destroy(&grp->lg_tx_lock);
203 234 mutex_destroy(&grp->lg_tx_flowctl_lock);
204 235 cv_destroy(&grp->lg_tx_flowctl_cv);
205 236 }
206 237
207 238 void
208 239 aggr_grp_init(void)
209 240 {
210 241 aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
211 242 sizeof (aggr_grp_t), 0, aggr_grp_constructor,
212 243 aggr_grp_destructor, NULL, NULL, NULL, 0);
213 244
214 245 aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
215 246 GRP_HASHSZ, mod_hash_null_valdtor);
216 247 rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
217 248 aggr_grp_cnt = 0;
218 249
219 250 /*
220 251 * Allocate an id space to manage key values (when key is not
221 252 * specified). The range of the id space will be from
222 253 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
223 254 * uses a 16-bit key.
224 255 */
225 256 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
226 257 ASSERT(key_ids != NULL);
227 258 }
228 259
229 260 void
230 261 aggr_grp_fini(void)
231 262 {
232 263 id_space_destroy(key_ids);
233 264 rw_destroy(&aggr_grp_lock);
234 265 mod_hash_destroy_idhash(aggr_grp_hash);
235 266 kmem_cache_destroy(aggr_grp_cache);
236 267 }
237 268
238 269 uint_t
239 270 aggr_grp_count(void)
240 271 {
241 272 uint_t count;
242 273
243 274 rw_enter(&aggr_grp_lock, RW_READER);
244 275 count = aggr_grp_cnt;
245 276 rw_exit(&aggr_grp_lock);
246 277 return (count);
247 278 }
248 279
249 280 /*
250 281 * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
251 282 * require the mac perimeter, this function holds a reference of the aggr
252 283 * and aggr won't call mac_unregister() until this reference drops to 0.
253 284 */
254 285 void
255 286 aggr_grp_port_hold(aggr_port_t *port)
256 287 {
257 288 aggr_grp_t *grp = port->lp_grp;
258 289
259 290 AGGR_PORT_REFHOLD(port);
260 291 mutex_enter(&grp->lg_port_lock);
261 292 grp->lg_port_ref++;
262 293 mutex_exit(&grp->lg_port_lock);
263 294 }
264 295
265 296 /*
266 297 * Release the reference of the grp and inform aggr_grp_delete() that
267 298 * calling mac_unregister() is now safe.
268 299 */
269 300 void
270 301 aggr_grp_port_rele(aggr_port_t *port)
271 302 {
272 303 aggr_grp_t *grp = port->lp_grp;
273 304
274 305 mutex_enter(&grp->lg_port_lock);
275 306 if (--grp->lg_port_ref == 0)
276 307 cv_signal(&grp->lg_port_cv);
277 308 mutex_exit(&grp->lg_port_lock);
278 309 AGGR_PORT_REFRELE(port);
279 310 }
280 311
281 312 /*
282 313 * Wait for the port's lacp timer thread and the port's notification callback
283 314 * to exit.
284 315 */
285 316 void
286 317 aggr_grp_port_wait(aggr_grp_t *grp)
287 318 {
288 319 mutex_enter(&grp->lg_port_lock);
289 320 if (grp->lg_port_ref != 0)
290 321 cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
291 322 mutex_exit(&grp->lg_port_lock);
292 323 }
293 324
294 325 /*
295 326 * Attach a port to a link aggregation group.
296 327 *
297 328 * A port is attached to a link aggregation group once its speed
298 329 * and link state have been verified.
299 330 *
300 331 * Returns B_TRUE if the group link state or speed has changed. If
301 332 * it's the case, the caller must notify the MAC layer via a call
302 333 * to mac_link().
303 334 */
304 335 boolean_t
305 336 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
306 337 {
307 338 boolean_t link_state_changed = B_FALSE;
308 339
309 340 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
310 341 ASSERT(MAC_PERIM_HELD(port->lp_mh));
311 342
312 343 if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
313 344 return (B_FALSE);
314 345
315 346 /*
316 347 * Validate the MAC port link speed and update the group
317 348 * link speed if needed.
318 349 */
319 350 if (port->lp_ifspeed == 0 ||
320 351 port->lp_link_state != LINK_STATE_UP ||
321 352 port->lp_link_duplex != LINK_DUPLEX_FULL) {
322 353 /*
323 354 * Can't attach a MAC port with unknown link speed,
324 355 * down link, or not in full duplex mode.
325 356 */
326 357 return (B_FALSE);
327 358 }
328 359
329 360 mutex_enter(&grp->lg_stat_lock);
330 361 if (grp->lg_ifspeed == 0) {
331 362 /*
332 363 * The group inherits the speed of the first link being
333 364 * attached.
334 365 */
335 366 grp->lg_ifspeed = port->lp_ifspeed;
336 367 link_state_changed = B_TRUE;
337 368 } else if (grp->lg_ifspeed != port->lp_ifspeed) {
338 369 /*
339 370 * The link speed of the MAC port must be the same as
340 371 * the group link speed, as per 802.3ad. Since it is
341 372 * not, the attach is cancelled.
342 373 */
343 374 mutex_exit(&grp->lg_stat_lock);
344 375 return (B_FALSE);
345 376 }
346 377 mutex_exit(&grp->lg_stat_lock);
347 378
348 379 grp->lg_nattached_ports++;
349 380
350 381 /*
351 382 * Update the group link state.
352 383 */
353 384 if (grp->lg_link_state != LINK_STATE_UP) {
354 385 grp->lg_link_state = LINK_STATE_UP;
355 386 mutex_enter(&grp->lg_stat_lock);
356 387 grp->lg_link_duplex = LINK_DUPLEX_FULL;
357 388 mutex_exit(&grp->lg_stat_lock);
358 389 link_state_changed = B_TRUE;
↓ open down ↓ |
224 lines elided |
↑ open up ↑ |
359 390 }
360 391
361 392 /*
362 393 * Update port's state.
363 394 */
364 395 port->lp_state = AGGR_PORT_STATE_ATTACHED;
365 396
366 397 aggr_grp_multicst_port(port, B_TRUE);
367 398
368 399 /*
369 - * Set port's receive callback
400 + * The port client doesn't have an Rx SRS; instead of calling
401 + * mac_rx_set() we set the client's flow callback directly.
402 + * This datapath is used only when the port's driver doesn't
403 + * support MAC_CAPAB_RINGS. Drivers with ring support will
404 + * deliver traffic to the aggr via ring passthru.
370 405 */
371 - mac_rx_set(port->lp_mch, aggr_recv_cb, port);
406 + mac_client_set_flow_cb(port->lp_mch, aggr_recv_cb, port);
372 407
373 408 /*
374 409 * If LACP is OFF, the port can be used to send data as soon
375 410 * as its link is up and verified to be compatible with the
376 411 * aggregation.
377 412 *
378 413 * If LACP is active or passive, notify the LACP subsystem, which
379 414 * will enable sending on the port following the LACP protocol.
380 415 */
381 416 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
382 417 aggr_send_port_enable(port);
383 418 else
384 419 aggr_lacp_port_attached(port);
385 420
386 421 return (link_state_changed);
387 422 }
388 423
389 424 boolean_t
390 425 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
391 426 {
392 427 boolean_t link_state_changed = B_FALSE;
393 428
394 429 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
395 430 ASSERT(MAC_PERIM_HELD(port->lp_mh));
396 431
397 432 /* update state */
398 433 if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
399 434 return (B_FALSE);
400 435
401 - mac_rx_clear(port->lp_mch);
436 + mac_client_clear_flow_cb(port->lp_mch);
402 437
403 438 aggr_grp_multicst_port(port, B_FALSE);
404 439
405 440 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
406 441 aggr_send_port_disable(port);
407 442 else
408 443 aggr_lacp_port_detached(port);
409 444
410 445 port->lp_state = AGGR_PORT_STATE_STANDBY;
411 446
412 447 grp->lg_nattached_ports--;
413 448 if (grp->lg_nattached_ports == 0) {
414 449 /* the last attached MAC port of the group is being detached */
415 450 grp->lg_link_state = LINK_STATE_DOWN;
416 451 mutex_enter(&grp->lg_stat_lock);
417 452 grp->lg_ifspeed = 0;
418 453 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
419 454 mutex_exit(&grp->lg_stat_lock);
420 455 link_state_changed = B_TRUE;
421 456 }
422 457
423 458 return (link_state_changed);
424 459 }
425 460
426 461 /*
427 462 * Update the MAC addresses of the constituent ports of the specified
428 463 * group. This function is invoked:
429 464 * - after creating a new aggregation group.
430 465 * - after adding new ports to an aggregation group.
431 466 * - after removing a port from a group when the MAC address of
432 467 * that port was used for the MAC address of the group.
433 468 * - after the MAC address of a port changed when the MAC address
434 469 * of that port was used for the MAC address of the group.
435 470 *
436 471 * Return true if the link state of the aggregation changed, for example
437 472 * as a result of a failure changing the MAC address of one of the
438 473 * constituent ports.
439 474 */
440 475 boolean_t
441 476 aggr_grp_update_ports_mac(aggr_grp_t *grp)
442 477 {
443 478 aggr_port_t *cport;
444 479 boolean_t link_state_changed = B_FALSE;
445 480 mac_perim_handle_t mph;
446 481
447 482 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
448 483
449 484 for (cport = grp->lg_ports; cport != NULL;
450 485 cport = cport->lp_next) {
451 486 mac_perim_enter_by_mh(cport->lp_mh, &mph);
452 487 if (aggr_port_unicst(cport) != 0) {
453 488 if (aggr_grp_detach_port(grp, cport))
454 489 link_state_changed = B_TRUE;
455 490 } else {
456 491 /*
457 492 * If a port was detached because of a previous
458 493 * failure changing the MAC address, the port is
459 494 * reattached when it successfully changes the MAC
460 495 * address now, and this might cause the link state
461 496 * of the aggregation to change.
462 497 */
463 498 if (aggr_grp_attach_port(grp, cport))
464 499 link_state_changed = B_TRUE;
465 500 }
466 501 mac_perim_exit(mph);
467 502 }
468 503 return (link_state_changed);
469 504 }
470 505
471 506 /*
472 507 * Invoked when the MAC address of a port has changed. If the port's
473 508 * MAC address was used for the group MAC address, set mac_addr_changedp
474 509 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
475 510 * notification. If the link state changes due to detach/attach of
476 511 * the constituent port, set link_state_changedp to B_TRUE to indicate
477 512 * to the caller that it should send a MAC_NOTE_LINK notification. In both
478 513 * cases, it is the responsibility of the caller to invoke notification
479 514 * functions after releasing the port lock.
480 515 */
481 516 void
482 517 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
483 518 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
484 519 {
485 520 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
486 521 ASSERT(MAC_PERIM_HELD(port->lp_mh));
487 522 ASSERT(mac_addr_changedp != NULL);
488 523 ASSERT(link_state_changedp != NULL);
489 524
490 525 *mac_addr_changedp = B_FALSE;
491 526 *link_state_changedp = B_FALSE;
492 527
493 528 if (grp->lg_addr_fixed) {
494 529 /*
495 530 * The group is using a fixed MAC address or an automatic
496 531 * MAC address has not been set.
497 532 */
498 533 return;
499 534 }
500 535
501 536 if (grp->lg_mac_addr_port == port) {
502 537 /*
503 538 * The MAC address of the port was assigned to the group
504 539 * MAC address. Update the group MAC address.
505 540 */
506 541 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
507 542 *mac_addr_changedp = B_TRUE;
508 543 } else {
509 544 /*
510 545 * Update the actual port MAC address to the MAC address
511 546 * of the group.
512 547 */
513 548 if (aggr_port_unicst(port) != 0) {
514 549 *link_state_changedp = aggr_grp_detach_port(grp, port);
515 550 } else {
516 551 /*
517 552 * If a port was detached because of a previous
518 553 * failure changing the MAC address, the port is
519 554 * reattached when it successfully changes the MAC
520 555 * address now, and this might cause the link state
521 556 * of the aggregation to change.
522 557 */
523 558 *link_state_changedp = aggr_grp_attach_port(grp, port);
524 559 }
525 560 }
526 561 }
527 562
528 563 /*
529 564 * Add a port to a link aggregation group.
↓ open down ↓ |
118 lines elided |
↑ open up ↑ |
530 565 */
531 566 static int
532 567 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
533 568 aggr_port_t **pp)
534 569 {
535 570 aggr_port_t *port, **cport;
536 571 mac_perim_handle_t mph;
537 572 zoneid_t port_zoneid = ALL_ZONES;
538 573 int err;
539 574
540 - /* The port must be int the same zone as the aggregation. */
575 + /* The port must be in the same zone as the aggregation. */
541 576 if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
542 577 port_zoneid = GLOBAL_ZONEID;
543 578 if (grp->lg_zoneid != port_zoneid)
544 579 return (EBUSY);
545 580
546 581 /*
547 - * lg_mh could be NULL when the function is called during the creation
548 - * of the aggregation.
582 + * If we are creating the aggr, then there is no MAC handle
583 + * and thus no perimeter to hold. If we are adding a port to
584 + * an existing aggr, then the perimeter of the aggr's MAC must
585 + * be held.
549 586 */
550 587 ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
551 588
552 - /* create new port */
553 589 err = aggr_port_create(grp, port_linkid, force, &port);
554 590 if (err != 0)
555 591 return (err);
556 592
557 593 mac_perim_enter_by_mh(port->lp_mh, &mph);
558 594
559 - /* add port to list of group constituent ports */
595 + /* Add the new port to the end of the list. */
560 596 cport = &grp->lg_ports;
561 597 while (*cport != NULL)
562 598 cport = &((*cport)->lp_next);
563 599 *cport = port;
564 600
565 601 /*
566 602 * Back reference to the group it is member of. A port always
567 603 * holds a reference to its group to ensure that the back
568 604 * reference is always valid.
569 605 */
570 606 port->lp_grp = grp;
571 607 AGGR_GRP_REFHOLD(grp);
572 608 grp->lg_nports++;
573 609
574 610 aggr_lacp_init_port(port);
575 611 mac_perim_exit(mph);
576 612
577 613 if (pp != NULL)
578 614 *pp = port;
579 615
580 616 return (0);
581 617 }
582 618
583 619 /*
584 620 * This is called in response to either our LACP state machine or a MAC
585 621 * notification that the link has gone down via aggr_send_port_disable(). At
586 622 * this point, we may need to update our default ring. To that end, we go
587 623 * through the set of ports (underlying datalinks in an aggregation) that are
588 624 * currently enabled to transmit data. If all our links have been disabled for
589 625 * transmit, then we don't do anything.
590 626 *
591 627 * Note, because we only have a single TX group, we don't have to worry about
592 628 * the rings moving between groups and the chance that mac will reassign it
593 629 * unless someone removes a port, at which point, we play it safe and call this
594 630 * again.
595 631 */
596 632 void
597 633 aggr_grp_update_default(aggr_grp_t *grp)
598 634 {
599 635 aggr_port_t *port;
600 636 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
601 637
602 638 rw_enter(&grp->lg_tx_lock, RW_WRITER);
603 639
604 640 if (grp->lg_ntx_ports == 0) {
605 641 rw_exit(&grp->lg_tx_lock);
606 642 return;
607 643 }
608 644
609 645 port = grp->lg_tx_ports[0];
610 646 ASSERT(port->lp_tx_ring_cnt > 0);
611 647 mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
612 648 rw_exit(&grp->lg_tx_lock);
613 649 }
614 650
615 651 /*
616 652 * Add a pseudo RX ring for the given HW ring handle.
617 653 */
618 654 static int
619 655 aggr_add_pseudo_rx_ring(aggr_port_t *port,
620 656 aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
621 657 {
622 658 aggr_pseudo_rx_ring_t *ring;
623 659 int err;
624 660 int j;
625 661
626 662 for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
627 663 ring = rx_grp->arg_rings + j;
628 664 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
629 665 break;
630 666 }
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
631 667
632 668 /*
633 669 * No slot for this new RX ring.
634 670 */
635 671 if (j == MAX_RINGS_PER_GROUP)
636 672 return (EIO);
637 673
638 674 ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
639 675 ring->arr_hw_rh = hw_rh;
640 676 ring->arr_port = port;
677 + ring->arr_grp = rx_grp;
641 678 rx_grp->arg_ring_cnt++;
642 679
643 680 /*
644 681 * The group is already registered, dynamically add a new ring to the
645 682 * mac group.
646 683 */
647 684 if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
648 685 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
649 686 ring->arr_hw_rh = NULL;
650 687 ring->arr_port = NULL;
688 + ring->arr_grp = NULL;
651 689 rx_grp->arg_ring_cnt--;
652 690 } else {
653 - mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
654 - mac_find_ring(rx_grp->arg_gh, j));
691 + /*
692 + * This must run after the MAC is registered.
693 + */
694 + ASSERT3P(ring->arr_rh, !=, NULL);
695 + mac_hwring_set_passthru(hw_rh, (mac_rx_t)aggr_recv_cb,
696 + (void *)port, (mac_resource_handle_t)ring);
655 697 }
656 698 return (err);
657 699 }
658 700
659 701 /*
660 702 * Remove the pseudo RX ring of the given HW ring handle.
661 703 */
662 704 static void
663 705 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
664 706 {
665 - aggr_pseudo_rx_ring_t *ring;
666 - int j;
707 + for (uint_t j = 0; j < MAX_RINGS_PER_GROUP; j++) {
708 + aggr_pseudo_rx_ring_t *ring = rx_grp->arg_rings + j;
667 709
668 - for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
669 - ring = rx_grp->arg_rings + j;
670 710 if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
671 711 ring->arr_hw_rh != hw_rh) {
672 712 continue;
673 713 }
674 714
675 715 mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
676 716
677 717 ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
678 718 ring->arr_hw_rh = NULL;
679 719 ring->arr_port = NULL;
720 + ring->arr_grp = NULL;
680 721 rx_grp->arg_ring_cnt--;
681 - mac_hwring_teardown(hw_rh);
722 + mac_hwring_clear_passthru(hw_rh);
682 723 break;
683 724 }
684 725 }
685 726
686 727 /*
687 728 * Create pseudo rings over the HW rings of the port.
688 729 *
689 730 * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
690 731 *
691 732 * o Program existing unicast filters on the pseudo group into the HW group.
692 733 *
693 734 * o Program existing VLAN filters on the pseudo group into the HW group.
694 735 */
695 736 static int
696 737 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
697 738 {
698 - aggr_grp_t *grp = port->lp_grp;
699 739 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
700 740 aggr_unicst_addr_t *addr, *a;
701 741 mac_perim_handle_t pmph;
702 742 aggr_vlan_t *avp;
703 - int hw_rh_cnt, i = 0, j;
743 + uint_t hw_rh_cnt, i;
704 744 int err = 0;
745 + uint_t g_idx = rx_grp->arg_index;
705 746
706 - ASSERT(MAC_PERIM_HELD(grp->lg_mh));
747 + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
748 + ASSERT3U(g_idx, <, MAX_GROUPS_PER_PORT);
707 749 mac_perim_enter_by_mh(port->lp_mh, &pmph);
708 750
709 751 /*
710 - * This function must be called after the aggr registers its MAC
711 - * and its Rx group has been initialized.
752 + * This function must be called after the aggr registers its
753 + * MAC and its Rx groups have been initialized.
712 754 */
713 755 ASSERT(rx_grp->arg_gh != NULL);
714 756
715 757 /*
716 758 * Get the list of the underlying HW rings.
717 759 */
718 - hw_rh_cnt = mac_hwrings_get(port->lp_mch,
719 - &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
760 + hw_rh_cnt = mac_hwrings_idx_get(port->lp_mh, g_idx,
761 + &port->lp_hwghs[g_idx], hw_rh, MAC_RING_TYPE_RX);
720 762
721 - if (port->lp_hwgh != NULL) {
722 - /*
723 - * Quiesce the HW ring and the MAC SRS on the ring. Note
724 - * that the HW ring will be restarted when the pseudo ring
725 - * is started. At that time all the packets will be
726 - * directly passed up to the pseudo Rx ring and handled
727 - * by MAC SRS created over the pseudo Rx ring.
728 - */
729 - mac_rx_client_quiesce(port->lp_mch);
730 - mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
731 - }
732 -
733 763 /*
734 764 * Add existing VLAN and unicast address filters to the port.
735 765 */
736 766 for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
737 767 avp = list_next(&rx_grp->arg_vlans, avp)) {
738 - if ((err = aggr_port_addvlan(port, avp->av_vid)) != 0)
768 + if ((err = aggr_port_addvlan(port, g_idx, avp->av_vid)) != 0)
739 769 goto err;
740 770 }
741 771
742 772 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
743 - if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
773 + if ((err = aggr_port_addmac(port, g_idx, addr->aua_addr)) != 0)
744 774 goto err;
745 775 }
746 776
747 777 for (i = 0; i < hw_rh_cnt; i++) {
748 778 err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
749 779 if (err != 0)
750 780 goto err;
751 781 }
752 782
753 - port->lp_rx_grp_added = B_TRUE;
754 783 mac_perim_exit(pmph);
755 784 return (0);
756 785
757 786 err:
758 787 ASSERT(err != 0);
759 788
760 - for (j = 0; j < i; j++)
789 + for (uint_t j = 0; j < i; j++)
761 790 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
762 791
763 792 for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
764 - aggr_port_remmac(port, a->aua_addr);
793 + aggr_port_remmac(port, g_idx, a->aua_addr);
765 794
766 795 if (avp != NULL)
767 796 avp = list_prev(&rx_grp->arg_vlans, avp);
768 797
769 798 for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
770 799 int err2;
771 800
772 - if ((err2 = aggr_port_remvlan(port, avp->av_vid)) != 0) {
801 + if ((err2 = aggr_port_remvlan(port, g_idx, avp->av_vid)) != 0) {
773 802 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
774 803 ": errno %d.", avp->av_vid,
775 804 mac_client_name(port->lp_mch), err2);
776 805 }
777 806 }
778 807
779 - if (port->lp_hwgh != NULL) {
780 - mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
781 - mac_rx_client_restart(port->lp_mch);
782 - port->lp_hwgh = NULL;
783 - }
784 -
808 + port->lp_hwghs[g_idx] = NULL;
785 809 mac_perim_exit(pmph);
786 810 return (err);
787 811 }
788 812
789 813 /*
790 814 * Destroy the pseudo rings mapping to this port and remove all VLAN
791 815 * and unicast filters from this port. Even if there are no underlying
792 816 * HW rings we must still remove the unicast filters to take the port
793 817 * out of promisc mode.
794 818 */
795 819 static void
796 820 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
797 821 {
798 - aggr_grp_t *grp = port->lp_grp;
799 822 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP];
800 823 aggr_unicst_addr_t *addr;
801 - mac_group_handle_t hwgh;
802 824 mac_perim_handle_t pmph;
803 - int hw_rh_cnt, i;
825 + uint_t hw_rh_cnt;
826 + uint_t g_idx = rx_grp->arg_index;
804 827
805 - ASSERT(MAC_PERIM_HELD(grp->lg_mh));
828 + ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
829 + ASSERT3U(g_idx, <, MAX_GROUPS_PER_PORT);
830 + ASSERT3P(rx_grp->arg_gh, !=, NULL);
806 831 mac_perim_enter_by_mh(port->lp_mh, &pmph);
807 832
808 - if (!port->lp_rx_grp_added)
809 - goto done;
833 + hw_rh_cnt = mac_hwrings_idx_get(port->lp_mh, g_idx, NULL, hw_rh,
834 + MAC_RING_TYPE_RX);
810 835
811 - ASSERT(rx_grp->arg_gh != NULL);
812 - hw_rh_cnt = mac_hwrings_get(port->lp_mch,
813 - &hwgh, hw_rh, MAC_RING_TYPE_RX);
814 -
815 - for (i = 0; i < hw_rh_cnt; i++)
836 + for (uint_t i = 0; i < hw_rh_cnt; i++)
816 837 aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
817 838
818 839 for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
819 - aggr_port_remmac(port, addr->aua_addr);
840 + aggr_port_remmac(port, g_idx, addr->aua_addr);
820 841
821 842 for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
822 843 avp = list_next(&rx_grp->arg_vlans, avp)) {
823 844 int err;
824 845
825 - if ((err = aggr_port_remvlan(port, avp->av_vid)) != 0) {
846 + if ((err = aggr_port_remvlan(port, g_idx, avp->av_vid)) != 0) {
826 847 cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
827 848 ": errno %d.", avp->av_vid,
828 849 mac_client_name(port->lp_mch), err);
829 850 }
830 851 }
831 852
832 - if (port->lp_hwgh != NULL) {
833 - port->lp_hwgh = NULL;
834 -
835 - /*
836 - * First clear the permanent-quiesced flag of the RX srs then
837 - * restart the HW ring and the mac srs on the ring. Note that
838 - * the HW ring and associated SRS will soon been removed when
839 - * the port is removed from the aggr.
840 - */
841 - mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
842 - mac_rx_client_restart(port->lp_mch);
843 - }
844 -
845 - port->lp_rx_grp_added = B_FALSE;
846 -done:
853 + port->lp_hwghs[g_idx] = NULL;
847 854 mac_perim_exit(pmph);
848 855 }
849 856
850 857 /*
851 858 * Add a pseudo TX ring for the given HW ring handle.
852 859 */
853 860 static int
854 861 aggr_add_pseudo_tx_ring(aggr_port_t *port,
855 862 aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
856 863 mac_ring_handle_t *pseudo_rh)
857 864 {
858 865 aggr_pseudo_tx_ring_t *ring;
859 866 int err;
860 867 int i;
861 868
862 869 ASSERT(MAC_PERIM_HELD(port->lp_mh));
863 870 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
864 871 ring = tx_grp->atg_rings + i;
865 872 if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
866 873 break;
867 874 }
868 875 /*
869 876 * No slot for this new TX ring.
870 877 */
871 878 if (i == MAX_RINGS_PER_GROUP)
872 879 return (EIO);
873 880 /*
874 881 * The following 4 statements need to be done before
875 882 * calling mac_group_add_ring(). Otherwise it will
876 883 * result in an assertion failure in mac_init_ring().
877 884 */
878 885 ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
879 886 ring->atr_hw_rh = hw_rh;
880 887 ring->atr_port = port;
881 888 tx_grp->atg_ring_cnt++;
882 889
883 890 /*
884 891 * The TX side has no concept of ring groups unlike RX groups.
885 892 * There is just a single group which stores all the TX rings.
886 893 * This group will be used to store aggr's pseudo TX rings.
887 894 */
888 895 if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
889 896 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
890 897 ring->atr_hw_rh = NULL;
891 898 ring->atr_port = NULL;
892 899 tx_grp->atg_ring_cnt--;
893 900 } else {
894 901 *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
895 902 if (hw_rh != NULL) {
896 903 mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
897 904 mac_find_ring(tx_grp->atg_gh, i));
898 905 }
899 906 }
900 907
901 908 return (err);
902 909 }
903 910
904 911 /*
905 912 * Remove the pseudo TX ring of the given HW ring handle.
906 913 */
907 914 static void
908 915 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
909 916 mac_ring_handle_t pseudo_hw_rh)
910 917 {
911 918 aggr_pseudo_tx_ring_t *ring;
912 919 int i;
913 920
914 921 for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
915 922 ring = tx_grp->atg_rings + i;
916 923 if (ring->atr_rh != pseudo_hw_rh)
917 924 continue;
918 925
919 926 ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
920 927 mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
921 928 ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
922 929 mac_hwring_teardown(ring->atr_hw_rh);
923 930 ring->atr_hw_rh = NULL;
924 931 ring->atr_port = NULL;
925 932 tx_grp->atg_ring_cnt--;
926 933 break;
927 934 }
928 935 }
929 936
930 937 /*
931 938 * This function is called to create pseudo rings over hardware rings of
932 939 * the underlying device. There is a 1:1 mapping between the pseudo TX
933 940 * rings of the aggr and the hardware rings of the underlying port.
934 941 */
935 942 static int
936 943 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
937 944 {
938 945 aggr_grp_t *grp = port->lp_grp;
939 946 mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
↓ open down ↓ |
83 lines elided |
↑ open up ↑ |
940 947 mac_perim_handle_t pmph;
941 948 int hw_rh_cnt, i = 0, j;
942 949 int err = 0;
943 950
944 951 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
945 952 mac_perim_enter_by_mh(port->lp_mh, &pmph);
946 953
947 954 /*
948 955 * Get the list of the underlying HW rings.
949 956 */
950 - hw_rh_cnt = mac_hwrings_get(port->lp_mch,
951 - NULL, hw_rh, MAC_RING_TYPE_TX);
957 + hw_rh_cnt = mac_hwrings_get(port->lp_mch, NULL, hw_rh,
958 + MAC_RING_TYPE_TX);
952 959
953 960 /*
954 961 * Even if the underlying NIC does not have TX rings, we
955 962 * still make a pseudo TX ring for that NIC with NULL as
956 963 * the ring handle.
957 964 */
958 965 if (hw_rh_cnt == 0)
959 966 port->lp_tx_ring_cnt = 1;
960 967 else
961 968 port->lp_tx_ring_cnt = hw_rh_cnt;
962 969
963 970 port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
964 971 port->lp_tx_ring_cnt), KM_SLEEP);
965 972 port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
966 973 port->lp_tx_ring_cnt), KM_SLEEP);
967 974
968 975 if (hw_rh_cnt == 0) {
969 976 if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
970 977 NULL, &pseudo_rh)) == 0) {
971 978 port->lp_tx_rings[0] = NULL;
972 979 port->lp_pseudo_tx_rings[0] = pseudo_rh;
973 980 }
974 981 } else {
975 982 for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
976 983 err = aggr_add_pseudo_tx_ring(port,
977 984 tx_grp, hw_rh[i], &pseudo_rh);
978 985 if (err != 0)
979 986 break;
980 987 port->lp_tx_rings[i] = hw_rh[i];
981 988 port->lp_pseudo_tx_rings[i] = pseudo_rh;
982 989 }
983 990 }
984 991
985 992 if (err != 0) {
986 993 if (hw_rh_cnt != 0) {
987 994 for (j = 0; j < i; j++) {
988 995 aggr_rem_pseudo_tx_ring(tx_grp,
989 996 port->lp_pseudo_tx_rings[j]);
990 997 }
991 998 }
992 999 kmem_free(port->lp_tx_rings,
993 1000 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
994 1001 kmem_free(port->lp_pseudo_tx_rings,
995 1002 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
996 1003 port->lp_tx_ring_cnt = 0;
997 1004 } else {
998 1005 port->lp_tx_grp_added = B_TRUE;
999 1006 port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
1000 1007 aggr_tx_ring_update, port);
1001 1008 }
1002 1009 mac_perim_exit(pmph);
1003 1010 aggr_grp_update_default(grp);
1004 1011 return (err);
1005 1012 }
1006 1013
1007 1014 /*
1008 1015 * This function is called by aggr to remove pseudo TX rings over the
1009 1016 * HW rings of the underlying port.
1010 1017 */
1011 1018 static void
1012 1019 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
1013 1020 {
1014 1021 aggr_grp_t *grp = port->lp_grp;
1015 1022 mac_perim_handle_t pmph;
1016 1023 int i;
1017 1024
1018 1025 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1019 1026 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1020 1027
1021 1028 if (!port->lp_tx_grp_added)
1022 1029 goto done;
1023 1030
1024 1031 ASSERT(tx_grp->atg_gh != NULL);
1025 1032
1026 1033 for (i = 0; i < port->lp_tx_ring_cnt; i++)
1027 1034 aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
1028 1035
1029 1036 kmem_free(port->lp_tx_rings,
1030 1037 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
1031 1038 kmem_free(port->lp_pseudo_tx_rings,
1032 1039 (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
1033 1040
1034 1041 port->lp_tx_ring_cnt = 0;
1035 1042 (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
1036 1043 port->lp_tx_grp_added = B_FALSE;
1037 1044 aggr_grp_update_default(grp);
1038 1045 done:
1039 1046 mac_perim_exit(pmph);
1040 1047 }
1041 1048
1042 1049 static int
1043 1050 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
1044 1051 {
1045 1052 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1046 1053 return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
↓ open down ↓ |
85 lines elided |
↑ open up ↑ |
1047 1054 }
1048 1055
1049 1056 static int
1050 1057 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1051 1058 {
1052 1059 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1053 1060 return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1054 1061 }
1055 1062
1056 1063 /*
1057 - * Here we need to start the pseudo-ring. As MAC already ensures that the
1058 - * underlying device is set up, all we need to do is save the ring generation.
1059 - *
1060 - * Note, we don't end up wanting to use the underlying mac_hwring_start/stop
1061 - * functions here as those don't actually stop and start the ring, they just
1062 - * quiesce the ring. Regardless of whether the aggr is logically up or not, we
1063 - * want to make sure that we can receive traffic for LACP.
1064 + * Start the pseudo ring. Since the pseudo ring is just an abstraction
1065 + * over an actual HW ring, the real task is to start the underlying HW
1066 + * ring.
1064 1067 */
1065 1068 static int
1066 -aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1069 +aggr_pseudo_start_rx_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1067 1070 {
1071 + int err;
1068 1072 aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1069 1073
1074 + err = mac_hwring_start(rr_ring->arr_hw_rh);
1075 +
1076 + if (err != 0)
1077 + return (err);
1078 +
1070 1079 rr_ring->arr_gen = mr_gen;
1071 - return (0);
1080 + return (err);
1072 1081 }
1073 1082
1074 1083 /*
1084 + * Stop the pseudo ring. Since the pseudo ring is just an abstraction
1085 + * over an actual HW ring, the real task is to stop the underlying HW
1086 + * ring.
1087 + */
1088 +static void
1089 +aggr_pseudo_stop_rx_ring(mac_ring_driver_t arg)
1090 +{
1091 + aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1092 +
1093 + /*
1094 + * The rings underlying the default group must stay up to
1095 + * continue receiving LACP traffic. We would normally never
1096 + * stop the default Rx rings because of the primary MAC
1097 + * client; but aggr's primary MAC client doesn't call
1098 + * mac_unicast_add() and thus mi_active is 0 when the last
1099 + * non-primary client is deleted.
1100 + */
1101 + if (rr_ring->arr_grp->arg_index != 0)
1102 + mac_hwring_stop(rr_ring->arr_hw_rh);
1103 +}
1104 +
1105 +/*
1075 1106 * Add one or more ports to an existing link aggregation group.
1076 1107 */
1077 1108 int
1078 1109 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1079 1110 laioc_port_t *ports)
1080 1111 {
1081 - int rc, i, nadded = 0;
1112 + int rc;
1113 + uint_t port_added = 0;
1114 + uint_t grp_added;
1082 1115 aggr_grp_t *grp = NULL;
1083 1116 aggr_port_t *port;
1084 1117 boolean_t link_state_changed = B_FALSE;
1085 1118 mac_perim_handle_t mph, pmph;
1086 1119
1087 - /* get group corresponding to linkid */
1120 + /* Get the aggr corresponding to linkid. */
1088 1121 rw_enter(&aggr_grp_lock, RW_READER);
1089 1122 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1090 1123 (mod_hash_val_t *)&grp) != 0) {
1091 1124 rw_exit(&aggr_grp_lock);
1092 1125 return (ENOENT);
1093 1126 }
1094 1127 AGGR_GRP_REFHOLD(grp);
1095 1128
1096 1129 /*
1097 - * Hold the perimeter so that the aggregation won't be destroyed.
1130 + * Hold the perimeter so that the aggregation can't be destroyed.
1098 1131 */
1099 1132 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1100 1133 rw_exit(&aggr_grp_lock);
1101 1134
1102 - /* add the specified ports to group */
1103 - for (i = 0; i < nports; i++) {
1104 - /* add port to group */
1135 + /* Add the specified ports to the aggr. */
1136 + for (uint_t i = 0; i < nports; i++) {
1137 + grp_added = 0;
1138 +
1105 1139 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1106 1140 force, &port)) != 0) {
1107 1141 goto bail;
1108 1142 }
1143 +
1109 1144 ASSERT(port != NULL);
1110 - nadded++;
1145 + port_added++;
1111 1146
1112 1147 /* check capabilities */
1113 1148 if (!aggr_grp_capab_check(grp, port) ||
1114 1149 !aggr_grp_sdu_check(grp, port) ||
1115 1150 !aggr_grp_margin_check(grp, port)) {
1116 1151 rc = ENOTSUP;
1117 1152 goto bail;
1118 1153 }
1119 1154
1120 1155 /*
1121 1156 * Create the pseudo ring for each HW ring of the underlying
1122 1157 * port.
1123 1158 */
1124 1159 rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
1125 1160 if (rc != 0)
1126 1161 goto bail;
1127 - rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1128 - if (rc != 0)
1129 - goto bail;
1130 1162
1163 + for (uint_t j = 0; j < grp->lg_rx_group_count; j++) {
1164 + rc = aggr_add_pseudo_rx_group(port,
1165 + &grp->lg_rx_groups[j]);
1166 +
1167 + if (rc != 0)
1168 + goto bail;
1169 +
1170 + grp_added++;
1171 + }
1172 +
1131 1173 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1132 1174
1133 1175 /* set LACP mode */
1134 1176 aggr_port_lacp_set_mode(grp, port);
1135 1177
1136 1178 /* start port if group has already been started */
1137 1179 if (grp->lg_started) {
1138 1180 rc = aggr_port_start(port);
1139 1181 if (rc != 0) {
1140 1182 mac_perim_exit(pmph);
1141 1183 goto bail;
1142 1184 }
1143 1185
1144 1186 /*
1145 1187 * Turn on the promiscuous mode over the port when it
1146 1188 * is requested to be turned on to receive the
1147 - * non-primary address over a port, or the promiscous
1189 + * non-primary address over a port, or the promiscuous
1148 1190 * mode is enabled over the aggr.
1149 1191 */
1150 1192 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1151 1193 rc = aggr_port_promisc(port, B_TRUE);
1152 1194 if (rc != 0) {
1153 1195 mac_perim_exit(pmph);
1154 1196 goto bail;
1155 1197 }
1156 1198 }
1157 1199 }
1158 1200 mac_perim_exit(pmph);
1159 1201
1160 1202 /*
1161 1203 * Attach each port if necessary.
1162 1204 */
1163 1205 if (aggr_port_notify_link(grp, port))
1164 1206 link_state_changed = B_TRUE;
1165 1207
1166 1208 /*
1167 1209 * Initialize the callback functions for this port.
1168 1210 */
1169 1211 aggr_port_init_callbacks(port);
1170 1212 }
1171 1213
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
1172 1214 /* update the MAC address of the constituent ports */
1173 1215 if (aggr_grp_update_ports_mac(grp))
1174 1216 link_state_changed = B_TRUE;
1175 1217
1176 1218 if (link_state_changed)
1177 1219 mac_link_update(grp->lg_mh, grp->lg_link_state);
1178 1220
1179 1221 bail:
1180 1222 if (rc != 0) {
1181 1223 /* stop and remove ports that have been added */
1182 - for (i = 0; i < nadded; i++) {
1224 + for (uint_t i = 0; i < port_added; i++) {
1225 + uint_t grp_remove;
1226 +
1183 1227 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1184 1228 ASSERT(port != NULL);
1229 +
1185 1230 if (grp->lg_started) {
1186 1231 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1187 1232 (void) aggr_port_promisc(port, B_FALSE);
1188 1233 aggr_port_stop(port);
1189 1234 mac_perim_exit(pmph);
1190 1235 }
1236 +
1191 1237 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1192 - aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1238 +
1239 + /*
1240 + * Only the last port could have a partial set
1241 + * of groups added.
1242 + */
1243 + grp_remove = (i + 1 == port_added) ? grp_added :
1244 + grp->lg_rx_group_count;
1245 +
1246 + for (uint_t j = 0; j < grp_remove; j++) {
1247 + aggr_rem_pseudo_rx_group(port,
1248 + &grp->lg_rx_groups[j]);
1249 + }
1250 +
1193 1251 (void) aggr_grp_rem_port(grp, port, NULL, NULL);
1194 1252 }
1195 1253 }
1196 1254
1197 1255 mac_perim_exit(mph);
1198 1256 AGGR_GRP_REFRELE(grp);
1199 1257 return (rc);
1200 1258 }
1201 1259
1202 1260 static int
1203 1261 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1204 1262 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1205 1263 aggr_lacp_timer_t lacp_timer)
1206 1264 {
1207 1265 boolean_t mac_addr_changed = B_FALSE;
1208 1266 boolean_t link_state_changed = B_FALSE;
1209 1267 mac_perim_handle_t pmph;
1210 1268
1211 1269 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1212 1270
1213 1271 /* validate fixed address if specified */
1214 1272 if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
1215 1273 ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
1216 1274 (mac_addr[0] & 0x01))) {
1217 1275 return (EINVAL);
1218 1276 }
1219 1277
1220 1278 /* update policy if requested */
1221 1279 if (update_mask & AGGR_MODIFY_POLICY)
1222 1280 aggr_send_update_policy(grp, policy);
1223 1281
1224 1282 /* update unicast MAC address if requested */
1225 1283 if (update_mask & AGGR_MODIFY_MAC) {
1226 1284 if (mac_fixed) {
1227 1285 /* user-supplied MAC address */
1228 1286 grp->lg_mac_addr_port = NULL;
1229 1287 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
1230 1288 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1231 1289 mac_addr_changed = B_TRUE;
1232 1290 }
1233 1291 } else if (grp->lg_addr_fixed) {
1234 1292 /* switch from user-supplied to automatic */
1235 1293 aggr_port_t *port = grp->lg_ports;
1236 1294
1237 1295 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1238 1296 bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
1239 1297 grp->lg_mac_addr_port = port;
1240 1298 mac_addr_changed = B_TRUE;
1241 1299 mac_perim_exit(pmph);
1242 1300 }
1243 1301 grp->lg_addr_fixed = mac_fixed;
1244 1302 }
1245 1303
1246 1304 if (mac_addr_changed)
1247 1305 link_state_changed = aggr_grp_update_ports_mac(grp);
1248 1306
1249 1307 if (update_mask & AGGR_MODIFY_LACP_MODE)
1250 1308 aggr_lacp_update_mode(grp, lacp_mode);
1251 1309
1252 1310 if (update_mask & AGGR_MODIFY_LACP_TIMER)
1253 1311 aggr_lacp_update_timer(grp, lacp_timer);
1254 1312
1255 1313 if (link_state_changed)
1256 1314 mac_link_update(grp->lg_mh, grp->lg_link_state);
1257 1315
1258 1316 if (mac_addr_changed)
1259 1317 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1260 1318
1261 1319 return (0);
1262 1320 }
1263 1321
1264 1322 /*
1265 1323 * Update properties of an existing link aggregation group.
1266 1324 */
1267 1325 int
1268 1326 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1269 1327 boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1270 1328 aggr_lacp_timer_t lacp_timer)
1271 1329 {
1272 1330 aggr_grp_t *grp = NULL;
1273 1331 mac_perim_handle_t mph;
1274 1332 int err;
1275 1333
1276 1334 /* get group corresponding to linkid */
1277 1335 rw_enter(&aggr_grp_lock, RW_READER);
1278 1336 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1279 1337 (mod_hash_val_t *)&grp) != 0) {
1280 1338 rw_exit(&aggr_grp_lock);
1281 1339 return (ENOENT);
1282 1340 }
1283 1341 AGGR_GRP_REFHOLD(grp);
1284 1342
1285 1343 /*
1286 1344 * Hold the perimeter so that the aggregation won't be destroyed.
1287 1345 */
1288 1346 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1289 1347 rw_exit(&aggr_grp_lock);
1290 1348
1291 1349 err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1292 1350 mac_addr, lacp_mode, lacp_timer);
1293 1351
1294 1352 mac_perim_exit(mph);
1295 1353 AGGR_GRP_REFRELE(grp);
1296 1354 return (err);
1297 1355 }
1298 1356
1299 1357 /*
1300 1358 * Create a new link aggregation group upon request from administrator.
1301 1359 * Returns 0 on success, an errno on failure.
1302 1360 */
1303 1361 int
1304 1362 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1305 1363 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
1306 1364 uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
1307 1365 cred_t *credp)
1308 1366 {
1309 1367 aggr_grp_t *grp = NULL;
1310 1368 aggr_port_t *port;
1311 1369 mac_register_t *mac;
1312 1370 boolean_t link_state_changed;
1313 1371 mac_perim_handle_t mph;
1314 1372 int err;
1315 1373 int i;
1316 1374 kt_did_t tid = 0;
1317 1375
1318 1376 /* need at least one port */
1319 1377 if (nports == 0)
1320 1378 return (EINVAL);
1321 1379
1322 1380 rw_enter(&aggr_grp_lock, RW_WRITER);
1323 1381
1324 1382 /* does a group with the same linkid already exist? */
1325 1383 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1326 1384 (mod_hash_val_t *)&grp);
1327 1385 if (err == 0) {
1328 1386 rw_exit(&aggr_grp_lock);
1329 1387 return (EEXIST);
1330 1388 }
1331 1389
1332 1390 grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
1333 1391
1334 1392 grp->lg_refs = 1;
1335 1393 grp->lg_closing = B_FALSE;
1336 1394 grp->lg_force = force;
1337 1395 grp->lg_linkid = linkid;
1338 1396 grp->lg_zoneid = crgetzoneid(credp);
1339 1397 grp->lg_ifspeed = 0;
1340 1398 grp->lg_link_state = LINK_STATE_UNKNOWN;
1341 1399 grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1342 1400 grp->lg_started = B_FALSE;
1343 1401 grp->lg_promisc = B_FALSE;
↓ open down ↓ |
141 lines elided |
↑ open up ↑ |
1344 1402 grp->lg_lacp_done = B_FALSE;
1345 1403 grp->lg_tx_notify_done = B_FALSE;
1346 1404 grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1347 1405 grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1348 1406 aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1349 1407 grp->lg_tx_notify_thread = thread_create(NULL, 0,
1350 1408 aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1351 1409 grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1352 1410 MAX_RINGS_PER_GROUP), KM_SLEEP);
1353 1411 grp->lg_tx_blocked_cnt = 0;
1354 - bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1412 + bzero(&grp->lg_rx_groups,
1413 + sizeof (aggr_pseudo_rx_group_t) * MAX_GROUPS_PER_PORT);
1355 1414 bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1356 1415 aggr_lacp_init_grp(grp);
1357 1416
1358 - grp->lg_rx_group.arg_untagged = 0;
1359 - list_create(&(grp->lg_rx_group.arg_vlans), sizeof (aggr_vlan_t),
1360 - offsetof(aggr_vlan_t, av_link));
1361 -
1362 1417 /* add MAC ports to group */
1363 1418 grp->lg_ports = NULL;
1364 1419 grp->lg_nports = 0;
1365 1420 grp->lg_nattached_ports = 0;
1366 1421 grp->lg_ntx_ports = 0;
1367 1422
1368 1423 /*
1369 1424 * If key is not specified by the user, allocate the key.
1370 1425 */
1371 1426 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1372 1427 err = ENOMEM;
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
1373 1428 goto bail;
1374 1429 }
1375 1430 grp->lg_key = key;
1376 1431
1377 1432 for (i = 0; i < nports; i++) {
1378 1433 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
1379 1434 if (err != 0)
1380 1435 goto bail;
1381 1436 }
1382 1437
1438 + grp->lg_rx_group_count = 1;
1439 +
1440 + for (i = 0, port = grp->lg_ports; port != NULL;
1441 + i++, port = port->lp_next) {
1442 + uint_t num_rgroups;
1443 +
1444 + mac_perim_enter_by_mh(port->lp_mh, &mph);
1445 + num_rgroups = mac_get_num_rx_groups(port->lp_mh);
1446 + mac_perim_exit(mph);
1447 +
1448 + /*
1449 + * Utilize all the groups in a port. If some ports
1450 + * have fewer groups than others, then traffic destined
1451 + * for the same unicast address may be HW classified
1452 + * on some ports but SW classified by aggr when
1453 + * arriving on other ports.
1454 + */
1455 + grp->lg_rx_group_count = MAX(grp->lg_rx_group_count,
1456 + num_rgroups);
1457 + }
1458 +
1383 1459 /*
1460 + * There could be cases where the hardware provides more
1461 + * groups than aggr can support. Make sure we never go above
1462 + * the max aggr can support.
1463 + */
1464 + grp->lg_rx_group_count = MIN(grp->lg_rx_group_count,
1465 + MAX_GROUPS_PER_PORT);
1466 +
1467 + ASSERT3U(grp->lg_rx_group_count, >, 0);
1468 + for (i = 0; i < MAX_GROUPS_PER_PORT; i++) {
1469 + grp->lg_rx_groups[i].arg_index = i;
1470 + grp->lg_rx_groups[i].arg_untagged = 0;
1471 + list_create(&(grp->lg_rx_groups[i].arg_vlans),
1472 + sizeof (aggr_vlan_t), offsetof(aggr_vlan_t, av_link));
1473 + }
1474 +
1475 + /*
1384 1476 * If no explicit MAC address was specified by the administrator,
1385 1477 * set it to the MAC address of the first port.
1386 1478 */
1387 1479 grp->lg_addr_fixed = mac_fixed;
1388 1480 if (grp->lg_addr_fixed) {
1389 1481 /* validate specified address */
1390 1482 if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1391 1483 err = EINVAL;
1392 1484 goto bail;
1393 1485 }
1394 1486 bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1395 1487 } else {
1396 1488 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1397 1489 grp->lg_mac_addr_port = grp->lg_ports;
1398 1490 }
1399 1491
1400 - /* set the initial group capabilities */
1492 + /* Set the initial group capabilities. */
1401 1493 aggr_grp_capab_set(grp);
1402 1494
1403 1495 if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1404 1496 err = ENOMEM;
1405 1497 goto bail;
1406 1498 }
1407 1499 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1408 1500 mac->m_driver = grp;
1409 1501 mac->m_dip = aggr_dip;
1410 1502 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1411 1503 mac->m_src_addr = grp->lg_addr;
1412 1504 mac->m_callbacks = &aggr_m_callbacks;
1413 1505 mac->m_min_sdu = 0;
1414 1506 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1415 1507 mac->m_margin = aggr_grp_max_margin(grp);
1416 1508 mac->m_v12n = MAC_VIRT_LEVEL1;
1417 1509 err = mac_register(mac, &grp->lg_mh);
1418 1510 mac_free(mac);
1419 1511 if (err != 0)
1420 1512 goto bail;
1421 1513
1422 1514 err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
1423 1515 if (err != 0) {
1424 1516 (void) mac_unregister(grp->lg_mh);
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
1425 1517 grp->lg_mh = NULL;
1426 1518 goto bail;
1427 1519 }
1428 1520
1429 1521 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1430 1522
1431 1523 /*
1432 1524 * Update the MAC address of the constituent ports.
1433 1525 * None of the ports are attached at this time, so the link state of the
1434 1526 * aggregation will not change.
1527 + *
1528 + * All ports take on the primary MAC address of the aggr
1529 + * (lg_aggr). At this point, none of the ports are attached;
1530 + * thus the link state of the aggregation will not change.
1435 1531 */
1436 1532 link_state_changed = aggr_grp_update_ports_mac(grp);
1437 1533 ASSERT(!link_state_changed);
1438 1534
1439 - /* update outbound load balancing policy */
1535 + /* Update outbound load balancing policy. */
1440 1536 aggr_send_update_policy(grp, policy);
1441 1537
1442 - /* set LACP mode */
1538 + /* Set LACP mode. */
1443 1539 aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1444 1540
1445 1541 /*
1446 1542 * Attach each port if necessary.
1447 1543 */
1448 1544 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1449 1545 /*
1450 - * Create the pseudo ring for each HW ring of the underlying
1451 - * port. Note that this is done after the aggr registers the
1452 - * mac.
1546 + * Create the pseudo ring for each HW ring of the
1547 + * underlying port. Note that this is done after the
1548 + * aggr registers its MAC.
1453 1549 */
1454 - VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
1455 - VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1550 + VERIFY3S(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group),
1551 + ==, 0);
1552 +
1553 + for (i = 0; i < grp->lg_rx_group_count; i++) {
1554 + VERIFY3S(aggr_add_pseudo_rx_group(port,
1555 + &grp->lg_rx_groups[i]), ==, 0);
1556 + }
1557 +
1456 1558 if (aggr_port_notify_link(grp, port))
1457 1559 link_state_changed = B_TRUE;
1458 1560
1459 1561 /*
1460 1562 * Initialize the callback functions for this port.
1461 1563 */
1462 1564 aggr_port_init_callbacks(port);
1463 1565 }
1464 1566
1465 1567 if (link_state_changed)
1466 1568 mac_link_update(grp->lg_mh, grp->lg_link_state);
1467 1569
1468 1570 /* add new group to hash table */
1469 1571 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1470 1572 (mod_hash_val_t)grp);
1471 1573 ASSERT(err == 0);
1472 1574 aggr_grp_cnt++;
1473 1575
1474 1576 mac_perim_exit(mph);
1475 1577 rw_exit(&aggr_grp_lock);
1476 1578 return (0);
1477 1579
1478 1580 bail:
1479 1581
1480 1582 grp->lg_closing = B_TRUE;
1481 1583
1482 1584 port = grp->lg_ports;
1483 1585 while (port != NULL) {
1484 1586 aggr_port_t *cport;
1485 1587
1486 1588 cport = port->lp_next;
1487 1589 aggr_port_delete(port);
1488 1590 port = cport;
1489 1591 }
1490 1592
1491 1593 /*
1492 1594 * Inform the lacp_rx thread to exit.
1493 1595 */
1494 1596 mutex_enter(&grp->lg_lacp_lock);
1495 1597 grp->lg_lacp_done = B_TRUE;
1496 1598 cv_signal(&grp->lg_lacp_cv);
1497 1599 while (grp->lg_lacp_rx_thread != NULL)
1498 1600 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1499 1601 mutex_exit(&grp->lg_lacp_lock);
1500 1602 /*
1501 1603 * Inform the tx_notify thread to exit.
1502 1604 */
1503 1605 mutex_enter(&grp->lg_tx_flowctl_lock);
1504 1606 if (grp->lg_tx_notify_thread != NULL) {
1505 1607 tid = grp->lg_tx_notify_thread->t_did;
1506 1608 grp->lg_tx_notify_done = B_TRUE;
1507 1609 cv_signal(&grp->lg_tx_flowctl_cv);
1508 1610 }
1509 1611 mutex_exit(&grp->lg_tx_flowctl_lock);
1510 1612 if (tid != 0)
1511 1613 thread_join(tid);
1512 1614
1513 1615 kmem_free(grp->lg_tx_blocked_rings,
1514 1616 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1515 1617 rw_exit(&aggr_grp_lock);
1516 1618 AGGR_GRP_REFRELE(grp);
1517 1619 return (err);
1518 1620 }
1519 1621
1520 1622 /*
1521 1623 * Return a pointer to the member of a group with specified linkid.
1522 1624 */
1523 1625 static aggr_port_t *
1524 1626 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1525 1627 {
1526 1628 aggr_port_t *port;
1527 1629
1528 1630 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1529 1631
1530 1632 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1531 1633 if (port->lp_linkid == linkid)
1532 1634 break;
1533 1635 }
1534 1636
1535 1637 return (port);
1536 1638 }
1537 1639
1538 1640 /*
1539 1641 * Stop, detach and remove a port from a link aggregation group.
1540 1642 */
1541 1643 static int
1542 1644 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1543 1645 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1544 1646 {
1545 1647 int rc = 0;
1546 1648 aggr_port_t **pport;
1547 1649 boolean_t mac_addr_changed = B_FALSE;
1548 1650 boolean_t link_state_changed = B_FALSE;
1549 1651 mac_perim_handle_t mph;
1550 1652 uint64_t val;
1551 1653 uint_t i;
1552 1654 uint_t stat;
1553 1655
1554 1656 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1555 1657 ASSERT(grp->lg_nports > 1);
1556 1658 ASSERT(!grp->lg_closing);
1557 1659
1558 1660 /* unlink port */
1559 1661 for (pport = &grp->lg_ports; *pport != port;
1560 1662 pport = &(*pport)->lp_next) {
1561 1663 if (*pport == NULL) {
1562 1664 rc = ENOENT;
1563 1665 goto done;
1564 1666 }
1565 1667 }
1566 1668 *pport = port->lp_next;
1567 1669
1568 1670 mac_perim_enter_by_mh(port->lp_mh, &mph);
1569 1671
1570 1672 /*
1571 1673 * If the MAC address of the port being removed was assigned
1572 1674 * to the group, update the group MAC address
1573 1675 * using the MAC address of a different port.
1574 1676 */
1575 1677 if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1576 1678 /*
1577 1679 * Set the MAC address of the group to the
1578 1680 * MAC address of its first port.
1579 1681 */
1580 1682 bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1581 1683 grp->lg_mac_addr_port = grp->lg_ports;
1582 1684 mac_addr_changed = B_TRUE;
1583 1685 }
1584 1686
1585 1687 link_state_changed = aggr_grp_detach_port(grp, port);
1586 1688
1587 1689 /*
1588 1690 * Add the counter statistics of the port while it was aggregated
1589 1691 * to the group's residual statistics. This is done by obtaining
1590 1692 * the current counter from the underlying MAC then subtracting the
1591 1693 * value of the counter at the moment it was added to the
1592 1694 * aggregation.
1593 1695 */
1594 1696 for (i = 0; i < MAC_NSTAT; i++) {
1595 1697 stat = i + MAC_STAT_MIN;
1596 1698 if (!MAC_STAT_ISACOUNTER(stat))
1597 1699 continue;
1598 1700 val = aggr_port_stat(port, stat);
1599 1701 val -= port->lp_stat[i];
1600 1702 mutex_enter(&grp->lg_stat_lock);
1601 1703 grp->lg_stat[i] += val;
1602 1704 mutex_exit(&grp->lg_stat_lock);
1603 1705 }
1604 1706 for (i = 0; i < ETHER_NSTAT; i++) {
1605 1707 stat = i + MACTYPE_STAT_MIN;
1606 1708 if (!ETHER_STAT_ISACOUNTER(stat))
1607 1709 continue;
1608 1710 val = aggr_port_stat(port, stat);
1609 1711 val -= port->lp_ether_stat[i];
1610 1712 mutex_enter(&grp->lg_stat_lock);
1611 1713 grp->lg_ether_stat[i] += val;
1612 1714 mutex_exit(&grp->lg_stat_lock);
1613 1715 }
1614 1716
1615 1717 grp->lg_nports--;
1616 1718 mac_perim_exit(mph);
1617 1719
1618 1720 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1619 1721 aggr_port_delete(port);
1620 1722
1621 1723 /*
1622 1724 * If the group MAC address has changed, update the MAC address of
1623 1725 * the remaining constituent ports according to the new MAC
1624 1726 * address of the group.
1625 1727 */
1626 1728 if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1627 1729 link_state_changed = B_TRUE;
1628 1730
1629 1731 done:
1630 1732 if (mac_addr_changedp != NULL)
1631 1733 *mac_addr_changedp = mac_addr_changed;
1632 1734 if (link_state_changedp != NULL)
1633 1735 *link_state_changedp = link_state_changed;
1634 1736
1635 1737 return (rc);
1636 1738 }
1637 1739
1638 1740 /*
1639 1741 * Remove one or more ports from an existing link aggregation group.
1640 1742 */
1641 1743 int
1642 1744 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1643 1745 {
1644 1746 int rc = 0, i;
1645 1747 aggr_grp_t *grp = NULL;
1646 1748 aggr_port_t *port;
1647 1749 boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1648 1750 boolean_t link_state_update = B_FALSE, link_state_changed;
1649 1751 mac_perim_handle_t mph, pmph;
1650 1752
1651 1753 /* get group corresponding to linkid */
1652 1754 rw_enter(&aggr_grp_lock, RW_READER);
1653 1755 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1654 1756 (mod_hash_val_t *)&grp) != 0) {
1655 1757 rw_exit(&aggr_grp_lock);
1656 1758 return (ENOENT);
1657 1759 }
1658 1760 AGGR_GRP_REFHOLD(grp);
1659 1761
1660 1762 /*
1661 1763 * Hold the perimeter so that the aggregation won't be destroyed.
1662 1764 */
1663 1765 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1664 1766 rw_exit(&aggr_grp_lock);
1665 1767
1666 1768 /* we need to keep at least one port per group */
1667 1769 if (nports >= grp->lg_nports) {
1668 1770 rc = EINVAL;
1669 1771 goto bail;
1670 1772 }
1671 1773
1672 1774 /* first verify that all the ports are valid */
1673 1775 for (i = 0; i < nports; i++) {
1674 1776 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1675 1777 /* port not found */
1676 1778 rc = ENOENT;
1677 1779 goto bail;
1678 1780 }
1679 1781 }
1680 1782
1681 1783 /* clear the promiscuous mode for the specified ports */
1682 1784 for (i = 0; i < nports && rc == 0; i++) {
1683 1785 /* lookup port */
1684 1786 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1685 1787 ASSERT(port != NULL);
1686 1788
1687 1789 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1688 1790 rc = aggr_port_promisc(port, B_FALSE);
1689 1791 mac_perim_exit(pmph);
1690 1792 }
1691 1793 if (rc != 0) {
1692 1794 for (i = 0; i < nports; i++) {
1693 1795 port = aggr_grp_port_lookup(grp,
1694 1796 ports[i].lp_linkid);
1695 1797 ASSERT(port != NULL);
1696 1798
1697 1799 /*
1698 1800 * Turn the promiscuous mode back on if it is required
1699 1801 * to receive the non-primary address over a port, or
1700 1802 * the promiscuous mode is enabled over the aggr.
1701 1803 */
1702 1804 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1703 1805 if (port->lp_started && (grp->lg_promisc ||
1704 1806 port->lp_prom_addr != NULL)) {
1705 1807 (void) aggr_port_promisc(port, B_TRUE);
1706 1808 }
1707 1809 mac_perim_exit(pmph);
1708 1810 }
1709 1811 goto bail;
1710 1812 }
1711 1813
1712 1814 /* remove the specified ports from group */
1713 1815 for (i = 0; i < nports; i++) {
1714 1816 /* lookup port */
1715 1817 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1716 1818 ASSERT(port != NULL);
1717 1819
1718 1820 /* stop port if group has already been started */
1719 1821 if (grp->lg_started) {
1720 1822 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1721 1823 aggr_port_stop(port);
1722 1824 mac_perim_exit(pmph);
1723 1825 }
1724 1826
1725 1827 /*
1726 1828 * aggr_rem_pseudo_tx_group() is not called here. Instead
↓ open down ↓ |
261 lines elided |
↑ open up ↑ |
1727 1829 * it is called from inside aggr_grp_rem_port() after the
1728 1830 * port has been detached. The reason is that
1729 1831 * aggr_rem_pseudo_tx_group() removes one ring at a time
1730 1832 * and if there is still traffic going on, then there
1731 1833 * is the possibility of aggr_find_tx_ring() returning a
1732 1834 * removed ring for transmission. Once the port has been
1733 1835 * detached, that port will not be used and
1734 1836 * aggr_find_tx_ring() will not return any rings
1735 1837 * belonging to it.
1736 1838 */
1737 - aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1839 + for (uint_t j = 0; j < grp->lg_rx_group_count; j++)
1840 + aggr_rem_pseudo_rx_group(port, &grp->lg_rx_groups[j]);
1738 1841
1739 1842 /* remove port from group */
1740 1843 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1741 1844 &link_state_changed);
1742 1845 ASSERT(rc == 0);
1743 1846 mac_addr_update = mac_addr_update || mac_addr_changed;
1744 1847 link_state_update = link_state_update || link_state_changed;
1745 1848 }
1746 1849
1747 1850 bail:
1748 1851 if (mac_addr_update)
1749 1852 mac_unicst_update(grp->lg_mh, grp->lg_addr);
1750 1853 if (link_state_update)
1751 1854 mac_link_update(grp->lg_mh, grp->lg_link_state);
1752 1855
1753 1856 mac_perim_exit(mph);
1754 1857 AGGR_GRP_REFRELE(grp);
1755 1858
1756 1859 return (rc);
1757 1860 }
1758 1861
1759 1862 int
1760 1863 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
1761 1864 {
1762 1865 aggr_grp_t *grp = NULL;
1763 1866 aggr_port_t *port, *cport;
1764 1867 datalink_id_t tmpid;
1765 1868 mod_hash_val_t val;
1766 1869 mac_perim_handle_t mph, pmph;
1767 1870 int err;
1768 1871 kt_did_t tid = 0;
1769 1872
1770 1873 rw_enter(&aggr_grp_lock, RW_WRITER);
1771 1874
1772 1875 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1773 1876 (mod_hash_val_t *)&grp) != 0) {
1774 1877 rw_exit(&aggr_grp_lock);
1775 1878 return (ENOENT);
1776 1879 }
1777 1880
1778 1881 /*
1779 1882 * Note that dls_devnet_destroy() must be called before lg_lock is
1780 1883 * held. Otherwise, it will deadlock if another thread is in
1781 1884 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1782 1885 * dls_devnet_destroy() needs to delete.
1783 1886 */
1784 1887 if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1785 1888 rw_exit(&aggr_grp_lock);
1786 1889 return (err);
1787 1890 }
1788 1891 ASSERT(linkid == tmpid);
1789 1892
1790 1893 /*
1791 1894 * Unregister from the MAC service module. Since this can
1792 1895 * fail if a client hasn't closed the MAC port, we gracefully
1793 1896 * fail the operation.
1794 1897 */
1795 1898 if ((err = mac_disable(grp->lg_mh)) != 0) {
1796 1899 (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1797 1900 rw_exit(&aggr_grp_lock);
1798 1901 return (err);
1799 1902 }
1800 1903 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1801 1904 ASSERT(grp == (aggr_grp_t *)val);
1802 1905
1803 1906 ASSERT(aggr_grp_cnt > 0);
1804 1907 aggr_grp_cnt--;
1805 1908 rw_exit(&aggr_grp_lock);
1806 1909
1807 1910 /*
1808 1911 * Inform the lacp_rx thread to exit.
1809 1912 */
1810 1913 mutex_enter(&grp->lg_lacp_lock);
1811 1914 grp->lg_lacp_done = B_TRUE;
1812 1915 cv_signal(&grp->lg_lacp_cv);
1813 1916 while (grp->lg_lacp_rx_thread != NULL)
1814 1917 cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1815 1918 mutex_exit(&grp->lg_lacp_lock);
1816 1919 /*
1817 1920 * Inform the tx_notify_thread to exit.
1818 1921 */
1819 1922 mutex_enter(&grp->lg_tx_flowctl_lock);
1820 1923 if (grp->lg_tx_notify_thread != NULL) {
1821 1924 tid = grp->lg_tx_notify_thread->t_did;
1822 1925 grp->lg_tx_notify_done = B_TRUE;
1823 1926 cv_signal(&grp->lg_tx_flowctl_cv);
1824 1927 }
1825 1928 mutex_exit(&grp->lg_tx_flowctl_lock);
1826 1929 if (tid != 0)
1827 1930 thread_join(tid);
1828 1931
1829 1932 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1830 1933
1831 1934 grp->lg_closing = B_TRUE;
↓ open down ↓ |
84 lines elided |
↑ open up ↑ |
1832 1935 /* detach and free MAC ports associated with group */
1833 1936 port = grp->lg_ports;
1834 1937 while (port != NULL) {
1835 1938 cport = port->lp_next;
1836 1939 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1837 1940 if (grp->lg_started)
1838 1941 aggr_port_stop(port);
1839 1942 (void) aggr_grp_detach_port(grp, port);
1840 1943 mac_perim_exit(pmph);
1841 1944 aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1842 - aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1945 + for (uint_t i = 0; i < grp->lg_rx_group_count; i++)
1946 + aggr_rem_pseudo_rx_group(port, &grp->lg_rx_groups[i]);
1843 1947 aggr_port_delete(port);
1844 1948 port = cport;
1845 1949 }
1846 1950
1847 1951 mac_perim_exit(mph);
1848 1952
1849 1953 kmem_free(grp->lg_tx_blocked_rings,
1850 1954 (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1851 1955 /*
1852 1956 * Wait for the port's lacp timer thread and its notification callback
1853 1957 * to exit before calling mac_unregister() since both need to access
1854 1958 * the mac perimeter of the grp.
1855 1959 */
1856 1960 aggr_grp_port_wait(grp);
1857 1961
1858 1962 VERIFY(mac_unregister(grp->lg_mh) == 0);
1859 1963 grp->lg_mh = NULL;
1860 1964
1861 - list_destroy(&(grp->lg_rx_group.arg_vlans));
1965 + for (uint_t i = 0; i < MAX_GROUPS_PER_PORT; i++) {
1966 + list_destroy(&(grp->lg_rx_groups[i].arg_vlans));
1967 + }
1862 1968
1863 1969 AGGR_GRP_REFRELE(grp);
1864 1970 return (0);
1865 1971 }
1866 1972
1867 1973 void
1868 1974 aggr_grp_free(aggr_grp_t *grp)
1869 1975 {
1870 1976 ASSERT(grp->lg_refs == 0);
1871 1977 ASSERT(grp->lg_port_ref == 0);
1872 1978 if (grp->lg_key > AGGR_MAX_KEY) {
1873 1979 id_free(key_ids, grp->lg_key);
1874 1980 grp->lg_key = 0;
1875 1981 }
1876 1982 kmem_cache_free(aggr_grp_cache, grp);
1877 1983 }
1878 1984
1879 1985 int
1880 1986 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1881 1987 aggr_grp_info_new_grp_fn_t new_grp_fn,
1882 1988 aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1883 1989 {
1884 1990 aggr_grp_t *grp;
1885 1991 aggr_port_t *port;
1886 1992 mac_perim_handle_t mph, pmph;
1887 1993 int rc = 0;
1888 1994
1889 1995 /*
1890 1996 * Make sure that the aggregation link is visible from the caller's
1891 1997 * zone.
1892 1998 */
1893 1999 if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1894 2000 return (ENOENT);
1895 2001
1896 2002 rw_enter(&aggr_grp_lock, RW_READER);
1897 2003
1898 2004 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1899 2005 (mod_hash_val_t *)&grp) != 0) {
1900 2006 rw_exit(&aggr_grp_lock);
1901 2007 return (ENOENT);
1902 2008 }
1903 2009 AGGR_GRP_REFHOLD(grp);
1904 2010
1905 2011 mac_perim_enter_by_mh(grp->lg_mh, &mph);
1906 2012 rw_exit(&aggr_grp_lock);
1907 2013
1908 2014 rc = new_grp_fn(fn_arg, grp->lg_linkid,
1909 2015 (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1910 2016 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1911 2017 grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1912 2018
1913 2019 if (rc != 0)
1914 2020 goto bail;
1915 2021
1916 2022 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1917 2023 mac_perim_enter_by_mh(port->lp_mh, &pmph);
1918 2024 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1919 2025 port->lp_state, &port->lp_lacp.ActorOperPortState);
1920 2026 mac_perim_exit(pmph);
1921 2027
1922 2028 if (rc != 0)
1923 2029 goto bail;
1924 2030 }
1925 2031
1926 2032 bail:
1927 2033 mac_perim_exit(mph);
1928 2034 AGGR_GRP_REFRELE(grp);
1929 2035 return (rc);
1930 2036 }
1931 2037
1932 2038 /*ARGSUSED*/
1933 2039 static void
1934 2040 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1935 2041 {
1936 2042 miocnak(q, mp, 0, ENOTSUP);
1937 2043 }
1938 2044
1939 2045 static int
1940 2046 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1941 2047 {
1942 2048 aggr_port_t *port;
1943 2049 uint_t stat_index;
1944 2050
1945 2051 ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
1946 2052
1947 2053 /* We only aggregate counter statistics. */
1948 2054 if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1949 2055 IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1950 2056 return (ENOTSUP);
1951 2057 }
1952 2058
1953 2059 /*
1954 2060 * Counter statistics for a group are computed by aggregating the
1955 2061 * counters of the member MACs while they were aggregated, plus
1956 2062 * the residual counter of the group itself, which is updated each
1957 2063 * time a MAC is removed from the group.
1958 2064 */
1959 2065 *val = 0;
1960 2066 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1961 2067 /* actual port statistic */
1962 2068 *val += aggr_port_stat(port, stat);
1963 2069 /*
1964 2070 * minus the port stat when it was added, plus any residual
1965 2071 * amount for the group.
1966 2072 */
1967 2073 if (IS_MAC_STAT(stat)) {
1968 2074 stat_index = stat - MAC_STAT_MIN;
1969 2075 *val -= port->lp_stat[stat_index];
1970 2076 *val += grp->lg_stat[stat_index];
1971 2077 } else if (IS_MACTYPE_STAT(stat)) {
1972 2078 stat_index = stat - MACTYPE_STAT_MIN;
1973 2079 *val -= port->lp_ether_stat[stat_index];
1974 2080 *val += grp->lg_ether_stat[stat_index];
1975 2081 }
1976 2082 }
1977 2083 return (0);
1978 2084 }
1979 2085
1980 2086 int
1981 2087 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1982 2088 {
1983 2089 aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
1984 2090
1985 2091 if (rx_ring->arr_hw_rh != NULL) {
1986 2092 *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
1987 2093 } else {
1988 2094 aggr_port_t *port = rx_ring->arr_port;
1989 2095
1990 2096 *val = mac_stat_get(port->lp_mh, stat);
1991 2097
1992 2098 }
1993 2099 return (0);
1994 2100 }
1995 2101
1996 2102 int
1997 2103 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1998 2104 {
1999 2105 aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
2000 2106
2001 2107 if (tx_ring->atr_hw_rh != NULL) {
2002 2108 *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
2003 2109 } else {
2004 2110 aggr_port_t *port = tx_ring->atr_port;
2005 2111
2006 2112 *val = mac_stat_get(port->lp_mh, stat);
2007 2113 }
2008 2114 return (0);
2009 2115 }
2010 2116
2011 2117 static int
2012 2118 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
2013 2119 {
2014 2120 aggr_grp_t *grp = arg;
2015 2121 int rval = 0;
2016 2122
2017 2123 mutex_enter(&grp->lg_stat_lock);
2018 2124
2019 2125 switch (stat) {
2020 2126 case MAC_STAT_IFSPEED:
2021 2127 *val = grp->lg_ifspeed;
2022 2128 break;
2023 2129
2024 2130 case ETHER_STAT_LINK_DUPLEX:
2025 2131 *val = grp->lg_link_duplex;
2026 2132 break;
2027 2133
2028 2134 default:
2029 2135 /*
2030 2136 * For all other statistics, we return the aggregated stat
2031 2137 * from the underlying ports. aggr_grp_stat() will set
2032 2138 * rval appropriately if the statistic isn't a counter.
2033 2139 */
2034 2140 rval = aggr_grp_stat(grp, stat, val);
2035 2141 }
2036 2142
2037 2143 mutex_exit(&grp->lg_stat_lock);
2038 2144 return (rval);
2039 2145 }
2040 2146
2041 2147 static int
2042 2148 aggr_m_start(void *arg)
2043 2149 {
2044 2150 aggr_grp_t *grp = arg;
2045 2151 aggr_port_t *port;
2046 2152 mac_perim_handle_t mph, pmph;
2047 2153
2048 2154 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2049 2155
2050 2156 /*
2051 2157 * Attempts to start all configured members of the group.
2052 2158 * Group members will be attached when their link-up notification
2053 2159 * is received.
2054 2160 */
2055 2161 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2056 2162 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2057 2163 if (aggr_port_start(port) != 0) {
2058 2164 mac_perim_exit(pmph);
2059 2165 continue;
2060 2166 }
2061 2167
2062 2168 /*
2063 2169 * Turn on the promiscuous mode if it is required to receive
2064 2170 * the non-primary address over a port, or the promiscuous
2065 2171 * mode is enabled over the aggr.
2066 2172 */
2067 2173 if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2068 2174 if (aggr_port_promisc(port, B_TRUE) != 0)
2069 2175 aggr_port_stop(port);
2070 2176 }
2071 2177 mac_perim_exit(pmph);
2072 2178 }
2073 2179
2074 2180 grp->lg_started = B_TRUE;
2075 2181
2076 2182 mac_perim_exit(mph);
2077 2183 return (0);
2078 2184 }
2079 2185
2080 2186 static void
2081 2187 aggr_m_stop(void *arg)
2082 2188 {
2083 2189 aggr_grp_t *grp = arg;
2084 2190 aggr_port_t *port;
2085 2191 mac_perim_handle_t mph, pmph;
2086 2192
2087 2193 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2088 2194
2089 2195 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2090 2196 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2091 2197
2092 2198 /* reset port promiscuous mode */
2093 2199 (void) aggr_port_promisc(port, B_FALSE);
2094 2200
2095 2201 aggr_port_stop(port);
2096 2202 mac_perim_exit(pmph);
2097 2203 }
2098 2204
2099 2205 grp->lg_started = B_FALSE;
2100 2206 mac_perim_exit(mph);
2101 2207 }
2102 2208
2103 2209 static int
2104 2210 aggr_m_promisc(void *arg, boolean_t on)
2105 2211 {
2106 2212 aggr_grp_t *grp = arg;
2107 2213 aggr_port_t *port;
2108 2214 boolean_t link_state_changed = B_FALSE;
2109 2215 mac_perim_handle_t mph, pmph;
2110 2216
2111 2217 AGGR_GRP_REFHOLD(grp);
2112 2218 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2113 2219
2114 2220 ASSERT(!grp->lg_closing);
2115 2221
2116 2222 if (on == grp->lg_promisc)
2117 2223 goto bail;
2118 2224
2119 2225 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2120 2226 int err = 0;
2121 2227
2122 2228 mac_perim_enter_by_mh(port->lp_mh, &pmph);
2123 2229 AGGR_PORT_REFHOLD(port);
2124 2230 if (!on && (port->lp_prom_addr == NULL))
2125 2231 err = aggr_port_promisc(port, B_FALSE);
2126 2232 else if (on && port->lp_started)
2127 2233 err = aggr_port_promisc(port, B_TRUE);
2128 2234
2129 2235 if (err != 0) {
2130 2236 if (aggr_grp_detach_port(grp, port))
2131 2237 link_state_changed = B_TRUE;
2132 2238 } else {
2133 2239 /*
2134 2240 * If a port was detached because of a previous
2135 2241 * failure changing the promiscuity, the port
2136 2242 * is reattached when it successfully changes
2137 2243 * the promiscuity now, and this might cause
2138 2244 * the link state of the aggregation to change.
2139 2245 */
2140 2246 if (aggr_grp_attach_port(grp, port))
2141 2247 link_state_changed = B_TRUE;
2142 2248 }
2143 2249 mac_perim_exit(pmph);
2144 2250 AGGR_PORT_REFRELE(port);
2145 2251 }
2146 2252
2147 2253 grp->lg_promisc = on;
2148 2254
2149 2255 if (link_state_changed)
2150 2256 mac_link_update(grp->lg_mh, grp->lg_link_state);
2151 2257
2152 2258 bail:
2153 2259 mac_perim_exit(mph);
2154 2260 AGGR_GRP_REFRELE(grp);
2155 2261
2156 2262 return (0);
2157 2263 }
2158 2264
2159 2265 static void
2160 2266 aggr_grp_port_rename(const char *new_name, void *arg)
2161 2267 {
2162 2268 /*
2163 2269 * aggr port's mac client name is the format of "aggr link name" plus
2164 2270 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2165 2271 */
2166 2272 int aggr_len, link_len, clnt_name_len, i;
2167 2273 char *str_end, *str_st, *str_del;
2168 2274 char aggr_name[MAXNAMELEN];
2169 2275 char link_name[MAXNAMELEN];
2170 2276 char *clnt_name;
2171 2277 aggr_grp_t *aggr_grp = arg;
2172 2278 aggr_port_t *aggr_port = aggr_grp->lg_ports;
2173 2279
2174 2280 for (i = 0; i < aggr_grp->lg_nports; i++) {
2175 2281 clnt_name = mac_client_name(aggr_port->lp_mch);
2176 2282 clnt_name_len = strlen(clnt_name);
2177 2283 str_st = clnt_name;
2178 2284 str_end = &(clnt_name[clnt_name_len]);
2179 2285 str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2180 2286 ASSERT(str_del != NULL);
2181 2287 aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2182 2288 link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2183 2289 bzero(aggr_name, MAXNAMELEN);
2184 2290 bzero(link_name, MAXNAMELEN);
2185 2291 bcopy(clnt_name, aggr_name, aggr_len);
2186 2292 bcopy(str_del, link_name, link_len + 1);
2187 2293 bzero(clnt_name, MAXNAMELEN);
2188 2294 (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2189 2295 link_name);
2190 2296
2191 2297 (void) mac_rename_primary(aggr_port->lp_mh, NULL);
2192 2298 aggr_port = aggr_port->lp_next;
2193 2299 }
2194 2300 }
2195 2301
2196 2302 /*
2197 2303 * Initialize the capabilities that are advertised for the group
2198 2304 * according to the capabilities of the constituent ports.
2199 2305 */
2200 2306 static boolean_t
2201 2307 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2202 2308 {
2203 2309 aggr_grp_t *grp = arg;
2204 2310
2205 2311 switch (cap) {
2206 2312 case MAC_CAPAB_HCKSUM: {
2207 2313 uint32_t *hcksum_txflags = cap_data;
2208 2314 *hcksum_txflags = grp->lg_hcksum_txflags;
2209 2315 break;
2210 2316 }
2211 2317 case MAC_CAPAB_LSO: {
2212 2318 mac_capab_lso_t *cap_lso = cap_data;
2213 2319
2214 2320 if (grp->lg_lso) {
2215 2321 *cap_lso = grp->lg_cap_lso;
2216 2322 break;
↓ open down ↓ |
345 lines elided |
↑ open up ↑ |
2217 2323 } else {
2218 2324 return (B_FALSE);
2219 2325 }
2220 2326 }
2221 2327 case MAC_CAPAB_NO_NATIVEVLAN:
2222 2328 return (!grp->lg_vlan);
2223 2329 case MAC_CAPAB_NO_ZCOPY:
2224 2330 return (!grp->lg_zcopy);
2225 2331 case MAC_CAPAB_RINGS: {
2226 2332 mac_capab_rings_t *cap_rings = cap_data;
2333 + uint_t ring_cnt = 0;
2227 2334
2335 + for (uint_t i = 0; i < grp->lg_rx_group_count; i++)
2336 + ring_cnt += grp->lg_rx_groups[i].arg_ring_cnt;
2337 +
2228 2338 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2229 2339 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2230 - cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2231 -
2232 - /*
2233 - * An aggregation advertises only one (pseudo) RX
2234 - * group, which virtualizes the main/primary group of
2235 - * the underlying devices.
2236 - */
2237 - cap_rings->mr_gnum = 1;
2340 + cap_rings->mr_rnum = ring_cnt;
2341 + cap_rings->mr_gnum = grp->lg_rx_group_count;
2238 2342 cap_rings->mr_gaddring = NULL;
2239 2343 cap_rings->mr_gremring = NULL;
2240 2344 } else {
2241 2345 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2242 2346 cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
2243 2347 cap_rings->mr_gnum = 0;
2244 2348 }
2245 2349 cap_rings->mr_rget = aggr_fill_ring;
2246 2350 cap_rings->mr_gget = aggr_fill_group;
2247 2351 break;
2248 2352 }
2249 2353 case MAC_CAPAB_AGGR:
2250 2354 {
2251 2355 mac_capab_aggr_t *aggr_cap;
2252 2356
2253 2357 if (cap_data != NULL) {
2254 2358 aggr_cap = cap_data;
2255 2359 aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2256 2360 aggr_cap->mca_unicst = aggr_m_unicst;
2257 2361 aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2258 2362 aggr_cap->mca_arg = arg;
2259 2363 }
2260 2364 return (B_TRUE);
2261 2365 }
2262 2366 default:
2263 2367 return (B_FALSE);
2264 2368 }
2265 2369 return (B_TRUE);
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
2266 2370 }
2267 2371
2268 2372 /*
2269 2373 * Callback function for MAC layer to register groups.
2270 2374 */
2271 2375 static void
2272 2376 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2273 2377 mac_group_info_t *infop, mac_group_handle_t gh)
2274 2378 {
2275 2379 aggr_grp_t *grp = arg;
2276 - aggr_pseudo_rx_group_t *rx_group;
2277 - aggr_pseudo_tx_group_t *tx_group;
2278 2380
2279 - ASSERT(index == 0);
2280 2381 if (rtype == MAC_RING_TYPE_RX) {
2281 - rx_group = &grp->lg_rx_group;
2382 + aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_groups[index];
2383 +
2282 2384 rx_group->arg_gh = gh;
2283 2385 rx_group->arg_grp = grp;
2284 2386
2285 2387 infop->mgi_driver = (mac_group_driver_t)rx_group;
2286 2388 infop->mgi_start = NULL;
2287 2389 infop->mgi_stop = NULL;
2288 2390 infop->mgi_addmac = aggr_addmac;
2289 2391 infop->mgi_remmac = aggr_remmac;
2290 2392 infop->mgi_count = rx_group->arg_ring_cnt;
2291 2393
2292 2394 /*
2293 2395 * Always set the HW VLAN callbacks. They are smart
2294 2396 * enough to know when a port has HW VLAN filters to
2295 2397 * program and when it doesn't.
2296 2398 */
2297 2399 infop->mgi_addvlan = aggr_addvlan;
2298 2400 infop->mgi_remvlan = aggr_remvlan;
2299 2401 } else {
2300 - tx_group = &grp->lg_tx_group;
2402 + aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group;
2403 +
2404 + ASSERT3S(index, ==, 0);
2301 2405 tx_group->atg_gh = gh;
2302 2406 }
2303 2407 }
2304 2408
2305 2409 /*
2306 2410 * Callback function for MAC layer to register all rings.
2307 2411 */
2308 2412 static void
2309 2413 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2310 2414 const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2311 2415 {
2312 2416 aggr_grp_t *grp = arg;
2313 2417
2314 2418 switch (rtype) {
2315 2419 case MAC_RING_TYPE_RX: {
2316 - aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group;
2420 + aggr_pseudo_rx_group_t *rx_group;
2317 2421 aggr_pseudo_rx_ring_t *rx_ring;
2318 2422 mac_intr_t aggr_mac_intr;
2319 2423
2320 - ASSERT(rg_index == 0);
2321 -
2322 - ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2424 + rx_group = &grp->lg_rx_groups[rg_index];
2425 + ASSERT3S(index, >=, 0);
2426 + ASSERT3S(index, <, rx_group->arg_ring_cnt);
2323 2427 rx_ring = rx_group->arg_rings + index;
2324 2428 rx_ring->arr_rh = rh;
2325 2429
2326 2430 /*
2327 2431 * Entrypoint to enable interrupt (disable poll) and
2328 2432 * disable interrupt (enable poll).
2329 2433 */
2330 2434 aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2331 2435 aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2332 2436 aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
2333 2437 aggr_mac_intr.mi_ddi_handle = NULL;
2334 2438
2335 2439 infop->mri_driver = (mac_ring_driver_t)rx_ring;
2336 - infop->mri_start = aggr_pseudo_start_ring;
2337 - infop->mri_stop = NULL;
2440 + infop->mri_start = aggr_pseudo_start_rx_ring;
2441 + infop->mri_stop = aggr_pseudo_stop_rx_ring;
2338 2442
2339 2443 infop->mri_intr = aggr_mac_intr;
2340 2444 infop->mri_poll = aggr_rx_poll;
2341 2445
2342 2446 infop->mri_stat = aggr_rx_ring_stat;
2343 2447 break;
2344 2448 }
2345 2449 case MAC_RING_TYPE_TX: {
2346 2450 aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group;
2347 2451 aggr_pseudo_tx_ring_t *tx_ring;
2348 2452
2349 2453 ASSERT(rg_index == -1);
2350 2454 ASSERT(index < tx_group->atg_ring_cnt);
2351 2455
2352 2456 tx_ring = &tx_group->atg_rings[index];
2353 2457 tx_ring->atr_rh = rh;
2354 2458
2355 2459 infop->mri_driver = (mac_ring_driver_t)tx_ring;
2356 2460 infop->mri_start = NULL;
2357 2461 infop->mri_stop = NULL;
2358 2462 infop->mri_tx = aggr_ring_tx;
2359 2463 infop->mri_stat = aggr_tx_ring_stat;
2360 2464 /*
2361 2465 * Use the hw TX ring handle to find if the ring needs
2362 2466 * serialization or not. For NICs that do not expose
2363 2467 * Tx rings, atr_hw_rh will be NULL.
2364 2468 */
2365 2469 if (tx_ring->atr_hw_rh != NULL) {
2366 2470 infop->mri_flags =
2367 2471 mac_hwring_getinfo(tx_ring->atr_hw_rh);
2368 2472 }
2369 2473 break;
2370 2474 }
2371 2475 default:
2372 2476 break;
2373 2477 }
2374 2478 }
2375 2479
2376 2480 static mblk_t *
2377 2481 aggr_rx_poll(void *arg, int bytes_to_pickup)
2378 2482 {
2379 2483 aggr_pseudo_rx_ring_t *rr_ring = arg;
2380 2484 aggr_port_t *port = rr_ring->arr_port;
2381 2485 aggr_grp_t *grp = port->lp_grp;
2382 2486 mblk_t *mp_chain, *mp, **mpp;
2383 2487
2384 2488 mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2385 2489
2386 2490 if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2387 2491 return (mp_chain);
2388 2492
2389 2493 mpp = &mp_chain;
2390 2494 while ((mp = *mpp) != NULL) {
2391 2495 if (MBLKL(mp) >= sizeof (struct ether_header)) {
2392 2496 struct ether_header *ehp;
2393 2497
2394 2498 ehp = (struct ether_header *)mp->b_rptr;
2395 2499 if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2396 2500 *mpp = mp->b_next;
2397 2501 mp->b_next = NULL;
2398 2502 aggr_recv_lacp(port,
2399 2503 (mac_resource_handle_t)rr_ring, mp);
2400 2504 continue;
2401 2505 }
2402 2506 }
2403 2507
2404 2508 if (!port->lp_collector_enabled) {
2405 2509 *mpp = mp->b_next;
2406 2510 mp->b_next = NULL;
2407 2511 freemsg(mp);
2408 2512 continue;
2409 2513 }
2410 2514 mpp = &mp->b_next;
2411 2515 }
2412 2516 return (mp_chain);
2413 2517 }
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
2414 2518
2415 2519 static int
2416 2520 aggr_addmac(void *arg, const uint8_t *mac_addr)
2417 2521 {
2418 2522 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
2419 2523 aggr_unicst_addr_t *addr, **pprev;
2420 2524 aggr_grp_t *grp = rx_group->arg_grp;
2421 2525 aggr_port_t *port, *p;
2422 2526 mac_perim_handle_t mph;
2423 2527 int err = 0;
2528 + uint_t idx = rx_group->arg_index;
2424 2529
2425 2530 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2426 2531
2427 2532 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2428 2533 mac_perim_exit(mph);
2429 2534 return (0);
2430 2535 }
2431 2536
2432 2537 /*
2433 2538 * Insert this mac address into the list of mac addresses owned by
2434 2539 * the aggregation pseudo group.
2435 2540 */
2436 2541 pprev = &rx_group->arg_macaddr;
2437 2542 while ((addr = *pprev) != NULL) {
2438 2543 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2439 2544 mac_perim_exit(mph);
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
2440 2545 return (EEXIST);
2441 2546 }
2442 2547 pprev = &addr->aua_next;
2443 2548 }
2444 2549 addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2445 2550 bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2446 2551 addr->aua_next = NULL;
2447 2552 *pprev = addr;
2448 2553
2449 2554 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2450 - if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2555 + if ((err = aggr_port_addmac(port, idx, mac_addr)) != 0)
2451 2556 break;
2452 2557
2453 2558 if (err != 0) {
2454 2559 for (p = grp->lg_ports; p != port; p = p->lp_next)
2455 - aggr_port_remmac(p, mac_addr);
2560 + aggr_port_remmac(p, idx, mac_addr);
2456 2561
2457 2562 *pprev = NULL;
2458 2563 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2459 2564 }
2460 2565
2461 2566 mac_perim_exit(mph);
2462 2567 return (err);
2463 2568 }
2464 2569
2465 2570 static int
2466 2571 aggr_remmac(void *arg, const uint8_t *mac_addr)
2467 2572 {
2468 2573 aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg;
2469 2574 aggr_unicst_addr_t *addr, **pprev;
2470 2575 aggr_grp_t *grp = rx_group->arg_grp;
2471 2576 aggr_port_t *port;
2472 2577 mac_perim_handle_t mph;
2473 2578 int err = 0;
2474 2579
2475 2580 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2476 2581
2477 2582 if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2478 2583 mac_perim_exit(mph);
2479 2584 return (0);
2480 2585 }
2481 2586
2482 2587 /*
2483 2588 * Locate this mac address in the list of mac addresses owned by
2484 2589 * the aggregation pseudo group so that it can be unlinked and freed.
2485 2590 */
2486 2591 pprev = &rx_group->arg_macaddr;
2487 2592 while ((addr = *pprev) != NULL) {
2488 2593 if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2489 2594 pprev = &addr->aua_next;
↓ open down ↓ |
24 lines elided |
↑ open up ↑ |
2490 2595 continue;
2491 2596 }
2492 2597 break;
2493 2598 }
2494 2599 if (addr == NULL) {
2495 2600 mac_perim_exit(mph);
2496 2601 return (EINVAL);
2497 2602 }
2498 2603
2499 2604 for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2500 - aggr_port_remmac(port, mac_addr);
2605 + aggr_port_remmac(port, rx_group->arg_index, mac_addr);
2501 2606
2502 2607 *pprev = addr->aua_next;
2503 2608 kmem_free(addr, sizeof (aggr_unicst_addr_t));
2504 2609
2505 2610 mac_perim_exit(mph);
2506 2611 return (err);
2507 2612 }
2508 2613
2509 2614 /*
2510 2615 * Search for VID in the Rx group's list and return a pointer if
2511 2616 * found. Otherwise return NULL.
2512 2617 */
2513 2618 static aggr_vlan_t *
2514 2619 aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
2515 2620 {
2516 2621 ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
2517 2622 for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
2518 2623 avp = list_next(&rx_group->arg_vlans, avp)) {
2519 2624 if (avp->av_vid == vid)
2520 2625 return (avp);
2521 2626 }
2522 2627
2523 2628 return (NULL);
2524 2629 }
2525 2630
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
2526 2631 /*
2527 2632 * Accept traffic on the specified VID.
2528 2633 *
2529 2634 * Persist VLAN state in the aggr so that ports added later will
2530 2635 * receive the correct filters. In the future it would be nice to
2531 2636 * allow aggr to iterate its clients instead of duplicating state.
2532 2637 */
2533 2638 static int
2534 2639 aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
2535 2640 {
2536 - aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2641 + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2537 2642 aggr_grp_t *aggr = rx_group->arg_grp;
2538 2643 aggr_port_t *port, *p;
2539 2644 mac_perim_handle_t mph;
2540 2645 int err = 0;
2541 2646 aggr_vlan_t *avp = NULL;
2647 + uint_t idx = rx_group->arg_index;
2542 2648
2543 2649 mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2544 2650
2545 2651 if (vid == MAC_VLAN_UNTAGGED) {
2546 2652 /*
2547 2653 * Aggr is both a MAC provider and MAC client. As a
2548 2654 * MAC provider it is passed MAC_VLAN_UNTAGGED by its
2549 2655 * client. As a client itself, it should pass
2550 2656 * VLAN_ID_NONE to its ports.
2551 2657 */
2552 2658 vid = VLAN_ID_NONE;
2553 2659 rx_group->arg_untagged++;
2554 2660 goto update_ports;
2555 2661 }
2556 2662
2557 2663 avp = aggr_find_vlan(rx_group, vid);
2558 2664
2559 2665 if (avp != NULL) {
2560 2666 avp->av_refs++;
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
2561 2667 mac_perim_exit(mph);
2562 2668 return (0);
2563 2669 }
2564 2670
2565 2671 avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
2566 2672 avp->av_vid = vid;
2567 2673 avp->av_refs = 1;
2568 2674
2569 2675 update_ports:
2570 2676 for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2571 - if ((err = aggr_port_addvlan(port, vid)) != 0)
2677 + if ((err = aggr_port_addvlan(port, idx, vid)) != 0)
2572 2678 break;
2573 2679
2574 2680 if (err != 0) {
2575 2681 /*
2576 2682 * If any of these calls fail then we are in a
2577 2683 * situation where the ports have different HW state.
2578 2684 * There's no reasonable action the MAC client can
2579 2685 * take in this scenario to rectify the situation.
2580 2686 */
2581 2687 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2582 2688 int err2;
2583 2689
2584 - if ((err2 = aggr_port_remvlan(p, vid)) != 0) {
2690 + if ((err2 = aggr_port_remvlan(p, idx, vid)) != 0) {
2585 2691 cmn_err(CE_WARN, "Failed to remove VLAN %u"
2586 2692 " from port %s: errno %d.", vid,
2587 2693 mac_client_name(p->lp_mch), err2);
2588 2694 }
2589 2695
2590 2696 }
2591 2697
2592 2698 if (vid == VLAN_ID_NONE)
2593 2699 rx_group->arg_untagged--;
2594 2700
2595 2701 if (avp != NULL) {
2596 2702 kmem_free(avp, sizeof (aggr_vlan_t));
2597 2703 avp = NULL;
2598 2704 }
2599 2705 }
2600 2706
2601 2707 if (avp != NULL)
2602 2708 list_insert_tail(&rx_group->arg_vlans, avp);
2603 2709
2604 2710 done:
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
2605 2711 mac_perim_exit(mph);
2606 2712 return (err);
2607 2713 }
2608 2714
2609 2715 /*
2610 2716 * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
2611 2717 */
2612 2718 static int
2613 2719 aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
2614 2720 {
2615 - aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2721 + aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)gdriver;
2616 2722 aggr_grp_t *aggr = rx_group->arg_grp;
2617 2723 aggr_port_t *port, *p;
2618 2724 mac_perim_handle_t mph;
2619 2725 int err = 0;
2620 2726 aggr_vlan_t *avp = NULL;
2727 + uint_t idx = rx_group->arg_index;
2621 2728
2622 2729 mac_perim_enter_by_mh(aggr->lg_mh, &mph);
2623 2730
2624 2731 /*
2625 2732 * See the comment in aggr_addvlan().
2626 2733 */
2627 2734 if (vid == MAC_VLAN_UNTAGGED) {
2628 2735 vid = VLAN_ID_NONE;
2629 2736 rx_group->arg_untagged--;
2630 2737
2631 2738 if (rx_group->arg_untagged > 0)
2632 2739 goto done;
2633 2740
2634 2741 goto update_ports;
2635 2742 }
2636 2743
2637 2744 avp = aggr_find_vlan(rx_group, vid);
2638 2745
2639 2746 if (avp == NULL) {
2640 2747 err = ENOENT;
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
2641 2748 goto done;
2642 2749 }
2643 2750
2644 2751 avp->av_refs--;
2645 2752
2646 2753 if (avp->av_refs > 0)
2647 2754 goto done;
2648 2755
2649 2756 update_ports:
2650 2757 for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
2651 - if ((err = aggr_port_remvlan(port, vid)) != 0)
2758 + if ((err = aggr_port_remvlan(port, idx, vid)) != 0)
2652 2759 break;
2653 2760
2654 2761 /*
2655 2762 * See the comment in aggr_addvlan(): the ports may now be in
2656 2763 * inconsistent states, so a failed rollback is only logged.
2657 2764 */
2658 2765 if (err != 0) {
2659 2766 for (p = aggr->lg_ports; p != port; p = p->lp_next) {
2660 2767 int err2;
2661 2768
2662 - if ((err2 = aggr_port_addvlan(p, vid)) != 0) {
2769 + if ((err2 = aggr_port_addvlan(p, idx, vid)) != 0) {
2663 2770 cmn_err(CE_WARN, "Failed to add VLAN %u"
2664 2771 " to port %s: errno %d.", vid,
2665 2772 mac_client_name(p->lp_mch), err2);
2666 2773 }
2667 2774 }
2668 2775
2669 2776 if (avp != NULL)
2670 2777 avp->av_refs++;
2671 2778
2672 2779 if (vid == VLAN_ID_NONE)
2673 2780 rx_group->arg_untagged++;
2674 2781
2675 2782 goto done;
2676 2783 }
2677 2784
2678 2785 if (err == 0 && avp != NULL) {
2679 2786 VERIFY3U(avp->av_refs, ==, 0);
2680 2787 list_remove(&rx_group->arg_vlans, avp);
2681 2788 kmem_free(avp, sizeof (aggr_vlan_t));
2682 2789 }
2683 2790
2684 2791 done:
2685 2792 mac_perim_exit(mph);
2686 2793 return (err);
2687 2794 }
2688 2795
2689 2796 /*
2690 2797 * Add or remove the multicast addresses that are defined for the group
2691 2798 * to or from the specified port.
2692 2799 *
2693 2800 * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2694 2801 * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2695 2802 * called when the port is either stopped or detached.
2696 2803 */
2697 2804 void
2698 2805 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2699 2806 {
2700 2807 aggr_grp_t *grp = port->lp_grp;
2701 2808
2702 2809 ASSERT(MAC_PERIM_HELD(port->lp_mh));
2703 2810 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2704 2811
2705 2812 if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2706 2813 return;
2707 2814
2708 2815 mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2709 2816 }
2710 2817
2711 2818 static int
2712 2819 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2713 2820 {
2714 2821 aggr_grp_t *grp = arg;
2715 2822 aggr_port_t *port = NULL, *errport = NULL;
2716 2823 mac_perim_handle_t mph;
2717 2824 int err = 0;
2718 2825
2719 2826 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2720 2827 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2721 2828 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2722 2829 !port->lp_started) {
2723 2830 continue;
2724 2831 }
2725 2832 err = aggr_port_multicst(port, add, addrp);
2726 2833 if (err != 0) {
2727 2834 errport = port;
2728 2835 break;
2729 2836 }
2730 2837 }
2731 2838
2732 2839 /*
2733 2840 * At least one port returned an error, and that error is returned to
2734 2841 * mac; eventually a NAK would be sent upwards.
2735 2842 * Some ports have this multicast address listed now, and some don't.
2736 2843 * Treat this as a failure of the whole aggr, not of an individual port:
2737 2844 * remove this multicast address from the ports that already added it.
2738 2845 */
2739 2846 if ((err != 0) && add) {
2740 2847 for (port = grp->lg_ports; port != errport;
2741 2848 port = port->lp_next) {
2742 2849 if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2743 2850 !port->lp_started) {
2744 2851 continue;
2745 2852 }
2746 2853 (void) aggr_port_multicst(port, B_FALSE, addrp);
2747 2854 }
2748 2855 }
2749 2856 mac_perim_exit(mph);
2750 2857 return (err);
2751 2858 }
2752 2859
2753 2860 static int
2754 2861 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2755 2862 {
2756 2863 aggr_grp_t *grp = arg;
2757 2864 mac_perim_handle_t mph;
2758 2865 int err;
2759 2866
2760 2867 mac_perim_enter_by_mh(grp->lg_mh, &mph);
2761 2868 err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2762 2869 0, 0);
2763 2870 mac_perim_exit(mph);
2764 2871 return (err);
2765 2872 }
2766 2873
2767 2874 /*
2768 2875 * Initialize the capabilities that are advertised for the group
2769 2876 * according to the capabilities of the constituent ports.
2770 2877 */
2771 2878 static void
2772 2879 aggr_grp_capab_set(aggr_grp_t *grp)
2773 2880 {
2774 2881 uint32_t cksum;
2775 2882 aggr_port_t *port;
2776 2883 mac_capab_lso_t cap_lso;
2777 2884
2778 2885 ASSERT(grp->lg_mh == NULL);
2779 2886 ASSERT(grp->lg_ports != NULL);
2780 2887
2781 2888 grp->lg_hcksum_txflags = (uint32_t)-1;
2782 2889 grp->lg_zcopy = B_TRUE;
2783 2890 grp->lg_vlan = B_TRUE;
2784 2891
2785 2892 grp->lg_lso = B_TRUE;
2786 2893 grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2787 2894 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2788 2895
2789 2896 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2790 2897 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2791 2898 cksum = 0;
2792 2899 grp->lg_hcksum_txflags &= cksum;
2793 2900
2794 2901 grp->lg_vlan &=
2795 2902 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2796 2903
2797 2904 grp->lg_zcopy &=
2798 2905 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2799 2906
2800 2907 grp->lg_lso &=
2801 2908 mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2802 2909 if (grp->lg_lso) {
2803 2910 grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2804 2911 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2805 2912 cap_lso.lso_basic_tcp_ipv4.lso_max)
2806 2913 grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2807 2914 cap_lso.lso_basic_tcp_ipv4.lso_max;
2808 2915 }
2809 2916 }
2810 2917 }
2811 2918
2812 2919 /*
2813 2920 * Checks whether the capabilities of the port being added are compatible
2814 2921 * with the current capabilities of the aggregation.
2815 2922 */
2816 2923 static boolean_t
2817 2924 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2818 2925 {
2819 2926 uint32_t hcksum_txflags;
2820 2927
2821 2928 ASSERT(grp->lg_ports != NULL);
2822 2929
2823 2930 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2824 2931 grp->lg_vlan) != grp->lg_vlan) {
2825 2932 return (B_FALSE);
2826 2933 }
2827 2934
2828 2935 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2829 2936 grp->lg_zcopy) != grp->lg_zcopy) {
2830 2937 return (B_FALSE);
2831 2938 }
2832 2939
2833 2940 if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2834 2941 if (grp->lg_hcksum_txflags != 0)
2835 2942 return (B_FALSE);
2836 2943 } else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2837 2944 grp->lg_hcksum_txflags) {
2838 2945 return (B_FALSE);
2839 2946 }
2840 2947
2841 2948 if (grp->lg_lso) {
2842 2949 mac_capab_lso_t cap_lso;
2843 2950
2844 2951 if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2845 2952 if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2846 2953 grp->lg_cap_lso.lso_flags)
2847 2954 return (B_FALSE);
2848 2955 if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2849 2956 cap_lso.lso_basic_tcp_ipv4.lso_max)
2850 2957 return (B_FALSE);
2851 2958 } else {
2852 2959 return (B_FALSE);
2853 2960 }
2854 2961 }
2855 2962
2856 2963 return (B_TRUE);
2857 2964 }
2858 2965
2859 2966 /*
2860 2967 * Returns the maximum SDU according to the SDU of the constituent ports.
2861 2968 */
2862 2969 static uint_t
2863 2970 aggr_grp_max_sdu(aggr_grp_t *grp)
2864 2971 {
2865 2972 uint_t max_sdu = (uint_t)-1;
2866 2973 aggr_port_t *port;
2867 2974
2868 2975 ASSERT(grp->lg_ports != NULL);
2869 2976
2870 2977 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2871 2978 uint_t port_sdu_max;
2872 2979
2873 2980 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2874 2981 if (max_sdu > port_sdu_max)
2875 2982 max_sdu = port_sdu_max;
2876 2983 }
2877 2984
2878 2985 return (max_sdu);
2879 2986 }
2880 2987
2881 2988 /*
2882 2989 * Checks if the maximum SDU of the specified port is compatible
2883 2990 * with the maximum SDU of the specified aggregation group, returns
2884 2991 * B_TRUE if it is, B_FALSE otherwise.
2885 2992 */
2886 2993 static boolean_t
2887 2994 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2888 2995 {
2889 2996 uint_t port_sdu_max;
2890 2997
2891 2998 mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2892 2999 return (port_sdu_max >= grp->lg_max_sdu);
2893 3000 }
2894 3001
2895 3002 /*
2896 3003 * Returns the maximum margin according to the margin of the constituent ports.
2897 3004 */
2898 3005 static uint32_t
2899 3006 aggr_grp_max_margin(aggr_grp_t *grp)
2900 3007 {
2901 3008 uint32_t margin = UINT32_MAX;
2902 3009 aggr_port_t *port;
2903 3010
2904 3011 ASSERT(grp->lg_mh == NULL);
2905 3012 ASSERT(grp->lg_ports != NULL);
2906 3013
2907 3014 for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2908 3015 if (margin > port->lp_margin)
2909 3016 margin = port->lp_margin;
2910 3017 }
2911 3018
2912 3019 grp->lg_margin = margin;
2913 3020 return (margin);
2914 3021 }
2915 3022
2916 3023 /*
2917 3024 * Checks if the maximum margin of the specified port is compatible
2918 3025 * with the maximum margin of the specified aggregation group, returns
2919 3026 * B_TRUE if it is, B_FALSE otherwise.
2920 3027 */
2921 3028 static boolean_t
2922 3029 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2923 3030 {
2924 3031 if (port->lp_margin >= grp->lg_margin)
2925 3032 return (B_TRUE);
2926 3033
2927 3034 /*
2928 3035 * See whether the current margin value is allowed to be changed to
2929 3036 * the new value.
2930 3037 */
2931 3038 if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2932 3039 return (B_FALSE);
2933 3040
2934 3041 grp->lg_margin = port->lp_margin;
2935 3042 return (B_TRUE);
2936 3043 }
2937 3044
2938 3045 /*
2939 3046 * Set MTU on individual ports of an aggregation group
2940 3047 */
2941 3048 static int
2942 3049 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2943 3050 uint32_t *old_mtu)
2944 3051 {
2945 3052 boolean_t removed = B_FALSE;
2946 3053 mac_perim_handle_t mph;
2947 3054 mac_diag_t diag;
2948 3055 int err, rv, retry = 0;
2949 3056
2950 3057 if (port->lp_mah != NULL) {
2951 3058 (void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2952 3059 port->lp_mah = NULL;
2953 3060 removed = B_TRUE;
2954 3061 }
2955 3062 err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2956 3063 try_again:
2957 3064 if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2958 3065 MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2959 3066 &port->lp_mah, 0, &diag)) != 0) {
2960 3067 /*
2961 3068 * The following is a workaround for a bug in the 'bge' driver.
2962 3069 * See CR 6794654 for more information; this workaround
2963 3070 * will be removed once the CR is fixed.
2964 3071 */
2965 3072 if (rv == EIO && retry++ < 3) {
2966 3073 delay(2 * hz);
2967 3074 goto try_again;
2968 3075 }
2969 3076 /*
2970 3077 * if mac_unicast_add() failed while setting the MTU,
2971 3078 * detach the port from the group.
2972 3079 */
2973 3080 mac_perim_enter_by_mh(port->lp_mh, &mph);
2974 3081 (void) aggr_grp_detach_port(grp, port);
2975 3082 mac_perim_exit(mph);
2976 3083 cmn_err(CE_WARN, "Unable to restart the port %s while "
2977 3084 "setting MTU. Detaching the port from the aggregation.",
2978 3085 mac_client_name(port->lp_mch));
2979 3086 }
2980 3087 return (err);
2981 3088 }
2982 3089
2983 3090 static int
2984 3091 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2985 3092 {
2986 3093 int err = 0, i, rv;
2987 3094 aggr_port_t *port;
2988 3095 uint32_t *mtu;
2989 3096
2990 3097 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2991 3098
2992 3099 /*
2993 3100 * If the MTU being set is equal to aggr group's maximum
2994 3101 * allowable value, then there is nothing to change
2995 3102 */
2996 3103 if (sdu == grp->lg_max_sdu)
2997 3104 return (0);
2998 3105
2999 3106 /* 0 is aggr group's min sdu */
3000 3107 if (sdu == 0)
3001 3108 return (EINVAL);
3002 3109
3003 3110 mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
3004 3111 for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
3005 3112 port = port->lp_next, i++) {
3006 3113 err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
3007 3114 }
3008 3115 if (err != 0) {
3009 3116 /* recover from error: reset the mtus of the ports */
3010 3117 aggr_port_t *tmp;
3011 3118
3012 3119 for (tmp = grp->lg_ports, i = 0; tmp != port;
3013 3120 tmp = tmp->lp_next, i++) {
3014 3121 (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
3015 3122 }
3016 3123 goto bail;
3017 3124 }
3018 3125 grp->lg_max_sdu = aggr_grp_max_sdu(grp);
3019 3126 rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
3020 3127 ASSERT(rv == 0);
3021 3128 bail:
3022 3129 kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
3023 3130 return (err);
3024 3131 }
3025 3132
3026 3133 /*
3027 3134 * Callback functions for set/get of properties
3028 3135 */
3029 3136 /*ARGSUSED*/
3030 3137 static int
3031 3138 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
3032 3139 uint_t pr_valsize, const void *pr_val)
3033 3140 {
3034 3141 int err = ENOTSUP;
3035 3142 aggr_grp_t *grp = m_driver;
3036 3143
3037 3144 switch (pr_num) {
3038 3145 case MAC_PROP_MTU: {
3039 3146 uint32_t mtu;
3040 3147
3041 3148 if (pr_valsize < sizeof (mtu)) {
3042 3149 err = EINVAL;
3043 3150 break;
3044 3151 }
3045 3152 bcopy(pr_val, &mtu, sizeof (mtu));
3046 3153 err = aggr_sdu_update(grp, mtu);
3047 3154 break;
3048 3155 }
3049 3156 default:
3050 3157 break;
3051 3158 }
3052 3159 return (err);
3053 3160 }
3054 3161
3055 3162 typedef struct rboundary {
3056 3163 uint32_t bval;
3057 3164 int btype;
3058 3165 } rboundary_t;
3059 3166
3060 3167 /*
3061 3168 * This function finds the intersection of mtu ranges stored in arrays -
3062 3169 * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
3063 3170 * Individual arrays are assumed to contain non-overlapping ranges.
3064 3171 * Algorithm:
3065 3172 * A range has two boundaries - min and max. We scan all arrays and store
3066 3173 * each boundary as a separate element in a temporary array. We also store
3067 3174 * the boundary types, min or max, as +1 or -1 respectively in the temporary
3068 3175 * array. Then we sort the temporary array in ascending order. We scan the
3069 3176 * sorted array from lower to higher values and keep a cumulative sum of
3070 3177 * boundary types. The element in the temporary array at which the sum reaches
3071 3178 * mcount is the min boundary of a range in the result, and the next element is
3072 3179 * its max boundary.
3073 3180 *
3074 3181 * Example for mcount = 3,
3075 3182 *
3076 3183 * ----|_________|-------|_______|----|__|------ mrange[0]
3077 3184 *
3078 3185 * -------|________|--|____________|-----|___|-- mrange[1]
3079 3186 *
3080 3187 * --------|________________|-------|____|------ mrange[2]
3081 3188 *
3082 3189 * 3 2 1
3083 3190 * \|/
3084 3191 * 1 23 2 1 2 3 2 1 01 2 V 0 <- the sum
3085 3192 * ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
3086 3193 *
3087 3194 * same min and max
3088 3195 * V
3089 3196 * --------|_____|-------|__|------------|------ intersecting ranges
3090 3197 */
3091 3198 void
3092 3199 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
3093 3200 mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
3094 3201 {
3095 3202 mac_propval_uint32_range_t *rval, *ur;
3096 3203 int rmaxcnt, rcount;
3097 3204 size_t sz_range32;
3098 3205 rboundary_t *ta; /* temporary array */
3099 3206 rboundary_t temp;
3100 3207 boolean_t range_started = B_FALSE;
3101 3208 int i, j, m, sum;
3102 3209
3103 3210 sz_range32 = sizeof (mac_propval_uint32_range_t);
3104 3211
3105 3212 for (i = 0, rmaxcnt = 0; i < mcount; i++)
3106 3213 rmaxcnt += mrange[i]->mpr_count;
3107 3214
3108 3215 /* Allocate enough space to store the results */
3109 3216 rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
3110 3217
3111 3218 /* There are twice as many boundaries as ranges */
3112 3219 ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
3113 3220
3114 3221 for (i = 0, m = 0; i < mcount; i++) {
3115 3222 ur = &(mrange[i]->mpr_range_uint32[0]);
3116 3223 for (j = 0; j < mrange[i]->mpr_count; j++) {
3117 3224 ta[m].bval = ur[j].mpur_min;
3118 3225 ta[m++].btype = 1;
3119 3226 ta[m].bval = ur[j].mpur_max;
3120 3227 ta[m++].btype = -1;
3121 3228 }
3122 3229 }
3123 3230
3124 3231 /*
3125 3232 * Sort the temporary array in ascending order of bval;
3126 3233 * if boundary values are the same, min (+1) sorts before max (-1).
3127 3234 */
3128 3235 for (i = 0; i < m-1; i++) {
3129 3236 for (j = i+1; j < m; j++) {
3130 3237 if ((ta[i].bval > ta[j].bval) ||
3131 3238 ((ta[i].bval == ta[j].bval) &&
3132 3239 (ta[i].btype < ta[j].btype))) {
3133 3240 temp = ta[i];
3134 3241 ta[i] = ta[j];
3135 3242 ta[j] = temp;
3136 3243 }
3137 3244 }
3138 3245 }
3139 3246
3140 3247 /* Walk through temporary array to find all ranges in the results */
3141 3248 for (i = 0, sum = 0, rcount = 0; i < m; i++) {
3142 3249 sum += ta[i].btype;
3143 3250 if (sum == mcount) {
3144 3251 rval[rcount].mpur_min = ta[i].bval;
3145 3252 range_started = B_TRUE;
3146 3253 } else if (sum < mcount && range_started) {
3147 3254 rval[rcount++].mpur_max = ta[i].bval;
3148 3255 range_started = B_FALSE;
3149 3256 }
3150 3257 }
3151 3258
3152 3259 *prval = rval;
3153 3260 *prmaxcnt = rmaxcnt;
3154 3261 *prcount = rcount;
3155 3262
3156 3263 kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
3157 3264 }
3158 3265
3159 3266 /*
3160 3267 * Returns the mtu ranges which could be supported by aggr group.
3161 3268 * prmaxcnt returns the number of entries allocated for prval, prcount
3162 3269 * returns the number of valid entries in prval. Caller is responsible
3163 3270 * for freeing up prval.
3164 3271 */
3165 3272 int
3166 3273 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
3167 3274 int *prmaxcnt, int *prcount)
3168 3275 {
3169 3276 mac_propval_range_t **vals;
3170 3277 aggr_port_t *port;
3171 3278 mac_perim_handle_t mph;
3172 3279 uint_t i, numr;
3173 3280 int err = 0;
3174 3281 size_t sz_propval, sz_range32;
3175 3282 size_t size;
3176 3283
3177 3284 sz_propval = sizeof (mac_propval_range_t);
3178 3285 sz_range32 = sizeof (mac_propval_uint32_range_t);
3179 3286
3180 3287 ASSERT(MAC_PERIM_HELD(grp->lg_mh));
3181 3288
3182 3289 vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
3183 3290 KM_SLEEP);
3184 3291
3185 3292 for (port = grp->lg_ports, i = 0; port != NULL;
3186 3293 port = port->lp_next, i++) {
3187 3294
3188 3295 size = sz_propval;
3189 3296 vals[i] = kmem_alloc(size, KM_SLEEP);
3190 3297 vals[i]->mpr_count = 1;
3191 3298
3192 3299 mac_perim_enter_by_mh(port->lp_mh, &mph);
3193 3300
3194 3301 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
3195 3302 NULL, 0, vals[i], NULL);
3196 3303 if (err == ENOSPC) {
3197 3304 /*
3198 3305 * Not enough space to hold all ranges.
3199 3306 * Allocate extra space as indicated and retry.
3200 3307 */
3201 3308 numr = vals[i]->mpr_count;
3202 3309 kmem_free(vals[i], sz_propval);
3203 3310 size = sz_propval + (numr - 1) * sz_range32;
3204 3311 vals[i] = kmem_alloc(size, KM_SLEEP);
3205 3312 vals[i]->mpr_count = numr;
3206 3313 err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
3207 3314 NULL, 0, vals[i], NULL);
3208 3315 ASSERT(err != ENOSPC);
3209 3316 }
3210 3317 mac_perim_exit(mph);
3211 3318 if (err != 0) {
3212 3319 kmem_free(vals[i], size);
3213 3320 vals[i] = NULL;
3214 3321 break;
3215 3322 }
3216 3323 }
3217 3324
3218 3325 /*
3219 3326 * If the MTU range query failed for any underlying port (for example,
3220 3327 * the port does not support changing MTU), just return that error.
3221 3328 */
3222 3329 if (port != NULL) {
3223 3330 ASSERT(err != 0);
3224 3331 goto done;
3225 3332 }
3226 3333
3227 3334 aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
3228 3335 prcount);
3229 3336
3230 3337 done:
3231 3338 for (i = 0; i < grp->lg_nports; i++) {
3232 3339 if (vals[i] != NULL) {
3233 3340 numr = vals[i]->mpr_count;
3234 3341 size = sz_propval + (numr - 1) * sz_range32;
3235 3342 kmem_free(vals[i], size);
3236 3343 }
3237 3344 }
3238 3345
3239 3346 kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
3240 3347 return (err);
3241 3348 }
3242 3349
3243 3350 static void
3244 3351 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
3245 3352 mac_prop_info_handle_t prh)
3246 3353 {
3247 3354 aggr_grp_t *grp = m_driver;
3248 3355 mac_propval_uint32_range_t *rval = NULL;
3249 3356 int i, rcount, rmaxcnt;
3250 3357 int err = 0;
3251 3358
3252 3359 _NOTE(ARGUNUSED(pr_name));
3253 3360
3254 3361 switch (pr_num) {
3255 3362 case MAC_PROP_MTU:
3256 3363
3257 3364 err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
3258 3365 &rcount);
3259 3366 if (err != 0) {
3260 3367 ASSERT(rval == NULL);
3261 3368 return;
3262 3369 }
3263 3370 for (i = 0; i < rcount; i++) {
3264 3371 mac_prop_info_set_range_uint32(prh,
3265 3372 rval[i].mpur_min, rval[i].mpur_max);
3266 3373 }
3267 3374 kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
3268 3375 break;
3269 3376 }
3270 3377 }
↓ open down ↓ |
598 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX