11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
--- old/usr/src/uts/common/io/mac/mac_datapath_setup.c
+++ new/usr/src/uts/common/io/mac/mac_datapath_setup.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 - * Copyright 2017, Joyent, Inc.
23 + * Copyright 2018 Joyent, Inc.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/callb.h>
28 28 #include <sys/cpupart.h>
29 29 #include <sys/pool.h>
30 30 #include <sys/pool_pset.h>
31 31 #include <sys/sdt.h>
32 32 #include <sys/strsubr.h>
33 33 #include <sys/strsun.h>
34 34 #include <sys/vlan.h>
35 35 #include <inet/ipsec_impl.h>
36 36 #include <inet/ip_impl.h>
37 37 #include <inet/sadb.h>
38 38 #include <inet/ipsecesp.h>
39 39 #include <inet/ipsecah.h>
40 40
41 41 #include <sys/mac_impl.h>
42 42 #include <sys/mac_client_impl.h>
43 43 #include <sys/mac_client_priv.h>
44 44 #include <sys/mac_soft_ring.h>
45 45 #include <sys/mac_flow_impl.h>
46 46 #include <sys/mac_stat.h>
47 47
48 48 static void mac_srs_soft_rings_signal(mac_soft_ring_set_t *, uint_t);
49 49 static void mac_srs_update_fanout_list(mac_soft_ring_set_t *);
50 50 static void mac_srs_poll_unbind(mac_soft_ring_set_t *);
51 51 static void mac_srs_worker_unbind(mac_soft_ring_set_t *);
52 52 static void mac_srs_soft_rings_quiesce(mac_soft_ring_set_t *, uint_t);
53 53
54 54 static int mac_srs_cpu_setup(cpu_setup_t, int, void *);
55 55 static void mac_srs_worker_bind(mac_soft_ring_set_t *, processorid_t);
56 56 static void mac_srs_poll_bind(mac_soft_ring_set_t *, processorid_t);
57 57 static void mac_srs_threads_unbind(mac_soft_ring_set_t *);
58 58 static void mac_srs_add_glist(mac_soft_ring_set_t *);
59 59 static void mac_srs_remove_glist(mac_soft_ring_set_t *);
60 60 static void mac_srs_fanout_list_free(mac_soft_ring_set_t *);
61 61 static void mac_soft_ring_remove(mac_soft_ring_set_t *, mac_soft_ring_t *);
62 62
63 63 static int mac_compute_soft_ring_count(flow_entry_t *, int, int);
64 64 static void mac_walk_srs_and_bind(int);
65 65 static void mac_walk_srs_and_unbind(int);
66 66
67 67 extern boolean_t mac_latency_optimize;
68 68
69 69 static kmem_cache_t *mac_srs_cache;
70 70 kmem_cache_t *mac_soft_ring_cache;
71 71
72 72 /*
73 73 * The duration in msec we wait before signalling the soft ring
74 74 * worker thread in case packets get queued.
75 75 */
76 76 uint32_t mac_soft_ring_worker_wait = 0;
77 77
78 78 /*
79 79 * A global tunable for turning polling on/off. By default, dynamic
80 80 * polling is always on and is always very beneficial. It should be
81 81 * turned off only with absolute care, and only for the rare workload
82 82 * (very latency-sensitive traffic).
83 83 */
84 84 int mac_poll_enable = B_TRUE;
85 85
86 86 /*
87 87 * Need to set mac_soft_ring_max_q_cnt based on bandwidth and perhaps latency.
88 88 * Large values could end up consuming a lot of system memory and cause
89 89 * a system hang.
90 90 */
91 91 int mac_soft_ring_max_q_cnt = 1024;
92 92 int mac_soft_ring_min_q_cnt = 256;
93 93 int mac_soft_ring_poll_thres = 16;
94 94
95 95 boolean_t mac_tx_serialize = B_FALSE;
96 96
97 97 /*
98 98 * mac_tx_srs_hiwat is the queue depth threshold at which callers of
99 99 * mac_tx() will be notified of flow control condition.
100 100 *
101 101 * TCP does not honour the flow control condition sent up by mac_tx().
102 102 * Thus provision is made for TCP to allow more packets to be queued
103 103 * in the SRS, up to a maximum of mac_tx_srs_max_q_cnt.
104 104 *
105 105 * Note that mac_tx_srs_hiwat must always be less than
106 106 * mac_tx_srs_max_q_cnt.
107 107 */
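/*
 * Illustration of the defaults below (hypothetical traffic, assuming
 * the tunables are unchanged): a caller of mac_tx() is notified of
 * flow control once about mac_tx_srs_hiwat (1000) packets are queued
 * on the Tx SRS, while TCP may keep queueing until
 * mac_tx_srs_max_q_cnt (100000) packets are reached.
 */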
108 108 uint32_t mac_tx_srs_max_q_cnt = 100000;
109 109 uint32_t mac_tx_srs_hiwat = 1000;
110 110
111 111 /*
112 112 * mac_rx_soft_ring_count, mac_soft_ring_10gig_count:
113 113 *
114 114 * Global tunables that determine the number of soft rings to be used for
115 115 * fanning out incoming traffic on a link. These counts are used only
116 116 * when no explicit set of CPUs has been assigned to the data-links.
117 117 *
118 118 * mac_rx_soft_ring_count tunable will come into effect only if
119 119 * mac_soft_ring_enable is set. mac_soft_ring_enable is turned on by
120 120 * default only for sun4v platforms.
121 121 *
122 122 * mac_rx_soft_ring_10gig_count will come into effect if you are running on a
123 123 * 10Gbps link and is not dependent upon mac_soft_ring_enable.
124 124 *
125 125 * The number of soft rings for fanout for a link or a flow is determined
126 126 * by mac_compute_soft_ring_count() routine. This routine will take into
127 127 * account mac_soft_ring_enable, mac_rx_soft_ring_count and
128 128 * mac_rx_soft_ring_10gig_count to determine the soft ring count for a link.
129 129 *
130 130 * If a bandwidth is specified, the determination of the number of soft
131 131 * rings is based on specified bandwidth, CPU speed and number of CPUs in
132 132 * the system.
133 133 */
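/*
 * A sketch of how the tunables above interact (hypothetical setups):
 * on a non-sun4v system (mac_soft_ring_enable off) with a 10 Gbps
 * link and no bandwidth cap, each Rx SRS fans out to
 * mac_rx_soft_ring_10gig_count (8) soft rings; with a 1 Gbps link on
 * the same system, the computed count is 0 and a single soft ring
 * per protocol is created instead.
 */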
134 134 uint_t mac_rx_soft_ring_count = 8;
135 135 uint_t mac_rx_soft_ring_10gig_count = 8;
136 136
137 137 /*
138 138 * Every Tx and Rx mac_soft_ring_set_t (mac_srs) created gets added
139 139 * to mac_srs_g_list and mac_srs_g_lock protects mac_srs_g_list. The
140 140 * list is used to walk the list of all MAC threads when a CPU is
141 141 * coming online or going offline.
142 142 */
143 143 static mac_soft_ring_set_t *mac_srs_g_list = NULL;
144 144 static krwlock_t mac_srs_g_lock;
145 145
146 146 /*
147 147 * Whether the SRS threads should be bound, or not.
148 148 */
149 149 boolean_t mac_srs_thread_bind = B_TRUE;
150 150
151 151 /*
152 152 * Whether Rx/Tx interrupts should be re-targeted. Disabled by default.
153 153 * The dladm command can override this.
154 154 */
155 155 boolean_t mac_tx_intr_retarget = B_FALSE;
156 156 boolean_t mac_rx_intr_retarget = B_FALSE;
157 157
158 158 /*
159 159 * If CPU bindings are specified by the user, then the Tx SRS and its soft
160 160 * rings should also be bound to the CPUs specified by the user. The
161 161 * CPUs for Tx bindings are at the end of the CPU list provided by
162 162 * the user. If not enough CPUs are available (for both Tx and Rx
163 163 * SRSes), then the CPUs are shared by both Tx and Rx SRSes.
164 164 */
165 165 #define BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp) { \
166 166 processorid_t cpuid; \
167 167 int i; \
168 168 mac_soft_ring_t *softring; \
169 169 mac_cpus_t *srs_cpu; \
170 170 \
171 171 srs_cpu = &mac_tx_srs->srs_cpu; \
172 172 cpuid = srs_cpu->mc_tx_fanout_cpus[0]; \
173 173 mac_srs_worker_bind(mac_tx_srs, cpuid); \
174 174 if (MAC_TX_SOFT_RINGS(mac_tx_srs)) { \
175 175 for (i = 0; i < mac_tx_srs->srs_tx_ring_count; i++) { \
176 176 cpuid = srs_cpu->mc_tx_fanout_cpus[i]; \
177 177 softring = mac_tx_srs->srs_tx_soft_rings[i]; \
178 178 if (cpuid != -1) { \
179 179 (void) mac_soft_ring_bind(softring, \
180 180 cpuid); \
181 181 } \
182 182 } \
183 183 } \
184 184 }
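/*
 * A hypothetical call site for the macro above (the actual callers
 * live elsewhere in mac): with cpu_lock held and the CPU list already
 * placed in srs_cpu by mac_tx_cpu_init(), the Tx SRS worker and each
 * Tx soft ring worker get bound to their fanout CPUs:
 *
 *	mutex_enter(&cpu_lock);
 *	BIND_TX_SRS_AND_SOFT_RINGS(flent->fe_tx_srs, mrp);
 *	mutex_exit(&cpu_lock);
 */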
185 185
186 186 /*
187 187 * Re-targeting is allowed only for exclusive group or for primary.
188 188 */
189 189 #define RETARGETABLE_CLIENT(group, mcip) \
190 190 ((((group) != NULL) && \
191 191 ((group)->mrg_state == MAC_GROUP_STATE_RESERVED)) || \
192 192 mac_is_primary_client(mcip))
193 193
194 194 #define MAC_RING_RETARGETABLE(ring) \
195 195 (((ring) != NULL) && \
196 196 ((ring)->mr_info.mri_intr.mi_ddi_handle != NULL) && \
197 197 !((ring)->mr_info.mri_intr.mi_ddi_shared))
198 198
199 199
200 200 /* INIT and FINI ROUTINES */
201 201
202 202 void
203 203 mac_soft_ring_init(void)
204 204 {
205 205 mac_soft_ring_cache = kmem_cache_create("mac_soft_ring_cache",
206 206 sizeof (mac_soft_ring_t), 64, NULL, NULL, NULL, NULL, NULL, 0);
207 207
208 208 mac_srs_cache = kmem_cache_create("mac_srs_cache",
209 209 sizeof (mac_soft_ring_set_t),
210 210 64, NULL, NULL, NULL, NULL, NULL, 0);
211 211
212 212 rw_init(&mac_srs_g_lock, NULL, RW_DEFAULT, NULL);
213 213 mutex_enter(&cpu_lock);
214 214 register_cpu_setup_func(mac_srs_cpu_setup, NULL);
215 215 mutex_exit(&cpu_lock);
216 216 }
217 217
218 218 void
219 219 mac_soft_ring_finish(void)
220 220 {
221 221 mutex_enter(&cpu_lock);
222 222 unregister_cpu_setup_func(mac_srs_cpu_setup, NULL);
223 223 mutex_exit(&cpu_lock);
224 224 rw_destroy(&mac_srs_g_lock);
225 225 kmem_cache_destroy(mac_soft_ring_cache);
226 226 kmem_cache_destroy(mac_srs_cache);
227 227 }
228 228
229 229 static void
230 230 mac_srs_soft_rings_free(mac_soft_ring_set_t *mac_srs)
231 231 {
232 232 mac_soft_ring_t *softring, *next, *head;
233 233
234 234 /*
235 235 * Synchronize with mac_walk_srs_and_bind/unbind, which are callbacks from
236 236 * DR. The callbacks from DR are called with cpu_lock held, and hence
237 237 * can't wait to grab the mac perimeter. The soft ring list is hence
238 238 * protected for read access by srs_lock. Changing the soft ring list
239 239 * needs the mac perimeter and the srs_lock.
240 240 */
241 241 mutex_enter(&mac_srs->srs_lock);
242 242
243 243 head = mac_srs->srs_soft_ring_head;
244 244 mac_srs->srs_soft_ring_head = NULL;
245 245 mac_srs->srs_soft_ring_tail = NULL;
246 246 mac_srs->srs_soft_ring_count = 0;
247 247
248 248 mutex_exit(&mac_srs->srs_lock);
249 249
250 250 for (softring = head; softring != NULL; softring = next) {
251 251 next = softring->s_ring_next;
252 252 mac_soft_ring_free(softring);
253 253 }
254 254 }
255 255
256 256 static void
257 257 mac_srs_add_glist(mac_soft_ring_set_t *mac_srs)
258 258 {
259 259 ASSERT(mac_srs->srs_next == NULL && mac_srs->srs_prev == NULL);
260 260 ASSERT(MAC_PERIM_HELD((mac_handle_t)mac_srs->srs_mcip->mci_mip));
261 261
262 262 rw_enter(&mac_srs_g_lock, RW_WRITER);
263 263 mutex_enter(&mac_srs->srs_lock);
264 264
265 265 ASSERT((mac_srs->srs_state & SRS_IN_GLIST) == 0);
266 266
267 267 if (mac_srs_g_list == NULL) {
268 268 mac_srs_g_list = mac_srs;
269 269 } else {
270 270 mac_srs->srs_next = mac_srs_g_list;
271 271 mac_srs_g_list->srs_prev = mac_srs;
272 272 mac_srs->srs_prev = NULL;
273 273 mac_srs_g_list = mac_srs;
274 274 }
275 275 mac_srs->srs_state |= SRS_IN_GLIST;
276 276
277 277 mutex_exit(&mac_srs->srs_lock);
278 278 rw_exit(&mac_srs_g_lock);
279 279 }
280 280
281 281 static void
282 282 mac_srs_remove_glist(mac_soft_ring_set_t *mac_srs)
283 283 {
284 284 ASSERT(MAC_PERIM_HELD((mac_handle_t)mac_srs->srs_mcip->mci_mip));
285 285
286 286 rw_enter(&mac_srs_g_lock, RW_WRITER);
287 287 mutex_enter(&mac_srs->srs_lock);
288 288
289 289 ASSERT((mac_srs->srs_state & SRS_IN_GLIST) != 0);
290 290
291 291 if (mac_srs == mac_srs_g_list) {
292 292 mac_srs_g_list = mac_srs->srs_next;
293 293 if (mac_srs_g_list != NULL)
294 294 mac_srs_g_list->srs_prev = NULL;
295 295 } else {
296 296 mac_srs->srs_prev->srs_next = mac_srs->srs_next;
297 297 if (mac_srs->srs_next != NULL)
298 298 mac_srs->srs_next->srs_prev = mac_srs->srs_prev;
299 299 }
300 300 mac_srs->srs_state &= ~SRS_IN_GLIST;
301 301
302 302 mutex_exit(&mac_srs->srs_lock);
303 303 rw_exit(&mac_srs_g_lock);
304 304 }
305 305
306 306 /* POLLING SETUP AND TEAR DOWN ROUTINES */
307 307
308 308 /*
309 309 * mac_srs_client_poll_quiesce and mac_srs_client_poll_restart
310 310 *
311 311 * These routines are used to call back into the upper layer
312 312 * (primarily TCP squeue) to stop polling the soft rings or
313 313 * restart polling.
314 314 */
315 315 void
316 316 mac_srs_client_poll_quiesce(mac_client_impl_t *mcip,
317 317 mac_soft_ring_set_t *mac_srs)
318 318 {
319 319 mac_soft_ring_t *softring;
320 320
321 321 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
322 322
323 323 if (!(mac_srs->srs_type & SRST_CLIENT_POLL_ENABLED)) {
324 324 ASSERT(!(mac_srs->srs_type & SRST_DLS_BYPASS));
325 325 return;
326 326 }
327 327
328 328 for (softring = mac_srs->srs_soft_ring_head;
329 329 softring != NULL; softring = softring->s_ring_next) {
330 330 if ((softring->s_ring_type & ST_RING_TCP) &&
331 331 (softring->s_ring_rx_arg2 != NULL)) {
332 332 mcip->mci_resource_quiesce(mcip->mci_resource_arg,
333 333 softring->s_ring_rx_arg2);
334 334 }
335 335 }
336 336 }
337 337
338 338 void
339 339 mac_srs_client_poll_restart(mac_client_impl_t *mcip,
340 340 mac_soft_ring_set_t *mac_srs)
341 341 {
342 342 mac_soft_ring_t *softring;
343 343
344 344 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
345 345
346 346 if (!(mac_srs->srs_type & SRST_CLIENT_POLL_ENABLED)) {
347 347 ASSERT(!(mac_srs->srs_type & SRST_DLS_BYPASS));
348 348 return;
349 349 }
350 350
351 351 for (softring = mac_srs->srs_soft_ring_head;
352 352 softring != NULL; softring = softring->s_ring_next) {
353 353 if ((softring->s_ring_type & ST_RING_TCP) &&
354 354 (softring->s_ring_rx_arg2 != NULL)) {
355 355 mcip->mci_resource_restart(mcip->mci_resource_arg,
356 356 softring->s_ring_rx_arg2);
357 357 }
358 358 }
359 359 }
360 360
361 361 /*
362 362 * Register the given SRS and associated soft rings with the consumer and
363 363 * enable the polling interface used by the consumer (i.e. IP) over
364 364 * this SRS and the associated soft rings.
365 365 */
366 366 void
367 367 mac_srs_client_poll_enable(mac_client_impl_t *mcip,
368 368 mac_soft_ring_set_t *mac_srs)
369 369 {
370 370 mac_rx_fifo_t mrf;
371 371 mac_soft_ring_t *softring;
372 372
373 373 ASSERT(mac_srs->srs_mcip == mcip);
374 374 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
375 375
376 376 if (!(mcip->mci_state_flags & MCIS_CLIENT_POLL_CAPABLE))
377 377 return;
378 378
379 379 bzero(&mrf, sizeof (mac_rx_fifo_t));
380 380 mrf.mrf_type = MAC_RX_FIFO;
381 381
382 382 /*
383 383 * A SRS is capable of acting as a soft ring for cases
384 384 * where no fanout is needed. This is the case for userland
385 385 * flows.
386 386 */
387 387 if (mac_srs->srs_type & SRST_NO_SOFT_RINGS)
388 388 return;
389 389
390 390 mrf.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
391 391 mrf.mrf_intr_enable = (mac_intr_enable_t)mac_soft_ring_intr_enable;
392 392 mrf.mrf_intr_disable = (mac_intr_disable_t)mac_soft_ring_intr_disable;
393 393 mac_srs->srs_type |= SRST_CLIENT_POLL_ENABLED;
394 394
395 395 softring = mac_srs->srs_soft_ring_head;
396 396 while (softring != NULL) {
397 397 if (softring->s_ring_type & (ST_RING_TCP | ST_RING_UDP)) {
398 398 /*
399 399 * TCP and UDP support DLS bypass. Squeue polling
400 400 * support implies DLS bypass since the squeue poll
401 401 * path does not have DLS processing.
402 402 */
403 403 mac_soft_ring_dls_bypass(softring,
404 404 mcip->mci_direct_rx_fn, mcip->mci_direct_rx_arg);
405 405 }
406 406 /*
407 407 * Non-TCP protocols don't support squeues. Hence we don't
408 408 * make any ring addition callbacks for non-TCP rings
409 409 */
410 410 if (!(softring->s_ring_type & ST_RING_TCP)) {
411 411 softring->s_ring_rx_arg2 = NULL;
412 412 softring = softring->s_ring_next;
413 413 continue;
414 414 }
415 415 mrf.mrf_rx_arg = softring;
416 416 mrf.mrf_intr_handle = (mac_intr_handle_t)softring;
417 417 mrf.mrf_cpu_id = softring->s_ring_cpuid;
418 418 mrf.mrf_flow_priority = mac_srs->srs_pri;
419 419
420 420 softring->s_ring_rx_arg2 = mcip->mci_resource_add(
421 421 mcip->mci_resource_arg, (mac_resource_t *)&mrf);
422 422
423 423 softring = softring->s_ring_next;
424 424 }
425 425 }
426 426
427 427 /*
428 428 * Unregister the given SRS and associated soft rings with the consumer and
429 429 * disable the polling interface used by the consumer (i.e. IP) over
430 430 * this SRS and the associated soft rings.
431 431 */
432 432 void
433 433 mac_srs_client_poll_disable(mac_client_impl_t *mcip,
434 434 mac_soft_ring_set_t *mac_srs)
435 435 {
436 436 mac_soft_ring_t *softring;
437 437
438 438 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
439 439
440 440 /*
441 441 * A SRS is capable of acting as a soft ring for cases
442 442 * where no protocol fanout is needed. This is the case
443 443 * for userland flows. Nothing to do here.
444 444 */
445 445 if (mac_srs->srs_type & SRST_NO_SOFT_RINGS)
446 446 return;
447 447
448 448 mutex_enter(&mac_srs->srs_lock);
449 449 if (!(mac_srs->srs_type & SRST_CLIENT_POLL_ENABLED)) {
450 450 ASSERT(!(mac_srs->srs_type & SRST_DLS_BYPASS));
451 451 mutex_exit(&mac_srs->srs_lock);
452 452 return;
453 453 }
454 454 mac_srs->srs_type &= ~(SRST_CLIENT_POLL_ENABLED | SRST_DLS_BYPASS);
455 455 mutex_exit(&mac_srs->srs_lock);
456 456
457 457 /*
458 458 * DLS bypass is now disabled in the case of both TCP and UDP.
459 459 * Reset the soft ring callbacks to the standard 'mac_rx_deliver'
460 460 * callback. In addition, in the case of TCP, invoke IP's callback
461 461 * for ring removal.
462 462 */
463 463 for (softring = mac_srs->srs_soft_ring_head;
464 464 softring != NULL; softring = softring->s_ring_next) {
465 465 if (!(softring->s_ring_type & (ST_RING_UDP | ST_RING_TCP)))
466 466 continue;
467 467
468 468 if ((softring->s_ring_type & ST_RING_TCP) &&
469 469 softring->s_ring_rx_arg2 != NULL) {
470 470 mcip->mci_resource_remove(mcip->mci_resource_arg,
471 471 softring->s_ring_rx_arg2);
472 472 }
473 473
474 474 mutex_enter(&softring->s_ring_lock);
475 475 while (softring->s_ring_state & S_RING_PROC) {
476 476 softring->s_ring_state |= S_RING_CLIENT_WAIT;
477 477 cv_wait(&softring->s_ring_client_cv,
478 478 &softring->s_ring_lock);
479 479 }
480 480 softring->s_ring_state &= ~S_RING_CLIENT_WAIT;
481 481 softring->s_ring_rx_arg2 = NULL;
482 482 softring->s_ring_rx_func = mac_rx_deliver;
483 483 softring->s_ring_rx_arg1 = mcip;
484 484 mutex_exit(&softring->s_ring_lock);
485 485 }
486 486 }
487 487
488 488 /*
489 489 * Enable or disable poll capability of the SRS on the underlying Rx ring.
490 490 *
491 491 * There is a need to enable or disable the poll capability of an SRS over an
492 492 * Rx ring depending on the number of mac clients sharing the ring and also
493 493 * whether user flows are configured on it. However the poll state is actively
494 494 * manipulated by the SRS worker and poll threads and uncoordinated changes by
495 495 * yet another thread to the underlying capability can surprise them leading
496 496 * to assert failures. Instead we quiesce the SRS, make the changes and then
497 497 * restart the SRS.
498 498 */
499 499 static void
500 500 mac_srs_poll_state_change(mac_soft_ring_set_t *mac_srs,
501 501 boolean_t turn_off_poll_capab, mac_rx_func_t rx_func)
502 502 {
503 503 boolean_t need_restart = B_FALSE;
504 504 mac_srs_rx_t *srs_rx = &mac_srs->srs_rx;
505 505 mac_ring_t *ring;
506 506
507 507 if (!SRS_QUIESCED(mac_srs)) {
508 508 mac_rx_srs_quiesce(mac_srs, SRS_QUIESCE);
509 509 need_restart = B_TRUE;
510 510 }
511 511
512 512 ring = mac_srs->srs_ring;
513 513 if ((ring != NULL) &&
514 514 (ring->mr_classify_type == MAC_HW_CLASSIFIER)) {
515 515 if (turn_off_poll_capab)
516 516 mac_srs->srs_state &= ~SRS_POLLING_CAPAB;
517 517 else if (mac_poll_enable)
518 518 mac_srs->srs_state |= SRS_POLLING_CAPAB;
519 519 }
520 520 srs_rx->sr_lower_proc = rx_func;
521 521
522 522 if (need_restart)
523 523 mac_rx_srs_restart(mac_srs);
524 524 }
525 525
526 526 /* CPU RECONFIGURATION AND FANOUT COMPUTATION ROUTINES */
527 527
528 528 /*
529 529 * Return the next CPU to be used to bind a MAC kernel thread.
530 530 * If a cpupart is specified, the cpu chosen must be from that
531 531 * cpu partition.
532 532 */
533 533 static processorid_t
534 534 mac_next_bind_cpu(cpupart_t *cpupart)
535 535 {
536 536 static cpu_t *cp = NULL;
537 537 cpu_t *cp_start;
538 538
539 539 ASSERT(MUTEX_HELD(&cpu_lock));
540 540
541 541 if (cp == NULL)
542 542 cp = cpu_list;
543 543
544 544 cp = cp->cpu_next_onln;
545 545 cp_start = cp;
546 546
547 547 do {
548 548 if ((cpupart == NULL) || (cp->cpu_part == cpupart))
549 549 return (cp->cpu_id);
550 550
551 551 } while ((cp = cp->cpu_next_onln) != cp_start);
552 552
553 553 return (-1); /* No matching CPU found online */
554 554 }
555 555
556 556 /* ARGSUSED */
557 557 static int
558 558 mac_srs_cpu_setup(cpu_setup_t what, int id, void *arg)
559 559 {
560 560 ASSERT(MUTEX_HELD(&cpu_lock));
561 561 switch (what) {
562 562 case CPU_CONFIG:
563 563 case CPU_ON:
564 564 case CPU_CPUPART_IN:
565 565 mac_walk_srs_and_bind(id);
566 566 break;
567 567
568 568 case CPU_UNCONFIG:
569 569 case CPU_OFF:
570 570 case CPU_CPUPART_OUT:
571 571 mac_walk_srs_and_unbind(id);
572 572 break;
573 573
574 574 default:
575 575 break;
576 576 }
577 577 return (0);
578 578 }
579 579
580 580 /*
581 581 * mac_compute_soft_ring_count():
582 582 *
583 583 * This routine computes the number of soft rings needed to handle incoming
584 584 * load given a flow_entry.
585 585 *
586 586 * The routine does the following:
587 587 * 1) soft rings will be created if mac_soft_ring_enable is set.
588 588 * 2) If the underlying link is a 10Gbps link, then soft rings will be
589 589 * created even if mac_soft_ring_enable is not set. The number of soft
590 590 * rings, so created, will equal mac_rx_soft_ring_10gig_count.
591 591 * 3) On a sun4v platform (i.e., mac_soft_ring_enable is set), 2 times the
592 592 * mac_rx_soft_ring_10gig_count number of soft rings will be created for a
593 593 * 10Gbps link.
594 594 *
595 595 * If a bandwidth limit is specified, the number that gets computed is
596 596 * dependent upon CPU speed, the number of Rx rings configured, and
597 597 * the bandwidth limit.
598 598 * If more Rx rings are available, fewer soft rings are needed.
599 599 *
600 600 * mac_use_bw_heuristic is another "hidden" variable that can be used to
601 601 * override the default use of soft ring count computation. Depending upon
602 602 * the usefulness of it, mac_use_bw_heuristic can later be made into a
603 603 * data-link property or removed altogether.
604 604 *
605 605 * TODO: Cleanup and tighten some of the assumptions.
606 606 */
607 607 boolean_t mac_use_bw_heuristic = B_TRUE;
608 608 static int
609 609 mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus)
610 610 {
611 611 uint64_t cpu_speed, bw = 0;
612 612 int srings = 0;
613 613 boolean_t bw_enabled = B_FALSE;
614 614
615 615 ASSERT(!(flent->fe_type & FLOW_USER));
616 616 if (flent->fe_resource_props.mrp_mask & MRP_MAXBW &&
617 617 mac_use_bw_heuristic) {
618 618 /* bandwidth enabled */
619 619 bw_enabled = B_TRUE;
620 620 bw = flent->fe_resource_props.mrp_maxbw;
621 621 }
622 622 if (!bw_enabled) {
623 623 /* No bandwidth enabled */
624 624 if (mac_soft_ring_enable)
625 625 srings = mac_rx_soft_ring_count;
626 626
627 627 /* Is this a 10Gig link? */
628 628 flent->fe_nic_speed = mac_client_stat_get(flent->fe_mcip,
629 629 MAC_STAT_IFSPEED);
630 630 /* convert to Mbps */
631 631 if (((flent->fe_nic_speed)/1000000) > 1000 &&
632 632 mac_rx_soft_ring_10gig_count > 0) {
633 633 /* This is a 10Gig link */
634 634 srings = mac_rx_soft_ring_10gig_count;
635 635 /*
636 636 * Use 2 times mac_rx_soft_ring_10gig_count for
637 637 * sun4v systems.
638 638 */
639 639 if (mac_soft_ring_enable)
640 640 srings = srings * 2;
641 641 }
642 642 } else {
643 643 /*
644 644 * Soft ring computation using CPU speed and specified
645 645 * bandwidth limit.
646 646 */
647 647 /* Assumption: all CPUs have the same frequency */
648 648 cpu_speed = (uint64_t)CPU->cpu_type_info.pi_clock;
649 649
650 650 /* cpu_speed is in MHz; make bw in units of Mbps. */
651 651 bw = bw/1000000;
652 652
653 653 if (bw >= 1000) {
654 654 /*
655 655 * bw is greater than or equal to 1Gbps.
656 656 * The number of soft rings required is a function
657 657 * of bandwidth and CPU speed. To keep this simple,
658 658 * let's use this rule: 1GHz CPU can handle 1Gbps.
659 659 * If bw is less than 1 Gbps, then there is no need
660 660 * for soft rings. Assumption is that CPU speeds
661 661 * (on modern systems) are at least 1GHz.
662 662 */
663 663 srings = bw/cpu_speed;
664 664 if (srings <= 1 && mac_soft_ring_enable) {
665 665 /*
666 666 * Give at least 2 soft rings
667 667 * for sun4v systems
668 668 */
669 669 srings = 2;
670 670 }
671 671 }
672 672 }
673 673 /*
674 674 * If the flent has multiple Rx SRSs, then each SRS need not
675 675 * have that many soft rings on top of it. The number of
676 676 * soft rings for each Rx SRS is found by dividing srings by
677 677 * rx_srs_cnt.
678 678 */
679 679 if (rx_srs_cnt > 1) {
680 680 int remainder;
681 681
682 682 remainder = srings%rx_srs_cnt;
683 683 srings = srings/rx_srs_cnt;
684 684 if (remainder != 0)
685 685 srings++;
686 686 /*
687 687 * Fanning out to 1 soft ring is not very useful.
688 688 * Set it to 0 as well; mac_srs_fanout_init()
689 689 * will take care of creating a single soft ring
690 690 * for protocol fanout.
691 691 */
692 692 if (srings == 1)
693 693 srings = 0;
694 694 }
695 695 /* Do some more massaging */
696 696 srings = min(srings, maxcpus);
697 697 srings = min(srings, MAX_SR_FANOUT);
698 698 return (srings);
699 699 }
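/*
 * Worked example for the bandwidth path above (hypothetical values):
 * a 5 Gbps limit (mrp_maxbw = 5000000000) on 2 GHz CPUs gives
 * bw = 5000 Mbps and srings = 5000/2000 = 2.  With rx_srs_cnt = 2,
 * each Rx SRS would get 2/2 = 1 soft ring, which is then collapsed
 * to 0 so that mac_srs_fanout_init() creates a single soft ring for
 * protocol fanout.
 */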
700 700
701 701 /*
702 702 * mac_tx_cpu_init:
703 703 * set up CPUs for Tx interrupt re-targeting and Tx worker
704 704 * thread binding
705 705 */
706 706 static void
707 707 mac_tx_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp,
708 708 cpupart_t *cpupart)
709 709 {
710 710 mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs;
711 711 mac_srs_tx_t *srs_tx = &tx_srs->srs_tx;
712 712 mac_cpus_t *srs_cpu = &tx_srs->srs_cpu;
713 713 mac_soft_ring_t *sringp;
714 714 mac_ring_t *ring;
715 715 processorid_t worker_cpuid;
716 716 boolean_t retargetable_client = B_FALSE;
717 717 int i, j;
718 718
719 719 if (RETARGETABLE_CLIENT((mac_group_t *)flent->fe_tx_ring_group,
720 720 flent->fe_mcip)) {
721 721 retargetable_client = B_TRUE;
722 722 }
723 723
724 724 if (MAC_TX_SOFT_RINGS(tx_srs)) {
725 725 if (mrp != NULL)
726 726 j = mrp->mrp_ncpus - 1;
727 727 for (i = 0; i < tx_srs->srs_tx_ring_count; i++) {
728 728 if (mrp != NULL) {
729 729 if (j < 0)
730 730 j = mrp->mrp_ncpus - 1;
731 731 worker_cpuid = mrp->mrp_cpu[j];
732 732 } else {
733 733 /*
734 734 * Bind interrupt to the next CPU available
735 735 * and leave the worker unbound.
736 736 */
737 737 worker_cpuid = -1;
738 738 }
739 739 sringp = tx_srs->srs_tx_soft_rings[i];
740 740 ring = (mac_ring_t *)sringp->s_ring_tx_arg2;
741 741 srs_cpu->mc_tx_fanout_cpus[i] = worker_cpuid;
742 742 if (MAC_RING_RETARGETABLE(ring) &&
743 743 retargetable_client) {
744 744 mutex_enter(&cpu_lock);
745 745 srs_cpu->mc_tx_intr_cpu[i] =
746 746 (mrp != NULL) ? mrp->mrp_cpu[j] :
747 747 (mac_tx_intr_retarget ?
748 748 mac_next_bind_cpu(cpupart) : -1);
749 749 mutex_exit(&cpu_lock);
750 750 } else {
751 751 srs_cpu->mc_tx_intr_cpu[i] = -1;
752 752 }
753 753 if (mrp != NULL)
754 754 j--;
755 755 }
756 756 } else {
757 757 /* Tx mac_ring_handle_t is stored in st_arg2 */
758 758 srs_cpu->mc_tx_fanout_cpus[0] =
759 759 (mrp != NULL) ? mrp->mrp_cpu[mrp->mrp_ncpus - 1] : -1;
760 760 ring = (mac_ring_t *)srs_tx->st_arg2;
761 761 if (MAC_RING_RETARGETABLE(ring) && retargetable_client) {
762 762 mutex_enter(&cpu_lock);
763 763 srs_cpu->mc_tx_intr_cpu[0] = (mrp != NULL) ?
764 764 mrp->mrp_cpu[mrp->mrp_ncpus - 1] :
765 765 (mac_tx_intr_retarget ?
766 766 mac_next_bind_cpu(cpupart) : -1);
767 767 mutex_exit(&cpu_lock);
768 768 } else {
769 769 srs_cpu->mc_tx_intr_cpu[0] = -1;
770 770 }
771 771 }
772 772 }
773 773
774 774 /*
775 775 * Assignment of user-specified CPUs to a link.
776 776 *
777 777 * Minimum CPUs required to get an optimal assignment:
778 778 * For each Rx SRS, at least two CPUs are needed if the mac_latency_optimize
779 779 * flag is set -- one for polling, one for the fanout soft ring.
780 780 * If mac_latency_optimize is not set, then 3 CPUs are needed -- one
781 781 * for polling, one for the SRS worker thread, and one for the fanout soft ring.
782 782 *
783 783 * The CPUs needed for Tx side is equal to the number of Tx rings
784 784 * the link is using.
785 785 *
786 786 * mac_flow_user_cpu_init() categorizes the CPU assignment into
787 787 * 3 different buckets depending upon the number of CPUs.
788 788 *
789 789 * In the first bucket, the most optimal case is handled. The user has
790 790 * passed a sufficient number of CPUs and every thread gets its own CPU.
791 791 *
792 792 * The second and third are the sub-optimal cases, where not enough
793 793 * CPUs are available.
794 794 *
795 795 * The second bucket handles the case where at least one distinct CPU
796 796 * is available for each of the Rx rings (Rx SRSes) and Tx rings (Tx
797 797 * SRS or soft rings).
798 798 *
799 799 * In the third case (worst case scenario), the specified CPU count is
800 800 * less than the number of Rx rings configured for the link. In this
801 801 * case, we round robin the CPUs among the Rx SRSes and Tx SRS/soft rings.
802 802 */
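/*
 * Worked example for the buckets above (hypothetical counts): a link
 * with 2 h/w Rx rings (fe_rx_srs_cnt = 3, hence rx_srs_cnt = 2),
 * 2 Tx rings and mac_latency_optimize set needs reqd_rx_cpu_cnt = 4
 * and reqd_tx_cpu_cnt = 2.  A user list of 8 CPUs falls in the first
 * bucket: fanout_cpu_cnt = 8 - 2 - 2 = 4, giving each Rx SRS
 * fanout_cnt_per_srs = 4/2 = 2 fanout soft rings.  A list of 4 or 5
 * CPUs falls in the second bucket (>= rx_srs_cnt + reqd_tx_cpu_cnt),
 * and 3 CPUs falls through to the third, round-robin bucket.
 */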
803 803 static void
804 804 mac_flow_user_cpu_init(flow_entry_t *flent, mac_resource_props_t *mrp)
805 805 {
806 806 mac_soft_ring_set_t *rx_srs, *tx_srs;
807 807 int i, srs_cnt;
808 808 mac_cpus_t *srs_cpu;
809 809 int no_of_cpus, cpu_cnt;
810 810 int rx_srs_cnt, reqd_rx_cpu_cnt;
811 811 int fanout_cpu_cnt, reqd_tx_cpu_cnt;
812 812 int reqd_poll_worker_cnt, fanout_cnt_per_srs;
813 813 mac_resource_props_t *emrp = &flent->fe_effective_props;
814 814
815 815 ASSERT(mrp->mrp_fanout_mode == MCM_CPUS);
816 816 /*
817 817 * The check for nbc_ncpus to be within limits for
818 818 * the user specified case was done earlier and if
819 819 * not within limits, an error would have been
820 820 * returned to the user.
821 821 */
822 822 ASSERT(mrp->mrp_ncpus > 0);
823 823
824 824 no_of_cpus = mrp->mrp_ncpus;
825 825
826 826 if (mrp->mrp_rx_intr_cpu != -1) {
827 827 /*
828 828 * The interrupt has been re-targeted. The poll
829 829 * thread needs to be bound to the interrupt
830 830 * CPU.
831 831 *
832 832 * Find where in the list the intr CPU is
833 833 * and swap it with the first one.
834 834 * We will be using the first CPU in the
835 835 * list for poll.
836 836 */
837 837 for (i = 0; i < no_of_cpus; i++) {
838 838 if (mrp->mrp_cpu[i] == mrp->mrp_rx_intr_cpu)
839 839 break;
840 840 }
841 841 mrp->mrp_cpu[i] = mrp->mrp_cpu[0];
842 842 mrp->mrp_cpu[0] = mrp->mrp_rx_intr_cpu;
843 843 }
844 844
845 845 /*
846 846 * Requirements:
847 847 * The number of CPUs that each Rx ring needs is dependent
848 848 * upon the mac_latency_optimize flag.
849 849 * 1) If set, at least 2 CPUs are needed -- one for
850 850 * polling, one for fanout soft ring.
851 851 * 2) If not set, then at least 3 CPUs are needed -- one
852 852 * for polling, one for srs worker thread, and one for
853 853 * fanout soft ring.
854 854 */
855 855 rx_srs_cnt = (flent->fe_rx_srs_cnt > 1) ?
856 856 (flent->fe_rx_srs_cnt - 1) : flent->fe_rx_srs_cnt;
857 857 reqd_rx_cpu_cnt = mac_latency_optimize ?
858 858 (rx_srs_cnt * 2) : (rx_srs_cnt * 3);
859 859
860 860 /* How many CPUs are needed for Tx side? */
861 861 tx_srs = flent->fe_tx_srs;
862 862 reqd_tx_cpu_cnt = MAC_TX_SOFT_RINGS(tx_srs) ?
863 863 tx_srs->srs_tx_ring_count : 1;
864 864
865 865 /* CPUs needed for Rx SRSes poll and worker threads */
866 866 reqd_poll_worker_cnt = mac_latency_optimize ?
867 867 rx_srs_cnt : rx_srs_cnt * 2;
868 868
869 869 /* Has the user provided enough CPUs? */
870 870 if (no_of_cpus >= (reqd_rx_cpu_cnt + reqd_tx_cpu_cnt)) {
871 871 /*
872 872 * Best case scenario. There is enough CPUs. All
873 873 * Rx rings will get their own set of CPUs plus
874 874 * Tx soft rings will get their own.
875 875 */
876 876 /*
877 877 * fanout_cpu_cnt is the number of CPUs available
878 878 * for Rx side fanout soft rings.
879 879 */
880 880 fanout_cpu_cnt = no_of_cpus -
881 881 reqd_poll_worker_cnt - reqd_tx_cpu_cnt;
882 882
883 883 /*
884 884 * Divide fanout_cpu_cnt by rx_srs_cnt to find
885 885 * out how many fanout soft rings each Rx SRS
886 886 * can have.
887 887 */
888 888 fanout_cnt_per_srs = fanout_cpu_cnt/rx_srs_cnt;
889 889
890 890 /* fanout_cnt_per_srs should not be > MAX_SR_FANOUT */
891 891 fanout_cnt_per_srs = min(fanout_cnt_per_srs, MAX_SR_FANOUT);
892 892
893 893 /* Do the assignment for the default Rx ring */
894 894 cpu_cnt = 0;
895 895 rx_srs = flent->fe_rx_srs[0];
896 896 ASSERT(rx_srs->srs_ring == NULL);
897 897 if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT)
898 898 rx_srs->srs_fanout_state = SRS_FANOUT_REINIT;
899 899 srs_cpu = &rx_srs->srs_cpu;
900 900 srs_cpu->mc_ncpus = no_of_cpus;
901 901 bcopy(mrp->mrp_cpu,
902 902 srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus));
903 903 srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs;
904 904 srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++];
905 905 /* Retarget the interrupt to the same CPU as the poll */
906 906 srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid;
907 907 srs_cpu->mc_rx_workerid = (mac_latency_optimize ?
908 908 srs_cpu->mc_rx_pollid : mrp->mrp_cpu[cpu_cnt++]);
909 909 for (i = 0; i < fanout_cnt_per_srs; i++)
910 910 srs_cpu->mc_rx_fanout_cpus[i] = mrp->mrp_cpu[cpu_cnt++];
911 911
912 912 /* Do the assignment for h/w Rx SRSes */
913 913 if (flent->fe_rx_srs_cnt > 1) {
914 914 cpu_cnt = 0;
915 915 for (srs_cnt = 1;
916 916 srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) {
917 917 rx_srs = flent->fe_rx_srs[srs_cnt];
918 918 ASSERT(rx_srs->srs_ring != NULL);
919 919 if (rx_srs->srs_fanout_state ==
920 920 SRS_FANOUT_INIT) {
921 921 rx_srs->srs_fanout_state =
922 922 SRS_FANOUT_REINIT;
923 923 }
924 924 srs_cpu = &rx_srs->srs_cpu;
925 925 srs_cpu->mc_ncpus = no_of_cpus;
926 926 bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus,
927 927 sizeof (srs_cpu->mc_cpus));
928 928 srs_cpu->mc_rx_fanout_cnt = fanout_cnt_per_srs;
929 929 /* The first CPU in the list is the intr CPU */
930 930 srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++];
931 931 srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid;
932 932 srs_cpu->mc_rx_workerid =
933 933 (mac_latency_optimize ?
934 934 srs_cpu->mc_rx_pollid :
935 935 mrp->mrp_cpu[cpu_cnt++]);
936 936 for (i = 0; i < fanout_cnt_per_srs; i++) {
937 937 srs_cpu->mc_rx_fanout_cpus[i] =
938 938 mrp->mrp_cpu[cpu_cnt++];
939 939 }
940 940 ASSERT(cpu_cnt <= no_of_cpus);
941 941 }
942 942 }
943 943 goto tx_cpu_init;
944 944 }
945 945
946 946 /*
947 947 * Sub-optimal case.
948 948 * We have the following information:
949 949 * no_of_cpus - no. of cpus that user passed.
950 950 * rx_srs_cnt - no. of rx rings.
951 951 * reqd_rx_cpu_cnt = mac_latency_optimize?rx_srs_cnt*2:rx_srs_cnt*3
952 952 * reqd_tx_cpu_cnt - no. of cpus reqd. for Tx side.
953 953 * reqd_poll_worker_cnt = mac_latency_optimize?rx_srs_cnt:rx_srs_cnt*2
954 954 */
955 955 /*
956 956 * If we bind the Rx fanout soft rings to the same CPUs
957 957 * as poll/worker, would that be enough?
958 958 */
959 959 if (no_of_cpus >= (rx_srs_cnt + reqd_tx_cpu_cnt)) {
960 960 boolean_t worker_assign = B_FALSE;
961 961
962 962 /*
963 963 * If mac_latency_optimize is not set, are there
964 964 * enough CPUs to assign a CPU for worker also?
965 965 */
966 966 if (no_of_cpus >= (reqd_poll_worker_cnt + reqd_tx_cpu_cnt))
967 967 worker_assign = B_TRUE;
968 968 /*
969 969 * The zeroth Rx SRS is the default Rx ring. It is
970 970 * not associated with an h/w Rx ring.
971 971 */
972 972 rx_srs = flent->fe_rx_srs[0];
973 973 ASSERT(rx_srs->srs_ring == NULL);
974 974 if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT)
975 975 rx_srs->srs_fanout_state = SRS_FANOUT_REINIT;
976 976 cpu_cnt = 0;
977 977 srs_cpu = &rx_srs->srs_cpu;
978 978 srs_cpu->mc_ncpus = no_of_cpus;
979 979 bcopy(mrp->mrp_cpu,
980 980 srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus));
981 981 srs_cpu->mc_rx_fanout_cnt = 1;
982 982 srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt++];
983 983 /* Retarget the interrupt to the same CPU as the poll */
984 984 srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid;
985 985 srs_cpu->mc_rx_workerid =
986 986 ((!mac_latency_optimize && worker_assign) ?
987 987 mrp->mrp_cpu[cpu_cnt++] : srs_cpu->mc_rx_pollid);
988 988
989 989 srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt];
990 990
991 991 /* Do CPU bindings for SRSes having h/w Rx rings */
992 992 if (flent->fe_rx_srs_cnt > 1) {
993 993 cpu_cnt = 0;
994 994 for (srs_cnt = 1;
995 995 srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) {
996 996 rx_srs = flent->fe_rx_srs[srs_cnt];
997 997 ASSERT(rx_srs->srs_ring != NULL);
998 998 if (rx_srs->srs_fanout_state ==
999 999 SRS_FANOUT_INIT) {
1000 1000 rx_srs->srs_fanout_state =
1001 1001 SRS_FANOUT_REINIT;
1002 1002 }
1003 1003 srs_cpu = &rx_srs->srs_cpu;
1004 1004 srs_cpu->mc_ncpus = no_of_cpus;
1005 1005 bcopy(mrp->mrp_cpu, srs_cpu->mc_cpus,
1006 1006 sizeof (srs_cpu->mc_cpus));
1007 1007 srs_cpu->mc_rx_pollid =
1008 1008 mrp->mrp_cpu[cpu_cnt];
1009 1009 srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid;
1010 1010 srs_cpu->mc_rx_workerid =
1011 1011 ((!mac_latency_optimize && worker_assign) ?
1012 1012 mrp->mrp_cpu[++cpu_cnt] :
1013 1013 srs_cpu->mc_rx_pollid);
1014 1014 srs_cpu->mc_rx_fanout_cnt = 1;
1015 1015 srs_cpu->mc_rx_fanout_cpus[0] =
1016 1016 mrp->mrp_cpu[cpu_cnt];
1017 1017 cpu_cnt++;
1018 1018 ASSERT(cpu_cnt <= no_of_cpus);
1019 1019 }
1020 1020 }
1021 1021 goto tx_cpu_init;
1022 1022 }
1023 1023
1024 1024 /*
1025 1025 * Real sub-optimal case. Not enough CPUs for poll and
1026 1026 * Tx soft rings. Do a round robin assignment where
1027 1027 * each Rx SRS will get the same CPU for poll, worker
1028 1028 * and fanout soft ring.
1029 1029 */
1030 1030 cpu_cnt = 0;
1031 1031 for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) {
1032 1032 rx_srs = flent->fe_rx_srs[srs_cnt];
1033 1033 srs_cpu = &rx_srs->srs_cpu;
1034 1034 if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT)
1035 1035 rx_srs->srs_fanout_state = SRS_FANOUT_REINIT;
1036 1036 srs_cpu->mc_ncpus = no_of_cpus;
1037 1037 bcopy(mrp->mrp_cpu,
1038 1038 srs_cpu->mc_cpus, sizeof (srs_cpu->mc_cpus));
1039 1039 srs_cpu->mc_rx_fanout_cnt = 1;
1040 1040 srs_cpu->mc_rx_pollid = mrp->mrp_cpu[cpu_cnt];
1041 1041 /* Retarget the interrupt to the same CPU as the poll */
1042 1042 srs_cpu->mc_rx_intr_cpu = srs_cpu->mc_rx_pollid;
1043 1043 srs_cpu->mc_rx_workerid = mrp->mrp_cpu[cpu_cnt];
1044 1044 srs_cpu->mc_rx_fanout_cpus[0] = mrp->mrp_cpu[cpu_cnt];
1045 1045 if (++cpu_cnt >= no_of_cpus)
1046 1046 cpu_cnt = 0;
1047 1047 }
1048 1048
1049 1049 tx_cpu_init:
1050 1050 mac_tx_cpu_init(flent, mrp, NULL);
1051 1051
1052 1052 /*
1053 1053 * Copy the user specified CPUs to the effective CPUs
1054 1054 */
1055 1055 for (i = 0; i < mrp->mrp_ncpus; i++) {
1056 1056 emrp->mrp_cpu[i] = mrp->mrp_cpu[i];
1057 1057 }
1058 1058 emrp->mrp_ncpus = mrp->mrp_ncpus;
1059 1059 emrp->mrp_mask = mrp->mrp_mask;
1060 1060 bzero(emrp->mrp_pool, MAXPATHLEN);
1061 1061 }
1062 1062
1063 1063 /*
1064 1064 * mac_flow_cpu_init():
1065 1065 *
1066 1066 * Each SRS has a mac_cpu_t structure, srs_cpu. This routine fills in
1067 1067 * the CPU binding information in srs_cpu for all Rx SRSes associated
1068 1068 * with a flent.
1069 1069 */
1070 1070 static void
1071 1071 mac_flow_cpu_init(flow_entry_t *flent, cpupart_t *cpupart)
1072 1072 {
1073 1073 mac_soft_ring_set_t *rx_srs;
1074 1074 processorid_t cpuid;
1075 1075 int i, j, k, srs_cnt, nscpus, maxcpus, soft_ring_cnt = 0;
1076 1076 mac_cpus_t *srs_cpu;
1077 1077 mac_resource_props_t *emrp = &flent->fe_effective_props;
1078 1078 uint32_t cpus[MRP_NCPUS];
1079 1079
1080 1080 /*
1081 1081 * The maximum number of CPUs available can either be
1082 1082 * the number of CPUs in the pool or the number of CPUs
1083 1083 * in the system.
1084 1084 */
1085 1085 maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus;
1086 1086
1087 1087 /*
1088 1088 * Compute the number of soft rings needed on top for each Rx
1089 1089 * SRS. "rx_srs_cnt-1" indicates the number of Rx SRSes
1090 1090 * associated with h/w Rx rings. The soft ring count needed
1091 1091 * for each h/w Rx SRS is computed, and the same count is
1092 1092 * applied to the software-classified Rx SRS. The first Rx
1093 1093 * SRS in fe_rx_srs[] is the software-classified Rx SRS.
1094 1094 */
1095 1095 soft_ring_cnt = mac_compute_soft_ring_count(flent,
1096 1096 flent->fe_rx_srs_cnt - 1, maxcpus);
1097 1097 if (soft_ring_cnt == 0) {
1098 1098 /*
1099 1099 * Even when soft_ring_cnt is 0, we still need
1100 1100 * to create a soft ring for TCP, UDP and
1101 1101 * OTHER. So set it to 1.
1102 1102 */
1103 1103 soft_ring_cnt = 1;
1104 1104 }
1105 1105 for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) {
1106 1106 rx_srs = flent->fe_rx_srs[srs_cnt];
1107 1107 srs_cpu = &rx_srs->srs_cpu;
1108 1108 if (rx_srs->srs_fanout_state == SRS_FANOUT_INIT)
1109 1109 rx_srs->srs_fanout_state = SRS_FANOUT_REINIT;
1110 1110 srs_cpu->mc_ncpus = soft_ring_cnt;
1111 1111 srs_cpu->mc_rx_fanout_cnt = soft_ring_cnt;
1112 1112 mutex_enter(&cpu_lock);
1113 1113 for (j = 0; j < soft_ring_cnt; j++) {
1114 1114 cpuid = mac_next_bind_cpu(cpupart);
1115 1115 srs_cpu->mc_cpus[j] = cpuid;
1116 1116 srs_cpu->mc_rx_fanout_cpus[j] = cpuid;
1117 1117 }
1118 1118 cpuid = mac_next_bind_cpu(cpupart);
1119 1119 srs_cpu->mc_rx_pollid = cpuid;
1120 1120 srs_cpu->mc_rx_intr_cpu = (mac_rx_intr_retarget ?
1121 1121 srs_cpu->mc_rx_pollid : -1);
1122 1122 /* increment ncpus to account for polling cpu */
1123 1123 srs_cpu->mc_ncpus++;
1124 1124 srs_cpu->mc_cpus[j++] = cpuid;
1125 1125 if (!mac_latency_optimize) {
1126 1126 cpuid = mac_next_bind_cpu(cpupart);
1127 1127 srs_cpu->mc_ncpus++;
1128 1128 srs_cpu->mc_cpus[j++] = cpuid;
1129 1129 }
1130 1130 srs_cpu->mc_rx_workerid = cpuid;
1131 1131 mutex_exit(&cpu_lock);
1132 1132 }
1133 1133
1134 1134 nscpus = 0;
1135 1135 for (srs_cnt = 0; srs_cnt < flent->fe_rx_srs_cnt; srs_cnt++) {
1136 1136 rx_srs = flent->fe_rx_srs[srs_cnt];
1137 1137 srs_cpu = &rx_srs->srs_cpu;
1138 1138 for (j = 0; j < srs_cpu->mc_ncpus; j++) {
1139 1139 cpus[nscpus++] = srs_cpu->mc_cpus[j];
1140 1140 }
1141 1141 }
1142 1142
1143 1143
1144 1144 /*
1145 1145 * Copy cpu list to fe_effective_props
1146 1146 * without duplicates.
1147 1147 */
1148 1148 k = 0;
1149 1149 for (i = 0; i < nscpus; i++) {
1150 1150 for (j = 0; j < k; j++) {
1151 1151 if (emrp->mrp_cpu[j] == cpus[i])
1152 1152 break;
1153 1153 }
1154 1154 if (j == k)
1155 1155 emrp->mrp_cpu[k++] = cpus[i];
1156 1156 }
1157 1157 emrp->mrp_ncpus = k;
1158 1158
1159 1159 mac_tx_cpu_init(flent, NULL, cpupart);
1160 1160 }
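/*
 * Illustration of the loop above (hypothetical outcome): with
 * soft_ring_cnt = 2 and mac_latency_optimize set, each Rx SRS draws
 * 3 CPUs from the round robin -- two for fanout soft rings and one
 * shared by the poll and worker threads (mc_rx_workerid ends up equal
 * to mc_rx_pollid).  With mac_latency_optimize clear, a fourth CPU is
 * drawn and the worker thread gets its own binding.
 */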
1161 1161
1162 1162 /*
1163 1163 * DATAPATH SETUP ROUTINES
1164 1164 * (setup SRS and set/update FANOUT, B/W and PRIORITY)
1165 1165 */
1166 1166
1167 1167 /*
1168 1168 * mac_srs_fanout_list_alloc:
1169 1169 *
1170 1170 * The underlying device can expose up to MAX_RINGS_PER_GROUP worth of
1171 1171 * rings to a client. In such a case, MAX_RINGS_PER_GROUP worth of
1172 1172 * array space is needed to store Tx soft rings. Thus we allocate that
1173 1173 * much array space for srs_tx_soft_rings.
1174 1174 *
1175 1175 * And when it is an aggr, again we allocate MAX_RINGS_PER_GROUP worth
1176 1176 * of space to st_soft_rings. This array is used for quick access to
1177 1177 * soft ring associated with a pseudo Tx ring based on the pseudo
1178 1178 * ring's index (mr_index).
1179 1179 */
1180 1180 static void
1181 1181 mac_srs_fanout_list_alloc(mac_soft_ring_set_t *mac_srs)
1182 1182 {
1183 1183 mac_client_impl_t *mcip = mac_srs->srs_mcip;
1184 1184
1185 1185 if (mac_srs->srs_type & SRST_TX) {
1186 1186 mac_srs->srs_tx_soft_rings = (mac_soft_ring_t **)
1187 1187 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1188 1188 MAX_RINGS_PER_GROUP, KM_SLEEP);
1189 - if (mcip->mci_state_flags & MCIS_IS_AGGR) {
1189 + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
1190 1190 mac_srs_tx_t *tx = &mac_srs->srs_tx;
1191 1191
1192 1192 tx->st_soft_rings = (mac_soft_ring_t **)
1193 1193 kmem_zalloc(sizeof (mac_soft_ring_t *) *
1194 1194 MAX_RINGS_PER_GROUP, KM_SLEEP);
1195 1195 }
1196 1196 } else {
1197 1197 mac_srs->srs_tcp_soft_rings = (mac_soft_ring_t **)
1198 1198 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1199 1199 KM_SLEEP);
1200 1200 mac_srs->srs_udp_soft_rings = (mac_soft_ring_t **)
1201 1201 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1202 1202 KM_SLEEP);
1203 1203 mac_srs->srs_oth_soft_rings = (mac_soft_ring_t **)
1204 1204 kmem_zalloc(sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT,
1205 1205 KM_SLEEP);
1206 1206 }
1207 1207 }
1208 1208
1209 1209 static void
1210 1210 mac_srs_worker_bind(mac_soft_ring_set_t *mac_srs, processorid_t cpuid)
1211 1211 {
1212 1212 cpu_t *cp;
1213 1213 boolean_t clear = B_FALSE;
1214 1214
1215 1215 ASSERT(MUTEX_HELD(&cpu_lock));
1216 1216
1217 1217 if (!mac_srs_thread_bind)
1218 1218 return;
1219 1219
1220 1220 cp = cpu_get(cpuid);
1221 1221 if (cp == NULL || !cpu_is_online(cp))
1222 1222 return;
1223 1223
1224 1224 mutex_enter(&mac_srs->srs_lock);
1225 1225 mac_srs->srs_state |= SRS_WORKER_BOUND;
1226 1226 if (mac_srs->srs_worker_cpuid != -1)
1227 1227 clear = B_TRUE;
1228 1228 mac_srs->srs_worker_cpuid = cpuid;
1229 1229 mutex_exit(&mac_srs->srs_lock);
1230 1230
1231 1231 if (clear)
1232 1232 thread_affinity_clear(mac_srs->srs_worker);
1233 1233
1234 1234 thread_affinity_set(mac_srs->srs_worker, cpuid);
1235 1235 DTRACE_PROBE1(worker__CPU, processorid_t, cpuid);
1236 1236 }
1237 1237
1238 1238 static void
1239 1239 mac_srs_poll_bind(mac_soft_ring_set_t *mac_srs, processorid_t cpuid)
1240 1240 {
1241 1241 cpu_t *cp;
1242 1242 boolean_t clear = B_FALSE;
1243 1243
1244 1244 ASSERT(MUTEX_HELD(&cpu_lock));
1245 1245
1246 1246 if (!mac_srs_thread_bind || mac_srs->srs_poll_thr == NULL)
1247 1247 return;
1248 1248
1249 1249 cp = cpu_get(cpuid);
1250 1250 if (cp == NULL || !cpu_is_online(cp))
1251 1251 return;
1252 1252
1253 1253 mutex_enter(&mac_srs->srs_lock);
1254 1254 mac_srs->srs_state |= SRS_POLL_BOUND;
1255 1255 if (mac_srs->srs_poll_cpuid != -1)
1256 1256 clear = B_TRUE;
1257 1257 mac_srs->srs_poll_cpuid = cpuid;
1258 1258 mutex_exit(&mac_srs->srs_lock);
1259 1259
1260 1260 if (clear)
1261 1261 thread_affinity_clear(mac_srs->srs_poll_thr);
1262 1262
1263 1263 thread_affinity_set(mac_srs->srs_poll_thr, cpuid);
1264 1264 DTRACE_PROBE1(poll__CPU, processorid_t, cpuid);
1265 1265 }
1266 1266
1267 1267 /*
1268 1268 * Re-target the interrupt to the passed CPU. If re-targeting succeeds,
1269 1269 * set mc_rx_intr_cpu to the re-targeted CPU. Otherwise set it to -1.
1270 1270 */
1271 1271 void
1272 1272 mac_rx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs, processorid_t cpuid)
1273 1273 {
1274 1274 cpu_t *cp;
1275 1275 mac_ring_t *ring = mac_srs->srs_ring;
1276 1276 mac_intr_t *mintr = &ring->mr_info.mri_intr;
1277 1277 flow_entry_t *flent = mac_srs->srs_flent;
1278 1278 boolean_t primary = mac_is_primary_client(mac_srs->srs_mcip);
1279 1279
1280 1280 ASSERT(MUTEX_HELD(&cpu_lock));
1281 1281
1282 1282 /*
1283 1283 * Don't re-target the interrupt for these cases:
1284 1284 * 1) ring is NULL
1285 1285 * 2) the interrupt is shared (mi_ddi_shared)
1286 1286 * 3) ddi_handle is NULL and !primary
1287 1287 * 4) primary, ddi_handle is NULL but fe_rx_srs_cnt > 2
1288 1288 * Cases 3 & 4 are because of the mac_client_intr_cpu() routine.
1289 1289 * This routine will re-target fixed interrupt for primary
1290 1290 * mac client if the client has only one ring. In that
1291 1291 * case, mc_rx_intr_cpu will already have the correct value.
1292 1292 */
1293 1293 if (ring == NULL || mintr->mi_ddi_shared || cpuid == -1 ||
1294 1294 (mintr->mi_ddi_handle == NULL && !primary) || (primary &&
1295 1295 mintr->mi_ddi_handle == NULL && flent->fe_rx_srs_cnt > 2)) {
1296 1296 mac_srs->srs_cpu.mc_rx_intr_cpu = -1;
1297 1297 return;
1298 1298 }
1299 1299
1300 1300 if (mintr->mi_ddi_handle == NULL)
1301 1301 return;
1302 1302
1303 1303 cp = cpu_get(cpuid);
1304 1304 if (cp == NULL || !cpu_is_online(cp))
1305 1305 return;
1306 1306
1307 1307 /* Drop the cpu_lock as set_intr_affinity() holds it */
1308 1308 mutex_exit(&cpu_lock);
1309 1309 if (set_intr_affinity(mintr->mi_ddi_handle, cpuid) == DDI_SUCCESS)
1310 1310 mac_srs->srs_cpu.mc_rx_intr_cpu = cpuid;
1311 1311 else
1312 1312 mac_srs->srs_cpu.mc_rx_intr_cpu = -1;
1313 1313 mutex_enter(&cpu_lock);
1314 1314 }
1315 1315
1316 1316 /*
1317 1317 * Re-target Tx interrupts
1318 1318 */
1319 1319 void
1320 1320 mac_tx_srs_retarget_intr(mac_soft_ring_set_t *mac_srs)
1321 1321 {
1322 1322 cpu_t *cp;
1323 1323 mac_ring_t *ring;
1324 1324 mac_intr_t *mintr;
1325 1325 mac_soft_ring_t *sringp;
1326 1326 mac_srs_tx_t *srs_tx;
1327 1327 mac_cpus_t *srs_cpu;
1328 1328 processorid_t cpuid;
1329 1329 int i;
1330 1330
1331 1331 ASSERT(MUTEX_HELD(&cpu_lock));
1332 1332
1333 1333 srs_cpu = &mac_srs->srs_cpu;
1334 1334 if (MAC_TX_SOFT_RINGS(mac_srs)) {
1335 1335 for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
1336 1336 sringp = mac_srs->srs_tx_soft_rings[i];
1337 1337 ring = (mac_ring_t *)sringp->s_ring_tx_arg2;
1338 1338 cpuid = srs_cpu->mc_tx_intr_cpu[i];
1339 1339 cp = cpu_get(cpuid);
1340 1340 if (cp == NULL || !cpu_is_online(cp) ||
1341 1341 !MAC_RING_RETARGETABLE(ring)) {
1342 1342 srs_cpu->mc_tx_retargeted_cpu[i] = -1;
1343 1343 continue;
1344 1344 }
1345 1345 mintr = &ring->mr_info.mri_intr;
1346 1346 /*
1347 1347 * Drop the cpu_lock as set_intr_affinity()
1348 1348 * holds it
1349 1349 */
1350 1350 mutex_exit(&cpu_lock);
1351 1351 if (set_intr_affinity(mintr->mi_ddi_handle,
1352 1352 cpuid) == DDI_SUCCESS) {
1353 1353 srs_cpu->mc_tx_retargeted_cpu[i] = cpuid;
1354 1354 } else {
1355 1355 srs_cpu->mc_tx_retargeted_cpu[i] = -1;
1356 1356 }
1357 1357 mutex_enter(&cpu_lock);
1358 1358 }
1359 1359 } else {
1360 1360 cpuid = srs_cpu->mc_tx_intr_cpu[0];
1361 1361 cp = cpu_get(cpuid);
1362 1362 if (cp == NULL || !cpu_is_online(cp)) {
1363 1363 srs_cpu->mc_tx_retargeted_cpu[0] = -1;
1364 1364 return;
1365 1365 }
1366 1366 srs_tx = &mac_srs->srs_tx;
1367 1367 ring = (mac_ring_t *)srs_tx->st_arg2;
1368 1368 if (MAC_RING_RETARGETABLE(ring)) {
1369 1369 mintr = &ring->mr_info.mri_intr;
1370 1370 mutex_exit(&cpu_lock);
1371 1371 if ((set_intr_affinity(mintr->mi_ddi_handle,
1372 1372 cpuid) == DDI_SUCCESS)) {
1373 1373 srs_cpu->mc_tx_retargeted_cpu[0] = cpuid;
1374 1374 } else {
1375 1375 srs_cpu->mc_tx_retargeted_cpu[0] = -1;
1376 1376 }
1377 1377 mutex_enter(&cpu_lock);
1378 1378 }
1379 1379 }
1380 1380 }
1381 1381
1382 1382 /*
1383 1383 * When a CPU comes back online, bind the MAC kernel threads which
1384 1384 * were previously bound to that CPU, and had to be unbound because
1385 1385 * the CPU was going away.
1386 1386 *
1387 1387 * These functions are called with cpu_lock held and hence we can't
1388 1388 * cv_wait to grab the mac perimeter. Since these functions walk the soft
1389 1389 * ring list of an SRS without being in the perimeter, the list itself
1390 1390 * is protected by the SRS lock.
1391 1391 */
1392 1392 static void
1393 1393 mac_walk_srs_and_bind(int cpuid)
1394 1394 {
1395 1395 mac_soft_ring_set_t *mac_srs;
1396 1396 mac_soft_ring_t *soft_ring;
1397 1397
1398 1398 rw_enter(&mac_srs_g_lock, RW_READER);
1399 1399
1400 1400 if ((mac_srs = mac_srs_g_list) == NULL)
1401 1401 goto done;
1402 1402
1403 1403 for (; mac_srs != NULL; mac_srs = mac_srs->srs_next) {
1404 1404 if (mac_srs->srs_worker_cpuid == -1 &&
1405 1405 mac_srs->srs_worker_cpuid_save == cpuid) {
1406 1406 mac_srs->srs_worker_cpuid_save = -1;
1407 1407 mac_srs_worker_bind(mac_srs, cpuid);
1408 1408 }
1409 1409
1410 1410 if (!(mac_srs->srs_type & SRST_TX)) {
1411 1411 if (mac_srs->srs_poll_cpuid == -1 &&
1412 1412 mac_srs->srs_poll_cpuid_save == cpuid) {
1413 1413 mac_srs->srs_poll_cpuid_save = -1;
1414 1414 mac_srs_poll_bind(mac_srs, cpuid);
1415 1415 }
1416 1416 }
1417 1417
1418 1418 /* Next tackle the soft rings associated with the srs */
1419 1419 mutex_enter(&mac_srs->srs_lock);
1420 1420 for (soft_ring = mac_srs->srs_soft_ring_head; soft_ring != NULL;
1421 1421 soft_ring = soft_ring->s_ring_next) {
1422 1422 if (soft_ring->s_ring_cpuid == -1 &&
1423 1423 soft_ring->s_ring_cpuid_save == cpuid) {
1424 1424 soft_ring->s_ring_cpuid_save = -1;
1425 1425 (void) mac_soft_ring_bind(soft_ring, cpuid);
1426 1426 }
1427 1427 }
1428 1428 mutex_exit(&mac_srs->srs_lock);
1429 1429 }
1430 1430 done:
1431 1431 rw_exit(&mac_srs_g_lock);
1432 1432 }
1433 1433
1434 1434 /*
1435 1435 * Change the priority of the SRS's poll and worker thread. Additionally,
1436 1436 * update the priority of the worker threads for the SRS's soft rings.
1437 1437 * We also need to modify any associated squeue threads.
1438 1438 */
1439 1439 void
1440 1440 mac_update_srs_priority(mac_soft_ring_set_t *mac_srs, pri_t prival)
1441 1441 {
1442 1442 mac_soft_ring_t *ringp;
1443 1443
1444 1444 mac_srs->srs_pri = prival;
1445 1445 thread_lock(mac_srs->srs_worker);
1446 1446 (void) thread_change_pri(mac_srs->srs_worker, mac_srs->srs_pri, 0);
1447 1447 thread_unlock(mac_srs->srs_worker);
1448 1448 if (mac_srs->srs_poll_thr != NULL) {
1449 1449 thread_lock(mac_srs->srs_poll_thr);
1450 1450 (void) thread_change_pri(mac_srs->srs_poll_thr,
1451 1451 mac_srs->srs_pri, 0);
1452 1452 thread_unlock(mac_srs->srs_poll_thr);
1453 1453 }
1454 1454 if ((ringp = mac_srs->srs_soft_ring_head) == NULL)
1455 1455 return;
1456 1456 while (ringp != mac_srs->srs_soft_ring_tail) {
1457 1457 thread_lock(ringp->s_ring_worker);
1458 1458 (void) thread_change_pri(ringp->s_ring_worker,
1459 1459 mac_srs->srs_pri, 0);
1460 1460 thread_unlock(ringp->s_ring_worker);
1461 1461 ringp = ringp->s_ring_next;
1462 1462 }
1463 1463 ASSERT(ringp == mac_srs->srs_soft_ring_tail);
1464 1464 thread_lock(ringp->s_ring_worker);
1465 1465 (void) thread_change_pri(ringp->s_ring_worker, mac_srs->srs_pri, 0);
1466 1466 thread_unlock(ringp->s_ring_worker);
1467 1467 }
1468 1468
1469 1469 /*
1470 1470 * Change the receive bandwidth limit.
1471 1471 */
1472 1472 static void
1473 1473 mac_rx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp)
1474 1474 {
1475 1475 mac_soft_ring_t *softring;
1476 1476
1477 1477 mutex_enter(&srs->srs_lock);
1478 1478 mutex_enter(&srs->srs_bw->mac_bw_lock);
1479 1479
1480 1480 if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
1481 1481 /* Reset bandwidth limit */
1482 1482 if (srs->srs_type & SRST_BW_CONTROL) {
1483 1483 softring = srs->srs_soft_ring_head;
1484 1484 while (softring != NULL) {
1485 1485 softring->s_ring_type &= ~ST_RING_BW_CTL;
1486 1486 softring = softring->s_ring_next;
1487 1487 }
1488 1488 srs->srs_type &= ~SRST_BW_CONTROL;
1489 1489 srs->srs_drain_func = mac_rx_srs_drain;
1490 1490 }
1491 1491 } else {
1492 1492 /* Set/Modify bandwidth limit */
1493 1493 srs->srs_bw->mac_bw_limit = FLOW_BYTES_PER_TICK(mrp->mrp_maxbw);
1494 1494 /*
1495 1495 * Give twice the queuing capability before
1496 1496 * dropping packets. The unit is bytes/tick.
1497 1497 */
1498 1498 srs->srs_bw->mac_bw_drop_threshold =
1499 1499 srs->srs_bw->mac_bw_limit << 1;
1500 1500 if (!(srs->srs_type & SRST_BW_CONTROL)) {
1501 1501 softring = srs->srs_soft_ring_head;
1502 1502 while (softring != NULL) {
1503 1503 softring->s_ring_type |= ST_RING_BW_CTL;
1504 1504 softring = softring->s_ring_next;
1505 1505 }
1506 1506 srs->srs_type |= SRST_BW_CONTROL;
1507 1507 srs->srs_drain_func = mac_rx_srs_drain_bw;
1508 1508 }
1509 1509 }
1510 1510 done:
1511 1511 mutex_exit(&srs->srs_bw->mac_bw_lock);
1512 1512 mutex_exit(&srs->srs_lock);
1513 1513 }
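/*
 * A note on the arithmetic above (assuming FLOW_BYTES_PER_TICK
 * converts a bits-per-second limit into a per-clock-tick byte
 * budget): mac_bw_limit holds the bytes the SRS may drain per tick,
 * and mac_bw_drop_threshold (mac_bw_limit << 1) permits up to two
 * ticks' worth of backlog to queue before packets are dropped.
 */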
1514 1514
1515 1515 /* Change the transmit bandwidth limit */
1516 1516 static void
1517 1517 mac_tx_srs_update_bwlimit(mac_soft_ring_set_t *srs, mac_resource_props_t *mrp)
1518 1518 {
1519 1519 uint32_t tx_mode, ring_info = 0;
1520 1520 mac_srs_tx_t *srs_tx = &srs->srs_tx;
1521 1521 mac_client_impl_t *mcip = srs->srs_mcip;
1522 1522
1523 1523 /*
1524 1524 * We need to quiesce/restart the client here because mac_tx() and
1525 1525 * srs->srs_tx->st_func do not hold srs->srs_lock while accessing
1526 1526 * st_mode and related fields, which are modified by the code below.
1527 1527 */
1528 1528 mac_tx_client_quiesce((mac_client_handle_t)mcip);
1529 1529
1530 1530 mutex_enter(&srs->srs_lock);
1531 1531 mutex_enter(&srs->srs_bw->mac_bw_lock);
1532 1532
1533 1533 tx_mode = srs_tx->st_mode;
1534 1534 if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
1535 1535 /* Reset bandwidth limit */
1536 1536 if (tx_mode == SRS_TX_BW) {
1537 1537 if (srs_tx->st_arg2 != NULL)
1538 1538 ring_info = mac_hwring_getinfo(srs_tx->st_arg2);
1539 1539 if (mac_tx_serialize ||
1540 1540 (ring_info & MAC_RING_TX_SERIALIZE)) {
1541 1541 srs_tx->st_mode = SRS_TX_SERIALIZE;
1542 1542 } else {
1543 1543 srs_tx->st_mode = SRS_TX_DEFAULT;
1544 1544 }
1545 1545 } else if (tx_mode == SRS_TX_BW_FANOUT) {
1546 1546 srs_tx->st_mode = SRS_TX_FANOUT;
1547 1547 } else if (tx_mode == SRS_TX_BW_AGGR) {
1548 1548 srs_tx->st_mode = SRS_TX_AGGR;
1549 1549 }
1550 1550 srs->srs_type &= ~SRST_BW_CONTROL;
1551 1551 } else {
1552 1552 /* Set/Modify bandwidth limit */
1553 1553 srs->srs_bw->mac_bw_limit = FLOW_BYTES_PER_TICK(mrp->mrp_maxbw);
1554 1554 /*
1555 1555 * Give twice the queuing capability before
1556 1556 * dropping packets. The unit is bytes/tick.
1557 1557 */
1558 1558 srs->srs_bw->mac_bw_drop_threshold =
1559 1559 srs->srs_bw->mac_bw_limit << 1;
1560 1560 srs->srs_type |= SRST_BW_CONTROL;
1561 1561 if (tx_mode != SRS_TX_BW && tx_mode != SRS_TX_BW_FANOUT &&
1562 1562 tx_mode != SRS_TX_BW_AGGR) {
1563 1563 if (tx_mode == SRS_TX_SERIALIZE ||
1564 1564 tx_mode == SRS_TX_DEFAULT) {
1565 1565 srs_tx->st_mode = SRS_TX_BW;
1566 1566 } else if (tx_mode == SRS_TX_FANOUT) {
1567 1567 srs_tx->st_mode = SRS_TX_BW_FANOUT;
1568 1568 } else if (tx_mode == SRS_TX_AGGR) {
1569 1569 srs_tx->st_mode = SRS_TX_BW_AGGR;
1570 1570 } else {
1571 1571 ASSERT(0);
1572 1572 }
1573 1573 }
1574 1574 }
1575 1575 done:
1576 1576 srs_tx->st_func = mac_tx_get_func(srs_tx->st_mode);
1577 1577 mutex_exit(&srs->srs_bw->mac_bw_lock);
1578 1578 mutex_exit(&srs->srs_lock);
1579 1579
1580 1580 mac_tx_client_restart((mac_client_handle_t)mcip);
1581 1581 }
1582 1582
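The transitions above are symmetric: each transmit mode has a bandwidth-enforcing counterpart (DEFAULT and SERIALIZE map to BW, FANOUT to BW_FANOUT, AGGR to BW_AGGR), and the reset path simply inverts the mapping. A compilable sketch of one direction, with illustrative enum names that mirror the SRS_TX_* constants:

typedef enum {
	TX_DEFAULT, TX_SERIALIZE, TX_FANOUT, TX_AGGR,
	TX_BW, TX_BW_FANOUT, TX_BW_AGGR
} tx_mode_t;

/* Map a mode to its bandwidth-enforcing counterpart (illustrative). */
static tx_mode_t
tx_mode_add_bw(tx_mode_t mode)
{
	switch (mode) {
	case TX_DEFAULT:
	case TX_SERIALIZE:
		return (TX_BW);
	case TX_FANOUT:
		return (TX_BW_FANOUT);
	case TX_AGGR:
		return (TX_BW_AGGR);
	default:
		return (mode);	/* already bandwidth-enforcing */
	}
}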
1583 1583 /*
1584 1584 * The uber function that deals with any update to bandwidth limits.
1585 1585 */
1586 1586 void
1587 1587 mac_srs_update_bwlimit(flow_entry_t *flent, mac_resource_props_t *mrp)
1588 1588 {
1589 1589 int count;
1590 1590
1591 1591 for (count = 0; count < flent->fe_rx_srs_cnt; count++)
1592 1592 mac_rx_srs_update_bwlimit(flent->fe_rx_srs[count], mrp);
1593 1593 mac_tx_srs_update_bwlimit(flent->fe_tx_srs, mrp);
1594 1594 }
1595 1595
1596 1596 /*
1597 1597 * When the first sub-flow is added to a link, we disable polling on the
1598 - * link and also modify the entry point to mac_rx_srs_subflow_process.
1598 + * link and also modify the entry point to mac_rx_srs_subflow_process().
1599 1599 * (polling is disabled because with the subflow added, accounting
1600 1600 * for polling needs additional logic, it is assumed that when a subflow is
1601 1601 * added, we can take some hit as a result of disabling polling rather than
1602 1602 * adding more complexity - if this becomes a perf. issue we need to
1603 1603  * re-evaluate this logic). When the last subflow is removed, we re-enable
1604 - * polling and also reset the entry point to mac_rx_srs_process.
1604 + * polling and also reset the entry point to mac_rx_srs_process().
1605 1605 *
1606 1606 * In the future if there are multiple SRS, we can simply
1607 1607 * take one and give it to the flow rather than disabling polling and
1608 1608 * resetting the entry point.
1609 1609 */
1610 1610 void
1611 1611 mac_client_update_classifier(mac_client_impl_t *mcip, boolean_t enable)
1612 1612 {
1613 1613 flow_entry_t *flent = mcip->mci_flent;
1614 1614 int i;
1615 1615 mac_impl_t *mip = mcip->mci_mip;
1616 1616 mac_rx_func_t rx_func;
1617 1617 uint_t rx_srs_cnt;
1618 1618 boolean_t enable_classifier;
1619 1619
1620 1620 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1621 1621
1622 1622 enable_classifier = !FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && enable;
1623 1623
1624 1624 rx_func = enable_classifier ? mac_rx_srs_subflow_process :
1625 1625 mac_rx_srs_process;
1626 1626
1627 1627 /* Tell mac_srs_poll_state_change to disable polling if necessary */
1628 1628 if (mip->mi_state_flags & MIS_POLL_DISABLE)
1629 1629 enable_classifier = B_TRUE;
1630 1630
1631 1631 /*
1632 1632 * If receive function has already been configured correctly for
1633 1633 * current subflow configuration, do nothing.
1634 1634 */
1635 1635 if (flent->fe_cb_fn == (flow_fn_t)rx_func)
1636 1636 return;
1637 1637
1638 1638 rx_srs_cnt = flent->fe_rx_srs_cnt;
1639 1639 for (i = 0; i < rx_srs_cnt; i++) {
1640 1640 ASSERT(flent->fe_rx_srs[i] != NULL);
1641 1641 mac_srs_poll_state_change(flent->fe_rx_srs[i],
1642 1642 enable_classifier, rx_func);
1643 1643 }
1644 1644
1645 1645 /*
1646 1646 * Change the S/W classifier so that we can land in the
1647 1647 * correct processing function with correct argument.
1648 1648 * If all subflows have been removed we can revert to
1649 - * mac_rx_srsprocess, else we need mac_rx_srs_subflow_process.
1649 + * mac_rx_srs_process(), else we need mac_rx_srs_subflow_process().
1650 1650 */
1651 1651 mutex_enter(&flent->fe_lock);
1652 1652 flent->fe_cb_fn = (flow_fn_t)rx_func;
1653 1653 flent->fe_cb_arg1 = (void *)mip;
1654 1654 flent->fe_cb_arg2 = flent->fe_rx_srs[0];
1655 1655 mutex_exit(&flent->fe_lock);
1656 1656 }
1657 1657
1658 1658 static void
1659 1659 mac_srs_update_fanout_list(mac_soft_ring_set_t *mac_srs)
1660 1660 {
1661 1661 int tcp_count = 0, udp_count = 0, oth_count = 0, tx_count = 0;
1662 1662 mac_soft_ring_t *softring;
1663 1663
1664 1664 softring = mac_srs->srs_soft_ring_head;
1665 1665 if (softring == NULL) {
1666 1666 ASSERT(mac_srs->srs_soft_ring_count == 0);
1667 1667 mac_srs->srs_tcp_ring_count = 0;
1668 1668 mac_srs->srs_udp_ring_count = 0;
1669 1669 mac_srs->srs_oth_ring_count = 0;
1670 1670 mac_srs->srs_tx_ring_count = 0;
1671 1671 return;
1672 1672 }
1673 1673
1674 1674 while (softring != NULL) {
1675 1675 if (softring->s_ring_type & ST_RING_TCP) {
1676 1676 mac_srs->srs_tcp_soft_rings[tcp_count++] = softring;
1677 1677 } else if (softring->s_ring_type & ST_RING_UDP) {
1678 1678 mac_srs->srs_udp_soft_rings[udp_count++] = softring;
1679 1679 } else if (softring->s_ring_type & ST_RING_OTH) {
1680 1680 mac_srs->srs_oth_soft_rings[oth_count++] = softring;
1681 1681 } else {
1682 1682 ASSERT(softring->s_ring_type & ST_RING_TX);
1683 1683 mac_srs->srs_tx_soft_rings[tx_count++] = softring;
1684 1684 }
1685 1685 softring = softring->s_ring_next;
1686 1686 }
1687 1687
1688 1688 ASSERT(mac_srs->srs_soft_ring_count ==
1689 1689 (tcp_count + udp_count + oth_count + tx_count));
1690 1690 mac_srs->srs_tcp_ring_count = tcp_count;
1691 1691 mac_srs->srs_udp_ring_count = udp_count;
1692 1692 mac_srs->srs_oth_ring_count = oth_count;
1693 1693 mac_srs->srs_tx_ring_count = tx_count;
1694 1694 }
1695 1695
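The fanout-list update is a single-pass partition of the soft ring list into per-protocol arrays, with the invariant that the per-type counts sum to the total ring count. A minimal sketch, assuming the caller sizes the destination arrays to the list length:

#include <stddef.h>

enum ring_type { RT_TCP, RT_UDP, RT_OTH };

typedef struct sring {
	struct sring	*sr_next;
	enum ring_type	sr_type;
} sring_t;

/* Partition a ring list by protocol; returns the total ring count. */
static int
partition_rings(sring_t *head, sring_t **tcp, sring_t **udp, sring_t **oth)
{
	int ntcp = 0, nudp = 0, noth = 0;
	sring_t *s;

	for (s = head; s != NULL; s = s->sr_next) {
		switch (s->sr_type) {
		case RT_TCP:
			tcp[ntcp++] = s;
			break;
		case RT_UDP:
			udp[nudp++] = s;
			break;
		default:
			oth[noth++] = s;
			break;
		}
	}
	return (ntcp + nudp + noth);
}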
1696 1696 void
1697 1697 mac_srs_create_proto_softrings(int id, uint16_t type, pri_t pri,
1698 1698 mac_client_impl_t *mcip, mac_soft_ring_set_t *mac_srs,
1699 1699 processorid_t cpuid, mac_direct_rx_t rx_func, void *x_arg1,
1700 1700 mac_resource_handle_t x_arg2, boolean_t set_bypass)
1701 1701 {
1702 1702 mac_soft_ring_t *softring;
1703 1703 mac_rx_fifo_t mrf;
1704 1704
1705 1705 bzero(&mrf, sizeof (mac_rx_fifo_t));
1706 1706 mrf.mrf_type = MAC_RX_FIFO;
1707 1707 mrf.mrf_receive = (mac_receive_t)mac_soft_ring_poll;
1708 1708 mrf.mrf_intr_enable = (mac_intr_enable_t)mac_soft_ring_intr_enable;
1709 1709 mrf.mrf_intr_disable = (mac_intr_disable_t)mac_soft_ring_intr_disable;
1710 1710 mrf.mrf_flow_priority = pri;
1711 1711
1712 1712 softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait,
1713 1713 (type|ST_RING_TCP), pri, mcip, mac_srs,
1714 1714 cpuid, rx_func, x_arg1, x_arg2);
1715 1715 softring->s_ring_rx_arg2 = NULL;
1716 1716
1717 1717 /*
1718 1718 	 * TCP and UDP support DLS bypass. In addition, the TCP
1719 1719 	 * squeue can also poll its corresponding soft ring.
1720 1720 */
1721 1721 if (set_bypass && (mcip->mci_resource_arg != NULL)) {
1722 1722 mac_soft_ring_dls_bypass(softring,
1723 1723 mcip->mci_direct_rx_fn,
1724 1724 mcip->mci_direct_rx_arg);
1725 1725
1726 1726 mrf.mrf_rx_arg = softring;
1727 1727 mrf.mrf_intr_handle = (mac_intr_handle_t)softring;
1728 1728
1729 1729 /*
1730 1730 * Make a call in IP to get a TCP squeue assigned to
1731 1731 * this softring to maintain full CPU locality through
1732 1732 * the stack and allow the squeue to be able to poll
1733 1733 * the softring so the flow control can be pushed
1734 1734 * all the way to H/W.
1735 1735 */
1736 1736 softring->s_ring_rx_arg2 =
1737 1737 mcip->mci_resource_add((void *)mcip->mci_resource_arg,
1738 1738 (mac_resource_t *)&mrf);
1739 1739 }
1740 1740
1741 1741 /*
1742 1742 * Non-TCP protocols don't support squeues. Hence we
1743 1743 * don't make any ring addition callbacks for non-TCP
1744 1744 * rings. Now create the UDP softring and allow it to
1745 1745 * bypass the DLS layer.
1746 1746 */
1747 1747 softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait,
1748 1748 (type|ST_RING_UDP), pri, mcip, mac_srs,
1749 1749 cpuid, rx_func, x_arg1, x_arg2);
1750 1750 softring->s_ring_rx_arg2 = NULL;
1751 1751
1752 1752 if (set_bypass && (mcip->mci_resource_arg != NULL)) {
1753 1753 mac_soft_ring_dls_bypass(softring,
1754 1754 mcip->mci_direct_rx_fn,
1755 1755 mcip->mci_direct_rx_arg);
1756 1756 }
1757 1757
1758 1758 	/* Create the Oth softring, which has to go through the DLS */
1759 1759 softring = mac_soft_ring_create(id, mac_soft_ring_worker_wait,
1760 1760 (type|ST_RING_OTH), pri, mcip, mac_srs,
1761 1761 cpuid, rx_func, x_arg1, x_arg2);
1762 1762 softring->s_ring_rx_arg2 = NULL;
1763 1763 }
1764 1764
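Each protocol softring ends up with one of two delivery paths: direct to IP when DLS bypass applies (TCP and UDP), or through the DLS layer otherwise (OTH). A hedged sketch of that wiring decision; the function-pointer shape is illustrative and not the kernel's mac_soft_ring_dls_bypass() signature:

#include <stdbool.h>

typedef void (*deliver_fn_t)(void *arg, void *pkt_chain);

typedef struct softring {
	deliver_fn_t	sr_deliver;
	void		*sr_arg;
} softring_t;

/* Choose the delivery path: direct to IP, or through DLS. */
static void
softring_set_path(softring_t *sr, bool can_bypass,
    deliver_fn_t direct_fn, void *direct_arg,
    deliver_fn_t dls_fn, void *dls_arg)
{
	if (can_bypass) {
		sr->sr_deliver = direct_fn;	/* DLS bypass: straight to IP */
		sr->sr_arg = direct_arg;
	} else {
		sr->sr_deliver = dls_fn;	/* via the DLS layer */
		sr->sr_arg = dls_arg;
	}
}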
1765 1765 /*
1766 1766  * This routine associates a CPU or a set of CPUs with the processing
1767 1767  * of incoming traffic from a MAC client. If multiple CPUs are
1768 1768  * specified, one soft ring is created per CPU, with each soft ring
1769 1769  * worker thread bound to a CPU in the set. Each soft ring is in turn
1770 1770  * associated with an squeue, and the squeue is moved to the same CPU
1771 1771  * as the soft ring's worker thread.
1772 1772 */
1773 1773 static void
1774 1774 mac_srs_fanout_modify(mac_client_impl_t *mcip, mac_direct_rx_t rx_func,
1775 1775 void *x_arg1, mac_resource_handle_t x_arg2,
1776 1776 mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs)
1777 1777 {
1778 1778 mac_soft_ring_t *softring;
1779 1779 uint32_t soft_ring_flag = 0;
1780 1780 processorid_t cpuid = -1;
1781 1781 int i, srings_present, new_fanout_cnt;
1782 1782 mac_cpus_t *srs_cpu;
1783 1783
1784 1784 /* fanout state is REINIT. Set it back to INIT */
1785 1785 ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_REINIT);
1786 1786 mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT;
1787 1787
1788 1788 /* how many are present right now */
1789 1789 srings_present = mac_rx_srs->srs_tcp_ring_count;
1790 1790 /* new request */
1791 1791 srs_cpu = &mac_rx_srs->srs_cpu;
1792 1792 new_fanout_cnt = srs_cpu->mc_rx_fanout_cnt;
1793 1793
1794 1794 mutex_enter(&mac_rx_srs->srs_lock);
1795 1795 if (mac_rx_srs->srs_type & SRST_BW_CONTROL)
1796 1796 soft_ring_flag |= ST_RING_BW_CTL;
1797 1797 mutex_exit(&mac_rx_srs->srs_lock);
1798 1798
1799 1799 if (new_fanout_cnt > srings_present) {
1800 1800 /* soft rings increased */
1801 1801 mutex_enter(&mac_rx_srs->srs_lock);
1802 1802 mac_rx_srs->srs_type |= SRST_FANOUT_SRC_IP;
1803 1803 mutex_exit(&mac_rx_srs->srs_lock);
1804 1804
1805 1805 for (i = mac_rx_srs->srs_tcp_ring_count;
1806 1806 i < new_fanout_cnt; i++) {
1807 1807 /*
1808 1808 * Create the protocol softrings and set the
1809 1809 * DLS bypass where possible.
1810 1810 */
1811 1811 mac_srs_create_proto_softrings(i, soft_ring_flag,
1812 1812 mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid,
1813 1813 rx_func, x_arg1, x_arg2, B_TRUE);
1814 1814 }
1815 1815 mac_srs_update_fanout_list(mac_rx_srs);
1816 1816 } else if (new_fanout_cnt < srings_present) {
1817 1817 /* soft rings decreased */
1818 1818 if (new_fanout_cnt == 1) {
1819 1819 mutex_enter(&mac_rx_srs->srs_lock);
1820 1820 mac_rx_srs->srs_type &= ~SRST_FANOUT_SRC_IP;
1821 1821 ASSERT(mac_rx_srs->srs_type & SRST_FANOUT_PROTO);
1822 1822 mutex_exit(&mac_rx_srs->srs_lock);
1823 1823 }
1824 1824 /* Get rid of extra soft rings */
1825 1825 for (i = new_fanout_cnt;
1826 1826 i < mac_rx_srs->srs_tcp_ring_count; i++) {
1827 1827 softring = mac_rx_srs->srs_tcp_soft_rings[i];
1828 1828 if (softring->s_ring_rx_arg2 != NULL) {
1829 1829 mcip->mci_resource_remove(
1830 1830 (void *)mcip->mci_resource_arg,
1831 1831 softring->s_ring_rx_arg2);
1832 1832 }
1833 1833 mac_soft_ring_remove(mac_rx_srs,
1834 1834 mac_rx_srs->srs_tcp_soft_rings[i]);
1835 1835 mac_soft_ring_remove(mac_rx_srs,
1836 1836 mac_rx_srs->srs_udp_soft_rings[i]);
1837 1837 mac_soft_ring_remove(mac_rx_srs,
1838 1838 mac_rx_srs->srs_oth_soft_rings[i]);
1839 1839 }
1840 1840 mac_srs_update_fanout_list(mac_rx_srs);
1841 1841 }
1842 1842
1843 1843 ASSERT(new_fanout_cnt == mac_rx_srs->srs_tcp_ring_count);
1844 1844 mutex_enter(&cpu_lock);
1845 1845 for (i = 0; i < mac_rx_srs->srs_tcp_ring_count; i++) {
1846 1846 cpuid = srs_cpu->mc_rx_fanout_cpus[i];
1847 1847 (void) mac_soft_ring_bind(mac_rx_srs->srs_udp_soft_rings[i],
1848 1848 cpuid);
1849 1849 (void) mac_soft_ring_bind(mac_rx_srs->srs_oth_soft_rings[i],
1850 1850 cpuid);
1851 1851 (void) mac_soft_ring_bind(mac_rx_srs->srs_tcp_soft_rings[i],
1852 1852 cpuid);
1853 1853 softring = mac_rx_srs->srs_tcp_soft_rings[i];
1854 1854 if (softring->s_ring_rx_arg2 != NULL) {
1855 1855 mcip->mci_resource_bind((void *)mcip->mci_resource_arg,
1856 1856 softring->s_ring_rx_arg2, cpuid);
1857 1857 }
1858 1858 }
1859 1859
1860 1860 mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid);
1861 1861 mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid);
1862 1862 mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu);
1863 1863 /*
1864 1864 * Bind Tx srs and soft ring threads too. Let's bind tx
1865 1865 * srs to the last cpu in mrp list.
1866 1866 */
1867 1867 if (mac_tx_srs != NULL) {
1868 1868 BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp);
1869 1869 mac_tx_srs_retarget_intr(mac_tx_srs);
1870 1870 }
1871 1871 mutex_exit(&cpu_lock);
1872 1872 }
1873 1873
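The grow/shrink logic above reduces to reconciling the current ring count with the requested one: create rings for the missing indices, or destroy the surplus ones. A compilable sketch of that reconciliation, with create/destroy callbacks standing in for the softring routines:

/*
 * Reconcile the current fanout count with the requested one: create
 * soft rings for indices [have, want), or destroy [want, have).
 */
static void
reconcile_fanout(int have, int want,
    void (*create)(int), void (*destroy)(int))
{
	int i;

	for (i = have; i < want; i++)
		create(i);	/* fanout increased */
	for (i = want; i < have; i++)
		destroy(i);	/* fanout decreased */
}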
1874 1874 /*
1875 1875 * Bind SRS threads and soft rings to CPUs/create fanout list.
1876 1876 */
1877 1877 void
1878 1878 mac_srs_fanout_init(mac_client_impl_t *mcip, mac_resource_props_t *mrp,
1879 1879 mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2,
1880 1880 mac_soft_ring_set_t *mac_rx_srs, mac_soft_ring_set_t *mac_tx_srs,
1881 1881 cpupart_t *cpupart)
1882 1882 {
1883 1883 int i;
1884 1884 processorid_t cpuid;
1885 1885 uint32_t soft_ring_flag = 0;
1886 1886 int soft_ring_cnt;
1887 1887 mac_cpus_t *srs_cpu = &mac_rx_srs->srs_cpu;
1888 1888
1889 1889 /*
1890 1890 * Remove the no soft ring flag and we will adjust it
1891 1891 * appropriately further down.
1892 1892 */
1893 1893 mutex_enter(&mac_rx_srs->srs_lock);
1894 1894 mac_rx_srs->srs_type &= ~SRST_NO_SOFT_RINGS;
1895 1895 mutex_exit(&mac_rx_srs->srs_lock);
1896 1896
1897 1897 ASSERT(mac_rx_srs->srs_soft_ring_head == NULL);
1898 1898
1899 1899 if (mac_rx_srs->srs_type & SRST_BW_CONTROL)
1900 1900 soft_ring_flag |= ST_RING_BW_CTL;
1901 1901
1902 1902 ASSERT(mac_rx_srs->srs_fanout_state == SRS_FANOUT_UNINIT);
1903 1903 mac_rx_srs->srs_fanout_state = SRS_FANOUT_INIT;
1904 1904 /*
1905 1905 	 * Ring count can be 0 if no fanout is required and no CPUs
1906 1906 	 * were specified. Leave the SRS worker and poll thread
1907 1907 	 * unbound.
1908 1908 */
1909 1909 ASSERT(mrp != NULL);
1910 1910 soft_ring_cnt = srs_cpu->mc_rx_fanout_cnt;
1911 1911
1912 1912 	/* Step 1: srs_cpu holds the CPU list that the threads need to bind to */
1913 1913 if (soft_ring_cnt > 0) {
1914 1914 mutex_enter(&cpu_lock);
1915 1915 for (i = 0; i < soft_ring_cnt; i++) {
1916 1916 cpuid = srs_cpu->mc_rx_fanout_cpus[i];
1917 1917 /* Create the protocol softrings */
1918 1918 mac_srs_create_proto_softrings(i, soft_ring_flag,
1919 1919 mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid,
1920 1920 rx_func, x_arg1, x_arg2, B_FALSE);
1921 1921 }
1922 1922 mac_srs_worker_bind(mac_rx_srs, srs_cpu->mc_rx_workerid);
1923 1923 mac_srs_poll_bind(mac_rx_srs, srs_cpu->mc_rx_pollid);
1924 1924 mac_rx_srs_retarget_intr(mac_rx_srs, srs_cpu->mc_rx_intr_cpu);
1925 1925 /*
1926 1926 * Bind Tx srs and soft ring threads too.
1927 1927 * Let's bind tx srs to the last cpu in
1928 1928 * mrp list.
1929 1929 */
1930 1930 if (mac_tx_srs == NULL) {
1931 1931 mutex_exit(&cpu_lock);
1932 1932 goto alldone;
1933 1933 }
1934 1934
1935 1935 BIND_TX_SRS_AND_SOFT_RINGS(mac_tx_srs, mrp);
1936 1936 mac_tx_srs_retarget_intr(mac_tx_srs);
1937 1937 mutex_exit(&cpu_lock);
1938 1938 } else {
1939 1939 mutex_enter(&cpu_lock);
1940 1940 /*
1941 1941 		 * For a subflow, mrp_workerid and mrp_pollid
1942 1942 		 * are not set.
1943 1943 */
1944 1944 mac_srs_worker_bind(mac_rx_srs, mrp->mrp_rx_workerid);
1945 1945 mac_srs_poll_bind(mac_rx_srs, mrp->mrp_rx_pollid);
1946 1946 mutex_exit(&cpu_lock);
1947 1947 goto no_softrings;
1948 1948 }
1949 1949
1950 1950 alldone:
1951 1951 if (soft_ring_cnt > 1)
1952 1952 mac_rx_srs->srs_type |= SRST_FANOUT_SRC_IP;
1953 1953 mac_srs_update_fanout_list(mac_rx_srs);
1954 1954 mac_srs_client_poll_enable(mcip, mac_rx_srs);
1955 1955 return;
1956 1956
1957 1957 no_softrings:
1958 1958 if (mac_rx_srs->srs_type & SRST_FANOUT_PROTO) {
1959 1959 mutex_enter(&cpu_lock);
1960 1960 cpuid = mac_next_bind_cpu(cpupart);
1961 1961 /* Create the protocol softrings */
1962 1962 mac_srs_create_proto_softrings(0, soft_ring_flag,
1963 1963 mac_rx_srs->srs_pri, mcip, mac_rx_srs, cpuid,
1964 1964 rx_func, x_arg1, x_arg2, B_FALSE);
1965 1965 mutex_exit(&cpu_lock);
1966 1966 } else {
1967 1967 /*
1968 1968 * This is the case when there is no fanout which is
1969 1969 * true for subflows.
1970 1970 */
1971 1971 mac_rx_srs->srs_type |= SRST_NO_SOFT_RINGS;
1972 1972 }
1973 1973 mac_srs_update_fanout_list(mac_rx_srs);
1974 1974 mac_srs_client_poll_enable(mcip, mac_rx_srs);
1975 1975 }
1976 1976
1977 1977 /*
1978 1978 * mac_fanout_setup:
1979 1979 *
1980 1980  * Calls mac_srs_fanout_init() or mac_srs_fanout_modify() depending
1981 1981  * upon whether the SRS is getting initialized or re-initialized.
1982 1982 */
1983 1983 void
1984 1984 mac_fanout_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
1985 1985 mac_resource_props_t *mrp, mac_direct_rx_t rx_func, void *x_arg1,
1986 1986 mac_resource_handle_t x_arg2, cpupart_t *cpupart)
1987 1987 {
1988 1988 mac_soft_ring_set_t *mac_rx_srs, *mac_tx_srs;
1989 1989 int i, rx_srs_cnt;
1990 1990
1991 1991 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
1992 1992 /*
1993 1993 * This is an aggregation port. Fanout will be setup
1994 1994 * over the aggregation itself.
1995 1995 */
1996 1996 if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
1997 1997 return;
1998 1998
1999 1999 mac_rx_srs = flent->fe_rx_srs[0];
2000 2000 /*
2001 2001 * Set up the fanout on the tx side only once, with the
2002 2002 * first rx SRS. The CPU binding, fanout, and bandwidth
2003 2003 * criteria are common to both RX and TX, so
2004 2004 	 * initializing them together avoids redundant code.
2005 2005 */
2006 2006 mac_tx_srs = flent->fe_tx_srs;
2007 2007 rx_srs_cnt = flent->fe_rx_srs_cnt;
2008 2008
2009 2009 /* No fanout for subflows */
2010 2010 if (flent->fe_type & FLOW_USER) {
2011 2011 mac_srs_fanout_init(mcip, mrp, rx_func,
2012 2012 x_arg1, x_arg2, mac_rx_srs, mac_tx_srs,
2013 2013 cpupart);
2014 2014 return;
2015 2015 }
2016 2016
2017 2017 if (mrp->mrp_mask & MRP_CPUS_USERSPEC)
2018 2018 mac_flow_user_cpu_init(flent, mrp);
2019 2019 else
2020 2020 mac_flow_cpu_init(flent, cpupart);
2021 2021
2022 2022 mrp->mrp_rx_fanout_cnt = mac_rx_srs->srs_cpu.mc_rx_fanout_cnt;
2023 2023
2024 2024 /*
2025 2025 * Set up fanout for both SW (0th SRS) and HW classified
2026 2026 * SRS (the rest of Rx SRSs in flent).
2027 2027 */
2028 2028 for (i = 0; i < rx_srs_cnt; i++) {
2029 2029 mac_rx_srs = flent->fe_rx_srs[i];
2030 2030 if (i != 0)
2031 2031 mac_tx_srs = NULL;
2032 2032 switch (mac_rx_srs->srs_fanout_state) {
2033 2033 case SRS_FANOUT_UNINIT:
2034 2034 mac_srs_fanout_init(mcip, mrp, rx_func,
2035 2035 x_arg1, x_arg2, mac_rx_srs, mac_tx_srs,
2036 2036 cpupart);
2037 2037 break;
2038 2038 case SRS_FANOUT_INIT:
2039 2039 break;
2040 2040 case SRS_FANOUT_REINIT:
2041 2041 mac_rx_srs_quiesce(mac_rx_srs, SRS_QUIESCE);
2042 2042 mac_srs_fanout_modify(mcip, rx_func, x_arg1,
2043 2043 x_arg2, mac_rx_srs, mac_tx_srs);
2044 2044 mac_rx_srs_restart(mac_rx_srs);
2045 2045 break;
2046 2046 default:
2047 2047 VERIFY(mac_rx_srs->srs_fanout_state <=
2048 2048 SRS_FANOUT_REINIT);
2049 2049 break;
2050 2050 }
2051 2051 }
2052 2052 }
2053 2053
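The REINIT case illustrates a pattern used throughout this file: quiesce the SRS so no packets are in flight, modify its structure, then restart it. A minimal sketch of the state dispatch, with callbacks standing in for the mac_* routines:

typedef enum { FANOUT_UNINIT, FANOUT_INIT, FANOUT_REINIT } fanout_state_t;

static void
fanout_reconcile(fanout_state_t state, void (*init)(void),
    void (*quiesce)(void), void (*modify)(void), void (*restart)(void))
{
	switch (state) {
	case FANOUT_UNINIT:
		init();			/* first-time setup */
		break;
	case FANOUT_INIT:
		break;			/* nothing to do */
	case FANOUT_REINIT:
		quiesce();		/* stop packet flow */
		modify();		/* adjust rings and bindings */
		restart();		/* resume packet flow */
		break;
	}
}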
2054 2054 /*
2055 2055 * mac_srs_create:
2056 2056 *
2057 2057 * Create a mac_soft_ring_set_t (SRS). If soft_ring_fanout_type is
2058 2058 * SRST_TX, an SRS for Tx side is created. Otherwise an SRS for Rx side
2059 2059 * processing is created.
2060 2060 *
2061 2061 * Details on Rx SRS:
2062 2062 * Create a SRS and also add the necessary soft rings for TCP and
2063 2063 * non-TCP based on fanout type and count specified.
2064 2064 *
2065 2065 * mac_soft_ring_fanout, mac_srs_fanout_modify (?),
2066 2066  * mac_soft_ring_stop_workers, mac_soft_ring_set_destroy, etc. need
2067 2067 * to be heavily modified.
2068 2068 *
2069 2069 * mi_soft_ring_list_size, mi_soft_ring_size, etc need to disappear.
2070 2070 */
2071 2071 mac_soft_ring_set_t *
2072 2072 mac_srs_create(mac_client_impl_t *mcip, flow_entry_t *flent, uint32_t srs_type,
2073 2073 mac_direct_rx_t rx_func, void *x_arg1, mac_resource_handle_t x_arg2,
2074 2074 mac_ring_t *ring)
2075 2075 {
2076 2076 mac_soft_ring_set_t *mac_srs;
2077 2077 mac_srs_rx_t *srs_rx;
2078 2078 mac_srs_tx_t *srs_tx;
2079 2079 mac_bw_ctl_t *mac_bw;
2080 2080 mac_resource_props_t *mrp;
2081 2081 boolean_t is_tx_srs = ((srs_type & SRST_TX) != 0);
2082 2082
2083 2083 mac_srs = kmem_cache_alloc(mac_srs_cache, KM_SLEEP);
2084 2084 bzero(mac_srs, sizeof (mac_soft_ring_set_t));
2085 2085 srs_rx = &mac_srs->srs_rx;
2086 2086 srs_tx = &mac_srs->srs_tx;
2087 2087
2088 2088 mutex_enter(&flent->fe_lock);
2089 2089
2090 2090 /*
2091 2091 * Get the bandwidth control structure from the flent. Get
2092 2092 * rid of any residual values in the control structure for
2093 2093 * the tx bw struct and also for the rx, if the rx srs is
2094 2094 * the 1st one being brought up (the rx bw ctl struct may
2095 2095 * be shared by multiple SRSs)
2096 2096 */
2097 2097 if (is_tx_srs) {
2098 2098 mac_srs->srs_bw = &flent->fe_tx_bw;
2099 2099 bzero(mac_srs->srs_bw, sizeof (mac_bw_ctl_t));
2100 2100 flent->fe_tx_srs = mac_srs;
2101 2101 } else {
2102 2102 /*
2103 2103 * The bw counter (stored in the flent) is shared
2104 2104 * by SRS's within an rx group.
2105 2105 */
2106 2106 mac_srs->srs_bw = &flent->fe_rx_bw;
2107 2107 /* First rx SRS, clear the bw structure */
2108 2108 if (flent->fe_rx_srs_cnt == 0)
2109 2109 bzero(mac_srs->srs_bw, sizeof (mac_bw_ctl_t));
2110 2110
2111 2111 /*
2112 2112 * It is better to panic here rather than just assert because
2113 2113 		 * on a non-debug kernel we might end up corrupting memory
2114 2114 * and making it difficult to debug.
2115 2115 */
2116 2116 if (flent->fe_rx_srs_cnt >= MAX_RINGS_PER_GROUP) {
2117 2117 panic("Array Overrun detected due to MAC client %p "
2118 2118 			    "having more rings than %d", (void *)mcip,
2119 2119 MAX_RINGS_PER_GROUP);
2120 2120 }
2121 2121 flent->fe_rx_srs[flent->fe_rx_srs_cnt] = mac_srs;
2122 2122 flent->fe_rx_srs_cnt++;
2123 2123 }
2124 2124 mac_srs->srs_flent = flent;
2125 2125 mutex_exit(&flent->fe_lock);
2126 2126
2127 2127 mac_srs->srs_state = 0;
2128 2128 mac_srs->srs_type = (srs_type | SRST_NO_SOFT_RINGS);
2129 2129 mac_srs->srs_worker_cpuid = mac_srs->srs_worker_cpuid_save = -1;
2130 2130 mac_srs->srs_poll_cpuid = mac_srs->srs_poll_cpuid_save = -1;
2131 2131 mac_srs->srs_mcip = mcip;
2132 2132 mac_srs_fanout_list_alloc(mac_srs);
2133 2133
2134 2134 /*
2135 2135 * For a flow we use the underlying MAC client's priority range with
2136 2136 * the priority value to find an absolute priority value. For a MAC
2137 2137 * client we use the MAC client's maximum priority as the value.
2138 2138 */
2139 2139 mrp = &flent->fe_effective_props;
2140 2140 if ((mac_srs->srs_type & SRST_FLOW) != 0) {
2141 2141 mac_srs->srs_pri = FLOW_PRIORITY(mcip->mci_min_pri,
2142 2142 mcip->mci_max_pri, mrp->mrp_priority);
2143 2143 } else {
2144 2144 mac_srs->srs_pri = mcip->mci_max_pri;
2145 2145 }
2146 2146 /*
2147 2147 * We need to insert the SRS in the global list before
2148 2148 	 * binding the SRS and SR threads. Otherwise there is a
2149 2149 	 * small window where the CPU reconfig callbacks
2150 2150 * may miss the SRS in the list walk and DR could fail
2151 2151 * as there are bound threads.
2152 2152 */
2153 2153 mac_srs_add_glist(mac_srs);
2154 2154
2155 2155 /* Initialize bw limit */
2156 2156 if ((mrp->mrp_mask & MRP_MAXBW) != 0) {
2157 2157 mac_srs->srs_drain_func = mac_rx_srs_drain_bw;
2158 2158
2159 2159 mac_bw = mac_srs->srs_bw;
2160 2160 mutex_enter(&mac_bw->mac_bw_lock);
2161 2161 mac_bw->mac_bw_limit = FLOW_BYTES_PER_TICK(mrp->mrp_maxbw);
2162 2162
2163 2163 /*
2164 2164 * Give twice the queuing capability before
2165 2165 * dropping packets. The unit is bytes/tick.
2166 2166 */
2167 2167 mac_bw->mac_bw_drop_threshold = mac_bw->mac_bw_limit << 1;
2168 2168 mutex_exit(&mac_bw->mac_bw_lock);
2169 2169 mac_srs->srs_type |= SRST_BW_CONTROL;
2170 2170 } else {
2171 2171 mac_srs->srs_drain_func = mac_rx_srs_drain;
2172 2172 }
2173 2173
2174 2174 /*
2175 2175 * We use the following policy to control Receive
2176 2176 * Side Dynamic Polling:
2177 2177 * 1) We switch to poll mode anytime the processing thread causes
2178 2178 * a backlog to build up in SRS and its associated Soft Rings
2179 2179 * (sr_poll_pkt_cnt > 0).
2180 2180 * 2) As long as the backlog stays under the low water mark
2181 2181 * (sr_lowat), we poll the H/W for more packets.
2182 2182 * 3) If the backlog (sr_poll_pkt_cnt) exceeds low water mark, we
2183 2183 * stay in poll mode but don't poll the H/W for more packets.
2184 2184 * 4) Anytime in polling mode, if we poll the H/W for packets and
2185 2185 * find nothing plus we have an existing backlog
2186 2186 * (sr_poll_pkt_cnt > 0), we stay in polling mode but don't poll
2187 2187 * the H/W for packets anymore (let the polling thread go to sleep).
2188 - * 5) Once the backlog is relived (packets are processed) we reenable
2188 + * 5) Once the backlog is relieved (packets are processed) we reenable
2189 2189 * polling (by signalling the poll thread) only when the backlog
2190 2190 * dips below sr_poll_thres.
2191 2191 * 6) sr_hiwat is used exclusively when we are not polling capable
2192 2192 * and is used to decide when to drop packets so the SRS queue
2193 2193 * length doesn't grow infinitely.
2194 2194 */
2195 2195 if (!is_tx_srs) {
2196 2196 srs_rx->sr_hiwat = mac_soft_ring_max_q_cnt;
2197 2197 /* Low water mark needs to be less than high water mark */
2198 2198 srs_rx->sr_lowat = mac_soft_ring_min_q_cnt <=
2199 2199 mac_soft_ring_max_q_cnt ? mac_soft_ring_min_q_cnt :
2200 2200 (mac_soft_ring_max_q_cnt >> 2);
2201 2201 		/* Poll threshold needs to be half of low water mark or less */
2202 2202 srs_rx->sr_poll_thres = mac_soft_ring_poll_thres <=
2203 2203 (srs_rx->sr_lowat >> 1) ? mac_soft_ring_poll_thres :
2204 2204 (srs_rx->sr_lowat >> 1);
2205 2205 if (mac_latency_optimize)
2206 2206 mac_srs->srs_state |= SRS_LATENCY_OPT;
2207 2207 else
2208 2208 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2209 2209 }
2210 2210
2211 2211 mac_srs->srs_worker = thread_create(NULL, 0,
2212 2212 mac_srs_worker, mac_srs, 0, &p0, TS_RUN, mac_srs->srs_pri);
2213 2213
2214 2214 if (is_tx_srs) {
2215 2215 /* Handle everything about Tx SRS and return */
2216 2216 mac_srs->srs_drain_func = mac_tx_srs_drain;
2217 2217 srs_tx->st_max_q_cnt = mac_tx_srs_max_q_cnt;
2218 2218 srs_tx->st_hiwat =
2219 2219 (mac_tx_srs_hiwat > mac_tx_srs_max_q_cnt) ?
2220 2220 mac_tx_srs_max_q_cnt : mac_tx_srs_hiwat;
2221 2221 srs_tx->st_arg1 = x_arg1;
2222 2222 srs_tx->st_arg2 = x_arg2;
2223 2223 goto done;
2224 2224 }
2225 2225
2226 2226 if ((srs_type & SRST_FLOW) != 0 ||
2227 2227 FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
2228 2228 srs_rx->sr_lower_proc = mac_rx_srs_process;
2229 2229 else
2230 2230 srs_rx->sr_lower_proc = mac_rx_srs_subflow_process;
2231 2231
2232 2232 srs_rx->sr_func = rx_func;
2233 2233 srs_rx->sr_arg1 = x_arg1;
2234 2234 srs_rx->sr_arg2 = x_arg2;
2235 2235
2236 2236 if (ring != NULL) {
2237 2237 uint_t ring_info;
2238 2238
2239 2239 /* Is the mac_srs created over the RX default group? */
2240 2240 if (ring->mr_gh == (mac_group_handle_t)
2241 2241 MAC_DEFAULT_RX_GROUP(mcip->mci_mip)) {
2242 2242 mac_srs->srs_type |= SRST_DEFAULT_GRP;
2243 2243 }
2244 2244 mac_srs->srs_ring = ring;
2245 2245 ring->mr_srs = mac_srs;
2246 2246 ring->mr_classify_type = MAC_HW_CLASSIFIER;
2247 2247 ring->mr_flag |= MR_INCIPIENT;
2248 2248
2249 2249 if (!(mcip->mci_mip->mi_state_flags & MIS_POLL_DISABLE) &&
2250 2250 FLOW_TAB_EMPTY(mcip->mci_subflow_tab) && mac_poll_enable)
2251 2251 mac_srs->srs_state |= SRS_POLLING_CAPAB;
2252 2252
2253 2253 mac_srs->srs_poll_thr = thread_create(NULL, 0,
2254 2254 mac_rx_srs_poll_ring, mac_srs, 0, &p0, TS_RUN,
2255 2255 mac_srs->srs_pri);
2256 2256 /*
2257 2257 * Some drivers require serialization and don't send
2258 2258 * packet chains in interrupt context. For such
2259 - * drivers, we should always queue in soft ring
2260 - * so that we get a chance to switch into a polling
2259 + * drivers, we should always queue in the soft ring
2260 + * so that we get a chance to switch into polling
2261 2261 * mode under backlog.
2262 2262 */
2263 2263 ring_info = mac_hwring_getinfo((mac_ring_handle_t)ring);
2264 2264 if (ring_info & MAC_RING_RX_ENQUEUE)
2265 2265 mac_srs->srs_state |= SRS_SOFTRING_QUEUE;
2266 2266 }
2267 2267 done:
2268 2268 mac_srs_stat_create(mac_srs);
2269 2269 return (mac_srs);
2270 2270 }
2271 2271
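Rules 1 through 6 of the polling policy above can be read as two small predicates over the backlog count: whether the poll thread should ask the hardware for more packets now, and whether a drain should wake it again. A hedged sketch with illustrative field names mirroring sr_poll_pkt_cnt, sr_lowat and sr_poll_thres:

#include <stdbool.h>

typedef struct poll_state {
	int	backlog;	/* cf. sr_poll_pkt_cnt */
	int	lowat;		/* cf. sr_lowat */
	int	poll_thres;	/* cf. sr_poll_thres */
	bool	polling;	/* currently in poll mode */
} poll_state_t;

/* Rules 2 and 3: poll the H/W only while the backlog is under lowat. */
static bool
should_poll_hw(const poll_state_t *ps)
{
	return (ps->polling && ps->backlog < ps->lowat);
}

/* Rule 5: after a drain, wake the poll thread once the backlog dips low. */
static bool
should_reenable_poll(const poll_state_t *ps)
{
	return (ps->polling && ps->backlog < ps->poll_thres);
}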
2272 2272 /*
2273 2273  * Figure out the number of soft rings required. It depends on
2274 2274  * whether protocol fanout is required (for LINKs), whether global
2275 2275  * settings require fanout for performance (mac_soft_ring_enable),
2276 2276  * or whether the user has specifically requested fanout.
2277 2277 */
2278 2278 static uint32_t
2279 2279 mac_find_fanout(flow_entry_t *flent, uint32_t link_type)
2280 2280 {
2281 2281 uint32_t fanout_type;
2282 2282 mac_resource_props_t *mrp = &flent->fe_effective_props;
2283 2283
2284 2284 /* no fanout for subflows */
2285 2285 switch (link_type) {
2286 2286 case SRST_FLOW:
2287 2287 fanout_type = SRST_NO_SOFT_RINGS;
2288 2288 break;
2289 2289 case SRST_LINK:
2290 2290 fanout_type = SRST_FANOUT_PROTO;
2291 2291 break;
2292 2292 }
2293 2293
2294 2294 /* A primary NIC/link is being plumbed */
2295 2295 if (flent->fe_type & FLOW_PRIMARY_MAC) {
2296 2296 if (mac_soft_ring_enable && mac_rx_soft_ring_count > 1) {
2297 2297 fanout_type |= SRST_FANOUT_SRC_IP;
2298 2298 }
2299 2299 } else if (flent->fe_type & FLOW_VNIC) {
2300 2300 /* A VNIC is being created */
2301 2301 if (mrp != NULL && mrp->mrp_ncpus > 0) {
2302 2302 fanout_type |= SRST_FANOUT_SRC_IP;
2303 2303 }
2304 2304 }
2305 2305
2306 2306 return (fanout_type);
2307 2307 }
2308 2308
2309 2309 /*
2310 2310 * Change a group from h/w to s/w classification.
2311 2311 */
2312 2312 void
2313 2313 mac_rx_switch_grp_to_sw(mac_group_t *group)
2314 2314 {
2315 2315 mac_ring_t *ring;
2316 2316 mac_soft_ring_set_t *mac_srs;
2317 2317
2318 2318 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
2319 2319 if (ring->mr_classify_type == MAC_HW_CLASSIFIER) {
2320 2320 /*
2321 2321 * Remove the SRS associated with the HW ring.
2322 2322 * As a result, polling will be disabled.
2323 2323 */
2324 2324 mac_srs = ring->mr_srs;
2325 2325 ASSERT(mac_srs != NULL);
2326 2326 mac_rx_srs_remove(mac_srs);
2327 2327 ring->mr_srs = NULL;
2328 2328 }
2329 2329
2330 2330 if (ring->mr_state != MR_INUSE)
2331 2331 (void) mac_start_ring(ring);
2332 2332
2333 2333 /*
2334 2334 * We need to perform SW classification
2335 2335 * for packets landing in these rings
2336 2336 */
2337 2337 ring->mr_flag = 0;
2338 2338 ring->mr_classify_type = MAC_SW_CLASSIFIER;
2339 2339 }
2340 2340 }
2341 2341
2342 2342 /*
2343 2343 * Create the Rx SRS for S/W classifier and for each ring in the
2344 2344 * group (if exclusive group). Also create the Tx SRS.
2345 2345 */
2346 2346 void
2347 2347 mac_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2348 2348 uint32_t link_type)
2349 2349 {
2350 2350 cpupart_t *cpupart;
2351 2351 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
2352 2352 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2353 2353 boolean_t use_default = B_FALSE;
2354 2354
2355 2355 mac_rx_srs_group_setup(mcip, flent, link_type);
2356 2356 mac_tx_srs_group_setup(mcip, flent, link_type);
2357 2357
2358 2358 pool_lock();
2359 2359 cpupart = mac_pset_find(mrp, &use_default);
2360 2360 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
2361 2361 mac_rx_deliver, mcip, NULL, cpupart);
2362 2362 mac_set_pool_effective(use_default, cpupart, mrp, emrp);
2363 2363 pool_unlock();
2364 2364 }
2365 2365
2366 2366 /*
2367 - * Set up the RX SRSs. If the S/W SRS is not set, set it up, if there
2368 - * is a group associated with this MAC client, set up SRSs for individual
2369 - * h/w rings.
2367 + * Set up the Rx SRSes. If there is no group associated with the
2368 + * client, then only set up SW classification. If the client has
2369 + * exclusive (MAC_GROUP_STATE_RESERVED) use of the group, then create
2370 + * an SRS for each HW ring. If the client is sharing a group, then
2371 + * make sure to tear down the HW SRSes.
2370 2372 */
2371 2373 void
2372 2374 mac_rx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2373 2375 uint32_t link_type)
2374 2376 {
2375 2377 mac_impl_t *mip = mcip->mci_mip;
2376 2378 mac_soft_ring_set_t *mac_srs;
2377 2379 mac_ring_t *ring;
2378 2380 uint32_t fanout_type;
2379 2381 mac_group_t *rx_group = flent->fe_rx_ring_group;
2382 + boolean_t no_unicast;
2380 2383
2381 2384 fanout_type = mac_find_fanout(flent, link_type);
2385 + no_unicast = (mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR) != 0;
2382 2386
2383 - /* Create the SRS for S/W classification if none exists */
2387 + /* Create the SRS for SW classification if none exists */
2384 2388 if (flent->fe_rx_srs[0] == NULL) {
2385 2389 ASSERT(flent->fe_rx_srs_cnt == 0);
2386 - /* Setup the Rx SRS */
2387 2390 mac_srs = mac_srs_create(mcip, flent, fanout_type | link_type,
2388 2391 mac_rx_deliver, mcip, NULL, NULL);
2389 2392 mutex_enter(&flent->fe_lock);
2390 2393 flent->fe_cb_fn = (flow_fn_t)mac_srs->srs_rx.sr_lower_proc;
2391 2394 flent->fe_cb_arg1 = (void *)mip;
2392 2395 flent->fe_cb_arg2 = (void *)mac_srs;
2393 2396 mutex_exit(&flent->fe_lock);
2394 2397 }
2395 2398
2396 2399 if (rx_group == NULL)
2397 2400 return;
2401 +
2398 2402 /*
2399 - * fanout for default SRS is done when default SRS are created
2400 - * above. As each ring is added to the group, we setup the
2401 - * SRS and fanout to it.
2403 + * If the group is marked RESERVED then set up an SRS and
2404 + * fanout for each HW ring.
2402 2405 */
2403 2406 switch (rx_group->mrg_state) {
2404 2407 case MAC_GROUP_STATE_RESERVED:
2405 2408 for (ring = rx_group->mrg_rings; ring != NULL;
2406 2409 ring = ring->mr_next) {
2410 + uint16_t vid = i_mac_flow_vid(mcip->mci_flent);
2411 +
2407 2412 switch (ring->mr_state) {
2408 2413 case MR_INUSE:
2409 2414 case MR_FREE:
2410 2415 if (ring->mr_srs != NULL)
2411 2416 break;
2412 2417 if (ring->mr_state != MR_INUSE)
2413 2418 (void) mac_start_ring(ring);
2414 2419
2415 2420 /*
2416 - * Since the group is exclusively ours create
2417 - * an SRS for this ring to allow the
2418 - * individual SRS to dynamically poll the
2419 - * ring. Do this only if the client is not
2420 - * a VLAN MAC client, since for VLAN we do
2421 - * s/w classification for the VID check, and
2422 - * if it has a unicast address.
2421 + * If a client requires SW VLAN
2422 + * filtering or has no unicast address
2423 + * then we don't create any HW ring
2424 + * SRSes.
2423 2425 */
2424 - if ((mcip->mci_state_flags &
2425 - MCIS_NO_UNICAST_ADDR) ||
2426 - i_mac_flow_vid(mcip->mci_flent) !=
2427 - VLAN_ID_NONE) {
2426 + if ((!MAC_GROUP_HW_VLAN(rx_group) &&
2427 + vid != VLAN_ID_NONE) || no_unicast)
2428 2428 break;
2429 - }
2429 +
2430 + /*
2431 + * When a client has exclusive use of
2432 + * a group, and that group's traffic
2433 + * is fully HW classified, we create
2434 + * an SRS for each HW ring in order to
2435 + * make use of dynamic polling of said
2436 + * HW rings.
2437 + */
2430 2438 mac_srs = mac_srs_create(mcip, flent,
2431 2439 fanout_type | link_type,
2432 2440 mac_rx_deliver, mcip, NULL, ring);
2433 2441 break;
2434 2442 default:
2435 2443 cmn_err(CE_PANIC,
2436 2444 "srs_setup: mcip = %p "
2437 2445 "trying to add UNKNOWN ring = %p\n",
2438 2446 (void *)mcip, (void *)ring);
2439 2447 break;
2440 2448 }
2441 2449 }
2442 2450 break;
2443 2451 case MAC_GROUP_STATE_SHARED:
2444 2452 /*
2445 - * Set all rings of this group to software classified.
2446 - *
2447 - * If the group is current RESERVED, the existing mac
2448 - * client (the only client on this group) is using
2449 - * this group exclusively. In that case we need to
2450 - * disable polling on the rings of the group (if it
2451 - * was enabled), and free the SRS associated with the
2452 - * rings.
2453 + * When a group is shared by multiple clients, we must
2454 + * use SW classification to ensure packets are
2455 + * delivered to the correct client.
2453 2456 */
2454 2457 mac_rx_switch_grp_to_sw(rx_group);
2455 2458 break;
2456 2459 default:
2457 2460 ASSERT(B_FALSE);
2458 2461 break;
2459 2462 }
2460 2463 }
2461 2464
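The per-ring decision in the RESERVED case boils down to a small predicate: no HW ring SRSes for clients without a unicast address, and none for VLAN clients unless the group does HW VLAN filtering. A compilable sketch, assuming VLAN_ID_NONE is zero (an assumption here, not taken from this file):

#include <stdbool.h>
#include <stdint.h>

#define	VLAN_ID_NONE	0	/* assumed value, for illustration only */

/*
 * Skip HW ring SRSes for clients with no unicast address, and for
 * VLAN clients on groups without HW VLAN filtering (the VID check
 * must then happen in software).
 */
static bool
create_hw_ring_srs(bool grp_hw_vlan, uint16_t vid, bool no_unicast)
{
	if (no_unicast)
		return (false);
	if (vid != VLAN_ID_NONE && !grp_hw_vlan)
		return (false);
	return (true);
}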
2462 2465 /*
2463 2466 * Set up the TX SRS.
2464 2467 */
2465 2468 void
2466 2469 mac_tx_srs_group_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2467 2470 uint32_t link_type)
2468 2471 {
2469 2472 int cnt;
2470 2473 int ringcnt;
2471 2474 mac_ring_t *ring;
2472 2475 mac_group_t *grp;
2473 2476
2474 2477 /*
2475 2478 * If we are opened exclusively (like aggr does for aggr_ports),
2476 2479 * don't set up Tx SRS and Tx soft rings as they won't be used.
2477 2480 * The same thing has to be done for Rx side also. See bug:
2478 2481 * 6880080
2479 2482 */
2480 2483 if (mcip->mci_state_flags & MCIS_EXCLUSIVE) {
2481 2484 /*
2482 2485 * If we have rings, start them here.
2483 2486 */
2484 2487 if (flent->fe_tx_ring_group == NULL)
2485 2488 return;
2486 2489 grp = (mac_group_t *)flent->fe_tx_ring_group;
2487 2490 ringcnt = grp->mrg_cur_count;
2488 2491 ring = grp->mrg_rings;
2489 2492 for (cnt = 0; cnt < ringcnt; cnt++) {
2490 2493 if (ring->mr_state != MR_INUSE) {
2491 2494 (void) mac_start_ring(ring);
2492 2495 }
2493 2496 ring = ring->mr_next;
2494 2497 }
2495 2498 return;
2496 2499 }
2497 2500 if (flent->fe_tx_srs == NULL) {
2498 2501 (void) mac_srs_create(mcip, flent, SRST_TX | link_type,
2499 2502 NULL, mcip, NULL, NULL);
2500 2503 }
2501 2504 mac_tx_srs_setup(mcip, flent);
2502 2505 }
2503 2506
2504 2507 /*
2505 - * Remove all the RX SRSs. If we want to remove only the SRSs associated
2506 - * with h/w rings, leave the S/W SRS alone. This is used when we want to
2507 - * move the MAC client from one group to another, so we need to teardown
2508 - * on the h/w SRSs.
2508 + * Tear down all the Rx SRSes, unless hwonly is set, in which case
2509 + * tear down only the Rx HW SRSes and leave the SW SRS alone. The
2510 + * hwonly flag is set when we wish to move a MAC client from one
2511 + * group to another. In that case, we need to release the current
2512 + * HW SRSes but keep the SW SRS for continued traffic classification.
2509 2513 */
2510 2514 void
2511 2515 mac_rx_srs_group_teardown(flow_entry_t *flent, boolean_t hwonly)
2512 2516 {
2513 2517 mac_soft_ring_set_t *mac_srs;
2514 2518 int i;
2515 2519 int count = flent->fe_rx_srs_cnt;
2516 2520
2517 2521 for (i = 0; i < count; i++) {
2518 2522 if (i == 0 && hwonly)
2519 2523 continue;
2520 2524 mac_srs = flent->fe_rx_srs[i];
2521 2525 mac_rx_srs_quiesce(mac_srs, SRS_CONDEMNED);
2522 2526 mac_srs_free(mac_srs);
2523 2527 flent->fe_rx_srs[i] = NULL;
2524 2528 flent->fe_rx_srs_cnt--;
2525 2529 }
2526 - ASSERT(!hwonly || flent->fe_rx_srs_cnt == 1);
2527 - ASSERT(hwonly || flent->fe_rx_srs_cnt == 0);
2530 +
2531 + /*
2532 + * If we are only tearing down the HW SRSes then there must be
2533 + * one SRS left for SW classification. Otherwise we are tearing
2534 + * down both HW and SW and there should be no SRSes left.
2535 + */
2536 + if (hwonly)
2537 + VERIFY3S(flent->fe_rx_srs_cnt, ==, 1);
2538 + else
2539 + VERIFY3S(flent->fe_rx_srs_cnt, ==, 0);
2528 2540 }
2529 2541
2530 2542 /*
2531 2543 * Remove the TX SRS.
2532 2544 */
2533 2545 void
2534 2546 mac_tx_srs_group_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
2535 2547 uint32_t link_type)
2536 2548 {
2537 2549 mac_soft_ring_set_t *tx_srs;
2538 2550 mac_srs_tx_t *tx;
2539 2551
2540 2552 if ((tx_srs = flent->fe_tx_srs) == NULL)
2541 2553 return;
2542 2554
2543 2555 tx = &tx_srs->srs_tx;
2544 2556 switch (link_type) {
2545 2557 case SRST_FLOW:
2546 2558 /*
2547 2559 * For flows, we need to work with passed
2548 2560 * flent to find the Rx/Tx SRS.
2549 2561 */
2550 2562 mac_tx_srs_quiesce(tx_srs, SRS_CONDEMNED);
2551 2563 break;
2552 2564 case SRST_LINK:
2553 2565 mac_tx_client_condemn((mac_client_handle_t)mcip);
2554 2566 if (tx->st_arg2 != NULL) {
2555 2567 ASSERT(tx_srs->srs_type & SRST_TX);
2556 2568 /*
2557 2569 * The ring itself will be stopped when
2558 2570 * we release the group or in the
2559 2571 * mac_datapath_teardown (for the default
2560 2572 * group)
2561 2573 */
2562 2574 tx->st_arg2 = NULL;
2563 2575 }
2564 2576 break;
2565 2577 default:
2566 2578 ASSERT(B_FALSE);
2567 2579 break;
2568 2580 }
2569 2581 mac_srs_free(tx_srs);
2570 2582 flent->fe_tx_srs = NULL;
2571 2583 }
2572 2584
2573 2585 /*
2574 2586 * This is the group state machine.
2575 2587 *
2576 2588 * The state of an Rx group is given by
2577 2589 * the following table. The default group and its rings are started in
2578 2590 * mac_start itself and the default group stays in SHARED state until
2579 2591  * mac_stop at which time the group and rings are stopped and it
2580 2592 * reverts to the Registered state.
2581 2593 *
2582 2594 * Typically this function is called on a group after adding or removing a
2583 2595 * client from it, to find out what should be the new state of the group.
2584 2596 * If the new state is RESERVED, then the client that owns this group
2585 2597 * exclusively is also returned. Note that adding or removing a client from
2586 2598 * a group could also impact the default group and the caller needs to
2587 2599 * evaluate the effect on the default group.
2588 2600 *
2589 2601 * Group type # of clients mi_nactiveclients Group State
2590 2602 * in the group
2591 2603 *
2592 2604 * Non-default 0 N.A. REGISTERED
2593 2605 * Non-default 1 N.A. RESERVED
2594 2606 *
2595 2607 * Default 0 N.A. SHARED
2596 2608 * Default 1 1 RESERVED
2597 2609 * Default 1 > 1 SHARED
2598 2610 * Default > 1 N.A. SHARED
2599 2611 *
2600 2612 * For a TX group, the following is the state table.
2601 2613 *
2602 2614 * Group type # of clients Group State
2603 2615 * in the group
2604 2616 *
2605 2617 * Non-default 0 REGISTERED
2606 2618 * Non-default 1 RESERVED
2607 2619 *
2608 2620 * Default 0 REGISTERED
2609 2621 * Default 1 RESERVED
2610 2622 * Default > 1 SHARED
2611 2623 */
2612 2624 mac_group_state_t
2613 2625 mac_group_next_state(mac_group_t *grp, mac_client_impl_t **group_only_mcip,
2614 2626 mac_group_t *defgrp, boolean_t rx_group)
2615 2627 {
2616 2628 mac_impl_t *mip = (mac_impl_t *)grp->mrg_mh;
2617 2629
2618 2630 *group_only_mcip = NULL;
2619 2631
2620 2632 /* Non-default group */
2621 2633
2622 2634 if (grp != defgrp) {
2623 2635 if (MAC_GROUP_NO_CLIENT(grp))
2624 2636 return (MAC_GROUP_STATE_REGISTERED);
2625 2637
2626 2638 *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp);
2627 2639 if (*group_only_mcip != NULL)
2628 2640 return (MAC_GROUP_STATE_RESERVED);
2629 2641
2630 2642 return (MAC_GROUP_STATE_SHARED);
2631 2643 }
2632 2644
2633 2645 /* Default group */
2634 2646
2635 2647 if (MAC_GROUP_NO_CLIENT(grp)) {
2636 2648 if (rx_group)
2637 2649 return (MAC_GROUP_STATE_SHARED);
2638 2650 else
2639 2651 return (MAC_GROUP_STATE_REGISTERED);
2640 2652 }
2641 2653 *group_only_mcip = MAC_GROUP_ONLY_CLIENT(grp);
2642 2654 if (*group_only_mcip == NULL)
2643 2655 return (MAC_GROUP_STATE_SHARED);
2644 2656
2645 2657 if (rx_group && mip->mi_nactiveclients != 1)
2646 2658 return (MAC_GROUP_STATE_SHARED);
2647 2659
2648 2660 ASSERT(*group_only_mcip != NULL);
2649 2661 return (MAC_GROUP_STATE_RESERVED);
2650 2662 }
2651 2663
2652 2664 /*
2653 2665 * OVERVIEW NOTES FOR DATAPATH
2654 2666 * ===========================
2655 2667 *
2656 2668 * Create an SRS and setup the corresponding flow function and args.
2657 2669 * Add a classification rule for the flow specified by 'flent' and program
2658 2670 * the hardware classifier when applicable.
2659 2671 *
2660 2672 * Rx ring assignment, SRS, polling and B/W enforcement
2661 2673 * ----------------------------------------------------
2662 2674 *
2663 2675  * We try to use H/W classification on the NIC and assign traffic for a
2664 2676 * MAC address to a particular Rx ring. There is a 1-1 mapping
2665 2677 * between a SRS and a Rx ring. The SRS (short for soft ring set)
2666 2678 * dynamically switches the underlying Rx ring between interrupt
2667 2679 * and polling mode and enforces any specified B/W control.
2668 2680 *
2669 2681 * There is always a SRS created and tied to each H/W and S/W rule.
2670 2682  * Whenever we create a H/W rule, we always add the same rule to
2671 2683 * S/W classifier and tie a SRS to it.
2672 2684 *
2673 2685  * In case a B/W control is specified, it's broken into bytes
2674 2686  * per tick, and as soon as the quota for a tick is exhausted,
2675 2687  * the underlying Rx ring is forced into poll mode for the remaining
2676 2688 * tick. The SRS poll thread only polls for bytes that are
2677 2689 * allowed to come in the SRS. We typically let 4x the configured
2678 2690  * B/W worth of packets come into the SRS (to prevent unnecessary
2679 2691 * drops due to bursts) but only process the specified amount.
2680 2692 *
2681 2693 * A Link (primary NIC, VNIC, VLAN or aggr) can have 1 or more
2682 2694 * Rx rings (and corresponding SRSs) assigned to it. The SRS
2683 2695 * in turn can have softrings to do protocol level fanout or
2684 2696 * softrings to do S/W based fanout or both. In case the NIC
2685 2697  * has no Rx rings, we do S/W classification to the respective SRS.
2686 2698 * The S/W classification rule is always setup and ready. This
2687 2699 * allows the MAC layer to reassign Rx rings whenever needed
2688 2700 * but packets still continue to flow via the default path and
2689 2701  * get S/W classified to the correct SRS.
2690 2702 *
2691 2703  * In other cases where a NIC or VNIC is plumbed, our goal is to use
2692 2704 * H/W classifier and get two Rx ring assigned for the Link. One
2693 2705 * for TCP and one for UDP|SCTP. The respective SRS still do the
2694 2706 * polling on the Rx ring. For Link that is plumbed for IP, there
2695 2707 * is a TCP squeue which also does polling and can control the
2696 2708  * Rx ring directly (where the SRS is just a pass-through). For
2697 2709 * the following cases, the SRS does the polling underneath.
2698 2710 * 1) non IP based Links (Links which are not plumbed via ifconfig)
2699 2711 * and paths which have no IP squeues (UDP & SCTP)
2700 2712 * 2) If B/W control is specified on the Link
2701 2713  * 3) If S/W fanout is specified
2702 2714 *
2703 2715  * Note1: As of the current implementation, we try to assign only 1 Rx
2704 2716  * ring per Link and more than 1 Rx ring for the primary Link for
2705 2717  * H/W based fanout. We always create the following softrings per SRS:
2706 2718 * 1) TCP softring which is polled by TCP squeue where possible
2707 2719 * (and also bypasses DLS)
2708 2720 * 2) UDP/SCTP based which bypasses DLS
2709 2721 * 3) OTH softring which goes via DLS (currently deal with IPv6
2710 2722 * and non TCP/UDP/SCTP for IPv4 packets).
2711 2723 *
2712 2724 * It is necessary to create 3 softrings since SRS has to poll
2713 2725 * the single Rx ring underneath and enforce any link level B/W
2714 2726 * control (we can't switch the Rx ring in poll mode just based
2715 2727 * on TCP squeue if the same Rx ring is sharing UDP and other
2716 2728 * traffic as well). Once polling is done and any Link level B/W
2717 2729 * control is specified, the packets are assigned to respective
2718 2730 * softring based on protocol. Since TCP has IP based squeue
2719 2731 * which benefits by polling, we separate TCP packets into
2720 2732 * its own softring which can be polled by IP squeue. We need
2721 2733 * to separate out UDP/SCTP to UDP softring since it can bypass
2722 2734  * the DLS layer which has heavy performance advantages and we
2723 2735 * need a softring (OTH) for rest.
2724 2736 *
2725 2737 * ToDo: The 3 softrings for protocol are needed only till we can
2726 2738 * get rid of DLS from datapath, make IPv4 and IPv6 paths
2727 2739 * symmetric (deal with mac_header_info for v6 and polling for
2728 2740 * IPv4 TCP - ip_accept_tcp is IPv4 specific although squeues
2729 2741 * are generic), and bring SAP based classification to MAC layer
2730 2742 *
2731 2743 * H/W and S/W based fanout and multiple Rx rings per Link
2732 2744 * -------------------------------------------------------
2733 2745 *
2734 2746  * In case fanout is requested (or determined automatically based
2735 2747 * on Link speed and processor speed), we try to assign multiple
2736 2748 * Rx rings per Link with their respective SRS. In this case
2737 2749 * the NIC should be capable of fanning out incoming packets between
2738 2750 * the assigned Rx rings (H/W based fanout). All the SRS
2739 2751 * individually switch their Rx ring between interrupt and polling
2740 2752  * mode but share a common B/W control counter when Link
2741 2753 * level B/W is specified.
2742 2754 *
2743 2755 * If S/W based fanout is specified in lieu of H/W based fanout,
2744 2756 * the Link SRS creates the specified number of softrings for
2745 2757 * each protocol (TCP, UDP, OTH). Incoming packets are fanned
2746 2758 * out to the correct softring based on their protocol and
2747 2759 * protocol specific hash function.
2748 2760 *
2749 2761 * Primary and non primary MAC clients
2750 2762 * -----------------------------------
2751 2763 *
2752 2764 * The NICs, VNICs, Vlans, and Aggrs are typically termed as Links
2753 2765 * and are a Layer 2 construct.
2754 2766 *
2755 2767 * Primary NIC:
2756 2768 * The Link that owns the primary MAC address and typically
2757 2769 * is used as the data NIC in non virtualized cases. As such
2758 2770 * H/W resources are preferntially given to primary NIC. As
2759 2771  *	H/W resources are preferentially given to the primary NIC. As
2760 2772 * primary NIC vs VNICs. They are all treated as Links.
2761 2773 * At the very first call to mac_unicast_add() we program the S/W
2762 2774 * classifier for the primary MAC address, get a soft ring set
2763 2775 * (and soft rings based on 'ip_soft_ring_cnt')
2764 2776 * and a Rx ring assigned for polling to get enabled.
2765 2777 * When IP get plumbed and negotiates polling, we can
2766 2778 * let squeue do the polling on TCP softring.
2767 2779 *
2768 2780 * VNICs:
2769 2781 * Same as any other Link. As long as the H/W resource assignments
2770 2782 * are equal, the data path and setup for all Links is same.
2771 2783 *
2772 2784 * Flows:
2773 2785 * Can be configured on Links. They have their own SRS and the
2774 2786 * S/W classifier is programmed appropriately based on the flow.
2775 2787 * The flows typically deal with layer 3 and above and
2776 2788 * creates a soft ring set specific to the flow. The receive
2777 2789 * side function is switched from mac_rx_srs_process to
2778 2790 * mac_rx_srs_subflow_process which first tries to assign the
2779 2791 * packet to appropriate flow SRS and failing which assigns it
2780 2792 * to link SRS. This allows us to avoid the layered approach
2781 2793 * which gets complex.
2782 2794 *
2783 2795 * By the time mac_datapath_setup() completes, we already have the
2784 2796 * soft rings set, Rx rings, soft rings, etc figured out and both H/W
2785 2797 * and S/W classifiers programmed. IP is not plumbed yet (and might
2786 2798 * never be for Virtual Machines guest OS path). When IP is plumbed
2787 2799 * (for both NIC and VNIC), we do a capability negotiation for polling
2788 2800 * and upcall functions etc.
2789 2801 *
2790 2802  * Rx ring Assignment NOTES
2791 2803 * -------------------------
2792 2804 *
2793 2805 * For NICs which have only 1 Rx ring (we treat NICs with no Rx rings
2794 2806 * as NIC with a single default ring), we assign the only ring to
2795 2807 * primary Link. The primary Link SRS can do polling on it as long as
2796 2808 * it is the only link in use and we compare the MAC address for unicast
2797 2809 * packets before accepting an incoming packet (there is no need for S/W
2798 2810 * classification in this case). We disable polling on the only ring the
2799 2811 * moment 2nd link gets created (the polling remains enabled even though
2800 2812  * there are broadcast and multicast flows created).
2801 2813 *
2802 2814 * If the NIC has more than 1 Rx ring, we assign the default ring (the
2803 2815 * 1st ring) to deal with broadcast, multicast and traffic for other
2804 2816  * NICs which need S/W classification. We assign the primary MAC
2805 2817  * address to another ring by specifying a classification rule for
2806 2818 * primary unicast MAC address to the selected ring. The primary Link
2807 2819 * (and its SRS) can continue to poll the assigned Rx ring at all times
2808 2820  * independently.
2809 2821 *
2810 2822 * Note: In future, if no fanout is specified, we try to assign 2 Rx
2811 2823 * rings for the primary Link with the primary MAC address + TCP going
2812 2824 * to one ring and primary MAC address + UDP|SCTP going to other ring.
2813 2825 * Any remaining traffic for primary MAC address can go to the default
2814 2826 * Rx ring and get S/W classified. This way the respective SRSs don't
2815 2827 * need to do proto fanout and don't need to have softrings at all and
2816 2828 * can poll their respective Rx rings.
2817 2829 *
2818 2830 * As an optimization, when a new NIC or VNIC is created, we can get
2819 2831 * only one Rx ring and make it a TCP specific Rx ring and use the
2820 2832 * H/W default Rx ring for the rest (this Rx ring is never polled).
2821 2833 *
2822 2834 * For clients that don't have MAC address, but want to receive and
2823 2835  * For clients that don't have a MAC address but want to receive and
2824 2836  * transmit packets (e.g., bpf, gvrp, etc.), we need to set up the datapath.
2825 2837 * always give the default group and use software classification (i.e.
2826 2838 * even if this is the only client in the default group, we will
2827 2839 * leave group as shared).
2828 2840 */
2841 +
2829 2842 int
2830 2843 mac_datapath_setup(mac_client_impl_t *mcip, flow_entry_t *flent,
2831 2844 uint32_t link_type)
2832 2845 {
2833 2846 mac_impl_t *mip = mcip->mci_mip;
2834 2847 mac_group_t *rgroup = NULL;
2835 2848 mac_group_t *tgroup = NULL;
2836 2849 mac_group_t *default_rgroup;
2837 2850 mac_group_t *default_tgroup;
2838 2851 int err;
2852 + uint16_t vid;
2839 2853 uint8_t *mac_addr;
2840 2854 mac_group_state_t next_state;
2841 2855 mac_client_impl_t *group_only_mcip;
2842 2856 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
2843 2857 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
2844 2858 boolean_t rxhw;
2845 2859 boolean_t txhw;
2846 2860 boolean_t use_default = B_FALSE;
2847 2861 cpupart_t *cpupart;
2848 2862 boolean_t no_unicast;
2849 2863 boolean_t isprimary = flent->fe_type & FLOW_PRIMARY_MAC;
2850 2864 mac_client_impl_t *reloc_pmcip = NULL;
2865 + boolean_t use_hw;
2851 2866
2852 2867 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
2853 2868
2854 2869 switch (link_type) {
2855 2870 case SRST_FLOW:
2856 2871 mac_srs_group_setup(mcip, flent, link_type);
2857 2872 return (0);
2858 2873
2859 2874 case SRST_LINK:
2860 2875 no_unicast = mcip->mci_state_flags & MCIS_NO_UNICAST_ADDR;
2861 2876 mac_addr = flent->fe_flow_desc.fd_dst_mac;
2862 2877
2863 2878 /* Default RX group */
2864 2879 default_rgroup = MAC_DEFAULT_RX_GROUP(mip);
2865 2880
2866 2881 /* Default TX group */
2867 2882 default_tgroup = MAC_DEFAULT_TX_GROUP(mip);
2868 2883
2869 2884 if (no_unicast) {
2870 2885 rgroup = default_rgroup;
2871 2886 tgroup = default_tgroup;
2872 2887 goto grp_found;
2873 2888 }
2874 2889 rxhw = (mrp->mrp_mask & MRP_RX_RINGS) &&
2875 2890 (mrp->mrp_nrxrings > 0 ||
2876 2891 (mrp->mrp_mask & MRP_RXRINGS_UNSPEC));
2877 2892 txhw = (mrp->mrp_mask & MRP_TX_RINGS) &&
2878 2893 (mrp->mrp_ntxrings > 0 ||
2879 2894 (mrp->mrp_mask & MRP_TXRINGS_UNSPEC));
2880 2895
2881 2896 /*
2882 - * By default we have given the primary all the rings
2883 - * i.e. the default group. Let's see if the primary
2884 - * needs to be relocated so that the addition of this
2885 - * client doesn't impact the primary's performance,
2886 - * i.e. if the primary is in the default group and
2887 - * we add this client, the primary will lose polling.
2888 - * We do this only for NICs supporting dynamic ring
2889 - * grouping and only when this is the first client
2890 - * after the primary (i.e. nactiveclients is 2)
2897 + * All the rings initially belong to the default group
2898 + * under dynamic grouping. The primary client uses the
2899 + * default group when it is the only client. The
2900 + * default group is also used as the destination for
2901 + * all multicast and broadcast traffic of all clients.
2902 + * Therefore, the primary client loses its ability to
2903 + * poll the softrings on addition of a second client.
2904 + * To avoid a performance penalty, MAC will move the
2905 + * primary client to a dedicated group when it can.
2906 + *
2907 + * When using static grouping, the primary client
2908 + * begins life on a non-default group. There is
2909 + * no moving needed upon addition of a second client.
2891 2910 */
2892 2911 if (!isprimary && mip->mi_nactiveclients == 2 &&
2893 2912 (group_only_mcip = mac_primary_client_handle(mip)) !=
2894 2913 NULL && mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
2895 2914 reloc_pmcip = mac_check_primary_relocation(
2896 2915 group_only_mcip, rxhw);
2897 2916 }
2917 +
2898 2918 /*
2899 2919 * Check to see if we can get an exclusive group for
2900 2920 * this mac address or if there already exists a
2901 2921 * group that has this mac address (case of VLANs).
2902 2922 * If no groups are available, use the default group.
2903 2923 */
2904 2924 rgroup = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
2905 2925 if (rgroup == NULL && rxhw) {
2906 2926 err = ENOSPC;
2907 2927 goto setup_failed;
2908 2928 } else if (rgroup == NULL) {
2909 2929 rgroup = default_rgroup;
2910 2930 }
2931 +
2911 2932 /*
2933 + * If we are adding a second client to a
2934 + * non-default group then we need to move the
2935 + * existing client to the default group and
2936 + * add the new client to the default group as
2937 + * well.
2938 + */
2939 + if (rgroup != default_rgroup &&
2940 + rgroup->mrg_state == MAC_GROUP_STATE_RESERVED) {
2941 + group_only_mcip = MAC_GROUP_ONLY_CLIENT(rgroup);
2942 + err = mac_rx_switch_group(group_only_mcip, rgroup,
2943 + default_rgroup);
2944 +
2945 + if (err != 0)
2946 + goto setup_failed;
2947 +
2948 + rgroup = default_rgroup;
2949 + }
2950 +
2951 + /*
2912 2952 * Check to see if we can get an exclusive group for
2913 2953 * this mac client. If no groups are available, use
2914 2954 * the default group.
2915 2955 */
2916 2956 tgroup = mac_reserve_tx_group(mcip, B_FALSE);
2917 2957 if (tgroup == NULL && txhw) {
2918 2958 if (rgroup != NULL && rgroup != default_rgroup)
2919 2959 mac_release_rx_group(mcip, rgroup);
2920 2960 err = ENOSPC;
2921 2961 goto setup_failed;
2922 2962 } else if (tgroup == NULL) {
2923 2963 tgroup = default_tgroup;
2924 2964 }
2925 2965
2926 2966 /*
2927 2967 * Some NICs don't support any Rx rings, so there may not
2928 2968 * even be a default group.
2929 2969 */
2930 2970 grp_found:
2931 2971 if (rgroup != NULL) {
2932 2972 if (rgroup != default_rgroup &&
2933 2973 MAC_GROUP_NO_CLIENT(rgroup) &&
2934 2974 (rxhw || mcip->mci_share != 0)) {
2935 2975 MAC_RX_GRP_RESERVED(mip);
2936 2976 if (mip->mi_rx_group_type ==
2937 2977 MAC_GROUP_TYPE_DYNAMIC) {
2938 2978 MAC_RX_RING_RESERVED(mip,
2939 2979 rgroup->mrg_cur_count);
2940 2980 }
2941 2981 }
2982 +
2942 2983 flent->fe_rx_ring_group = rgroup;
2943 2984 /*
2944 - * Add the client to the group. This could cause
2945 - * either this group to move to the shared state or
2946 - * cause the default group to move to the shared state.
2947 - * The actions on this group are done here, while the
2948 - * actions on the default group are postponed to
2949 - * the end of this function.
2985 + * Add the client to the group and update the
2986 + * group's state. If rgroup != default_rgroup
2987 + * then the rgroup should only ever have one
2988 + * client and be in the RESERVED state. But no
2989 + * matter what, the default_rgroup will enter
2990 + * the SHARED state since it has to receive
2991 + * all broadcast and multicast traffic. This
2992 + * case is handled later in the function.
2950 2993 */
2951 2994 mac_group_add_client(rgroup, mcip);
2952 2995 next_state = mac_group_next_state(rgroup,
2953 2996 &group_only_mcip, default_rgroup, B_TRUE);
2954 2997 mac_set_group_state(rgroup, next_state);
2955 2998 }
2956 2999
2957 3000 if (tgroup != NULL) {
2958 3001 if (tgroup != default_tgroup &&
2959 3002 MAC_GROUP_NO_CLIENT(tgroup) &&
2960 3003 (txhw || mcip->mci_share != 0)) {
2961 3004 MAC_TX_GRP_RESERVED(mip);
2962 3005 if (mip->mi_tx_group_type ==
2963 3006 MAC_GROUP_TYPE_DYNAMIC) {
2964 3007 MAC_TX_RING_RESERVED(mip,
2965 3008 tgroup->mrg_cur_count);
2966 3009 }
2967 3010 }
2968 3011 flent->fe_tx_ring_group = tgroup;
2969 3012 mac_group_add_client(tgroup, mcip);
2970 3013 next_state = mac_group_next_state(tgroup,
2971 3014 &group_only_mcip, default_tgroup, B_FALSE);
2972 3015 tgroup->mrg_state = next_state;
2973 3016 }
2974 - /*
2975 - * Setup the Rx and Tx SRSes. If we got a pristine group
2976 - * exclusively above, mac_srs_group_setup would simply create
2977 - * the required SRSes. If we ended up sharing a previously
2978 - * reserved group, mac_srs_group_setup would also dismantle the
2979 - * SRSes of the previously exclusive group
2980 - */
2981 - mac_srs_group_setup(mcip, flent, link_type);
2982 3017
2983 3018 /* We are setting up minimal datapath only */
2984 - if (no_unicast)
3019 + if (no_unicast) {
3020 + mac_srs_group_setup(mcip, flent, link_type);
2985 3021 break;
2986 - /* Program the S/W Classifer */
3022 + }
3023 +
3024 + /* Program software classification. */
2987 3025 if ((err = mac_flow_add(mip->mi_flow_tab, flent)) != 0)
2988 3026 goto setup_failed;
2989 3027
2990 - /* Program the H/W Classifier */
2991 - if ((err = mac_add_macaddr(mip, rgroup, mac_addr,
2992 - (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0)) != 0)
3028 + /* Program hardware classification. */
3029 + vid = i_mac_flow_vid(flent);
3030 + use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
3031 + err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);
3032 +
3033 + if (err != 0)
2993 3034 goto setup_failed;
3035 +
2994 3036 mcip->mci_unicast = mac_find_macaddr(mip, mac_addr);
2995 - ASSERT(mcip->mci_unicast != NULL);
3037 + VERIFY3P(mcip->mci_unicast, !=, NULL);
3038 +
3039 + /*
3040 + * Setup the Rx and Tx SRSes. If the client has a
3041 + * reserved group, then mac_srs_group_setup() creates
3042 + * the required SRSes for the HW rings. If we have a
3043 + * shared group, mac_srs_group_setup() dismantles the
3044 + * HW SRSes of the previously exclusive group.
3045 + */
3046 + mac_srs_group_setup(mcip, flent, link_type);
3047 +
2996 3048 /* (Re)init the v6 token & local addr used by link protection */
2997 3049 mac_protect_update_mac_token(mcip);
2998 3050 break;
2999 3051
3000 3052 default:
3001 3053 ASSERT(B_FALSE);
3002 3054 break;
3003 3055 }
3004 3056
3005 3057 /*
3006 3058 * All broadcast and multicast traffic is received only on the default
3007 3059 * group. If we have setup the datapath for a non-default group above
3008 3060 * then move the default group to shared state to allow distribution of
3009 3061 * incoming broadcast traffic to the other groups and dismantle the
3010 3062 * SRSes over the default group.
3011 3063 */
3012 3064 if (rgroup != NULL) {
3013 3065 if (rgroup != default_rgroup) {
3014 3066 if (default_rgroup->mrg_state ==
3015 3067 MAC_GROUP_STATE_RESERVED) {
3016 3068 group_only_mcip = MAC_GROUP_ONLY_CLIENT(
3017 3069 default_rgroup);
3018 3070 ASSERT(group_only_mcip != NULL &&
3019 3071 mip->mi_nactiveclients > 1);
3020 3072
3021 3073 mac_set_group_state(default_rgroup,
3022 3074 MAC_GROUP_STATE_SHARED);
3023 3075 mac_rx_srs_group_setup(group_only_mcip,
3024 3076 group_only_mcip->mci_flent, SRST_LINK);
3025 3077 pool_lock();
3026 3078 cpupart = mac_pset_find(mrp, &use_default);
3027 3079 mac_fanout_setup(group_only_mcip,
3028 3080 group_only_mcip->mci_flent,
3029 3081 MCIP_RESOURCE_PROPS(group_only_mcip),
3030 3082 mac_rx_deliver, group_only_mcip, NULL,
3031 3083 cpupart);
3032 3084 mac_set_pool_effective(use_default, cpupart,
3033 3085 mrp, emrp);
3034 3086 pool_unlock();
3035 3087 }
3036 3088 ASSERT(default_rgroup->mrg_state ==
3037 3089 MAC_GROUP_STATE_SHARED);
3038 3090 }
3091 +
3039 3092 /*
3040 - * If we get an exclusive group for a VLAN MAC client we
3041 - * need to take the s/w path to make the additional check for
3042 - * the vid. Disable polling and set it to s/w classification.
3043 - * Similarly for clients that don't have a unicast address.
3093 + * A VLAN MAC client on a reserved group still
3094 + * requires SW classification if the MAC doesn't
3095 + * provide VLAN HW filtering.
3096 + *
3097 + * Clients with no unicast address also require SW
3098 + * classification.
3044 3099 */
3045 3100 if (rgroup->mrg_state == MAC_GROUP_STATE_RESERVED &&
3046 - (i_mac_flow_vid(flent) != VLAN_ID_NONE || no_unicast)) {
3101 + ((!MAC_GROUP_HW_VLAN(rgroup) && vid != VLAN_ID_NONE) ||
3102 + no_unicast)) {
3047 3103 mac_rx_switch_grp_to_sw(rgroup);
3048 3104 }
3105 +
3049 3106 }
3107 +
3050 3108 mac_set_rings_effective(mcip);
3051 3109 return (0);
3052 3110
3053 3111 setup_failed:
3054 3112 /* Switch the primary back to default group */
3055 3113 if (reloc_pmcip != NULL) {
3056 3114 (void) mac_rx_switch_group(reloc_pmcip,
3057 3115 reloc_pmcip->mci_flent->fe_rx_ring_group, default_rgroup);
3058 3116 }
3059 3117 mac_datapath_teardown(mcip, flent, link_type);
3060 3118 return (err);
3061 3119 }
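
A note on MAC_GROUP_HW_VLAN(): the check above is what lets a VLAN client on a
reserved group keep hardware classification (and therefore ring polling) when
the driver can filter VLANs in hardware. The macro itself is not shown in this
diff; a plausible shape, assuming the group's driver info exports a VLAN-add
entry point (an assumption, not quoted from the patch):

    /* Assumed definition; the real macro lives in mac_impl.h. */
    #define	MAC_GROUP_HW_VLAN(g) \
	    (((g) != NULL) && ((g)->mrg_info.mgi_addvlan != NULL))

If the driver provides no such entry point, the VLAN client falls back to
mac_rx_switch_grp_to_sw(), as before this change.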
3062 3120
3063 3121 void
3064 3122 mac_datapath_teardown(mac_client_impl_t *mcip, flow_entry_t *flent,
3065 3123 uint32_t link_type)
3066 3124 {
3067 3125 mac_impl_t *mip = mcip->mci_mip;
3068 3126 mac_group_t *group = NULL;
3069 3127 mac_client_impl_t *grp_only_mcip;
3070 3128 flow_entry_t *group_only_flent;
3071 3129 mac_group_t *default_group;
3072 3130 boolean_t check_default_group = B_FALSE;
3073 3131 mac_group_state_t next_state;
3074 3132 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
3133 + uint16_t vid;
3075 3134
3076 3135 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
3077 3136
3078 3137 switch (link_type) {
3079 3138 case SRST_FLOW:
3080 3139 mac_rx_srs_group_teardown(flent, B_FALSE);
3081 3140 mac_tx_srs_group_teardown(mcip, flent, SRST_FLOW);
3082 3141 return;
3083 3142
3084 3143 case SRST_LINK:
3085 3144 /* Stop sending packets */
3086 3145 mac_tx_client_block(mcip);
3146 + group = flent->fe_rx_ring_group;
3147 + vid = i_mac_flow_vid(flent);
3087 3148
3088 - /* Stop the packets coming from the H/W */
3149 + /*
3150 + * Stop the packet flow from the hardware by disabling
3151 + * any hardware filters assigned to this client.
3152 + */
3089 3153 if (mcip->mci_unicast != NULL) {
3090 3154 int err;
3091 - err = mac_remove_macaddr(mcip->mci_unicast);
3155 +
3156 + err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);
3157 +
3092 3158 if (err != 0) {
3093 - cmn_err(CE_WARN, "%s: failed to remove a MAC"
3094 - " address because of error 0x%x",
3159 + cmn_err(CE_WARN, "%s: failed to remove MAC HW"
3160 + " filters because of error 0x%x",
3095 3161 mip->mi_name, err);
3096 3162 }
3163 +
3097 3164 mcip->mci_unicast = NULL;
3098 3165 }
3099 3166
3100 3167 /* Stop the packets coming from the S/W classifier */
3101 3168 mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
3102 3169 mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3103 3170
3104 3171 /* Now quiesce and destroy all SRS and soft rings */
3105 3172 mac_rx_srs_group_teardown(flent, B_FALSE);
3106 3173 mac_tx_srs_group_teardown(mcip, flent, SRST_LINK);
3107 3174
3108 3175 ASSERT((mcip->mci_flent == flent) &&
3109 3176 (flent->fe_next == NULL));
3110 3177
3111 3178 /*
3112 3179 * Release our hold on the group as well. We need
3113 3180 * to check if the shared group has only one client
3114 3181 * left who can use it exclusively. Also, if we
3115 3182 * were the last client, release the group.
3116 3183 */
3117 - group = flent->fe_rx_ring_group;
3118 3184 default_group = MAC_DEFAULT_RX_GROUP(mip);
3119 3185 if (group != NULL) {
3120 3186 mac_group_remove_client(group, mcip);
3121 3187 next_state = mac_group_next_state(group,
3122 3188 &grp_only_mcip, default_group, B_TRUE);
3189 +
3123 3190 if (next_state == MAC_GROUP_STATE_RESERVED) {
3124 3191 /*
3125 3192 * Only one client left on this RX group.
3126 3193 */
3127 - ASSERT(grp_only_mcip != NULL);
3194 + VERIFY3P(grp_only_mcip, !=, NULL);
3128 3195 mac_set_group_state(group,
3129 3196 MAC_GROUP_STATE_RESERVED);
3130 3197 group_only_flent = grp_only_mcip->mci_flent;
3131 3198
3132 3199 /*
3133 3200 * The only remaining client has exclusive
3134 3201 * access on the group. Allow it to
3135 3202 * dynamically poll the H/W rings etc.
3136 3203 */
3137 3204 mac_rx_srs_group_setup(grp_only_mcip,
3138 3205 group_only_flent, SRST_LINK);
3139 3206 mac_fanout_setup(grp_only_mcip,
3140 3207 group_only_flent,
3141 3208 MCIP_RESOURCE_PROPS(grp_only_mcip),
3142 3209 mac_rx_deliver, grp_only_mcip, NULL, NULL);
3143 3210 mac_rx_group_unmark(group, MR_INCIPIENT);
3144 3211 mac_set_rings_effective(grp_only_mcip);
3145 3212 } else if (next_state == MAC_GROUP_STATE_REGISTERED) {
3146 3213 /*
3147 3214 * This is a non-default group being freed up.
3148 3215 * We need to reevaluate the default group
3149 3216 * to see if the primary client can get
3150 3217 * exclusive access to the default group.
3151 3218 */
3152 - ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
3219 + VERIFY3P(group, !=, MAC_DEFAULT_RX_GROUP(mip));
3153 3220 if (mrp->mrp_mask & MRP_RX_RINGS) {
3154 3221 MAC_RX_GRP_RELEASED(mip);
3155 3222 if (mip->mi_rx_group_type ==
3156 3223 MAC_GROUP_TYPE_DYNAMIC) {
3157 3224 MAC_RX_RING_RELEASED(mip,
3158 3225 group->mrg_cur_count);
3159 3226 }
3160 3227 }
3161 3228 mac_release_rx_group(mcip, group);
3162 3229 mac_set_group_state(group,
3163 3230 MAC_GROUP_STATE_REGISTERED);
3164 3231 check_default_group = B_TRUE;
3165 3232 } else {
3166 - ASSERT(next_state == MAC_GROUP_STATE_SHARED);
3233 + VERIFY3S(next_state, ==,
3234 + MAC_GROUP_STATE_SHARED);
3167 3235 mac_set_group_state(group,
3168 3236 MAC_GROUP_STATE_SHARED);
3169 3237 mac_rx_group_unmark(group, MR_CONDEMNED);
3170 3238 }
3171 3239 flent->fe_rx_ring_group = NULL;
3172 3240 }
3173 3241 /*
3174 3242 * Remove the client from the TX group. Additionally, if
3175 3243 * this a non-default group, then we also need to release
3176 3244 * the group.
3177 3245 */
3178 3246 group = flent->fe_tx_ring_group;
3179 3247 default_group = MAC_DEFAULT_TX_GROUP(mip);
3180 3248 if (group != NULL) {
3181 3249 mac_group_remove_client(group, mcip);
3182 3250 next_state = mac_group_next_state(group,
3183 3251 &grp_only_mcip, default_group, B_FALSE);
3184 3252 if (next_state == MAC_GROUP_STATE_REGISTERED) {
3185 3253 if (group != default_group) {
3186 3254 if (mrp->mrp_mask & MRP_TX_RINGS) {
3187 3255 MAC_TX_GRP_RELEASED(mip);
3188 3256 if (mip->mi_tx_group_type ==
3189 3257 MAC_GROUP_TYPE_DYNAMIC) {
3190 3258 MAC_TX_RING_RELEASED(
3191 3259 mip, group->
3192 3260 mrg_cur_count);
3193 3261 }
3194 3262 }
3195 3263 mac_release_tx_group(mcip, group);
3196 3264 /*
3197 3265 * If the default group is reserved,
3198 3266 * then we need to set the effective
3199 3267 * rings as we would have given
3200 3268 * back some rings when the group
3201 3269 * was released
3202 3270 */
3203 3271 if (mip->mi_tx_group_type ==
3204 3272 MAC_GROUP_TYPE_DYNAMIC &&
3205 3273 default_group->mrg_state ==
3206 3274 MAC_GROUP_STATE_RESERVED) {
3207 3275 grp_only_mcip =
3208 3276 MAC_GROUP_ONLY_CLIENT
3209 3277 (default_group);
3210 3278 mac_set_rings_effective(
3211 3279 grp_only_mcip);
3212 3280 }
3213 3281 } else {
3214 3282 mac_ring_t *ring;
3215 3283 int cnt;
3216 3284 int ringcnt;
3217 3285
3218 3286 /*
3219 3287 * Stop all the rings except the
3220 3288 * default ring.
3221 3289 */
3222 3290 ringcnt = group->mrg_cur_count;
3223 3291 ring = group->mrg_rings;
3224 3292 for (cnt = 0; cnt < ringcnt; cnt++) {
3225 3293 if (ring->mr_state ==
3226 3294 MR_INUSE && ring !=
3227 3295 (mac_ring_t *)
3228 3296 mip->mi_default_tx_ring) {
3229 3297 mac_stop_ring(ring);
3230 3298 ring->mr_flag = 0;
3231 3299 }
3232 3300 ring = ring->mr_next;
3233 3301 }
3234 3302 }
3235 3303 } else if (next_state == MAC_GROUP_STATE_RESERVED) {
3236 3304 mac_set_rings_effective(grp_only_mcip);
3237 3305 }
3238 3306 flent->fe_tx_ring_group = NULL;
3239 3307 group->mrg_state = next_state;
3240 3308 }
3241 3309 break;
3242 3310 default:
3243 3311 ASSERT(B_FALSE);
3244 3312 break;
3245 3313 }
3246 3314
3247 3315 /*
3248 3316 * The mac client using the default group gets exclusive access to the
3249 3317 * default group if and only if it is the sole client on the entire
3250 3318  * mip. If so, set the group state to reserved and set up the SRSes
3251 3319 * over the default group.
3252 3320 */
3253 3321 if (check_default_group) {
3254 3322 default_group = MAC_DEFAULT_RX_GROUP(mip);
3255 - ASSERT(default_group->mrg_state == MAC_GROUP_STATE_SHARED);
3323 + VERIFY3S(default_group->mrg_state, ==, MAC_GROUP_STATE_SHARED);
3256 3324 next_state = mac_group_next_state(default_group,
3257 3325 &grp_only_mcip, default_group, B_TRUE);
3258 3326 if (next_state == MAC_GROUP_STATE_RESERVED) {
3259 - ASSERT(grp_only_mcip != NULL &&
3260 - mip->mi_nactiveclients == 1);
3327 + VERIFY3P(grp_only_mcip, !=, NULL);
3328 + VERIFY3U(mip->mi_nactiveclients, ==, 1);
3261 3329 mac_set_group_state(default_group,
3262 3330 MAC_GROUP_STATE_RESERVED);
3263 3331 mac_rx_srs_group_setup(grp_only_mcip,
3264 3332 grp_only_mcip->mci_flent, SRST_LINK);
3265 3333 mac_fanout_setup(grp_only_mcip,
3266 3334 grp_only_mcip->mci_flent,
3267 3335 MCIP_RESOURCE_PROPS(grp_only_mcip), mac_rx_deliver,
3268 3336 grp_only_mcip, NULL, NULL);
3269 3337 mac_rx_group_unmark(default_group, MR_INCIPIENT);
3270 3338 mac_set_rings_effective(grp_only_mcip);
3271 3339 }
3272 3340 }
3273 3341
3274 3342 /*
3275 3343 * If the primary is the only one left and the MAC supports
3276 3344 * dynamic grouping, we need to see if the primary needs to
3277 3345 * be moved to the default group so that it can use all the
3278 3346 * H/W rings.
3279 3347 */
3280 3348 if (!(flent->fe_type & FLOW_PRIMARY_MAC) &&
3281 3349 mip->mi_nactiveclients == 1 &&
3282 3350 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
3283 3351 default_group = MAC_DEFAULT_RX_GROUP(mip);
3284 3352 grp_only_mcip = mac_primary_client_handle(mip);
3285 3353 if (grp_only_mcip == NULL)
3286 3354 return;
3287 3355 group_only_flent = grp_only_mcip->mci_flent;
3288 3356 mrp = MCIP_RESOURCE_PROPS(grp_only_mcip);
3289 3357 /*
3290 3358 * If the primary has an explicit property set, leave it
3291 3359 * alone.
3292 3360 */
3293 3361 if (mrp->mrp_mask & MRP_RX_RINGS)
3294 3362 return;
3295 3363 /*
3296 3364 * Switch the primary to the default group.
3297 3365 */
3298 3366 (void) mac_rx_switch_group(grp_only_mcip,
3299 3367 group_only_flent->fe_rx_ring_group, default_group);
3300 3368 }
3301 3369 }
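
Taken together, mac_datapath_setup() and mac_datapath_teardown() now pair the
hardware filter on (MAC address, VID) rather than on the address alone. A
condensed view of the calls as they appear in this diff:

    /* setup: program the (address, VID) filter */
    vid = i_mac_flow_vid(flent);		/* VLAN_ID_NONE if untagged */
    use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
    err = mac_add_macaddr_vlan(mip, rgroup, mac_addr, vid, use_hw);

    /* teardown: remove the same filter */
    err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);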
3302 3370
3303 3371 /* DATAPATH TEAR DOWN ROUTINES (SRS and FANOUT teardown) */
3304 3372
3305 3373 static void
3306 3374 mac_srs_fanout_list_free(mac_soft_ring_set_t *mac_srs)
3307 3375 {
3308 3376 if (mac_srs->srs_type & SRST_TX) {
3309 3377 mac_srs_tx_t *tx;
3310 3378
3311 3379 ASSERT(mac_srs->srs_tcp_soft_rings == NULL);
3312 3380 ASSERT(mac_srs->srs_udp_soft_rings == NULL);
3313 3381 ASSERT(mac_srs->srs_oth_soft_rings == NULL);
3314 3382 ASSERT(mac_srs->srs_tx_soft_rings != NULL);
3315 3383 kmem_free(mac_srs->srs_tx_soft_rings,
3316 3384 sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP);
3317 3385 mac_srs->srs_tx_soft_rings = NULL;
3318 3386 tx = &mac_srs->srs_tx;
3319 3387 if (tx->st_soft_rings != NULL) {
3320 3388 kmem_free(tx->st_soft_rings,
3321 3389 sizeof (mac_soft_ring_t *) * MAX_RINGS_PER_GROUP);
3322 3390 }
3323 3391 } else {
3324 3392 ASSERT(mac_srs->srs_tx_soft_rings == NULL);
3325 3393 ASSERT(mac_srs->srs_tcp_soft_rings != NULL);
3326 3394 kmem_free(mac_srs->srs_tcp_soft_rings,
3327 3395 sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT);
3328 3396 mac_srs->srs_tcp_soft_rings = NULL;
3329 3397 ASSERT(mac_srs->srs_udp_soft_rings != NULL);
3330 3398 kmem_free(mac_srs->srs_udp_soft_rings,
3331 3399 sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT);
3332 3400 mac_srs->srs_udp_soft_rings = NULL;
3333 3401 ASSERT(mac_srs->srs_oth_soft_rings != NULL);
3334 3402 kmem_free(mac_srs->srs_oth_soft_rings,
3335 3403 sizeof (mac_soft_ring_t *) * MAX_SR_FANOUT);
3336 3404 mac_srs->srs_oth_soft_rings = NULL;
3337 3405 }
3338 3406 }
3339 3407
3340 3408 /*
3341 3409 * An RX SRS is attached to at most one mac_ring.
3342 3410 * A TX SRS has no rings.
3343 3411 */
3344 3412 static void
3345 3413 mac_srs_ring_free(mac_soft_ring_set_t *mac_srs)
3346 3414 {
3347 3415 mac_client_impl_t *mcip;
3348 3416 mac_ring_t *ring;
3349 3417 flow_entry_t *flent;
3350 3418
3351 3419 ring = mac_srs->srs_ring;
3352 3420 if (mac_srs->srs_type & SRST_TX) {
3353 3421 ASSERT(ring == NULL);
3354 3422 return;
3355 3423 }
3356 3424
3357 3425 if (ring == NULL)
3358 3426 return;
3359 3427
3360 3428 /*
3361 3429 * Broadcast flows don't have a client impl association, but they
3362 3430 * use only soft rings.
3363 3431 */
3364 3432 flent = mac_srs->srs_flent;
3365 3433 mcip = flent->fe_mcip;
3366 3434 ASSERT(mcip != NULL);
3367 3435
3368 3436 ring->mr_classify_type = MAC_NO_CLASSIFIER;
3369 3437 ring->mr_srs = NULL;
3370 3438 }
3371 3439
3372 3440 /*
3373 3441 * Physical unlink and free of the data structures happen below. This is
3374 3442 * driven from mac_flow_destroy(), on the last refrele of a flow.
3375 3443 *
3376 3444  * Assumes an Rx SRS is 1-1 mapped with a ring.
3377 3445 */
3378 3446 void
3379 3447 mac_srs_free(mac_soft_ring_set_t *mac_srs)
3380 3448 {
3381 3449 ASSERT(mac_srs->srs_mcip == NULL ||
3382 3450 MAC_PERIM_HELD((mac_handle_t)mac_srs->srs_mcip->mci_mip));
3383 3451 ASSERT((mac_srs->srs_state & (SRS_CONDEMNED | SRS_CONDEMNED_DONE |
3384 3452 SRS_PROC | SRS_PROC_FAST)) == (SRS_CONDEMNED | SRS_CONDEMNED_DONE));
3385 3453
3386 3454 mac_pkt_drop(NULL, NULL, mac_srs->srs_first, B_FALSE);
3387 3455 mac_srs_ring_free(mac_srs);
3388 3456 mac_srs_soft_rings_free(mac_srs);
3389 3457 mac_srs_fanout_list_free(mac_srs);
3390 3458
3391 3459 mac_srs->srs_bw = NULL;
3392 3460 mac_srs_stat_delete(mac_srs);
3393 3461 kmem_cache_free(mac_srs_cache, mac_srs);
3394 3462 }
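
The asserts above encode the condemned-path ordering: by the time
mac_srs_free() runs, the SRS must have been signaled with SRS_CONDEMNED (which
unbinds its threads and removes it from the global SRS list) and the worker
must have finished the quiesce, leaving SRS_CONDEMNED_DONE set. A rough
sequence, assembled from the functions in this file:

    mac_srs_signal(mac_srs, SRS_CONDEMNED);
    /*
     * The SRS worker runs mac_srs_worker_quiesce(), which sets
     * SRS_QUIESCE_DONE | SRS_CONDEMNED_DONE and signals
     * srs_quiesce_done_cv; the last flow refrele then calls:
     */
    mac_srs_free(mac_srs);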
3395 3463
3396 3464 static void
3397 3465 mac_srs_soft_rings_quiesce(mac_soft_ring_set_t *mac_srs, uint_t s_ring_flag)
3398 3466 {
3399 3467 mac_soft_ring_t *softring;
3400 3468
3401 3469 ASSERT(MUTEX_HELD(&mac_srs->srs_lock));
3402 3470
3403 3471 mac_srs_soft_rings_signal(mac_srs, s_ring_flag);
3404 3472 if (s_ring_flag == S_RING_CONDEMNED) {
3405 3473 while (mac_srs->srs_soft_ring_condemned_count !=
3406 3474 mac_srs->srs_soft_ring_count)
3407 3475 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3408 3476 } else {
3409 3477 while (mac_srs->srs_soft_ring_quiesced_count !=
3410 3478 mac_srs->srs_soft_ring_count)
3411 3479 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3412 3480 }
3413 3481 mutex_exit(&mac_srs->srs_lock);
3414 3482
3415 3483 for (softring = mac_srs->srs_soft_ring_head; softring != NULL;
3416 3484 softring = softring->s_ring_next) {
3417 3485 (void) untimeout(softring->s_ring_tid);
3418 3486 softring->s_ring_tid = NULL;
3419 3487 }
3420 3488
3421 3489 (void) untimeout(mac_srs->srs_tid);
3422 3490 mac_srs->srs_tid = NULL;
3423 3491
3424 3492 mutex_enter(&mac_srs->srs_lock);
3425 3493 }
3426 3494
3427 3495 /*
3428 3496 * The block comment above mac_rx_classify_flow_state_change explains the
3429 3497 * background. At this point upcalls from the driver (both hardware classified
3430 3498 * and software classified) have been cut off. We now need to quiesce the
3431 3499 * SRS worker, poll, and softring threads. The SRS worker thread serves as
3432 3500  * the master controller. The steps involved are described below in the function.
3433 3501 */
3434 3502 void
3435 3503 mac_srs_worker_quiesce(mac_soft_ring_set_t *mac_srs)
3436 3504 {
3437 3505 uint_t s_ring_flag;
3438 3506 uint_t srs_poll_wait_flag;
3439 3507
3440 3508 ASSERT(MUTEX_HELD(&mac_srs->srs_lock));
3441 3509 ASSERT(mac_srs->srs_state & (SRS_CONDEMNED | SRS_QUIESCE));
3442 3510
3443 3511 if (mac_srs->srs_state & SRS_CONDEMNED) {
3444 3512 s_ring_flag = S_RING_CONDEMNED;
3445 3513 srs_poll_wait_flag = SRS_POLL_THR_EXITED;
3446 3514 } else {
3447 3515 s_ring_flag = S_RING_QUIESCE;
3448 3516 srs_poll_wait_flag = SRS_POLL_THR_QUIESCED;
3449 3517 }
3450 3518
3451 3519 /*
3452 3520 * In the case of Rx SRS wait till the poll thread is done.
3453 3521 */
3454 3522 if ((mac_srs->srs_type & SRST_TX) == 0 &&
3455 3523 mac_srs->srs_poll_thr != NULL) {
3456 3524 while (!(mac_srs->srs_state & srs_poll_wait_flag))
3457 3525 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3458 3526
3459 3527 /*
3460 3528 * Turn off polling as part of the quiesce operation.
3461 3529 */
3462 3530 MAC_SRS_POLLING_OFF(mac_srs);
3463 3531 mac_srs->srs_state &= ~(SRS_POLLING | SRS_GET_PKTS);
3464 3532 }
3465 3533
3466 3534 /*
3467 3535 * Then signal the soft ring worker threads to quiesce or quit
3468 3536 * as needed and then wait till that happens.
3469 3537 */
3470 3538 mac_srs_soft_rings_quiesce(mac_srs, s_ring_flag);
3471 3539
3472 3540 if (mac_srs->srs_state & SRS_CONDEMNED)
3473 3541 mac_srs->srs_state |= (SRS_QUIESCE_DONE | SRS_CONDEMNED_DONE);
3474 3542 else
3475 3543 mac_srs->srs_state |= SRS_QUIESCE_DONE;
3476 3544 cv_signal(&mac_srs->srs_quiesce_done_cv);
3477 3545 }
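
For context, the caller-side view of this handshake: the controller signals
the SRS and then sleeps on srs_quiesce_done_cv until the worker raises
SRS_QUIESCE_DONE. A minimal sketch, using only the flags and condition
variables visible in this file (the loop itself is illustrative, not quoted
code):

    mac_srs_signal(mac_srs, SRS_QUIESCE);
    mutex_enter(&mac_srs->srs_lock);
    while (!(mac_srs->srs_state & SRS_QUIESCE_DONE))
	    cv_wait(&mac_srs->srs_quiesce_done_cv, &mac_srs->srs_lock);
    mutex_exit(&mac_srs->srs_lock);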
3478 3546
3479 3547 /*
3480 3548 * Signal an SRS to start a temporary quiesce, or permanent removal, or restart
3481 3549 * a quiesced SRS by setting the appropriate flags and signaling the SRS worker
3482 3550 * or poll thread. This function is internal to the quiescing logic and is
3483 3551 * called internally from the SRS quiesce or flow quiesce or client quiesce
3484 3552 * higher level functions.
3485 3553 */
3486 3554 void
3487 3555 mac_srs_signal(mac_soft_ring_set_t *mac_srs, uint_t srs_flag)
3488 3556 {
3489 3557 mac_ring_t *ring;
3490 3558
3491 3559 ring = mac_srs->srs_ring;
3492 3560 ASSERT(ring == NULL || ring->mr_refcnt == 0);
3493 3561
3494 3562 if (srs_flag == SRS_CONDEMNED) {
3495 3563 /*
3496 3564 * The SRS is going away. We need to unbind the SRS and SR
3497 3565 * threads before removing from the global SRS list. Otherwise
3498 3566 * there is a small window where the cpu reconfig callbacks
3499 3567 * may miss the SRS in the list walk and DR could fail since
3500 3568 * there are still bound threads.
3501 3569 */
3502 3570 mac_srs_threads_unbind(mac_srs);
3503 3571 mac_srs_remove_glist(mac_srs);
3504 3572 }
3505 3573 /*
3506 3574 * Wakeup the SRS worker and poll threads.
3507 3575 */
3508 3576 mutex_enter(&mac_srs->srs_lock);
3509 3577 mac_srs->srs_state |= srs_flag;
3510 3578 cv_signal(&mac_srs->srs_async);
3511 3579 cv_signal(&mac_srs->srs_cv);
3512 3580 mutex_exit(&mac_srs->srs_lock);
3513 3581 }
3514 3582
3515 3583 /*
3516 3584  * On the Rx side, the quiescing is done bottom up. After the Rx upcalls
3517 3585  * from the driver are done, the Rx SRS is quiesced and only then can
3518 3586 * we signal the soft rings. Thus this function can't be called arbitrarily
3519 3587 * without satisfying the prerequisites. On the Tx side, the threads from
3520 3588  * top need to be quiesced, then the Tx SRS, and only then can we signal the
3521 3589 * Tx soft rings.
3522 3590 */
3523 3591 static void
3524 3592 mac_srs_soft_rings_signal(mac_soft_ring_set_t *mac_srs, uint_t sr_flag)
3525 3593 {
3526 3594 mac_soft_ring_t *softring;
3527 3595
3528 3596 for (softring = mac_srs->srs_soft_ring_head; softring != NULL;
3529 3597 softring = softring->s_ring_next)
3530 3598 mac_soft_ring_signal(softring, sr_flag);
3531 3599 }
3532 3600
3533 3601 /*
3534 3602 * The block comment above mac_rx_classify_flow_state_change explains the
3535 3603 * background. At this point the SRS is quiesced and we need to restart the
3536 3604 * SRS worker, poll, and softring threads. The SRS worker thread serves as
3537 3605  * the master controller. The steps involved are described below in the function.
3538 3606 */
3539 3607 void
3540 3608 mac_srs_worker_restart(mac_soft_ring_set_t *mac_srs)
3541 3609 {
3542 3610 boolean_t iam_rx_srs;
3543 3611 mac_soft_ring_t *softring;
3544 3612
3545 3613 ASSERT(MUTEX_HELD(&mac_srs->srs_lock));
3546 3614 if ((mac_srs->srs_type & SRST_TX) != 0) {
3547 3615 iam_rx_srs = B_FALSE;
3548 3616 ASSERT((mac_srs->srs_state &
3549 3617 (SRS_POLL_THR_QUIESCED | SRS_QUIESCE_DONE | SRS_QUIESCE)) ==
3550 3618 (SRS_QUIESCE_DONE | SRS_QUIESCE));
3551 3619 } else {
3552 3620 iam_rx_srs = B_TRUE;
3553 3621 ASSERT((mac_srs->srs_state &
3554 3622 (SRS_QUIESCE_DONE | SRS_QUIESCE)) ==
3555 3623 (SRS_QUIESCE_DONE | SRS_QUIESCE));
3556 3624 if (mac_srs->srs_poll_thr != NULL) {
3557 3625 ASSERT((mac_srs->srs_state & SRS_POLL_THR_QUIESCED) ==
3558 3626 SRS_POLL_THR_QUIESCED);
3559 3627 }
3560 3628 }
3561 3629
3562 3630 /*
3563 3631 * Signal any quiesced soft ring workers to restart and wait for the
3564 3632 * soft ring down count to come down to zero.
3565 3633 */
3566 3634 if (mac_srs->srs_soft_ring_quiesced_count != 0) {
3567 3635 for (softring = mac_srs->srs_soft_ring_head; softring != NULL;
3568 3636 softring = softring->s_ring_next) {
3569 3637 if (!(softring->s_ring_state & S_RING_QUIESCE))
3570 3638 continue;
3571 3639 mac_soft_ring_signal(softring, S_RING_RESTART);
3572 3640 }
3573 3641 while (mac_srs->srs_soft_ring_quiesced_count != 0)
3574 3642 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3575 3643 }
3576 3644
3577 3645 mac_srs->srs_state &= ~(SRS_QUIESCE_DONE | SRS_QUIESCE | SRS_RESTART);
3578 3646 if (iam_rx_srs && mac_srs->srs_poll_thr != NULL) {
3579 3647 /*
3580 3648 * Signal the poll thread and ask it to restart. Wait till it
3581 3649 * actually restarts and the SRS_POLL_THR_QUIESCED flag gets
3582 3650 * cleared.
3583 3651 */
3584 3652 mac_srs->srs_state |= SRS_POLL_THR_RESTART;
3585 3653 cv_signal(&mac_srs->srs_cv);
3586 3654 while (mac_srs->srs_state & SRS_POLL_THR_QUIESCED)
3587 3655 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3588 3656 ASSERT(!(mac_srs->srs_state & SRS_POLL_THR_RESTART));
3589 3657 }
3590 3658 /* Wake up any waiter waiting for the restart to complete */
3591 3659 mac_srs->srs_state |= SRS_RESTART_DONE;
3592 3660 cv_signal(&mac_srs->srs_quiesce_done_cv);
3593 3661 }
3594 3662
3595 3663 static void
3596 3664 mac_srs_worker_unbind(mac_soft_ring_set_t *mac_srs)
3597 3665 {
3598 3666 mutex_enter(&mac_srs->srs_lock);
3599 3667 if (!(mac_srs->srs_state & SRS_WORKER_BOUND)) {
3600 3668 ASSERT(mac_srs->srs_worker_cpuid == -1);
3601 3669 mutex_exit(&mac_srs->srs_lock);
3602 3670 return;
3603 3671 }
3604 3672
3605 3673 mac_srs->srs_worker_cpuid = -1;
3606 3674 mac_srs->srs_state &= ~SRS_WORKER_BOUND;
3607 3675 thread_affinity_clear(mac_srs->srs_worker);
3608 3676 mutex_exit(&mac_srs->srs_lock);
3609 3677 }
3610 3678
3611 3679 static void
3612 3680 mac_srs_poll_unbind(mac_soft_ring_set_t *mac_srs)
3613 3681 {
3614 3682 mutex_enter(&mac_srs->srs_lock);
3615 3683 if (mac_srs->srs_poll_thr == NULL ||
3616 3684 (mac_srs->srs_state & SRS_POLL_BOUND) == 0) {
3617 3685 ASSERT(mac_srs->srs_poll_cpuid == -1);
3618 3686 mutex_exit(&mac_srs->srs_lock);
3619 3687 return;
3620 3688 }
3621 3689
3622 3690 mac_srs->srs_poll_cpuid = -1;
3623 3691 mac_srs->srs_state &= ~SRS_POLL_BOUND;
3624 3692 thread_affinity_clear(mac_srs->srs_poll_thr);
3625 3693 mutex_exit(&mac_srs->srs_lock);
3626 3694 }
3627 3695
3628 3696 static void
3629 3697 mac_srs_threads_unbind(mac_soft_ring_set_t *mac_srs)
3630 3698 {
3631 3699 mac_soft_ring_t *soft_ring;
3632 3700
3633 3701 ASSERT(MAC_PERIM_HELD((mac_handle_t)mac_srs->srs_mcip->mci_mip));
3634 3702
3635 3703 mutex_enter(&cpu_lock);
3636 3704 mac_srs_worker_unbind(mac_srs);
3637 3705 if (!(mac_srs->srs_type & SRST_TX))
3638 3706 mac_srs_poll_unbind(mac_srs);
3639 3707
3640 3708 for (soft_ring = mac_srs->srs_soft_ring_head; soft_ring != NULL;
3641 3709 soft_ring = soft_ring->s_ring_next) {
3642 3710 mac_soft_ring_unbind(soft_ring);
3643 3711 }
3644 3712 mutex_exit(&cpu_lock);
3645 3713 }
3646 3714
3647 3715 /*
3648 3716 * When a CPU is going away, unbind all MAC threads which are bound
3649 3717 * to that CPU. The affinity of the thread to the CPU is saved to allow
3650 3718 * the thread to be rebound to the CPU if it comes back online.
3651 3719 */
3652 3720 static void
3653 3721 mac_walk_srs_and_unbind(int cpuid)
3654 3722 {
3655 3723 mac_soft_ring_set_t *mac_srs;
3656 3724 mac_soft_ring_t *soft_ring;
3657 3725
3658 3726 rw_enter(&mac_srs_g_lock, RW_READER);
3659 3727
3660 3728 if ((mac_srs = mac_srs_g_list) == NULL)
3661 3729 goto done;
3662 3730
3663 3731 for (; mac_srs != NULL; mac_srs = mac_srs->srs_next) {
3664 3732 if (mac_srs->srs_worker_cpuid == cpuid) {
3665 3733 mac_srs->srs_worker_cpuid_save = cpuid;
3666 3734 mac_srs_worker_unbind(mac_srs);
3667 3735 }
3668 3736
3669 3737 if (!(mac_srs->srs_type & SRST_TX)) {
3670 3738 if (mac_srs->srs_poll_cpuid == cpuid) {
3671 3739 mac_srs->srs_poll_cpuid_save = cpuid;
3672 3740 mac_srs_poll_unbind(mac_srs);
3673 3741 }
3674 3742 }
3675 3743
3676 3744 /* Next tackle the soft rings associated with the srs */
3677 3745 mutex_enter(&mac_srs->srs_lock);
3678 3746 for (soft_ring = mac_srs->srs_soft_ring_head; soft_ring != NULL;
3679 3747 soft_ring = soft_ring->s_ring_next) {
3680 3748 if (soft_ring->s_ring_cpuid == cpuid) {
3681 3749 soft_ring->s_ring_cpuid_save = cpuid;
3682 3750 mac_soft_ring_unbind(soft_ring);
3683 3751 }
3684 3752 }
3685 3753 mutex_exit(&mac_srs->srs_lock);
3686 3754 }
3687 3755 done:
3688 3756 rw_exit(&mac_srs_g_lock);
3689 3757 }
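
A hedged sketch of how this walker would be driven: a cpu_setup callback
registered under cpu_lock reacts to DR events, unbinding on offline and
rebinding on online. The registration call and event names come from the
illumos CPU DR framework; mac_walk_srs_and_bind() is assumed to be the
bind-side counterpart of the function above, and the callback name here is
illustrative:

    /* ARGSUSED */
    static int
    example_srs_cpu_setup(cpu_setup_t what, int id, void *arg)
    {
	    ASSERT(MUTEX_HELD(&cpu_lock));
	    switch (what) {
	    case CPU_ON:
	    case CPU_CPUPART_IN:
		    mac_walk_srs_and_bind(id);	/* restore saved affinity */
		    break;
	    case CPU_OFF:
	    case CPU_CPUPART_OUT:
		    mac_walk_srs_and_unbind(id);	/* save and clear affinity */
		    break;
	    default:
		    break;
	    }
	    return (0);
    }

    /* registered once at module init, e.g.: */
    mutex_enter(&cpu_lock);
    register_cpu_setup_func(example_srs_cpu_setup, NULL);
    mutex_exit(&cpu_lock);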
3690 3758
3691 3759 /* TX SETUP and TEARDOWN ROUTINES */
3692 3760
3693 3761 /*
3694 3762 * XXXHIO need to make sure the two mac_tx_srs_{add,del}_ring()
3695 3763 * handle the case where the number of rings is one. I.e. there is
3696 3764 * a ring pointed to by mac_srs->srs_tx_arg2.
3697 3765 */
3698 3766 void
3699 3767 mac_tx_srs_add_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
3700 3768 {
3701 3769 mac_client_impl_t *mcip = mac_srs->srs_mcip;
3702 3770 mac_soft_ring_t *soft_ring;
3703 3771 int count = mac_srs->srs_tx_ring_count;
3704 3772 uint32_t soft_ring_type = ST_RING_TX;
3705 3773 uint_t ring_info;
3706 3774
3707 3775 ASSERT(mac_srs->srs_state & SRS_QUIESCE);
3708 3776 ring_info = mac_hwring_getinfo((mac_ring_handle_t)tx_ring);
3709 3777 if (mac_tx_serialize || (ring_info & MAC_RING_TX_SERIALIZE))
3710 3778 soft_ring_type |= ST_RING_WORKER_ONLY;
3711 3779 soft_ring = mac_soft_ring_create(count, 0,
3712 3780 soft_ring_type, maxclsyspri, mcip, mac_srs, -1,
3713 3781 NULL, mcip, (mac_resource_handle_t)tx_ring);
3714 3782 mac_srs->srs_tx_ring_count++;
3715 3783 mac_srs_update_fanout_list(mac_srs);
3716 3784 /*
3717 3785 * put this soft ring in quiesce mode too so when we restart
3718 3786 * all soft rings in the srs are in the same state.
3719 3787 */
3720 3788 mac_soft_ring_signal(soft_ring, S_RING_QUIESCE);
3721 3789 }
3722 3790
3723 3791 static void
3724 3792 mac_soft_ring_remove(mac_soft_ring_set_t *mac_srs, mac_soft_ring_t *softring)
3725 3793 {
3726 3794 int sringcnt;
3727 3795
3728 3796 mutex_enter(&mac_srs->srs_lock);
3729 3797 sringcnt = mac_srs->srs_soft_ring_count;
3730 3798 ASSERT(sringcnt > 0);
3731 3799 mac_soft_ring_signal(softring, S_RING_CONDEMNED);
3732 3800
3733 3801 ASSERT(mac_srs->srs_soft_ring_condemned_count == 0);
3734 3802 while (mac_srs->srs_soft_ring_condemned_count != 1)
3735 3803 cv_wait(&mac_srs->srs_async, &mac_srs->srs_lock);
3736 3804
3737 3805 if (softring == mac_srs->srs_soft_ring_head) {
3738 3806 mac_srs->srs_soft_ring_head = softring->s_ring_next;
3739 3807 if (mac_srs->srs_soft_ring_head != NULL) {
3740 3808 mac_srs->srs_soft_ring_head->s_ring_prev = NULL;
3741 3809 } else {
3742 3810 mac_srs->srs_soft_ring_tail = NULL;
3743 3811 }
3744 3812 } else {
3745 3813 softring->s_ring_prev->s_ring_next =
3746 3814 softring->s_ring_next;
3747 3815 if (softring->s_ring_next != NULL) {
3748 3816 softring->s_ring_next->s_ring_prev =
3749 3817 softring->s_ring_prev;
3750 3818 } else {
3751 3819 mac_srs->srs_soft_ring_tail =
3752 3820 softring->s_ring_prev;
3753 3821 }
3754 3822 }
3755 3823 mac_srs->srs_soft_ring_count--;
3756 3824
3757 3825 mac_srs->srs_soft_ring_condemned_count--;
3758 3826 mutex_exit(&mac_srs->srs_lock);
3759 3827
3760 3828 mac_soft_ring_free(softring);
3761 3829 }
3762 3830
3763 3831 void
3764 3832 mac_tx_srs_del_ring(mac_soft_ring_set_t *mac_srs, mac_ring_t *tx_ring)
3765 3833 {
3766 3834 int i;
3767 3835 mac_soft_ring_t *soft_ring, *remove_sring;
3768 3836 mac_client_impl_t *mcip = mac_srs->srs_mcip;
3769 3837
3770 3838 mutex_enter(&mac_srs->srs_lock);
3771 3839 for (i = 0; i < mac_srs->srs_tx_ring_count; i++) {
3772 3840 soft_ring = mac_srs->srs_tx_soft_rings[i];
3773 3841 if (soft_ring->s_ring_tx_arg2 == tx_ring)
3774 3842 break;
3775 3843 }
3776 3844 mutex_exit(&mac_srs->srs_lock);
3777 3845 ASSERT(i < mac_srs->srs_tx_ring_count);
3778 3846 remove_sring = soft_ring;
3779 3847 /*
3780 3848 * In the case of aggr, the soft ring associated with a Tx ring
3781 3849 * is also stored in st_soft_rings[] array. That entry should
3782 3850 * be removed.
3783 3851 */
3784 - if (mcip->mci_state_flags & MCIS_IS_AGGR) {
3852 + if (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) {
3785 3853 mac_srs_tx_t *tx = &mac_srs->srs_tx;
3786 3854
3787 3855 ASSERT(tx->st_soft_rings[tx_ring->mr_index] == remove_sring);
3788 3856 tx->st_soft_rings[tx_ring->mr_index] = NULL;
3789 3857 }
3790 3858 mac_soft_ring_remove(mac_srs, remove_sring);
3791 3859 mac_srs_update_fanout_list(mac_srs);
3792 3860 }
3793 3861
3794 3862 /*
3795 3863 * mac_tx_srs_setup():
3796 3864  * Used to set up Tx rings. If no free Tx ring is available, then the
3797 3865  * default Tx ring is used.
3798 3866 */
3799 3867 void
3800 3868 mac_tx_srs_setup(mac_client_impl_t *mcip, flow_entry_t *flent)
3801 3869 {
3802 3870 mac_impl_t *mip = mcip->mci_mip;
3803 3871 mac_soft_ring_set_t *tx_srs = flent->fe_tx_srs;
3804 3872 int i;
3805 3873 int tx_ring_count = 0;
3806 3874 uint32_t soft_ring_type;
3807 3875 mac_group_t *grp = NULL;
3808 3876 mac_ring_t *ring;
3809 3877 mac_srs_tx_t *tx = &tx_srs->srs_tx;
3810 3878 boolean_t is_aggr;
3811 3879 uint_t ring_info = 0;
3812 3880
3813 - is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR) != 0;
3881 + is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT) != 0;
3814 3882 grp = flent->fe_tx_ring_group;
3815 3883 if (grp == NULL) {
3816 3884 ring = (mac_ring_t *)mip->mi_default_tx_ring;
3817 3885 goto no_group;
3818 3886 }
3819 3887 tx_ring_count = grp->mrg_cur_count;
3820 3888 ring = grp->mrg_rings;
3821 3889 /*
3822 3890 * An attempt is made to reserve 'tx_ring_count' number
3823 3891  * of Tx rings. If tx_ring_count is 0, the default Tx ring
3824 3892 * is used. If it is 1, an attempt is made to reserve one
3825 3893 * Tx ring. In both the cases, the ring information is
3826 3894 * stored in Tx SRS. If multiple Tx rings are specified,
3827 3895 * then each Tx ring will have a Tx-side soft ring. All
3828 3896  * these soft rings will hang off the Tx SRS.
3829 3897 */
3830 3898 switch (grp->mrg_state) {
3831 3899 case MAC_GROUP_STATE_SHARED:
3832 3900 case MAC_GROUP_STATE_RESERVED:
3833 3901 if (tx_ring_count <= 1 && !is_aggr) {
3834 3902 no_group:
3835 3903 if (ring != NULL &&
3836 3904 ring->mr_state != MR_INUSE) {
3837 3905 (void) mac_start_ring(ring);
3838 3906 ring_info = mac_hwring_getinfo(
3839 3907 (mac_ring_handle_t)ring);
3840 3908 }
3841 3909 tx->st_arg2 = (void *)ring;
3842 3910 mac_tx_srs_stat_recreate(tx_srs, B_FALSE);
3843 3911 if (tx_srs->srs_type & SRST_BW_CONTROL) {
3844 3912 tx->st_mode = SRS_TX_BW;
3845 3913 } else if (mac_tx_serialize ||
3846 3914 (ring_info & MAC_RING_TX_SERIALIZE)) {
3847 3915 tx->st_mode = SRS_TX_SERIALIZE;
3848 3916 } else {
3849 3917 tx->st_mode = SRS_TX_DEFAULT;
3850 3918 }
3851 3919 break;
3852 3920 }
3853 3921 soft_ring_type = ST_RING_TX;
3854 3922 if (tx_srs->srs_type & SRST_BW_CONTROL) {
3855 3923 tx->st_mode = is_aggr ?
3856 3924 SRS_TX_BW_AGGR : SRS_TX_BW_FANOUT;
3857 3925 } else {
3858 3926 tx->st_mode = is_aggr ? SRS_TX_AGGR :
3859 3927 SRS_TX_FANOUT;
3860 3928 }
3861 3929 for (i = 0; i < tx_ring_count; i++) {
3862 3930 ASSERT(ring != NULL);
3863 3931 switch (ring->mr_state) {
3864 3932 case MR_INUSE:
3865 3933 case MR_FREE:
3866 3934 ASSERT(ring->mr_srs == NULL);
3867 3935
3868 3936 if (ring->mr_state != MR_INUSE)
3869 3937 (void) mac_start_ring(ring);
3870 3938 ring_info = mac_hwring_getinfo(
3871 3939 (mac_ring_handle_t)ring);
3872 3940 if (mac_tx_serialize || (ring_info &
3873 3941 MAC_RING_TX_SERIALIZE)) {
3874 3942 soft_ring_type |=
3875 3943 ST_RING_WORKER_ONLY;
3876 3944 }
3877 3945 (void) mac_soft_ring_create(i, 0,
3878 3946 soft_ring_type, maxclsyspri,
3879 3947 mcip, tx_srs, -1, NULL, mcip,
3880 3948 (mac_resource_handle_t)ring);
3881 3949 break;
3882 3950 default:
3883 3951 cmn_err(CE_PANIC,
3884 3952 "srs_setup: mcip = %p "
3885 3953 "trying to add UNKNOWN ring = %p\n",
3886 3954 (void *)mcip, (void *)ring);
3887 3955 break;
3888 3956 }
3889 3957 ring = ring->mr_next;
3890 3958 }
3891 3959 mac_srs_update_fanout_list(tx_srs);
3892 3960 break;
3893 3961 default:
3894 3962 ASSERT(B_FALSE);
3895 3963 break;
3896 3964 }
3897 3965 tx->st_func = mac_tx_get_func(tx->st_mode);
3898 3966 if (is_aggr) {
3899 3967 VERIFY(i_mac_capab_get((mac_handle_t)mip,
3900 3968 MAC_CAPAB_AGGR, &tx->st_capab_aggr));
3901 3969 }
3902 3970 DTRACE_PROBE3(tx__srs___setup__return, mac_soft_ring_set_t *, tx_srs,
3903 3971 int, tx->st_mode, int, tx_srs->srs_tx_ring_count);
3904 3972 }
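
For review convenience, the st_mode selection above condenses to the following
(a summary of the code, not a normative table):

    tx_ring_count <= 1 and not aggr:
	SRS_TX_BW		if SRST_BW_CONTROL
	SRS_TX_SERIALIZE	if mac_tx_serialize or MAC_RING_TX_SERIALIZE
	SRS_TX_DEFAULT		otherwise
    tx_ring_count > 1 or aggr:
	SRS_TX_BW_AGGR or SRS_TX_BW_FANOUT	if SRST_BW_CONTROL
	SRS_TX_AGGR or SRS_TX_FANOUT		otherwise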
3905 3973
3906 3974 /*
3907 3975 * Update the fanout of a client if its recorded link speed doesn't match
3908 3976 * its current link speed.
3909 3977 */
3910 3978 void
3911 3979 mac_fanout_recompute_client(mac_client_impl_t *mcip, cpupart_t *cpupart)
3912 3980 {
3913 3981 uint64_t link_speed;
3914 3982 mac_resource_props_t *mcip_mrp;
3915 3983 flow_entry_t *flent = mcip->mci_flent;
3916 3984 mac_soft_ring_set_t *rx_srs;
3917 3985 mac_cpus_t *srs_cpu;
3918 3986 int soft_ring_count, maxcpus;
3919 3987
3920 3988 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
3921 3989
3922 3990 link_speed = mac_client_stat_get(mcip->mci_flent->fe_mcip,
3923 3991 MAC_STAT_IFSPEED);
3924 3992
3925 3993 if ((link_speed != 0) &&
3926 3994 (link_speed != mcip->mci_flent->fe_nic_speed)) {
3927 3995 mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
3928 3996 /*
3929 3997 * Before calling mac_fanout_setup(), check to see if
3930 3998 * the SRSes already have the right number of soft
3931 3999 * rings. mac_fanout_setup() is a heavy duty operation
3932 4000 * where new cpu bindings are done for SRS and soft
3933 4001 * ring threads and interrupts re-targeted.
3934 4002 */
3935 4003 maxcpus = (cpupart != NULL) ? cpupart->cp_ncpus : ncpus;
3936 4004 soft_ring_count = mac_compute_soft_ring_count(flent,
3937 4005 flent->fe_rx_srs_cnt - 1, maxcpus);
3938 4006 /*
3939 4007 * If soft_ring_count returned by
3940 4008 * mac_compute_soft_ring_count() is 0, bump it
3941 4009  * up by 1 because we always have at least one
3942 4010 * TCP, UDP, and OTH soft ring associated with
3943 4011 * an SRS.
3944 4012 */
3945 4013 soft_ring_count = (soft_ring_count == 0) ?
3946 4014 1 : soft_ring_count;
3947 4015 rx_srs = flent->fe_rx_srs[0];
3948 4016 srs_cpu = &rx_srs->srs_cpu;
3949 4017 if (soft_ring_count != srs_cpu->mc_rx_fanout_cnt) {
3950 4018 mac_fanout_setup(mcip, flent, mcip_mrp,
3951 4019 mac_rx_deliver, mcip, NULL, cpupart);
3952 4020 }
3953 4021 }
3954 4022 }
3955 4023
3956 4024 /*
3957 4025 * Walk through the list of mac clients for the MAC.
3958 4026 * For each active mac client, recompute the number of soft rings
3959 4027  * associated with it, but only if the current speed differs
3960 4028 * from the speed that was previously used for soft ring computation.
3961 4029  * If the cable is disconnected while the NIC is started, we would get
3962 4030  * a notification with speed set to 0. We do not recompute in that case.
3963 4031 */
3964 4032 void
3965 4033 mac_fanout_recompute(mac_impl_t *mip)
3966 4034 {
3967 4035 mac_client_impl_t *mcip;
3968 4036 cpupart_t *cpupart;
3969 4037 boolean_t use_default;
3970 4038 mac_resource_props_t *mrp, *emrp;
3971 4039
3972 4040 i_mac_perim_enter(mip);
3973 4041 if ((mip->mi_state_flags & MIS_IS_VNIC) != 0 ||
3974 4042 mip->mi_linkstate != LINK_STATE_UP) {
3975 4043 i_mac_perim_exit(mip);
3976 4044 return;
3977 4045 }
3978 4046
3979 4047 for (mcip = mip->mi_clients_list; mcip != NULL;
3980 4048 mcip = mcip->mci_client_next) {
3981 4049 if ((mcip->mci_state_flags & MCIS_SHARE_BOUND) != 0 ||
3982 4050 !MCIP_DATAPATH_SETUP(mcip))
3983 4051 continue;
3984 4052 mrp = MCIP_RESOURCE_PROPS(mcip);
3985 4053 emrp = MCIP_EFFECTIVE_PROPS(mcip);
3986 4054 use_default = B_FALSE;
3987 4055 pool_lock();
3988 4056 cpupart = mac_pset_find(mrp, &use_default);
3989 4057 mac_fanout_recompute_client(mcip, cpupart);
3990 4058 mac_set_pool_effective(use_default, cpupart, mrp, emrp);
3991 4059 pool_unlock();
3992 4060 }
3993 4061 i_mac_perim_exit(mip);
3994 4062 }
3995 4063
3996 4064 /*
3997 4065 * Given a MAC, change the polling state for all its MAC clients. 'enable' is
3998 4066 * B_TRUE to enable polling or B_FALSE to disable. Polling is enabled by
3999 4067 * default.
4000 4068 */
4001 4069 void
4002 4070 mac_poll_state_change(mac_handle_t mh, boolean_t enable)
4003 4071 {
4004 4072 mac_impl_t *mip = (mac_impl_t *)mh;
4005 4073 mac_client_impl_t *mcip;
4006 4074
4007 4075 i_mac_perim_enter(mip);
4008 4076 if (enable)
4009 4077 mip->mi_state_flags &= ~MIS_POLL_DISABLE;
4010 4078 else
4011 4079 mip->mi_state_flags |= MIS_POLL_DISABLE;
4012 4080 for (mcip = mip->mi_clients_list; mcip != NULL;
4013 4081 mcip = mcip->mci_client_next)
4014 4082 mac_client_update_classifier(mcip, B_TRUE);
4015 4083 i_mac_perim_exit(mip);
4016 4084 }
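
Illustrative use (hypothetical consumer code, not from this patch): a
component holding a mac handle can bracket a reconfiguration by forcing
interrupt-driven receive and then restoring dynamic polling:

    mac_poll_state_change(mh, B_FALSE);	/* disable polling: rx via interrupts */
    /* ... reconfigure or debug ... */
    mac_poll_state_change(mh, B_TRUE);	/* restore the default polling behavior */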