Print this page
11490 SRS ring polling disabled for VLANs
11491 Want DLS bypass for VLAN traffic
11492 add VLVF bypass to ixgbe core
2869 duplicate packets with vnics over aggrs
11489 DLS stat delete and aggr kstat can deadlock
Portions contributed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/ip6_input.c
+++ new/usr/src/uts/common/inet/ip/ip6_input.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
24 24 *
25 25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 + * Copyright 2018 Joyent, Inc.
26 27 */
27 28 /* Copyright (c) 1990 Mentat Inc. */
28 29
29 30 #include <sys/types.h>
30 31 #include <sys/stream.h>
31 32 #include <sys/dlpi.h>
32 33 #include <sys/stropts.h>
33 34 #include <sys/sysmacros.h>
34 35 #include <sys/strsubr.h>
35 36 #include <sys/strlog.h>
36 37 #include <sys/strsun.h>
37 38 #include <sys/zone.h>
38 39 #define _SUN_TPI_VERSION 2
39 40 #include <sys/tihdr.h>
40 41 #include <sys/xti_inet.h>
41 42 #include <sys/ddi.h>
42 43 #include <sys/sunddi.h>
43 44 #include <sys/cmn_err.h>
44 45 #include <sys/debug.h>
45 46 #include <sys/kobj.h>
46 47 #include <sys/modctl.h>
47 48 #include <sys/atomic.h>
48 49 #include <sys/policy.h>
49 50 #include <sys/priv.h>
50 51
51 52 #include <sys/systm.h>
52 53 #include <sys/param.h>
53 54 #include <sys/kmem.h>
54 55 #include <sys/sdt.h>
55 56 #include <sys/socket.h>
56 57 #include <sys/vtrace.h>
57 58 #include <sys/isa_defs.h>
58 59 #include <sys/mac.h>
59 60 #include <net/if.h>
60 61 #include <net/if_arp.h>
61 62 #include <net/route.h>
62 63 #include <sys/sockio.h>
63 64 #include <netinet/in.h>
64 65 #include <net/if_dl.h>
65 66
66 67 #include <inet/common.h>
67 68 #include <inet/mi.h>
68 69 #include <inet/mib2.h>
69 70 #include <inet/nd.h>
70 71 #include <inet/arp.h>
71 72 #include <inet/snmpcom.h>
72 73 #include <inet/kstatcom.h>
73 74
74 75 #include <netinet/igmp_var.h>
75 76 #include <netinet/ip6.h>
76 77 #include <netinet/icmp6.h>
77 78 #include <netinet/sctp.h>
78 79
79 80 #include <inet/ip.h>
80 81 #include <inet/ip_impl.h>
81 82 #include <inet/ip6.h>
82 83 #include <inet/ip6_asp.h>
83 84 #include <inet/optcom.h>
84 85 #include <inet/tcp.h>
85 86 #include <inet/tcp_impl.h>
86 87 #include <inet/ip_multi.h>
87 88 #include <inet/ip_if.h>
88 89 #include <inet/ip_ire.h>
89 90 #include <inet/ip_ftable.h>
90 91 #include <inet/ip_rts.h>
91 92 #include <inet/ip_ndp.h>
92 93 #include <inet/ip_listutils.h>
93 94 #include <netinet/igmp.h>
94 95 #include <netinet/ip_mroute.h>
95 96 #include <inet/ipp_common.h>
96 97
97 98 #include <net/pfkeyv2.h>
98 99 #include <inet/sadb.h>
99 100 #include <inet/ipsec_impl.h>
100 101 #include <inet/ipdrop.h>
101 102 #include <inet/ip_netinfo.h>
102 103 #include <inet/ilb_ip.h>
103 104 #include <sys/squeue_impl.h>
104 105 #include <sys/squeue.h>
105 106
106 107 #include <sys/ethernet.h>
107 108 #include <net/if_types.h>
108 109 #include <sys/cpuvar.h>
109 110
110 111 #include <ipp/ipp.h>
111 112 #include <ipp/ipp_impl.h>
112 113 #include <ipp/ipgpc/ipgpc.h>
113 114
114 115 #include <sys/pattr.h>
115 116 #include <inet/ipclassifier.h>
116 117 #include <inet/sctp_ip.h>
117 118 #include <inet/sctp/sctp_impl.h>
118 119 #include <inet/udp_impl.h>
119 120 #include <sys/sunddi.h>
120 121
121 122 #include <sys/tsol/label.h>
122 123 #include <sys/tsol/tnet.h>
123 124
124 125 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
125 126
126 127 #ifdef DEBUG
127 128 extern boolean_t skip_sctp_cksum;
128 129 #endif
129 130
130 131 static void ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);
131 132
132 133 static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
133 134 ip_recv_attr_t *);
134 135
135 136 #pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
↓ open down ↓ |
100 lines elided |
↑ open up ↑ |
136 137
137 138 /*
138 139 * Direct read side procedure capable of dealing with chains. GLDv3 based
139 140 * drivers call this function directly with mblk chains while STREAMS
140 141 * read side procedure ip_rput() calls this for single packet with ip_ring
141 142 * set to NULL to process one packet at a time.
142 143 *
143 144 * The ill will always be valid if this function is called directly from
144 145 * the driver.
145 146 *
146 - * If ip_input_v6() is called from GLDv3:
147 + * If this chain is part of a VLAN stream, then the VLAN tag is
148 + * stripped from the MAC header before being delivered to this
149 + * function.
147 150 *
148 - * - This must be a non-VLAN IP stream.
149 - * - 'mp' is either an untagged or a special priority-tagged packet.
150 - * - Any VLAN tag that was in the MAC header has been stripped.
151 - *
152 151 * If the IP header in a packet is not 32-bit aligned, every message in the
153 152 * chain will be aligned before further operations. This is required on the
154 153 * SPARC platform.
155 154 */
156 155 void
157 156 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
158 157 struct mac_header_info_s *mhip)
159 158 {
/*
 * Hand the whole chain to the common input routine. The target squeue,
 * 'last' and 'cnt' arguments are all NULL on this path and the returned
 * chain pointer is deliberately discarded.
 */
160 159 (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
161 160 NULL);
162 161 }
163 162
164 163 /*
165 164 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
166 165 * a chain of packets in the poll mode. The packets have gone through the
167 166 * data link processing but not IP processing. For performance and latency
168 167 * reasons, the squeue wants to process the chain in line instead of feeding
169 168 * it back via ip_input path.
170 169 *
171 170 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP so that ip_fanout_v6
172 171 * will pass back any TCP packets matching the target sqp to
173 172 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
174 173 * ip_input_v6 and ip_fanout_v6 as normal.
175 174 * The TCP packets that match the target squeue are returned to the caller
176 175 * as a b_next chain after each packet has been prepended with an mblk
177 176 * from ip_recv_attr_to_mblk.
 *
 * When at least one packet is accepted, *last is set to the tail of the
 * returned chain and *cnt to the number of packets on it. When NULL is
 * returned, neither *last nor *cnt is written.
178 177 */
179 178 mblk_t *
180 179 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
181 180 mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
182 181 {
/* No MAC header info is passed (mhip == NULL) on this path. */
183 182 return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
184 183 last, cnt));
185 184 }
186 185
187 186 /*
188 187 * Used by ip_input_v6 and ip_accept_tcp_v6
189 188 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
190 189 * only used by ip_input_v6.
 *
 * Walks the b_next chain starting at mp_chain, detaching one packet at a
 * time and passing it to ill->ill_inputfn. Any packet handed back through
 * ira_target_sqp_mp is appended to an "accepted" chain. If that chain is
 * non-empty its head is returned, with *last set to its tail and *cnt to
 * its length; otherwise NULL is returned and *last / *cnt are not written.
191 190 */
192 191 mblk_t *
193 192 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
194 193 struct mac_header_info_s *mhip, squeue_t *target_sqp,
195 194 mblk_t **last, uint_t *cnt)
196 195 {
197 196 mblk_t *mp;
198 197 ip6_t *ip6h;
199 198 ip_recv_attr_t iras; /* Receive attributes */
200 199 rtc_t rtc;
201 200 iaflags_t chain_flags = 0; /* Fixed for chain */
202 201 mblk_t *ahead = NULL; /* Accepted head */
203 202 mblk_t *atail = NULL; /* Accepted tail */
204 203 uint_t acnt = 0; /* Accepted count */
205 204
206 205 ASSERT(mp_chain != NULL);
207 206 ASSERT(ill != NULL);
208 207
209 208 /* These ones do not change as we loop over packets */
210 209 iras.ira_ill = iras.ira_rill = ill;
211 210 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
212 211 iras.ira_rifindex = iras.ira_ruifindex;
213 212 iras.ira_sqp = NULL;
214 213 iras.ira_ring = ip_ring;
215 214 /* For ECMP and outbound transmit ring selection */
216 215 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
217 216
218 217 iras.ira_target_sqp = target_sqp;
219 218 iras.ira_target_sqp_mp = NULL;
220 219 if (target_sqp != NULL)
221 220 chain_flags |= IRAF_TARGET_SQP;
222 221
223 222 /*
224 223 * We try to have a mhip pointer when possible, but
225 224 * it might be NULL in some cases. In those cases we
226 225 * have to assume unicast.
227 226 */
228 227 iras.ira_mhip = mhip;
229 228 iras.ira_flags = 0;
230 229 if (mhip != NULL) {
/* Any other destination type leaves chain_flags unchanged. */
231 230 switch (mhip->mhi_dsttype) {
232 231 case MAC_ADDRTYPE_MULTICAST :
233 232 chain_flags |= IRAF_L2DST_MULTICAST;
234 233 break;
235 234 case MAC_ADDRTYPE_BROADCAST :
236 235 chain_flags |= IRAF_L2DST_BROADCAST;
237 236 break;
238 237 }
239 238 }
240 239
241 240 /*
242 241 * Initialize the one-element route cache.
243 242 *
244 243 * We do ire caching from one iteration to
245 244 * another. In the event the packet chain contains
246 245 * all packets from the same dst, this caching saves
247 246 * an ire_route_recursive for each of the succeeding
248 247 * packets in a packet chain.
249 248 */
250 249 rtc.rtc_ire = NULL;
251 250 rtc.rtc_ip6addr = ipv6_all_zeros;
252 251
253 252 /* Loop over b_next */
254 253 for (mp = mp_chain; mp != NULL; mp = mp_chain) {
/* Detach the current packet; mp_chain now heads the remainder. */
255 254 mp_chain = mp->b_next;
256 255 mp->b_next = NULL;
257 256
258 257 /*
259 258 * if db_ref > 1 then copymsg and free original. Packet
260 259 * may be changed and we do not want the other entity
261 260 * who has a reference to this message to trip over the
262 261 * changes. This is a blind change because trying to
263 262 * catch all places that might change the packet is too
264 263 * difficult.
265 264 *
266 265 * This corresponds to the fast path case, where we have
267 266 * a chain of M_DATA mblks. We check the db_ref count
268 267 * of only the 1st data block in the mblk chain. There
269 268 * doesn't seem to be a reason why a device driver would
270 269 * send up data with varying db_ref counts in the mblk
271 270 * chain. In any case the Fast path is a private
272 271 * interface, and our drivers don't do such a thing.
273 272 * Given the above assumption, there is no need to walk
274 273 * down the entire mblk chain (which could have a
275 274 * potential performance problem)
276 275 *
277 276 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
278 277 * to here because of exclusive ip stacks and vnics.
279 278 * Packets transmitted from exclusive stack over vnic
280 279 * can have db_ref > 1 and when it gets looped back to
281 280 * another vnic in a different zone, you have ip_input()
282 281 * getting dblks with db_ref > 1. So if someone
283 282 * complains of TCP performance under this scenario,
284 283 * take a serious look here on the impact of copymsg().
285 284 */
286 285 if (DB_REF(mp) > 1) {
287 286 if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
288 287 continue;
289 288 }
290 289
291 290 /*
292 291 * IP header ptr not aligned?
293 292 * OR IP header not complete in first mblk
294 293 */
295 294 ip6h = (ip6_t *)mp->b_rptr;
296 295 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
297 296 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
298 297 if (mp == NULL)
299 298 continue;
/* mp may have been replaced, so reload the header pointer. */
300 299 ip6h = (ip6_t *)mp->b_rptr;
301 300 }
302 301
303 302 /* Protect against a mix of Ethertypes and IP versions */
304 303 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
305 304 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
306 305 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
307 306 freemsg(mp);
308 307 /* mhip might point into 1st packet in the chain. */
309 308 iras.ira_mhip = NULL;
310 309 continue;
311 310 }
312 311
313 312 /*
314 313 * Check for Martian addrs; we have to explicitly
315 314 * test for zero dst since this is also used as
316 315 * an indication that the rtc is not used.
317 316 */
318 317 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
319 318 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
320 319 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
321 320 freemsg(mp);
322 321 /* mhip might point into 1st packet in the chain. */
323 322 iras.ira_mhip = NULL;
324 323 continue;
325 324 }
326 325 /*
327 326 * Keep L2SRC from a previous packet in chain since mhip
328 327 * might point into an earlier packet in the chain.
329 328 */
330 329 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
331 330
/* Reset the per-packet receive attributes before each delivery. */
332 331 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
333 332 iras.ira_free_flags = 0;
334 333 iras.ira_cred = NULL;
335 334 iras.ira_cpid = NOPID;
336 335 iras.ira_tsl = NULL;
337 336 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */
338 337
339 338 /*
340 339 * We must count all incoming packets, even if they end
341 340 * up being dropped later on. Defer counting bytes until
342 341 * we have the whole IP header in first mblk.
343 342 */
344 343 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
345 344
/* Packet length is the payload length field plus the fixed header. */
346 345 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
347 346 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
348 347 iras.ira_pktlen);
349 348
350 349 /*
351 350 * Call one of:
352 351 * ill_input_full_v6
353 352 * ill_input_short_v6
354 353 * The former is used in the case of TX. See ill_set_inputfn().
355 354 */
356 355 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
357 356
358 357 /* Any references to clean up? No hold on ira_ill */
359 358 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
360 359 ira_cleanup(&iras, B_FALSE);
361 360
362 361 if (iras.ira_target_sqp_mp != NULL) {
363 362 /* Better be called from ip_accept_tcp */
364 363 ASSERT(target_sqp != NULL);
365 364
366 365 /* Found one packet to accept */
367 366 mp = iras.ira_target_sqp_mp;
368 367 iras.ira_target_sqp_mp = NULL;
369 368 ASSERT(ip_recv_attr_is_mblk(mp));
370 369
/* Append to the accepted chain, which is linked through b_next. */
371 370 if (atail != NULL)
372 371 atail->b_next = mp;
373 372 else
374 373 ahead = mp;
375 374 atail = mp;
376 375 acnt++;
377 376 mp = NULL;
378 377 }
379 378 /* mhip might point into 1st packet in the chain. */
380 379 iras.ira_mhip = NULL;
381 380 }
382 381 /* Any remaining references to the route cache? */
383 382 if (rtc.rtc_ire != NULL) {
384 383 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
385 384 ire_refrele(rtc.rtc_ire);
386 385 }
387 386
388 387 if (ahead != NULL) {
389 388 /* Better be called from ip_accept_tcp */
390 389 ASSERT(target_sqp != NULL);
/* 'last' and 'cnt' are only dereferenced on this path. */
391 390 *last = atail;
392 391 *cnt = acnt;
393 392 return (ahead);
394 393 }
395 394
396 395 return (NULL);
397 396 }
398 397
399 398 /*
400 399 * This input function is used when
401 400 * - is_system_labeled()
402 401 *
 * It marks the packet with IRAF_SYSTEM_LABELED, obtains its label through
 * tsol_get_pkt_label() (dropping the packet if that fails), and then
 * continues with ill_input_short_v6().
 *
403 402 * Note that for IPv6 CGTP filtering is handled only when receiving fragment
404 403 * headers, and RSVP uses router alert options, thus we don't need anything
405 404 * extra for them.
406 405 */
407 406 void
408 407 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
409 408 ip_recv_attr_t *ira, rtc_t *rtc)
410 409 {
411 410 ip6_t *ip6h = (ip6_t *)iph_arg;
412 411 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg;
413 412 ill_t *ill = ira->ira_ill;
414 413
415 414 ASSERT(ira->ira_tsl == NULL);
416 415
417 416 /*
418 417 * Attach any necessary label information to
419 418 * this packet
420 419 */
421 420 if (is_system_labeled()) {
422 421 ira->ira_flags |= IRAF_SYSTEM_LABELED;
423 422
424 423 /*
425 424 * This updates ira_cred, ira_tsl and ira_free_flags based
426 425 * on the label.
427 426 */
428 427 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
429 428 if (ip6opt_ls != 0)
430 429 ip0dbg(("tsol_get_pkt_label v6 failed\n"));
431 430 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
432 431 ip_drop_input("ipIfStatsInDiscards", mp, ill);
433 432 freemsg(mp);
434 433 return;
435 434 }
436 435 /* Note that ira_tsl can be NULL here. */
437 436
438 437 /* tsol_get_pkt_label sometimes does pullupmsg */
439 438 ip6h = (ip6_t *)mp->b_rptr;
440 439 }
/* Continue with the common tail of receive processing. */
441 440 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
442 441 }
443 442
444 443 /*
445 444 * Check for IPv6 addresses that should not appear on the wire
446 445 * as either source or destination.
447 446 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
448 447 * to revisit the IPv4-mapped part.
 *
 * Returns B_TRUE if 'addr' is IPv4-mapped, loopback, or (only when 'is_src'
 * is false) the unspecified address; returns B_FALSE otherwise.
449 448 */
450 449 static boolean_t
451 450 ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
452 451 {
453 452 if (IN6_IS_ADDR_V4MAPPED(addr)) {
454 453 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
455 454 return (B_TRUE);
456 455 }
457 456 if (IN6_IS_ADDR_LOOPBACK(addr)) {
458 457 ip1dbg(("ip_input_v6: pkt with loopback addr"));
459 458 return (B_TRUE);
460 459 }
461 460 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
462 461 /*
463 462 * Having :: in the src is ok: it's used for DAD. Only an
 * unspecified destination is rejected here.
464 463 */
465 464 ip1dbg(("ip_input_v6: pkt with unspecified addr"));
466 465 return (B_TRUE);
467 466 }
468 467 return (B_FALSE);
469 468 }
470 469
471 470 /*
472 471 * Routing lookup for IPv6 link-locals.
473 472 * First we look on the inbound interface, then we check for IPMP and
474 473 * look on the upper interface.
475 474 * We update ira_ruifindex if we find the IRE on the upper interface.
 *
 * The result of the first lookup is returned as-is unless it is a
 * reject/blackhole route and 'ill' is under IPMP. Only in that case is the
 * lookup repeated on the IPMP ill, and the result of that second lookup is
 * what gets returned.
476 475 */
477 476 static ire_t *
478 477 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
479 478 uint_t irr_flags, ip_stack_t *ipst)
480 479 {
481 480 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
482 481 ire_t *ire;
483 482
484 483 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
485 484 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
486 485 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
/* A usable route, or an ill that is not under IPMP: we are done. */
487 486 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
488 487 !IS_UNDER_IPMP(ill))
489 488 return (ire);
490 489
491 490 /*
492 491 * When we are using IPMP we need to look for an IRE on both the
493 492 * under and upper interfaces since there are different
494 493 * link-local addresses for the under and upper.
495 494 */
496 495 ill = ipmp_ill_hold_ipmp_ill(ill);
497 496 if (ill == NULL)
498 497 return (ire);
499 498
500 499 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
501 500
/* Drop the first result before repeating the lookup on the IPMP ill. */
502 501 ire_refrele(ire);
503 502 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
504 503 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
505 504 ill_refrele(ill);
506 505 return (ire);
507 506 }
508 507
509 508 /*
510 509 * This is the tail-end of the full receive side packet handling.
511 510 * It can be used directly when the configuration is simple.
 *
 * In order, it: rejects bad source/destination addresses, checks the
 * packet length, runs the physical-in firewall hooks and the observability
 * hook, validates the version field, parses a leading hop-by-hop options
 * header, consults the load balancer (ILB), finds an IRE for the nexthop
 * (using the one-element route cache 'rtc' where possible) and finally
 * calls that IRE's ire_recvfn.
512 511 */
513 512 void
514 513 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
515 514 ip_recv_attr_t *ira, rtc_t *rtc)
516 515 {
517 516 ire_t *ire;
518 517 ill_t *ill = ira->ira_ill;
519 518 ip_stack_t *ipst = ill->ill_ipst;
520 519 uint_t pkt_len;
521 520 ssize_t len;
522 521 ip6_t *ip6h = (ip6_t *)iph_arg;
523 522 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg;
524 523 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb;
525 524 uint_t irr_flags;
/* rptr always tracks the current value of ip6h; undefined after the function */
526 525 #define rptr ((uchar_t *)ip6h)
527 526
528 527 ASSERT(DB_TYPE(mp) == M_DATA);
529 528
530 529 /*
531 530 * Check for source/dest being a bad address: loopback, any, or
532 531 * v4mapped. All of them start with 64 bits of zero.
533 532 */
534 533 if (ip6h->ip6_src.s6_addr32[0] == 0 &&
535 534 ip6h->ip6_src.s6_addr32[1] == 0) {
536 535 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
537 536 ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
538 537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
539 538 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
540 539 freemsg(mp);
541 540 return;
542 541 }
543 542 }
544 543 if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
545 544 ip6h->ip6_dst.s6_addr32[1] == 0) {
546 545 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
547 546 ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
548 547 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
549 548 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
550 549 freemsg(mp);
551 550 return;
552 551 }
553 552 }
554 553
/* Bytes present in the first mblk, compared against the IP length. */
555 554 len = mp->b_wptr - rptr;
556 555 pkt_len = ira->ira_pktlen;
557 556
558 557 /* multiple mblk or too short */
559 558 len -= pkt_len;
560 559 if (len != 0) {
561 560 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
562 561 if (mp == NULL)
563 562 return;
564 563 ip6h = (ip6_t *)mp->b_rptr;
565 564 }
566 565
567 566 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
568 567 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
569 568 int, 0);
570 569 /*
571 570 * The event for packets being received from a 'physical'
572 571 * interface is placed after validation of the source and/or
573 572 * destination address as being local so that packets can be
574 573 * redirected to loopback addresses using ipnat.
575 574 */
576 575 DTRACE_PROBE4(ip6__physical__in__start,
577 576 ill_t *, ill, ill_t *, NULL,
578 577 ip6_t *, ip6h, mblk_t *, mp);
579 578
580 579 if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
581 580 int ll_multicast = 0;
582 581 int error;
583 582 in6_addr_t orig_dst = ip6h->ip6_dst;
584 583
585 584 if (ira->ira_flags & IRAF_L2DST_MULTICAST)
586 585 ll_multicast = HPE_MULTICAST;
587 586 else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
588 587 ll_multicast = HPE_BROADCAST;
589 588
590 589 FW_HOOKS6(ipst->ips_ip6_physical_in_event,
591 590 ipst->ips_ipv6firewall_physical_in,
592 591 ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
593 592
594 593 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
595 594
/* A NULL mp after the hooks means there is nothing left to process. */
596 595 if (mp == NULL)
597 596 return;
598 597
599 598 /* The length could have changed */
600 599 ip6h = (ip6_t *)mp->b_rptr;
601 600 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
602 601 pkt_len = ira->ira_pktlen;
603 602
604 603 /*
605 604 * In case the destination changed we override any previous
606 605 * change to nexthop.
607 606 */
608 607 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
609 608 nexthop = ip6h->ip6_dst;
610 609
611 610 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
612 611 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
613 612 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
614 613 freemsg(mp);
615 614 return;
616 615 }
617 616
618 617 }
619 618
620 619 if (ipst->ips_ip6_observe.he_interested) {
621 620 zoneid_t dzone;
622 621
623 622 /*
624 623 * On the inbound path the src zone will be unknown as
625 624 * this packet has come from the wire.
626 625 */
627 626 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
628 627 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
629 628 }
630 629
631 630 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
632 631 IPV6_DEFAULT_VERS_AND_FLOW) {
633 632 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
634 633 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
635 634 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
636 635 freemsg(mp);
637 636 return;
638 637 }
639 638
640 639 /*
641 640 * For IPv6 we update ira_ip_hdr_length and ira_protocol as
642 641 * we parse the headers, starting with the hop-by-hop options header.
643 642 */
644 643 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
645 644 if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
646 645 ip6_hbh_t *hbhhdr;
647 646 uint_t ehdrlen;
648 647 uint8_t *optptr;
649 648
650 649 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
651 650 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
652 651 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
653 652 freemsg(mp);
654 653 return;
655 654 }
/* Make sure the minimal extension header is in the first mblk. */
656 655 if (mp->b_cont != NULL &&
657 656 rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
658 657 ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
659 658 if (ip6h == NULL) {
660 659 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
661 660 ip_drop_input("ipIfStatsInDiscards", mp, ill);
662 661 freemsg(mp);
663 662 return;
664 663 }
665 664 }
666 665 hbhhdr = (ip6_hbh_t *)&ip6h[1];
/* The length field counts 8-byte units beyond the first 8 bytes. */
667 666 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
668 667
669 668 if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
670 669 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
671 670 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
672 671 freemsg(mp);
673 672 return;
674 673 }
675 674 if (mp->b_cont != NULL &&
676 675 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
677 676 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
678 677 if (ip6h == NULL) {
679 678 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
680 679 ip_drop_input("ipIfStatsInDiscards", mp, ill);
681 680 freemsg(mp);
682 681 return;
683 682 }
/* ip6h was reassigned by the pullup, so recompute hbhhdr. */
684 683 hbhhdr = (ip6_hbh_t *)&ip6h[1];
685 684 }
686 685
687 686 /*
688 687 * Update ira_ip_hdr_length to skip the hop-by-hop header
689 688 * once we get to ip_fanout_v6
690 689 */
691 690 ira->ira_ip_hdr_length += ehdrlen;
692 691 ira->ira_protocol = hbhhdr->ip6h_nxt;
693 692
694 693 optptr = (uint8_t *)&hbhhdr[1];
695 694 switch (ip_process_options_v6(mp, ip6h, optptr,
696 695 ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
697 696 case -1:
698 697 /*
699 698 * Packet has been consumed and any
700 699 * needed ICMP messages sent.
701 700 */
702 701 return;
703 702 case 0:
704 703 /* no action needed */
705 704 break;
706 705 case 1:
707 706 /*
708 707 * Known router alert. Make us handle it as local
709 708 * by setting the nexthop to be the all-host multicast
710 709 * address, and skip multicast membership filter by
711 710 * marking as a router alert.
712 711 */
713 712 ira->ira_flags |= IRAF_ROUTER_ALERT;
714 713 nexthop = ipv6_all_hosts_mcast;
715 714 break;
716 715 }
717 716 }
718 717
719 718 /*
720 719 * Here we check to see if the machine is set up as an
721 720 * L3 loadbalancer and if the incoming packet is for a VIP
722 721 *
723 722 * Check the following:
724 723 * - there is at least a rule
725 724 * - protocol of the packet is supported
726 725 *
727 726 * We don't load balance IPv6 link-locals.
728 727 */
729 728 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
730 729 !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
731 730 in6_addr_t lb_dst;
732 731 int lb_ret;
733 732
734 733 /* For convenience, we just pull up the mblk. */
735 734 if (mp->b_cont != NULL) {
736 735 if (pullupmsg(mp, -1) == 0) {
737 736 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
738 737 ip_drop_input("ipIfStatsInDiscards - pullupmsg",
739 738 mp, ill);
740 739 freemsg(mp);
741 740 return;
742 741 }
743 742 ip6h = (ip6_t *)mp->b_rptr;
744 743 }
745 744 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
746 745 (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
747 746 if (lb_ret == ILB_DROPPED) {
748 747 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
749 748 ip_drop_input("ILB_DROPPED", mp, ill);
750 749 freemsg(mp);
751 750 return;
752 751 }
753 752 if (lb_ret == ILB_BALANCED) {
754 753 /* Set the dst to that of the chosen server */
755 754 nexthop = lb_dst;
756 755 DB_CKSUMFLAGS(mp) = 0;
757 756 }
758 757 }
759 758
760 759 if (ill->ill_flags & ILLF_ROUTER)
761 760 irr_flags = IRR_ALLOCATE;
762 761 else
763 762 irr_flags = IRR_NONE;
764 763
765 764 /* Can not use route cache with TX since the labels can differ */
766 765 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
767 766 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
768 767 ire = ire_multicast(ill);
769 768 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
770 769 ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
771 770 ipst);
772 771 } else {
773 772 /* Match destination and label */
774 773 ire = ire_route_recursive_v6(&nexthop, 0, NULL,
775 774 ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
776 775 irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
777 776 NULL);
778 777 }
779 778 /* Update the route cache so we do the ire_refrele */
780 779 ASSERT(ire != NULL);
781 780 if (rtc->rtc_ire != NULL)
782 781 ire_refrele(rtc->rtc_ire);
783 782 rtc->rtc_ire = ire;
784 783 rtc->rtc_ip6addr = nexthop;
785 784 } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
786 785 rtc->rtc_ire != NULL) {
787 786 /* Use the route cache */
788 787 ire = rtc->rtc_ire;
789 788 } else {
790 789 /* Update the route cache */
791 790 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
792 791 ire = ire_multicast(ill);
793 792 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
794 793 ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
795 794 ipst);
796 795 } else {
797 796 ire = ire_route_recursive_dstonly_v6(&nexthop,
798 797 irr_flags, ira->ira_xmit_hint, ipst);
799 798 }
800 799 ASSERT(ire != NULL);
/* Release the previously cached IRE before caching the new one. */
801 800 if (rtc->rtc_ire != NULL)
802 801 ire_refrele(rtc->rtc_ire);
803 802 rtc->rtc_ire = ire;
804 803 rtc->rtc_ip6addr = nexthop;
805 804 }
806 805
807 806 ire->ire_ib_pkt_count++;
808 807
809 808 /*
810 809 * Based on ire_type and ire_flags call one of:
811 810 * ire_recv_local_v6 - for IRE_LOCAL
812 811 * ire_recv_loopback_v6 - for IRE_LOOPBACK
813 812 * ire_recv_multirt_v6 - if RTF_MULTIRT
814 813 * ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
815 814 * ire_recv_multicast_v6 - for IRE_MULTICAST
816 815 * ire_recv_noaccept_v6 - for ire_noaccept ones
817 816 * ire_recv_forward_v6 - for the rest.
818 817 */
819 818
820 819 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
821 820 }
822 821 #undef rptr
823 822
824 823 /*
825 824 * ire_recvfn for IREs that need forwarding
826 825 */
827 826 void
828 827 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
829 828 {
830 829 ip6_t *ip6h = (ip6_t *)iph_arg;
831 830 ill_t *ill = ira->ira_ill;
832 831 ip_stack_t *ipst = ill->ill_ipst;
833 832 iaflags_t iraflags = ira->ira_flags;
834 833 ill_t *dst_ill;
835 834 nce_t *nce;
836 835 uint32_t added_tx_len;
837 836 uint32_t mtu, iremtu;
838 837
839 838 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
840 839 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
841 840 ip_drop_input("l2 multicast not forwarded", mp, ill);
842 841 freemsg(mp);
843 842 return;
844 843 }
845 844
846 845 if (!(ill->ill_flags & ILLF_ROUTER)) {
847 846 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
848 847 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
849 848 freemsg(mp);
850 849 return;
851 850 }
852 851
853 852 /*
854 853 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
855 854 * when it is found by ire_route_recursive, but that some other thread
856 855 * could have changed the routes with the effect of clearing
857 856 * ire_dep_parent. In that case we'd end up dropping the packet, or
858 857 * finding a new nce below.
859 858 * Get, allocate, or update the nce.
860 859 * We get a refhold on ire_nce_cache as a result of this to avoid races
861 860 * where ire_nce_cache is deleted.
862 861 *
863 862 * This ensures that we don't forward if the interface is down since
864 863 * ipif_down removes all the nces.
865 864 */
866 865 mutex_enter(&ire->ire_lock);
867 866 nce = ire->ire_nce_cache;
868 867 if (nce == NULL) {
869 868 /* Not yet set up - try to set one up */
870 869 mutex_exit(&ire->ire_lock);
871 870 (void) ire_revalidate_nce(ire);
872 871 mutex_enter(&ire->ire_lock);
873 872 nce = ire->ire_nce_cache;
874 873 if (nce == NULL) {
875 874 mutex_exit(&ire->ire_lock);
876 875 /* The ire_dep_parent chain went bad, or no memory */
877 876 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
878 877 ip_drop_input("No ire_dep_parent", mp, ill);
879 878 freemsg(mp);
880 879 return;
881 880 }
882 881 }
883 882 nce_refhold(nce);
884 883 mutex_exit(&ire->ire_lock);
885 884
886 885 if (nce->nce_is_condemned) {
887 886 nce_t *nce1;
888 887
889 888 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
890 889 nce_refrele(nce);
891 890 if (nce1 == NULL) {
892 891 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
893 892 ip_drop_input("No nce", mp, ill);
894 893 freemsg(mp);
895 894 return;
896 895 }
897 896 nce = nce1;
898 897 }
899 898 dst_ill = nce->nce_ill;
900 899
901 900 /*
902 901 * Unless we are forwarding, drop the packet.
903 902 * Unlike IPv4 we don't allow source routed packets out the same
904 903 * interface when we are not a router.
905 904 * Note that ill_forward_set() will set the ILLF_ROUTER on
906 905 * all the group members when it gets an ipmp-ill or under-ill.
907 906 */
908 907 if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
909 908 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
910 909 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
911 910 freemsg(mp);
912 911 nce_refrele(nce);
913 912 return;
914 913 }
915 914
916 915 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
917 916 ire->ire_ib_pkt_count--;
918 917 /*
919 918 * Should only use IREs that are visible from the
920 919 * global zone for forwarding.
921 920 * For IPv6 any source route would have already been
922 921 * advanced in ip_fanout_v6
923 922 */
924 923 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
925 924 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
926 925 (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
927 926 ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
928 927 ire->ire_ib_pkt_count++;
929 928 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
930 929 ire_refrele(ire);
931 930 nce_refrele(nce);
932 931 return;
933 932 }
934 933 /*
935 934 * ipIfStatsHCInForwDatagrams should only be increment if there
936 935 * will be an attempt to forward the packet, which is why we
937 936 * increment after the above condition has been checked.
938 937 */
939 938 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
940 939
941 940 /* Initiate Read side IPPF processing */
942 941 if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
943 942 /* ip_process translates an IS_UNDER_IPMP */
944 943 mp = ip_process(IPP_FWD_IN, mp, ill, ill);
945 944 if (mp == NULL) {
946 945 /* ip_drop_packet and MIB done */
947 946 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
948 947 "during IPPF processing\n"));
949 948 nce_refrele(nce);
950 949 return;
951 950 }
952 951 }
953 952
954 953 DTRACE_PROBE4(ip6__forwarding__start,
955 954 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
956 955
957 956 if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
958 957 int error;
959 958
960 959 FW_HOOKS(ipst->ips_ip6_forwarding_event,
961 960 ipst->ips_ipv6firewall_forwarding,
962 961 ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
963 962
964 963 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
965 964
966 965 if (mp == NULL) {
967 966 nce_refrele(nce);
968 967 return;
969 968 }
970 969 /*
971 970 * Even if the destination was changed by the filter we use the
972 971 * forwarding decision that was made based on the address
973 972 * in ip_input.
974 973 */
975 974
976 975 /* Might have changed */
977 976 ip6h = (ip6_t *)mp->b_rptr;
978 977 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
979 978 }
980 979
981 980 /* Packet is being forwarded. Turning off hwcksum flag. */
982 981 DB_CKSUMFLAGS(mp) = 0;
983 982
984 983 /*
985 984 * Per RFC 3513 section 2.5.2, we must not forward packets with
986 985 * an unspecified source address.
987 986 * The loopback address check for both src and dst has already
988 987 * been checked in ip_input_v6
989 988 * In the future one can envision adding RPF checks using number 3.
990 989 */
991 990 switch (ipst->ips_src_check) {
992 991 case 0:
993 992 break;
994 993 case 1:
995 994 case 2:
996 995 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
997 996 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
998 997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
999 998 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1000 999 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1001 1000 nce_refrele(nce);
1002 1001 freemsg(mp);
1003 1002 return;
1004 1003 }
1005 1004 break;
1006 1005 }
1007 1006
1008 1007 /*
1009 1008 * Check to see if we're forwarding the packet to a
1010 1009 * different link from which it came. If so, check the
1011 1010 * source and destination addresses since routers must not
1012 1011 * forward any packets with link-local source or
1013 1012 * destination addresses to other links. Otherwise (if
1014 1013 * we're forwarding onto the same link), conditionally send
1015 1014 * a redirect message.
1016 1015 */
1017 1016 if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1018 1017 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1019 1018 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1020 1019 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1021 1020 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1022 1021 freemsg(mp);
1023 1022 nce_refrele(nce);
1024 1023 return;
1025 1024 }
1026 1025 /* TBD add site-local check at site boundary? */
1027 1026 } else if (ipst->ips_ipv6_send_redirects) {
1028 1027 ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1029 1028 }
1030 1029
1031 1030 added_tx_len = 0;
1032 1031 if (iraflags & IRAF_SYSTEM_LABELED) {
1033 1032 mblk_t *mp1;
1034 1033 uint32_t old_pkt_len = ira->ira_pktlen;
1035 1034
1036 1035 /*
1037 1036 * Check if it can be forwarded and add/remove
1038 1037 * CIPSO options as needed.
1039 1038 */
1040 1039 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1041 1040 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1042 1041 ip_drop_input("tsol_ip_forward", mp, ill);
1043 1042 freemsg(mp);
1044 1043 nce_refrele(nce);
1045 1044 return;
1046 1045 }
1047 1046 /*
1048 1047 * Size may have changed. Remember amount added in case
1049 1048 * ip_fragment needs to send an ICMP too big.
1050 1049 */
1051 1050 mp = mp1;
1052 1051 ip6h = (ip6_t *)mp->b_rptr;
1053 1052 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1054 1053 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1055 1054 if (ira->ira_pktlen > old_pkt_len)
1056 1055 added_tx_len = ira->ira_pktlen - old_pkt_len;
1057 1056 }
1058 1057
1059 1058 mtu = dst_ill->ill_mtu;
1060 1059 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1061 1060 mtu = iremtu;
1062 1061 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1063 1062 nce_refrele(nce);
1064 1063 return;
1065 1064
1066 1065 }
1067 1066
1068 1067 /*
1069 1068 * Used for sending out unicast and multicast packets that are
1070 1069 * forwarded.
1071 1070 */
1072 1071 void
1073 1072 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1074 1073 uint32_t mtu, uint32_t added_tx_len)
1075 1074 {
1076 1075 ill_t *dst_ill = nce->nce_ill;
1077 1076 uint32_t pkt_len;
1078 1077 iaflags_t iraflags = ira->ira_flags;
1079 1078 ip_stack_t *ipst = dst_ill->ill_ipst;
1080 1079
1081 1080 if (ip6h->ip6_hops-- <= 1) {
1082 1081 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1083 1082 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1084 1083 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1085 1084 ira);
1086 1085 return;
1087 1086 }
1088 1087
1089 1088 /* Initiate Write side IPPF processing before any fragmentation */
1090 1089 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1091 1090 /* ip_process translates an IS_UNDER_IPMP */
1092 1091 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1093 1092 if (mp == NULL) {
1094 1093 /* ip_drop_packet and MIB done */
1095 1094 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1096 1095 " during IPPF processing\n"));
1097 1096 return;
1098 1097 }
1099 1098 }
1100 1099
1101 1100 pkt_len = ira->ira_pktlen;
1102 1101
1103 1102 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1104 1103
1105 1104 if (pkt_len > mtu) {
1106 1105 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1107 1106 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1108 1107 if (iraflags & IRAF_SYSTEM_LABELED) {
1109 1108 /*
1110 1109 * Remove any CIPSO option added by
1111 1110 * tsol_ip_forward, and make sure we report
1112 1111 * a path MTU so that there
1113 1112 * is room to add such a CIPSO option for future
1114 1113 * packets.
1115 1114 */
1116 1115 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1117 1116 }
1118 1117 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1119 1118 return;
1120 1119 }
1121 1120
1122 1121 ASSERT(pkt_len ==
1123 1122 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1124 1123
1125 1124 if (iraflags & IRAF_LOOPBACK_COPY) {
1126 1125 /*
1127 1126 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1128 1127 * is don't care
1129 1128 */
1130 1129 (void) ip_postfrag_loopcheck(mp, nce,
1131 1130 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1132 1131 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1133 1132 } else {
1134 1133 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1135 1134 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1136 1135 }
1137 1136 }
1138 1137
1139 1138 /*
1140 1139 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1141 1140 * which is what ire_route_recursive returns when there is no matching ire.
1142 1141 * Send ICMP unreachable unless blackhole.
1143 1142 */
1144 1143 void
1145 1144 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1146 1145 {
1147 1146 ip6_t *ip6h = (ip6_t *)iph_arg;
1148 1147 ill_t *ill = ira->ira_ill;
1149 1148 ip_stack_t *ipst = ill->ill_ipst;
1150 1149
1151 1150 /* Would we have forwarded this packet if we had a route? */
1152 1151 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1153 1152 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1154 1153 ip_drop_input("l2 multicast not forwarded", mp, ill);
1155 1154 freemsg(mp);
1156 1155 return;
1157 1156 }
1158 1157
1159 1158 if (!(ill->ill_flags & ILLF_ROUTER)) {
1160 1159 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1161 1160 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1162 1161 freemsg(mp);
1163 1162 return;
1164 1163 }
1165 1164 /*
1166 1165 * If we had a route this could have been forwarded. Count as such.
1167 1166 *
1168 1167 * ipIfStatsHCInForwDatagrams should only be increment if there
1169 1168 * will be an attempt to forward the packet, which is why we
1170 1169 * increment after the above condition has been checked.
1171 1170 */
1172 1171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1173 1172
1174 1173 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1175 1174
1176 1175 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1177 1176 ipst);
1178 1177
1179 1178 if (ire->ire_flags & RTF_BLACKHOLE) {
1180 1179 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1181 1180 freemsg(mp);
1182 1181 } else {
1183 1182 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1184 1183
1185 1184 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1186 1185 ira);
1187 1186 }
1188 1187 }
1189 1188
1190 1189 /*
1191 1190 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1192 1191 * VRRP when in noaccept mode.
1193 1192 * We silently drop packets except for Neighbor Solicitations and
1194 1193 * Neighbor Advertisements.
1195 1194 */
1196 1195 void
1197 1196 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1198 1197 ip_recv_attr_t *ira)
1199 1198 {
1200 1199 ip6_t *ip6h = (ip6_t *)iph_arg;
1201 1200 ill_t *ill = ira->ira_ill;
1202 1201 icmp6_t *icmp6;
1203 1202 int ip_hdr_length;
1204 1203
1205 1204 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1206 1205 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1207 1206 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1208 1207 freemsg(mp);
1209 1208 return;
1210 1209 }
1211 1210 ip_hdr_length = ira->ira_ip_hdr_length;
1212 1211 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1213 1212 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1214 1213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1215 1214 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1216 1215 freemsg(mp);
1217 1216 return;
1218 1217 }
1219 1218 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1220 1219 if (ip6h == NULL) {
1221 1220 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1222 1221 freemsg(mp);
1223 1222 return;
1224 1223 }
1225 1224 }
1226 1225 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1227 1226
1228 1227 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1229 1228 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1230 1229 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1231 1230 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1232 1231 freemsg(mp);
1233 1232 return;
1234 1233 }
1235 1234 ire_recv_local_v6(ire, mp, ip6h, ira);
1236 1235 }
1237 1236
1238 1237 /*
1239 1238 * ire_recvfn for IRE_MULTICAST.
1240 1239 */
1241 1240 void
1242 1241 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1243 1242 ip_recv_attr_t *ira)
1244 1243 {
1245 1244 ip6_t *ip6h = (ip6_t *)iph_arg;
1246 1245 ill_t *ill = ira->ira_ill;
1247 1246
1248 1247 ASSERT(ire->ire_ill == ira->ira_ill);
1249 1248
1250 1249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1251 1250 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1252 1251
1253 1252 /* Tag for higher-level protocols */
1254 1253 ira->ira_flags |= IRAF_MULTICAST;
1255 1254
1256 1255 /*
1257 1256 * So that we don't end up with dups, only one ill an IPMP group is
1258 1257 * nominated to receive multicast traffic.
1259 1258 * If we have no cast_ill we are liberal and accept everything.
1260 1259 */
1261 1260 if (IS_UNDER_IPMP(ill)) {
1262 1261 ip_stack_t *ipst = ill->ill_ipst;
1263 1262
1264 1263 /* For an under ill_grp can change under lock */
1265 1264 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1266 1265 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1267 1266 ill->ill_grp->ig_cast_ill != NULL) {
1268 1267 rw_exit(&ipst->ips_ill_g_lock);
1269 1268 ip_drop_input("not on cast ill", mp, ill);
1270 1269 freemsg(mp);
1271 1270 return;
1272 1271 }
1273 1272 rw_exit(&ipst->ips_ill_g_lock);
1274 1273 /*
1275 1274 * We switch to the upper ill so that mrouter and hasmembers
1276 1275 * can operate on upper here and in ip_input_multicast.
1277 1276 */
1278 1277 ill = ipmp_ill_hold_ipmp_ill(ill);
1279 1278 if (ill != NULL) {
1280 1279 ASSERT(ill != ira->ira_ill);
1281 1280 ASSERT(ire->ire_ill == ira->ira_ill);
1282 1281 ira->ira_ill = ill;
1283 1282 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1284 1283 } else {
1285 1284 ill = ira->ira_ill;
1286 1285 }
1287 1286 }
1288 1287
1289 1288 #ifdef notdef
1290 1289 /*
1291 1290 * Check if we are a multicast router - send ip_mforward a copy of
1292 1291 * the packet.
1293 1292 * Due to mroute_decap tunnels we consider forwarding packets even if
1294 1293 * mrouted has not joined the allmulti group on this interface.
1295 1294 */
1296 1295 if (ipst->ips_ip_g_mrouter) {
1297 1296 int retval;
1298 1297
1299 1298 /*
1300 1299 * Clear the indication that this may have hardware
1301 1300 * checksum as we are not using it for forwarding.
1302 1301 */
1303 1302 DB_CKSUMFLAGS(mp) = 0;
1304 1303
1305 1304 /*
1306 1305 * ip_mforward helps us make these distinctions: If received
1307 1306 * on tunnel and not IGMP, then drop.
1308 1307 * If IGMP packet, then don't check membership
1309 1308 * If received on a phyint and IGMP or PIM, then
1310 1309 * don't check membership
1311 1310 */
1312 1311 retval = ip_mforward_v6(mp, ira);
1313 1312 /* ip_mforward updates mib variables if needed */
1314 1313
1315 1314 switch (retval) {
1316 1315 case 0:
1317 1316 /*
1318 1317 * pkt is okay and arrived on phyint.
1319 1318 */
1320 1319 break;
1321 1320 case -1:
1322 1321 /* pkt is mal-formed, toss it */
1323 1322 freemsg(mp);
1324 1323 goto done;
1325 1324 case 1:
1326 1325 /*
1327 1326 * pkt is okay and arrived on a tunnel
1328 1327 *
1329 1328 * If we are running a multicast router
1330 1329 * we need to see all mld packets, which
1331 1330 * are marked with router alerts.
1332 1331 */
1333 1332 if (ira->ira_flags & IRAF_ROUTER_ALERT)
1334 1333 goto forus;
1335 1334 ip_drop_input("Multicast on tunnel ignored", mp, ill);
1336 1335 freemsg(mp);
1337 1336 goto done;
1338 1337 }
1339 1338 }
1340 1339 #endif /* notdef */
1341 1340
1342 1341 /*
1343 1342 * If this was a router alert we skip the group membership check.
1344 1343 */
1345 1344 if (ira->ira_flags & IRAF_ROUTER_ALERT)
1346 1345 goto forus;
1347 1346
1348 1347 /*
1349 1348 * Check if we have members on this ill. This is not necessary for
1350 1349 * correctness because even if the NIC/GLD had a leaky filter, we
1351 1350 * filter before passing to each conn_t.
1352 1351 */
1353 1352 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1354 1353 /*
1355 1354 * Nobody interested
1356 1355 *
1357 1356 * This might just be caused by the fact that
1358 1357 * multiple IP Multicast addresses map to the same
1359 1358 * link layer multicast - no need to increment counter!
1360 1359 */
1361 1360 ip_drop_input("Multicast with no members", mp, ill);
1362 1361 freemsg(mp);
1363 1362 goto done;
1364 1363 }
1365 1364 forus:
1366 1365 ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1367 1366
1368 1367 /*
1369 1368 * After reassembly and IPsec we will need to duplicate the
1370 1369 * multicast packet for all matching zones on the ill.
1371 1370 */
1372 1371 ira->ira_zoneid = ALL_ZONES;
1373 1372
1374 1373 /* Reassemble on the ill on which the packet arrived */
1375 1374 ip_input_local_v6(ire, mp, ip6h, ira);
1376 1375 done:
1377 1376 if (ill != ire->ire_ill) {
1378 1377 ill_refrele(ill);
1379 1378 ira->ira_ill = ire->ire_ill;
1380 1379 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1381 1380 }
1382 1381 }
1383 1382
1384 1383 /*
1385 1384 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1386 1385 * Drop packets since we don't forward out multirt routes.
1387 1386 */
1388 1387 /* ARGSUSED */
1389 1388 void
1390 1389 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1391 1390 {
1392 1391 ill_t *ill = ira->ira_ill;
1393 1392
1394 1393 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1395 1394 ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1396 1395 freemsg(mp);
1397 1396 }
1398 1397
1399 1398 /*
1400 1399 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1401 1400 * has rewritten the packet to have a loopback destination address (We
1402 1401 * filter out packet with a loopback destination from arriving over the wire).
1403 1402 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1404 1403 */
1405 1404 void
1406 1405 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1407 1406 {
1408 1407 ip6_t *ip6h = (ip6_t *)iph_arg;
1409 1408 ill_t *ill = ira->ira_ill;
1410 1409 ill_t *ire_ill = ire->ire_ill;
1411 1410
1412 1411 ira->ira_zoneid = GLOBAL_ZONEID;
1413 1412
1414 1413 /* Switch to the lo0 ill for further processing */
1415 1414 if (ire_ill != ill) {
1416 1415 /*
1417 1416 * Update ira_ill to be the ILL on which the IP address
1418 1417 * is hosted.
1419 1418 * No need to hold the ill since we have a hold on the ire
1420 1419 */
1421 1420 ASSERT(ira->ira_ill == ira->ira_rill);
1422 1421 ira->ira_ill = ire_ill;
1423 1422
1424 1423 ip_input_local_v6(ire, mp, ip6h, ira);
1425 1424
1426 1425 /* Restore */
1427 1426 ASSERT(ira->ira_ill == ire_ill);
1428 1427 ira->ira_ill = ill;
1429 1428 return;
1430 1429
1431 1430 }
1432 1431 ip_input_local_v6(ire, mp, ip6h, ira);
1433 1432 }
1434 1433
1435 1434 /*
1436 1435 * ire_recvfn for IRE_LOCAL.
1437 1436 */
1438 1437 void
1439 1438 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1440 1439 {
1441 1440 ip6_t *ip6h = (ip6_t *)iph_arg;
1442 1441 ill_t *ill = ira->ira_ill;
1443 1442 ill_t *ire_ill = ire->ire_ill;
1444 1443
1445 1444 /* Make a note for DAD that this address is in use */
1446 1445 ire->ire_last_used_time = LBOLT_FASTPATH;
1447 1446
1448 1447 /* Only target the IRE_LOCAL with the right zoneid. */
1449 1448 ira->ira_zoneid = ire->ire_zoneid;
1450 1449
1451 1450 /*
1452 1451 * If the packet arrived on the wrong ill, we check that
1453 1452 * this is ok.
1454 1453 * If it is, then we ensure that we do the reassembly on
1455 1454 * the ill on which the address is hosted. We keep ira_rill as
1456 1455 * the one on which the packet arrived, so that IP_PKTINFO and
1457 1456 * friends can report this.
1458 1457 */
1459 1458 if (ire_ill != ill) {
1460 1459 ire_t *new_ire;
1461 1460
1462 1461 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1463 1462 if (new_ire == NULL) {
1464 1463 /* Drop packet */
1465 1464 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1466 1465 ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1467 1466 freemsg(mp);
1468 1467 return;
1469 1468 }
1470 1469 /*
1471 1470 * Update ira_ill to be the ILL on which the IP address
1472 1471 * is hosted. No need to hold the ill since we have a
1473 1472 * hold on the ire. Note that we do the switch even if
1474 1473 * new_ire == ire (for IPMP, ire would be the one corresponding
1475 1474 * to the IPMP ill).
1476 1475 */
1477 1476 ASSERT(ira->ira_ill == ira->ira_rill);
1478 1477 ira->ira_ill = new_ire->ire_ill;
1479 1478
1480 1479 /* ira_ruifindex tracks the upper for ira_rill */
1481 1480 if (IS_UNDER_IPMP(ill))
1482 1481 ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1483 1482
1484 1483 ip_input_local_v6(new_ire, mp, ip6h, ira);
1485 1484
1486 1485 /* Restore */
1487 1486 ASSERT(ira->ira_ill == new_ire->ire_ill);
1488 1487 ira->ira_ill = ill;
1489 1488 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1490 1489
1491 1490 if (new_ire != ire)
1492 1491 ire_refrele(new_ire);
1493 1492 return;
1494 1493 }
1495 1494
1496 1495 ip_input_local_v6(ire, mp, ip6h, ira);
1497 1496 }
1498 1497
1499 1498 /*
1500 1499 * Common function for packets arriving for the host. Handles
1501 1500 * checksum verification, reassembly checks, etc.
1502 1501 */
1503 1502 static void
1504 1503 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1505 1504 {
1506 1505 iaflags_t iraflags = ira->ira_flags;
1507 1506
1508 1507 /*
1509 1508 * For multicast we need some extra work before
1510 1509 * we call ip_fanout_v6(), since in the case of shared-IP zones
1511 1510 * we need to pretend that a packet arrived for each zoneid.
1512 1511 */
1513 1512 if (iraflags & IRAF_MULTICAST) {
1514 1513 ip_input_multicast_v6(ire, mp, ip6h, ira);
1515 1514 return;
1516 1515 }
1517 1516 ip_fanout_v6(mp, ip6h, ira);
1518 1517 }
1519 1518
1520 1519 /*
1521 1520 * Handle multiple zones which want to receive the same multicast packets
1522 1521 * on this ill by delivering a packet to each of them.
1523 1522 *
1524 1523 * Note that for packets delivered to transports we could instead do this
1525 1524 * as part of the fanout code, but since we need to handle icmp_inbound
1526 1525 * it is simpler to have multicast work the same as IPv4 broadcast.
1527 1526 *
1528 1527 * The ip_fanout matching for multicast matches based on ilm independent of
1529 1528 * zoneid since the zoneid restriction is applied when joining a multicast
1530 1529 * group.
1531 1530 */
1532 1531 /* ARGSUSED */
1533 1532 static void
1534 1533 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1535 1534 {
1536 1535 ill_t *ill = ira->ira_ill;
1537 1536 iaflags_t iraflags = ira->ira_flags;
1538 1537 ip_stack_t *ipst = ill->ill_ipst;
1539 1538 netstack_t *ns = ipst->ips_netstack;
1540 1539 zoneid_t zoneid;
1541 1540 mblk_t *mp1;
1542 1541 ip6_t *ip6h1;
1543 1542 uint_t ira_pktlen = ira->ira_pktlen;
1544 1543 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length;
1545 1544
1546 1545 /* ire_recv_multicast has switched to the upper ill for IPMP */
1547 1546 ASSERT(!IS_UNDER_IPMP(ill));
1548 1547
1549 1548 /*
1550 1549 * If we don't have more than one shared-IP zone, or if
1551 1550 * there are no members in anything but the global zone,
1552 1551 * then just set the zoneid and proceed.
1553 1552 */
1554 1553 if (ns->netstack_numzones == 1 ||
1555 1554 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1556 1555 GLOBAL_ZONEID)) {
1557 1556 ira->ira_zoneid = GLOBAL_ZONEID;
1558 1557
1559 1558 /* If sender didn't want this zone to receive it, drop */
1560 1559 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1561 1560 ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1562 1561 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1563 1562 freemsg(mp);
1564 1563 return;
1565 1564 }
1566 1565 ip_fanout_v6(mp, ip6h, ira);
1567 1566 return;
1568 1567 }
1569 1568
1570 1569 /*
1571 1570 * Here we loop over all zoneids that have members in the group
1572 1571 * and deliver a packet to ip_fanout for each zoneid.
1573 1572 *
1574 1573 * First find any members in the lowest numeric zoneid by looking for
1575 1574 * first zoneid larger than -1 (ALL_ZONES).
1576 1575 * We terminate the loop when we receive -1 (ALL_ZONES).
1577 1576 */
1578 1577 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1579 1578 for (; zoneid != ALL_ZONES;
1580 1579 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1581 1580 /*
1582 1581 * Avoid an extra copymsg/freemsg by skipping global zone here
1583 1582 * and doing that at the end.
1584 1583 */
1585 1584 if (zoneid == GLOBAL_ZONEID)
1586 1585 continue;
1587 1586
1588 1587 ira->ira_zoneid = zoneid;
1589 1588
1590 1589 /* If sender didn't want this zone to receive it, skip */
1591 1590 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1592 1591 ira->ira_no_loop_zoneid == ira->ira_zoneid)
1593 1592 continue;
1594 1593
1595 1594 mp1 = copymsg(mp);
1596 1595 if (mp1 == NULL) {
1597 1596 /* Failed to deliver to one zone */
1598 1597 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1599 1598 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1600 1599 continue;
1601 1600 }
1602 1601 ip6h1 = (ip6_t *)mp1->b_rptr;
1603 1602 ip_fanout_v6(mp1, ip6h1, ira);
1604 1603 /*
1605 1604 * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1606 1605 * so we restore them for a potential next iteration
1607 1606 */
1608 1607 ira->ira_pktlen = ira_pktlen;
1609 1608 ira->ira_ip_hdr_length = ira_ip_hdr_length;
1610 1609 }
1611 1610
1612 1611 /* Do the main ire */
1613 1612 ira->ira_zoneid = GLOBAL_ZONEID;
1614 1613 /* If sender didn't want this zone to receive it, drop */
1615 1614 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1616 1615 ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1617 1616 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1618 1617 freemsg(mp);
1619 1618 } else {
1620 1619 ip_fanout_v6(mp, ip6h, ira);
1621 1620 }
1622 1621 }
1623 1622
1624 1623
1625 1624 /*
1626 1625 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1627 1626 * is in use. Updates ira_zoneid and ira_flags as a result.
1628 1627 */
1629 1628 static void
1630 1629 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1631 1630 ip_recv_attr_t *ira)
1632 1631 {
1633 1632 uint16_t *up;
1634 1633 uint16_t lport;
1635 1634 zoneid_t zoneid;
1636 1635
1637 1636 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1638 1637
1639 1638 /*
1640 1639 * If the packet is unlabeled we might allow read-down
1641 1640 * for MAC_EXEMPT. Below we clear this if it is a multi-level
1642 1641 * port (MLP).
1643 1642 * Note that ira_tsl can be NULL here.
1644 1643 */
1645 1644 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1646 1645 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1647 1646
1648 1647 if (ira->ira_zoneid != ALL_ZONES)
1649 1648 return;
1650 1649
1651 1650 ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1652 1651
1653 1652 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1654 1653 switch (protocol) {
1655 1654 case IPPROTO_TCP:
1656 1655 case IPPROTO_SCTP:
1657 1656 case IPPROTO_UDP:
1658 1657 /* Caller ensures this */
1659 1658 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1660 1659
1661 1660 /*
1662 1661 * Only these transports support MLP.
1663 1662 * We know their destination port numbers is in
1664 1663 * the same place in the header.
1665 1664 */
1666 1665 lport = up[1];
1667 1666
1668 1667 /*
1669 1668 * No need to handle exclusive-stack zones
1670 1669 * since ALL_ZONES only applies to the shared IP instance.
1671 1670 */
1672 1671 zoneid = tsol_mlp_findzone(protocol, lport);
1673 1672 /*
1674 1673 * If no shared MLP is found, tsol_mlp_findzone returns
1675 1674 * ALL_ZONES. In that case, we assume it's SLP, and
1676 1675 * search for the zone based on the packet label.
1677 1676 *
1678 1677 * If there is such a zone, we prefer to find a
1679 1678 * connection in it. Otherwise, we look for a
1680 1679 * MAC-exempt connection in any zone whose label
1681 1680 * dominates the default label on the packet.
1682 1681 */
1683 1682 if (zoneid == ALL_ZONES)
1684 1683 zoneid = tsol_attr_to_zoneid(ira);
1685 1684 else
1686 1685 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1687 1686 break;
1688 1687 default:
1689 1688 /* Handle shared address for other protocols */
1690 1689 zoneid = tsol_attr_to_zoneid(ira);
1691 1690 break;
1692 1691 }
1693 1692 ira->ira_zoneid = zoneid;
1694 1693 }
1695 1694
1696 1695 /*
1697 1696 * Increment checksum failure statistics
1698 1697 */
1699 1698 static void
1700 1699 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1701 1700 {
1702 1701 ip_stack_t *ipst = ill->ill_ipst;
1703 1702
1704 1703 switch (protocol) {
1705 1704 case IPPROTO_TCP:
1706 1705 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1707 1706
1708 1707 if (hck_flags & HCK_FULLCKSUM)
1709 1708 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1710 1709 else if (hck_flags & HCK_PARTIALCKSUM)
1711 1710 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1712 1711 else
1713 1712 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1714 1713 break;
1715 1714 case IPPROTO_UDP:
1716 1715 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1717 1716 if (hck_flags & HCK_FULLCKSUM)
1718 1717 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1719 1718 else if (hck_flags & HCK_PARTIALCKSUM)
1720 1719 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1721 1720 else
1722 1721 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1723 1722 break;
1724 1723 case IPPROTO_ICMPV6:
1725 1724 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1726 1725 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1727 1726 break;
1728 1727 default:
1729 1728 ASSERT(0);
1730 1729 break;
1731 1730 }
1732 1731 }
1733 1732
1734 1733 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1735 1734 uint32_t
1736 1735 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1737 1736 {
1738 1737 uint_t ulp_len;
1739 1738 uint32_t cksum;
1740 1739 uint8_t protocol = ira->ira_protocol;
1741 1740 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1742 1741
1743 1742 #define iphs ((uint16_t *)ip6h)
1744 1743
1745 1744 switch (protocol) {
1746 1745 case IPPROTO_TCP:
1747 1746 ulp_len = ira->ira_pktlen - ip_hdr_length;
1748 1747
1749 1748 /* Protocol and length */
1750 1749 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1751 1750 /* IP addresses */
1752 1751 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1753 1752 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1754 1753 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1755 1754 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1756 1755 break;
1757 1756
1758 1757 case IPPROTO_UDP: {
1759 1758 udpha_t *udpha;
1760 1759
1761 1760 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length);
1762 1761
1763 1762 /* Protocol and length */
1764 1763 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1765 1764 /* IP addresses */
1766 1765 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1767 1766 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1768 1767 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1769 1768 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1770 1769 break;
1771 1770 }
1772 1771 case IPPROTO_ICMPV6:
1773 1772 ulp_len = ira->ira_pktlen - ip_hdr_length;
1774 1773
1775 1774 /* Protocol and length */
1776 1775 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1777 1776 /* IP addresses */
1778 1777 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1779 1778 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1780 1779 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1781 1780 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1782 1781 break;
1783 1782 default:
1784 1783 cksum = 0;
1785 1784 break;
1786 1785 }
1787 1786 #undef iphs
1788 1787 return (cksum);
1789 1788 }
1790 1789
1791 1790
1792 1791 /*
1793 1792 * Software verification of the ULP checksums.
1794 1793 * Returns B_TRUE if ok.
1795 1794 * Increments statistics of failed.
1796 1795 */
1797 1796 static boolean_t
1798 1797 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1799 1798 {
1800 1799 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1801 1800 uint32_t cksum;
1802 1801 uint8_t protocol = ira->ira_protocol;
1803 1802 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1804 1803
1805 1804 IP6_STAT(ipst, ip6_in_sw_cksum);
1806 1805
1807 1806 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1808 1807 protocol == IPPROTO_ICMPV6);
1809 1808
1810 1809 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1811 1810 cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1812 1811 if (cksum == 0)
1813 1812 return (B_TRUE);
1814 1813
1815 1814 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1816 1815 return (B_FALSE);
1817 1816 }
1818 1817
1819 1818 /*
1820 1819 * Verify the ULP checksums.
1821 1820 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1822 1821 * algorithm.
1823 1822 * Increments statistics if failed.
1824 1823 */
1825 1824 static boolean_t
1826 1825 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1827 1826 ip_recv_attr_t *ira)
1828 1827 {
1829 1828 ill_t *ill = ira->ira_rill;
1830 1829 uint16_t hck_flags;
1831 1830 uint32_t cksum;
1832 1831 mblk_t *mp1;
1833 1832 uint_t len;
1834 1833 uint8_t protocol = ira->ira_protocol;
1835 1834 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1836 1835
1837 1836
1838 1837 switch (protocol) {
1839 1838 case IPPROTO_TCP:
1840 1839 case IPPROTO_ICMPV6:
1841 1840 break;
1842 1841
1843 1842 case IPPROTO_UDP: {
1844 1843 udpha_t *udpha;
1845 1844
1846 1845 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length);
1847 1846 /*
1848 1847 * Before going through the regular checksum
1849 1848 * calculation, make sure the received checksum
1850 1849 * is non-zero. RFC 2460 says, a 0x0000 checksum
1851 1850 * in a UDP packet (within IPv6 packet) is invalid
1852 1851 * and should be replaced by 0xffff. This makes
1853 1852 * sense as regular checksum calculation will
1854 1853 * pass for both the cases i.e. 0x0000 and 0xffff.
1855 1854 * Removing one of the case makes error detection
1856 1855 * stronger.
1857 1856 */
1858 1857 if (udpha->uha_checksum == 0) {
1859 1858 /* 0x0000 checksum is invalid */
1860 1859 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1861 1860 return (B_FALSE);
1862 1861 }
1863 1862 break;
1864 1863 }
1865 1864 case IPPROTO_SCTP: {
1866 1865 sctp_hdr_t *sctph;
1867 1866 uint32_t pktsum;
1868 1867
1869 1868 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1870 1869 #ifdef DEBUG
1871 1870 if (skip_sctp_cksum)
1872 1871 return (B_TRUE);
1873 1872 #endif
1874 1873 pktsum = sctph->sh_chksum;
1875 1874 sctph->sh_chksum = 0;
1876 1875 cksum = sctp_cksum(mp, ip_hdr_length);
1877 1876 sctph->sh_chksum = pktsum;
1878 1877 if (cksum == pktsum)
1879 1878 return (B_TRUE);
1880 1879
1881 1880 /*
1882 1881 * Defer until later whether a bad checksum is ok
1883 1882 * in order to allow RAW sockets to use Adler checksum
1884 1883 * with SCTP.
1885 1884 */
1886 1885 ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1887 1886 return (B_TRUE);
1888 1887 }
1889 1888
1890 1889 default:
1891 1890 /* No ULP checksum to verify. */
1892 1891 return (B_TRUE);
1893 1892 }
1894 1893
1895 1894 /*
1896 1895 * Revert to software checksum calculation if the interface
1897 1896 * isn't capable of checksum offload.
1898 1897 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1899 1898 * Note: IRAF_NO_HW_CKSUM is not currently used.
1900 1899 */
1901 1900 ASSERT(!IS_IPMP(ill));
1902 1901 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1903 1902 !dohwcksum) {
1904 1903 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1905 1904 }
1906 1905
1907 1906 /*
1908 1907 * We apply this for all ULP protocols. Does the HW know to
1909 1908 * not set the flags for SCTP and other protocols.
1910 1909 */
1911 1910
1912 1911 hck_flags = DB_CKSUMFLAGS(mp);
1913 1912
1914 1913 if (hck_flags & HCK_FULLCKSUM_OK) {
1915 1914 /*
1916 1915 * Hardware has already verified the checksum.
1917 1916 */
1918 1917 return (B_TRUE);
1919 1918 }
1920 1919
1921 1920 if (hck_flags & HCK_FULLCKSUM) {
1922 1921 /*
1923 1922 * Full checksum has been computed by the hardware
1924 1923 * and has been attached. If the driver wants us to
1925 1924 * verify the correctness of the attached value, in
1926 1925 * order to protect against faulty hardware, compare
1927 1926 * it against -0 (0xFFFF) to see if it's valid.
1928 1927 */
1929 1928 cksum = DB_CKSUM16(mp);
1930 1929 if (cksum == 0xFFFF)
1931 1930 return (B_TRUE);
1932 1931 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1933 1932 return (B_FALSE);
1934 1933 }
1935 1934
1936 1935 mp1 = mp->b_cont;
1937 1936 if ((hck_flags & HCK_PARTIALCKSUM) &&
1938 1937 (mp1 == NULL || mp1->b_cont == NULL) &&
1939 1938 ip_hdr_length >= DB_CKSUMSTART(mp) &&
1940 1939 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1941 1940 uint32_t adj;
1942 1941 uchar_t *cksum_start;
1943 1942
1944 1943 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1945 1944
1946 1945 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1947 1946
1948 1947 /*
1949 1948 * Partial checksum has been calculated by hardware
1950 1949 * and attached to the packet; in addition, any
1951 1950 * prepended extraneous data is even byte aligned,
1952 1951 * and there are at most two mblks associated with
1953 1952 * the packet. If any such data exists, we adjust
1954 1953 * the checksum; also take care any postpended data.
1955 1954 */
1956 1955 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1957 1956 /*
1958 1957 * One's complement subtract extraneous checksum
1959 1958 */
1960 1959 cksum += DB_CKSUM16(mp);
1961 1960 if (adj >= cksum)
1962 1961 cksum = ~(adj - cksum) & 0xFFFF;
1963 1962 else
1964 1963 cksum -= adj;
1965 1964 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1966 1965 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1967 1966 if (!(~cksum & 0xFFFF))
1968 1967 return (B_TRUE);
1969 1968
1970 1969 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1971 1970 return (B_FALSE);
1972 1971 }
1973 1972 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1974 1973 }
1975 1974
1976 1975
1977 1976 /*
1978 1977 * Handle fanout of received packets.
1979 1978 * Unicast packets that are looped back (from ire_send_local_v6) and packets
1980 1979 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1981 1980 *
1982 1981 * IPQoS Notes
1983 1982 * Before sending it to the client, invoke IPPF processing. Policy processing
1984 1983 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1985 1984 */
1986 1985 void
1987 1986 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1988 1987 {
1989 1988 ill_t *ill = ira->ira_ill;
1990 1989 iaflags_t iraflags = ira->ira_flags;
1991 1990 ip_stack_t *ipst = ill->ill_ipst;
1992 1991 uint8_t protocol;
1993 1992 conn_t *connp;
1994 1993 #define rptr ((uchar_t *)ip6h)
1995 1994 uint_t ip_hdr_length;
1996 1995 uint_t min_ulp_header_length;
1997 1996 int offset;
1998 1997 ssize_t len;
1999 1998 netstack_t *ns = ipst->ips_netstack;
2000 1999 ipsec_stack_t *ipss = ns->netstack_ipsec;
2001 2000 ill_t *rill = ira->ira_rill;
2002 2001
2003 2002 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2004 2003
2005 2004 /*
2006 2005 * We repeat this as we parse over destination options header and
2007 2006 * fragment headers (earlier we've handled any hop-by-hop options
2008 2007 * header.)
2009 2008 * We update ira_protocol and ira_ip_hdr_length as we skip past
2010 2009 * the intermediate headers; they already point past any
2011 2010 * hop-by-hop header.
2012 2011 */
2013 2012 repeat:
2014 2013 protocol = ira->ira_protocol;
2015 2014 ip_hdr_length = ira->ira_ip_hdr_length;
2016 2015
2017 2016 /*
2018 2017 * Time for IPP once we've done reassembly and IPsec.
2019 2018 * We skip this for loopback packets since we don't do IPQoS
2020 2019 * on loopback.
2021 2020 */
2022 2021 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2023 2022 !(iraflags & IRAF_LOOPBACK) &&
2024 2023 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH &&
2025 2024 protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING &&
2026 2025 protocol != IPPROTO_FRAGMENT)) {
2027 2026 /*
2028 2027 * Use the interface on which the packet arrived - not where
2029 2028 * the IP address is hosted.
2030 2029 */
2031 2030 /* ip_process translates an IS_UNDER_IPMP */
2032 2031 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2033 2032 if (mp == NULL) {
2034 2033 /* ip_drop_packet and MIB done */
2035 2034 return;
2036 2035 }
2037 2036 }
2038 2037
2039 2038 /* Determine the minimum required size of the upper-layer header */
2040 2039 /* Need to do this for at least the set of ULPs that TX handles. */
2041 2040 switch (protocol) {
2042 2041 case IPPROTO_TCP:
2043 2042 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2044 2043 break;
2045 2044 case IPPROTO_SCTP:
2046 2045 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2047 2046 break;
2048 2047 case IPPROTO_UDP:
2049 2048 min_ulp_header_length = UDPH_SIZE;
2050 2049 break;
2051 2050 case IPPROTO_ICMP:
2052 2051 case IPPROTO_ICMPV6:
2053 2052 min_ulp_header_length = ICMPH_SIZE;
2054 2053 break;
2055 2054 case IPPROTO_FRAGMENT:
2056 2055 case IPPROTO_DSTOPTS:
2057 2056 case IPPROTO_ROUTING:
2058 2057 min_ulp_header_length = MIN_EHDR_LEN;
2059 2058 break;
2060 2059 default:
2061 2060 min_ulp_header_length = 0;
2062 2061 break;
2063 2062 }
2064 2063 /* Make sure we have the min ULP header length */
2065 2064 len = mp->b_wptr - rptr;
2066 2065 if (len < ip_hdr_length + min_ulp_header_length) {
2067 2066 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2068 2067 goto pkt_too_short;
2069 2068
2070 2069 IP6_STAT(ipst, ip6_recv_pullup);
2071 2070 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2072 2071 ira);
2073 2072 if (ip6h == NULL)
2074 2073 goto discard;
2075 2074 len = mp->b_wptr - rptr;
2076 2075 }
2077 2076
2078 2077 /*
2079 2078 * If trusted extensions then determine the zoneid and TX specific
2080 2079 * ira_flags.
2081 2080 */
2082 2081 if (iraflags & IRAF_SYSTEM_LABELED) {
2083 2082 /* This can update ira->ira_flags and ira->ira_zoneid */
2084 2083 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2085 2084 iraflags = ira->ira_flags;
2086 2085 }
2087 2086
2088 2087
2089 2088 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2090 2089 if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2091 2090 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2092 2091 /* Bad checksum. Stats are already incremented */
2093 2092 ip_drop_input("Bad ULP checksum", mp, ill);
2094 2093 freemsg(mp);
2095 2094 return;
2096 2095 }
2097 2096 /* IRAF_SCTP_CSUM_ERR could have been set */
2098 2097 iraflags = ira->ira_flags;
2099 2098 }
2100 2099 switch (protocol) {
2101 2100 case IPPROTO_TCP:
2102 2101 /* For TCP, discard multicast packets. */
2103 2102 if (iraflags & IRAF_MULTIBROADCAST)
2104 2103 goto discard;
2105 2104
2106 2105 /* First mblk contains IP+TCP headers per above check */
2107 2106 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2108 2107
2109 2108 /* TCP options present? */
2110 2109 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2111 2110 if (offset != 5) {
2112 2111 if (offset < 5)
2113 2112 goto discard;
2114 2113
2115 2114 /*
2116 2115 * There must be TCP options.
2117 2116 * Make sure we can grab them.
2118 2117 */
2119 2118 offset <<= 2;
2120 2119 offset += ip_hdr_length;
2121 2120 if (len < offset) {
2122 2121 if (ira->ira_pktlen < offset)
2123 2122 goto pkt_too_short;
2124 2123
2125 2124 IP6_STAT(ipst, ip6_recv_pullup);
2126 2125 ip6h = ip_pullup(mp, offset, ira);
2127 2126 if (ip6h == NULL)
2128 2127 goto discard;
2129 2128 len = mp->b_wptr - rptr;
2130 2129 }
2131 2130 }
2132 2131
2133 2132 /*
2134 2133 * Pass up a squeue hint to tcp.
2135 2134 * If ira_sqp is already set (this is loopback) we leave it
2136 2135 * alone.
2137 2136 */
2138 2137 if (ira->ira_sqp == NULL) {
2139 2138 ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2140 2139 }
2141 2140
2142 2141 /* Look for AF_INET or AF_INET6 that matches */
2143 2142 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2144 2143 ira, ipst);
2145 2144 if (connp == NULL) {
2146 2145 /* Send the TH_RST */
2147 2146 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2148 2147 tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2149 2148 return;
2150 2149 }
2151 2150 if (connp->conn_incoming_ifindex != 0 &&
2152 2151 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2153 2152 CONN_DEC_REF(connp);
2154 2153
2155 2154 /* Send the TH_RST */
2156 2155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2157 2156 tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2158 2157 return;
2159 2158 }
2160 2159 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2161 2160 (iraflags & IRAF_IPSEC_SECURE)) {
2162 2161 mp = ipsec_check_inbound_policy(mp, connp,
2163 2162 NULL, ip6h, ira);
2164 2163 if (mp == NULL) {
2165 2164 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2166 2165 /* Note that mp is NULL */
2167 2166 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2168 2167 CONN_DEC_REF(connp);
2169 2168 return;
2170 2169 }
2171 2170 }
2172 2171 /* Found a client; up it goes */
2173 2172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2174 2173 ira->ira_ill = ira->ira_rill = NULL;
2175 2174 if (!IPCL_IS_TCP(connp)) {
2176 2175 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2177 2176 (connp->conn_recv)(connp, mp, NULL, ira);
2178 2177 CONN_DEC_REF(connp);
2179 2178 ira->ira_ill = ill;
2180 2179 ira->ira_rill = rill;
2181 2180 return;
2182 2181 }
2183 2182
2184 2183 /*
2185 2184 * We do different processing whether called from
2186 2185 * ip_accept_tcp and we match the target, don't match
2187 2186 * the target, and when we are called by ip_input.
2188 2187 */
2189 2188 if (iraflags & IRAF_TARGET_SQP) {
2190 2189 if (ira->ira_target_sqp == connp->conn_sqp) {
2191 2190 mblk_t *attrmp;
2192 2191
2193 2192 attrmp = ip_recv_attr_to_mblk(ira);
2194 2193 if (attrmp == NULL) {
2195 2194 BUMP_MIB(ill->ill_ip_mib,
2196 2195 ipIfStatsInDiscards);
2197 2196 ip_drop_input("ipIfStatsInDiscards",
2198 2197 mp, ill);
2199 2198 freemsg(mp);
2200 2199 CONN_DEC_REF(connp);
2201 2200 } else {
2202 2201 SET_SQUEUE(attrmp, connp->conn_recv,
2203 2202 connp);
2204 2203 attrmp->b_cont = mp;
2205 2204 ASSERT(ira->ira_target_sqp_mp == NULL);
2206 2205 ira->ira_target_sqp_mp = attrmp;
2207 2206 /*
2208 2207 * Conn ref release when drained from
2209 2208 * the squeue.
2210 2209 */
2211 2210 }
2212 2211 } else {
2213 2212 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2214 2213 connp->conn_recv, connp, ira, SQ_FILL,
2215 2214 SQTAG_IP6_TCP_INPUT);
2216 2215 }
2217 2216 } else {
2218 2217 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2219 2218 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2220 2219 }
2221 2220 ira->ira_ill = ill;
2222 2221 ira->ira_rill = rill;
2223 2222 return;
2224 2223
2225 2224 case IPPROTO_SCTP: {
2226 2225 sctp_hdr_t *sctph;
2227 2226 uint32_t ports; /* Source and destination ports */
2228 2227 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp;
2229 2228
2230 2229 /* For SCTP, discard multicast packets. */
2231 2230 if (iraflags & IRAF_MULTIBROADCAST)
2232 2231 goto discard;
2233 2232
2234 2233 /*
2235 2234 * Since there is no SCTP h/w cksum support yet, just
2236 2235 * clear the flag.
2237 2236 */
2238 2237 DB_CKSUMFLAGS(mp) = 0;
2239 2238
2240 2239 /* Length ensured above */
2241 2240 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2242 2241 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2243 2242
2244 2243 /* get the ports */
2245 2244 ports = *(uint32_t *)&sctph->sh_sport;
2246 2245
2247 2246 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2248 2247 /*
2249 2248 * No potential sctp checksum errors go to the Sun
2250 2249 * sctp stack however they might be Adler-32 summed
2251 2250 * packets a userland stack bound to a raw IP socket
2252 2251 * could reasonably use. Note though that Adler-32 is
2253 2252 * a long deprecated algorithm and customer sctp
2254 2253 * networks should eventually migrate to CRC-32 at
2255 2254 * which time this facility should be removed.
2256 2255 */
2257 2256 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2258 2257 return;
2259 2258 }
2260 2259 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2261 2260 ira, mp, sctps, sctph);
2262 2261 if (connp == NULL) {
2263 2262 /* Check for raw socket or OOTB handling */
2264 2263 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2265 2264 return;
2266 2265 }
2267 2266 if (connp->conn_incoming_ifindex != 0 &&
2268 2267 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2269 2268 CONN_DEC_REF(connp);
2270 2269
2271 2270 /* Check for raw socket or OOTB handling */
2272 2271 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2273 2272 return;
2274 2273 }
2275 2274
2276 2275 /* Found a client; up it goes */
2277 2276 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2278 2277 sctp_input(connp, NULL, ip6h, mp, ira);
2279 2278 /* sctp_input does a rele of the sctp_t */
2280 2279 return;
2281 2280 }
2282 2281
2283 2282 case IPPROTO_UDP:
2284 2283 /* First mblk contains IP+UDP headers as checked above */
2285 2284 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2286 2285
2287 2286 if (iraflags & IRAF_MULTIBROADCAST) {
2288 2287 uint16_t *up; /* Pointer to ports in ULP header */
2289 2288
2290 2289 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2291 2290
2292 2291 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2293 2292 return;
2294 2293 }
2295 2294
2296 2295 /* Look for AF_INET or AF_INET6 that matches */
2297 2296 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2298 2297 ira, ipst);
2299 2298 if (connp == NULL) {
2300 2299 no_udp_match:
2301 2300 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2302 2301 connf_head != NULL) {
2303 2302 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2304 2303 ip_fanout_proto_v6(mp, ip6h, ira);
2305 2304 } else {
2306 2305 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2307 2306 ICMP6_DST_UNREACH_NOPORT, ira);
2308 2307 }
2309 2308 return;
2310 2309
2311 2310 }
2312 2311 if (connp->conn_incoming_ifindex != 0 &&
2313 2312 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2314 2313 CONN_DEC_REF(connp);
2315 2314 goto no_udp_match;
2316 2315 }
2317 2316 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2318 2317 !canputnext(connp->conn_rq)) {
2319 2318 CONN_DEC_REF(connp);
2320 2319 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2321 2320 ip_drop_input("udpIfStatsInOverflows", mp, ill);
2322 2321 freemsg(mp);
2323 2322 return;
2324 2323 }
2325 2324 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2326 2325 (iraflags & IRAF_IPSEC_SECURE)) {
2327 2326 mp = ipsec_check_inbound_policy(mp, connp,
2328 2327 NULL, ip6h, ira);
2329 2328 if (mp == NULL) {
2330 2329 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2331 2330 /* Note that mp is NULL */
2332 2331 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2333 2332 CONN_DEC_REF(connp);
2334 2333 return;
2335 2334 }
2336 2335 }
2337 2336
2338 2337 /* Found a client; up it goes */
2339 2338 IP6_STAT(ipst, ip6_udp_fannorm);
2340 2339 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2341 2340 ira->ira_ill = ira->ira_rill = NULL;
2342 2341 (connp->conn_recv)(connp, mp, NULL, ira);
2343 2342 CONN_DEC_REF(connp);
2344 2343 ira->ira_ill = ill;
2345 2344 ira->ira_rill = rill;
2346 2345 return;
2347 2346 default:
2348 2347 break;
2349 2348 }
2350 2349
2351 2350 /*
2352 2351 * Clear hardware checksumming flag as it is currently only
2353 2352 * used by TCP and UDP.
2354 2353 */
2355 2354 DB_CKSUMFLAGS(mp) = 0;
2356 2355
2357 2356 switch (protocol) {
2358 2357 case IPPROTO_ICMPV6:
2359 2358 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2360 2359
2361 2360 /* Check variable for testing applications */
2362 2361 if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2363 2362 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2364 2363 freemsg(mp);
2365 2364 return;
2366 2365 }
2367 2366 /*
2368 2367 * We need to accommodate icmp messages coming in clear
2369 2368 * until we get everything secure from the wire. If
2370 2369 * icmp_accept_clear_messages is zero we check with
2371 2370 * the global policy and act accordingly. If it is
2372 2371 * non-zero, we accept the message without any checks.
2373 2372 * But *this does not mean* that this will be delivered
2374 2373 * to RAW socket clients. By accepting we might send
2375 2374 * replies back, change our MTU value etc.,
2376 2375 * but delivery to the ULP/clients depends on their
2377 2376 * policy dispositions.
2378 2377 */
2379 2378 if (ipst->ips_icmp_accept_clear_messages == 0) {
2380 2379 mp = ipsec_check_global_policy(mp, NULL,
2381 2380 NULL, ip6h, ira, ns);
2382 2381 if (mp == NULL)
2383 2382 return;
2384 2383 }
2385 2384
2386 2385 /*
2387 2386 * On a labeled system, we have to check whether the zone
2388 2387 * itself is permitted to receive raw traffic.
2389 2388 */
2390 2389 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2391 2390 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2392 2391 BUMP_MIB(ill->ill_icmp6_mib,
2393 2392 ipv6IfIcmpInErrors);
2394 2393 ip_drop_input("tsol_can_accept_raw", mp, ill);
2395 2394 freemsg(mp);
2396 2395 return;
2397 2396 }
2398 2397 }
2399 2398
2400 2399 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2401 2400 mp = icmp_inbound_v6(mp, ira);
2402 2401 if (mp == NULL) {
2403 2402 /* No need to pass to RAW sockets */
2404 2403 return;
2405 2404 }
2406 2405 break;
2407 2406
2408 2407 case IPPROTO_DSTOPTS: {
2409 2408 ip6_dest_t *desthdr;
2410 2409 uint_t ehdrlen;
2411 2410 uint8_t *optptr;
2412 2411
2413 2412 /* We already check for MIN_EHDR_LEN above */
2414 2413
2415 2414 /* Check if AH is present and needs to be processed. */
2416 2415 mp = ipsec_early_ah_v6(mp, ira);
2417 2416 if (mp == NULL)
2418 2417 return;
2419 2418
2420 2419 /*
2421 2420 * Reinitialize pointers, as ipsec_early_ah_v6() does
2422 2421 * complete pullups. We don't have to do more pullups
2423 2422 * as a result.
2424 2423 */
2425 2424 ip6h = (ip6_t *)mp->b_rptr;
2426 2425
2427 2426 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2428 2427 goto pkt_too_short;
2429 2428
2430 2429 if (mp->b_cont != NULL &&
2431 2430 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2432 2431 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2433 2432 if (ip6h == NULL)
2434 2433 goto discard;
2435 2434 }
2436 2435 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2437 2436 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2438 2437 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2439 2438 goto pkt_too_short;
2440 2439 if (mp->b_cont != NULL &&
2441 2440 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2442 2441 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2443 2442 if (ip6h == NULL)
2444 2443 goto discard;
2445 2444
2446 2445 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2447 2446 }
2448 2447 optptr = (uint8_t *)&desthdr[1];
2449 2448
2450 2449 /*
2451 2450 * Update ira_ip_hdr_length to skip the destination header
2452 2451 * when we repeat.
2453 2452 */
2454 2453 ira->ira_ip_hdr_length += ehdrlen;
2455 2454
2456 2455 ira->ira_protocol = desthdr->ip6d_nxt;
2457 2456
2458 2457 /*
2459 2458 * Note: XXX This code does not seem to make
2460 2459 * distinction between Destination Options Header
2461 2460 * being before/after Routing Header which can
2462 2461 * happen if we are at the end of source route.
2463 2462 * This may become significant in future.
2464 2463 * (No real significant Destination Options are
2465 2464 * defined/implemented yet ).
2466 2465 */
2467 2466 switch (ip_process_options_v6(mp, ip6h, optptr,
2468 2467 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2469 2468 case -1:
2470 2469 /*
2471 2470 * Packet has been consumed and any needed
2472 2471 * ICMP errors sent.
2473 2472 */
2474 2473 return;
2475 2474 case 0:
2476 2475 /* No action needed continue */
2477 2476 break;
2478 2477 case 1:
2479 2478 /*
2480 2479 * Unexpected return value
2481 2480 * (Router alert is a Hop-by-Hop option)
2482 2481 */
2483 2482 #ifdef DEBUG
2484 2483 panic("ip_fanout_v6: router "
2485 2484 "alert hbh opt indication in dest opt");
2486 2485 /*NOTREACHED*/
2487 2486 #else
2488 2487 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2489 2488 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2490 2489 freemsg(mp);
2491 2490 return;
2492 2491 #endif
2493 2492 }
2494 2493 goto repeat;
2495 2494 }
2496 2495 case IPPROTO_FRAGMENT: {
2497 2496 ip6_frag_t *fraghdr;
2498 2497
2499 2498 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2500 2499 goto pkt_too_short;
2501 2500
2502 2501 if (mp->b_cont != NULL &&
2503 2502 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2504 2503 ip6h = ip_pullup(mp,
2505 2504 ip_hdr_length + sizeof (ip6_frag_t), ira);
2506 2505 if (ip6h == NULL)
2507 2506 goto discard;
2508 2507 }
2509 2508
2510 2509 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2511 2510 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2512 2511
2513 2512 /*
2514 2513 * Invoke the CGTP (multirouting) filtering module to
2515 2514 * process the incoming packet. Packets identified as
2516 2515 * duplicates must be discarded. Filtering is active
2517 2516 * only if the ip_cgtp_filter ndd variable is
2518 2517 * non-zero.
2519 2518 */
2520 2519 if (ipst->ips_ip_cgtp_filter &&
2521 2520 ipst->ips_ip_cgtp_filter_ops != NULL) {
2522 2521 int cgtp_flt_pkt;
2523 2522 netstackid_t stackid;
2524 2523
2525 2524 stackid = ipst->ips_netstack->netstack_stackid;
2526 2525
2527 2526 /*
2528 2527 * CGTP and IPMP are mutually exclusive so
2529 2528 * phyint_ifindex is fine here.
2530 2529 */
2531 2530 cgtp_flt_pkt =
2532 2531 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2533 2532 stackid, ill->ill_phyint->phyint_ifindex,
2534 2533 ip6h, fraghdr);
2535 2534 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2536 2535 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2537 2536 freemsg(mp);
2538 2537 return;
2539 2538 }
2540 2539 }
2541 2540
2542 2541 /*
2543 2542 * Update ip_hdr_length to skip the frag header
2544 2543 * ip_input_fragment_v6 will determine the extension header
2545 2544 * prior to the fragment header and update its nexthdr value,
2546 2545 * and also set ira_protocol to the nexthdr that follows the
2547 2546 * completed fragment.
2548 2547 */
2549 2548 ip_hdr_length += sizeof (ip6_frag_t);
2550 2549
2551 2550 /*
2552 2551 * Make sure we have ira_l2src before we lose the original
2553 2552 * mblk
2554 2553 */
2555 2554 if (!(ira->ira_flags & IRAF_L2SRC_SET))
2556 2555 ip_setl2src(mp, ira, ira->ira_rill);
2557 2556
2558 2557 mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2559 2558 ira->ira_pktlen - ip_hdr_length, ira);
2560 2559 if (mp == NULL) {
2561 2560 /* Reassembly is still pending */
2562 2561 return;
2563 2562 }
2564 2563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2565 2564
2566 2565 /*
2567 2566 * The mblk chain has the frag header removed and
2568 2567 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2569 2568 * IP header has been updated to reflect the result.
2570 2569 */
2571 2570 ip6h = (ip6_t *)mp->b_rptr;
2572 2571 ip_hdr_length = ira->ira_ip_hdr_length;
2573 2572 goto repeat;
2574 2573 }
2575 2574 case IPPROTO_HOPOPTS:
2576 2575 /*
2577 2576 * Illegal header sequence.
2578 2577 * (Hop-by-hop headers are processed above
2579 2578 * and required to immediately follow IPv6 header)
2580 2579 */
2581 2580 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2582 2581 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2583 2582 return;
2584 2583
2585 2584 case IPPROTO_ROUTING: {
2586 2585 uint_t ehdrlen;
2587 2586 ip6_rthdr_t *rthdr;
2588 2587
2589 2588 /* Check if AH is present and needs to be processed. */
2590 2589 mp = ipsec_early_ah_v6(mp, ira);
2591 2590 if (mp == NULL)
2592 2591 return;
2593 2592
2594 2593 /*
2595 2594 * Reinitialize pointers, as ipsec_early_ah_v6() does
2596 2595 * complete pullups. We don't have to do more pullups
2597 2596 * as a result.
2598 2597 */
2599 2598 ip6h = (ip6_t *)mp->b_rptr;
2600 2599
2601 2600 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2602 2601 goto pkt_too_short;
2603 2602
2604 2603 if (mp->b_cont != NULL &&
2605 2604 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2606 2605 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2607 2606 if (ip6h == NULL)
2608 2607 goto discard;
2609 2608 }
2610 2609 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2611 2610 protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2612 2611 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2613 2612 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2614 2613 goto pkt_too_short;
2615 2614 if (mp->b_cont != NULL &&
2616 2615 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2617 2616 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2618 2617 if (ip6h == NULL)
2619 2618 goto discard;
2620 2619 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2621 2620 }
2622 2621 if (rthdr->ip6r_segleft != 0) {
2623 2622 /* Not end of source route */
2624 2623 if (ira->ira_flags &
2625 2624 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2626 2625 BUMP_MIB(ill->ill_ip_mib,
2627 2626 ipIfStatsForwProhibits);
2628 2627 ip_drop_input("ipIfStatsInForwProhibits",
2629 2628 mp, ill);
2630 2629 freemsg(mp);
2631 2630 return;
2632 2631 }
2633 2632 ip_process_rthdr(mp, ip6h, rthdr, ira);
2634 2633 return;
2635 2634 }
2636 2635 ira->ira_ip_hdr_length += ehdrlen;
2637 2636 goto repeat;
2638 2637 }
2639 2638
2640 2639 case IPPROTO_AH:
2641 2640 case IPPROTO_ESP: {
2642 2641 /*
2643 2642 * Fast path for AH/ESP.
2644 2643 */
2645 2644 netstack_t *ns = ipst->ips_netstack;
2646 2645 ipsec_stack_t *ipss = ns->netstack_ipsec;
2647 2646
2648 2647 IP_STAT(ipst, ipsec_proto_ahesp);
2649 2648
2650 2649 if (!ipsec_loaded(ipss)) {
2651 2650 ip_proto_not_sup(mp, ira);
2652 2651 return;
2653 2652 }
2654 2653
2655 2654 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2656 2655 /* select inbound SA and have IPsec process the pkt */
2657 2656 if (protocol == IPPROTO_ESP) {
2658 2657 esph_t *esph;
2659 2658
2660 2659 mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2661 2660 if (mp == NULL)
2662 2661 return;
2663 2662
2664 2663 ASSERT(esph != NULL);
2665 2664 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2666 2665 ASSERT(ira->ira_ipsec_esp_sa != NULL);
2667 2666 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2668 2667
2669 2668 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2670 2669 ira);
2671 2670 } else {
2672 2671 ah_t *ah;
2673 2672
2674 2673 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2675 2674 if (mp == NULL)
2676 2675 return;
2677 2676
2678 2677 ASSERT(ah != NULL);
2679 2678 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2680 2679 ASSERT(ira->ira_ipsec_ah_sa != NULL);
2681 2680 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2682 2681 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2683 2682 ira);
2684 2683 }
2685 2684
2686 2685 if (mp == NULL) {
2687 2686 /*
2688 2687 * Either it failed or is pending. In the former case
2689 2688 * ipIfStatsInDiscards was increased.
2690 2689 */
2691 2690 return;
2692 2691 }
2693 2692 /* we're done with IPsec processing, send it up */
2694 2693 ip_input_post_ipsec(mp, ira);
2695 2694 return;
2696 2695 }
2697 2696 case IPPROTO_NONE:
2698 2697 /* All processing is done. Count as "delivered". */
2699 2698 freemsg(mp);
2700 2699 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2701 2700 return;
2702 2701
2703 2702 case IPPROTO_ENCAP:
2704 2703 case IPPROTO_IPV6:
2705 2704 /* iptun will verify trusted label */
2706 2705 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2707 2706 ira, ipst);
2708 2707 if (connp != NULL) {
2709 2708 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2710 2709 ira->ira_ill = ira->ira_rill = NULL;
2711 2710 connp->conn_recv(connp, mp, NULL, ira);
2712 2711 CONN_DEC_REF(connp);
2713 2712 ira->ira_ill = ill;
2714 2713 ira->ira_rill = rill;
2715 2714 return;
2716 2715 }
2717 2716 /* FALLTHRU */
2718 2717 default:
2719 2718 /*
2720 2719 * On a labeled system, we have to check whether the zone
2721 2720 * itself is permitted to receive raw traffic.
2722 2721 */
2723 2722 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2724 2723 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2725 2724 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2726 2725 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2727 2726 freemsg(mp);
2728 2727 return;
2729 2728 }
2730 2729 }
2731 2730 break;
2732 2731 }
2733 2732
2734 2733 /*
2735 2734 * The above input functions may have returned the pulled up message.
2736 2735 * So ip6h needs to be reinitialized.
2737 2736 */
2738 2737 ip6h = (ip6_t *)mp->b_rptr;
2739 2738 ira->ira_protocol = protocol;
2740 2739 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2741 2740 /* No user-level listener for these packets */
2742 2741 ip_proto_not_sup(mp, ira);
2743 2742 return;
2744 2743 }
2745 2744
2746 2745 /*
2747 2746 * Handle fanout to raw sockets. There
2748 2747 * can be more than one stream bound to a particular
2749 2748 * protocol. When this is the case, each one gets a copy
2750 2749 * of any incoming packets.
2751 2750 */
2752 2751 ASSERT(ira->ira_protocol == protocol);
2753 2752 ip_fanout_proto_v6(mp, ip6h, ira);
2754 2753 return;
2755 2754
2756 2755 pkt_too_short:
2757 2756 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2758 2757 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2759 2758 freemsg(mp);
2760 2759 return;
2761 2760
2762 2761 discard:
2763 2762 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2764 2763 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2765 2764 freemsg(mp);
2766 2765 #undef rptr
2767 2766 }
↓ open down ↓ |
2606 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX